
Commit 21f2897

Updated benchmarking scripts
1 parent 3a576e1 commit 21f2897

File tree

2 files changed: +116 / -18 lines


demo/debug/supervised_learning_feature_subsets.py

Lines changed: 27 additions & 18 deletions
@@ -2,9 +2,6 @@
 
 # Load necessary libraries
 import numpy as np
-import pandas as pd
-import seaborn as sns
-import matplotlib.pyplot as plt
 from stochtree import BARTModel
 from sklearn.model_selection import train_test_split
 import timeit
@@ -16,25 +13,21 @@
 
 # Generate covariates and basis
 n = 1000
-p_X = 100
-X = rng.uniform(0, 1, (n, p_X))
+p = 100
+X = rng.uniform(0, 1, (n, p))
 
 # Define the outcome mean function
 def outcome_mean(X):
-    return np.where(
-        (X[:,0] >= 0.0) & (X[:,0] < 0.25), -7.5,
-        np.where(
-            (X[:,0] >= 0.25) & (X[:,0] < 0.5), -2.5,
-            np.where(
-                (X[:,0] >= 0.5) & (X[:,0] < 0.75), 2.5,
-                7.5
-            )
-        )
+    return (
+        np.sin(4*np.pi*X[:,0]) + np.cos(4*np.pi*X[:,1]) + np.sin(4*np.pi*X[:,2]) + np.cos(4*np.pi*X[:,3])
     )
 
 # Generate outcome
-epsilon = rng.normal(0, 1, n)
-y = outcome_mean(X) + epsilon
+snr = 2
+f_X = outcome_mean(X)
+noise_sd = np.std(f_X) / snr
+epsilon = rng.normal(0, 1, n) * noise_sd
+y = f_X + epsilon
 
 # Test-train split
 sample_inds = np.arange(n)
@@ -50,12 +43,28 @@ def outcome_mean(X):
 forest_config_a = {"num_trees": 100}
 bart_model_a.sample(X_train=X_train, y_train=y_train, X_test=X_test, num_gfr=100, num_mcmc=0, mean_forest_params=forest_config_a)
 """
-print(timeit.timeit(stmt=s, number=5, globals=globals()))
+timing_no_subsampling = timeit.timeit(stmt=s, number=5, globals=globals())
+print(f"Average runtime, without feature subsampling (p = {p:d}): {timing_no_subsampling:.2f}")
 
 # Run XBART with each tree considering random subsets of 5 features
 s = """\
 bart_model_b = BARTModel()
 forest_config_b = {"num_trees": 100, "num_features_subsample": 5}
 bart_model_b.sample(X_train=X_train, y_train=y_train, X_test=X_test, num_gfr=100, num_mcmc=0, mean_forest_params=forest_config_b)
 """
-print(timeit.timeit(stmt=s, number=5, globals=globals()))
+timing_subsampling = timeit.timeit(stmt=s, number=5, globals=globals())
+print(f"Average runtime, subsampling 5 out of {p:d} features: {timing_subsampling:.2f}")
+
+# Compare RMSEs of each model
+bart_model_a = BARTModel()
+forest_config_a = {"num_trees": 100}
+bart_model_a.sample(X_train=X_train, y_train=y_train, X_test=X_test, num_gfr=100, num_mcmc=0, mean_forest_params=forest_config_a)
+bart_model_b = BARTModel()
+forest_config_b = {"num_trees": 100, "num_features_subsample": 5}
+bart_model_b.sample(X_train=X_train, y_train=y_train, X_test=X_test, num_gfr=100, num_mcmc=0, mean_forest_params=forest_config_b)
+y_hat_test_a = np.squeeze(bart_model_a.y_hat_test).mean(axis = 1)
+rmse_no_subsampling = np.sqrt(np.mean(np.power(y_test - y_hat_test_a,2)))
+print(f"Test set RMSE, no subsampling (p = {p:d}): {rmse_no_subsampling:.2f}")
+y_hat_test_b = np.squeeze(bart_model_b.y_hat_test).mean(axis = 1)
+rmse_subsampling = np.sqrt(np.mean(np.power(y_test - y_hat_test_b,2)))
+print(f"Test set RMSE, subsampling 5 out of {p:d} features: {rmse_subsampling:.2f}")
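A note on what this script now does: the outcome mean is a smooth function of only the first 4 of p = 100 features, and the error scale is calibrated to a target signal-to-noise ratio rather than fixed at 1 (noise_sd = sd(f(X)) / snr). One caveat worth flagging: timeit.timeit(stmt=s, number=5) returns the total time across the 5 runs, so the "Average runtime" labels report a 5-run total unless the result is divided by 5. Below is a minimal standalone sketch of the SNR calibration in plain numpy; the seed and variable names are illustrative, not part of the commit.

    import numpy as np

    rng = np.random.default_rng(2025)  # illustrative seed

    # Covariates and the same smooth mean function as the updated script
    n, p = 1000, 100
    X = rng.uniform(0, 1, (n, p))
    f_X = (np.sin(4*np.pi*X[:, 0]) + np.cos(4*np.pi*X[:, 1])
           + np.sin(4*np.pi*X[:, 2]) + np.cos(4*np.pi*X[:, 3]))

    # Scale the noise so that sd(signal) / sd(noise) = snr
    snr = 2
    noise_sd = np.std(f_X) / snr
    y = f_X + rng.normal(0, 1, n) * noise_sd

    # Sanity check: the empirical SNR should come out near 2
    print(np.std(f_X) / np.std(y - f_X))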
Lines changed: 89 additions & 0 deletions (new file)
@@ -0,0 +1,89 @@
+# Load libraries
+library(stochtree)
+library(microbenchmark)
+
+# Generate the data
+n <- 1000
+p <- 100
+snr <- 2
+X <- matrix(rnorm(n*p), ncol = p)
+mu_x <- 1 + 2*X[,1] - 4*(X[,2] < 0) + 4*(X[,2] >= 0) + 3*(abs(X[,3]) - sqrt(2/pi))
+tau_x <- 1 + 2*X[,4]
+u <- runif(n)
+pi_x <- ((mu_x-1)/4) + 4*(u-0.5)
+Z <- pi_x + rnorm(n,0,1)
+E_XZ <- mu_x + Z*tau_x
+noise_sd <- sd(E_XZ) / snr
+y <- E_XZ + rnorm(n, 0, 1)*noise_sd
+
+# Split data into test and train sets
+test_set_pct <- 0.2
+n_test <- round(test_set_pct*n)
+n_train <- n - n_test
+test_inds <- sort(sample(1:n, n_test, replace = FALSE))
+train_inds <- (1:n)[!((1:n) %in% test_inds)]
+X_test <- X[test_inds,]
+X_train <- X[train_inds,]
+Z_test <- Z[test_inds]
+Z_train <- Z[train_inds]
+pi_x_test <- pi_x[test_inds]
+pi_x_train <- pi_x[train_inds]
+y_test <- y[test_inds]
+y_train <- y[train_inds]
+
+# Sampler settings
+num_gfr <- 100
+num_burnin <- 0
+num_mcmc <- 0
+general_params <- list(sample_sigma2_global = T)
+prog_params_a <- list(num_trees = 100, num_features_subsample = 5)
+trt_params_a <- list(num_trees = 100, num_features_subsample = 5)
+prog_params_b <- list(num_trees = 50)
+trt_params_b <- list(num_trees = 50)
+
+# Benchmark sampler with and without feature subsampling
+microbenchmark::microbenchmark(
+  stochtree::bcf(
+    X_train = X, Z_train = Z, propensity_train = pi_x, y_train = y,
+    num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc,
+    general_params = general_params, prognostic_forest_params = prog_params_a,
+    treatment_effect_forest_params = trt_params_a
+  ),
+  stochtree::bcf(
+    X_train = X, Z_train = Z, propensity_train = pi_x, y_train = y,
+    num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc,
+    general_params = general_params, prognostic_forest_params = prog_params_b,
+    treatment_effect_forest_params = trt_params_b
+  ),
+  times = 5
+)
+
+Rprof()
+model_subsampling <- stochtree::bcf(
+  X_train = X_train, Z_train = Z_train, propensity_train = pi_x_train, y_train = y_train,
+  num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc,
+  general_params = general_params, prognostic_forest_params = prog_params_a,
+  treatment_effect_forest_params = trt_params_a
+)
+Rprof(NULL)
+summaryRprof()
+
+Rprof()
+model_no_subsampling <- stochtree::bcf(
+  X_train = X_train, Z_train = Z_train, propensity_train = pi_x_train, y_train = y_train,
+  num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc,
+  general_params = general_params, prognostic_forest_params = prog_params_b,
+  treatment_effect_forest_params = trt_params_b
+)
+Rprof(NULL)
+summaryRprof()
+
+# Compare out of sample RMSE of the two models
+y_hat_test_subsampling <- rowMeans(predict(model_subsampling, X = X_test, Z = Z_test, propensity = pi_x_test)$y_hat)
+rmse_subsampling <- (
+  sqrt(mean((y_hat_test_subsampling - y_test)^2))
+)
+y_hat_test_no_subsampling <- rowMeans(predict(model_no_subsampling, X = X_test, Z = Z_test, propensity = pi_x_test)$y_hat)
+rmse_no_subsampling <- (
+  sqrt(mean((y_hat_test_no_subsampling - y_test)^2))
+)
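For readers more at home in Python, here is a rough numpy translation of the R script's data-generating process only: prognostic effect mu(x), treatment effect tau(x), a propensity-like score pi(x) correlated with mu(x), a continuous treatment Z, and SNR-calibrated noise. It deliberately stops short of the stochtree::bcf calls and the microbenchmark/Rprof profiling; column indices shift from R's 1-based to numpy's 0-based, and the seed is illustrative.

    import numpy as np

    rng = np.random.default_rng(2025)  # illustrative seed

    n, p, snr = 1000, 100, 2
    X = rng.normal(0, 1, (n, p))

    # Prognostic effect mu(x) and treatment effect tau(x), as in the R script
    mu_x = (1 + 2*X[:, 0] - 4*(X[:, 1] < 0) + 4*(X[:, 1] >= 0)
            + 3*(np.abs(X[:, 2]) - np.sqrt(2/np.pi)))
    tau_x = 1 + 2*X[:, 3]

    # Propensity-like score, correlated with mu(x), drives a continuous treatment Z
    u = rng.uniform(0, 1, n)
    pi_x = (mu_x - 1)/4 + 4*(u - 0.5)
    Z = pi_x + rng.normal(0, 1, n)

    # Outcome: conditional mean plus noise calibrated to the target SNR
    E_XZ = mu_x + Z*tau_x
    noise_sd = np.std(E_XZ) / snr
    y = E_XZ + rng.normal(0, 1, n) * noise_sd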

0 commit comments
