From c2d1b5d662f0455d922d7d3041669e4c51e47399 Mon Sep 17 00:00:00 2001
From: Drew Herren <drewherrenopensource@gmail.com>
Date: Tue, 12 Aug 2025 10:15:33 -0500
Subject: [PATCH] Add Python regression tests to CI, fix BART R results path, change BCF multivariate tau axis

---
 .github/workflows/regression-test.yml         | 49 +++++++++++++++++--
 .../bart/individual_regression_test_bart.R    |  2 +-
 .../bcf/individual_regression_test_bcf.py     | 12 ++---
 3 files changed, 48 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/regression-test.yml b/.github/workflows/regression-test.yml
index cdc00621..2e8c3391 100644
--- a/.github/workflows/regression-test.yml
+++ b/.github/workflows/regression-test.yml
@@ -31,29 +31,68 @@ jobs:
         with:
           extra-packages: any::testthat, any::decor, local::stochtree_cran
       
-      - name: Create output directory for BART regression test results
+      - name: Create output directory for R regression test results
         run: |
           mkdir -p tools/regression/bart/stochtree_bart_r_results
           mkdir -p tools/regression/bcf/stochtree_bcf_r_results
 
-      - name: Run the BART regression test benchmark suite
+      - name: Run the R regression test benchmark suite
         run: |
           Rscript tools/regression/bart/regression_test_dispatch_bart.R
           Rscript tools/regression/bcf/regression_test_dispatch_bcf.R
 
-      - name: Collate and analyze regression test results
+      - name: Collate and analyze R regression test results
         run: |
           Rscript tools/regression/bart/regression_test_analysis_bart.R
           Rscript tools/regression/bcf/regression_test_analysis_bcf.R
+      
+      - name: Setup Python 3.10
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+          cache: "pip"
+    
+      - name: Install Package with Relevant Dependencies
+        run: |
+          pip install --upgrade pip
+          pip install -r requirements.txt
+          pip install .
+
+      - name: Create output directory for python regression test results
+        run: |
+          mkdir -p tools/regression/bart/stochtree_bart_python_results
+          mkdir -p tools/regression/bcf/stochtree_bcf_python_results
+
+      - name: Run the python regression test benchmark suite
+        run: |
+          python tools/regression/bart/regression_test_dispatch_bart.py
+          python tools/regression/bcf/regression_test_dispatch_bcf.py
 
-      - name: Store BART benchmark test results as an artifact of the run
+      - name: Collate and analyze python regression test results
+        run: |
+          python tools/regression/bart/regression_test_analysis_bart.py
+          python tools/regression/bcf/regression_test_analysis_bcf.py
+
+      - name: Store R BART benchmark test results as an artifact of the run
         uses: actions/upload-artifact@v4
         with:
           name: stochtree-r-bart-summary
           path: tools/regression/bart/stochtree_bart_r_results/stochtree_bart_r_summary.csv
 
-      - name: Store BCF benchmark test results as an artifact of the run
+      - name: Store R BCF benchmark test results as an artifact of the run
         uses: actions/upload-artifact@v4
         with:
           name: stochtree-r-bcf-summary
           path: tools/regression/bcf/stochtree_bcf_r_results/stochtree_bcf_r_summary.csv
+
+      - name: Store python BART benchmark test results as an artifact of the run
+        uses: actions/upload-artifact@v4
+        with:
+          name: stochtree-python-bart-summary
+          path: tools/regression/bart/stochtree_bart_python_results/stochtree_bart_python_summary.csv
+
+      - name: Store python BCF benchmark test results as an artifact of the run
+        uses: actions/upload-artifact@v4
+        with:
+          name: stochtree-python-bcf-summary
+          path: tools/regression/bcf/stochtree_bcf_python_results/stochtree_bcf_python_summary.csv
diff --git a/tools/regression/bart/individual_regression_test_bart.R b/tools/regression/bart/individual_regression_test_bart.R
index 3f71e8cc..d43ef0e8 100644
--- a/tools/regression/bart/individual_regression_test_bart.R
+++ b/tools/regression/bart/individual_regression_test_bart.R
@@ -231,5 +231,5 @@ filename <- paste(
     "dgp_num", dgp_num, "snr", snr_rounded, "test_set_pct", test_set_pct_rounded, 
     "num_threads", num_threads_clean, sep = "_"
 )
-filename_full <- paste0("tools/regression/stochtree_bart_r_results/", filename, ".csv")
+filename_full <- paste0("tools/regression/bart/stochtree_bart_r_results/", filename, ".csv")
 write.csv(x = results_df, file = filename_full, row.names = F)
diff --git a/tools/regression/bcf/individual_regression_test_bcf.py b/tools/regression/bcf/individual_regression_test_bcf.py
index 7a1ce9a8..591b24d2 100644
--- a/tools/regression/bcf/individual_regression_test_bcf.py
+++ b/tools/regression/bcf/individual_regression_test_bcf.py
@@ -364,12 +364,8 @@ def main():
         
         y_hat_posterior_mean = np.mean(y_hat_posterior, axis=1)
         if has_multivariate_treatment:
-            # For multivariate treatment, tau_hat_posterior has shape (n_test, n_samples, n_treatments)
-            # We want to average over the samples (axis 1) to get (n_test, n_treatments)
-            tau_hat_posterior_mean = np.mean(tau_hat_posterior, axis=1)
+            tau_hat_posterior_mean = np.mean(tau_hat_posterior, axis=2)
         else:
-            # For univariate treatment, tau_hat_posterior has shape (n_test, n_samples)
-            # We want to average over the samples (axis 1) to get (n_test,)
             tau_hat_posterior_mean = np.mean(tau_hat_posterior, axis=1)
         
         # Outcome RMSE and coverage
@@ -387,15 +383,13 @@ def main():
         tau_hat_rmse_test = np.sqrt(np.mean((tau_hat_posterior_mean - treatment_effect_test) ** 2))
         
         if has_multivariate_treatment:
-            # For multivariate treatment, compute percentiles over samples (axis 1)
-            tau_hat_posterior_quantile_025 = np.percentile(tau_hat_posterior, 2.5, axis=1)
-            tau_hat_posterior_quantile_975 = np.percentile(tau_hat_posterior, 97.5, axis=1)
+            tau_hat_posterior_quantile_025 = np.percentile(tau_hat_posterior, 2.5, axis=2)
+            tau_hat_posterior_quantile_975 = np.percentile(tau_hat_posterior, 97.5, axis=2)
             tau_hat_covered = np.logical_and(
                 treatment_effect_test >= tau_hat_posterior_quantile_025,
                 treatment_effect_test <= tau_hat_posterior_quantile_975
             )
         else:
-            # For univariate treatment, compute percentiles over samples (axis 1)
             tau_hat_posterior_quantile_025 = np.percentile(tau_hat_posterior, 2.5, axis=1)
             tau_hat_posterior_quantile_975 = np.percentile(tau_hat_posterior, 97.5, axis=1)
             tau_hat_covered = np.logical_and(