VectorInstitute · emersodb · Jun 29, 2026 · Jun 29, 2026 · Jun 29, 2026 · Jun 29, 2026
diff --git a/.github/workflows/code_checks.yml b/.github/workflows/code_checks.yml
@@ -39,7 +39,7 @@ jobs:
           enable-cache: true
 
       - name: "Set up Python"
-        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405
+        uses: actions/setup-python@ece7cb06caefa5fff74198d8649806c4678c61a1
         with:
           python-version-file: ".python-version"
 

diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
@@ -51,7 +51,7 @@ jobs:
           enable-cache: true
 
       - name: Set up Python
-        uses: actions/setup-python@v6.2.0
+        uses: actions/setup-python@v6.3.0
         with:
           python-version-file: ".python-version"
 

diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml
@@ -51,7 +51,7 @@ jobs:
           enable-cache: true
 
       - name: "Set up Python"
-        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405
+        uses: actions/setup-python@ece7cb06caefa5fff74198d8649806c4678c61a1
         with:
           python-version-file: ".python-version"
 

diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
@@ -26,7 +26,7 @@ jobs:
           enable-cache: true
 
       - name: "Set up Python"
-        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405
+        uses: actions/setup-python@ece7cb06caefa5fff74198d8649806c4678c61a1
         with:
           python-version-file: ".python-version"
 

diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
@@ -51,7 +51,7 @@ jobs:
           enable-cache: true
 
       - name: "Set up Python"
-        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405
+        uses: actions/setup-python@ece7cb06caefa5fff74198d8649806c4678c61a1
         with:
           python-version-file: ".python-version"
 

diff --git a/.gitignore b/.gitignore
@@ -51,6 +51,9 @@ tests/integration/attacks/ensemble/assets/workspace
 tests/integration/assets/tabsyn/processed_data
 tests/integration/assets/tabsyn/results
 
+# SynthEval writes this analysis config file whenever a metric is created, and there is no option to disable it.
+SE_analysis_config.json
+
 # Training Logs
 *.err
 *.out

diff --git a/pyproject.toml b/pyproject.toml
@@ -137,3 +137,6 @@ max-doc-length = 119
 markers = [
     "integration_test: marks tests as integration tests",
 ]
+env = [
+    "OMP_NUM_THREADS=1", # Forces single threading in tests to avoid segfaults due to nested spawning
+]
diff --git a/src/midst_toolkit/attacks/tartan_federer/data_utils.py b/src/midst_toolkit/attacks/tartan_federer/data_utils.py
@@ -139,8 +139,8 @@ def save_results_and_plot_roc_curve(
     plt.figure(figsize=(8, 6))
     plt.plot(fpr, tpr, color="darkorange", lw=2, label=f"ROC curve (AUC = {roc_auc:.4f})")
     plt.plot([0, 1], [0, 1], color="navy", lw=2, linestyle="--")
-    plt.xlim([0.0, 1.0])
-    plt.ylim([0.0, 1.05])
+    plt.xlim((0.0, 1.0))
+    plt.ylim((0.0, 1.05))
     plt.xlabel("False Positive Rate")
     plt.ylabel("True Positive Rate")
     plt.title("ROC Curve")

diff --git a/src/midst_toolkit/attacks/tartan_federer/tartan_federer_attack.py b/src/midst_toolkit/attacks/tartan_federer/tartan_federer_attack.py
@@ -127,9 +127,15 @@ def make_dataset_from_df_with_loaded(
         table_metadata,
         is_target_conditioned,
     )
-    numerical_features = {DataSplit.TRAIN.value: data[numerical_column_names].values.astype(np.float32)}
-    categorical_features = {DataSplit.TRAIN.value: data[categorical_column_names].to_numpy()}
-    targets = {DataSplit.TRAIN.value: data[[table_metadata.target_column_name]].values.astype(np.float32)}
+    numerical_features: dict[str, np.ndarray] = {
+        DataSplit.TRAIN.value: data[numerical_column_names].values.astype(np.float32)
+    }
+    categorical_features: dict[str, np.ndarray] = {
+        DataSplit.TRAIN.value: data[categorical_column_names].to_numpy(dtype=np.str_)
+    }
+    targets: dict[str, np.ndarray] = {
+        DataSplit.TRAIN.value: data[[table_metadata.target_column_name]].values.astype(np.float32)
+    }
 
     if len(categorical_column_names) > 0:
         all_categorical_features = categorical_features[DataSplit.TRAIN.value]

diff --git a/src/midst_toolkit/data_processing/utils.py b/src/midst_toolkit/data_processing/utils.py
@@ -18,6 +18,7 @@
 
 import numpy as np
 import pandas as pd
+from pandas.api.types import is_object_dtype, is_string_dtype
 from sklearn.preprocessing import MinMaxScaler, OrdinalEncoder
 
 from midst_toolkit.common.logger import log
@@ -191,9 +192,11 @@ def get_categorical_columns(dataframe: pd.DataFrame, threshold: int) -> list[str
     categorical_variables: list[str] = []
 
     for column_name in dataframe.columns:
-        # If dtype is an object (as str columns are), assume categorical
-        if dataframe[column_name].dtype == "object" or (
-            is_column_type_numerical(dataframe, column_name) and dataframe[column_name].nunique() <= threshold
+        # If dtype is an object or string type, assume categorical
+        if (
+            is_string_dtype(dataframe[column_name])
+            or is_object_dtype(dataframe[column_name])
+            or (is_column_type_numerical(dataframe, column_name) and dataframe[column_name].nunique() <= threshold)
         ):
             categorical_variables.append(column_name)
 

diff --git a/src/midst_toolkit/evaluation/metrics_base.py b/src/midst_toolkit/evaluation/metrics_base.py
@@ -29,6 +29,25 @@ def compute(self, real_data: pd.DataFrame, synthetic_data: pd.DataFrame) -> dict
         """
         raise NotImplementedError("Inheriting class must define compute")
 
+    def validate_dataframe_dtypes(self, dataframe: pd.DataFrame) -> None:
+        """
+        Validates that the dataframe does not contain string types. This is a requirement for many metrics in this
+        library, which require categorical columns to be preprocessed and encoded prior to computation.
+
+        Args:
+            dataframe: dataframe to validate.
+
+        Raises:
+            ValueError: If the dataframe contains string types. The message names the offending columns.
+        """
+        items = dataframe.dtypes.items()  # (column name, dtype) pairs
+        string_columns = [name for name, dtype in items if isinstance(dtype, pd.StringDtype) or dtype.name == "str"]
+        if string_columns:
+            raise ValueError(
+                "Dataframe contains string types. Categorical columns must be preprocessed and encoded prior to "
+                f"computation. Offending columns: {string_columns}"
+            )
+
 
 class SynthEvalMetric(MetricBase, ABC):
     def __init__(

diff --git a/src/midst_toolkit/evaluation/privacy/distance_closest_record.py b/src/midst_toolkit/evaluation/privacy/distance_closest_record.py
@@ -93,6 +93,12 @@ def compute(
                 self.meta_info, real_data, synthetic_data, holdout_data
             )
 
+        # Make sure the categorical columns are preprocessed and encoded before converting the data to tensors
+        self.validate_dataframe_dtypes(real_data)
+        self.validate_dataframe_dtypes(synthetic_data)
+        if holdout_data is not None:
+            self.validate_dataframe_dtypes(holdout_data)
+
         real_data_train_tensor = torch.tensor(real_data.to_numpy()).to(self.device)
         real_data_test_tensor = torch.tensor(holdout_data.to_numpy()).to(self.device)
         synthetic_data_tensor = torch.tensor(synthetic_data.to_numpy()).to(self.device)
@@ -192,6 +198,10 @@ def compute(self, real_data: pd.DataFrame, synthetic_data: pd.DataFrame) -> dict
         real_data_tensor = torch.tensor(real_data.to_numpy()).to(self.device)
         synthetic_data_tensor = torch.tensor(synthetic_data.to_numpy()).to(self.device)
 
+        # Make sure the categorical columns are preprocessed and encoded before calling compute
+        self.validate_dataframe_dtypes(real_data)
+        self.validate_dataframe_dtypes(synthetic_data)
+
         dcr_synthetic_to_real = []
         dcr_real_to_real = []
 

diff --git a/src/midst_toolkit/evaluation/privacy/epsilon_identifiability_risk.py b/src/midst_toolkit/evaluation/privacy/epsilon_identifiability_risk.py
@@ -125,6 +125,12 @@ def compute(
         else:
             raise ValueError(f"Unrecognized EpsilonIdentifiabilityNorm Option: {self.norm}")
 
+        # Make sure the categorical columns are preprocessed and encoded before handing the data to SynthEval
+        self.validate_dataframe_dtypes(filtered_real_data)
+        self.validate_dataframe_dtypes(filtered_synthetic_data)
+        if filtered_holdout_data is not None:
+            self.validate_dataframe_dtypes(filtered_holdout_data)
+
         self.syntheval_metric = EpsilonIdentifiability(
             real_data=filtered_real_data,
             synt_data=filtered_synthetic_data,
@@ -134,6 +140,7 @@ def compute(
             do_preprocessing=False,
             verbose=False,
             nn_dist=self.norm.value,
+            plot_figures=False,
         )
         result = self.syntheval_metric.evaluate()
         result["epsilon_identifiability_risk"] = result.pop("eps_risk")

diff --git a/src/midst_toolkit/evaluation/privacy/hitting_rate.py b/src/midst_toolkit/evaluation/privacy/hitting_rate.py
@@ -78,6 +78,10 @@ def compute(self, real_data: pd.DataFrame, synthetic_data: pd.DataFrame) -> dict
         filtered_real_data = real_data[self.all_columns]
         filtered_synthetic_data = synthetic_data[self.all_columns]
 
+        # Make sure the categorical columns are preprocessed and encoded before handing the data to SynthEval
+        self.validate_dataframe_dtypes(filtered_real_data)
+        self.validate_dataframe_dtypes(filtered_synthetic_data)
+
         self.syntheval_metric = SynthEvalHittingRate(
             real_data=filtered_real_data,
             synt_data=filtered_synthetic_data,
@@ -86,6 +90,7 @@ def compute(self, real_data: pd.DataFrame, synthetic_data: pd.DataFrame) -> dict
             num_cols=self.numerical_columns,
             do_preprocessing=False,
             verbose=False,
+            plot_figures=False,
         )
         result = self.syntheval_metric.evaluate(self.hitting_threshold)
         result["hitting_rate"] = result.pop("hit rate")

diff --git a/src/midst_toolkit/evaluation/privacy/nearest_neighbor_distance_ratio.py b/src/midst_toolkit/evaluation/privacy/nearest_neighbor_distance_ratio.py
@@ -114,6 +114,12 @@ def compute(
                     self.meta_info, real_data, synthetic_data, holdout_data
                 )
 
+        # Make sure the categorical columns are preprocessed and encoded before converting the data to tensors
+        self.validate_dataframe_dtypes(real_data)
+        self.validate_dataframe_dtypes(synthetic_data)
+        if holdout_data is not None:
+            self.validate_dataframe_dtypes(holdout_data)
+
         synthetic_data_tensor = torch.tensor(synthetic_data.to_numpy()).to(self.device)
         real_data_tensor = torch.tensor(real_data.to_numpy()).to(self.device)
         mean_nndr, nndr_standard_error = self._compute_mean_nearest_neighbor_distance_ratio(

diff --git a/src/midst_toolkit/evaluation/quality/confidence_interval_overlap.py b/src/midst_toolkit/evaluation/quality/confidence_interval_overlap.py
@@ -91,6 +91,7 @@ def compute(self, real_data: pd.DataFrame, synthetic_data: pd.DataFrame) -> dict
             num_cols=self.numerical_columns,
             do_preprocessing=False,
             verbose=False,
+            plot_figures=False,
         )
 
-        return self.syntheval_metric.evaluate(self.confidence_level.value)
+        return self.syntheval_metric.evaluate(ci="sem", confidence=self.confidence_level.value)
diff --git a/src/midst_toolkit/evaluation/quality/correlation_matrix_difference.py b/src/midst_toolkit/evaluation/quality/correlation_matrix_difference.py
@@ -72,6 +72,7 @@ def compute(self, real_data: pd.DataFrame, synthetic_data: pd.DataFrame) -> dict
             num_cols=self.numerical_columns,
             do_preprocessing=False,
             verbose=False,
+            plot_figures=False,
         )
 
         return self.syntheval_metric.evaluate(mixed_corr=self.compute_mixed_correlations, return_mats=False)
diff --git a/src/midst_toolkit/evaluation/quality/dimensionwise_mean_difference.py b/src/midst_toolkit/evaluation/quality/dimensionwise_mean_difference.py
@@ -1,5 +1,5 @@
 import pandas as pd
-from syntheval.metrics.utility.metric_dimensionwise_means import MetricClassName as SynthEvalDwm
+from syntheval.metrics.utility.metric_dimensionwise_means import DimensionWiseMeans
 
 from midst_toolkit.evaluation.metrics_base import SynthEvalMetric
 
@@ -27,13 +27,14 @@ def compute(self, real_data: pd.DataFrame, synthetic_data: pd.DataFrame) -> dict
         if self.do_preprocess:
             real_data, synthetic_data = self.preprocess(real_data, synthetic_data)
 
-        self.syntheval_metric = SynthEvalDwm(
+        self.syntheval_metric = DimensionWiseMeans(
             real_data=real_data,
             synt_data=synthetic_data,
             cat_cols=self.categorical_columns,
             num_cols=self.numerical_columns,
             do_preprocessing=False,
             verbose=False,
+            plot_figures=False,
         )
 
         result = self.syntheval_metric.evaluate()

diff --git a/src/midst_toolkit/evaluation/quality/kolmogorov_smirnov_total_variation.py b/src/midst_toolkit/evaluation/quality/kolmogorov_smirnov_total_variation.py
@@ -83,6 +83,7 @@ def compute(self, real_data: pd.DataFrame, synthetic_data: pd.DataFrame) -> dict
             num_cols=self.numerical_columns,
             do_preprocessing=False,
             verbose=False,
+            plot_figures=False,
         )
 
         return self.syntheval_metric.evaluate(sig_lvl=self.significance_level, n_perms=self.permutations)