From 1a49c7e36a8593de431ecccefacb7d5bb63fa26d Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Fri, 6 Mar 2026 07:52:53 -0500
Subject: [PATCH] Switch from black to ruff format

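Replace black with ruff as the code formatter across pyproject.toml,
Makefile, and CI workflows. Reformat all files with ruff defaults; the
line-length limit rises from 79 (black -l 79) to ruff's default of 88,
which is why many previously wrapped expressions collapse onto one line.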

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .github/bump_version.py                       |   4 +-
 .github/workflows/reusable_lint.yaml          |   6 +-
 Makefile                                      |   2 +-
 changelog.d/switch-to-ruff.changed.md         |   1 +
 docs/calibration_matrix.ipynb                 | 101 +++++--
 docs/hierarchical_uprating.ipynb              |  51 +---
 docs/local_area_calibration_setup.ipynb       |  56 +---
 modal_app/data_build.py                       |  21 +-
 modal_app/local_area.py                       |  36 +--
 modal_app/remote_calibration_runner.py        |   4 +-
 paper/scripts/build_from_content.py           |  36 +--
 .../calculate_distributional_metrics.py       |   2 +-
 paper/scripts/calculate_target_performance.py |   3 +-
 paper/scripts/generate_all_tables.py          |   8 +-
 paper/scripts/generate_validation_metrics.py  |   4 +-
 paper/scripts/markdown_to_latex.py            |  16 +-
 .../calibration/clone_and_assign.py           |   3 +-
 .../calibration/puf_impute.py                 |  54 +---
 .../calibration/source_impute.py              |  47 ++--
 .../calibration/unified_calibration.py        |  34 +--
 .../calibration/unified_matrix_builder.py     |  64 ++---
 policyengine_us_data/datasets/acs/acs.py      |  12 +-
 .../datasets/acs/census_acs.py                |  22 +-
 .../datasets/cps/census_cps.py                |  32 +--
 policyengine_us_data/datasets/cps/cps.py      | 247 +++++++-----------
 .../datasets/cps/enhanced_cps.py              |  32 +--
 .../block_assignment.py                       |  27 +-
 .../calibration_utils.py                      |  29 +-
 .../county_assignment.py                      |   4 +-
 .../create_stratified_cps.py                  |  23 +-
 .../publish_local_area.py                     |  54 ++--
 .../stacked_dataset_builder.py                | 121 +++------
 .../check_calibrated_estimates_interactive.py |  68 +++--
 .../cps/long_term/extract_ssa_costs.py        |   4 +-
 .../cps/long_term/projection_utils.py         |  16 +-
 .../cps/long_term/run_household_projection.py | 122 ++++-----
 .../datasets/cps/small_enhanced_cps.py        |  28 +-
 policyengine_us_data/datasets/puf/irs_puf.py  |   4 +-
 policyengine_us_data/datasets/puf/puf.py      |  43 ++-
 policyengine_us_data/datasets/scf/fed_scf.py  |  16 +-
 policyengine_us_data/datasets/scf/scf.py      |  36 +--
 policyengine_us_data/datasets/sipp/sipp.py    |   3 +-
 .../db/create_database_tables.py              |  39 +--
 .../db/create_initial_strata.py               |  16 +-
 policyengine_us_data/db/etl_age.py            |   8 +-
 policyengine_us_data/db/etl_irs_soi.py        |  79 ++----
 policyengine_us_data/db/etl_medicaid.py       |  12 +-
 .../db/etl_national_targets.py                |  52 +---
 policyengine_us_data/db/etl_pregnancy.py      |  24 +-
 policyengine_us_data/db/etl_snap.py           |   8 +-
 .../db/etl_state_income_tax.py                |  10 +-
 policyengine_us_data/db/validate_database.py  |   4 +-
 policyengine_us_data/db/validate_hierarchy.py |  58 ++--
 policyengine_us_data/geography/__init__.py    |   4 +-
 policyengine_us_data/geography/county_fips.py |   8 +-
 .../geography/create_zip_code_dataset.py      |   4 +-
 policyengine_us_data/parameters/__init__.py   |   4 +-
 .../calibration_targets/audit_county_enum.py  |   4 +-
 .../make_block_cd_distributions.py            |   8 +-
 .../make_block_crosswalk.py                   |  16 +-
 .../make_county_cd_distributions.py           |  16 +-
 .../make_district_mapping.py                  |   8 +-
 .../pull_hardcoded_targets.py                 |   8 +-
 .../calibration_targets/pull_snap_targets.py  |   8 +-
 .../calibration_targets/pull_soi_targets.py   |  87 ++----
 .../storage/upload_completed_datasets.py      |   7 +-
 .../test_build_matrix_masking.py              |  20 +-
 .../test_calibration/test_clone_and_assign.py |  13 +-
 .../tests/test_calibration/test_puf_impute.py |   8 +-
 .../test_retirement_imputation.py             | 127 +++------
 .../test_calibration/test_source_impute.py    |   4 +-
 .../test_unified_matrix_builder.py            |  23 +-
 .../tests/test_constraint_validation.py       |  12 +-
 policyengine_us_data/tests/test_database.py   |   2 +-
 .../tests/test_database_build.py              |  21 +-
 .../tests/test_datasets/test_county_fips.py   |   9 +-
 .../tests/test_datasets/test_cps.py           |  17 +-
 .../test_datasets/test_dataset_sanity.py      |  46 ++--
 .../tests/test_datasets/test_enhanced_cps.py  |  64 ++---
 .../tests/test_datasets/test_sipp_assets.py   |  24 +-
 .../test_datasets/test_small_enhanced_cps.py  |  10 +-
 .../test_datasets/test_sparse_enhanced_cps.py |  40 ++-
 .../create_test_fixture.py                    |  32 +--
 .../test_county_assignment.py                 |   8 +-
 .../test_stacked_dataset_builder.py           |  52 ++--
 policyengine_us_data/tests/test_puf_impute.py |   4 +-
 .../tests/test_schema_views_and_lookups.py    |  16 +-
 .../tests/test_stochastic_variables.py        |   4 -
 policyengine_us_data/utils/census.py          |   4 +-
 .../utils/constraint_validation.py            |  22 +-
 policyengine_us_data/utils/data_upload.py     |  16 +-
 policyengine_us_data/utils/db.py              |  25 +-
 policyengine_us_data/utils/huggingface.py     |   4 +-
 policyengine_us_data/utils/l0.py              |   4 +-
 policyengine_us_data/utils/loss.py            | 155 ++++-------
 policyengine_us_data/utils/randomness.py      |   4 +-
 policyengine_us_data/utils/soi.py             |  27 +-
 policyengine_us_data/utils/spm.py             |   4 +-
 policyengine_us_data/utils/uprating.py        |   4 +-
 pyproject.toml                                |  20 +-
 scripts/generate_test_data.py                 |  42 +--
 scripts/migrate_versioned_to_production.py    |   4 +-
 tests/test_h6_reform.py                       |  18 +-
 tests/test_no_formula_variables_stored.py     |  26 +-
 tests/test_reproducibility.py                 |   6 +-
 tests/test_weeks_unemployed.py                |   6 +-
 uv.lock                                       |  85 ++----
 validation/benefit_validation.py              |  20 +-
 validation/generate_qrf_statistics.py         |  38 +--
 validation/qrf_diagnostics.py                 |  42 +--
 validation/run_qrf_diagnostics.py             |   6 +-
 validation/tax_policy_validation.py           |   8 +-
 validation/validate_retirement_imputation.py  |  24 +-
 113 files changed, 1024 insertions(+), 2134 deletions(-)
 create mode 100644 changelog.d/switch-to-ruff.changed.md

diff --git a/.github/bump_version.py b/.github/bump_version.py
index bb0fd6dd3..779a82e38 100644
--- a/.github/bump_version.py
+++ b/.github/bump_version.py
@@ -19,9 +19,7 @@ def get_current_version(pyproject_path: Path) -> str:
 
 def infer_bump(changelog_dir: Path) -> str:
     fragments = [
-        f
-        for f in changelog_dir.iterdir()
-        if f.is_file() and f.name != ".gitkeep"
+        f for f in changelog_dir.iterdir() if f.is_file() and f.name != ".gitkeep"
     ]
     if not fragments:
         print("No changelog fragments found", file=sys.stderr)
diff --git a/.github/workflows/reusable_lint.yaml b/.github/workflows/reusable_lint.yaml
index f5fa02cf7..862e90a8a 100644
--- a/.github/workflows/reusable_lint.yaml
+++ b/.github/workflows/reusable_lint.yaml
@@ -8,7 +8,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
+      - name: Install ruff
+        run: pip install "ruff>=0.9.0"  # quoted: a bare ">" is a shell redirect (writes a file named "=0.9.0" and drops the version pin)
       - name: Check formatting
-        uses: "lgeiger/black-action@master"
-        with:
-          args: ". -l 79 --check"
\ No newline at end of file
+        run: ruff format --check .
diff --git a/Makefile b/Makefile
index b34b8eb60..ce38e165e 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ HF_CLONE_DIR ?= $(HOME)/huggingface/policyengine-us-data
 all: data test
 
 format:
-	black . -l 79
+	ruff format .
 
 test:
 	pytest
diff --git a/changelog.d/switch-to-ruff.changed.md b/changelog.d/switch-to-ruff.changed.md
new file mode 100644
index 000000000..a514e08ff
--- /dev/null
+++ b/changelog.d/switch-to-ruff.changed.md
@@ -0,0 +1 @@
+Switch from black to ruff format.
diff --git a/docs/calibration_matrix.ipynb b/docs/calibration_matrix.ipynb
index 41497b1e8..44b5246b0 100644
--- a/docs/calibration_matrix.ipynb
+++ b/docs/calibration_matrix.ipynb
@@ -27,7 +27,28 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": "import numpy as np\nimport pandas as pd\nfrom policyengine_us import Microsimulation\nfrom policyengine_us_data.storage import STORAGE_FOLDER\nfrom policyengine_us_data.calibration.unified_matrix_builder import (\n    UnifiedMatrixBuilder,\n)\nfrom policyengine_us_data.calibration.clone_and_assign import (\n    assign_random_geography,\n)\nfrom policyengine_us_data.datasets.cps.local_area_calibration.calibration_utils import (\n    create_target_groups,\n    drop_target_groups,\n    get_geo_level,\n    STATE_CODES,\n)\n\ndb_path = STORAGE_FOLDER / \"calibration\" / \"policy_data.db\"\ndb_uri = f\"sqlite:///{db_path}\"\ndataset_path = STORAGE_FOLDER / \"stratified_extended_cps_2024.h5\""
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from policyengine_us import Microsimulation\n",
+    "from policyengine_us_data.storage import STORAGE_FOLDER\n",
+    "from policyengine_us_data.calibration.unified_matrix_builder import (\n",
+    "    UnifiedMatrixBuilder,\n",
+    ")\n",
+    "from policyengine_us_data.calibration.clone_and_assign import (\n",
+    "    assign_random_geography,\n",
+    ")\n",
+    "from policyengine_us_data.datasets.cps.local_area_calibration.calibration_utils import (\n",
+    "    create_target_groups,\n",
+    "    drop_target_groups,\n",
+    "    get_geo_level,\n",
+    "    STATE_CODES,\n",
+    ")\n",
+    "\n",
+    "db_path = STORAGE_FOLDER / \"calibration\" / \"policy_data.db\"\n",
+    "db_uri = f\"sqlite:///{db_path}\"\n",
+    "dataset_path = STORAGE_FOLDER / \"stratified_extended_cps_2024.h5\""
+   ]
   },
   {
    "cell_type": "code",
@@ -82,7 +103,19 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": "print(f\"Targets: {X_sparse.shape[0]}\")\nprint(f\"Columns: {X_sparse.shape[1]:,} ({N_CLONES} clones x {n_records:,} records)\")\nprint(f\"Non-zeros: {X_sparse.nnz:,}\")\nprint(f\"Density: {X_sparse.nnz / (X_sparse.shape[0] * X_sparse.shape[1]):.6f}\")\n\ngeo_levels = targets_df[\"geographic_id\"].apply(get_geo_level)\nlevel_names = {0: \"National\", 1: \"State\", 2: \"District\"}\nfor level in [0, 1, 2]:\n    n = (geo_levels == level).sum()\n    if n > 0:\n        print(f\"  {level_names[level]}: {n} targets\")"
+   "source": [
+    "print(f\"Targets: {X_sparse.shape[0]}\")\n",
+    "print(f\"Columns: {X_sparse.shape[1]:,} ({N_CLONES} clones x {n_records:,} records)\")\n",
+    "print(f\"Non-zeros: {X_sparse.nnz:,}\")\n",
+    "print(f\"Density: {X_sparse.nnz / (X_sparse.shape[0] * X_sparse.shape[1]):.6f}\")\n",
+    "\n",
+    "geo_levels = targets_df[\"geographic_id\"].apply(get_geo_level)\n",
+    "level_names = {0: \"National\", 1: \"State\", 2: \"District\"}\n",
+    "for level in [0, 1, 2]:\n",
+    "    n = (geo_levels == level).sum()\n",
+    "    if n > 0:\n",
+    "        print(f\"  {level_names[level]}: {n} targets\")"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -294,14 +327,16 @@
     "for gid, info in enumerate(group_info):\n",
     "    mask = target_groups == gid\n",
     "    vals = targets_df.loc[mask, \"value\"]\n",
-    "    records.append({\n",
-    "        \"group_id\": gid,\n",
-    "        \"description\": info,\n",
-    "        \"n_targets\": mask.sum(),\n",
-    "        \"min_value\": vals.min(),\n",
-    "        \"median_value\": vals.median(),\n",
-    "        \"max_value\": vals.max(),\n",
-    "    })\n",
+    "    records.append(\n",
+    "        {\n",
+    "            \"group_id\": gid,\n",
+    "            \"description\": info,\n",
+    "            \"n_targets\": mask.sum(),\n",
+    "            \"min_value\": vals.min(),\n",
+    "            \"median_value\": vals.median(),\n",
+    "            \"max_value\": vals.max(),\n",
+    "        }\n",
+    "    )\n",
     "\n",
     "group_df = pd.DataFrame(records)\n",
     "print(group_df.to_string(index=False))"
@@ -431,8 +466,7 @@
     "    for r in nz_rows[:5]:\n",
     "        row = targets_df.iloc[r]\n",
     "        print(\n",
-    "            f\"  {row['variable']} (geo={row['geographic_id']}): \"\n",
-    "            f\"{X_sparse[r, col]:.2f}\"\n",
+    "            f\"  {row['variable']} (geo={row['geographic_id']}): {X_sparse[r, col]:.2f}\"\n",
     "        )\n",
     "    if len(nz_rows) > 5:\n",
     "        print(f\"  ... and {len(nz_rows) - 5} more\")"
@@ -475,7 +509,28 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": "nnz_per_row = np.diff(X_sparse.indptr)\nprint(f\"Non-zeros per row:\")\nprint(f\"  min:    {nnz_per_row.min():,}\")\nprint(f\"  median: {int(np.median(nnz_per_row)):,}\")\nprint(f\"  mean:   {nnz_per_row.mean():,.0f}\")\nprint(f\"  max:    {nnz_per_row.max():,}\")\n\ngeo_levels = targets_df[\"geographic_id\"].apply(get_geo_level)\nlevel_names = {0: \"National\", 1: \"State\", 2: \"District\"}\nprint(\"\\nBy geographic level:\")\nfor level in [0, 1, 2]:\n    mask = (geo_levels == level).values\n    if mask.any():\n        vals = nnz_per_row[mask]\n        print(\n            f\"  {level_names[level]:10s}: \"\n            f\"n={mask.sum():>4d}, \"\n            f\"median nnz={int(np.median(vals)):>7,}, \"\n            f\"range=[{vals.min():,}, {vals.max():,}]\"\n        )"
+   "source": [
+    "nnz_per_row = np.diff(X_sparse.indptr)\n",
+    "print(f\"Non-zeros per row:\")\n",
+    "print(f\"  min:    {nnz_per_row.min():,}\")\n",
+    "print(f\"  median: {int(np.median(nnz_per_row)):,}\")\n",
+    "print(f\"  mean:   {nnz_per_row.mean():,.0f}\")\n",
+    "print(f\"  max:    {nnz_per_row.max():,}\")\n",
+    "\n",
+    "geo_levels = targets_df[\"geographic_id\"].apply(get_geo_level)\n",
+    "level_names = {0: \"National\", 1: \"State\", 2: \"District\"}\n",
+    "print(\"\\nBy geographic level:\")\n",
+    "for level in [0, 1, 2]:\n",
+    "    mask = (geo_levels == level).values\n",
+    "    if mask.any():\n",
+    "        vals = nnz_per_row[mask]\n",
+    "        print(\n",
+    "            f\"  {level_names[level]:10s}: \"\n",
+    "            f\"n={mask.sum():>4d}, \"\n",
+    "            f\"median nnz={int(np.median(vals)):>7,}, \"\n",
+    "            f\"range=[{vals.min():,}, {vals.max():,}]\"\n",
+    "        )"
+   ]
   },
   {
    "cell_type": "code",
@@ -498,12 +553,16 @@
     "clone_nnz = []\n",
     "for ci in range(N_CLONES):\n",
     "    block = X_sparse[:, ci * n_records : (ci + 1) * n_records]\n",
-    "    n_states = len(np.unique(geography.state_fips[ci * n_records : (ci + 1) * n_records]))\n",
-    "    clone_nnz.append({\n",
-    "        \"clone\": ci,\n",
-    "        \"nnz\": block.nnz,\n",
-    "        \"unique_states\": n_states,\n",
-    "    })\n",
+    "    n_states = len(\n",
+    "        np.unique(geography.state_fips[ci * n_records : (ci + 1) * n_records])\n",
+    "    )\n",
+    "    clone_nnz.append(\n",
+    "        {\n",
+    "            \"clone\": ci,\n",
+    "            \"nnz\": block.nnz,\n",
+    "            \"unique_states\": n_states,\n",
+    "        }\n",
+    "    )\n",
     "\n",
     "clone_df = pd.DataFrame(clone_nnz)\n",
     "print(\"Non-zeros per clone block:\")\n",
@@ -666,7 +725,9 @@
     }
    ],
    "source": [
-    "ratios = row_sums[achievable_mask] / targets_filtered.loc[achievable_mask, \"value\"].values\n",
+    "ratios = (\n",
+    "    row_sums[achievable_mask] / targets_filtered.loc[achievable_mask, \"value\"].values\n",
+    ")\n",
     "ratio_df = targets_filtered[achievable_mask].copy()\n",
     "ratio_df[\"row_sum\"] = row_sums[achievable_mask]\n",
     "ratio_df[\"ratio\"] = ratios\n",
diff --git a/docs/hierarchical_uprating.ipynb b/docs/hierarchical_uprating.ipynb
index 4da30d82c..be0b43a84 100644
--- a/docs/hierarchical_uprating.ipynb
+++ b/docs/hierarchical_uprating.ipynb
@@ -264,8 +264,7 @@
    ],
    "source": [
     "snap_hh = raw[\n",
-    "    (raw[\"domain_variable\"] == \"snap\")\n",
-    "    & (raw[\"variable\"] == \"household_count\")\n",
+    "    (raw[\"domain_variable\"] == \"snap\") & (raw[\"variable\"] == \"household_count\")\n",
     "]\n",
     "for level in [\"state\", \"district\"]:\n",
     "    total = snap_hh[snap_hh[\"geo_level\"] == level][\"value\"].sum()\n",
@@ -333,9 +332,9 @@
    "source": [
     "raw[\"original_value\"] = raw[\"value\"].copy()\n",
     "raw[\"uprating_factor\"] = raw.apply(\n",
-    "    lambda r: builder._get_uprating_info(\n",
-    "        r[\"variable\"], r[\"period\"], uprating_factors\n",
-    "    )[0],\n",
+    "    lambda r: builder._get_uprating_info(r[\"variable\"], r[\"period\"], uprating_factors)[\n",
+    "        0\n",
+    "    ],\n",
     "    axis=1,\n",
     ")\n",
     "raw[\"value\"] = raw[\"original_value\"] * raw[\"uprating_factor\"]"
@@ -376,10 +375,7 @@
     "sample_states = {6: \"CA\", 48: \"TX\", 36: \"NY\"}\n",
     "\n",
     "for fips, abbr in sample_states.items():\n",
-    "    rows = raw[\n",
-    "        (raw[\"geo_level\"] == \"state\")\n",
-    "        & (raw[\"geographic_id\"] == str(fips))\n",
-    "    ]\n",
+    "    rows = raw[(raw[\"geo_level\"] == \"state\") & (raw[\"geographic_id\"] == str(fips))]\n",
     "    for _, r in rows.iterrows():\n",
     "        print(\n",
     "            f\"  {abbr} [{r['domain_variable']:8s}] \"\n",
@@ -412,9 +408,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "result = builder._apply_hierarchical_uprating(\n",
-    "    raw, DOMAINS, uprating_factors\n",
-    ")"
+    "result = builder._apply_hierarchical_uprating(raw, DOMAINS, uprating_factors)"
    ]
   },
   {
@@ -454,11 +448,7 @@
     "    for fips, abbr in sample_states.items():\n",
     "        cd_state = cd_domain[\n",
     "            cd_domain[\"geographic_id\"].apply(\n",
-    "                lambda g, s=fips: (\n",
-    "                    int(g) // 100 == s\n",
-    "                    if g not in (\"US\",)\n",
-    "                    else False\n",
-    "                )\n",
+    "                lambda g, s=fips: int(g) // 100 == s if g not in (\"US\",) else False\n",
     "            )\n",
     "        ]\n",
     "        if cd_state.empty:\n",
@@ -474,11 +464,7 @@
     "                & (raw[\"variable\"] == var)\n",
     "                & (raw[\"domain_variable\"] == domain)\n",
     "            ]\n",
-    "            uprated_state = (\n",
-    "                st_row[\"value\"].iloc[0]\n",
-    "                if len(st_row)\n",
-    "                else np.nan\n",
-    "            )\n",
+    "            uprated_state = st_row[\"value\"].iloc[0] if len(st_row) else np.nan\n",
     "            print(\n",
     "                f\"  {abbr} {var:20s}  \"\n",
     "                f\"hif={hif:.6f}  \"\n",
@@ -487,6 +473,7 @@
     "                f\"uprated_state={uprated_state:>14,.0f}\"\n",
     "            )\n",
     "\n",
+    "\n",
     "show_reconciliation(result, raw, \"aca_ptc\", sample_states)"
    ]
   },
@@ -527,9 +514,7 @@
     "]\n",
     "\n",
     "state_ufs = (\n",
-    "    aca_cds.assign(state_fips=aca_cds[\"geographic_id\"].apply(\n",
-    "        lambda g: int(g) // 100\n",
-    "    ))\n",
+    "    aca_cds.assign(state_fips=aca_cds[\"geographic_id\"].apply(lambda g: int(g) // 100))\n",
     "    .groupby(\"state_fips\")[\"state_uprating_factor\"]\n",
     "    .first()\n",
     "    .sort_values()\n",
@@ -537,7 +522,7 @@
     "\n",
     "print(\"ACA PTC uprating factors (aca_ptc = vol_mult * val_mult):\")\n",
     "print(f\"  {'State FIPS':>12s}  {'Factor':>8s}\")\n",
-    "print(f\"  {'─'*12}  {'─'*8}\")\n",
+    "print(f\"  {'─' * 12}  {'─' * 8}\")\n",
     "for fips in list(state_ufs.index[:5]) + [\"...\"] + list(state_ufs.index[-5:]):\n",
     "    if fips == \"...\":\n",
     "        print(f\"  {'...':>12s}\")\n",
@@ -676,9 +661,7 @@
    ],
    "source": [
     "level_counts = (\n",
-    "    result.groupby([\"domain_variable\", \"geo_level\"])\n",
-    "    .size()\n",
-    "    .reset_index(name=\"count\")\n",
+    "    result.groupby([\"domain_variable\", \"geo_level\"]).size().reset_index(name=\"count\")\n",
     ")\n",
     "level_counts"
    ]
@@ -749,20 +732,14 @@
     "checks = 0\n",
     "for domain in DOMAINS:\n",
     "    domain_result = result[result[\"domain_variable\"] == domain]\n",
-    "    cd_result = domain_result[\n",
-    "        domain_result[\"geo_level\"] == \"district\"\n",
-    "    ]\n",
+    "    cd_result = domain_result[domain_result[\"geo_level\"] == \"district\"]\n",
     "    if cd_result.empty:\n",
     "        continue\n",
     "\n",
     "    for fips, abbr in sorted(STATE_CODES.items()):\n",
     "        cd_rows = cd_result[\n",
     "            cd_result[\"geographic_id\"].apply(\n",
-    "                lambda g, s=fips: (\n",
-    "                    int(g) // 100 == s\n",
-    "                    if g not in (\"US\",)\n",
-    "                    else False\n",
-    "                )\n",
+    "                lambda g, s=fips: int(g) // 100 == s if g not in (\"US\",) else False\n",
     "            )\n",
     "        ]\n",
     "        if cd_rows.empty:\n",
diff --git a/docs/local_area_calibration_setup.ipynb b/docs/local_area_calibration_setup.ipynb
index 2e8614aa9..21e38750e 100644
--- a/docs/local_area_calibration_setup.ipynb
+++ b/docs/local_area_calibration_setup.ipynb
@@ -241,9 +241,7 @@
     }
    ],
    "source": [
-    "print(\n",
-    "    f\"Example household (record_idx={record_idx}) across {N_CLONES} clones:\\n\"\n",
-    ")\n",
+    "print(f\"Example household (record_idx={record_idx}) across {N_CLONES} clones:\\n\")\n",
     "rows = []\n",
     "for c in range(N_CLONES):\n",
     "    col = c * n_records + record_idx\n",
@@ -351,14 +349,8 @@
     "new_state = clone_states[record_idx]\n",
     "\n",
     "print(f\"Example household (record_idx={record_idx}):\")\n",
-    "print(\n",
-    "    f\"  Original state: {STATE_CODES.get(int(orig_state), '??')} \"\n",
-    "    f\"({int(orig_state)})\"\n",
-    ")\n",
-    "print(\n",
-    "    f\"  Clone 0 state:  {STATE_CODES.get(int(new_state), '??')} \"\n",
-    "    f\"({int(new_state)})\"\n",
-    ")\n",
+    "print(f\"  Original state: {STATE_CODES.get(int(orig_state), '??')} ({int(orig_state)})\")\n",
+    "print(f\"  Clone 0 state:  {STATE_CODES.get(int(new_state), '??')} ({int(new_state)})\")\n",
     "print(f\"  Original SNAP:  ${snap_values[record_idx]:,.2f}\")\n",
     "print(f\"  Clone 0 SNAP:   ${new_snap[record_idx]:,.2f}\")"
    ]
@@ -451,9 +443,7 @@
     "    s.set_input(\n",
     "        \"state_fips\",\n",
     "        2024,\n",
-    "        geography.state_fips[c * n_records : (c + 1) * n_records].astype(\n",
-    "            np.int32\n",
-    "        ),\n",
+    "        geography.state_fips[c * n_records : (c + 1) * n_records].astype(np.int32),\n",
     "    )\n",
     "    for var in get_calculated_variables(s):\n",
     "        s.delete_arrays(var)\n",
@@ -576,9 +566,7 @@
     "        f\"{col in cd_to_cols.get(cd, [])}\"\n",
     "    )\n",
     "    # Check an unrelated state\n",
-    "    print(\n",
-    "        f\"  Visible to NC (37) targets: \" f\"{col in state_to_cols.get(37, [])}\"\n",
-    "    )\n",
+    "    print(f\"  Visible to NC (37) targets: {col in state_to_cols.get(37, [])}\")\n",
     "    print()"
    ]
   },
@@ -634,14 +622,9 @@
     "    else:\n",
     "        rate = load_take_up_rate(rate_key, 2024)\n",
     "    rate_str = (\n",
-    "        f\"{rate:.2%}\"\n",
-    "        if isinstance(rate, float)\n",
-    "        else f\"dict ({len(rate)} entries)\"\n",
+    "        f\"{rate:.2%}\" if isinstance(rate, float) else f\"dict ({len(rate)} entries)\"\n",
     "    )\n",
-    "    print(\n",
-    "        f\"  {spec['variable']:40s} \"\n",
-    "        f\"entity={spec['entity']:10s} rate={rate_str}\"\n",
-    "    )"
+    "    print(f\"  {spec['variable']:40s} entity={spec['entity']:10s} rate={rate_str}\")"
    ]
   },
   {
@@ -965,14 +948,9 @@
     "os.makedirs(output_dir, exist_ok=True)\n",
     "output_path = os.path.join(output_dir, \"results.h5\")\n",
     "\n",
-    "print(\n",
-    "    f\"Weight vector: {len(w):,} entries \"\n",
-    "    f\"({n_demo_cds} CDs x {n_records:,} HH)\"\n",
-    ")\n",
+    "print(f\"Weight vector: {len(w):,} entries ({n_demo_cds} CDs x {n_records:,} HH)\")\n",
     "print(f\"Non-zero weights: {(w > 0).sum()}\")\n",
-    "print(\n",
-    "    f\"Example HH weight in CD 3701: {w[cd_idx_3701 * n_records + record_idx]}\"\n",
-    ")\n",
+    "print(f\"Example HH weight in CD 3701: {w[cd_idx_3701 * n_records + record_idx]}\")\n",
     "print(f\"Example HH weight in CD 201: {w[cd_idx_201 * n_records + record_idx]}\")"
    ]
   },
@@ -1118,22 +1096,14 @@
     ")\n",
     "print(f\"Stacked dataset: {len(hh_after_df)} households\\n\")\n",
     "\n",
-    "mapping_df = pd.read_csv(\n",
-    "    f\"{output_dir}/mappings/results_household_mapping.csv\"\n",
-    ")\n",
-    "example_mapping = mapping_df.loc[\n",
-    "    mapping_df.original_household_id == example_hh_id\n",
-    "]\n",
-    "print(f\"Example household (original_id={example_hh_id}) \" f\"in mapping:\\n\")\n",
+    "mapping_df = pd.read_csv(f\"{output_dir}/mappings/results_household_mapping.csv\")\n",
+    "example_mapping = mapping_df.loc[mapping_df.original_household_id == example_hh_id]\n",
+    "print(f\"Example household (original_id={example_hh_id}) in mapping:\\n\")\n",
     "print(example_mapping.to_string(index=False))\n",
     "\n",
     "new_ids = example_mapping.new_household_id\n",
     "print(f\"\\nIn stacked dataset:\\n\")\n",
-    "print(\n",
-    "    hh_after_df.loc[hh_after_df.household_id.isin(new_ids)].to_string(\n",
-    "        index=False\n",
-    "    )\n",
-    ")"
+    "print(hh_after_df.loc[hh_after_df.household_id.isin(new_ids)].to_string(index=False))"
    ]
   },
   {
diff --git a/modal_app/data_build.py b/modal_app/data_build.py
index 131e7f0bf..f3c2191bb 100644
--- a/modal_app/data_build.py
+++ b/modal_app/data_build.py
@@ -20,9 +20,7 @@
 )
 
 image = (
-    modal.Image.debian_slim(python_version="3.13")
-    .apt_install("git")
-    .pip_install("uv")
+    modal.Image.debian_slim(python_version="3.13").apt_install("git").pip_install("uv")
 )
 
 REPO_URL = "https://github.com/PolicyEngine/policyengine-us-data.git"
@@ -380,9 +378,7 @@ def build_datasets(
         print("=== Phase 3: Building extended CPS ===")
         run_script_with_checkpoint(
             "policyengine_us_data/datasets/cps/extended_cps.py",
-            SCRIPT_OUTPUTS[
-                "policyengine_us_data/datasets/cps/extended_cps.py"
-            ],
+            SCRIPT_OUTPUTS["policyengine_us_data/datasets/cps/extended_cps.py"],
             branch,
             checkpoint_volume,
             env=env,
@@ -390,18 +386,13 @@ def build_datasets(
 
         # GROUP 3: After extended_cps - run in parallel
         # enhanced_cps and stratified_cps both depend on extended_cps
-        print(
-            "=== Phase 4: Building enhanced and stratified CPS (parallel)"
-            " ==="
-        )
+        print("=== Phase 4: Building enhanced and stratified CPS (parallel) ===")
         with ThreadPoolExecutor(max_workers=2) as executor:
             futures = [
                 executor.submit(
                     run_script_with_checkpoint,
                     "policyengine_us_data/datasets/cps/enhanced_cps.py",
-                    SCRIPT_OUTPUTS[
-                        "policyengine_us_data/datasets/cps/enhanced_cps.py"
-                    ],
+                    SCRIPT_OUTPUTS["policyengine_us_data/datasets/cps/enhanced_cps.py"],
                     branch,
                     checkpoint_volume,
                     env=env,
@@ -426,9 +417,7 @@ def build_datasets(
         print("=== Phase 5: Building small enhanced CPS ===")
         run_script_with_checkpoint(
             "policyengine_us_data/datasets/cps/small_enhanced_cps.py",
-            SCRIPT_OUTPUTS[
-                "policyengine_us_data/datasets/cps/small_enhanced_cps.py"
-            ],
+            SCRIPT_OUTPUTS["policyengine_us_data/datasets/cps/small_enhanced_cps.py"],
             branch,
             checkpoint_volume,
             env=env,
diff --git a/modal_app/local_area.py b/modal_app/local_area.py
index 92e068335..76ba00537 100644
--- a/modal_app/local_area.py
+++ b/modal_app/local_area.py
@@ -245,9 +245,7 @@ def validate_staging(branch: str, version: str) -> Dict:
     print(f"  States: {manifest['totals']['states']}")
     print(f"  Districts: {manifest['totals']['districts']}")
     print(f"  Cities: {manifest['totals']['cities']}")
-    print(
-        f"  Total size: {manifest['totals']['total_size_bytes'] / 1e9:.2f} GB"
-    )
+    print(f"  Total size: {manifest['totals']['total_size_bytes'] / 1e9:.2f} GB")
 
     return manifest
 
@@ -362,8 +360,7 @@ def promote_publish(branch: str = "main", version: str = "") -> str:
     manifest_path = staging_dir / version / "manifest.json"
     if not manifest_path.exists():
         raise RuntimeError(
-            f"No manifest found at {manifest_path}. "
-            f"Run build+stage workflow first."
+            f"No manifest found at {manifest_path}. Run build+stage workflow first."
         )
 
     with open(manifest_path) as f:
@@ -405,7 +402,9 @@ def promote_publish(branch: str = "main", version: str = "") -> str:
     if result.returncode != 0:
         raise RuntimeError(f"Promote failed: {result.stderr}")
 
-    return f"Successfully promoted version {version} with {len(manifest['files'])} files"
+    return (
+        f"Successfully promoted version {version} with {len(manifest['files'])} files"
+    )
 
 
 @app.function(
@@ -436,12 +435,8 @@ def coordinate_publish(
     calibration_dir.mkdir(parents=True, exist_ok=True)
 
     # hf_hub_download preserves directory structure, so files are in calibration/ subdir
-    weights_path = (
-        calibration_dir / "calibration" / "w_district_calibration.npy"
-    )
-    dataset_path = (
-        calibration_dir / "calibration" / "stratified_extended_cps.h5"
-    )
+    weights_path = calibration_dir / "calibration" / "w_district_calibration.npy"
+    dataset_path = calibration_dir / "calibration" / "stratified_extended_cps.h5"
     db_path = calibration_dir / "calibration" / "policy_data.db"
 
     if not all(p.exists() for p in [weights_path, dataset_path, db_path]):
@@ -514,15 +509,10 @@ def coordinate_publish(
     completed = get_completed_from_volume(version_dir)
     print(f"Found {len(completed)} already-completed items on volume")
 
-    work_chunks = partition_work(
-        states, districts, cities, num_workers, completed
-    )
+    work_chunks = partition_work(states, districts, cities, num_workers, completed)
 
     total_remaining = sum(len(c) for c in work_chunks)
-    print(
-        f"Remaining work: {total_remaining} items "
-        f"across {len(work_chunks)} workers"
-    )
+    print(f"Remaining work: {total_remaining} items across {len(work_chunks)} workers")
 
     if total_remaining == 0:
         print("All items already built!")
@@ -594,14 +584,10 @@ def coordinate_publish(
     )
 
     if actual_total < expected_total:
-        print(
-            f"WARNING: Expected {expected_total} files, found {actual_total}"
-        )
+        print(f"WARNING: Expected {expected_total} files, found {actual_total}")
 
     print("\nStarting upload to staging...")
-    result = upload_to_staging.remote(
-        branch=branch, version=version, manifest=manifest
-    )
+    result = upload_to_staging.remote(branch=branch, version=version, manifest=manifest)
     print(result)
 
     print("\n" + "=" * 60)
diff --git a/modal_app/remote_calibration_runner.py b/modal_app/remote_calibration_runner.py
index 689d245dd..f3afb509d 100644
--- a/modal_app/remote_calibration_runner.py
+++ b/modal_app/remote_calibration_runner.py
@@ -7,9 +7,7 @@
 hf_secret = modal.Secret.from_name("huggingface-token")
 
 image = (
-    modal.Image.debian_slim(python_version="3.11")
-    .apt_install("git")
-    .pip_install("uv")
+    modal.Image.debian_slim(python_version="3.11").apt_install("git").pip_install("uv")
 )
 
 REPO_URL = "https://github.com/PolicyEngine/policyengine-us-data.git"
diff --git a/paper/scripts/build_from_content.py b/paper/scripts/build_from_content.py
index 21068f0db..52f88389d 100644
--- a/paper/scripts/build_from_content.py
+++ b/paper/scripts/build_from_content.py
@@ -47,12 +47,8 @@ def md_to_latex(self, content, section_type="section"):
             latex = re.sub(r"^# Abstract\n\n", "", latex)
         else:
             # Convert markdown headers to LaTeX sections
-            latex = re.sub(
-                r"^# (.+)$", r"\\section{\1}", latex, flags=re.MULTILINE
-            )
-            latex = re.sub(
-                r"^## (.+)$", r"\\subsection{\1}", latex, flags=re.MULTILINE
-            )
+            latex = re.sub(r"^# (.+)$", r"\\section{\1}", latex, flags=re.MULTILINE)
+            latex = re.sub(r"^## (.+)$", r"\\subsection{\1}", latex, flags=re.MULTILINE)
             latex = re.sub(
                 r"^### (.+)$",
                 r"\\subsubsection{\1}",
@@ -173,15 +169,11 @@ def convert_citation(match):
                     if len(author_list) == 1:
                         # Handle "Author1 and Author2" format
                         if " and " in authors:
-                            first_author = (
-                                authors.split(" and ")[0].strip().split()[-1]
-                            )
+                            first_author = authors.split(" and ")[0].strip().split()[-1]
                             cite_key = f"{first_author.lower()}{year}"
                         else:
                             # Single author
-                            author = (
-                                author_list[0].strip().split()[-1]
-                            )  # Last name
+                            author = author_list[0].strip().split()[-1]  # Last name
                             cite_key = f"{author.lower()}{year}"
                     else:
                         # Multiple authors - use first author
@@ -191,9 +183,7 @@ def convert_citation(match):
                 return f"\\citep{{{cite_key}}}"
             return match.group(0)  # Return original if no year found
 
-        latex = re.sub(
-            r"\(([^)]+(?:19|20)\d{2}[a-z]?)\)", convert_citation, latex
-        )
+        latex = re.sub(r"\(([^)]+(?:19|20)\d{2}[a-z]?)\)", convert_citation, latex)
 
         # Also handle inline citations like "Author (Year)" or "Author et al. (Year)"
         def convert_inline_citation(match):
@@ -276,15 +266,11 @@ def convert_myst_citation(match):
                     if len(author_list) == 1:
                         # Handle "Author1 and Author2" format
                         if " and " in authors:
-                            first_author = (
-                                authors.split(" and ")[0].strip().split()[-1]
-                            )
+                            first_author = authors.split(" and ")[0].strip().split()[-1]
                             cite_key = f"{first_author.lower()}{year}"
                         else:
                             # Single author
-                            author = (
-                                author_list[0].strip().split()[-1]
-                            )  # Last name
+                            author = author_list[0].strip().split()[-1]  # Last name
                             cite_key = f"{author.lower()}{year}"
                     else:
                         # Multiple authors - use first author
@@ -294,9 +280,7 @@ def convert_myst_citation(match):
                 return f"{{cite}}`{cite_key}`"
             return match.group(0)
 
-        myst = re.sub(
-            r"\(([^)]+(?:19|20)\d{2}[a-z]?)\)", convert_myst_citation, myst
-        )
+        myst = re.sub(r"\(([^)]+(?:19|20)\d{2}[a-z]?)\)", convert_myst_citation, myst)
 
         # Handle inline citations like "Author (Year)" - convert to {cite:t}`author_year`
         def convert_inline_myst(match):
@@ -343,9 +327,7 @@ def process_content_file(self, content_file):
         # LaTeX conversion
         if stem == "abstract":
             latex_content = self.md_to_latex(content, section_type="abstract")
-            latex_content = (
-                f"\\begin{{abstract}}\n{latex_content}\n\\end{{abstract}}"
-            )
+            latex_content = f"\\begin{{abstract}}\n{latex_content}\n\\end{{abstract}}"
             latex_path = self.paper_dir / "abstract.tex"
         elif stem == "introduction":
             latex_content = self.md_to_latex(content)
diff --git a/paper/scripts/calculate_distributional_metrics.py b/paper/scripts/calculate_distributional_metrics.py
index 4afdc67d9..61de771b9 100644
--- a/paper/scripts/calculate_distributional_metrics.py
+++ b/paper/scripts/calculate_distributional_metrics.py
@@ -82,7 +82,7 @@ def calculate_top_shares(values, weights, percentiles=[90, 99]):
         threshold = weighted_percentile(values, weights, p)
         mask = values >= threshold
         top_income = np.sum(values[mask] * weights[mask])
-        shares[f"top_{100-p}%"] = top_income / total_income
+        shares[f"top_{100 - p}%"] = top_income / total_income
 
     return shares
 
diff --git a/paper/scripts/calculate_target_performance.py b/paper/scripts/calculate_target_performance.py
index 1a50ab3c4..8f5a65f1d 100644
--- a/paper/scripts/calculate_target_performance.py
+++ b/paper/scripts/calculate_target_performance.py
@@ -79,8 +79,7 @@ def compare_dataset_performance(
 
     # Calculate average improvement by target category
     categories = {
-        "IRS Income": lambda x: "employment_income" in x
-        or "capital_gains" in x,
+        "IRS Income": lambda x: "employment_income" in x or "capital_gains" in x,
         "Demographics": lambda x: "age_" in x or "population" in x,
         "Programs": lambda x: "snap" in x or "social_security" in x,
         "Tax Expenditures": lambda x: "salt" in x or "charitable" in x,
diff --git a/paper/scripts/generate_all_tables.py b/paper/scripts/generate_all_tables.py
index 8f4762036..690b528d4 100644
--- a/paper/scripts/generate_all_tables.py
+++ b/paper/scripts/generate_all_tables.py
@@ -33,9 +33,7 @@ def create_latex_table(df, caption, label, float_format=None):
 
     # Format the dataframe as LaTeX
     if float_format:
-        table_body = df.to_latex(
-            index=False, escape=False, float_format=float_format
-        )
+        table_body = df.to_latex(index=False, escape=False, float_format=float_format)
     else:
         table_body = df.to_latex(index=False, escape=False)
 
@@ -44,9 +42,7 @@ def create_latex_table(df, caption, label, float_format=None):
     tabular_start = next(
         i for i, line in enumerate(lines) if "\\begin{tabular}" in line
     )
-    tabular_end = next(
-        i for i, line in enumerate(lines) if "\\end{tabular}" in line
-    )
+    tabular_end = next(i for i, line in enumerate(lines) if "\\end{tabular}" in line)
 
     # Indent the tabular content
     for i in range(tabular_start, tabular_end + 1):
diff --git a/paper/scripts/generate_validation_metrics.py b/paper/scripts/generate_validation_metrics.py
index db586959d..90b3624d8 100644
--- a/paper/scripts/generate_validation_metrics.py
+++ b/paper/scripts/generate_validation_metrics.py
@@ -235,9 +235,7 @@ def main():
 
     print(f"\nResults saved to {results_dir}/")
     print("\nNOTE: All metrics marked as [TO BE CALCULATED] require full")
-    print(
-        "dataset generation and microsimulation runs to compute actual values."
-    )
+    print("dataset generation and microsimulation runs to compute actual values.")
 
 
 if __name__ == "__main__":
diff --git a/paper/scripts/markdown_to_latex.py b/paper/scripts/markdown_to_latex.py
index 5c3b0e3bb..7cc80b049 100644
--- a/paper/scripts/markdown_to_latex.py
+++ b/paper/scripts/markdown_to_latex.py
@@ -24,12 +24,8 @@ def convert_markdown_to_latex(markdown_content: str) -> str:
 
     # Convert headers
     latex = re.sub(r"^# (.+)$", r"\\section{\1}", latex, flags=re.MULTILINE)
-    latex = re.sub(
-        r"^## (.+)$", r"\\subsection{\1}", latex, flags=re.MULTILINE
-    )
-    latex = re.sub(
-        r"^### (.+)$", r"\\subsubsection{\1}", latex, flags=re.MULTILINE
-    )
+    latex = re.sub(r"^## (.+)$", r"\\subsection{\1}", latex, flags=re.MULTILINE)
+    latex = re.sub(r"^### (.+)$", r"\\subsubsection{\1}", latex, flags=re.MULTILINE)
 
     # Convert bold and italic
     latex = re.sub(r"\*\*(.+?)\*\*", r"\\textbf{\1}", latex)
@@ -67,9 +63,7 @@ def convert_markdown_to_latex(markdown_content: str) -> str:
 
             # Manage list stack
             while len(list_stack) > indent_level + 1:
-                new_lines.append(
-                    "  " * (len(list_stack) - 1) + "\\end{itemize}"
-                )
+                new_lines.append("  " * (len(list_stack) - 1) + "\\end{itemize}")
                 list_stack.pop()
 
             if len(list_stack) <= indent_level:
@@ -81,9 +75,7 @@ def convert_markdown_to_latex(markdown_content: str) -> str:
         else:
             # Close any open lists
             while list_stack:
-                new_lines.append(
-                    "  " * (len(list_stack) - 1) + "\\end{itemize}"
-                )
+                new_lines.append("  " * (len(list_stack) - 1) + "\\end{itemize}")
                 list_stack.pop()
             new_lines.append(line)
             in_list = False
diff --git a/policyengine_us_data/calibration/clone_and_assign.py b/policyengine_us_data/calibration/clone_and_assign.py
index 9aa64cbbc..3e9642a19 100644
--- a/policyengine_us_data/calibration/clone_and_assign.py
+++ b/policyengine_us_data/calibration/clone_and_assign.py
@@ -45,8 +45,7 @@ def load_global_block_distribution():
     csv_path = STORAGE_FOLDER / "block_cd_distributions.csv.gz"
     if not csv_path.exists():
         raise FileNotFoundError(
-            f"{csv_path} not found. "
-            "Run make_block_cd_distributions.py to generate."
+            f"{csv_path} not found. Run make_block_cd_distributions.py to generate."
         )
 
     df = pd.read_csv(csv_path, dtype={"block_geoid": str})
diff --git a/policyengine_us_data/calibration/puf_impute.py b/policyengine_us_data/calibration/puf_impute.py
index 4e2224895..bf835583c 100644
--- a/policyengine_us_data/calibration/puf_impute.py
+++ b/policyengine_us_data/calibration/puf_impute.py
@@ -194,9 +194,7 @@
     "social_security",
 ]
 
-RETIREMENT_PREDICTORS = (
-    RETIREMENT_DEMOGRAPHIC_PREDICTORS + RETIREMENT_INCOME_PREDICTORS
-)
+RETIREMENT_PREDICTORS = RETIREMENT_DEMOGRAPHIC_PREDICTORS + RETIREMENT_INCOME_PREDICTORS
 
 
 def _get_retirement_limits(year: int) -> dict:
@@ -411,9 +409,7 @@ def reconcile_ss_subcomponents(
     if puf_has_ss.any():
         shares = _qrf_ss_shares(data, n_cps, time_period, puf_has_ss)
         if shares is None:
-            shares = _age_heuristic_ss_shares(
-                data, n_cps, time_period, puf_has_ss
-            )
+            shares = _age_heuristic_ss_shares(data, n_cps, time_period, puf_has_ss)
 
     for sub in SS_SUBCOMPONENTS:
         if sub not in data:
@@ -492,17 +488,13 @@ def _map_to_entity(pred_values, variable_name):
             return pred_values
         entity = var_meta.entity.key
         if entity != "person":
-            return cps_sim.populations[entity].value_from_first_person(
-                pred_values
-            )
+            return cps_sim.populations[entity].value_from_first_person(pred_values)
         return pred_values
 
     # Impute weeks_unemployed for PUF half
     puf_weeks = None
     if y_full is not None and dataset_path is not None:
-        puf_weeks = _impute_weeks_unemployed(
-            data, y_full, time_period, dataset_path
-        )
+        puf_weeks = _impute_weeks_unemployed(data, y_full, time_period, dataset_path)
 
     # Impute retirement contributions for PUF half
     puf_retirement = None
@@ -526,24 +518,14 @@ def _map_to_entity(pred_values, variable_name):
                 time_period: np.concatenate([values, values + values.max()])
             }
         elif "_weight" in variable:
-            new_data[variable] = {
-                time_period: np.concatenate([values, values * 0])
-            }
+            new_data[variable] = {time_period: np.concatenate([values, values * 0])}
         elif variable == "weeks_unemployed" and puf_weeks is not None:
-            new_data[variable] = {
-                time_period: np.concatenate([values, puf_weeks])
-            }
-        elif (
-            variable in CPS_RETIREMENT_VARIABLES and puf_retirement is not None
-        ):
+            new_data[variable] = {time_period: np.concatenate([values, puf_weeks])}
+        elif variable in CPS_RETIREMENT_VARIABLES and puf_retirement is not None:
             puf_vals = puf_retirement[variable]
-            new_data[variable] = {
-                time_period: np.concatenate([values, puf_vals])
-            }
+            new_data[variable] = {time_period: np.concatenate([values, puf_vals])}
         else:
-            new_data[variable] = {
-                time_period: np.concatenate([values, values])
-            }
+            new_data[variable] = {time_period: np.concatenate([values, values])}
 
     new_data["state_fips"] = {
         time_period: np.concatenate([state_fips, state_fips]).astype(np.int32)
@@ -662,11 +644,7 @@ def _impute_weeks_unemployed(
     logger.info(
         "Imputed weeks_unemployed for PUF: %d with weeks > 0, mean = %.1f",
         (imputed_weeks > 0).sum(),
-        (
-            imputed_weeks[imputed_weeks > 0].mean()
-            if (imputed_weeks > 0).any()
-            else 0
-        ),
+        (imputed_weeks[imputed_weeks > 0].mean() if (imputed_weeks > 0).any() else 0),
     )
 
     del fitted, predictions
@@ -836,9 +814,7 @@ def _run_qrf_imputation(
 
     puf_sim = Microsimulation(dataset=puf_dataset)
 
-    puf_agi = puf_sim.calculate(
-        "adjusted_gross_income", map_to="person"
-    ).values
+    puf_agi = puf_sim.calculate("adjusted_gross_income", map_to="person").values
 
     X_train_full = puf_sim.calculate_dataframe(
         DEMOGRAPHIC_PREDICTORS + IMPUTED_VARIABLES
@@ -873,9 +849,7 @@ def _run_qrf_imputation(
                 X_test[pred] = data[pred][time_period].astype(np.float32)
 
     logger.info("Imputing %d PUF variables (full)", len(IMPUTED_VARIABLES))
-    y_full = _batch_qrf(
-        X_train_full, X_test, DEMOGRAPHIC_PREDICTORS, IMPUTED_VARIABLES
-    )
+    y_full = _batch_qrf(X_train_full, X_test, DEMOGRAPHIC_PREDICTORS, IMPUTED_VARIABLES)
 
     logger.info(
         "Imputing %d PUF variables (override)",
@@ -915,9 +889,7 @@ def _stratified_subsample_index(
     if remaining_quota >= len(bottom_idx):
         selected_bottom = bottom_idx
     else:
-        selected_bottom = rng.choice(
-            bottom_idx, size=remaining_quota, replace=False
-        )
+        selected_bottom = rng.choice(bottom_idx, size=remaining_quota, replace=False)
 
     selected = np.concatenate([top_idx, selected_bottom])
     selected.sort()
diff --git a/policyengine_us_data/calibration/source_impute.py b/policyengine_us_data/calibration/source_impute.py
index 339e038ed..25c7975ad 100644
--- a/policyengine_us_data/calibration/source_impute.py
+++ b/policyengine_us_data/calibration/source_impute.py
@@ -225,9 +225,7 @@ def _person_state_fips(
     if hh_ids_person is not None:
         hh_ids = data["household_id"][time_period]
         hh_to_idx = {int(hh_id): i for i, hh_id in enumerate(hh_ids)}
-        return np.array(
-            [state_fips[hh_to_idx[int(hh_id)]] for hh_id in hh_ids_person]
-        )
+        return np.array([state_fips[hh_to_idx[int(hh_id)]] for hh_id in hh_ids_person])
     # Fallback: distribute persons across households as evenly
     # as possible (first households get any remainder).
     n_hh = len(data["household_id"][time_period])
@@ -264,9 +262,9 @@ def _impute_acs(
     predictors = ACS_PREDICTORS + ["state_fips"]
 
     acs_df = acs.calculate_dataframe(ACS_PREDICTORS + ACS_IMPUTED_VARIABLES)
-    acs_df["state_fips"] = acs.calculate(
-        "state_fips", map_to="person"
-    ).values.astype(np.float32)
+    acs_df["state_fips"] = acs.calculate("state_fips", map_to="person").values.astype(
+        np.float32
+    )
 
     train_df = acs_df[acs_df.is_household_head].sample(10_000, random_state=42)
     train_df = _encode_tenure_type(train_df)
@@ -368,16 +366,10 @@ def _impute_sipp(
     sipp_df["is_under_18"] = sipp_df.TAGE < 18
     sipp_df["is_under_6"] = sipp_df.TAGE < 6
     sipp_df["count_under_18"] = (
-        sipp_df.groupby("SSUID")["is_under_18"]
-        .sum()
-        .loc[sipp_df.SSUID.values]
-        .values
+        sipp_df.groupby("SSUID")["is_under_18"].sum().loc[sipp_df.SSUID.values].values
     )
     sipp_df["count_under_6"] = (
-        sipp_df.groupby("SSUID")["is_under_6"]
-        .sum()
-        .loc[sipp_df.SSUID.values]
-        .values
+        sipp_df.groupby("SSUID")["is_under_6"].sum().loc[sipp_df.SSUID.values].values
     )
 
     tip_cols = [
@@ -408,9 +400,9 @@ def _impute_sipp(
         age_df = pd.DataFrame({"hh": hh_ids_person, "age": person_ages})
         under_18 = age_df.groupby("hh")["age"].apply(lambda x: (x < 18).sum())
         under_6 = age_df.groupby("hh")["age"].apply(lambda x: (x < 6).sum())
-        cps_tip_df["count_under_18"] = under_18.loc[
-            hh_ids_person
-        ].values.astype(np.float32)
+        cps_tip_df["count_under_18"] = under_18.loc[hh_ids_person].values.astype(
+            np.float32
+        )
         cps_tip_df["count_under_6"] = under_6.loc[hh_ids_person].values.astype(
             np.float32
         )
@@ -499,10 +491,7 @@ def _impute_sipp(
                 asset_train.index,
                 size=min(20_000, len(asset_train)),
                 replace=True,
-                p=(
-                    asset_train.household_weight
-                    / asset_train.household_weight.sum()
-                ),
+                p=(asset_train.household_weight / asset_train.household_weight.sum()),
             )
         ]
 
@@ -513,15 +502,15 @@ def _impute_sipp(
             ["employment_income", "age", "is_male"],
         )
         if "is_male" in cps_asset_df.columns:
-            cps_asset_df["is_female"] = (
-                ~cps_asset_df["is_male"].astype(bool)
-            ).astype(np.float32)
+            cps_asset_df["is_female"] = (~cps_asset_df["is_male"].astype(bool)).astype(
+                np.float32
+            )
         else:
             cps_asset_df["is_female"] = 0.0
         if "is_married" in data:
-            cps_asset_df["is_married"] = data["is_married"][
-                time_period
-            ].astype(np.float32)
+            cps_asset_df["is_married"] = data["is_married"][time_period].astype(
+                np.float32
+            )
         else:
             cps_asset_df["is_married"] = 0.0
         cps_asset_df["count_under_18"] = (
@@ -623,9 +612,7 @@ def _impute_scf(
     cps_df = _build_cps_receiver(data, time_period, dataset_path, pe_vars)
 
     if "is_male" in cps_df.columns:
-        cps_df["is_female"] = (~cps_df["is_male"].astype(bool)).astype(
-            np.float32
-        )
+        cps_df["is_female"] = (~cps_df["is_male"].astype(bool)).astype(np.float32)
     else:
         cps_df["is_female"] = 0.0
 
diff --git a/policyengine_us_data/calibration/unified_calibration.py b/policyengine_us_data/calibration/unified_calibration.py
index 1fb7a6b34..6a7a8bd1b 100644
--- a/policyengine_us_data/calibration/unified_calibration.py
+++ b/policyengine_us_data/calibration/unified_calibration.py
@@ -144,20 +144,14 @@ def rerandomize_takeup(
 
         is_state_specific = isinstance(rate_or_dict, dict)
 
-        entity_ids = sim.calculate(
-            f"{entity_level}_id", map_to=entity_level
-        ).values
-        entity_hh_ids = sim.calculate(
-            "household_id", map_to=entity_level
-        ).values
+        entity_ids = sim.calculate(f"{entity_level}_id", map_to=entity_level).values
+        entity_hh_ids = sim.calculate("household_id", map_to=entity_level).values
         n_entities = len(entity_ids)
 
         draws = np.zeros(n_entities, dtype=np.float64)
         rates = np.zeros(n_entities, dtype=np.float64)
 
-        entity_blocks = np.array(
-            [hh_to_block.get(hid, "0") for hid in entity_hh_ids]
-        )
+        entity_blocks = np.array([hh_to_block.get(hid, "0") for hid in entity_hh_ids])
 
         unique_blocks = np.unique(entity_blocks)
         for block in unique_blocks:
@@ -185,9 +179,7 @@ def rerandomize_takeup(
 
 
 def parse_args(argv=None):
-    parser = argparse.ArgumentParser(
-        description="Unified L0 calibration pipeline"
-    )
+    parser = argparse.ArgumentParser(description="Unified L0 calibration pipeline")
     parser.add_argument(
         "--dataset",
         default=None,
@@ -308,8 +300,7 @@ def fit_l0_weights(
     initial_weights = np.ones(n_total) * 100
 
     logger.info(
-        "L0 calibration: %d targets, %d features, "
-        "lambda_l0=%.1e, epochs=%d",
+        "L0 calibration: %d targets, %d features, lambda_l0=%.1e, epochs=%d",
         X_sparse.shape[0],
         n_total,
         lambda_l0,
@@ -609,8 +600,7 @@ def run_calibration(
         )
 
         source_path = str(
-            Path(dataset_path).parent
-            / f"source_imputed_{Path(dataset_path).stem}.h5"
+            Path(dataset_path).parent / f"source_imputed_{Path(dataset_path).stem}.h5"
         )
         with h5py.File(source_path, "w") as f:
             for var, time_dict in data_dict.items():
@@ -716,9 +706,7 @@ def main(argv=None):
     dataset_path = args.dataset or str(
         STORAGE_FOLDER / "stratified_extended_cps_2024.h5"
     )
-    db_path = args.db_path or str(
-        STORAGE_FOLDER / "calibration" / "policy_data.db"
-    )
+    db_path = args.db_path or str(STORAGE_FOLDER / "calibration" / "policy_data.db")
     output_path = args.output or str(
         STORAGE_FOLDER / "calibration" / "unified_weights.npy"
     )
@@ -732,15 +720,11 @@ def main(argv=None):
 
     domain_variables = None
     if args.domain_variables:
-        domain_variables = [
-            x.strip() for x in args.domain_variables.split(",")
-        ]
+        domain_variables = [x.strip() for x in args.domain_variables.split(",")]
 
     hierarchical_domains = None
     if args.hierarchical_domains:
-        hierarchical_domains = [
-            x.strip() for x in args.hierarchical_domains.split(",")
-        ]
+        hierarchical_domains = [x.strip() for x in args.hierarchical_domains.split(",")]
 
     t_start = time.time()
 
diff --git a/policyengine_us_data/calibration/unified_matrix_builder.py b/policyengine_us_data/calibration/unified_matrix_builder.py
index ac31c34e1..c0b442f35 100644
--- a/policyengine_us_data/calibration/unified_matrix_builder.py
+++ b/policyengine_us_data/calibration/unified_matrix_builder.py
@@ -71,18 +71,10 @@ def _build_entity_relationship(self, sim) -> pd.DataFrame:
 
         self._entity_rel_cache = pd.DataFrame(
             {
-                "person_id": sim.calculate(
-                    "person_id", map_to="person"
-                ).values,
-                "household_id": sim.calculate(
-                    "household_id", map_to="person"
-                ).values,
-                "tax_unit_id": sim.calculate(
-                    "tax_unit_id", map_to="person"
-                ).values,
-                "spm_unit_id": sim.calculate(
-                    "spm_unit_id", map_to="person"
-                ).values,
+                "person_id": sim.calculate("person_id", map_to="person").values,
+                "household_id": sim.calculate("household_id", map_to="person").values,
+                "tax_unit_id": sim.calculate("tax_unit_id", map_to="person").values,
+                "spm_unit_id": sim.calculate("spm_unit_id", map_to="person").values,
             }
         )
         return self._entity_rel_cache
@@ -126,9 +118,7 @@ def _evaluate_constraints_entity_aware(
         df["satisfies"] = person_mask
         hh_mask = df.groupby("household_id")["satisfies"].any()
 
-        household_ids = sim.calculate(
-            "household_id", map_to="household"
-        ).values
+        household_ids = sim.calculate("household_id", map_to="household").values
         return np.array([hh_mask.get(hid, False) for hid in household_ids])
 
     # ---------------------------------------------------------------
@@ -240,9 +230,7 @@ def _calculate_uprating_factors(self, params) -> dict:
                 factors[(from_year, "cpi")] = 1.0
 
             try:
-                pop_from = params.calibration.gov.census.populations.total(
-                    from_year
-                )
+                pop_from = params.calibration.gov.census.populations.total(from_year)
                 pop_to = params.calibration.gov.census.populations.total(
                     self.time_period
                 )
@@ -326,9 +314,7 @@ def _get_state_uprating_factors(
                         var_factors[var] = 1.0
                         continue
                     period = row.iloc[0]["period"]
-                    factor, _ = self._get_uprating_info(
-                        var, period, national_factors
-                    )
+                    factor, _ = self._get_uprating_info(var, period, national_factors)
                     var_factors[var] = factor
 
             result[state_int] = var_factors
@@ -430,14 +416,12 @@ def print_uprating_summary(self, targets_df: pd.DataFrame) -> None:
         print("\n" + "=" * 60)
         print("UPRATING SUMMARY")
         print("=" * 60)
-        print(f"Uprated {len(uprated)} of " f"{len(targets_df)} targets")
+        print(f"Uprated {len(uprated)} of {len(targets_df)} targets")
         period_counts = uprated["period"].value_counts().sort_index()
         for period, count in period_counts.items():
             print(f"  Period {period}: {count} targets")
         factors = eff[eff != 1.0]
-        print(
-            f"  Factor range: [{factors.min():.4f}, " f"{factors.max():.4f}]"
-        )
+        print(f"  Factor range: [{factors.min():.4f}, {factors.max():.4f}]")
 
     # ---------------------------------------------------------------
     # Target naming
@@ -465,9 +449,7 @@ def _make_target_name(
 
         non_geo = [c for c in constraints if c["variable"] not in _GEO_VARS]
         if non_geo:
-            strs = [
-                f"{c['variable']}{c['operation']}{c['value']}" for c in non_geo
-            ]
+            strs = [f"{c['variable']}{c['operation']}{c['value']}" for c in non_geo]
             parts.append("[" + ",".join(strs) + "]")
 
         return "/".join(parts)
@@ -510,12 +492,8 @@ def _calculate_target_values(
                 return np.zeros(n_households, dtype=np.float32)
             person_mask &= apply_op(cv, c["operation"], c["value"])
 
-        target_entity = sim.tax_benefit_system.variables[
-            target_variable
-        ].entity.key
-        household_ids = sim.calculate(
-            "household_id", map_to="household"
-        ).values
+        target_entity = sim.tax_benefit_system.variables[target_variable].entity.key
+        household_ids = sim.calculate("household_id", map_to="household").values
 
         if target_entity == "household":
             if non_geo_constraints:
@@ -674,15 +652,9 @@ def build_matrix(
         n_targets = len(targets_df)
 
         # 2. Sort targets by geographic level
-        targets_df["_geo_level"] = targets_df["geographic_id"].apply(
-            get_geo_level
-        )
-        targets_df = targets_df.sort_values(
-            ["_geo_level", "variable", "geographic_id"]
-        )
-        targets_df = targets_df.drop(columns=["_geo_level"]).reset_index(
-            drop=True
-        )
+        targets_df["_geo_level"] = targets_df["geographic_id"].apply(get_geo_level)
+        targets_df = targets_df.sort_values(["_geo_level", "variable", "geographic_id"])
+        targets_df = targets_df.drop(columns=["_geo_level"]).reset_index(drop=True)
 
         # 3. Build column index structures from geography
         state_col_lists: Dict[int, list] = defaultdict(list)
@@ -709,9 +681,7 @@ def build_matrix(
             geo_id = row["geographic_id"]
             target_geo_info.append((geo_level, geo_id))
 
-            non_geo = [
-                c for c in constraints if c["variable"] not in _GEO_VARS
-            ]
+            non_geo = [c for c in constraints if c["variable"] not in _GEO_VARS]
             non_geo_constraints_list.append(non_geo)
 
             target_names.append(
@@ -745,7 +715,7 @@ def build_matrix(
             clone_states = geography.state_fips[col_start:col_end]
 
             logger.info(
-                "Processing clone %d/%d " "(cols %d-%d, %d unique states)...",
+                "Processing clone %d/%d (cols %d-%d, %d unique states)...",
                 clone_idx + 1,
                 n_clones,
                 col_start,
diff --git a/policyengine_us_data/datasets/acs/acs.py b/policyengine_us_data/datasets/acs/acs.py
index 0ecd3ee7c..11d1ef738 100644
--- a/policyengine_us_data/datasets/acs/acs.py
+++ b/policyengine_us_data/datasets/acs/acs.py
@@ -18,9 +18,7 @@ def generate(self) -> None:
 
         raw_data = self.census_acs(require=True).load()
         acs = h5py.File(self.file_path, mode="w")
-        person, household = [
-            raw_data[entity] for entity in ("person", "household")
-        ]
+        person, household = [raw_data[entity] for entity in ("person", "household")]
 
         self.add_id_variables(acs, person, household)
         self.add_person_variables(acs, person, household)
@@ -39,9 +37,7 @@ def add_id_variables(
         h_id_to_number = pd.Series(
             np.arange(len(household)), index=household["SERIALNO"]
         )
-        household["household_id"] = h_id_to_number[
-            household["SERIALNO"]
-        ].values
+        household["household_id"] = h_id_to_number[household["SERIALNO"]].values
         person["household_id"] = h_id_to_number[person["SERIALNO"]].values
         person["person_id"] = person.index + 1
 
@@ -100,9 +96,7 @@ def add_spm_variables(acs: h5py.File, spm_unit: DataFrame) -> None:
     @staticmethod
     def add_household_variables(acs: h5py.File, household: DataFrame) -> None:
         acs["household_vehicles_owned"] = household.VEH
-        acs["state_fips"] = acs["household_state_fips"] = household.ST.astype(
-            int
-        )
+        acs["state_fips"] = acs["household_state_fips"] = household.ST.astype(int)
 
 
 class ACS_2022(ACS):
diff --git a/policyengine_us_data/datasets/acs/census_acs.py b/policyengine_us_data/datasets/acs/census_acs.py
index 842af6279..7bd28bd61 100644
--- a/policyengine_us_data/datasets/acs/census_acs.py
+++ b/policyengine_us_data/datasets/acs/census_acs.py
@@ -66,9 +66,7 @@ def generate(self) -> None:
             household = self.process_household_data(
                 household_url, "psam_hus", HOUSEHOLD_COLUMNS
             )
-            person = self.process_person_data(
-                person_url, "psam_pus", PERSON_COLUMNS
-            )
+            person = self.process_person_data(person_url, "psam_pus", PERSON_COLUMNS)
             person = person[person.SERIALNO.isin(household.SERIALNO)]
             household = household[household.SERIALNO.isin(person.SERIALNO)]
             storage["household"] = household
@@ -106,9 +104,7 @@ def process_household_data(
         return res
 
     @staticmethod
-    def process_person_data(
-        url: str, prefix: str, columns: List[str]
-    ) -> pd.DataFrame:
+    def process_person_data(url: str, prefix: str, columns: List[str]) -> pd.DataFrame:
         req = requests.get(url, stream=True)
         with BytesIO() as f:
             pbar = tqdm()
@@ -137,9 +133,7 @@ def process_person_data(
         return res
 
     @staticmethod
-    def create_spm_unit_table(
-        storage: pd.HDFStore, person: pd.DataFrame
-    ) -> None:
+    def create_spm_unit_table(storage: pd.HDFStore, person: pd.DataFrame) -> None:
         SPM_UNIT_COLUMNS = [
             "CAPHOUSESUB",
             "CAPWKCCXPNS",
@@ -181,12 +175,10 @@ def create_spm_unit_table(
 
         # Ensure SERIALNO is treated as string
         JOIN_COLUMNS = ["SERIALNO", "SPORDER"]
-        original_person_table["SERIALNO"] = original_person_table[
-            "SERIALNO"
-        ].astype(str)
-        original_person_table["SPORDER"] = original_person_table[
-            "SPORDER"
-        ].astype(int)
+        original_person_table["SERIALNO"] = original_person_table["SERIALNO"].astype(
+            str
+        )
+        original_person_table["SPORDER"] = original_person_table["SPORDER"].astype(int)
         person["SERIALNO"] = person["SERIALNO"].astype(str)
         person["SPORDER"] = person["SPORDER"].astype(int)
 
diff --git a/policyengine_us_data/datasets/cps/census_cps.py b/policyengine_us_data/datasets/cps/census_cps.py
index 00ca020ef..042fefe56 100644
--- a/policyengine_us_data/datasets/cps/census_cps.py
+++ b/policyengine_us_data/datasets/cps/census_cps.py
@@ -15,9 +15,7 @@ class CensusCPS(Dataset):
 
     def generate(self):
         if self._cps_download_url is None:
-            raise ValueError(
-                f"No raw CPS data URL known for year {self.time_period}."
-            )
+            raise ValueError(f"No raw CPS data URL known for year {self.time_period}.")
 
         url = self._cps_download_url
 
@@ -28,9 +26,7 @@ def generate(self):
             ]
 
         response = requests.get(url, stream=True)
-        total_size_in_bytes = int(
-            response.headers.get("content-length", 200e6)
-        )
+        total_size_in_bytes = int(response.headers.get("content-length", 200e6))
         progress_bar = tqdm(
             total=total_size_in_bytes,
             unit="iB",
@@ -38,9 +34,7 @@ def generate(self):
             desc="Downloading ASEC",
         )
         if response.status_code == 404:
-            raise FileNotFoundError(
-                "Received a 404 response when fetching the data."
-            )
+            raise FileNotFoundError("Received a 404 response when fetching the data.")
         with BytesIO() as file:
             content_length_actual = 0
             for data in response.iter_content(int(1e6)):
@@ -65,33 +59,23 @@ def generate(self):
                     file_prefix = "cpspb/asec/prod/data/2019/"
                 else:
                     file_prefix = ""
-                with zipfile.open(
-                    f"{file_prefix}pppub{file_year_code}.csv"
-                ) as f:
+                with zipfile.open(f"{file_prefix}pppub{file_year_code}.csv") as f:
                     storage["person"] = pd.read_csv(
                         f,
-                        usecols=PERSON_COLUMNS
-                        + spm_unit_columns
-                        + TAX_UNIT_COLUMNS,
+                        usecols=PERSON_COLUMNS + spm_unit_columns + TAX_UNIT_COLUMNS,
                     ).fillna(0)
                     person = storage["person"]
-                with zipfile.open(
-                    f"{file_prefix}ffpub{file_year_code}.csv"
-                ) as f:
+                with zipfile.open(f"{file_prefix}ffpub{file_year_code}.csv") as f:
                     person_family_id = person.PH_SEQ * 10 + person.PF_SEQ
                     family = pd.read_csv(f).fillna(0)
                     family_id = family.FH_SEQ * 10 + family.FFPOS
                     family = family[family_id.isin(person_family_id)]
                     storage["family"] = family
-                with zipfile.open(
-                    f"{file_prefix}hhpub{file_year_code}.csv"
-                ) as f:
+                with zipfile.open(f"{file_prefix}hhpub{file_year_code}.csv") as f:
                     person_household_id = person.PH_SEQ
                     household = pd.read_csv(f).fillna(0)
                     household_id = household.H_SEQ
-                    household = household[
-                        household_id.isin(person_household_id)
-                    ]
+                    household = household[household_id.isin(person_household_id)]
                     storage["household"] = household
                 storage["tax_unit"] = self._create_tax_unit_table(person)
                 storage["spm_unit"] = self._create_spm_unit_table(
diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
index bbc7f4fba..ccbe48850 100644
--- a/policyengine_us_data/datasets/cps/cps.py
+++ b/policyengine_us_data/datasets/cps/cps.py
@@ -93,9 +93,7 @@ def downsample(self, frac: float):
 
         # Store original dtypes before modifying
         original_data: dict = self.load_dataset()
-        original_dtypes = {
-            key: original_data[key].dtype for key in original_data
-        }
+        original_dtypes = {key: original_data[key].dtype for key in original_data}
         sim = Microsimulation(dataset=self)
         sim.subsample(frac=frac)
 
@@ -208,18 +206,13 @@ def add_takeup(self):
     aca_rate = load_take_up_rate("aca", self.time_period)
     medicaid_rates_by_state = load_take_up_rate("medicaid", self.time_period)
     head_start_rate = load_take_up_rate("head_start", self.time_period)
-    early_head_start_rate = load_take_up_rate(
-        "early_head_start", self.time_period
-    )
+    early_head_start_rate = load_take_up_rate("early_head_start", self.time_period)
     ssi_rate = load_take_up_rate("ssi", self.time_period)
 
     # EITC: varies by number of children
     eitc_child_count = baseline.calculate("eitc_child_count").values
     eitc_takeup_rate = np.array(
-        [
-            eitc_rates_by_children.get(min(int(c), 3), 0.85)
-            for c in eitc_child_count
-        ]
+        [eitc_rates_by_children.get(min(int(c), 3), 0.85) for c in eitc_child_count]
     )
     rng = seeded_rng("takes_up_eitc")
     data["takes_up_eitc"] = rng.random(n_tax_units) < eitc_takeup_rate
@@ -238,9 +231,7 @@ def add_takeup(self):
     target_snap_takeup_count = int(snap_rate * n_spm_units)
     remaining_snap_needed = max(0, target_snap_takeup_count - n_snap_reporters)
     snap_non_reporter_rate = (
-        remaining_snap_needed / n_snap_non_reporters
-        if n_snap_non_reporters > 0
-        else 0
+        remaining_snap_needed / n_snap_non_reporters if n_snap_non_reporters > 0 else 0
     )
 
     # Assign: all reporters + adjusted rate for non-reporters
@@ -257,9 +248,7 @@ def add_takeup(self):
     hh_ids = data["household_id"]
     person_hh_ids = data["person_household_id"]
     hh_to_state = dict(zip(hh_ids, state_codes))
-    person_states = np.array(
-        [hh_to_state.get(hh_id, "CA") for hh_id in person_hh_ids]
-    )
+    person_states = np.array([hh_to_state.get(hh_id, "CA") for hh_id in person_hh_ids])
     medicaid_rate_by_person = np.array(
         [medicaid_rates_by_state.get(s, 0.93) for s in person_states]
     )
@@ -270,9 +259,7 @@ def add_takeup(self):
 
     # Head Start
     rng = seeded_rng("takes_up_head_start_if_eligible")
-    data["takes_up_head_start_if_eligible"] = (
-        rng.random(n_persons) < head_start_rate
-    )
+    data["takes_up_head_start_if_eligible"] = rng.random(n_persons) < head_start_rate
 
     # Early Head Start
     rng = seeded_rng("takes_up_early_head_start_if_eligible")
@@ -290,9 +277,7 @@ def add_takeup(self):
     target_ssi_takeup_count = int(ssi_rate * n_persons)
     remaining_ssi_needed = max(0, target_ssi_takeup_count - n_ssi_reporters)
     ssi_non_reporter_rate = (
-        remaining_ssi_needed / n_ssi_non_reporters
-        if n_ssi_non_reporters > 0
-        else 0
+        remaining_ssi_needed / n_ssi_non_reporters if n_ssi_non_reporters > 0 else 0
     )
 
     # Assign: all reporters + adjusted rate for non-reporters
@@ -315,9 +300,7 @@ def add_takeup(self):
     data["would_claim_wic"] = rng.random(n_persons) < wic_takeup_rate_by_person
 
     # WIC nutritional risk — fully resolved
-    wic_risk_rates = load_take_up_rate(
-        "wic_nutritional_risk", self.time_period
-    )
+    wic_risk_rates = load_take_up_rate("wic_nutritional_risk", self.time_period)
     wic_risk_rate_by_person = np.array(
         [wic_risk_rates.get(c, 0) for c in wic_categories]
     )
@@ -364,12 +347,8 @@ def uprate_cps_data(data, from_period, to_period):
     uprating = create_policyengine_uprating_factors_table()
     for variable in uprating.index.unique():
         if variable in data:
-            current_index = uprating[uprating.index == variable][
-                to_period
-            ].values[0]
-            start_index = uprating[uprating.index == variable][
-                from_period
-            ].values[0]
+            current_index = uprating[uprating.index == variable][to_period].values[0]
+            start_index = uprating[uprating.index == variable][from_period].values[0]
             growth = current_index / start_index
             data[variable] = data[variable] * growth
 
@@ -411,9 +390,7 @@ def add_id_variables(
 
     # Marital units
 
-    marital_unit_id = person.PH_SEQ * 1e6 + np.maximum(
-        person.A_LINENO, person.A_SPOUSE
-    )
+    marital_unit_id = person.PH_SEQ * 1e6 + np.maximum(person.A_LINENO, person.A_SPOUSE)
 
     # marital_unit_id is not the household ID, zero padded and followed
     # by the index within household (of each person, or their spouse if
@@ -453,9 +430,7 @@ def add_personal_variables(cps: h5py.File, person: DataFrame) -> None:
     # "Is...blind or does...have serious difficulty seeing even when Wearing
     #  glasses?" 1 -> Yes
     cps["is_blind"] = person.PEDISEYE == 1
-    DISABILITY_FLAGS = [
-        "PEDIS" + i for i in ["DRS", "EAR", "EYE", "OUT", "PHY", "REM"]
-    ]
+    DISABILITY_FLAGS = ["PEDIS" + i for i in ["DRS", "EAR", "EYE", "OUT", "PHY", "REM"]]
     cps["is_disabled"] = (person[DISABILITY_FLAGS] == 1).any(axis=1)
 
     def children_per_parent(col: str) -> pd.DataFrame:
@@ -477,9 +452,7 @@ def children_per_parent(col: str) -> pd.DataFrame:
 
     # Aggregate to parent.
     res = (
-        pd.concat(
-            [children_per_parent("PEPAR1"), children_per_parent("PEPAR2")]
-        )
+        pd.concat([children_per_parent("PEPAR1"), children_per_parent("PEPAR2")])
         .groupby(["PH_SEQ", "A_LINENO"])
         .children.sum()
         .reset_index()
@@ -505,9 +478,7 @@ def children_per_parent(col: str) -> pd.DataFrame:
     add_overtime_occupation(cps, person)
 
 
-def add_personal_income_variables(
-    cps: h5py.File, person: DataFrame, year: int
-):
+def add_personal_income_variables(cps: h5py.File, person: DataFrame, year: int):
     """Add income variables.
 
     Args:
@@ -533,16 +504,14 @@ def add_personal_income_variables(
     cps["weekly_hours_worked"] = person.HRSWK * person.WKSWORK / 52
     cps["hours_worked_last_week"] = person.A_HRS1 * person.WKSWORK / 52
 
-    cps["taxable_interest_income"] = person.INT_VAL * (
-        p["taxable_interest_fraction"]
-    )
+    cps["taxable_interest_income"] = person.INT_VAL * (p["taxable_interest_fraction"])
     cps["tax_exempt_interest_income"] = person.INT_VAL * (
         1 - p["taxable_interest_fraction"]
     )
     cps["self_employment_income"] = person.SEMP_VAL
     cps["farm_income"] = person.FRSE_VAL
-    cps["qualified_dividend_income"] = person.DIV_VAL * (
-        p["qualified_dividend_fraction"]
+    cps["qualified_dividend_income"] = (
+        person.DIV_VAL * (p["qualified_dividend_fraction"])
     )
     cps["non_qualified_dividend_income"] = person.DIV_VAL * (
         1 - p["qualified_dividend_fraction"]
@@ -561,18 +530,14 @@ def add_personal_income_variables(
     #   8 = Other
     is_retirement = (person.RESNSS1 == 1) | (person.RESNSS2 == 1)
     is_disability = (person.RESNSS1 == 2) | (person.RESNSS2 == 2)
-    is_survivor = np.isin(person.RESNSS1, [3, 5]) | np.isin(
-        person.RESNSS2, [3, 5]
-    )
+    is_survivor = np.isin(person.RESNSS1, [3, 5]) | np.isin(person.RESNSS2, [3, 5])
     is_dependent = np.isin(person.RESNSS1, [4, 6, 7]) | np.isin(
         person.RESNSS2, [4, 6, 7]
     )
 
     # Primary classification: assign full SS_VAL to the highest-
     # priority category when someone has multiple source codes.
-    cps["social_security_retirement"] = np.where(
-        is_retirement, person.SS_VAL, 0
-    )
+    cps["social_security_retirement"] = np.where(is_retirement, person.SS_VAL, 0)
     cps["social_security_disability"] = np.where(
         is_disability & ~is_retirement, person.SS_VAL, 0
     )
@@ -615,9 +580,7 @@ def add_personal_income_variables(
     # Add pensions and annuities.
     cps_pensions = person.PNSN_VAL + person.ANN_VAL
     # Assume a constant fraction of pension income is taxable.
-    cps["taxable_private_pension_income"] = (
-        cps_pensions * p["taxable_pension_fraction"]
-    )
+    cps["taxable_private_pension_income"] = cps_pensions * p["taxable_pension_fraction"]
     cps["tax_exempt_private_pension_income"] = cps_pensions * (
         1 - p["taxable_pension_fraction"]
     )
@@ -641,18 +604,11 @@ def add_personal_income_variables(
     for source_with_taxable_fraction in ["401k", "403b", "sep"]:
         cps[f"taxable_{source_with_taxable_fraction}_distributions"] = (
             cps[f"{source_with_taxable_fraction}_distributions"]
-            * p[
-                f"taxable_{source_with_taxable_fraction}_distribution_fraction"
-            ]
+            * p[f"taxable_{source_with_taxable_fraction}_distribution_fraction"]
         )
         cps[f"tax_exempt_{source_with_taxable_fraction}_distributions"] = cps[
             f"{source_with_taxable_fraction}_distributions"
-        ] * (
-            1
-            - p[
-                f"taxable_{source_with_taxable_fraction}_distribution_fraction"
-            ]
-        )
+        ] * (1 - p[f"taxable_{source_with_taxable_fraction}_distribution_fraction"])
         del cps[f"{source_with_taxable_fraction}_distributions"]
 
     # Assume all regular IRA distributions are taxable,
@@ -740,9 +696,7 @@ def add_personal_income_variables(
     cps["traditional_ira_contributions"] = ira_capped * trad_ira_share
     cps["roth_ira_contributions"] = ira_capped * (1 - trad_ira_share)
     # Allocate capital gains into long-term and short-term based on aggregate split.
-    cps["long_term_capital_gains"] = person.CAP_VAL * (
-        p["long_term_capgain_fraction"]
-    )
+    cps["long_term_capital_gains"] = person.CAP_VAL * (p["long_term_capgain_fraction"])
     cps["short_term_capital_gains"] = person.CAP_VAL * (
         1 - p["long_term_capgain_fraction"]
     )
@@ -770,10 +724,7 @@ def add_personal_income_variables(
 
     # Get QBI simulation parameters ---
     yamlfilename = (
-        files("policyengine_us_data")
-        / "datasets"
-        / "puf"
-        / "qbi_assumptions.yaml"
+        files("policyengine_us_data") / "datasets" / "puf" / "qbi_assumptions.yaml"
     )
     with open(yamlfilename, "r", encoding="utf-8") as yamlfile:
         p = yaml.safe_load(yamlfile)
@@ -827,14 +778,10 @@ def add_spm_variables(self, cps: h5py.File, spm_unit: DataFrame) -> None:
             3: "RENTER",
         }
         cps["spm_unit_tenure_type"] = (
-            spm_unit.SPM_TENMORTSTATUS.map(tenure_map)
-            .fillna("RENTER")
-            .astype("S")
+            spm_unit.SPM_TENMORTSTATUS.map(tenure_map).fillna("RENTER").astype("S")
         )
 
-    cps["reduced_price_school_meals_reported"] = (
-        cps["free_school_meals_reported"] * 0
-    )
+    cps["reduced_price_school_meals_reported"] = cps["free_school_meals_reported"] * 0
 
 
 def add_household_variables(cps: h5py.File, household: DataFrame) -> None:
@@ -968,9 +915,7 @@ def select_random_subset_to_target(
             share_to_move = min(share_to_move, 1.0)  # Cap at 100%
         else:
             # Calculate how much to move to reach target (for EAD case)
-            needed_weighted = (
-                current_weighted - target_weighted
-            )  # Will be negative
+            needed_weighted = current_weighted - target_weighted  # Will be negative
             total_weight = np.sum(person_weights[eligible_ids])
             share_to_move = abs(needed_weighted) / total_weight
             share_to_move = min(share_to_move, 1.0)  # Cap at 100%
@@ -1214,9 +1159,7 @@ def select_random_subset_to_target(
     )
 
     # CONDITION 10: Government Employees
-    is_government_worker = np.isin(
-        person.PEIO1COW, [1, 2, 3]
-    )  # Fed/state/local gov
+    is_government_worker = np.isin(person.PEIO1COW, [1, 2, 3])  # Fed/state/local gov
     is_military_occupation = person.A_MJOCC == 11  # Military occupation
     is_government_employee = is_government_worker | is_military_occupation
     condition_10_mask = potentially_undocumented & is_government_employee
@@ -1330,12 +1273,8 @@ def select_random_subset_to_target(
     undocumented_students_mask = (
         (ssn_card_type == 0) & noncitizens & (person.A_HSCOL == 2)
     )
-    undocumented_workers_count = np.sum(
-        person_weights[undocumented_workers_mask]
-    )
-    undocumented_students_count = np.sum(
-        person_weights[undocumented_students_mask]
-    )
+    undocumented_workers_count = np.sum(person_weights[undocumented_workers_mask])
+    undocumented_students_count = np.sum(person_weights[undocumented_students_mask])
 
     after_conditions_code_0 = np.sum(person_weights[ssn_card_type == 0])
     print(f"After conditions - Code 0 people: {after_conditions_code_0:,.0f}")
@@ -1530,15 +1469,11 @@ def select_random_subset_to_target(
                     f"Selected {len(selected_indices)} people from {len(mixed_household_candidates)} candidates in mixed households"
                 )
             else:
-                print(
-                    "No additional family members selected (target already reached)"
-                )
+                print("No additional family members selected (target already reached)")
         else:
             print("No mixed-status households found for family correlation")
     else:
-        print(
-            "No additional undocumented people needed - target already reached"
-        )
+        print("No additional undocumented people needed - target already reached")
 
     # Calculate the weighted impact
     code_0_after = np.sum(person_weights[ssn_card_type == 0])
@@ -1613,9 +1548,7 @@ def get_arrival_year_midpoint(peinusyr):
     age_at_entry = np.maximum(0, person.A_AGE - years_in_us)
 
     # start every non-citizen as LPR so no UNSET survives
-    immigration_status = np.full(
-        len(person), "LEGAL_PERMANENT_RESIDENT", dtype="U32"
-    )
+    immigration_status = np.full(len(person), "LEGAL_PERMANENT_RESIDENT", dtype="U32")
 
     # Set citizens (SSN card type 1) to CITIZEN status
     immigration_status[ssn_card_type == 1] = "CITIZEN"
@@ -1663,9 +1596,7 @@ def get_arrival_year_midpoint(peinusyr):
     immigration_status[recent_refugee_mask] = "REFUGEE"
 
     # 6. Temp non-qualified (Code 2 not caught by DACA rule)
-    mask = (ssn_card_type == 2) & (
-        immigration_status == "LEGAL_PERMANENT_RESIDENT"
-    )
+    mask = (ssn_card_type == 2) & (immigration_status == "LEGAL_PERMANENT_RESIDENT")
     immigration_status[mask] = "TPS"
 
     # Final write (all values now in ImmigrationStatus Enum)
@@ -1681,9 +1612,7 @@ def get_arrival_year_midpoint(peinusyr):
         2: "NON_CITIZEN_VALID_EAD",  # Non-citizens with work/study authorization
         3: "OTHER_NON_CITIZEN",  # Non-citizens with indicators of legal status
     }
-    ssn_card_type_str = (
-        pd.Series(ssn_card_type).map(code_to_str).astype("S").values
-    )
+    ssn_card_type_str = pd.Series(ssn_card_type).map(code_to_str).astype("S").values
     cps["ssn_card_type"] = ssn_card_type_str
 
     # Final population summary
@@ -1749,25 +1678,63 @@ def _update_documentation_with_numbers(log_df, docs_dir):
 
     # Define replacements based on our logging structure
     replacements = {
-        "- **Step 0 - Initial**: Code 0 people = *[Run cps.py to populate]*": lambda: f"- **Step 0 - Initial**: Code 0 people = {data_map.get(('Step 0 - Initial', 'Code 0 people'), 0):,.0f}",
-        "- **Step 1 - Citizens**: Moved to Code 1 = *[Run cps.py to populate]*": lambda: f"- **Step 1 - Citizens**: Moved to Code 1 = {data_map.get(('Step 1 - Citizens', 'Moved to Code 1'), 0):,.0f}",
-        "- **ASEC Conditions**: Current Code 0 people = *[Run cps.py to populate]*": lambda: f"- **ASEC Conditions**: Current Code 0 people = {data_map.get(('ASEC Conditions', 'Current Code 0 people'), 0):,.0f}",
-        "- **After conditions**: Code 0 people = *[Run cps.py to populate]*": lambda: f"- **After conditions**: Code 0 people = {data_map.get(('After conditions', 'Code 0 people'), 0):,.0f}",
-        "- **Before adjustment**: Undocumented workers = *[Run cps.py to populate]*": lambda: f"- **Before adjustment**: Undocumented workers = {data_map.get(('Before adjustment', 'Undocumented workers'), 0):,.0f}",
-        "- **Target**: Undocumented workers target = *[Run cps.py to populate]*": lambda: f"- **Target**: Undocumented workers target = {data_map.get(('Target', 'Undocumented workers target'), 0):,.0f}",
-        "- **Before adjustment**: Undocumented students = *[Run cps.py to populate]*": lambda: f"- **Before adjustment**: Undocumented students = {data_map.get(('Before adjustment', 'Undocumented students'), 0):,.0f}",
-        "- **Target**: Undocumented students target = *[Run cps.py to populate]*": lambda: f"- **Target**: Undocumented students target = {data_map.get(('Target', 'Undocumented students target'), 0):,.0f}",
-        "- **Step 3 - EAD workers**: Moved from Code 0 to Code 2 = *[Run cps.py to populate]*": lambda: f"- **Step 3 - EAD workers**: Moved from Code 0 to Code 2 = {data_map.get(('Step 3 - EAD workers', 'Moved from Code 0 to Code 2'), 0):,.0f}",
-        "- **Step 4 - EAD students**: Moved from Code 0 to Code 2 = *[Run cps.py to populate]*": lambda: f"- **Step 4 - EAD students**: Moved from Code 0 to Code 2 = {data_map.get(('Step 4 - EAD students', 'Moved from Code 0 to Code 2'), 0):,.0f}",
-        "- **After EAD assignment**: Code 0 people = *[Run cps.py to populate]*": lambda: f"- **After EAD assignment**: Code 0 people = {data_map.get(('After EAD assignment', 'Code 0 people'), 0):,.0f}",
-        "- **Step 5 - Family correlation**: Changed from Code 3 to Code 0 = *[Run cps.py to populate]*": lambda: f"- **Step 5 - Family correlation**: Changed from Code 3 to Code 0 = {data_map.get(('Step 5 - Family correlation', 'Changed from Code 3 to Code 0'), 0):,.0f}",
-        "- **After family correlation**: Code 0 people = *[Run cps.py to populate]*": lambda: f"- **After family correlation**: Code 0 people = {data_map.get(('After family correlation', 'Code 0 people'), 0):,.0f}",
-        "- **Final**: Code 0 (NONE) = *[Run cps.py to populate]*": lambda: f"- **Final**: Code 0 (NONE) = {data_map.get(('Final', 'Code 0 (NONE)'), 0):,.0f}",
-        "- **Final**: Code 1 (CITIZEN) = *[Run cps.py to populate]*": lambda: f"- **Final**: Code 1 (CITIZEN) = {data_map.get(('Final', 'Code 1 (CITIZEN)'), 0):,.0f}",
-        "- **Final**: Code 2 (NON_CITIZEN_VALID_EAD) = *[Run cps.py to populate]*": lambda: f"- **Final**: Code 2 (NON_CITIZEN_VALID_EAD) = {data_map.get(('Final', 'Code 2 (NON_CITIZEN_VALID_EAD)'), 0):,.0f}",
-        "- **Final**: Code 3 (OTHER_NON_CITIZEN) = *[Run cps.py to populate]*": lambda: f"- **Final**: Code 3 (OTHER_NON_CITIZEN) = {data_map.get(('Final', 'Code 3 (OTHER_NON_CITIZEN)'), 0):,.0f}",
-        "- **Final**: Total undocumented (Code 0) = *[Run cps.py to populate]*": lambda: f"- **Final**: Total undocumented (Code 0) = {data_map.get(('Final', 'Total undocumented (Code 0)'), 0):,.0f}",
-        "- **Final**: Undocumented target = *[Run cps.py to populate]*": lambda: f"- **Final**: Undocumented target = {data_map.get(('Final', 'Undocumented target'), 0):,.0f}",
+        "- **Step 0 - Initial**: Code 0 people = *[Run cps.py to populate]*": lambda: (
+            f"- **Step 0 - Initial**: Code 0 people = {data_map.get(('Step 0 - Initial', 'Code 0 people'), 0):,.0f}"
+        ),
+        "- **Step 1 - Citizens**: Moved to Code 1 = *[Run cps.py to populate]*": lambda: (
+            f"- **Step 1 - Citizens**: Moved to Code 1 = {data_map.get(('Step 1 - Citizens', 'Moved to Code 1'), 0):,.0f}"
+        ),
+        "- **ASEC Conditions**: Current Code 0 people = *[Run cps.py to populate]*": lambda: (
+            f"- **ASEC Conditions**: Current Code 0 people = {data_map.get(('ASEC Conditions', 'Current Code 0 people'), 0):,.0f}"
+        ),
+        "- **After conditions**: Code 0 people = *[Run cps.py to populate]*": lambda: (
+            f"- **After conditions**: Code 0 people = {data_map.get(('After conditions', 'Code 0 people'), 0):,.0f}"
+        ),
+        "- **Before adjustment**: Undocumented workers = *[Run cps.py to populate]*": lambda: (
+            f"- **Before adjustment**: Undocumented workers = {data_map.get(('Before adjustment', 'Undocumented workers'), 0):,.0f}"
+        ),
+        "- **Target**: Undocumented workers target = *[Run cps.py to populate]*": lambda: (
+            f"- **Target**: Undocumented workers target = {data_map.get(('Target', 'Undocumented workers target'), 0):,.0f}"
+        ),
+        "- **Before adjustment**: Undocumented students = *[Run cps.py to populate]*": lambda: (
+            f"- **Before adjustment**: Undocumented students = {data_map.get(('Before adjustment', 'Undocumented students'), 0):,.0f}"
+        ),
+        "- **Target**: Undocumented students target = *[Run cps.py to populate]*": lambda: (
+            f"- **Target**: Undocumented students target = {data_map.get(('Target', 'Undocumented students target'), 0):,.0f}"
+        ),
+        "- **Step 3 - EAD workers**: Moved from Code 0 to Code 2 = *[Run cps.py to populate]*": lambda: (
+            f"- **Step 3 - EAD workers**: Moved from Code 0 to Code 2 = {data_map.get(('Step 3 - EAD workers', 'Moved from Code 0 to Code 2'), 0):,.0f}"
+        ),
+        "- **Step 4 - EAD students**: Moved from Code 0 to Code 2 = *[Run cps.py to populate]*": lambda: (
+            f"- **Step 4 - EAD students**: Moved from Code 0 to Code 2 = {data_map.get(('Step 4 - EAD students', 'Moved from Code 0 to Code 2'), 0):,.0f}"
+        ),
+        "- **After EAD assignment**: Code 0 people = *[Run cps.py to populate]*": lambda: (
+            f"- **After EAD assignment**: Code 0 people = {data_map.get(('After EAD assignment', 'Code 0 people'), 0):,.0f}"
+        ),
+        "- **Step 5 - Family correlation**: Changed from Code 3 to Code 0 = *[Run cps.py to populate]*": lambda: (
+            f"- **Step 5 - Family correlation**: Changed from Code 3 to Code 0 = {data_map.get(('Step 5 - Family correlation', 'Changed from Code 3 to Code 0'), 0):,.0f}"
+        ),
+        "- **After family correlation**: Code 0 people = *[Run cps.py to populate]*": lambda: (
+            f"- **After family correlation**: Code 0 people = {data_map.get(('After family correlation', 'Code 0 people'), 0):,.0f}"
+        ),
+        "- **Final**: Code 0 (NONE) = *[Run cps.py to populate]*": lambda: (
+            f"- **Final**: Code 0 (NONE) = {data_map.get(('Final', 'Code 0 (NONE)'), 0):,.0f}"
+        ),
+        "- **Final**: Code 1 (CITIZEN) = *[Run cps.py to populate]*": lambda: (
+            f"- **Final**: Code 1 (CITIZEN) = {data_map.get(('Final', 'Code 1 (CITIZEN)'), 0):,.0f}"
+        ),
+        "- **Final**: Code 2 (NON_CITIZEN_VALID_EAD) = *[Run cps.py to populate]*": lambda: (
+            f"- **Final**: Code 2 (NON_CITIZEN_VALID_EAD) = {data_map.get(('Final', 'Code 2 (NON_CITIZEN_VALID_EAD)'), 0):,.0f}"
+        ),
+        "- **Final**: Code 3 (OTHER_NON_CITIZEN) = *[Run cps.py to populate]*": lambda: (
+            f"- **Final**: Code 3 (OTHER_NON_CITIZEN) = {data_map.get(('Final', 'Code 3 (OTHER_NON_CITIZEN)'), 0):,.0f}"
+        ),
+        "- **Final**: Total undocumented (Code 0) = *[Run cps.py to populate]*": lambda: (
+            f"- **Final**: Total undocumented (Code 0) = {data_map.get(('Final', 'Total undocumented (Code 0)'), 0):,.0f}"
+        ),
+        "- **Final**: Undocumented target = *[Run cps.py to populate]*": lambda: (
+            f"- **Final**: Undocumented target = {data_map.get(('Final', 'Undocumented target'), 0):,.0f}"
+        ),
     }
 
     # Apply replacements
@@ -1852,9 +1819,7 @@ def add_tips(self, cps: h5py.File):
     # Drop temporary columns used only for imputation
     # is_married is person-level here but policyengine-us defines it at Family
     # level, so we must not save it
-    cps = cps.drop(
-        columns=["is_married", "is_under_18", "is_under_6"], errors="ignore"
-    )
+    cps = cps.drop(columns=["is_married", "is_under_18", "is_under_6"], errors="ignore")
 
     self.save_dataset(cps)
 
@@ -1974,9 +1939,7 @@ def create_scf_reference_person_mask(cps_data, raw_person_data):
         all_persons_data["is_female"] = (raw_person_data.A_SEX == 2).values
 
         # Add marital status (A_MARITL codes: 1,2 = married with spouse present/absent)
-        all_persons_data["is_married"] = raw_person_data.A_MARITL.isin(
-            [1, 2]
-        ).values
+        all_persons_data["is_married"] = raw_person_data.A_MARITL.isin([1, 2]).values
 
         # Define adults as age 18+
         all_persons_data["is_adult"] = all_persons_data["age"] >= 18
@@ -1995,8 +1958,7 @@ def create_scf_reference_person_mask(cps_data, raw_person_data):
         # Identify couple households (households with exactly 2 married adults)
         married_adults_per_household = (
             all_persons_data[
-                (all_persons_data["is_adult"])
-                & (all_persons_data["is_married"])
+                (all_persons_data["is_adult"]) & (all_persons_data["is_married"])
             ]
             .groupby("person_household_id")
             .size()
@@ -2004,12 +1966,7 @@ def create_scf_reference_person_mask(cps_data, raw_person_data):
 
         couple_households = married_adults_per_household[
             (married_adults_per_household == 2)
-            & (
-                all_persons_data.groupby("person_household_id")[
-                    "n_adults"
-                ].first()
-                == 2
-            )
+            & (all_persons_data.groupby("person_household_id")["n_adults"].first() == 2)
         ].index
 
         all_persons_data["is_couple_household"] = all_persons_data[
@@ -2109,9 +2066,7 @@ def determine_reference_person(group):
     }
 
     # Apply the mapping to recode the race values
-    cps_data["cps_race"] = np.vectorize(CPS_RACE_MAPPING.get)(
-        cps_data["cps_race"]
-    )
+    cps_data["cps_race"] = np.vectorize(CPS_RACE_MAPPING.get)(cps_data["cps_race"])
 
     lengths = {k: len(v) for k, v in cps_data.items()}
     var_len = cps_data["person_household_id"].shape[0]
@@ -2143,9 +2098,7 @@ def determine_reference_person(group):
 
     # Add is_married variable for household heads based on raw person data
     reference_persons = person_data[mask]
-    receiver_data["is_married"] = reference_persons.A_MARITL.isin(
-        [1, 2]
-    ).values
+    receiver_data["is_married"] = reference_persons.A_MARITL.isin([1, 2]).values
 
     # Impute auto loan balance from the SCF
     from policyengine_us_data.datasets.scf.scf import SCF_2022
@@ -2180,9 +2133,7 @@ def determine_reference_person(group):
     logging.getLogger("microimpute").setLevel(getattr(logging, log_level))
 
     qrf_model = QRF()
-    donor_data = donor_data.sample(frac=0.5, random_state=42).reset_index(
-        drop=True
-    )
+    donor_data = donor_data.sample(frac=0.5, random_state=42).reset_index(drop=True)
     fitted_model = qrf_model.fit(
         X_train=donor_data,
         predictors=PREDICTORS,
diff --git a/policyengine_us_data/datasets/cps/enhanced_cps.py b/policyengine_us_data/datasets/cps/enhanced_cps.py
index 3bf5515b3..578756203 100644
--- a/policyengine_us_data/datasets/cps/enhanced_cps.py
+++ b/policyengine_us_data/datasets/cps/enhanced_cps.py
@@ -44,9 +44,7 @@ def reweight(
     normalisation_factor = np.where(
         is_national, nation_normalisation_factor, state_normalisation_factor
     )
-    normalisation_factor = torch.tensor(
-        normalisation_factor, dtype=torch.float32
-    )
+    normalisation_factor = torch.tensor(normalisation_factor, dtype=torch.float32)
     targets_array = torch.tensor(targets_array, dtype=torch.float32)
 
     inv_mean_normalisation = 1 / np.mean(normalisation_factor.numpy())
@@ -59,12 +57,8 @@ def loss(weights):
         estimate = weights @ loss_matrix
         if torch.isnan(estimate).any():
             raise ValueError("Estimate contains NaNs")
-        rel_error = (
-            ((estimate - targets_array) + 1) / (targets_array + 1)
-        ) ** 2
-        rel_error_normalized = (
-            inv_mean_normalisation * rel_error * normalisation_factor
-        )
+        rel_error = (((estimate - targets_array) + 1) / (targets_array + 1)) ** 2
+        rel_error_normalized = inv_mean_normalisation * rel_error * normalisation_factor
         if torch.isnan(rel_error_normalized).any():
             raise ValueError("Relative error contains NaNs")
         return rel_error_normalized.mean()
@@ -119,9 +113,7 @@ def loss(weights):
             start_loss = l.item()
         loss_rel_change = (l.item() - start_loss) / start_loss
         l.backward()
-        iterator.set_postfix(
-            {"loss": l.item(), "loss_rel_change": loss_rel_change}
-        )
+        iterator.set_postfix({"loss": l.item(), "loss_rel_change": loss_rel_change})
         optimizer.step()
         if log_path is not None:
             performance.to_csv(log_path, index=False)
@@ -180,9 +172,7 @@ def generate(self):
 
         # Run the optimization procedure to get (close to) minimum loss weights
         for year in range(self.start_year, self.end_year + 1):
-            loss_matrix, targets_array = build_loss_matrix(
-                self.input_dataset, year
-            )
+            loss_matrix, targets_array = build_loss_matrix(self.input_dataset, year)
             zero_mask = np.isclose(targets_array, 0.0, atol=0.1)
             bad_mask = loss_matrix.columns.isin(bad_targets)
             keep_mask_bool = ~(zero_mask | bad_mask)
@@ -204,9 +194,7 @@ def generate(self):
             # Validate dense weights
             w = optimised_weights
             if np.any(np.isnan(w)):
-                raise ValueError(
-                    f"Year {year}: household_weight contains NaN values"
-                )
+                raise ValueError(f"Year {year}: household_weight contains NaN values")
             if np.any(w < 0):
                 raise ValueError(
                     f"Year {year}: household_weight contains negative values"
@@ -247,12 +235,8 @@ def generate(self):
             1, 0.1, len(original_weights)
         )
         for year in [2024]:
-            loss_matrix, targets_array = build_loss_matrix(
-                self.input_dataset, year
-            )
-            optimised_weights = reweight(
-                original_weights, loss_matrix, targets_array
-            )
+            loss_matrix, targets_array = build_loss_matrix(self.input_dataset, year)
+            optimised_weights = reweight(original_weights, loss_matrix, targets_array)
             data["household_weight"] = optimised_weights
 
         self.save_dataset(data)
diff --git a/policyengine_us_data/datasets/cps/local_area_calibration/block_assignment.py b/policyengine_us_data/datasets/cps/local_area_calibration/block_assignment.py
index 73b435f69..f479435b5 100644
--- a/policyengine_us_data/datasets/cps/local_area_calibration/block_assignment.py
+++ b/policyengine_us_data/datasets/cps/local_area_calibration/block_assignment.py
@@ -127,7 +127,9 @@ def _load_cbsa_crosswalk() -> Dict[str, str]:
     Returns:
         Dict mapping 5-digit county FIPS to CBSA code (or None if not in CBSA)
     """
-    url = "https://data.nber.org/cbsa-csa-fips-county-crosswalk/2023/cbsa2fipsxw_2023.csv"
+    url = (
+        "https://data.nber.org/cbsa-csa-fips-county-crosswalk/2023/cbsa2fipsxw_2023.csv"
+    )
     try:
         df = pd.read_csv(url, dtype=str)
         # Build 5-digit county FIPS from state + county codes
@@ -171,8 +173,7 @@ def _load_block_crosswalk() -> pd.DataFrame:
 
     if not csv_path.exists():
         print(
-            f"Warning: {csv_path} not found. "
-            "Run make_block_crosswalk.py to generate."
+            f"Warning: {csv_path} not found. Run make_block_crosswalk.py to generate."
         )
         return pd.DataFrame()
 
@@ -260,14 +261,10 @@ def get_all_geography_from_block(block_geoid: str) -> Dict[str, Optional[str]]:
         result = {
             "sldu": row["sldu"] if pd.notna(row["sldu"]) else None,
             "sldl": row["sldl"] if pd.notna(row["sldl"]) else None,
-            "place_fips": (
-                row["place_fips"] if pd.notna(row["place_fips"]) else None
-            ),
+            "place_fips": (row["place_fips"] if pd.notna(row["place_fips"]) else None),
             "vtd": row["vtd"] if pd.notna(row["vtd"]) else None,
             "puma": row["puma"] if pd.notna(row["puma"]) else None,
-            "zcta": (
-                row["zcta"] if has_zcta and pd.notna(row["zcta"]) else None
-            ),
+            "zcta": (row["zcta"] if has_zcta and pd.notna(row["zcta"]) else None),
         }
         return result
     return {
@@ -436,17 +433,11 @@ def assign_geography_for_cd(
         - county_index: int32 indices into County enum (for backwards compat)
     """
     # Assign blocks first
-    block_geoids = assign_blocks_for_cd(
-        cd_geoid, n_households, seed, distributions
-    )
+    block_geoids = assign_blocks_for_cd(cd_geoid, n_households, seed, distributions)
 
     # Derive geography directly from block GEOID structure
-    county_fips = np.array(
-        [get_county_fips_from_block(b) for b in block_geoids]
-    )
-    tract_geoids = np.array(
-        [get_tract_geoid_from_block(b) for b in block_geoids]
-    )
+    county_fips = np.array([get_county_fips_from_block(b) for b in block_geoids])
+    tract_geoids = np.array([get_tract_geoid_from_block(b) for b in block_geoids])
     state_fips = np.array([get_state_fips_from_block(b) for b in block_geoids])
 
     # CBSA lookup via county (may be None for rural areas)
diff --git a/policyengine_us_data/datasets/cps/local_area_calibration/calibration_utils.py b/policyengine_us_data/datasets/cps/local_area_calibration/calibration_utils.py
index 97c82360d..3db2477bd 100644
--- a/policyengine_us_data/datasets/cps/local_area_calibration/calibration_utils.py
+++ b/policyengine_us_data/datasets/cps/local_area_calibration/calibration_utils.py
@@ -351,9 +351,7 @@ def create_target_groups(
 
         for domain_var, var_name in pairs:
             var_mask = (
-                (targets_df["variable"] == var_name)
-                & level_mask
-                & ~processed_mask
+                (targets_df["variable"] == var_name) & level_mask & ~processed_mask
             )
             if has_domain and domain_var is not None:
                 var_mask &= targets_df["domain_variable"] == domain_var
@@ -379,15 +377,11 @@ def create_target_groups(
             # Format output based on level and count
             if n_targets == 1:
                 value = matching["value"].iloc[0]
-                info_str = (
-                    f"{level_name} {label} (1 target, value={value:,.0f})"
-                )
+                info_str = f"{level_name} {label} (1 target, value={value:,.0f})"
                 print_str = f"  Group {group_id}: {label} = {value:,.0f}"
             else:
                 info_str = f"{level_name} {label} ({n_targets} targets)"
-                print_str = (
-                    f"  Group {group_id}: {label} ({n_targets} targets)"
-                )
+                print_str = f"  Group {group_id}: {label} ({n_targets} targets)"
 
             group_info.append(f"Group {group_id}: {info_str}")
             print(print_str)
@@ -440,9 +434,7 @@ def drop_target_groups(
                 drop_ids.add(gid)
                 matched = True
         if not matched:
-            print(
-                f"  WARNING: no match for " f"({label_substr!r}, {geo_name!r})"
-            )
+            print(f"  WARNING: no match for ({label_substr!r}, {geo_name!r})")
 
     keep_mask = ~np.isin(target_groups, list(drop_ids))
 
@@ -600,9 +592,7 @@ def calculate_spm_thresholds_for_cd(
         .reset_index()
     )
 
-    tenure_types = sim.calculate(
-        "spm_unit_tenure_type", map_to="spm_unit"
-    ).values
+    tenure_types = sim.calculate("spm_unit_tenure_type", map_to="spm_unit").values
     spm_unit_ids_unit = sim.calculate("spm_unit_id", map_to="spm_unit").values
 
     tenure_df = pd.DataFrame(
@@ -614,10 +604,7 @@ def calculate_spm_thresholds_for_cd(
 
     merged = agg.merge(tenure_df, on="spm_unit_id", how="left")
     merged["tenure_code"] = (
-        merged["tenure_type"]
-        .map(SPM_TENURE_STRING_TO_CODE)
-        .fillna(3)
-        .astype(int)
+        merged["tenure_type"].map(SPM_TENURE_STRING_TO_CODE).fillna(3).astype(int)
     )
 
     calc = SPMCalculator(year=year)
@@ -627,9 +614,7 @@ def calculate_spm_thresholds_for_cd(
     thresholds = np.zeros(n, dtype=np.float32)
 
     for i in range(n):
-        tenure_str = TENURE_CODE_MAP.get(
-            int(merged.iloc[i]["tenure_code"]), "renter"
-        )
+        tenure_str = TENURE_CODE_MAP.get(int(merged.iloc[i]["tenure_code"]), "renter")
         base = base_thresholds[tenure_str]
         equiv_scale = spm_equivalence_scale(
             int(merged.iloc[i]["num_adults"]),
diff --git a/policyengine_us_data/datasets/cps/local_area_calibration/county_assignment.py b/policyengine_us_data/datasets/cps/local_area_calibration/county_assignment.py
index 780bc4c77..54aaaf07f 100644
--- a/policyengine_us_data/datasets/cps/local_area_calibration/county_assignment.py
+++ b/policyengine_us_data/datasets/cps/local_area_calibration/county_assignment.py
@@ -150,9 +150,7 @@ def get_county_filter_probability(
     else:
         dist = _generate_uniform_distribution(cd_key)
 
-    return sum(
-        prob for county, prob in dist.items() if county in county_filter
-    )
+    return sum(prob for county, prob in dist.items() if county in county_filter)
 
 
 def get_filtered_county_distribution(
diff --git a/policyengine_us_data/datasets/cps/local_area_calibration/create_stratified_cps.py b/policyengine_us_data/datasets/cps/local_area_calibration/create_stratified_cps.py
index 54d9a959f..2aa15a9f3 100644
--- a/policyengine_us_data/datasets/cps/local_area_calibration/create_stratified_cps.py
+++ b/policyengine_us_data/datasets/cps/local_area_calibration/create_stratified_cps.py
@@ -57,7 +57,7 @@ def create_stratified_cps_dataset(
 
     print(f"Original dataset: {n_households_orig:,} households")
     print(f"Target dataset: {target_households:,} households")
-    print(f"Reduction ratio: {target_households/n_households_orig:.1%}")
+    print(f"Reduction ratio: {target_households / n_households_orig:.1%}")
 
     # Show income distribution
     print("\nAGI Percentiles (original):")
@@ -79,16 +79,14 @@ def create_stratified_cps_dataset(
         f"  Top {100 - high_income_percentile}% (AGI >= ${high_income_threshold:,.0f}): {n_top:,}"
     )
     print(f"  Middle 25-{high_income_percentile}%: {n_middle:,}")
-    print(
-        f"  Bottom 25% (AGI < ${bottom_25_pct_threshold:,.0f}): {n_bottom_25:,}"
-    )
+    print(f"  Bottom 25% (AGI < ${bottom_25_pct_threshold:,.0f}): {n_bottom_25:,}")
 
     # Calculate sampling rates
     # Keep ALL top earners, distribute remaining quota between middle and bottom
     remaining_quota = target_households - n_top
     if remaining_quota <= 0:
         raise ValueError(
-            f"Target ({target_households:,}) is less than top {100-high_income_percentile}% "
+            f"Target ({target_households:,}) is less than top {100 - high_income_percentile}% "
             f"count ({n_top:,}). Increase target_households."
         )
 
@@ -132,9 +130,7 @@ def create_stratified_cps_dataset(
     # Top earners - keep all
     top_mask = agi >= high_income_threshold
     selected_mask[top_mask] = True
-    print(
-        f"  Top {100 - high_income_percentile}%: selected {np.sum(top_mask):,}"
-    )
+    print(f"  Top {100 - high_income_percentile}%: selected {np.sum(top_mask):,}")
 
     # Bottom 25%
     bottom_mask = agi < bottom_25_pct_threshold
@@ -176,7 +172,7 @@ def create_stratified_cps_dataset(
 
     n_selected = np.sum(selected_mask)
     print(
-        f"\nTotal selected: {n_selected:,} households ({n_selected/n_households_orig:.1%} of original)"
+        f"\nTotal selected: {n_selected:,} households ({n_selected / n_households_orig:.1%} of original)"
     )
 
     # Verify high earners are preserved
@@ -271,10 +267,7 @@ def create_stratified_cps_dataset(
         if "person_id" in f and str(time_period) in f["person_id"]:
             person_ids = f["person_id"][str(time_period)][:]
             print(f"  Final persons: {len(person_ids):,}")
-        if (
-            "household_weight" in f
-            and str(time_period) in f["household_weight"]
-        ):
+        if "household_weight" in f and str(time_period) in f["household_weight"]:
             weights = f["household_weight"][str(time_period)][:]
             print(f"  Final household weights sum: {np.sum(weights):,.0f}")
 
@@ -342,7 +335,5 @@ def create_stratified_cps_dataset(
     )
     print("\nExamples:")
     print("  python create_stratified_cps.py 30000")
-    print(
-        "  python create_stratified_cps.py 50000 --top=99.5 --oversample-poor"
-    )
+    print("  python create_stratified_cps.py 50000 --top=99.5 --oversample-poor")
     print("  python create_stratified_cps.py 30000 --seed=123  # reproducible")
diff --git a/policyengine_us_data/datasets/cps/local_area_calibration/publish_local_area.py b/policyengine_us_data/datasets/cps/local_area_calibration/publish_local_area.py
index 4963f3979..e473e3653 100644
--- a/policyengine_us_data/datasets/cps/local_area_calibration/publish_local_area.py
+++ b/policyengine_us_data/datasets/cps/local_area_calibration/publish_local_area.py
@@ -113,9 +113,9 @@ def build_state_h5(
     states_dir.mkdir(parents=True, exist_ok=True)
     output_path = states_dir / f"{state_code}.h5"
 
-    print(f"\n{'='*60}")
+    print(f"\n{'=' * 60}")
     print(f"Building {state_code} ({len(cd_subset)} CDs)")
-    print(f"{'='*60}")
+    print(f"{'=' * 60}")
 
     create_sparse_cd_stacked_dataset(
         weights,
@@ -158,9 +158,9 @@ def build_district_h5(
     districts_dir.mkdir(parents=True, exist_ok=True)
     output_path = districts_dir / f"{friendly_name}.h5"
 
-    print(f"\n{'='*60}")
+    print(f"\n{'=' * 60}")
     print(f"Building {friendly_name}")
-    print(f"{'='*60}")
+    print(f"{'=' * 60}")
 
     create_sparse_cd_stacked_dataset(
         weights,
@@ -208,9 +208,9 @@ def build_city_h5(
     cities_dir.mkdir(parents=True, exist_ok=True)
     output_path = cities_dir / "NYC.h5"
 
-    print(f"\n{'='*60}")
+    print(f"\n{'=' * 60}")
     print(f"Building NYC ({len(cd_subset)} CDs)")
-    print(f"{'='*60}")
+    print(f"{'=' * 60}")
 
     create_sparse_cd_stacked_dataset(
         weights,
@@ -256,17 +256,15 @@ def build_and_upload_states(
             print(f"Skipping {state_code} (already completed)")
             continue
 
-        cd_subset = [
-            cd for cd in cds_to_calibrate if int(cd) // 100 == state_fips
-        ]
+        cd_subset = [cd for cd in cds_to_calibrate if int(cd) // 100 == state_fips]
         if not cd_subset:
             print(f"No CDs found for {state_code}, skipping")
             continue
 
         output_path = states_dir / f"{state_code}.h5"
-        print(f"\n{'='*60}")
+        print(f"\n{'=' * 60}")
         print(f"Building {state_code} ({len(cd_subset)} CDs)")
-        print(f"{'='*60}")
+        print(f"{'=' * 60}")
 
         try:
             create_sparse_cd_stacked_dataset(
@@ -288,9 +286,7 @@ def build_and_upload_states(
 
             # Flush HF queue every batch_size files
             if len(hf_queue) >= hf_batch_size:
-                print(
-                    f"\nUploading batch of {len(hf_queue)} files to HuggingFace..."
-                )
+                print(f"\nUploading batch of {len(hf_queue)} files to HuggingFace...")
                 upload_local_area_batch_to_hf(hf_queue)
                 hf_queue = []
 
@@ -300,9 +296,7 @@ def build_and_upload_states(
 
     # Flush remaining files to HuggingFace
     if hf_queue:
-        print(
-            f"\nUploading final batch of {len(hf_queue)} files to HuggingFace..."
-        )
+        print(f"\nUploading final batch of {len(hf_queue)} files to HuggingFace...")
         upload_local_area_batch_to_hf(hf_queue)
 
 
@@ -336,9 +330,9 @@ def build_and_upload_districts(
             continue
 
         output_path = districts_dir / f"{friendly_name}.h5"
-        print(f"\n{'='*60}")
-        print(f"[{i+1}/{len(cds_to_calibrate)}] Building {friendly_name}")
-        print(f"{'='*60}")
+        print(f"\n{'=' * 60}")
+        print(f"[{i + 1}/{len(cds_to_calibrate)}] Building {friendly_name}")
+        print(f"{'=' * 60}")
 
         try:
             create_sparse_cd_stacked_dataset(
@@ -360,9 +354,7 @@ def build_and_upload_districts(
 
             # Flush HF queue every batch_size files
             if len(hf_queue) >= hf_batch_size:
-                print(
-                    f"\nUploading batch of {len(hf_queue)} files to HuggingFace..."
-                )
+                print(f"\nUploading batch of {len(hf_queue)} files to HuggingFace...")
                 upload_local_area_batch_to_hf(hf_queue)
                 hf_queue = []
 
@@ -372,9 +364,7 @@ def build_and_upload_districts(
 
     # Flush remaining files to HuggingFace
     if hf_queue:
-        print(
-            f"\nUploading final batch of {len(hf_queue)} files to HuggingFace..."
-        )
+        print(f"\nUploading final batch of {len(hf_queue)} files to HuggingFace...")
         upload_local_area_batch_to_hf(hf_queue)
 
 
@@ -405,9 +395,9 @@ def build_and_upload_cities(
             print("No NYC-related CDs found, skipping")
         else:
             output_path = cities_dir / "NYC.h5"
-            print(f"\n{'='*60}")
+            print(f"\n{'=' * 60}")
             print(f"Building NYC ({len(cd_subset)} CDs)")
-            print(f"{'='*60}")
+            print(f"{'=' * 60}")
 
             try:
                 create_sparse_cd_stacked_dataset(
@@ -420,9 +410,7 @@ def build_and_upload_cities(
                 )
 
                 print("Uploading NYC.h5 to GCP...")
-                upload_local_area_file(
-                    str(output_path), "cities", skip_hf=True
-                )
+                upload_local_area_file(str(output_path), "cities", skip_hf=True)
 
                 # Queue for batched HuggingFace upload
                 hf_queue.append((str(output_path), "cities"))
@@ -436,9 +424,7 @@ def build_and_upload_cities(
 
     # Flush remaining files to HuggingFace
     if hf_queue:
-        print(
-            f"\nUploading batch of {len(hf_queue)} city files to HuggingFace..."
-        )
+        print(f"\nUploading batch of {len(hf_queue)} city files to HuggingFace...")
         upload_local_area_batch_to_hf(hf_queue)
 
 
diff --git a/policyengine_us_data/datasets/cps/local_area_calibration/stacked_dataset_builder.py b/policyengine_us_data/datasets/cps/local_area_calibration/stacked_dataset_builder.py
index 010e151f3..6991b8d98 100644
--- a/policyengine_us_data/datasets/cps/local_area_calibration/stacked_dataset_builder.py
+++ b/policyengine_us_data/datasets/cps/local_area_calibration/stacked_dataset_builder.py
@@ -107,9 +107,7 @@ def create_sparse_cd_stacked_dataset(
         # Process all CDs
         cd_indices = list(range(len(cds_to_calibrate)))
         cds_to_process = cds_to_calibrate
-        print(
-            f"Processing all {len(cds_to_calibrate)} congressional districts"
-        )
+        print(f"Processing all {len(cds_to_calibrate)} congressional districts")
 
     # Generate output path if not provided
     if output_path is None:
@@ -125,9 +123,7 @@ def create_sparse_cd_stacked_dataset(
     # Load the original simulation
     base_sim = Microsimulation(dataset=dataset_path)
 
-    household_ids = base_sim.calculate(
-        "household_id", map_to="household"
-    ).values
+    household_ids = base_sim.calculate("household_id", map_to="household").values
     n_households_orig = len(household_ids)
 
     # From the base sim, create mapping from household ID to index for proper filtering
@@ -155,9 +151,7 @@ def create_sparse_cd_stacked_dataset(
     # Extract only the CDs we want to process
     if cd_subset is not None:
         W = W_full[cd_indices, :]
-        print(
-            f"Extracted weights for {len(cd_indices)} CDs from full weight matrix"
-        )
+        print(f"Extracted weights for {len(cd_indices)} CDs from full weight matrix")
     else:
         W = W_full
 
@@ -177,9 +171,7 @@ def create_sparse_cd_stacked_dataset(
     for idx, cd_geoid in enumerate(cds_to_process):
         # Progress every 10 CDs and at the end ----
         if (idx + 1) % 10 == 0 or (idx + 1) == len(cds_to_process):
-            print(
-                f"Processing CD {cd_geoid} ({idx + 1}/{len(cds_to_process)})..."
-            )
+            print(f"Processing CD {cd_geoid} ({idx + 1}/{len(cds_to_process)})...")
 
         # Get the correct index in the weight matrix
         cd_idx = idx  # Index in our filtered W matrix
@@ -231,21 +223,13 @@ def create_sparse_cd_stacked_dataset(
 
         entity_rel = pd.DataFrame(
             {
-                "person_id": cd_sim.calculate(
-                    "person_id", map_to="person"
-                ).values,
+                "person_id": cd_sim.calculate("person_id", map_to="person").values,
                 "household_id": cd_sim.calculate(
                     "household_id", map_to="person"
                 ).values,
-                "tax_unit_id": cd_sim.calculate(
-                    "tax_unit_id", map_to="person"
-                ).values,
-                "spm_unit_id": cd_sim.calculate(
-                    "spm_unit_id", map_to="person"
-                ).values,
-                "family_id": cd_sim.calculate(
-                    "family_id", map_to="person"
-                ).values,
+                "tax_unit_id": cd_sim.calculate("tax_unit_id", map_to="person").values,
+                "spm_unit_id": cd_sim.calculate("spm_unit_id", map_to="person").values,
+                "family_id": cd_sim.calculate("family_id", map_to="person").values,
                 "marital_unit_id": cd_sim.calculate(
                     "marital_unit_id", map_to="person"
                 ).values,
@@ -264,9 +248,7 @@ def create_sparse_cd_stacked_dataset(
             .reset_index(name="persons_per_hh")
         )
         hh_df = hh_df.merge(counts)
-        hh_df["per_person_hh_weight"] = (
-            hh_df.household_weight / hh_df.persons_per_hh
-        )
+        hh_df["per_person_hh_weight"] = hh_df.household_weight / hh_df.persons_per_hh
 
         # SET WEIGHTS IN SIMULATION BEFORE EXTRACTING DATAFRAME
         # This is the key - set_input updates the simulation's internal state
@@ -300,12 +282,8 @@ def create_sparse_cd_stacked_dataset(
                 )
             new_weights_per_id[col] = hh_info2.id_weight
 
-        cd_sim.set_input(
-            "household_weight", time_period, hh_df.household_weight.values
-        )
-        cd_sim.set_input(
-            "person_weight", time_period, new_weights_per_id["person_id"]
-        )
+        cd_sim.set_input("household_weight", time_period, hh_df.household_weight.values)
+        cd_sim.set_input("person_weight", time_period, new_weights_per_id["person_id"])
         cd_sim.set_input(
             "tax_unit_weight", time_period, new_weights_per_id["tax_unit_id"]
         )
@@ -317,9 +295,7 @@ def create_sparse_cd_stacked_dataset(
             time_period,
             new_weights_per_id["marital_unit_id"],
         )
-        cd_sim.set_input(
-            "family_weight", time_period, new_weights_per_id["family_id"]
-        )
+        cd_sim.set_input("family_weight", time_period, new_weights_per_id["family_id"])
 
         # Extract state from CD GEOID and update simulation BEFORE calling to_input_dataframe()
         # This ensures calculated variables (SNAP, Medicaid) use the correct state
@@ -340,9 +316,7 @@ def create_sparse_cd_stacked_dataset(
         # Assign all geography using census block assignment
         # For city datasets: use only blocks in target counties
         if county_filter is not None:
-            filtered_dist = get_filtered_block_distribution(
-                cd_geoid, county_filter
-            )
+            filtered_dist = get_filtered_block_distribution(cd_geoid, county_filter)
             if not filtered_dist:
                 # Should not happen if we already checked p_target > 0
                 continue
@@ -380,9 +354,7 @@ def create_sparse_cd_stacked_dataset(
         new_spm_thresholds = calculate_spm_thresholds_for_cd(
             cd_sim, time_period, geoadj, year=time_period
         )
-        cd_sim.set_input(
-            "spm_unit_spm_threshold", time_period, new_spm_thresholds
-        )
+        cd_sim.set_input("spm_unit_spm_threshold", time_period, new_spm_thresholds)
 
         # Delete cached calculated variables to ensure they're recalculated
         # with new state and county. Exclude 'county' itself since we just set it.
@@ -460,9 +432,7 @@ def create_sparse_cd_stacked_dataset(
 
     # Group by household ID AND congressional district to create unique household-CD pairs
     hh_groups = (
-        combined_df.groupby([hh_id_col, cd_geoid_col])["_row_idx"]
-        .apply(list)
-        .to_dict()
+        combined_df.groupby([hh_id_col, cd_geoid_col])["_row_idx"].apply(list).to_dict()
     )
 
     # Assign new household IDs using 25k ranges per CD
@@ -484,9 +454,7 @@ def create_sparse_cd_stacked_dataset(
 
         # Check we haven't exceeded the range
         if new_hh_id > end_id:
-            raise ValueError(
-                f"CD {cd_str} exceeded its 25k household allocation"
-            )
+            raise ValueError(f"CD {cd_str} exceeded its 25k household allocation")
 
         # All rows in the same household-CD pair get the SAME new ID
         for row_idx in row_indices:
@@ -546,9 +514,7 @@ def create_sparse_cd_stacked_dataset(
             )
 
         # Create sequential IDs for this CD
-        new_person_ids = np.arange(
-            start_id, start_id + n_persons_in_cd, dtype=np.int32
-        )
+        new_person_ids = np.arange(start_id, start_id + n_persons_in_cd, dtype=np.int32)
 
         # Assign all at once using loc
         combined_df.loc[cd_mask, person_id_col] = new_person_ids
@@ -566,9 +532,7 @@ def create_sparse_cd_stacked_dataset(
     for entity_name, person_col, entity_col in entity_configs:
         print(f"  Reindexing {entity_name}...")
         # Group by (household_id, original_entity_id) and assign unique group numbers
-        new_ids = combined_df.groupby(
-            [hh_id_col, person_col], sort=False
-        ).ngroup()
+        new_ids = combined_df.groupby([hh_id_col, person_col], sort=False).ngroup()
         combined_df[person_col] = new_ids
         if entity_col in combined_df.columns:
             combined_df[entity_col] = new_ids
@@ -581,17 +545,13 @@ def create_sparse_cd_stacked_dataset(
     print(f"  Final households: {total_households:,}")
     print(f"  Final tax units: {combined_df[person_tax_unit_col].nunique():,}")
     print(f"  Final SPM units: {combined_df[person_spm_unit_col].nunique():,}")
-    print(
-        f"  Final marital units: {combined_df[person_marital_unit_col].nunique():,}"
-    )
+    print(f"  Final marital units: {combined_df[person_marital_unit_col].nunique():,}")
     print(f"  Final families: {combined_df[person_family_col].nunique():,}")
 
     # Check weights in combined_df AFTER reindexing
     print(f"\nWeights in combined_df AFTER reindexing:")
-    print(f"  HH weight sum: {combined_df[hh_weight_col].sum()/1e6:.2f}M")
-    print(
-        f"  Person weight sum: {combined_df[person_weight_col].sum()/1e6:.2f}M"
-    )
+    print(f"  HH weight sum: {combined_df[hh_weight_col].sum() / 1e6:.2f}M")
+    print(f"  Person weight sum: {combined_df[person_weight_col].sum() / 1e6:.2f}M")
     print(
         f"  Ratio: {combined_df[person_weight_col].sum() / combined_df[hh_weight_col].sum():.2f}"
     )
@@ -662,9 +622,7 @@ def create_sparse_cd_stacked_dataset(
 
             # Handle different value types
             if (
-                sparse_sim.tax_benefit_system.variables.get(
-                    variable
-                ).value_type
+                sparse_sim.tax_benefit_system.variables.get(variable).value_type
                 in (Enum, str)
                 and variable != "county_fips"
             ):
@@ -701,9 +659,7 @@ def create_sparse_cd_stacked_dataset(
     # Save household mapping to CSV in a mappings subdirectory
     mapping_df = pd.DataFrame(household_mapping)
     output_dir = os.path.dirname(output_path)
-    mappings_dir = (
-        os.path.join(output_dir, "mappings") if output_dir else "mappings"
-    )
+    mappings_dir = os.path.join(output_dir, "mappings") if output_dir else "mappings"
     os.makedirs(mappings_dir, exist_ok=True)
     csv_filename = os.path.basename(output_path).replace(
         ".h5", "_household_mapping.csv"
@@ -721,10 +677,7 @@ def create_sparse_cd_stacked_dataset(
         if "person_id" in f and str(time_period) in f["person_id"]:
             person_ids = f["person_id"][str(time_period)][:]
             print(f"  Final persons: {len(person_ids):,}")
-        if (
-            "household_weight" in f
-            and str(time_period) in f["household_weight"]
-        ):
+        if "household_weight" in f and str(time_period) in f["household_weight"]:
             weights = f["household_weight"][str(time_period)][:]
             print(
                 f"  Total population (from household weights): {np.sum(weights):,.0f}"
@@ -744,20 +697,14 @@ def create_sparse_cd_stacked_dataset(
 if __name__ == "__main__":
     import argparse
 
-    parser = argparse.ArgumentParser(
-        description="Create sparse CD-stacked datasets"
-    )
-    parser.add_argument(
-        "--weights-path", required=True, help="Path to w_cd.npy file"
-    )
+    parser = argparse.ArgumentParser(description="Create sparse CD-stacked datasets")
+    parser.add_argument("--weights-path", required=True, help="Path to w_cd.npy file")
     parser.add_argument(
         "--dataset-path",
         required=True,
         help="Path to stratified dataset .h5 file",
     )
-    parser.add_argument(
-        "--db-path", required=True, help="Path to policy_data.db"
-    )
+    parser.add_argument("--db-path", required=True, help="Path to policy_data.db")
     parser.add_argument(
         "--output-dir",
         default="./temp",
@@ -826,9 +773,7 @@ def create_sparse_cd_stacked_dataset(
 
     elif mode == "states":
         for state_fips, state_code in STATE_CODES.items():
-            cd_subset = [
-                cd for cd in cds_to_calibrate if int(cd) // 100 == state_fips
-            ]
+            cd_subset = [cd for cd in cds_to_calibrate if int(cd) // 100 == state_fips]
             if not cd_subset:
                 continue
             output_path = f"{output_dir}/{state_code}.h5"
@@ -852,7 +797,7 @@ def create_sparse_cd_stacked_dataset(
 
             output_path = f"{output_dir}/{friendly_name}.h5"
             print(
-                f"\n[{i+1}/{len(cds_to_calibrate)}] Creating {friendly_name}.h5 (GEOID {cd_geoid})"
+                f"\n[{i + 1}/{len(cds_to_calibrate)}] Creating {friendly_name}.h5 (GEOID {cd_geoid})"
             )
             create_sparse_cd_stacked_dataset(
                 w,
@@ -890,9 +835,7 @@ def create_sparse_cd_stacked_dataset(
         if state_fips is None:
             raise ValueError(f"Unknown state code: {args.state}")
 
-        cd_subset = [
-            cd for cd in cds_to_calibrate if int(cd) // 100 == state_fips
-        ]
+        cd_subset = [cd for cd in cds_to_calibrate if int(cd) // 100 == state_fips]
         if not cd_subset:
             raise ValueError(f"No CDs found for state {state_code_upper}")
 
@@ -914,9 +857,7 @@ def create_sparse_cd_stacked_dataset(
             raise ValueError("No NYC-related CDs found in calibrated CDs list")
 
         output_path = f"{output_dir}/NYC.h5"
-        print(
-            f"\nCreating NYC dataset with {len(cd_subset)} CDs: {output_path}"
-        )
+        print(f"\nCreating NYC dataset with {len(cd_subset)} CDs: {output_path}")
         print(f"  CDs: {', '.join(cd_subset)}")
         print("  Filtering to NYC counties only")
 
diff --git a/policyengine_us_data/datasets/cps/long_term/check_calibrated_estimates_interactive.py b/policyengine_us_data/datasets/cps/long_term/check_calibrated_estimates_interactive.py
index af0414841..5fe3e599e 100644
--- a/policyengine_us_data/datasets/cps/long_term/check_calibrated_estimates_interactive.py
+++ b/policyengine_us_data/datasets/cps/long_term/check_calibrated_estimates_interactive.py
@@ -24,8 +24,7 @@
 ## Taxable Payroll for Social Security
 taxible_estimate_b = (
     sim.calculate("taxable_earnings_for_social_security").sum() / 1e9
-    + sim.calculate("social_security_taxable_self_employment_income").sum()
-    / 1e9
+    + sim.calculate("social_security_taxable_self_employment_income").sum() / 1e9
 )
 
 ### Trustees SingleYearTRTables_TR2025.xlsx, Tab VI.G6 (nominal dollars in billions)
@@ -66,8 +65,7 @@
 ## Taxable Payroll for Social Security
 taxible_estimate_b = (
     sim.calculate("taxable_earnings_for_social_security").sum() / 1e9
-    + sim.calculate("social_security_taxable_self_employment_income").sum()
-    / 1e9
+    + sim.calculate("social_security_taxable_self_employment_income").sum() / 1e9
 )
 
 ### Trustees SingleYearTRTables_TR2025.xlsx, Tab VI.G6 (nominal dollars in billions)
@@ -175,9 +173,9 @@ def create_h6_reform():
         # The swapped rate error is 14x smaller and aligns with tax-cutting intent.
 
         # Tier 1 (Base): HI ONLY (35%)
-        reform_payload[
-            "gov.irs.social_security.taxability.rate.base.benefit_cap"
-        ][period] = 0.35
+        reform_payload["gov.irs.social_security.taxability.rate.base.benefit_cap"][
+            period
+        ] = 0.35
         reform_payload["gov.irs.social_security.taxability.rate.base.excess"][
             period
         ] = 0.35
@@ -186,25 +184,25 @@ def create_h6_reform():
         reform_payload[
             "gov.irs.social_security.taxability.rate.additional.benefit_cap"
         ][period] = 0.85
-        reform_payload[
-            "gov.irs.social_security.taxability.rate.additional.excess"
-        ][period] = 0.85
+        reform_payload["gov.irs.social_security.taxability.rate.additional.excess"][
+            period
+        ] = 0.85
 
         # --- SET THRESHOLDS (MIN/MAX SWAP) ---
         # Always put the smaller number in 'base' and larger in 'adjusted_base'
 
         # Single
-        reform_payload[
-            "gov.irs.social_security.taxability.threshold.base.main.SINGLE"
-        ][period] = min(oasdi_target_single, HI_SINGLE)
+        reform_payload["gov.irs.social_security.taxability.threshold.base.main.SINGLE"][
+            period
+        ] = min(oasdi_target_single, HI_SINGLE)
         reform_payload[
             "gov.irs.social_security.taxability.threshold.adjusted_base.main.SINGLE"
         ][period] = max(oasdi_target_single, HI_SINGLE)
 
         # Joint
-        reform_payload[
-            "gov.irs.social_security.taxability.threshold.base.main.JOINT"
-        ][period] = min(oasdi_target_joint, HI_JOINT)
+        reform_payload["gov.irs.social_security.taxability.threshold.base.main.JOINT"][
+            period
+        ] = min(oasdi_target_joint, HI_JOINT)
         reform_payload[
             "gov.irs.social_security.taxability.threshold.adjusted_base.main.JOINT"
         ][period] = max(oasdi_target_joint, HI_JOINT)
@@ -228,12 +226,12 @@ def create_h6_reform():
 
     # 1. Set Thresholds to "HI Only" mode
     # Base = $34k / $44k
-    reform_payload[
-        "gov.irs.social_security.taxability.threshold.base.main.SINGLE"
-    ][elim_period] = HI_SINGLE
-    reform_payload[
-        "gov.irs.social_security.taxability.threshold.base.main.JOINT"
-    ][elim_period] = HI_JOINT
+    reform_payload["gov.irs.social_security.taxability.threshold.base.main.SINGLE"][
+        elim_period
+    ] = HI_SINGLE
+    reform_payload["gov.irs.social_security.taxability.threshold.base.main.JOINT"][
+        elim_period
+    ] = HI_JOINT
 
     # Adjusted = Infinity (Disable the second tier effectively)
     reform_payload[
@@ -262,12 +260,12 @@ def create_h6_reform():
     ] = 0.35
 
     # Tier 2 (Disabled via threshold, but zero out for safety)
-    reform_payload[
-        "gov.irs.social_security.taxability.rate.additional.benefit_cap"
-    ][elim_period] = 0.35
-    reform_payload[
-        "gov.irs.social_security.taxability.rate.additional.excess"
-    ][elim_period] = 0.35
+    reform_payload["gov.irs.social_security.taxability.rate.additional.benefit_cap"][
+        elim_period
+    ] = 0.35
+    reform_payload["gov.irs.social_security.taxability.rate.additional.excess"][
+        elim_period
+    ] = 0.35
 
     return reform_payload
 
@@ -295,26 +293,20 @@ def create_h6_reform():
 
 # Calculate impact
 revenue_impact = reform_revenue - baseline_revenue
-print(f"revenue_impact (B): {revenue_impact / 1E9:.2f}")
+print(f"revenue_impact (B): {revenue_impact / 1e9:.2f}")
 
 # Calculate taxable payroll
-taxable_ss_earnings = baseline.calculate(
-    "taxable_earnings_for_social_security"
-)
+taxable_ss_earnings = baseline.calculate("taxable_earnings_for_social_security")
 taxable_self_employment = baseline.calculate(
     "social_security_taxable_self_employment_income"
 )
-total_taxable_payroll = (
-    taxable_ss_earnings.sum() + taxable_self_employment.sum()
-)
+total_taxable_payroll = taxable_ss_earnings.sum() + taxable_self_employment.sum()
 
 # Calculate SS benefits
 ss_benefits = baseline.calculate("social_security")
 total_ss_benefits = ss_benefits.sum()
 
-est_rev_as_pct_of_taxable_payroll = (
-    100 * revenue_impact / total_taxable_payroll
-)
+est_rev_as_pct_of_taxable_payroll = 100 * revenue_impact / total_taxable_payroll
 
 # From https://www.ssa.gov/oact/solvency/provisions/tables/table_run133.html:
 target_rev_as_pct_of_taxable_payroll = -1.12
diff --git a/policyengine_us_data/datasets/cps/long_term/extract_ssa_costs.py b/policyengine_us_data/datasets/cps/long_term/extract_ssa_costs.py
index 5ada2db9a..492a9d69f 100644
--- a/policyengine_us_data/datasets/cps/long_term/extract_ssa_costs.py
+++ b/policyengine_us_data/datasets/cps/long_term/extract_ssa_costs.py
@@ -2,9 +2,7 @@
 import numpy as np
 
 # Read the file
-df = pd.read_excel(
-    "SingleYearTRTables_TR2025.xlsx", sheet_name="VI.G9", header=None
-)
+df = pd.read_excel("SingleYearTRTables_TR2025.xlsx", sheet_name="VI.G9", header=None)
 
 print("DataFrame shape:", df.shape)
 print("\nChecking data types around row 66-70:")
diff --git a/policyengine_us_data/datasets/cps/long_term/projection_utils.py b/policyengine_us_data/datasets/cps/long_term/projection_utils.py
index d0af8533e..8aee4f3b7 100644
--- a/policyengine_us_data/datasets/cps/long_term/projection_utils.py
+++ b/policyengine_us_data/datasets/cps/long_term/projection_utils.py
@@ -27,9 +27,7 @@ def build_household_age_matrix(sim, n_ages=86):
     n_households = len(household_ids_unique)
 
     X = np.zeros((n_households, n_ages))
-    hh_id_to_idx = {
-        hh_id: idx for idx, hh_id in enumerate(household_ids_unique)
-    }
+    hh_id_to_idx = {hh_id: idx for idx, hh_id in enumerate(household_ids_unique)}
 
     for person_idx in range(len(age_person)):
         age = int(age_person.values[person_idx])
@@ -67,9 +65,7 @@ def get_pseudo_input_variables(sim):
     return pseudo_inputs
 
 
-def create_household_year_h5(
-    year, household_weights, base_dataset_path, output_dir
-):
+def create_household_year_h5(year, household_weights, base_dataset_path, output_dir):
     """
     Create a year-specific .h5 file with calibrated household weights.
 
@@ -193,9 +189,7 @@ def calculate_year_statistics(
     Returns:
         Dictionary with year statistics and calibrated weights
     """
-    income_tax_hh = sim.calculate(
-        "income_tax", period=year, map_to="household"
-    )
+    income_tax_hh = sim.calculate("income_tax", period=year, map_to="household")
     income_tax_baseline_total = income_tax_hh.sum()
     income_tax_values = income_tax_hh.values
 
@@ -206,9 +200,7 @@ def calculate_year_statistics(
     ss_values = None
     ss_target = None
     if use_ss:
-        ss_hh = sim.calculate(
-            "social_security", period=year, map_to="household"
-        )
+        ss_hh = sim.calculate("social_security", period=year, map_to="household")
         ss_baseline_total = ss_hh.sum()
         ss_values = ss_hh.values
 
diff --git a/policyengine_us_data/datasets/cps/long_term/run_household_projection.py b/policyengine_us_data/datasets/cps/long_term/run_household_projection.py
index 651f7b504..1413efe4b 100644
--- a/policyengine_us_data/datasets/cps/long_term/run_household_projection.py
+++ b/policyengine_us_data/datasets/cps/long_term/run_household_projection.py
@@ -105,9 +105,9 @@ def create_h6_reform():
         # The swapped rate error is 14x smaller and aligns with tax-cutting intent.
 
         # Tier 1 (Base): HI ONLY (35%)
-        reform_payload[
-            "gov.irs.social_security.taxability.rate.base.benefit_cap"
-        ][period] = 0.35
+        reform_payload["gov.irs.social_security.taxability.rate.base.benefit_cap"][
+            period
+        ] = 0.35
         reform_payload["gov.irs.social_security.taxability.rate.base.excess"][
             period
         ] = 0.35
@@ -116,25 +116,25 @@ def create_h6_reform():
         reform_payload[
             "gov.irs.social_security.taxability.rate.additional.benefit_cap"
         ][period] = 0.85
-        reform_payload[
-            "gov.irs.social_security.taxability.rate.additional.excess"
-        ][period] = 0.85
+        reform_payload["gov.irs.social_security.taxability.rate.additional.excess"][
+            period
+        ] = 0.85
 
         # --- SET THRESHOLDS (MIN/MAX SWAP) ---
         # Always put the smaller number in 'base' and larger in 'adjusted_base'
 
         # Single
-        reform_payload[
-            "gov.irs.social_security.taxability.threshold.base.main.SINGLE"
-        ][period] = min(oasdi_target_single, HI_SINGLE)
+        reform_payload["gov.irs.social_security.taxability.threshold.base.main.SINGLE"][
+            period
+        ] = min(oasdi_target_single, HI_SINGLE)
         reform_payload[
             "gov.irs.social_security.taxability.threshold.adjusted_base.main.SINGLE"
         ][period] = max(oasdi_target_single, HI_SINGLE)
 
         # Joint
-        reform_payload[
-            "gov.irs.social_security.taxability.threshold.base.main.JOINT"
-        ][period] = min(oasdi_target_joint, HI_JOINT)
+        reform_payload["gov.irs.social_security.taxability.threshold.base.main.JOINT"][
+            period
+        ] = min(oasdi_target_joint, HI_JOINT)
         reform_payload[
             "gov.irs.social_security.taxability.threshold.adjusted_base.main.JOINT"
         ][period] = max(oasdi_target_joint, HI_JOINT)
@@ -158,12 +158,12 @@ def create_h6_reform():
 
     # 1. Set Thresholds to "HI Only" mode
     # Base = $34k / $44k
-    reform_payload[
-        "gov.irs.social_security.taxability.threshold.base.main.SINGLE"
-    ][elim_period] = HI_SINGLE
-    reform_payload[
-        "gov.irs.social_security.taxability.threshold.base.main.JOINT"
-    ][elim_period] = HI_JOINT
+    reform_payload["gov.irs.social_security.taxability.threshold.base.main.SINGLE"][
+        elim_period
+    ] = HI_SINGLE
+    reform_payload["gov.irs.social_security.taxability.threshold.base.main.JOINT"][
+        elim_period
+    ] = HI_JOINT
 
     # Adjusted = Infinity (Disable the second tier effectively)
     reform_payload[
@@ -192,12 +192,12 @@ def create_h6_reform():
     ] = 0.35
 
     # Tier 2 (Disabled via threshold, but zero out for safety)
-    reform_payload[
-        "gov.irs.social_security.taxability.rate.additional.benefit_cap"
-    ][elim_period] = 0.35
-    reform_payload[
-        "gov.irs.social_security.taxability.rate.additional.excess"
-    ][elim_period] = 0.35
+    reform_payload["gov.irs.social_security.taxability.rate.additional.benefit_cap"][
+        elim_period
+    ] = 0.35
+    reform_payload["gov.irs.social_security.taxability.rate.additional.excess"][
+        elim_period
+    ] = 0.35
 
     # Create the Reform Object
     from policyengine_core.reforms import Reform
@@ -242,18 +242,14 @@ def create_h6_reform():
 if USE_PAYROLL:
     sys.argv.remove("--use-payroll")
     if not USE_GREG:
-        print(
-            "Warning: --use-payroll requires --greg, enabling GREG automatically"
-        )
+        print("Warning: --use-payroll requires --greg, enabling GREG automatically")
         USE_GREG = True
 
 USE_H6_REFORM = "--use-h6-reform" in sys.argv
 if USE_H6_REFORM:
     sys.argv.remove("--use-h6-reform")
     if not USE_GREG:
-        print(
-            "Warning: --use-h6-reform requires --greg, enabling GREG automatically"
-        )
+        print("Warning: --use-h6-reform requires --greg, enabling GREG automatically")
         USE_GREG = True
     from ssa_data import load_h6_income_rate_change
 
@@ -261,9 +257,7 @@ def create_h6_reform():
 if USE_TOB:
     sys.argv.remove("--use-tob")
     if not USE_GREG:
-        print(
-            "Warning: --use-tob requires --greg, enabling GREG automatically"
-        )
+        print("Warning: --use-tob requires --greg, enabling GREG automatically")
         USE_GREG = True
     from ssa_data import load_oasdi_tob_projections, load_hi_tob_projections
 
@@ -320,9 +314,7 @@ def create_h6_reform():
 print("STEP 1: DEMOGRAPHIC PROJECTIONS")
 print("=" * 70)
 
-target_matrix = load_ssa_age_projections(
-    start_year=START_YEAR, end_year=END_YEAR
-)
+target_matrix = load_ssa_age_projections(start_year=START_YEAR, end_year=END_YEAR)
 n_years = target_matrix.shape[1]
 n_ages = target_matrix.shape[0]
 
@@ -341,7 +333,7 @@ def create_h6_reform():
     idx = y - START_YEAR
     if idx < n_years:
         pop = target_matrix[:, idx].sum()
-        print(f"  {y}: {pop/1e6:6.1f}M")
+        print(f"  {y}: {pop / 1e6:6.1f}M")
 
 # =========================================================================
 # STEP 2: BUILD HOUSEHOLD AGE MATRIX
@@ -390,9 +382,7 @@ def create_h6_reform():
 
     sim = Microsimulation(dataset=BASE_DATASET_PATH)
 
-    income_tax_hh = sim.calculate(
-        "income_tax", period=year, map_to="household"
-    )
+    income_tax_hh = sim.calculate("income_tax", period=year, map_to="household")
     income_tax_baseline_total = income_tax_hh.sum()
     income_tax_values = income_tax_hh.values
 
@@ -405,15 +395,13 @@ def create_h6_reform():
     ss_values = None
     ss_target = None
     if USE_SS:
-        ss_hh = sim.calculate(
-            "social_security", period=year, map_to="household"
-        )
+        ss_hh = sim.calculate("social_security", period=year, map_to="household")
         ss_values = ss_hh.values
         ss_target = load_ssa_benefit_projections(year)
         if year in display_years:
             ss_baseline = np.sum(ss_values * baseline_weights)
             print(
-                f"  [DEBUG {year}] SS baseline: ${ss_baseline/1e9:.1f}B, target: ${ss_target/1e9:.1f}B"
+                f"  [DEBUG {year}] SS baseline: ${ss_baseline / 1e9:.1f}B, target: ${ss_target / 1e9:.1f}B"
             )
 
     payroll_values = None
@@ -435,7 +423,7 @@ def create_h6_reform():
         if year in display_years:
             payroll_baseline = np.sum(payroll_values * baseline_weights)
             print(
-                f"  [DEBUG {year}] Payroll baseline: ${payroll_baseline/1e9:.1f}B, target: ${payroll_target/1e9:.1f}B"
+                f"  [DEBUG {year}] Payroll baseline: ${payroll_baseline / 1e9:.1f}B, target: ${payroll_target / 1e9:.1f}B"
             )
 
     h6_income_values = None
@@ -452,9 +440,7 @@ def create_h6_reform():
         else:
             # Create and apply H6 reform
             h6_reform = create_h6_reform()
-            reform_sim = Microsimulation(
-                dataset=BASE_DATASET_PATH, reform=h6_reform
-            )
+            reform_sim = Microsimulation(dataset=BASE_DATASET_PATH, reform=h6_reform)
 
             # Calculate reform income tax
             income_tax_reform_hh = reform_sim.calculate(
@@ -472,14 +458,12 @@ def create_h6_reform():
 
             # Debug output for key years
             if year in display_years:
-                h6_impact_baseline = np.sum(
-                    h6_income_values * baseline_weights
-                )
+                h6_impact_baseline = np.sum(h6_income_values * baseline_weights)
                 print(
-                    f"  [DEBUG {year}] H6 baseline revenue: ${h6_impact_baseline/1e9:.3f}B, target: ${h6_revenue_target/1e9:.3f}B"
+                    f"  [DEBUG {year}] H6 baseline revenue: ${h6_impact_baseline / 1e9:.3f}B, target: ${h6_revenue_target / 1e9:.3f}B"
                 )
                 print(
-                    f"  [DEBUG {year}] H6 target ratio: {h6_target_ratio:.4f} × payroll ${payroll_target_year/1e9:.1f}B"
+                    f"  [DEBUG {year}] H6 target ratio: {h6_target_ratio:.4f} × payroll ${payroll_target_year / 1e9:.1f}B"
                 )
 
             del reform_sim
@@ -506,10 +490,10 @@ def create_h6_reform():
             oasdi_baseline = np.sum(oasdi_tob_values * baseline_weights)
             hi_baseline = np.sum(hi_tob_values * baseline_weights)
             print(
-                f"  [DEBUG {year}] OASDI TOB baseline: ${oasdi_baseline/1e9:.1f}B, target: ${oasdi_tob_target/1e9:.1f}B"
+                f"  [DEBUG {year}] OASDI TOB baseline: ${oasdi_baseline / 1e9:.1f}B, target: ${oasdi_tob_target / 1e9:.1f}B"
             )
             print(
-                f"  [DEBUG {year}] HI TOB baseline: ${hi_baseline/1e9:.1f}B, target: ${hi_tob_target/1e9:.1f}B"
+                f"  [DEBUG {year}] HI TOB baseline: ${hi_baseline / 1e9:.1f}B, target: ${hi_tob_target / 1e9:.1f}B"
             )
 
     y_target = target_matrix[:, year_idx]
@@ -547,43 +531,37 @@ def create_h6_reform():
                 f"largest: {max_neg:,.0f}"
             )
         else:
-            print(
-                f"  [DEBUG {year}] Negative weights: 0 (all weights non-negative)"
-            )
+            print(f"  [DEBUG {year}] Negative weights: 0 (all weights non-negative)")
 
-    if year in display_years and (
-        USE_SS or USE_PAYROLL or USE_H6_REFORM or USE_TOB
-    ):
+    if year in display_years and (USE_SS or USE_PAYROLL or USE_H6_REFORM or USE_TOB):
         if USE_SS:
             ss_achieved = np.sum(ss_values * w_new)
             print(
-                f"  [DEBUG {year}] SS achieved: ${ss_achieved/1e9:.1f}B (error: ${abs(ss_achieved - ss_target)/1e6:.1f}M, {(ss_achieved - ss_target)/ss_target*100:.3f}%)"
+                f"  [DEBUG {year}] SS achieved: ${ss_achieved / 1e9:.1f}B (error: ${abs(ss_achieved - ss_target) / 1e6:.1f}M, {(ss_achieved - ss_target) / ss_target * 100:.3f}%)"
             )
         if USE_PAYROLL:
             payroll_achieved = np.sum(payroll_values * w_new)
             print(
-                f"  [DEBUG {year}] Payroll achieved: ${payroll_achieved/1e9:.1f}B (error: ${abs(payroll_achieved - payroll_target)/1e6:.1f}M, {(payroll_achieved - payroll_target)/payroll_target*100:.3f}%)"
+                f"  [DEBUG {year}] Payroll achieved: ${payroll_achieved / 1e9:.1f}B (error: ${abs(payroll_achieved - payroll_target) / 1e6:.1f}M, {(payroll_achieved - payroll_target) / payroll_target * 100:.3f}%)"
             )
         if USE_H6_REFORM and h6_revenue_target is not None:
             h6_revenue_achieved = np.sum(h6_income_values * w_new)
             error_pct = (
-                (h6_revenue_achieved - h6_revenue_target)
-                / abs(h6_revenue_target)
-                * 100
+                (h6_revenue_achieved - h6_revenue_target) / abs(h6_revenue_target) * 100
                 if h6_revenue_target != 0
                 else 0
             )
             print(
-                f"  [DEBUG {year}] H6 achieved revenue: ${h6_revenue_achieved/1e9:.3f}B (error: ${abs(h6_revenue_achieved - h6_revenue_target)/1e6:.1f}M, {error_pct:.3f}%)"
+                f"  [DEBUG {year}] H6 achieved revenue: ${h6_revenue_achieved / 1e9:.3f}B (error: ${abs(h6_revenue_achieved - h6_revenue_target) / 1e6:.1f}M, {error_pct:.3f}%)"
             )
         if USE_TOB:
             oasdi_achieved = np.sum(oasdi_tob_values * w_new)
             hi_achieved = np.sum(hi_tob_values * w_new)
             print(
-                f"  [DEBUG {year}] OASDI TOB achieved: ${oasdi_achieved/1e9:.1f}B (error: ${abs(oasdi_achieved - oasdi_tob_target)/1e6:.1f}M, {(oasdi_achieved - oasdi_tob_target)/oasdi_tob_target*100:.3f}%)"
+                f"  [DEBUG {year}] OASDI TOB achieved: ${oasdi_achieved / 1e9:.1f}B (error: ${abs(oasdi_achieved - oasdi_tob_target) / 1e6:.1f}M, {(oasdi_achieved - oasdi_tob_target) / oasdi_tob_target * 100:.3f}%)"
             )
             print(
-                f"  [DEBUG {year}] HI TOB achieved: ${hi_achieved/1e9:.1f}B (error: ${abs(hi_achieved - hi_tob_target)/1e6:.1f}M, {(hi_achieved - hi_tob_target)/hi_tob_target*100:.3f}%)"
+                f"  [DEBUG {year}] HI TOB achieved: ${hi_achieved / 1e9:.1f}B (error: ${abs(hi_achieved - hi_tob_target) / 1e6:.1f}M, {(hi_achieved - hi_tob_target) / hi_tob_target * 100:.3f}%)"
             )
 
     weights_matrix[:, year_idx] = w_new
@@ -593,9 +571,7 @@ def create_h6_reform():
     total_population[year_idx] = np.sum(y_target)
 
     if SAVE_H5:
-        h5_path = create_household_year_h5(
-            year, w_new, BASE_DATASET_PATH, OUTPUT_DIR
-        )
+        h5_path = create_household_year_h5(year, w_new, BASE_DATASET_PATH, OUTPUT_DIR)
         if year in display_years:
             print(f"  Saved {year}.h5")
 
@@ -613,5 +589,5 @@ def create_h6_reform():
         )
     elif year_idx % 5 == 0:
         print(
-            f"{year}    Processing... ({year_idx+1}/{n_years})                        {mem_gb:.2f}GB"
+            f"{year}    Processing... ({year_idx + 1}/{n_years})                        {mem_gb:.2f}GB"
         )
diff --git a/policyengine_us_data/datasets/cps/small_enhanced_cps.py b/policyengine_us_data/datasets/cps/small_enhanced_cps.py
index c84181eae..a15080321 100644
--- a/policyengine_us_data/datasets/cps/small_enhanced_cps.py
+++ b/policyengine_us_data/datasets/cps/small_enhanced_cps.py
@@ -22,8 +22,7 @@ def create_small_ecps():
     weights = simulation.calculate("household_weight").values
     if np.all(weights == 0):
         raise ValueError(
-            "create_small_ecps: all household weights are zero "
-            "after subsample"
+            "create_small_ecps: all household weights are zero after subsample"
         )
     logging.info(
         f"create_small_ecps: subsample has "
@@ -36,9 +35,10 @@ def create_small_ecps():
         data[variable] = {}
         for time_period in simulation.get_holder(variable).get_known_periods():
             values = simulation.get_holder(variable).get_array(time_period)
-            if simulation.tax_benefit_system.variables.get(
-                variable
-            ).value_type in (Enum, str):
+            if simulation.tax_benefit_system.variables.get(variable).value_type in (
+                Enum,
+                str,
+            ):
                 if hasattr(values, "decode_to_str"):
                     values = values.decode_to_str().astype("S")
                 else:
@@ -95,8 +95,7 @@ def create_sparse_ecps():
             f"non-zero weight (expected > 1000)"
         )
     logging.info(
-        f"create_sparse_ecps: {len(h_ids)} households after "
-        f"zero-weight filtering"
+        f"create_sparse_ecps: {len(h_ids)} households after zero-weight filtering"
     )
 
     subset_df = df[df[df_household_id_column].isin(h_ids)].copy()
@@ -113,8 +112,7 @@ def create_sparse_ecps():
         for time_period in sim.get_holder(variable).get_known_periods():
             values = sim.get_holder(variable).get_array(time_period)
             if (
-                sim.tax_benefit_system.variables.get(variable).value_type
-                in (Enum, str)
+                sim.tax_benefit_system.variables.get(variable).value_type in (Enum, str)
                 and variable != "county_fips"
             ):
                 values = values.decode_to_str().astype("S")
@@ -137,9 +135,7 @@ def create_sparse_ecps():
     ]
     missing = [v for v in critical_vars if v not in data]
     if missing:
-        raise ValueError(
-            f"create_sparse_ecps: missing critical variables: {missing}"
-        )
+        raise ValueError(f"create_sparse_ecps: missing critical variables: {missing}")
     logging.info(f"create_sparse_ecps: data dict has {len(data)} variables")
 
     output_path = STORAGE_FOLDER / "sparse_enhanced_cps_2024.h5"
@@ -152,13 +148,9 @@ def create_sparse_ecps():
     file_size = os.path.getsize(output_path)
     if file_size < 1_000_000:
         raise ValueError(
-            f"create_sparse_ecps: output file only {file_size:,} bytes "
-            f"(expected > 1MB)"
+            f"create_sparse_ecps: output file only {file_size:,} bytes (expected > 1MB)"
         )
-    logging.info(
-        f"create_sparse_ecps: wrote {file_size / 1e6:.1f}MB to "
-        f"{output_path}"
-    )
+    logging.info(f"create_sparse_ecps: wrote {file_size / 1e6:.1f}MB to {output_path}")
 
 
 if __name__ == "__main__":
diff --git a/policyengine_us_data/datasets/puf/irs_puf.py b/policyengine_us_data/datasets/puf/irs_puf.py
index dd77890a3..c357cd56c 100644
--- a/policyengine_us_data/datasets/puf/irs_puf.py
+++ b/policyengine_us_data/datasets/puf/irs_puf.py
@@ -30,9 +30,7 @@ def generate(self):
 
         with pd.HDFStore(self.file_path, mode="w") as storage:
             storage.put("puf", pd.read_csv(puf_file_path))
-            storage.put(
-                "puf_demographics", pd.read_csv(puf_demographics_file_path)
-            )
+            storage.put("puf_demographics", pd.read_csv(puf_demographics_file_path))
 
 
 class IRS_PUF_2015(IRS_PUF):
diff --git a/policyengine_us_data/datasets/puf/puf.py b/policyengine_us_data/datasets/puf/puf.py
index f52153e38..040098c16 100644
--- a/policyengine_us_data/datasets/puf/puf.py
+++ b/policyengine_us_data/datasets/puf/puf.py
@@ -109,14 +109,10 @@ def simulate_w2_and_ubia_from_puf(puf, *, seed=None, diagnostics=True):
     )
     revenues = np.maximum(qbi, 0) / margins
 
-    logit = (
-        logit_params["intercept"] + logit_params["slope_per_dollar"] * revenues
-    )
+    logit = logit_params["intercept"] + logit_params["slope_per_dollar"] * revenues
 
     # Set p = 0 when simulated receipts == 0 (no revenue means no payroll)
-    pr_has_employees = np.where(
-        revenues == 0.0, 0.0, 1.0 / (1.0 + np.exp(-logit))
-    )
+    pr_has_employees = np.where(revenues == 0.0, 0.0, 1.0 / (1.0 + np.exp(-logit)))
     has_employees = rng.binomial(1, pr_has_employees)
 
     # Labor share simulation
@@ -125,8 +121,7 @@ def simulate_w2_and_ubia_from_puf(puf, *, seed=None, diagnostics=True):
     labor_ratios = np.where(
         is_rental,
         rng.beta(rental_beta_a, rental_beta_b, qbi.size) * rental_scale,
-        rng.beta(non_rental_beta_a, non_rental_beta_b, qbi.size)
-        * non_rental_scale,
+        rng.beta(non_rental_beta_a, non_rental_beta_b, qbi.size) * non_rental_scale,
     )
 
     w2_wages = revenues * labor_ratios * has_employees
@@ -155,9 +150,9 @@ def simulate_w2_and_ubia_from_puf(puf, *, seed=None, diagnostics=True):
         print(f"Share with QBI > 0: {share_qbi_pos:6.2%}")
         print(f"Among those, share with W-2 wages: {share_wages:6.2%}")
         if np.any(w2_wages > 0):
-            print(f"Mean W-2 (if >0): ${np.mean(w2_wages[w2_wages>0]):,.0f}")
+            print(f"Mean W-2 (if >0): ${np.mean(w2_wages[w2_wages > 0]):,.0f}")
         if np.any(ubia > 0):
-            print(f"Median UBIA (if >0): ${np.median(ubia[ubia>0]):,.0f}")
+            print(f"Median UBIA (if >0): ${np.median(ubia[ubia > 0]):,.0f}")
 
     return w2_wages, ubia
 
@@ -209,9 +204,7 @@ def impute_missing_demographics(
         .fillna(0)
     )
 
-    puf_with_demographics = puf_with_demographics.sample(
-        n=10_000, random_state=0
-    )
+    puf_with_demographics = puf_with_demographics.sample(n=10_000, random_state=0)
 
     DEMOGRAPHIC_VARIABLES = [
         "AGEDP1",
@@ -411,9 +404,7 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame:
         - puf["E25920"].fillna(0)
         - puf["E25960"].fillna(0)
     ) != 0
-    partnership_se = np.where(
-        has_partnership, gross_se - schedule_c_f_income, 0
-    )
+    partnership_se = np.where(has_partnership, gross_se - schedule_c_f_income, 0)
     puf["partnership_se_income"] = partnership_se
 
     # --- Qualified Business Income Deduction (QBID) simulation ---
@@ -424,9 +415,9 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame:
     puf_qbi_sources_for_sstb = puf[QBI_PARAMS["sstb_prob_map_by_name"].keys()]
     largest_qbi_source_name = puf_qbi_sources_for_sstb.idxmax(axis=1)
 
-    pr_sstb = largest_qbi_source_name.map(
-        QBI_PARAMS["sstb_prob_map_by_name"]
-    ).fillna(0.0)
+    pr_sstb = largest_qbi_source_name.map(QBI_PARAMS["sstb_prob_map_by_name"]).fillna(
+        0.0
+    )
     puf["business_is_sstb"] = np.random.binomial(n=1, p=pr_sstb)
 
     reit_params = QBI_PARAMS["reit_ptp_income_distribution"]
@@ -553,9 +544,9 @@ def generate(self):
                     current_index = uprating[uprating.Variable == variable][
                         self.time_period
                     ].values[0]
-                    start_index = uprating[uprating.Variable == variable][
-                        2021
-                    ].values[0]
+                    start_index = uprating[uprating.Variable == variable][2021].values[
+                        0
+                    ]
                     growth = current_index / start_index
                     arrays[variable] = arrays[variable] * growth
             self.save_dataset(arrays)
@@ -635,9 +626,7 @@ def generate(self):
 
         for group in groups_assumed_to_be_tax_unit_like:
             self.holder[f"{group}_id"] = self.holder["tax_unit_id"]
-            self.holder[f"person_{group}_id"] = self.holder[
-                "person_tax_unit_id"
-            ]
+            self.holder[f"person_{group}_id"] = self.holder["person_tax_unit_id"]
 
         for key in self.holder:
             if key == "filing_status":
@@ -689,9 +678,7 @@ def add_filer(self, row, tax_unit_id):
 
         # Assume all of the interest deduction is the filer's deductible mortgage interest
 
-        self.holder["deductible_mortgage_interest"].append(
-            row["interest_deduction"]
-        )
+        self.holder["deductible_mortgage_interest"].append(row["interest_deduction"])
 
         for key in self.available_financial_vars:
             if key == "deductible_mortgage_interest":
diff --git a/policyengine_us_data/datasets/scf/fed_scf.py b/policyengine_us_data/datasets/scf/fed_scf.py
index f67a2c076..8c0d8e8cc 100644
--- a/policyengine_us_data/datasets/scf/fed_scf.py
+++ b/policyengine_us_data/datasets/scf/fed_scf.py
@@ -32,16 +32,12 @@ def load(self):
 
     def generate(self):
         if self._scf_download_url is None:
-            raise ValueError(
-                f"No raw SCF data URL known for year {self.time_period}."
-            )
+            raise ValueError(f"No raw SCF data URL known for year {self.time_period}.")
 
         url = self._scf_download_url
 
         response = requests.get(url, stream=True)
-        total_size_in_bytes = int(
-            response.headers.get("content-length", 200e6)
-        )
+        total_size_in_bytes = int(response.headers.get("content-length", 200e6))
         progress_bar = tqdm(
             total=total_size_in_bytes,
             unit="iB",
@@ -49,9 +45,7 @@ def generate(self):
             desc="Downloading SCF",
         )
         if response.status_code == 404:
-            raise FileNotFoundError(
-                "Received a 404 response when fetching the data."
-            )
+            raise FileNotFoundError("Received a 404 response when fetching the data.")
         with BytesIO() as file:
             content_length_actual = 0
             for data in response.iter_content(int(1e6)):
@@ -65,9 +59,7 @@ def generate(self):
             zipfile = ZipFile(file)
             with pd.HDFStore(self.file_path, mode="w") as storage:
                 # Find the Stata file, which should be the only .dta file in the zip
-                dta_files = [
-                    f for f in zipfile.namelist() if f.endswith(".dta")
-                ]
+                dta_files = [f for f in zipfile.namelist() if f.endswith(".dta")]
                 if not dta_files:
                     raise FileNotFoundError(
                         "No .dta file found in the SCF zip archive."
diff --git a/policyengine_us_data/datasets/scf/scf.py b/policyengine_us_data/datasets/scf/scf.py
index 1567fbbb6..3f2f11a74 100644
--- a/policyengine_us_data/datasets/scf/scf.py
+++ b/policyengine_us_data/datasets/scf/scf.py
@@ -55,9 +55,7 @@ def generate(self):
                 try:
                     scf[key] = np.array(scf[key])
                 except Exception as e:
-                    print(
-                        f"Warning: Could not convert {key} to numpy array: {e}"
-                    )
+                    print(f"Warning: Could not convert {key} to numpy array: {e}")
 
         self.save_dataset(scf)
 
@@ -110,9 +108,7 @@ def downsample(self, frac: float):
 
         # Store original dtypes before modifying
         original_data: dict = self.load_dataset()
-        original_dtypes = {
-            key: original_data[key].dtype for key in original_data
-        }
+        original_dtypes = {key: original_data[key].dtype for key in original_data}
 
         sim = Microsimulation(dataset=self)
         sim.subsample(frac=frac)
@@ -189,17 +185,13 @@ def rename_columns_to_match_cps(scf: dict, raw_data: pd.DataFrame) -> None:
             4: 4,  # Asian
             5: 7,  # Other
         }
-        scf["cps_race"] = (
-            raw_data["racecl5"].map(race_map).fillna(6).astype(int).values
-        )
+        scf["cps_race"] = raw_data["racecl5"].map(race_map).fillna(6).astype(int).values
         # Hispanic indicator
         scf["is_hispanic"] = (raw_data["racecl5"] == 3).values
 
     # Children in household
     if "kids" in raw_data.columns:
-        scf["own_children_in_household"] = (
-            raw_data["kids"].fillna(0).astype(int).values
-        )
+        scf["own_children_in_household"] = raw_data["kids"].fillna(0).astype(int).values
 
     # Rent
     if "rent" in raw_data.columns:
@@ -207,9 +199,7 @@ def rename_columns_to_match_cps(scf: dict, raw_data: pd.DataFrame) -> None:
 
     # Vehicle loan (auto loan)
     if "veh_inst" in raw_data.columns:
-        scf["total_vehicle_installments"] = (
-            raw_data["veh_inst"].fillna(0).values
-        )
+        scf["total_vehicle_installments"] = raw_data["veh_inst"].fillna(0).values
 
     # Marital status
     if "married" in raw_data.columns:
@@ -269,9 +259,7 @@ def add_auto_loan_interest(scf: dict, year: int) -> None:
             logger.error(
                 f"Network error downloading SCF data for year {year}: {str(e)}"
             )
-            raise RuntimeError(
-                f"Failed to download SCF data for year {year}"
-            ) from e
+            raise RuntimeError(f"Failed to download SCF data for year {year}") from e
 
         # Process zip file
         try:
@@ -282,9 +270,7 @@ def add_auto_loan_interest(scf: dict, year: int) -> None:
             dta_files = [f for f in z.namelist() if f.endswith(".dta")]
             if not dta_files:
                 logger.error(f"No Stata files found in zip for year {year}")
-                raise ValueError(
-                    f"No Stata files found in zip for year {year}"
-                )
+                raise ValueError(f"No Stata files found in zip for year {year}")
 
             logger.info(f"Found Stata files: {dta_files}")
 
@@ -298,18 +284,14 @@ def add_auto_loan_interest(scf: dict, year: int) -> None:
                     )
                     logger.info(f"Read DataFrame with shape {df.shape}")
             except Exception as e:
-                logger.error(
-                    f"Error reading Stata file for year {year}: {str(e)}"
-                )
+                logger.error(f"Error reading Stata file for year {year}: {str(e)}")
                 raise RuntimeError(
                     f"Failed to process Stata file for year {year}"
                 ) from e
 
         except zipfile.BadZipFile as e:
             logger.error(f"Bad zip file for year {year}: {str(e)}")
-            raise RuntimeError(
-                f"Downloaded zip file is corrupt for year {year}"
-            ) from e
+            raise RuntimeError(f"Downloaded zip file is corrupt for year {year}") from e
 
         # Process the interest data and add to final SCF dictionary
         auto_df = df[IDENTIFYER_COLUMNS + AUTO_LOAN_COLUMNS].copy()
diff --git a/policyengine_us_data/datasets/sipp/sipp.py b/policyengine_us_data/datasets/sipp/sipp.py
index bf8b75ddc..d77082665 100644
--- a/policyengine_us_data/datasets/sipp/sipp.py
+++ b/policyengine_us_data/datasets/sipp/sipp.py
@@ -68,8 +68,7 @@ def train_tip_model():
         )
     # Sum tip columns (AJB*_TXAMT + TJB*_TXAMT) across all jobs.
     df["tip_income"] = (
-        df[df.columns[df.columns.str.contains("TXAMT")]].fillna(0).sum(axis=1)
-        * 12
+        df[df.columns[df.columns.str.contains("TXAMT")]].fillna(0).sum(axis=1) * 12
     )
     df["employment_income"] = df.TPTOTINC * 12
     df["is_under_18"] = (df.TAGE < 18) & (df.MONTHCODE == 12)
diff --git a/policyengine_us_data/db/create_database_tables.py b/policyengine_us_data/db/create_database_tables.py
index 8590f79e8..d89bad317 100644
--- a/policyengine_us_data/db/create_database_tables.py
+++ b/policyengine_us_data/db/create_database_tables.py
@@ -39,9 +39,7 @@ class Stratum(SQLModel, table=True):
         description="Unique identifier for the stratum.",
     )
     definition_hash: str = Field(
-        sa_column_kwargs={
-            "comment": "SHA-256 hash of the stratum's constraints."
-        },
+        sa_column_kwargs={"comment": "SHA-256 hash of the stratum's constraints."},
         max_length=64,
     )
     parent_stratum_id: Optional[int] = Field(
@@ -89,9 +87,7 @@ class StratumConstraint(SQLModel, table=True):
         primary_key=True,
         description="The comparison operator (==, !=, >, >=, <, <=).",
     )
-    value: str = Field(
-        description="The value for the constraint rule (e.g., '25')."
-    )
+    value: str = Field(description="The value for the constraint rule (e.g., '25').")
     notes: Optional[str] = Field(
         default=None, description="Optional notes about the constraint."
     )
@@ -117,9 +113,7 @@ class Target(SQLModel, table=True):
     variable: str = Field(
         description="A variable defined in policyengine-us (e.g., 'income_tax')."
     )
-    period: int = Field(
-        description="The time period for the data, typically a year."
-    )
+    period: int = Field(description="The time period for the data, typically a year.")
     stratum_id: int = Field(foreign_key="strata.stratum_id", index=True)
     reform_id: int = Field(
         default=0,
@@ -156,19 +150,13 @@ def calculate_definition_hash(mapper, connection, target: Stratum):
     Calculate and set the definition_hash before saving a Stratum instance.
     """
     constraints_history = get_history(target, "constraints_rel")
-    if not (
-        constraints_history.has_changes() or target.definition_hash is None
-    ):
+    if not (constraints_history.has_changes() or target.definition_hash is None):
         return
 
     if not target.constraints_rel:  # Handle cases with no constraints
         # Include parent_stratum_id to make hash unique per parent
-        parent_str = (
-            str(target.parent_stratum_id) if target.parent_stratum_id else ""
-        )
-        target.definition_hash = hashlib.sha256(
-            parent_str.encode("utf-8")
-        ).hexdigest()
+        parent_str = str(target.parent_stratum_id) if target.parent_stratum_id else ""
+        target.definition_hash = hashlib.sha256(parent_str.encode("utf-8")).hexdigest()
         return
 
     constraint_strings = [
@@ -178,9 +166,7 @@ def calculate_definition_hash(mapper, connection, target: Stratum):
 
     constraint_strings.sort()
     # Include parent_stratum_id in the hash to ensure uniqueness per parent
-    parent_str = (
-        str(target.parent_stratum_id) if target.parent_stratum_id else ""
-    )
+    parent_str = str(target.parent_stratum_id) if target.parent_stratum_id else ""
     fingerprint_text = parent_str + "\n" + "\n".join(constraint_strings)
     h = hashlib.sha256(fingerprint_text.encode("utf-8"))
     target.definition_hash = h.hexdigest()
@@ -241,10 +227,7 @@ def _validate_geographic_consistency(parent_rows, child_constraints):
                 )
 
     # CD must belong to the parent state.
-    if (
-        "state_fips" in parent_dict
-        and "congressional_district_geoid" in child_dict
-    ):
+    if "state_fips" in parent_dict and "congressional_district_geoid" in child_dict:
         parent_state = int(parent_dict["state_fips"])
         child_cd = int(child_dict["congressional_district_geoid"])
         cd_state = child_cd // 100
@@ -288,8 +271,7 @@ def validate_parent_child_constraints(mapper, connection, target: Stratum):
         return
 
     child_set = {
-        (c.constraint_variable, c.operation, c.value)
-        for c in target.constraints_rel
+        (c.constraint_variable, c.operation, c.value) for c in target.constraints_rel
     }
 
     for var, op, val in parent_rows:
@@ -306,8 +288,7 @@ def validate_parent_child_constraints(mapper, connection, target: Stratum):
             if any(int(cv) == int(val) for cv in child_vals):
                 continue
         raise ValueError(
-            f"Child stratum must include parent constraint "
-            f"({var} {op} {val})"
+            f"Child stratum must include parent constraint ({var} {op} {val})"
         )
 
 
diff --git a/policyengine_us_data/db/create_initial_strata.py b/policyengine_us_data/db/create_initial_strata.py
index 0b9ae8a6d..2af5df7f8 100644
--- a/policyengine_us_data/db/create_initial_strata.py
+++ b/policyengine_us_data/db/create_initial_strata.py
@@ -44,16 +44,12 @@ def fetch_congressional_districts(year):
     )
 
     # Filter out statewide summary records for multi-district states
-    df["n_districts"] = df.groupby("state_fips")["state_fips"].transform(
-        "count"
-    )
+    df["n_districts"] = df.groupby("state_fips")["state_fips"].transform("count")
     df = df[(df["n_districts"] == 1) | (df["district_number"] > 0)].copy()
     df = df.drop(columns=["n_districts"])
 
     df.loc[df["district_number"] == 0, "district_number"] = 1
-    df["congressional_district_geoid"] = (
-        df["state_fips"] * 100 + df["district_number"]
-    )
+    df["congressional_district_geoid"] = df["state_fips"] * 100 + df["district_number"]
 
     df = df[
         [
@@ -129,9 +125,7 @@ def main():
     # Fetch congressional district data
     cd_df = fetch_congressional_districts(year)
 
-    DATABASE_URL = (
-        f"sqlite:///{STORAGE_FOLDER / 'calibration' / 'policy_data.db'}"
-    )
+    DATABASE_URL = f"sqlite:///{STORAGE_FOLDER / 'calibration' / 'policy_data.db'}"
     engine = create_engine(DATABASE_URL)
 
     with Session(engine) as session:
@@ -156,9 +150,7 @@ def main():
         # Create state-level strata
         unique_states = cd_df["state_fips"].unique()
         for state_fips in sorted(unique_states):
-            state_name = STATE_NAMES.get(
-                state_fips, f"State FIPS {state_fips}"
-            )
+            state_name = STATE_NAMES.get(state_fips, f"State FIPS {state_fips}")
             state_stratum = Stratum(
                 parent_stratum_id=us_stratum_id,
                 notes=state_name,
diff --git a/policyengine_us_data/db/etl_age.py b/policyengine_us_data/db/etl_age.py
index 1a12f372f..db5e54da0 100644
--- a/policyengine_us_data/db/etl_age.py
+++ b/policyengine_us_data/db/etl_age.py
@@ -66,9 +66,7 @@ def transform_age_data(age_data, docs):
     # Filter out Puerto Rico's district and state records
     # 5001800US7298 = 118th Congress, 5001900US7298 = 119th Congress
     df_geos = df_data[
-        ~df_data["ucgid_str"].isin(
-            ["5001800US7298", "5001900US7298", "0400000US72"]
-        )
+        ~df_data["ucgid_str"].isin(["5001800US7298", "5001900US7298", "0400000US72"])
     ].copy()
 
     df = df_geos[["ucgid_str"] + AGE_COLS]
@@ -106,9 +104,7 @@ def load_age_data(df_long, geo, year):
         raise ValueError('geo must be one of "National", "State", "District"')
 
     # Prepare to load data -----------
-    DATABASE_URL = (
-        f"sqlite:///{STORAGE_FOLDER / 'calibration' / 'policy_data.db'}"
-    )
+    DATABASE_URL = f"sqlite:///{STORAGE_FOLDER / 'calibration' / 'policy_data.db'}"
     engine = create_engine(DATABASE_URL)
 
     with Session(engine) as session:
diff --git a/policyengine_us_data/db/etl_irs_soi.py b/policyengine_us_data/db/etl_irs_soi.py
index aa8122a59..f2b177957 100644
--- a/policyengine_us_data/db/etl_irs_soi.py
+++ b/policyengine_us_data/db/etl_irs_soi.py
@@ -104,9 +104,7 @@ def make_records(
                 f"WARNING: A59664 values appear to be in thousands (max={max_value:,.0f})"
             )
             print("The IRS may have fixed their data inconsistency.")
-            print(
-                "Please verify and remove the special case handling if confirmed."
-            )
+            print("Please verify and remove the special case handling if confirmed.")
             # Don't apply the fix - data appears to already be in thousands
         else:
             # Convert from dollars to thousands to match other columns
@@ -162,9 +160,7 @@ def convert_district_data(
     """Transforms data from pre- to post- 2020 census districts"""
     df = input_df.copy()
     old_districts_df = df[df["ucgid_str"].str.startswith("5001800US")].copy()
-    old_districts_df = old_districts_df.sort_values("ucgid_str").reset_index(
-        drop=True
-    )
+    old_districts_df = old_districts_df.sort_values("ucgid_str").reset_index(drop=True)
     old_values = old_districts_df["target_value"].to_numpy()
     new_values = mapping_matrix.T @ old_values
 
@@ -289,19 +285,15 @@ def transform_soi_data(raw_df):
     # State -------------------
     # You've got agi_stub == 0 in here, which you want to use any time you don't want to
     # divide data by AGI classes (i.e., agi_stub)
-    state_df = raw_df.copy().loc[
-        (raw_df.STATE != "US") & (raw_df.CONG_DISTRICT == 0)
-    ]
-    state_df["ucgid_str"] = "0400000US" + state_df["STATEFIPS"].astype(
-        str
-    ).str.zfill(2)
+    state_df = raw_df.copy().loc[(raw_df.STATE != "US") & (raw_df.CONG_DISTRICT == 0)]
+    state_df["ucgid_str"] = "0400000US" + state_df["STATEFIPS"].astype(str).str.zfill(2)
 
     # District ------------------
     district_df = raw_df.copy().loc[(raw_df.CONG_DISTRICT > 0)]
 
-    max_cong_district_by_state = raw_df.groupby("STATE")[
-        "CONG_DISTRICT"
-    ].transform("max")
+    max_cong_district_by_state = raw_df.groupby("STATE")["CONG_DISTRICT"].transform(
+        "max"
+    )
     district_df = raw_df.copy().loc[
         (raw_df["CONG_DISTRICT"] > 0) | (max_cong_district_by_state == 0)
     ]
@@ -370,9 +362,7 @@ def transform_soi_data(raw_df):
     # Pre- to Post- 2020 Census redisticting
     mapping = get_district_mapping()
     converted = [
-        convert_district_data(
-            r, mapping["mapping_matrix"], mapping["new_codes"]
-        )
+        convert_district_data(r, mapping["mapping_matrix"], mapping["new_codes"])
         for r in records
     ]
 
@@ -382,9 +372,7 @@ def transform_soi_data(raw_df):
 def load_soi_data(long_dfs, year):
     """Load a list of databases into the db, critically dependent on order"""
 
-    DATABASE_URL = (
-        f"sqlite:///{STORAGE_FOLDER / 'calibration' / 'policy_data.db'}"
-    )
+    DATABASE_URL = f"sqlite:///{STORAGE_FOLDER / 'calibration' / 'policy_data.db'}"
     engine = create_engine(DATABASE_URL)
 
     session = Session(engine)
@@ -458,9 +446,7 @@ def load_soi_data(long_dfs, year):
         filer_strata["state"][state_fips] = state_filer_stratum.stratum_id
 
     # District filer strata
-    for district_geoid, district_geo_stratum_id in geo_strata[
-        "district"
-    ].items():
+    for district_geoid, district_geo_stratum_id in geo_strata["district"].items():
         # Check if district filer stratum exists
         district_filer_stratum = (
             session.query(Stratum)
@@ -492,9 +478,7 @@ def load_soi_data(long_dfs, year):
             session.add(district_filer_stratum)
             session.flush()
 
-        filer_strata["district"][
-            district_geoid
-        ] = district_filer_stratum.stratum_id
+        filer_strata["district"][district_geoid] = district_filer_stratum.stratum_id
 
     session.commit()
 
@@ -525,9 +509,7 @@ def load_soi_data(long_dfs, year):
                     )
                 ]
             elif geo_info["type"] == "state":
-                parent_stratum_id = filer_strata["state"][
-                    geo_info["state_fips"]
-                ]
+                parent_stratum_id = filer_strata["state"][geo_info["state_fips"]]
                 note = f"State FIPS {geo_info['state_fips']} EITC received with {n_children} children (filers)"
                 constraints = [
                     StratumConstraint(
@@ -636,9 +618,7 @@ def load_soi_data(long_dfs, year):
 
             # Store lookup for later use
             if geo_info["type"] == "national":
-                eitc_stratum_lookup["national"][
-                    n_children
-                ] = new_stratum.stratum_id
+                eitc_stratum_lookup["national"][n_children] = new_stratum.stratum_id
             elif geo_info["type"] == "state":
                 key = (geo_info["state_fips"], n_children)
                 eitc_stratum_lookup["state"][key] = new_stratum.stratum_id
@@ -652,8 +632,7 @@ def load_soi_data(long_dfs, year):
     first_agi_index = [
         i
         for i in range(len(long_dfs))
-        if long_dfs[i][["target_variable"]].values[0]
-        == "adjusted_gross_income"
+        if long_dfs[i][["target_variable"]].values[0] == "adjusted_gross_income"
         and long_dfs[i][["breakdown_variable"]].values[0] == "one"
     ][0]
     for j in range(8, first_agi_index, 2):
@@ -676,17 +655,13 @@ def load_soi_data(long_dfs, year):
                 parent_stratum_id = filer_strata["national"]
                 geo_description = "National"
             elif geo_info["type"] == "state":
-                parent_stratum_id = filer_strata["state"][
-                    geo_info["state_fips"]
-                ]
+                parent_stratum_id = filer_strata["state"][geo_info["state_fips"]]
                 geo_description = f"State {geo_info['state_fips']}"
             elif geo_info["type"] == "district":
                 parent_stratum_id = filer_strata["district"][
                     geo_info["congressional_district_geoid"]
                 ]
-                geo_description = (
-                    f"CD {geo_info['congressional_district_geoid']}"
-                )
+                geo_description = f"CD {geo_info['congressional_district_geoid']}"
 
             # Create child stratum with constraint for this IRS variable
             # Note: This stratum will have the constraint that amount_variable > 0
@@ -741,9 +716,7 @@ def load_soi_data(long_dfs, year):
                         StratumConstraint(
                             constraint_variable="congressional_district_geoid",
                             operation="==",
-                            value=str(
-                                geo_info["congressional_district_geoid"]
-                            ),
+                            value=str(geo_info["congressional_district_geoid"]),
                         )
                     )
 
@@ -805,9 +778,7 @@ def load_soi_data(long_dfs, year):
         elif geo_info["type"] == "district":
             stratum = session.get(
                 Stratum,
-                filer_strata["district"][
-                    geo_info["congressional_district_geoid"]
-                ],
+                filer_strata["district"][geo_info["congressional_district_geoid"]],
             )
 
         # Check if target already exists
@@ -822,9 +793,7 @@ def load_soi_data(long_dfs, year):
         )
 
         if existing_target:
-            existing_target.value = agi_values.iloc[i][
-                ["target_value"]
-            ].values[0]
+            existing_target.value = agi_values.iloc[i][["target_value"]].values[0]
         else:
             stratum.targets_rel.append(
                 Target(
@@ -901,9 +870,7 @@ def load_soi_data(long_dfs, year):
             person_count = agi_df.iloc[i][["target_value"]].values[0]
 
             if geo_info["type"] == "state":
-                parent_stratum_id = filer_strata["state"][
-                    geo_info["state_fips"]
-                ]
+                parent_stratum_id = filer_strata["state"][geo_info["state_fips"]]
                 note = f"State FIPS {geo_info['state_fips']} filers, AGI >= {agi_income_lower}, AGI < {agi_income_upper}"
                 constraints = [
                     StratumConstraint(
@@ -1000,9 +967,9 @@ def load_soi_data(long_dfs, year):
             session.flush()
 
             if geo_info["type"] == "state":
-                agi_stratum_lookup["state"][
-                    geo_info["state_fips"]
-                ] = new_stratum.stratum_id
+                agi_stratum_lookup["state"][geo_info["state_fips"]] = (
+                    new_stratum.stratum_id
+                )
             elif geo_info["type"] == "district":
                 agi_stratum_lookup["district"][
                     geo_info["congressional_district_geoid"]
diff --git a/policyengine_us_data/db/etl_medicaid.py b/policyengine_us_data/db/etl_medicaid.py
index dfc19cdcc..2c4677996 100644
--- a/policyengine_us_data/db/etl_medicaid.py
+++ b/policyengine_us_data/db/etl_medicaid.py
@@ -116,9 +116,7 @@ def transform_administrative_medicaid_data(state_admin_df, year):
             ].sort_values("Reporting Period", ascending=False)
 
             if not state_history.empty:
-                fallback_value = state_history.iloc[0][
-                    "Total Medicaid Enrollment"
-                ]
+                fallback_value = state_history.iloc[0]["Total Medicaid Enrollment"]
                 fallback_period = state_history.iloc[0]["Reporting Period"]
                 print(
                     f"  {state_abbrev}: Using {fallback_value:,.0f} from period {fallback_period}"
@@ -153,9 +151,7 @@ def transform_survey_medicaid_data(cd_survey_df):
 
 def load_medicaid_data(long_state, long_cd, year):
 
-    DATABASE_URL = (
-        f"sqlite:///{STORAGE_FOLDER / 'calibration' / 'policy_data.db'}"
-    )
+    DATABASE_URL = f"sqlite:///{STORAGE_FOLDER / 'calibration' / 'policy_data.db'}"
     engine = create_engine(DATABASE_URL)
 
     with Session(engine) as session:
@@ -222,9 +218,7 @@ def load_medicaid_data(long_state, long_cd, year):
             )
             session.add(new_stratum)
             session.flush()
-            medicaid_stratum_lookup["state"][
-                state_fips
-            ] = new_stratum.stratum_id
+            medicaid_stratum_lookup["state"][state_fips] = new_stratum.stratum_id
 
         # District -------------------
         if long_cd is None:
diff --git a/policyengine_us_data/db/etl_national_targets.py b/policyengine_us_data/db/etl_national_targets.py
index 2b78b6d6e..0e87aa84a 100644
--- a/policyengine_us_data/db/etl_national_targets.py
+++ b/policyengine_us_data/db/etl_national_targets.py
@@ -423,14 +423,10 @@ def transform_national_targets(raw_targets):
     # Note: income_tax_positive from CBO and eitc from Treasury need
     # filer constraint
     cbo_non_tax = [
-        t
-        for t in raw_targets["cbo_targets"]
-        if t["variable"] != "income_tax_positive"
+        t for t in raw_targets["cbo_targets"] if t["variable"] != "income_tax_positive"
     ]
     cbo_tax = [
-        t
-        for t in raw_targets["cbo_targets"]
-        if t["variable"] == "income_tax_positive"
+        t for t in raw_targets["cbo_targets"] if t["variable"] == "income_tax_positive"
     ]
 
     all_direct_targets = raw_targets["direct_sum_targets"] + cbo_non_tax
@@ -443,14 +439,10 @@ def transform_national_targets(raw_targets):
     )
 
     direct_df = (
-        pd.DataFrame(all_direct_targets)
-        if all_direct_targets
-        else pd.DataFrame()
+        pd.DataFrame(all_direct_targets) if all_direct_targets else pd.DataFrame()
     )
     tax_filer_df = (
-        pd.DataFrame(all_tax_filer_targets)
-        if all_tax_filer_targets
-        else pd.DataFrame()
+        pd.DataFrame(all_tax_filer_targets) if all_tax_filer_targets else pd.DataFrame()
     )
 
     # Conditional targets stay as list for special processing
@@ -459,9 +451,7 @@ def transform_national_targets(raw_targets):
     return direct_df, tax_filer_df, conditional_targets
 
 
-def load_national_targets(
-    direct_targets_df, tax_filer_df, conditional_targets
-):
+def load_national_targets(direct_targets_df, tax_filer_df, conditional_targets):
     """
     Load national targets into the database.
 
@@ -475,17 +465,13 @@ def load_national_targets(
         List of conditional count targets requiring strata
     """
 
-    DATABASE_URL = (
-        f"sqlite:///{STORAGE_FOLDER / 'calibration' / 'policy_data.db'}"
-    )
+    DATABASE_URL = f"sqlite:///{STORAGE_FOLDER / 'calibration' / 'policy_data.db'}"
     engine = create_engine(DATABASE_URL)
 
     with Session(engine) as session:
         # Get the national stratum
         us_stratum = (
-            session.query(Stratum)
-            .filter(Stratum.parent_stratum_id == None)
-            .first()
+            session.query(Stratum).filter(Stratum.parent_stratum_id == None).first()
         )
 
         if not us_stratum:
@@ -511,9 +497,7 @@ def load_national_targets(
             notes_parts = []
             if pd.notna(target_data.get("notes")):
                 notes_parts.append(target_data["notes"])
-            notes_parts.append(
-                f"Source: {target_data.get('source', 'Unknown')}"
-            )
+            notes_parts.append(f"Source: {target_data.get('source', 'Unknown')}")
             combined_notes = " | ".join(notes_parts)
 
             if existing_target:
@@ -583,9 +567,7 @@ def load_national_targets(
                 notes_parts = []
                 if pd.notna(target_data.get("notes")):
                     notes_parts.append(target_data["notes"])
-                notes_parts.append(
-                    f"Source: {target_data.get('source', 'Unknown')}"
-                )
+                notes_parts.append(f"Source: {target_data.get('source', 'Unknown')}")
                 combined_notes = " | ".join(notes_parts)
 
                 if existing_target:
@@ -699,23 +681,17 @@ def load_national_targets(
                 ]
 
                 session.add(new_stratum)
-                print(
-                    f"Created stratum and target for {constraint_var} enrollment"
-                )
+                print(f"Created stratum and target for {constraint_var} enrollment")
 
         session.commit()
 
         total_targets = (
-            len(direct_targets_df)
-            + len(tax_filer_df)
-            + len(conditional_targets)
+            len(direct_targets_df) + len(tax_filer_df) + len(conditional_targets)
         )
         print(f"\nSuccessfully loaded {total_targets} national targets")
         print(f"  - {len(direct_targets_df)} direct sum targets")
         print(f"  - {len(tax_filer_df)} tax filer targets")
-        print(
-            f"  - {len(conditional_targets)} enrollment count targets (as strata)"
-        )
+        print(f"  - {len(conditional_targets)} enrollment count targets (as strata)")
 
 
 def main():
@@ -730,8 +706,8 @@ def main():
 
     # Transform
     print("Transforming targets...")
-    direct_targets_df, tax_filer_df, conditional_targets = (
-        transform_national_targets(raw_targets)
+    direct_targets_df, tax_filer_df, conditional_targets = transform_national_targets(
+        raw_targets
     )
 
     # Load
diff --git a/policyengine_us_data/db/etl_pregnancy.py b/policyengine_us_data/db/etl_pregnancy.py
index de3fec9dc..e8756cfb5 100644
--- a/policyengine_us_data/db/etl_pregnancy.py
+++ b/policyengine_us_data/db/etl_pregnancy.py
@@ -182,10 +182,7 @@ def extract_female_population(year: int) -> pd.DataFrame:
         data = load_json(cache_file)
     else:
         var_ids = ",".join([f"B01001_{i:03d}E" for i in range(30, 39)])
-        url = (
-            f"https://api.census.gov/data/{year}/acs/acs1"
-            f"?get={var_ids}&for=state:*"
-        )
+        url = f"https://api.census.gov/data/{year}/acs/acs1?get={var_ids}&for=state:*"
         logger.info(f"Fetching ACS B01001 female 15-44 for {year}")
         resp = requests.get(url, timeout=30)
         resp.raise_for_status()
@@ -222,9 +219,7 @@ def transform_pregnancy_data(
     df = births_df.merge(pop_df, on="state_abbrev")
     df["state_fips"] = df["state_abbrev"].map(STATE_ABBREV_TO_FIPS)
     # Point-in-time pregnancy count.
-    df["pregnancy_target"] = (
-        df["births"] * PREGNANCY_DURATION_FRACTION
-    ).round()
+    df["pregnancy_target"] = (df["births"] * PREGNANCY_DURATION_FRACTION).round()
     # Rate for stochastic assignment in the CPS build.
     df["pregnancy_rate"] = (
         df["births"] / df["female_15_44"]
@@ -246,9 +241,7 @@ def load_pregnancy_data(
         df: From transform_pregnancy_data.
         year: Target year for the calibration targets.
     """
-    db_url = (
-        f"sqlite:///" f"{STORAGE_FOLDER / 'calibration' / 'policy_data.db'}"
-    )
+    db_url = f"sqlite:///{STORAGE_FOLDER / 'calibration' / 'policy_data.db'}"
     engine = create_engine(db_url)
 
     with Session(engine) as session:
@@ -273,10 +266,7 @@ def load_pregnancy_data(
         for _, row in df.iterrows():
             state_fips = int(row["state_fips"])
             if state_fips not in geo_strata["state"]:
-                logger.warning(
-                    f"No geographic stratum for FIPS "
-                    f"{state_fips}, skipping"
-                )
+                logger.warning(f"No geographic stratum for FIPS {state_fips}, skipping")
                 continue
 
             parent_id = geo_strata["state"][state_fips]
@@ -368,16 +358,14 @@ def main():
         except Exception as e:
             logger.warning(f"ACS {acs_year} not available: {e}")
     if pop_df is None:
-        raise RuntimeError(
-            f"No ACS population data for " f"{year - 1} or {year - 2}"
-        )
+        raise RuntimeError(f"No ACS population data for {year - 1} or {year - 2}")
 
     df = transform_pregnancy_data(births_df, pop_df)
 
     total_births = df["births"].sum()
     total_target = df["pregnancy_target"].sum()
     print(f"Total births: {total_births:,.0f}")
-    print(f"Pregnancy target (point-in-time): " f"{total_target:,.0f}")
+    print(f"Pregnancy target (point-in-time): {total_target:,.0f}")
 
     load_pregnancy_data(df, year)
     print("Pregnancy calibration targets loaded.")
diff --git a/policyengine_us_data/db/etl_snap.py b/policyengine_us_data/db/etl_snap.py
index 48cb7e773..dc5975a4f 100644
--- a/policyengine_us_data/db/etl_snap.py
+++ b/policyengine_us_data/db/etl_snap.py
@@ -154,9 +154,7 @@ def transform_survey_snap_data(raw_df):
 
 def load_administrative_snap_data(df_states, year):
 
-    DATABASE_URL = (
-        f"sqlite:///{STORAGE_FOLDER / 'calibration' / 'policy_data.db'}"
-    )
+    DATABASE_URL = f"sqlite:///{STORAGE_FOLDER / 'calibration' / 'policy_data.db'}"
     engine = create_engine(DATABASE_URL)
 
     with Session(engine) as session:
@@ -244,9 +242,7 @@ def load_survey_snap_data(survey_df, year, snap_stratum_lookup):
     load_administrative_snap_data, so we don't recreate them.
     """
 
-    DATABASE_URL = (
-        f"sqlite:///{STORAGE_FOLDER / 'calibration' / 'policy_data.db'}"
-    )
+    DATABASE_URL = f"sqlite:///{STORAGE_FOLDER / 'calibration' / 'policy_data.db'}"
     engine = create_engine(DATABASE_URL)
 
     with Session(engine) as session:
diff --git a/policyengine_us_data/db/etl_state_income_tax.py b/policyengine_us_data/db/etl_state_income_tax.py
index a9ffa35c1..95fbc285c 100644
--- a/policyengine_us_data/db/etl_state_income_tax.py
+++ b/policyengine_us_data/db/etl_state_income_tax.py
@@ -320,11 +320,7 @@ def main():
     # Print summary
     total_collections = transformed_df["income_tax_collections"].sum()
     states_with_tax = len(
-        [
-            s
-            for s in transformed_df["state_abbrev"]
-            if s not in NO_INCOME_TAX_STATES
-        ]
+        [s for s in transformed_df["state_abbrev"] if s not in NO_INCOME_TAX_STATES]
     )
 
     logger.info(
@@ -337,9 +333,7 @@ def main():
 
     # Print Ohio specifically (for the issue reference)
     ohio_row = transformed_df[transformed_df["state_abbrev"] == "OH"].iloc[0]
-    logger.info(
-        f"  Ohio (OH): ${ohio_row['income_tax_collections'] / 1e9:.2f}B"
-    )
+    logger.info(f"  Ohio (OH): ${ohio_row['income_tax_collections'] / 1e9:.2f}B")
 
 
 if __name__ == "__main__":
diff --git a/policyengine_us_data/db/validate_database.py b/policyengine_us_data/db/validate_database.py
index 2fa819f29..b57a83c32 100644
--- a/policyengine_us_data/db/validate_database.py
+++ b/policyengine_us_data/db/validate_database.py
@@ -9,9 +9,7 @@
 import pandas as pd
 from policyengine_us.system import system
 
-conn = sqlite3.connect(
-    "policyengine_us_data/storage/calibration/policy_data.db"
-)
+conn = sqlite3.connect("policyengine_us_data/storage/calibration/policy_data.db")
 
 stratum_constraints_df = pd.read_sql("SELECT * FROM stratum_constraints", conn)
 targets_df = pd.read_sql("SELECT * FROM targets", conn)
diff --git a/policyengine_us_data/db/validate_hierarchy.py b/policyengine_us_data/db/validate_hierarchy.py
index 69a176f2e..1c555703f 100644
--- a/policyengine_us_data/db/validate_hierarchy.py
+++ b/policyengine_us_data/db/validate_hierarchy.py
@@ -31,9 +31,7 @@ def validate_geographic_hierarchy(session):
             "ERROR: No US-level stratum found (should have parent_stratum_id = None)"
         )
     else:
-        print(
-            f"✓ US stratum found: {us_stratum.notes} (ID: {us_stratum.stratum_id})"
-        )
+        print(f"✓ US stratum found: {us_stratum.notes} (ID: {us_stratum.stratum_id})")
 
         # Check it has no constraints
         us_constraints = session.exec(
@@ -89,14 +87,10 @@ def validate_geographic_hierarchy(session):
             c for c in constraints if c.constraint_variable == "state_fips"
         ]
         if not state_fips_constraint:
-            errors.append(
-                f"ERROR: State '{state.notes}' has no state_fips constraint"
-            )
+            errors.append(f"ERROR: State '{state.notes}' has no state_fips constraint")
         else:
             state_ids[state.stratum_id] = state.notes
-            print(
-                f"  - {state.notes}: state_fips = {state_fips_constraint[0].value}"
-            )
+            print(f"  - {state.notes}: state_fips = {state_fips_constraint[0].value}")
 
     # Check congressional districts
     print("\nChecking Congressional Districts...")
@@ -112,11 +106,10 @@ def validate_geographic_hierarchy(session):
             )
         ).all()
         constraint_vars = {c.constraint_variable for c in constraints}
-        if (
-            "congressional_district_geoid" in constraint_vars
-            and constraint_vars
-            <= {"state_fips", "congressional_district_geoid"}
-        ):
+        if "congressional_district_geoid" in constraint_vars and constraint_vars <= {
+            "state_fips",
+            "congressional_district_geoid",
+        }:
             all_cds.append(s)
 
     print(f"✓ Found {len(all_cds)} congressional/delegate districts")
@@ -158,9 +151,7 @@ def validate_geographic_hierarchy(session):
                 wyoming_cds.append(child)
 
         if len(wyoming_cds) != 1:
-            errors.append(
-                f"ERROR: Wyoming should have 1 CD, found {len(wyoming_cds)}"
-            )
+            errors.append(f"ERROR: Wyoming should have 1 CD, found {len(wyoming_cds)}")
         else:
             print(f"✓ Wyoming has correct number of CDs: 1")
 
@@ -184,9 +175,7 @@ def validate_geographic_hierarchy(session):
             for cd in wrong_parent_cds[:5]:
                 errors.append(f"  - {cd.notes}")
         else:
-            print(
-                "✓ No congressional districts incorrectly parented to Wyoming"
-            )
+            print("✓ No congressional districts incorrectly parented to Wyoming")
 
     return errors
 
@@ -237,13 +226,10 @@ def validate_demographic_strata(session):
         if actual == expected_total:
             print(f"✓ {domain}: {actual} strata")
         elif actual == 0:
-            errors.append(
-                f"ERROR: {domain} has no strata, " f"expected {expected_total}"
-            )
+            errors.append(f"ERROR: {domain} has no strata, expected {expected_total}")
         else:
             errors.append(
-                f"WARNING: {domain} has {actual} strata, "
-                f"expected {expected_total}"
+                f"WARNING: {domain} has {actual} strata, expected {expected_total}"
             )
 
     # Identify geographic strata (those with only geographic
@@ -291,11 +277,9 @@ def validate_demographic_strata(session):
                 )
         else:
             no_parents += 1
-            errors.append(
-                f"ERROR: Stratum {stratum.stratum_id} " f"has no parent"
-            )
+            errors.append(f"ERROR: Stratum {stratum.stratum_id} has no parent")
 
-    print(f"  Sample of {len(sample_strata)} " f"demographic strata:")
+    print(f"  Sample of {len(sample_strata)} demographic strata:")
     print(f"    - With geographic parent: {correct_parents}")
     print(f"    - With wrong parent: {wrong_parents}")
     print(f"    - With no parent: {no_parents}")
@@ -322,18 +306,12 @@ def validate_constraint_uniqueness(session):
         else:
             hash_counts[stratum.definition_hash] = [stratum]
 
-    duplicates = {
-        h: strata for h, strata in hash_counts.items() if len(strata) > 1
-    }
+    duplicates = {h: strata for h, strata in hash_counts.items() if len(strata) > 1}
 
     if duplicates:
-        errors.append(
-            f"ERROR: Found {len(duplicates)} duplicate definition_hashes"
-        )
+        errors.append(f"ERROR: Found {len(duplicates)} duplicate definition_hashes")
         for hash_val, strata in list(duplicates.items())[:3]:  # Show first 3
-            errors.append(
-                f"  Hash {hash_val[:10]}... appears {len(strata)} times:"
-            )
+            errors.append(f"  Hash {hash_val[:10]}... appears {len(strata)} times:")
             for s in strata[:3]:
                 errors.append(f"    - ID {s.stratum_id}: {s.notes[:50]}")
     else:
@@ -345,9 +323,7 @@ def validate_constraint_uniqueness(session):
 def main():
     """Run all validation checks"""
 
-    DATABASE_URL = (
-        f"sqlite:///{STORAGE_FOLDER / 'calibration' / 'policy_data.db'}"
-    )
+    DATABASE_URL = f"sqlite:///{STORAGE_FOLDER / 'calibration' / 'policy_data.db'}"
     engine = create_engine(DATABASE_URL)
 
     all_errors = []
diff --git a/policyengine_us_data/geography/__init__.py b/policyengine_us_data/geography/__init__.py
index 0bcc73f0b..f20068192 100644
--- a/policyengine_us_data/geography/__init__.py
+++ b/policyengine_us_data/geography/__init__.py
@@ -2,9 +2,7 @@
 import pandas as pd
 import os
 
-ZIP_CODE_DATASET_PATH = (
-    Path(__file__).parent.parent / "geography" / "zip_codes.csv.gz"
-)
+ZIP_CODE_DATASET_PATH = Path(__file__).parent.parent / "geography" / "zip_codes.csv.gz"
 
 # Avoid circular import error when -us-data is initialized
 if os.path.exists(ZIP_CODE_DATASET_PATH):
diff --git a/policyengine_us_data/geography/county_fips.py b/policyengine_us_data/geography/county_fips.py
index 3e5ac5183..6bb2b9e92 100644
--- a/policyengine_us_data/geography/county_fips.py
+++ b/policyengine_us_data/geography/county_fips.py
@@ -21,7 +21,9 @@ def generate_county_fips_2020_dataset():
     # COUNTYFP - Three-digit county portion of FIPS (001 for Autauga County, AL, if STATEFP is 01)
     # COUNTYNAME - County name
 
-    COUNTY_FIPS_2020_URL = "https://www2.census.gov/geo/docs/reference/codes2020/national_county2020.txt"
+    COUNTY_FIPS_2020_URL = (
+        "https://www2.census.gov/geo/docs/reference/codes2020/national_county2020.txt"
+    )
 
     # Download the base tab-delimited data file
     response = requests.get(COUNTY_FIPS_2020_URL)
@@ -68,9 +70,7 @@ def generate_county_fips_2020_dataset():
     csv_buffer = BytesIO()
 
     # Save CSV into buffer object and reset pointer
-    county_fips.to_csv(
-        csv_buffer, index=False, compression="gzip", encoding="utf-8"
-    )
+    county_fips.to_csv(csv_buffer, index=False, compression="gzip", encoding="utf-8")
     csv_buffer.seek(0)
 
     # Upload to Hugging Face
diff --git a/policyengine_us_data/geography/create_zip_code_dataset.py b/policyengine_us_data/geography/create_zip_code_dataset.py
index eb154cf70..981b5de5f 100644
--- a/policyengine_us_data/geography/create_zip_code_dataset.py
+++ b/policyengine_us_data/geography/create_zip_code_dataset.py
@@ -51,7 +51,5 @@
     zcta.set_index("zcta").population[zip_code.zcta].values
     / zip_code.groupby("zcta").zip_code.count()[zip_code.zcta].values
 )
-zip_code["county"] = (
-    zcta_to_county.set_index("zcta").county[zip_code.zcta].values
-)
+zip_code["county"] = zcta_to_county.set_index("zcta").county[zip_code.zcta].values
 zip_code.to_csv("zip_codes.csv", compression="gzip")
diff --git a/policyengine_us_data/parameters/__init__.py b/policyengine_us_data/parameters/__init__.py
index 2fcddb5af..dc385f8e0 100644
--- a/policyengine_us_data/parameters/__init__.py
+++ b/policyengine_us_data/parameters/__init__.py
@@ -65,8 +65,6 @@ def load_take_up_rate(variable_name: str, year: int = 2018):
             break
 
     if applicable_value is None:
-        raise ValueError(
-            f"No take-up rate found for {variable_name} in {year}"
-        )
+        raise ValueError(f"No take-up rate found for {variable_name} in {year}")
 
     return applicable_value
diff --git a/policyengine_us_data/storage/calibration_targets/audit_county_enum.py b/policyengine_us_data/storage/calibration_targets/audit_county_enum.py
index 4849a10ef..fcaf443ff 100644
--- a/policyengine_us_data/storage/calibration_targets/audit_county_enum.py
+++ b/policyengine_us_data/storage/calibration_targets/audit_county_enum.py
@@ -109,9 +109,7 @@ def print_categorized_report(invalid_entries, county_to_states):
     print("\n" + "=" * 60)
     print("WRONG STATE ASSIGNMENTS")
     print("=" * 60)
-    for name, wrong_state, correct_states in sorted(
-        invalid_entries["wrong_state"]
-    ):
+    for name, wrong_state, correct_states in sorted(invalid_entries["wrong_state"]):
         print(f"  {name}")
         print(f"    Listed as: {wrong_state}")
         print(f"    Actually exists in: {', '.join(sorted(correct_states))}")
diff --git a/policyengine_us_data/storage/calibration_targets/make_block_cd_distributions.py b/policyengine_us_data/storage/calibration_targets/make_block_cd_distributions.py
index 6f55e3f7c..f2b634e00 100644
--- a/policyengine_us_data/storage/calibration_targets/make_block_cd_distributions.py
+++ b/policyengine_us_data/storage/calibration_targets/make_block_cd_distributions.py
@@ -78,9 +78,7 @@ def build_block_cd_distributions():
 
     # Create CD geoid in our format: state_fips * 100 + district
     # Examples: AL-1 = 101, NY-10 = 3610, DC = 1198
-    df["cd_geoid"] = df["state_fips"].astype(int) * 100 + df["CD119"].astype(
-        int
-    )
+    df["cd_geoid"] = df["state_fips"].astype(int) * 100 + df["CD119"].astype(int)
 
     # Step 4: Calculate P(block|CD)
     print("\nCalculating block probabilities...")
@@ -97,9 +95,7 @@ def build_block_cd_distributions():
     output = df[["cd_geoid", "GEOID", "probability"]].rename(
         columns={"GEOID": "block_geoid"}
     )
-    output = output.sort_values(
-        ["cd_geoid", "probability"], ascending=[True, False]
-    )
+    output = output.sort_values(["cd_geoid", "probability"], ascending=[True, False])
 
     # Step 6: Save as gzipped CSV (parquet requires pyarrow)
     output_path = STORAGE_FOLDER / "block_cd_distributions.csv.gz"
diff --git a/policyengine_us_data/storage/calibration_targets/make_block_crosswalk.py b/policyengine_us_data/storage/calibration_targets/make_block_crosswalk.py
index 418e725f1..ed0d8cc1a 100644
--- a/policyengine_us_data/storage/calibration_targets/make_block_crosswalk.py
+++ b/policyengine_us_data/storage/calibration_targets/make_block_crosswalk.py
@@ -60,9 +60,7 @@ def download_state_baf(state_fips: str, state_abbr: str) -> dict:
             )
 
         # Place (City/CDP)
-        place_file = (
-            f"BlockAssign_ST{state_fips}_{state_abbr}_INCPLACE_CDP.txt"
-        )
+        place_file = f"BlockAssign_ST{state_fips}_{state_abbr}_INCPLACE_CDP.txt"
         if place_file in z.namelist():
             df = pd.read_csv(z.open(place_file), sep="|", dtype=str)
             results["place"] = df.rename(
@@ -168,23 +166,17 @@ def build_block_crosswalk():
 
                     # Merge other geographies
                     if "sldl" in bafs:
-                        df = df.merge(
-                            bafs["sldl"], on="block_geoid", how="left"
-                        )
+                        df = df.merge(bafs["sldl"], on="block_geoid", how="left")
                     else:
                         df["sldl"] = None
 
                     if "place" in bafs:
-                        df = df.merge(
-                            bafs["place"], on="block_geoid", how="left"
-                        )
+                        df = df.merge(bafs["place"], on="block_geoid", how="left")
                     else:
                         df["place_fips"] = None
 
                     if "vtd" in bafs:
-                        df = df.merge(
-                            bafs["vtd"], on="block_geoid", how="left"
-                        )
+                        df = df.merge(bafs["vtd"], on="block_geoid", how="left")
                     else:
                         df["vtd"] = None
 
diff --git a/policyengine_us_data/storage/calibration_targets/make_county_cd_distributions.py b/policyengine_us_data/storage/calibration_targets/make_county_cd_distributions.py
index ba68a5566..2c91f1ca0 100644
--- a/policyengine_us_data/storage/calibration_targets/make_county_cd_distributions.py
+++ b/policyengine_us_data/storage/calibration_targets/make_county_cd_distributions.py
@@ -126,15 +126,11 @@ def build_county_cd_distributions():
 
     # Create CD geoid in our format: state_fips * 100 + district
     # Examples: AL-1 = 101, NY-10 = 3610, DC = 1198
-    df["cd_geoid"] = df["state_fips"].astype(int) * 100 + df["CD119"].astype(
-        int
-    )
+    df["cd_geoid"] = df["state_fips"].astype(int) * 100 + df["CD119"].astype(int)
 
     # Step 4: Aggregate by (CD, county)
     print("\nAggregating population by CD and county...")
-    cd_county_pop = (
-        df.groupby(["cd_geoid", "county_fips"])["POP20"].sum().reset_index()
-    )
+    cd_county_pop = df.groupby(["cd_geoid", "county_fips"])["POP20"].sum().reset_index()
     print(f"  Unique CD-county pairs: {len(cd_county_pop):,}")
 
     # Step 5: Calculate P(county|CD)
@@ -151,9 +147,7 @@ def build_county_cd_distributions():
     # Step 6: Map county FIPS to enum names
     print("\nMapping county FIPS to enum names...")
     fips_to_enum = build_county_fips_to_enum_mapping()
-    cd_county_pop["county_name"] = cd_county_pop["county_fips"].map(
-        fips_to_enum
-    )
+    cd_county_pop["county_name"] = cd_county_pop["county_fips"].map(fips_to_enum)
 
     # Check for unmapped counties
     unmapped = cd_county_pop[cd_county_pop["county_name"].isna()]
@@ -177,9 +171,7 @@ def build_county_cd_distributions():
 
     # Step 8: Save CSV
     output = cd_county_pop[["cd_geoid", "county_name", "probability"]]
-    output = output.sort_values(
-        ["cd_geoid", "probability"], ascending=[True, False]
-    )
+    output = output.sort_values(["cd_geoid", "probability"], ascending=[True, False])
 
     output_path = STORAGE_FOLDER / "county_cd_distributions.csv"
     output.to_csv(output_path, index=False)
diff --git a/policyengine_us_data/storage/calibration_targets/make_district_mapping.py b/policyengine_us_data/storage/calibration_targets/make_district_mapping.py
index 2b930a2da..bfb4936e8 100644
--- a/policyengine_us_data/storage/calibration_targets/make_district_mapping.py
+++ b/policyengine_us_data/storage/calibration_targets/make_district_mapping.py
@@ -91,9 +91,7 @@ def fetch_block_to_district_map(congress: int) -> pd.DataFrame:
             return bef[["GEOID", f"CD{congress}"]]
 
     else:
-        raise ValueError(
-            f"Congress {congress} is not supported by this function."
-        )
+        raise ValueError(f"Congress {congress} is not supported by this function.")
 
 
 def fetch_block_population(state) -> pd.DataFrame:
@@ -145,9 +143,7 @@ def fetch_block_population(state) -> pd.DataFrame:
     geo_df = pd.DataFrame(geo_records, columns=["LOGRECNO", "GEOID"])
 
     # ---------------- P-file: pull total-population cell ----------------------
-    p1_records = [
-        (p[4], int(p[5])) for p in map(lambda x: x.split("|"), p1_lines)
-    ]
+    p1_records = [(p[4], int(p[5])) for p in map(lambda x: x.split("|"), p1_lines)]
     p1_df = pd.DataFrame(p1_records, columns=["LOGRECNO", "P0010001"])
 
     # ---------------- Merge & finish -----------------------------------------
diff --git a/policyengine_us_data/storage/calibration_targets/pull_hardcoded_targets.py b/policyengine_us_data/storage/calibration_targets/pull_hardcoded_targets.py
index da8b54121..3199a56a2 100644
--- a/policyengine_us_data/storage/calibration_targets/pull_hardcoded_targets.py
+++ b/policyengine_us_data/storage/calibration_targets/pull_hardcoded_targets.py
@@ -42,13 +42,9 @@ def pull_hardcoded_targets():
         "VARIABLE": list(HARD_CODED_TOTALS.keys()),
         "VALUE": list(HARD_CODED_TOTALS.values()),
         "IS_COUNT": [0.0]
-        * len(
-            HARD_CODED_TOTALS
-        ),  # All values are monetary amounts, not counts
+        * len(HARD_CODED_TOTALS),  # All values are monetary amounts, not counts
         "BREAKDOWN_VARIABLE": [np.nan]
-        * len(
-            HARD_CODED_TOTALS
-        ),  # No breakdown variable for hardcoded targets
+        * len(HARD_CODED_TOTALS),  # No breakdown variable for hardcoded targets
         "LOWER_BOUND": [np.nan] * len(HARD_CODED_TOTALS),
         "UPPER_BOUND": [np.nan] * len(HARD_CODED_TOTALS),
     }
diff --git a/policyengine_us_data/storage/calibration_targets/pull_snap_targets.py b/policyengine_us_data/storage/calibration_targets/pull_snap_targets.py
index 1830bdb3a..202286e70 100644
--- a/policyengine_us_data/storage/calibration_targets/pull_snap_targets.py
+++ b/policyengine_us_data/storage/calibration_targets/pull_snap_targets.py
@@ -84,7 +84,9 @@ def extract_usda_snap_data(year=2023):
         session.headers.update(headers)
 
         # Try to visit the main page first to get any necessary cookies
-        main_page = "https://www.fns.usda.gov/pd/supplemental-nutrition-assistance-program-snap"
+        main_page = (
+            "https://www.fns.usda.gov/pd/supplemental-nutrition-assistance-program-snap"
+        )
         try:
             session.get(main_page, timeout=30)
         except:
@@ -167,9 +169,7 @@ def extract_usda_snap_data(year=2023):
         .reset_index(drop=True)
     )
     df_states["GEO_ID"] = "0400000US" + df_states["STATE_FIPS"]
-    df_states["GEO_NAME"] = "state_" + df_states["State"].map(
-        STATE_NAME_TO_ABBREV
-    )
+    df_states["GEO_NAME"] = "state_" + df_states["State"].map(STATE_NAME_TO_ABBREV)
 
     count_df = df_states[["GEO_ID", "GEO_NAME"]].copy()
     count_df["VALUE"] = df_states["Households"]
diff --git a/policyengine_us_data/storage/calibration_targets/pull_soi_targets.py b/policyengine_us_data/storage/calibration_targets/pull_soi_targets.py
index 59050a1b3..ce6d9f887 100644
--- a/policyengine_us_data/storage/calibration_targets/pull_soi_targets.py
+++ b/policyengine_us_data/storage/calibration_targets/pull_soi_targets.py
@@ -129,26 +129,17 @@ def pull_national_soi_variable(
     national_df: Optional[pd.DataFrame] = None,
 ) -> pd.DataFrame:
     """Download and save national AGI totals."""
-    df = pd.read_excel(
-        "https://www.irs.gov/pub/irs-soi/22in54us.xlsx", skiprows=7
-    )
+    df = pd.read_excel("https://www.irs.gov/pub/irs-soi/22in54us.xlsx", skiprows=7)
 
     assert (
-        np.abs(
-            df.iloc[soi_variable_ident, 1]
-            - df.iloc[soi_variable_ident, 2:12].sum()
-        )
+        np.abs(df.iloc[soi_variable_ident, 1] - df.iloc[soi_variable_ident, 2:12].sum())
         < 100
     ), "Row 0 doesn't add up — check the file."
 
     agi_values = df.iloc[soi_variable_ident, 2:12].astype(int).to_numpy()
-    agi_values = np.concatenate(
-        [agi_values[:8], [agi_values[8] + agi_values[9]]]
-    )
+    agi_values = np.concatenate([agi_values[:8], [agi_values[8] + agi_values[9]]])
 
-    agi_brackets = [
-        AGI_STUB_TO_BAND[i] for i in range(1, len(SOI_COLUMNS) + 1)
-    ]
+    agi_brackets = [AGI_STUB_TO_BAND[i] for i in range(1, len(SOI_COLUMNS) + 1)]
 
     result = pd.DataFrame(
         {
@@ -161,9 +152,7 @@ def pull_national_soi_variable(
     )
 
     # final column order
-    result = result[
-        ["GEO_ID", "GEO_NAME", "LOWER_BOUND", "UPPER_BOUND", "VALUE"]
-    ]
+    result = result[["GEO_ID", "GEO_NAME", "LOWER_BOUND", "UPPER_BOUND", "VALUE"]]
     result["IS_COUNT"] = int(is_count)
     result["VARIABLE"] = variable_name
 
@@ -186,9 +175,7 @@ def pull_state_soi_variable(
     state_df: Optional[pd.DataFrame] = None,
 ) -> pd.DataFrame:
     """Download and save state AGI totals."""
-    df = pd.read_csv(
-        "https://www.irs.gov/pub/irs-soi/22in55cmcsv.csv", thousands=","
-    )
+    df = pd.read_csv("https://www.irs.gov/pub/irs-soi/22in55cmcsv.csv", thousands=",")
 
     merged = (
         df[df["AGI_STUB"].isin([9, 10])]
@@ -211,17 +198,11 @@ def pull_state_soi_variable(
         ["GEO_ID", "GEO_NAME", "agi_bracket", soi_variable_ident],
     ].rename(columns={soi_variable_ident: "VALUE"})
 
-    result["LOWER_BOUND"] = result["agi_bracket"].map(
-        lambda b: AGI_BOUNDS[b][0]
-    )
-    result["UPPER_BOUND"] = result["agi_bracket"].map(
-        lambda b: AGI_BOUNDS[b][1]
-    )
+    result["LOWER_BOUND"] = result["agi_bracket"].map(lambda b: AGI_BOUNDS[b][0])
+    result["UPPER_BOUND"] = result["agi_bracket"].map(lambda b: AGI_BOUNDS[b][1])
 
     # final column order
-    result = result[
-        ["GEO_ID", "GEO_NAME", "LOWER_BOUND", "UPPER_BOUND", "VALUE"]
-    ]
+    result = result[["GEO_ID", "GEO_NAME", "LOWER_BOUND", "UPPER_BOUND", "VALUE"]]
     result["IS_COUNT"] = int(is_count)
     result["VARIABLE"] = variable_name
 
@@ -249,9 +230,7 @@ def pull_district_soi_variable(
     df = df[df["agi_stub"] != 0]
 
     df["STATEFIPS"] = df["STATEFIPS"].astype(int).astype(str).str.zfill(2)
-    df["CONG_DISTRICT"] = (
-        df["CONG_DISTRICT"].astype(int).astype(str).str.zfill(2)
-    )
+    df["CONG_DISTRICT"] = df["CONG_DISTRICT"].astype(int).astype(str).str.zfill(2)
     if SOI_DISTRICT_TAX_YEAR >= 2024:
         raise RuntimeError(
             f"SOI tax year {SOI_DISTRICT_TAX_YEAR} may need "
@@ -288,12 +267,8 @@ def pull_district_soi_variable(
         ]
     ].rename(columns={soi_variable_ident: "VALUE"})
 
-    result["LOWER_BOUND"] = result["agi_bracket"].map(
-        lambda b: AGI_BOUNDS[b][0]
-    )
-    result["UPPER_BOUND"] = result["agi_bracket"].map(
-        lambda b: AGI_BOUNDS[b][1]
-    )
+    result["LOWER_BOUND"] = result["agi_bracket"].map(lambda b: AGI_BOUNDS[b][0])
+    result["UPPER_BOUND"] = result["agi_bracket"].map(lambda b: AGI_BOUNDS[b][1])
 
     # if redistrict:
     # result = apply_redistricting(result, variable_name)
@@ -308,25 +283,23 @@ def pull_district_soi_variable(
         # Check that all GEO_IDs are valid
         produced_codes = set(result["GEO_ID"])
         invalid_codes = produced_codes - valid_district_codes
-        assert (
-            not invalid_codes
-        ), f"Invalid district codes after redistricting: {invalid_codes}"
+        assert not invalid_codes, (
+            f"Invalid district codes after redistricting: {invalid_codes}"
+        )
 
         # Check we have exactly 436 districts
-        assert (
-            len(produced_codes) == 436
-        ), f"Expected 436 districts after redistricting, got {len(produced_codes)}"
+        assert len(produced_codes) == 436, (
+            f"Expected 436 districts after redistricting, got {len(produced_codes)}"
+        )
 
         # Check that all GEO_IDs successfully mapped to names
         missing_names = result[result["GEO_NAME"].isna()]["GEO_ID"].unique()
-        assert (
-            len(missing_names) == 0
-        ), f"GEO_IDs without names in ID_TO_NAME mapping: {missing_names}"
+        assert len(missing_names) == 0, (
+            f"GEO_IDs without names in ID_TO_NAME mapping: {missing_names}"
+        )
 
     # final column order
-    result = result[
-        ["GEO_ID", "GEO_NAME", "LOWER_BOUND", "UPPER_BOUND", "VALUE"]
-    ]
+    result = result[["GEO_ID", "GEO_NAME", "LOWER_BOUND", "UPPER_BOUND", "VALUE"]]
     result["IS_COUNT"] = int(is_count)
     result["VARIABLE"] = variable_name
 
@@ -457,15 +430,11 @@ def combine_geography_levels(districts: Optional[bool] = False) -> None:
                     )
 
                 # Get state totals indexed by STATEFIPS
-                state_totals = state.loc[state_mask].set_index("STATEFIPS")[
-                    "VALUE"
-                ]
+                state_totals = state.loc[state_mask].set_index("STATEFIPS")["VALUE"]
 
                 # Get district totals grouped by STATEFIPS
                 district_totals = (
-                    district.loc[district_mask]
-                    .groupby("STATEFIPS")["VALUE"]
-                    .sum()
+                    district.loc[district_mask].groupby("STATEFIPS")["VALUE"].sum()
                 )
 
                 # Check and rescale districts for each state
@@ -480,12 +449,8 @@ def combine_geography_levels(districts: Optional[bool] = False) -> None:
                             f"Districts' sum does not match {fips} state total for {variable}/{count_type} "
                             f"in bracket [{lower}, {upper}]. Rescaling district targets."
                         )
-                        rescale_mask = district_mask & (
-                            district["STATEFIPS"] == fips
-                        )
-                        district.loc[rescale_mask, "VALUE"] *= (
-                            s_total / d_total
-                        )
+                        rescale_mask = district_mask & (district["STATEFIPS"] == fips)
+                        district.loc[rescale_mask, "VALUE"] *= s_total / d_total
 
     # Combine all data
     combined = pd.concat(
diff --git a/policyengine_us_data/storage/upload_completed_datasets.py b/policyengine_us_data/storage/upload_completed_datasets.py
index 8f00b3753..7fcf59581 100644
--- a/policyengine_us_data/storage/upload_completed_datasets.py
+++ b/policyengine_us_data/storage/upload_completed_datasets.py
@@ -94,14 +94,11 @@ def _check_group_has_data(f, name):
             for group_name in REQUIRED_GROUPS:
                 if not _check_group_has_data(f, group_name):
                     errors.append(
-                        f"Required group '{group_name}' missing "
-                        f"or empty in H5 file."
+                        f"Required group '{group_name}' missing or empty in H5 file."
                     )
 
             # At least one income group must have data
-            has_income = any(
-                _check_group_has_data(f, g) for g in INCOME_GROUPS
-            )
+            has_income = any(_check_group_has_data(f, g) for g in INCOME_GROUPS)
             if not has_income:
                 errors.append(
                     f"No income data found. Need at least one of "
diff --git a/policyengine_us_data/tests/test_calibration/test_build_matrix_masking.py b/policyengine_us_data/tests/test_calibration/test_build_matrix_masking.py
index 8db56ddcb..853c6d04b 100644
--- a/policyengine_us_data/tests/test_calibration/test_build_matrix_masking.py
+++ b/policyengine_us_data/tests/test_calibration/test_build_matrix_masking.py
@@ -43,9 +43,7 @@ def matrix_result():
 
     sim = Microsimulation(dataset=DATASET_PATH)
     n_records = sim.calculate("household_id").values.shape[0]
-    geography = assign_random_geography(
-        n_records, n_clones=N_CLONES, seed=SEED
-    )
+    geography = assign_random_geography(n_records, n_clones=N_CLONES, seed=SEED)
     builder = UnifiedMatrixBuilder(
         db_uri=DB_URI,
         time_period=2024,
@@ -124,8 +122,7 @@ def test_clone_visible_only_to_own_state(self, matrix_result):
 
         if state_0 == state_1:
             pytest.skip(
-                "Both clones landed in the same state — "
-                "cannot test cross-state masking"
+                "Both clones landed in the same state — cannot test cross-state masking"
             )
 
         state_targets = targets_df[targets_df["geo_level"] == "state"]
@@ -164,11 +161,7 @@ def test_clone_visible_only_to_own_cd(self, matrix_result):
         vals_0 = X_csc[:, col_0].toarray().ravel()
 
         same_state_other_cd = district_targets[
-            (
-                district_targets["geographic_id"].apply(
-                    lambda g: g.startswith(state_0)
-                )
-            )
+            (district_targets["geographic_id"].apply(lambda g: g.startswith(state_0)))
             & (district_targets["geographic_id"] != cd_0)
         ]
 
@@ -198,10 +191,7 @@ def test_clone_nonzero_for_own_cd(self, matrix_result):
         X_csc = X.tocsc()
         vals_0 = X_csc[:, col_0].toarray().ravel()
 
-        any_nonzero = any(
-            vals_0[row.name] != 0 for _, row in own_cd_targets.iterrows()
-        )
+        any_nonzero = any(vals_0[row.name] != 0 for _, row in own_cd_targets.iterrows())
         assert any_nonzero, (
-            f"Clone 0 should have at least one non-zero entry "
-            f"for its own CD {cd_0}"
+            f"Clone 0 should have at least one non-zero entry for its own CD {cd_0}"
         )
diff --git a/policyengine_us_data/tests/test_calibration/test_clone_and_assign.py b/policyengine_us_data/tests/test_calibration/test_clone_and_assign.py
index 0ba330549..c13c6a89b 100644
--- a/policyengine_us_data/tests/test_calibration/test_clone_and_assign.py
+++ b/policyengine_us_data/tests/test_calibration/test_clone_and_assign.py
@@ -66,13 +66,10 @@ def test_loads_and_normalizes(self, tmp_path):
         csv_path = tmp_path / "block_cd_distributions.csv.gz"
         MOCK_BLOCKS.to_csv(csv_path, index=False, compression="gzip")
         with patch(
-            "policyengine_us_data.calibration"
-            ".clone_and_assign.STORAGE_FOLDER",
+            "policyengine_us_data.calibration.clone_and_assign.STORAGE_FOLDER",
             tmp_path,
         ):
-            blocks, cds, states, probs = (
-                load_global_block_distribution.__wrapped__()
-            )
+            blocks, cds, states, probs = load_global_block_distribution.__wrapped__()
         assert len(blocks) == 9
         np.testing.assert_almost_equal(probs.sum(), 1.0)
 
@@ -80,8 +77,7 @@ def test_state_fips_extracted(self, tmp_path):
         csv_path = tmp_path / "block_cd_distributions.csv.gz"
         MOCK_BLOCKS.to_csv(csv_path, index=False, compression="gzip")
         with patch(
-            "policyengine_us_data.calibration"
-            ".clone_and_assign.STORAGE_FOLDER",
+            "policyengine_us_data.calibration.clone_and_assign.STORAGE_FOLDER",
             tmp_path,
         ):
             _, _, states, _ = load_global_block_distribution.__wrapped__()
@@ -137,8 +133,7 @@ def test_missing_file_raises(self, tmp_path):
         fake = tmp_path / "nonexistent"
         fake.mkdir()
         with patch(
-            "policyengine_us_data.calibration"
-            ".clone_and_assign.STORAGE_FOLDER",
+            "policyengine_us_data.calibration.clone_and_assign.STORAGE_FOLDER",
             fake,
         ):
             with pytest.raises(FileNotFoundError):
diff --git a/policyengine_us_data/tests/test_calibration/test_puf_impute.py b/policyengine_us_data/tests/test_calibration/test_puf_impute.py
index 1bce3cf70..d803486ee 100644
--- a/policyengine_us_data/tests/test_calibration/test_puf_impute.py
+++ b/policyengine_us_data/tests/test_calibration/test_puf_impute.py
@@ -150,9 +150,7 @@ def test_reduces_to_target(self):
                 rng.uniform(500_000, 5_000_000, size=250),
             ]
         )
-        idx = _stratified_subsample_index(
-            income, target_n=10_000, top_pct=99.5
-        )
+        idx = _stratified_subsample_index(income, target_n=10_000, top_pct=99.5)
         assert len(idx) == 10_000
 
     def test_preserves_top_earners(self):
@@ -166,9 +164,7 @@ def test_preserves_top_earners(self):
         threshold = np.percentile(income, 99.5)
         n_top = (income >= threshold).sum()
 
-        idx = _stratified_subsample_index(
-            income, target_n=10_000, top_pct=99.5
-        )
+        idx = _stratified_subsample_index(income, target_n=10_000, top_pct=99.5)
         selected_income = income[idx]
         n_top_selected = (selected_income >= threshold).sum()
         assert n_top_selected == n_top
diff --git a/policyengine_us_data/tests/test_calibration/test_retirement_imputation.py b/policyengine_us_data/tests/test_calibration/test_retirement_imputation.py
index ce261a02b..cd4b45245 100644
--- a/policyengine_us_data/tests/test_calibration/test_retirement_imputation.py
+++ b/policyengine_us_data/tests/test_calibration/test_retirement_imputation.py
@@ -54,14 +54,8 @@ def _make_mock_data(n_persons=20, n_households=5, time_period=2024):
         "person_household_id": {time_period: hh_ids_person},
         "person_tax_unit_id": {time_period: hh_ids_person.copy()},
         "person_spm_unit_id": {time_period: hh_ids_person.copy()},
-        "age": {
-            time_period: rng.integers(18, 80, size=n_persons).astype(
-                np.float32
-            )
-        },
-        "is_male": {
-            time_period: rng.integers(0, 2, size=n_persons).astype(np.float32)
-        },
+        "age": {time_period: rng.integers(18, 80, size=n_persons).astype(np.float32)},
+        "is_male": {time_period: rng.integers(0, 2, size=n_persons).astype(np.float32)},
         "household_weight": {time_period: np.ones(n_households) * 1000},
         "employment_income": {
             time_period: rng.uniform(0, 100_000, n_persons).astype(np.float32)
@@ -71,9 +65,7 @@ def _make_mock_data(n_persons=20, n_households=5, time_period=2024):
         },
     }
     for var in CPS_RETIREMENT_VARIABLES:
-        data[var] = {
-            time_period: rng.uniform(0, 5000, n_persons).astype(np.float32)
-        }
+        data[var] = {time_period: rng.uniform(0, 5000, n_persons).astype(np.float32)}
     return data
 
 
@@ -139,9 +131,9 @@ class TestConstants:
     def test_retirement_vars_not_in_imputed(self):
         """Retirement vars must NOT be in IMPUTED_VARIABLES."""
         for var in CPS_RETIREMENT_VARIABLES:
-            assert (
-                var not in IMPUTED_VARIABLES
-            ), f"{var} should not be in IMPUTED_VARIABLES"
+            assert var not in IMPUTED_VARIABLES, (
+                f"{var} should not be in IMPUTED_VARIABLES"
+            )
 
     def test_retirement_vars_not_in_overridden(self):
         for var in CPS_RETIREMENT_VARIABLES:
@@ -171,14 +163,12 @@ def test_retirement_predictors_include_demographics(self):
     def test_income_predictors_in_imputed_variables(self):
         """All income predictors must be available from PUF QRF."""
         for var in RETIREMENT_INCOME_PREDICTORS:
-            assert (
-                var in IMPUTED_VARIABLES
-            ), f"{var} not in IMPUTED_VARIABLES — won't be in puf_imputations"
+            assert var in IMPUTED_VARIABLES, (
+                f"{var} not in IMPUTED_VARIABLES — won't be in puf_imputations"
+            )
 
     def test_predictors_are_combined_lists(self):
-        expected = (
-            RETIREMENT_DEMOGRAPHIC_PREDICTORS + RETIREMENT_INCOME_PREDICTORS
-        )
+        expected = RETIREMENT_DEMOGRAPHIC_PREDICTORS + RETIREMENT_INCOME_PREDICTORS
         assert RETIREMENT_PREDICTORS == expected
 
 
@@ -270,18 +260,12 @@ def _setup(self):
         self.puf_imputations = {
             "employment_income": emp,
             "self_employment_income": se,
-            "taxable_interest_income": rng.uniform(0, 5_000, self.n).astype(
-                np.float32
-            ),
+            "taxable_interest_income": rng.uniform(0, 5_000, self.n).astype(np.float32),
             "qualified_dividend_income": rng.uniform(0, 3_000, self.n).astype(
                 np.float32
             ),
-            "taxable_pension_income": rng.uniform(0, 20_000, self.n).astype(
-                np.float32
-            ),
-            "social_security": rng.uniform(0, 15_000, self.n).astype(
-                np.float32
-            ),
+            "taxable_pension_income": rng.uniform(0, 20_000, self.n).astype(np.float32),
+            "social_security": rng.uniform(0, 15_000, self.n).astype(np.float32),
         }
 
         self.cps_df = _make_cps_df(self.n, rng)
@@ -319,10 +303,7 @@ def _uniform_preds(self, value):
     def _random_preds(self, low, high, seed=99):
         rng = np.random.default_rng(seed)
         return pd.DataFrame(
-            {
-                var: rng.uniform(low, high, self.n)
-                for var in CPS_RETIREMENT_VARIABLES
-            }
+            {var: rng.uniform(low, high, self.n) for var in CPS_RETIREMENT_VARIABLES}
         )
 
     def test_returns_all_retirement_vars(self):
@@ -367,27 +348,23 @@ def test_401k_zero_when_no_wages(self):
             "traditional_401k_contributions",
             "roth_401k_contributions",
         ):
-            assert np.all(
-                result[var][zero_wage] == 0
-            ), f"{var} should be 0 when employment_income is 0"
+            assert np.all(result[var][zero_wage] == 0), (
+                f"{var} should be 0 when employment_income is 0"
+            )
 
     def test_se_pension_zero_when_no_se_income(self):
         result = self._call_with_mocks(self._uniform_preds(5_000.0))
         zero_se = self.puf_imputations["self_employment_income"] == 0
         assert zero_se.sum() == 20
-        assert np.all(
-            result["self_employed_pension_contributions"][zero_se] == 0
-        )
+        assert np.all(result["self_employed_pension_contributions"][zero_se] == 0)
 
     def test_catch_up_age_threshold(self):
         """Records age >= 50 get higher caps than younger."""
-        self.cps_df["age"] = np.concatenate(
-            [np.full(25, 30.0), np.full(25, 55.0)]
-        )
+        self.cps_df["age"] = np.concatenate([np.full(25, 30.0), np.full(25, 55.0)])
         # All have positive income
-        self.puf_imputations["employment_income"] = np.full(
-            self.n, 100_000.0
-        ).astype(np.float32)
+        self.puf_imputations["employment_income"] = np.full(self.n, 100_000.0).astype(
+            np.float32
+        )
 
         lim = _get_retirement_limits(self.time_period)
         val = float(lim["401k"]) + 1000  # 24000
@@ -404,9 +381,7 @@ def test_catch_up_age_threshold(self):
 
     def test_ira_catch_up_threshold(self):
         """IRA catch-up also works for age >= 50."""
-        self.cps_df["age"] = np.concatenate(
-            [np.full(25, 30.0), np.full(25, 55.0)]
-        )
+        self.cps_df["age"] = np.concatenate([np.full(25, 30.0), np.full(25, 55.0)])
         lim = _get_retirement_limits(self.time_period)
         val = float(lim["ira"]) + 500  # 7500
 
@@ -432,9 +407,7 @@ def test_401k_nonzero_for_positive_wages(self):
     def test_se_pension_nonzero_for_positive_se(self):
         result = self._call_with_mocks(self._uniform_preds(5_000.0))
         pos_se = self.puf_imputations["self_employment_income"] > 0
-        assert np.all(
-            result["self_employed_pension_contributions"][pos_se] > 0
-        )
+        assert np.all(result["self_employed_pension_contributions"][pos_se] > 0)
 
     def test_se_pension_capped_at_rate_times_income(self):
         """SE pension should not exceed 25% of SE income."""
@@ -460,9 +433,7 @@ def test_qrf_failure_returns_zeros(self):
 
         # Make a QRF that crashes on fit
         mock_qrf_cls = MagicMock()
-        mock_qrf_cls.return_value.fit.side_effect = RuntimeError(
-            "QRF exploded"
-        )
+        mock_qrf_cls.return_value.fit.side_effect = RuntimeError("QRF exploded")
 
         qrf_mod = sys.modules["microimpute.models.qrf"]
         old_qrf = getattr(qrf_mod, "QRF", None)
@@ -488,9 +459,7 @@ def test_training_data_failure_returns_zeros(self):
         import sys
 
         mock_sim = MagicMock()
-        mock_sim.calculate_dataframe.side_effect = ValueError(
-            "missing variable"
-        )
+        mock_sim.calculate_dataframe.side_effect = ValueError("missing variable")
 
         qrf_mod = sys.modules["microimpute.models.qrf"]
         old_qrf = getattr(qrf_mod, "QRF", None)
@@ -540,9 +509,7 @@ def test_retirement_vars_use_imputed_when_available(self):
         state_fips = np.array([1, 2, 36, 6, 48])
         n = 20
 
-        fake_retirement = {
-            var: np.full(n, 999.0) for var in CPS_RETIREMENT_VARIABLES
-        }
+        fake_retirement = {var: np.full(n, 999.0) for var in CPS_RETIREMENT_VARIABLES}
 
         with (
             patch(
@@ -551,16 +518,14 @@ def test_retirement_vars_use_imputed_when_available(self):
                 return_value=fake_retirement,
             ),
             patch(
-                "policyengine_us_data.calibration.puf_impute"
-                "._run_qrf_imputation",
+                "policyengine_us_data.calibration.puf_impute._run_qrf_imputation",
                 return_value=(
                     {v: np.zeros(n) for v in IMPUTED_VARIABLES},
                     {},
                 ),
             ),
             patch(
-                "policyengine_us_data.calibration.puf_impute"
-                "._impute_weeks_unemployed",
+                "policyengine_us_data.calibration.puf_impute._impute_weeks_unemployed",
                 return_value=np.zeros(n),
             ),
             patch(_MSIM_PATCH),
@@ -585,12 +550,8 @@ def test_cps_half_unchanged_with_imputation(self):
         state_fips = np.array([1, 2, 36, 6, 48])
         n = 20
 
-        originals = {
-            var: data[var][2024].copy() for var in CPS_RETIREMENT_VARIABLES
-        }
-        fake_retirement = {
-            var: np.zeros(n) for var in CPS_RETIREMENT_VARIABLES
-        }
+        originals = {var: data[var][2024].copy() for var in CPS_RETIREMENT_VARIABLES}
+        fake_retirement = {var: np.zeros(n) for var in CPS_RETIREMENT_VARIABLES}
 
         with (
             patch(
@@ -599,16 +560,14 @@ def test_cps_half_unchanged_with_imputation(self):
                 return_value=fake_retirement,
             ),
             patch(
-                "policyengine_us_data.calibration.puf_impute"
-                "._run_qrf_imputation",
+                "policyengine_us_data.calibration.puf_impute._run_qrf_imputation",
                 return_value=(
                     {v: np.zeros(n) for v in IMPUTED_VARIABLES},
                     {},
                 ),
             ),
             patch(
-                "policyengine_us_data.calibration.puf_impute"
-                "._impute_weeks_unemployed",
+                "policyengine_us_data.calibration.puf_impute._impute_weeks_unemployed",
                 return_value=np.zeros(n),
             ),
             patch(_MSIM_PATCH),
@@ -623,9 +582,7 @@ def test_cps_half_unchanged_with_imputation(self):
             )
 
         for var in CPS_RETIREMENT_VARIABLES:
-            np.testing.assert_array_equal(
-                result[var][2024][:n], originals[var]
-            )
+            np.testing.assert_array_equal(result[var][2024][:n], originals[var])
 
     def test_puf_half_gets_zero_retirement_for_zero_imputed(self):
         """When imputation returns zeros, PUF half should be zero."""
@@ -633,9 +590,7 @@ def test_puf_half_gets_zero_retirement_for_zero_imputed(self):
         state_fips = np.array([1, 2, 36, 6, 48])
         n = 20
 
-        fake_retirement = {
-            var: np.zeros(n) for var in CPS_RETIREMENT_VARIABLES
-        }
+        fake_retirement = {var: np.zeros(n) for var in CPS_RETIREMENT_VARIABLES}
 
         with (
             patch(
@@ -644,16 +599,14 @@ def test_puf_half_gets_zero_retirement_for_zero_imputed(self):
                 return_value=fake_retirement,
             ),
             patch(
-                "policyengine_us_data.calibration.puf_impute"
-                "._run_qrf_imputation",
+                "policyengine_us_data.calibration.puf_impute._run_qrf_imputation",
                 return_value=(
                     {v: np.zeros(n) for v in IMPUTED_VARIABLES},
                     {},
                 ),
             ),
             patch(
-                "policyengine_us_data.calibration.puf_impute"
-                "._impute_weeks_unemployed",
+                "policyengine_us_data.calibration.puf_impute._impute_weeks_unemployed",
                 return_value=np.zeros(n),
             ),
             patch(_MSIM_PATCH),
@@ -707,6 +660,6 @@ def test_401k_ira_from_policyengine_us(self):
             ours = _get_retirement_limits(year)
             pe = pe_limits(year)
             for key in ["401k", "401k_catch_up", "ira", "ira_catch_up"]:
-                assert (
-                    ours[key] == pe[key]
-                ), f"Year {year} key {key}: {ours[key]} != {pe[key]}"
+                assert ours[key] == pe[key], (
+                    f"Year {year} key {key}: {ours[key]} != {pe[key]}"
+                )
diff --git a/policyengine_us_data/tests/test_calibration/test_source_impute.py b/policyengine_us_data/tests/test_calibration/test_source_impute.py
index c69ec653a..517a559ef 100644
--- a/policyengine_us_data/tests/test_calibration/test_source_impute.py
+++ b/policyengine_us_data/tests/test_calibration/test_source_impute.py
@@ -71,9 +71,7 @@ def test_scf_variables_defined(self):
 
     def test_all_source_variables_defined(self):
         expected = (
-            ACS_IMPUTED_VARIABLES
-            + SIPP_IMPUTED_VARIABLES
-            + SCF_IMPUTED_VARIABLES
+            ACS_IMPUTED_VARIABLES + SIPP_IMPUTED_VARIABLES + SCF_IMPUTED_VARIABLES
         )
         assert ALL_SOURCE_VARIABLES == expected
 
diff --git a/policyengine_us_data/tests/test_calibration/test_unified_matrix_builder.py b/policyengine_us_data/tests/test_calibration/test_unified_matrix_builder.py
index ea2d49c5c..938f8a92f 100644
--- a/policyengine_us_data/tests/test_calibration/test_unified_matrix_builder.py
+++ b/policyengine_us_data/tests/test_calibration/test_unified_matrix_builder.py
@@ -130,11 +130,7 @@ def _insert_aca_ptc_data(engine):
         ]
         for tid, sid, var, val, period in targets:
             conn.execute(
-                text(
-                    "INSERT INTO targets "
-                    "VALUES (:tid, :sid, :var, :val, "
-                    ":period, 1)"
-                ),
+                text("INSERT INTO targets VALUES (:tid, :sid, :var, :val, :period, 1)"),
                 {
                     "tid": tid,
                     "sid": sid,
@@ -193,9 +189,7 @@ def test_geographic_id_populated(self):
         df = b._query_targets({"domain_variables": ["aca_ptc"]})
         national = df[df["geo_level"] == "national"]
         self.assertTrue((national["geographic_id"] == "US").all())
-        state_ca = df[
-            (df["geo_level"] == "state") & (df["geographic_id"] == "6")
-        ]
+        state_ca = df[(df["geo_level"] == "state") & (df["geographic_id"] == "6")]
         self.assertGreater(len(state_ca), 0)
 
 
@@ -227,9 +221,9 @@ def _get_targets_with_uprating(self, cpi_factor=1.1, pop_factor=1.02):
         }
         df["original_value"] = df["value"].copy()
         df["uprating_factor"] = df.apply(
-            lambda row: b._get_uprating_info(
-                row["variable"], row["period"], factors
-            )[0],
+            lambda row: b._get_uprating_info(row["variable"], row["period"], factors)[
+                0
+            ],
             axis=1,
         )
         df["value"] = df["original_value"] * df["uprating_factor"]
@@ -254,9 +248,7 @@ def test_cd_sums_match_uprated_state(self):
                 & (result["geo_level"] == "district")
                 & (
                     result["geographic_id"].apply(
-                        lambda g, s=sf: (
-                            int(g) // 100 == s if g.isdigit() else False
-                        )
+                        lambda g, s=sf: int(g) // 100 == s if g.isdigit() else False
                     )
                 )
             ]
@@ -290,8 +282,7 @@ def test_hif_is_one_when_cds_sum_to_state(self):
         b, df, factors = self._get_targets_with_uprating(cpi_factor=1.15)
         result = b._apply_hierarchical_uprating(df, ["aca_ptc"], factors)
         cd_aca = result[
-            (result["variable"] == "aca_ptc")
-            & (result["geo_level"] == "district")
+            (result["variable"] == "aca_ptc") & (result["geo_level"] == "district")
         ]
         for _, row in cd_aca.iterrows():
             self.assertAlmostEqual(row["hif"], 1.0, places=6)
diff --git a/policyengine_us_data/tests/test_constraint_validation.py b/policyengine_us_data/tests/test_constraint_validation.py
index 29920475f..e494f5c92 100644
--- a/policyengine_us_data/tests/test_constraint_validation.py
+++ b/policyengine_us_data/tests/test_constraint_validation.py
@@ -138,9 +138,7 @@ def test_conflicting_lower_bounds(self):
             Constraint(variable="age", operation=">", value="20"),
             Constraint(variable="age", operation=">=", value="25"),
         ]
-        with pytest.raises(
-            ConstraintValidationError, match="conflicting lower bounds"
-        ):
+        with pytest.raises(ConstraintValidationError, match="conflicting lower bounds"):
             ensure_consistent_constraint_set(constraints)
 
     def test_conflicting_upper_bounds(self):
@@ -149,9 +147,7 @@ def test_conflicting_upper_bounds(self):
             Constraint(variable="age", operation="<", value="50"),
             Constraint(variable="age", operation="<=", value="45"),
         ]
-        with pytest.raises(
-            ConstraintValidationError, match="conflicting upper bounds"
-        ):
+        with pytest.raises(ConstraintValidationError, match="conflicting upper bounds"):
             ensure_consistent_constraint_set(constraints)
 
 
@@ -193,9 +189,7 @@ class TestNonNumericValues:
     def test_string_equality_valid(self):
         """medicaid_enrolled == 'True' should pass."""
         constraints = [
-            Constraint(
-                variable="medicaid_enrolled", operation="==", value="True"
-            ),
+            Constraint(variable="medicaid_enrolled", operation="==", value="True"),
         ]
         ensure_consistent_constraint_set(constraints)  # No exception
 
diff --git a/policyengine_us_data/tests/test_database.py b/policyengine_us_data/tests/test_database.py
index c9cf14c7c..e0e329e53 100644
--- a/policyengine_us_data/tests/test_database.py
+++ b/policyengine_us_data/tests/test_database.py
@@ -14,7 +14,7 @@
 
 @pytest.fixture
 def engine(tmp_path):
-    db_uri = f"sqlite:///{tmp_path/'test.db'}"
+    db_uri = f"sqlite:///{tmp_path / 'test.db'}"
     return create_database(db_uri)
 
 
diff --git a/policyengine_us_data/tests/test_database_build.py b/policyengine_us_data/tests/test_database_build.py
index 3c0e4fb3f..87a6ce082 100644
--- a/policyengine_us_data/tests/test_database_build.py
+++ b/policyengine_us_data/tests/test_database_build.py
@@ -23,8 +23,7 @@
 # HuggingFace URL for the stratified CPS dataset.
 # ETL scripts use this only to derive the time period (2024).
 HF_DATASET = (
-    "hf://policyengine/policyengine-us-data"
-    "/calibration/stratified_extended_cps.h5"
+    "hf://policyengine/policyengine-us-data/calibration/stratified_extended_cps.h5"
 )
 
 # Scripts run in the same order as `make database` in the Makefile.
@@ -80,9 +79,7 @@ def built_db():
             )
 
     if errors:
-        pytest.fail(
-            f"{len(errors)} ETL script(s) failed:\n" + "\n\n".join(errors)
-        )
+        pytest.fail(f"{len(errors)} ETL script(s) failed:\n" + "\n\n".join(errors))
 
     assert DB_PATH.exists(), "policy_data.db was not created"
     return DB_PATH
@@ -99,9 +96,7 @@ def test_expected_tables_exist(built_db):
     conn = sqlite3.connect(str(built_db))
     tables = {
         row[0]
-        for row in conn.execute(
-            "SELECT name FROM sqlite_master WHERE type='table'"
-        )
+        for row in conn.execute("SELECT name FROM sqlite_master WHERE type='table'")
     }
     conn.close()
 
@@ -126,8 +121,7 @@ def test_national_targets_loaded(built_db):
     variables = {r[0] for r in rows}
     for expected in ["snap", "social_security", "ssi"]:
         assert expected in variables, (
-            f"National target '{expected}' missing. "
-            f"Found: {sorted(variables)}"
+            f"National target '{expected}' missing. Found: {sorted(variables)}"
         )
 
 
@@ -153,8 +147,7 @@ def test_state_income_tax_targets(built_db):
     ca_val = state_totals.get("06") or state_totals.get("6")
     assert ca_val is not None, "California (FIPS 06) target missing"
     assert ca_val > 100e9, (
-        f"California income tax should be > $100B, "
-        f"got ${ca_val / 1e9:.1f}B"
+        f"California income tax should be > $100B, got ${ca_val / 1e9:.1f}B"
     )
 
 
@@ -176,9 +169,7 @@ def test_all_target_variables_exist_in_policyengine(built_db):
     from policyengine_us.system import system
 
     conn = sqlite3.connect(str(built_db))
-    variables = {
-        r[0] for r in conn.execute("SELECT DISTINCT variable FROM targets")
-    }
+    variables = {r[0] for r in conn.execute("SELECT DISTINCT variable FROM targets")}
     conn.close()
 
     missing = [v for v in variables if v not in system.variables]
diff --git a/policyengine_us_data/tests/test_datasets/test_county_fips.py b/policyengine_us_data/tests/test_datasets/test_county_fips.py
index d692cf559..ac2eb9faf 100644
--- a/policyengine_us_data/tests/test_datasets/test_county_fips.py
+++ b/policyengine_us_data/tests/test_datasets/test_county_fips.py
@@ -48,9 +48,7 @@ def mock_upload_to_hf():
 def mock_local_folder():
     """Mock the LOCAL_FOLDER"""
     mock_path = MagicMock()
-    with patch(
-        "policyengine_us_data.geography.county_fips.LOCAL_FOLDER", mock_path
-    ):
+    with patch("policyengine_us_data.geography.county_fips.LOCAL_FOLDER", mock_path):
         yield mock_path
 
 
@@ -104,7 +102,6 @@ def test_download_failure():
         patch("requests.get", return_value=failed_response),
         pytest.raises(ValueError) as excinfo,
     ):
-
         # Run the function, expect ValueError
         generate_county_fips_2020_dataset()
 
@@ -180,6 +177,4 @@ def test_huggingface_upload(mock_upload_to_hf, mock_to_csv, mock_requests_get):
     assert call_kwargs["repo_file_path"] == "county_fips_2020.csv.gz"
 
     # Verify that the first parameter is a BytesIO instance
-    assert isinstance(
-        mock_upload_to_hf.call_args[1]["local_file_path"], BytesIO
-    )
+    assert isinstance(mock_upload_to_hf.call_args[1]["local_file_path"], BytesIO)
diff --git a/policyengine_us_data/tests/test_datasets/test_cps.py b/policyengine_us_data/tests/test_datasets/test_cps.py
index bbfba73bd..f03469393 100644
--- a/policyengine_us_data/tests/test_datasets/test_cps.py
+++ b/policyengine_us_data/tests/test_datasets/test_cps.py
@@ -13,18 +13,11 @@ def test_cps_has_auto_loan_interest():
     RELATIVE_TOLERANCE = 0.4
 
     assert (
-        abs(
-            sim.calculate("auto_loan_interest").sum()
-            / AUTO_LOAN_INTEREST_TARGET
-            - 1
-        )
+        abs(sim.calculate("auto_loan_interest").sum() / AUTO_LOAN_INTEREST_TARGET - 1)
         < RELATIVE_TOLERANCE
     )
     assert (
-        abs(
-            sim.calculate("auto_loan_balance").sum() / AUTO_LOAN_BALANCE_TARGET
-            - 1
-        )
+        abs(sim.calculate("auto_loan_balance").sum() / AUTO_LOAN_BALANCE_TARGET - 1)
         < RELATIVE_TOLERANCE
     )
 
@@ -38,11 +31,7 @@ def test_cps_has_fsla_overtime_premium():
     OVERTIME_PREMIUM_TARGET = 70e9
     RELATIVE_TOLERANCE = 0.2
     assert (
-        abs(
-            sim.calculate("fsla_overtime_premium").sum()
-            / OVERTIME_PREMIUM_TARGET
-            - 1
-        )
+        abs(sim.calculate("fsla_overtime_premium").sum() / OVERTIME_PREMIUM_TARGET - 1)
         < RELATIVE_TOLERANCE
     )
 
diff --git a/policyengine_us_data/tests/test_datasets/test_dataset_sanity.py b/policyengine_us_data/tests/test_datasets/test_dataset_sanity.py
index 4aeb13e6f..4a2d17f58 100644
--- a/policyengine_us_data/tests/test_datasets/test_dataset_sanity.py
+++ b/policyengine_us_data/tests/test_datasets/test_dataset_sanity.py
@@ -41,27 +41,23 @@ def test_ecps_employment_income_positive(ecps_sim):
 
 def test_ecps_self_employment_income_positive(ecps_sim):
     total = ecps_sim.calculate("self_employment_income").sum()
-    assert (
-        total > 50e9
-    ), f"self_employment_income sum is {total:.2e}, expected > 50B."
+    assert total > 50e9, f"self_employment_income sum is {total:.2e}, expected > 50B."
 
 
 def test_ecps_household_count(ecps_sim):
     """Household count should be roughly 130-160M."""
     total_hh = ecps_sim.calculate("household_weight").values.sum()
-    assert (
-        100e6 < total_hh < 200e6
-    ), f"Total households = {total_hh:.2e}, expected 100M-200M."
+    assert 100e6 < total_hh < 200e6, (
+        f"Total households = {total_hh:.2e}, expected 100M-200M."
+    )
 
 
 def test_ecps_person_count(ecps_sim):
     """Weighted person count should be roughly 330M."""
-    total_people = ecps_sim.calculate(
-        "household_weight", map_to="person"
-    ).values.sum()
-    assert (
-        250e6 < total_people < 400e6
-    ), f"Total people = {total_people:.2e}, expected 250M-400M."
+    total_people = ecps_sim.calculate("household_weight", map_to="person").values.sum()
+    assert 250e6 < total_people < 400e6, (
+        f"Total people = {total_people:.2e}, expected 250M-400M."
+    )
 
 
 def test_ecps_poverty_rate_reasonable(ecps_sim):
@@ -85,7 +81,7 @@ def test_ecps_mean_employment_income_reasonable(ecps_sim):
     income = ecps_sim.calculate("employment_income", map_to="person")
     mean = income.mean()
     assert 15_000 < mean < 80_000, (
-        f"Mean employment income = ${mean:,.0f}, " "expected $15k-$80k."
+        f"Mean employment income = ${mean:,.0f}, expected $15k-$80k."
     )
 
 
@@ -94,9 +90,7 @@ def test_ecps_mean_employment_income_reasonable(ecps_sim):
 
 def test_cps_employment_income_positive(cps_sim):
     total = cps_sim.calculate("employment_income").sum()
-    assert total > 5e12, (
-        f"CPS employment_income sum is {total:.2e}, " "expected > 5T."
-    )
+    assert total > 5e12, f"CPS employment_income sum is {total:.2e}, expected > 5T."
 
 
 def test_cps_household_count(cps_sim):
@@ -122,24 +116,20 @@ def sparse_sim():
 def test_sparse_employment_income_positive(sparse_sim):
     """Sparse dataset employment income must be in the trillions."""
     total = sparse_sim.calculate("employment_income").sum()
-    assert (
-        total > 5e12
-    ), f"Sparse employment_income sum is {total:.2e}, expected > 5T."
+    assert total > 5e12, f"Sparse employment_income sum is {total:.2e}, expected > 5T."
 
 
 def test_sparse_household_count(sparse_sim):
     total_hh = sparse_sim.calculate("household_weight").values.sum()
-    assert (
-        100e6 < total_hh < 200e6
-    ), f"Sparse total households = {total_hh:.2e}, expected 100M-200M."
+    assert 100e6 < total_hh < 200e6, (
+        f"Sparse total households = {total_hh:.2e}, expected 100M-200M."
+    )
 
 
 def test_sparse_poverty_rate_reasonable(sparse_sim):
     in_poverty = sparse_sim.calculate("person_in_poverty", map_to="person")
     rate = in_poverty.mean()
-    assert (
-        0.05 < rate < 0.25
-    ), f"Sparse poverty rate = {rate:.1%}, expected 5-25%."
+    assert 0.05 < rate < 0.25, f"Sparse poverty rate = {rate:.1%}, expected 5-25%."
 
 
 # ── File size checks ───────────────────────────────────────────
@@ -153,6 +143,6 @@ def test_ecps_file_size():
     if not path.exists():
         pytest.skip("enhanced_cps_2024.h5 not found")
     size_mb = path.stat().st_size / (1024 * 1024)
-    assert (
-        size_mb > 100
-    ), f"enhanced_cps_2024.h5 is only {size_mb:.1f}MB, expected >100MB"
+    assert size_mb > 100, (
+        f"enhanced_cps_2024.h5 is only {size_mb:.1f}MB, expected >100MB"
+    )
diff --git a/policyengine_us_data/tests/test_datasets/test_enhanced_cps.py b/policyengine_us_data/tests/test_datasets/test_enhanced_cps.py
index b3edbc9e3..298de5a4a 100644
--- a/policyengine_us_data/tests/test_datasets/test_enhanced_cps.py
+++ b/policyengine_us_data/tests/test_datasets/test_enhanced_cps.py
@@ -50,10 +50,10 @@ def test_ecps_replicates_jct_tax_expenditures():
         & (calibration_log["epoch"] == calibration_log["epoch"].max())
     ]
 
-    assert (
-        jct_rows.rel_abs_error.max() < 0.5
-    ), "JCT tax expenditure targets not met (see the calibration log for details). Max relative error: {:.2%}".format(
-        jct_rows.rel_abs_error.max()
+    assert jct_rows.rel_abs_error.max() < 0.5, (
+        "JCT tax expenditure targets not met (see the calibration log for details). Max relative error: {:.2%}".format(
+            jct_rows.rel_abs_error.max()
+        )
     )
 
 
@@ -71,9 +71,7 @@ def deprecated_test_ecps_replicates_jct_tax_expenditures_full():
     }
 
     baseline = Microsimulation(dataset=EnhancedCPS_2024)
-    income_tax_b = baseline.calculate(
-        "income_tax", period=2024, map_to="household"
-    )
+    income_tax_b = baseline.calculate("income_tax", period=2024, map_to="household")
 
     for deduction, target in EXPENDITURE_TARGETS.items():
         # Create reform that neutralizes the deduction
@@ -82,12 +80,8 @@ def apply(self):
                 self.neutralize_variable(deduction)
 
         # Run reform simulation
-        reformed = Microsimulation(
-            reform=RepealDeduction, dataset=EnhancedCPS_2024
-        )
-        income_tax_r = reformed.calculate(
-            "income_tax", period=2024, map_to="household"
-        )
+        reformed = Microsimulation(reform=RepealDeduction, dataset=EnhancedCPS_2024)
+        income_tax_r = reformed.calculate("income_tax", period=2024, map_to="household")
 
         # Calculate tax expenditure
         tax_expenditure = (income_tax_r - income_tax_b).sum()
@@ -95,7 +89,7 @@ def apply(self):
         TOLERANCE = 0.4
 
         print(
-            f"{deduction} tax expenditure {tax_expenditure/1e9:.1f}bn differs from target {target/1e9:.1f}bn by {pct_error:.2%}"
+            f"{deduction} tax expenditure {tax_expenditure / 1e9:.1f}bn differs from target {target / 1e9:.1f}bn by {pct_error:.2%}"
         )
         assert pct_error < TOLERANCE, deduction
 
@@ -137,9 +131,9 @@ def test_undocumented_matches_ssn_none():
 
     # 1. Per-person equivalence
     mismatches = np.where(ssn_type_none_mask != undocumented_mask)[0]
-    assert (
-        mismatches.size == 0
-    ), f"{mismatches.size} mismatches between 'NONE' SSN and 'UNDOCUMENTED' status"
+    assert mismatches.size == 0, (
+        f"{mismatches.size} mismatches between 'NONE' SSN and 'UNDOCUMENTED' status"
+    )
 
     # 2. Optional aggregate sanity-check
     count = undocumented_mask.sum()
@@ -164,9 +158,7 @@ def test_aca_calibration():
     # Monthly to yearly
     targets["spending"] = targets["spending"] * 12
     # Adjust to match national target
-    targets["spending"] = targets["spending"] * (
-        98e9 / targets["spending"].sum()
-    )
+    targets["spending"] = targets["spending"] * (98e9 / targets["spending"].sum())
 
     sim = Microsimulation(dataset=EnhancedCPS_2024)
     state_code_hh = sim.calculate("state_code", map_to="household").values
@@ -181,17 +173,15 @@ def test_aca_calibration():
 
         pct_error = abs(simulated - target_spending) / target_spending
         print(
-            f"{state}: simulated ${simulated/1e9:.2f} bn  "
-            f"target ${target_spending/1e9:.2f} bn  "
+            f"{state}: simulated ${simulated / 1e9:.2f} bn  "
+            f"target ${target_spending / 1e9:.2f} bn  "
             f"error {pct_error:.2%}"
         )
 
         if pct_error > TOLERANCE:
             failed = True
 
-    assert (
-        not failed
-    ), f"One or more states exceeded tolerance of {TOLERANCE:.0%}."
+    assert not failed, f"One or more states exceeded tolerance of {TOLERANCE:.0%}."
 
 
 def test_immigration_status_diversity():
@@ -227,20 +217,18 @@ def test_immigration_status_diversity():
     )
 
     # Also check that we have a reasonable percentage of citizens (should be 85-90%)
-    assert (
-        80 < citizen_pct < 95
-    ), f"Citizen percentage ({citizen_pct:.1f}%) outside expected range (80-95%)"
+    assert 80 < citizen_pct < 95, (
+        f"Citizen percentage ({citizen_pct:.1f}%) outside expected range (80-95%)"
+    )
 
     # Check that we have some non-citizens
     non_citizen_pct = 100 - citizen_pct
-    assert (
-        non_citizen_pct > 5
-    ), f"Too few non-citizens ({non_citizen_pct:.1f}%) - expected at least 5%"
-
-    print(
-        f"Immigration status diversity test passed: {citizen_pct:.1f}% citizens"
+    assert non_citizen_pct > 5, (
+        f"Too few non-citizens ({non_citizen_pct:.1f}%) - expected at least 5%"
     )
 
+    print(f"Immigration status diversity test passed: {citizen_pct:.1f}% citizens")
+
 
 def test_medicaid_calibration():
 
@@ -269,14 +257,12 @@ def test_medicaid_calibration():
 
         pct_error = abs(simulated - target_enrollment) / target_enrollment
         print(
-            f"{state}: simulated ${simulated/1e9:.2f} bn  "
-            f"target ${target_enrollment/1e9:.2f} bn  "
+            f"{state}: simulated ${simulated / 1e9:.2f} bn  "
+            f"target ${target_enrollment / 1e9:.2f} bn  "
             f"error {pct_error:.2%}"
         )
 
         if pct_error > TOLERANCE:
             failed = True
 
-    assert (
-        not failed
-    ), f"One or more states exceeded tolerance of {TOLERANCE:.0%}."
+    assert not failed, f"One or more states exceeded tolerance of {TOLERANCE:.0%}."
diff --git a/policyengine_us_data/tests/test_datasets/test_sipp_assets.py b/policyengine_us_data/tests/test_datasets/test_sipp_assets.py
index c8780d847..a79b4bce6 100644
--- a/policyengine_us_data/tests/test_datasets/test_sipp_assets.py
+++ b/policyengine_us_data/tests/test_datasets/test_sipp_assets.py
@@ -59,12 +59,12 @@ def test_ecps_has_liquid_assets():
     MAXIMUM_TOTAL = 30e12  # $30 trillion ceiling
 
     assert total > MINIMUM_TOTAL, (
-        f"Total liquid assets ${total/1e12:.1f}T below "
-        f"minimum ${MINIMUM_TOTAL/1e12:.0f}T"
+        f"Total liquid assets ${total / 1e12:.1f}T below "
+        f"minimum ${MINIMUM_TOTAL / 1e12:.0f}T"
     )
     assert total < MAXIMUM_TOTAL, (
-        f"Total liquid assets ${total/1e12:.1f}T above "
-        f"maximum ${MAXIMUM_TOTAL/1e12:.0f}T"
+        f"Total liquid assets ${total / 1e12:.1f}T above "
+        f"maximum ${MAXIMUM_TOTAL / 1e12:.0f}T"
     )
 
 
@@ -102,12 +102,10 @@ def test_liquid_assets_distribution():
     MEDIAN_MAX = 20_000
 
     assert weighted_median > MEDIAN_MIN, (
-        f"Median liquid assets ${weighted_median:,.0f} below "
-        f"minimum ${MEDIAN_MIN:,}"
+        f"Median liquid assets ${weighted_median:,.0f} below minimum ${MEDIAN_MIN:,}"
     )
     assert weighted_median < MEDIAN_MAX, (
-        f"Median liquid assets ${weighted_median:,.0f} above "
-        f"maximum ${MEDIAN_MAX:,}"
+        f"Median liquid assets ${weighted_median:,.0f} above maximum ${MEDIAN_MAX:,}"
     )
 
 
@@ -129,9 +127,7 @@ def test_asset_categories_exist():
     assert bonds >= 0, "Bond assets should be non-negative"
 
     # Bank accounts typically largest category of liquid assets
-    assert (
-        bank > stocks * 0.3
-    ), "Bank accounts should be substantial relative to stocks"
+    assert bank > stocks * 0.3, "Bank accounts should be substantial relative to stocks"
 
 
 def test_low_asset_households():
@@ -158,10 +154,8 @@ def test_low_asset_households():
     MAX_PCT = 0.70
 
     assert below_2k > MIN_PCT, (
-        f"Only {below_2k:.1%} have <$2k liquid assets, "
-        f"expected at least {MIN_PCT:.0%}"
+        f"Only {below_2k:.1%} have <$2k liquid assets, expected at least {MIN_PCT:.0%}"
     )
     assert below_2k < MAX_PCT, (
-        f"{below_2k:.1%} have <$2k liquid assets, "
-        f"expected at most {MAX_PCT:.0%}"
+        f"{below_2k:.1%} have <$2k liquid assets, expected at most {MAX_PCT:.0%}"
     )
diff --git a/policyengine_us_data/tests/test_datasets/test_small_enhanced_cps.py b/policyengine_us_data/tests/test_datasets/test_small_enhanced_cps.py
index 23b7b2dcb..9316d3909 100644
--- a/policyengine_us_data/tests/test_datasets/test_small_enhanced_cps.py
+++ b/policyengine_us_data/tests/test_datasets/test_small_enhanced_cps.py
@@ -19,12 +19,10 @@ def test_small_ecps_loads(year: int):
 
     # Employment income should be positive (not zero from missing vars)
     emp_income = sim.calculate("employment_income", 2025).sum()
-    assert (
-        emp_income > 0
-    ), f"Small ECPS employment_income sum is {emp_income}, expected > 0."
+    assert emp_income > 0, (
+        f"Small ECPS employment_income sum is {emp_income}, expected > 0."
+    )
 
     # Should have a reasonable number of households
     hh_count = len(sim.calculate("household_net_income", 2025))
-    assert (
-        hh_count > 100
-    ), f"Small ECPS has only {hh_count} households, expected > 100."
+    assert hh_count > 100, f"Small ECPS has only {hh_count} households, expected > 100."
diff --git a/policyengine_us_data/tests/test_datasets/test_sparse_enhanced_cps.py b/policyengine_us_data/tests/test_datasets/test_sparse_enhanced_cps.py
index 6a690f0cc..a7ee941bb 100644
--- a/policyengine_us_data/tests/test_datasets/test_sparse_enhanced_cps.py
+++ b/policyengine_us_data/tests/test_datasets/test_sparse_enhanced_cps.py
@@ -115,10 +115,10 @@ def test_sparse_ecps_replicates_jct_tax_expenditures():
         & (calibration_log["epoch"] == calibration_log["epoch"].max())
     ]
 
-    assert (
-        jct_rows.rel_abs_error.max() < 0.5
-    ), "JCT tax expenditure targets not met (see the calibration log for details). Max relative error: {:.2%}".format(
-        jct_rows.rel_abs_error.max()
+    assert jct_rows.rel_abs_error.max() < 0.5, (
+        "JCT tax expenditure targets not met (see the calibration log for details). Max relative error: {:.2%}".format(
+            jct_rows.rel_abs_error.max()
+        )
     )
 
 
@@ -133,9 +133,7 @@ def deprecated_test_sparse_ecps_replicates_jct_tax_expenditures_full(sim):
     }
 
     baseline = sim
-    income_tax_b = baseline.calculate(
-        "income_tax", period=2024, map_to="household"
-    )
+    income_tax_b = baseline.calculate("income_tax", period=2024, map_to="household")
 
     for deduction, target in EXPENDITURE_TARGETS.items():
         # Create reform that neutralizes the deduction
@@ -145,9 +143,7 @@ def apply(self):
 
         # Run reform simulation
         reformed = Microsimulation(reform=RepealDeduction, dataset=sim.dataset)
-        income_tax_r = reformed.calculate(
-            "income_tax", period=2024, map_to="household"
-        )
+        income_tax_r = reformed.calculate("income_tax", period=2024, map_to="household")
 
         # Calculate tax expenditure
         tax_expenditure = (income_tax_r - income_tax_b).sum()
@@ -155,8 +151,8 @@ def apply(self):
         TOLERANCE = 0.4
 
         logging.info(
-            f"{deduction} tax expenditure {tax_expenditure/1e9:.1f}bn "
-            f"differs from target {target/1e9:.1f}bn by {pct_error:.2%}"
+            f"{deduction} tax expenditure {tax_expenditure / 1e9:.1f}bn "
+            f"differs from target {target / 1e9:.1f}bn by {pct_error:.2%}"
         )
         assert pct_error < TOLERANCE, deduction
 
@@ -188,9 +184,7 @@ def test_sparse_aca_calibration(sim):
     # Monthly to yearly
     targets["spending"] = targets["spending"] * 12
     # Adjust to match national target
-    targets["spending"] = targets["spending"] * (
-        98e9 / targets["spending"].sum()
-    )
+    targets["spending"] = targets["spending"] * (98e9 / targets["spending"].sum())
 
     state_code_hh = sim.calculate("state_code", map_to="household").values
     aca_ptc = sim.calculate("aca_ptc", map_to="household", period=2025)
@@ -204,17 +198,15 @@ def test_sparse_aca_calibration(sim):
 
         pct_error = abs(simulated - target_spending) / target_spending
         logging.info(
-            f"{state}: simulated ${simulated/1e9:.2f} bn  "
-            f"target ${target_spending/1e9:.2f} bn  "
+            f"{state}: simulated ${simulated / 1e9:.2f} bn  "
+            f"target ${target_spending / 1e9:.2f} bn  "
             f"error {pct_error:.2%}"
         )
 
         if pct_error > TOLERANCE:
             failed = True
 
-    assert (
-        not failed
-    ), f"One or more states exceeded tolerance of {TOLERANCE:.0%}."
+    assert not failed, f"One or more states exceeded tolerance of {TOLERANCE:.0%}."
 
 
 def test_sparse_medicaid_calibration(sim):
@@ -238,14 +230,12 @@ def test_sparse_medicaid_calibration(sim):
 
         pct_error = abs(simulated - target_enrollment) / target_enrollment
         logging.info(
-            f"{state}: simulated ${simulated/1e9:.2f} bn  "
-            f"target ${target_enrollment/1e9:.2f} bn  "
+            f"{state}: simulated ${simulated / 1e9:.2f} bn  "
+            f"target ${target_enrollment / 1e9:.2f} bn  "
             f"error {pct_error:.2%}"
         )
 
         if pct_error > TOLERANCE:
             failed = True
 
-    assert (
-        not failed
-    ), f"One or more states exceeded tolerance of {TOLERANCE:.0%}."
+    assert not failed, f"One or more states exceeded tolerance of {TOLERANCE:.0%}."
diff --git a/policyengine_us_data/tests/test_local_area_calibration/create_test_fixture.py b/policyengine_us_data/tests/test_local_area_calibration/create_test_fixture.py
index 00334734d..2fadeeeb9 100644
--- a/policyengine_us_data/tests/test_local_area_calibration/create_test_fixture.py
+++ b/policyengine_us_data/tests/test_local_area_calibration/create_test_fixture.py
@@ -30,9 +30,7 @@ def create_test_fixture():
 
     # Household-level arrays
     household_ids = np.arange(N_HOUSEHOLDS, dtype=np.int32)
-    household_weights = np.random.uniform(500, 3000, N_HOUSEHOLDS).astype(
-        np.float32
-    )
+    household_weights = np.random.uniform(500, 3000, N_HOUSEHOLDS).astype(np.float32)
 
     # Assign households to states (use NC=37 and AK=2 for testing)
     # 40 households in NC, 10 in AK
@@ -102,18 +100,14 @@ def create_test_fixture():
         f["household_id"].create_dataset(TIME_PERIOD, data=household_ids)
 
         f.create_group("household_weight")
-        f["household_weight"].create_dataset(
-            TIME_PERIOD, data=household_weights
-        )
+        f["household_weight"].create_dataset(TIME_PERIOD, data=household_weights)
 
         # Person variables
         f.create_group("person_id")
         f["person_id"].create_dataset(TIME_PERIOD, data=person_ids)
 
         f.create_group("person_household_id")
-        f["person_household_id"].create_dataset(
-            TIME_PERIOD, data=person_household_ids
-        )
+        f["person_household_id"].create_dataset(TIME_PERIOD, data=person_household_ids)
 
         f.create_group("person_weight")
         f["person_weight"].create_dataset(TIME_PERIOD, data=person_weights)
@@ -122,18 +116,14 @@ def create_test_fixture():
         f["age"].create_dataset(TIME_PERIOD, data=ages)
 
         f.create_group("employment_income")
-        f["employment_income"].create_dataset(
-            TIME_PERIOD, data=employment_income
-        )
+        f["employment_income"].create_dataset(TIME_PERIOD, data=employment_income)
 
         # Tax unit
         f.create_group("tax_unit_id")
         f["tax_unit_id"].create_dataset(TIME_PERIOD, data=tax_unit_ids)
 
         f.create_group("person_tax_unit_id")
-        f["person_tax_unit_id"].create_dataset(
-            TIME_PERIOD, data=person_tax_unit_ids
-        )
+        f["person_tax_unit_id"].create_dataset(TIME_PERIOD, data=person_tax_unit_ids)
 
         f.create_group("tax_unit_weight")
         f["tax_unit_weight"].create_dataset(TIME_PERIOD, data=tax_unit_weights)
@@ -143,9 +133,7 @@ def create_test_fixture():
         f["spm_unit_id"].create_dataset(TIME_PERIOD, data=spm_unit_ids)
 
         f.create_group("person_spm_unit_id")
-        f["person_spm_unit_id"].create_dataset(
-            TIME_PERIOD, data=person_spm_unit_ids
-        )
+        f["person_spm_unit_id"].create_dataset(TIME_PERIOD, data=person_spm_unit_ids)
 
         f.create_group("spm_unit_weight")
         f["spm_unit_weight"].create_dataset(TIME_PERIOD, data=spm_unit_weights)
@@ -155,9 +143,7 @@ def create_test_fixture():
         f["family_id"].create_dataset(TIME_PERIOD, data=family_ids)
 
         f.create_group("person_family_id")
-        f["person_family_id"].create_dataset(
-            TIME_PERIOD, data=person_family_ids
-        )
+        f["person_family_id"].create_dataset(TIME_PERIOD, data=person_family_ids)
 
         f.create_group("family_weight")
         f["family_weight"].create_dataset(TIME_PERIOD, data=family_weights)
@@ -172,9 +158,7 @@ def create_test_fixture():
         )
 
         f.create_group("marital_unit_weight")
-        f["marital_unit_weight"].create_dataset(
-            TIME_PERIOD, data=marital_unit_weights
-        )
+        f["marital_unit_weight"].create_dataset(TIME_PERIOD, data=marital_unit_weights)
 
         # Geography (household level)
         f.create_group("state_fips")
diff --git a/policyengine_us_data/tests/test_local_area_calibration/test_county_assignment.py b/policyengine_us_data/tests/test_local_area_calibration/test_county_assignment.py
index 158e0ca68..e20c1797a 100644
--- a/policyengine_us_data/tests/test_local_area_calibration/test_county_assignment.py
+++ b/policyengine_us_data/tests/test_local_area_calibration/test_county_assignment.py
@@ -47,9 +47,7 @@ def test_ny_cd_gets_ny_counties(self):
         for idx in result:
             county_name = County._member_names_[idx]
             # Should end with _NY
-            assert county_name.endswith(
-                "_NY"
-            ), f"Got non-NY county: {county_name}"
+            assert county_name.endswith("_NY"), f"Got non-NY county: {county_name}"
 
     def test_ca_cd_gets_ca_counties(self):
         """Verify CA CDs get CA counties."""
@@ -58,9 +56,7 @@ def test_ca_cd_gets_ca_counties(self):
 
         for idx in result:
             county_name = County._member_names_[idx]
-            assert county_name.endswith(
-                "_CA"
-            ), f"Got non-CA county: {county_name}"
+            assert county_name.endswith("_CA"), f"Got non-CA county: {county_name}"
 
 
 class TestCountyIndex:
diff --git a/policyengine_us_data/tests/test_local_area_calibration/test_stacked_dataset_builder.py b/policyengine_us_data/tests/test_local_area_calibration/test_stacked_dataset_builder.py
index 2900eec19..4d2d7c74e 100644
--- a/policyengine_us_data/tests/test_local_area_calibration/test_stacked_dataset_builder.py
+++ b/policyengine_us_data/tests/test_local_area_calibration/test_stacked_dataset_builder.py
@@ -141,17 +141,15 @@ def test_counties_match_state(self, stacked_result):
             state_fips = row["state_fips"]
 
             if state_fips == 37:
-                assert county.endswith(
-                    "_NC"
-                ), f"NC county should end with _NC: {county}"
+                assert county.endswith("_NC"), (
+                    f"NC county should end with _NC: {county}"
+                )
             elif state_fips == 2:
-                assert county.endswith(
-                    "_AK"
-                ), f"AK county should end with _AK: {county}"
+                assert county.endswith("_AK"), (
+                    f"AK county should end with _AK: {county}"
+                )
 
-    def test_household_count_matches_weights(
-        self, stacked_result, test_weights
-    ):
+    def test_household_count_matches_weights(self, stacked_result, test_weights):
         """Number of output households should match non-zero weights."""
         hh_df = stacked_result["hh_df"]
         expected_households = (test_weights > 0).sum()
@@ -205,40 +203,30 @@ class TestEntityReindexing:
     def test_family_ids_are_unique(self, stacked_sim):
         """Family IDs should be globally unique across all CDs."""
         family_ids = stacked_sim.calculate("family_id", map_to="family").values
-        assert len(family_ids) == len(
-            set(family_ids)
-        ), "Family IDs should be unique"
+        assert len(family_ids) == len(set(family_ids)), "Family IDs should be unique"
 
     def test_tax_unit_ids_are_unique(self, stacked_sim):
         """Tax unit IDs should be globally unique."""
-        tax_unit_ids = stacked_sim.calculate(
-            "tax_unit_id", map_to="tax_unit"
-        ).values
-        assert len(tax_unit_ids) == len(
-            set(tax_unit_ids)
-        ), "Tax unit IDs should be unique"
+        tax_unit_ids = stacked_sim.calculate("tax_unit_id", map_to="tax_unit").values
+        assert len(tax_unit_ids) == len(set(tax_unit_ids)), (
+            "Tax unit IDs should be unique"
+        )
 
     def test_spm_unit_ids_are_unique(self, stacked_sim):
         """SPM unit IDs should be globally unique."""
-        spm_unit_ids = stacked_sim.calculate(
-            "spm_unit_id", map_to="spm_unit"
-        ).values
-        assert len(spm_unit_ids) == len(
-            set(spm_unit_ids)
-        ), "SPM unit IDs should be unique"
+        spm_unit_ids = stacked_sim.calculate("spm_unit_id", map_to="spm_unit").values
+        assert len(spm_unit_ids) == len(set(spm_unit_ids)), (
+            "SPM unit IDs should be unique"
+        )
 
     def test_person_family_id_matches_family_id(self, stacked_sim):
         """person_family_id should reference valid family_ids."""
         person_family_ids = stacked_sim.calculate(
             "person_family_id", map_to="person"
         ).values
-        family_ids = set(
-            stacked_sim.calculate("family_id", map_to="family").values
-        )
+        family_ids = set(stacked_sim.calculate("family_id", map_to="family").values)
         for pf_id in person_family_ids:
-            assert (
-                pf_id in family_ids
-            ), f"person_family_id {pf_id} not in family_ids"
+            assert pf_id in family_ids, f"person_family_id {pf_id} not in family_ids"
 
     def test_family_ids_unique_across_cds(self, stacked_sim_with_overlap):
         """Same household in different CDs should have different family_ids."""
@@ -247,9 +235,7 @@ def test_family_ids_unique_across_cds(self, stacked_sim_with_overlap):
         n_cds = len(TEST_CDS)
 
         family_ids = sim.calculate("family_id", map_to="family").values
-        household_ids = sim.calculate(
-            "household_id", map_to="household"
-        ).values
+        household_ids = sim.calculate("household_id", map_to="household").values
 
         # Should have n_overlap * n_cds unique families (one per HH-CD pair)
         expected_families = n_overlap * n_cds
diff --git a/policyengine_us_data/tests/test_puf_impute.py b/policyengine_us_data/tests/test_puf_impute.py
index fcdcf763f..d968fb16d 100644
--- a/policyengine_us_data/tests/test_puf_impute.py
+++ b/policyengine_us_data/tests/test_puf_impute.py
@@ -57,9 +57,7 @@ def _make_data(
     if age is not None:
         data["age"] = {tp: np.concatenate([age, age]).astype(np.float32)}
     if is_male is not None:
-        data["is_male"] = {
-            tp: np.concatenate([is_male, is_male]).astype(np.float32)
-        }
+        data["is_male"] = {tp: np.concatenate([is_male, is_male]).astype(np.float32)}
     return data, n, tp
 
 
diff --git a/policyengine_us_data/tests/test_schema_views_and_lookups.py b/policyengine_us_data/tests/test_schema_views_and_lookups.py
index 14521a214..8d99615cf 100644
--- a/policyengine_us_data/tests/test_schema_views_and_lookups.py
+++ b/policyengine_us_data/tests/test_schema_views_and_lookups.py
@@ -227,9 +227,7 @@ def _query_stratum_domain(self):
         from sqlalchemy import text
 
         with self.engine.connect() as conn:
-            rows = conn.execute(
-                text("SELECT * FROM stratum_domain")
-            ).fetchall()
+            rows = conn.execute(text("SELECT * FROM stratum_domain")).fetchall()
         return rows
 
     def test_geographic_stratum_excluded(self):
@@ -246,7 +244,7 @@ def test_geographic_stratum_excluded(self):
         domain_stratum_ids = {r[0] for r in rows}
         self.assertTrue(
             domain_stratum_ids.isdisjoint(geo_ids),
-            "Geographic strata should not appear in " "stratum_domain",
+            "Geographic strata should not appear in stratum_domain",
         )
 
     def test_single_domain_variable(self):
@@ -280,7 +278,7 @@ def test_geographic_constraints_filtered(self):
         }
         self.assertTrue(
             all_domain_vars.isdisjoint(excluded),
-            f"Found excluded vars: " f"{all_domain_vars & excluded}",
+            f"Found excluded vars: {all_domain_vars & excluded}",
         )
 
     # ----------------------------------------------------------------
@@ -291,18 +289,14 @@ def _query_target_overview(self):
         from sqlalchemy import text
 
         with self.engine.connect() as conn:
-            rows = conn.execute(
-                text("SELECT * FROM target_overview")
-            ).fetchall()
+            rows = conn.execute(text("SELECT * FROM target_overview")).fetchall()
         return rows
 
     def _overview_columns(self):
         from sqlalchemy import text
 
         with self.engine.connect() as conn:
-            cursor = conn.execute(
-                text("SELECT * FROM target_overview LIMIT 0")
-            )
+            cursor = conn.execute(text("SELECT * FROM target_overview LIMIT 0"))
             return [desc[0] for desc in cursor.cursor.description]
 
     def test_national_geo_level(self):
diff --git a/policyengine_us_data/tests/test_stochastic_variables.py b/policyengine_us_data/tests/test_stochastic_variables.py
index 172260784..b9ab13466 100644
--- a/policyengine_us_data/tests/test_stochastic_variables.py
+++ b/policyengine_us_data/tests/test_stochastic_variables.py
@@ -10,7 +10,6 @@
 
 
 class TestTakeUpRateParameters:
-
     def test_eitc_rate_loads(self):
         rates = load_take_up_rate("eitc", 2022)
         assert isinstance(rates, dict)
@@ -52,7 +51,6 @@ def test_ssi_takeup_rate_loads(self):
 
 
 class TestStableStringHash:
-
     def test_deterministic(self):
         h1 = _stable_string_hash("takes_up_snap_if_eligible")
         h2 = _stable_string_hash("takes_up_snap_if_eligible")
@@ -69,7 +67,6 @@ def test_returns_uint64(self):
 
 
 class TestSeededRng:
-
     def test_same_name_same_results(self):
         rng1 = seeded_rng("takes_up_snap_if_eligible")
         result1 = rng1.random(1000)
@@ -103,7 +100,6 @@ def test_order_independence(self):
 
 
 class TestTakeUpProportions:
-
     def test_take_up_produces_expected_proportion(self):
         rate = 0.7
         n = 10_000
diff --git a/policyengine_us_data/utils/census.py b/policyengine_us_data/utils/census.py
index c61cc166d..422d750c3 100644
--- a/policyengine_us_data/utils/census.py
+++ b/policyengine_us_data/utils/census.py
@@ -139,9 +139,7 @@
 
 
 def get_census_docs(year):
-    docs_url = (
-        f"https://api.census.gov/data/{year}/acs/acs1/subject/variables.json"
-    )
+    docs_url = f"https://api.census.gov/data/{year}/acs/acs1/subject/variables.json"
     cache_file = f"census_docs_{year}.json"
     if is_cached(cache_file):
         logger.info(f"Using cached {cache_file}")
diff --git a/policyengine_us_data/utils/constraint_validation.py b/policyengine_us_data/utils/constraint_validation.py
index c3e512c79..f533739cb 100644
--- a/policyengine_us_data/utils/constraint_validation.py
+++ b/policyengine_us_data/utils/constraint_validation.py
@@ -101,21 +101,17 @@ def _check_operation_compatibility(var_name: str, operations: set) -> None:
     # Cannot have both > and >= (conflicting lower bounds)
     if ">" in operations and ">=" in operations:
         raise ConstraintValidationError(
-            f"{var_name}: cannot have both '>' and '>=' "
-            "(conflicting lower bounds)"
+            f"{var_name}: cannot have both '>' and '>=' (conflicting lower bounds)"
         )
 
     # Cannot have both < and <= (conflicting upper bounds)
     if "<" in operations and "<=" in operations:
         raise ConstraintValidationError(
-            f"{var_name}: cannot have both '<' and '<=' "
-            "(conflicting upper bounds)"
+            f"{var_name}: cannot have both '<' and '<=' (conflicting upper bounds)"
         )
 
 
-def _check_range_validity(
-    var_name: str, constraints: List[Constraint]
-) -> None:
+def _check_range_validity(var_name: str, constraints: List[Constraint]) -> None:
     """Check that range constraints don't create an empty range."""
     lower_bound = float("-inf")
     upper_bound = float("inf")
@@ -130,9 +126,7 @@ def _check_range_validity(
             continue
 
         if c.operation == ">":
-            if val > lower_bound or (
-                val == lower_bound and not lower_inclusive
-            ):
+            if val > lower_bound or (val == lower_bound and not lower_inclusive):
                 lower_bound = val
                 lower_inclusive = False
         elif c.operation == ">=":
@@ -140,9 +134,7 @@ def _check_range_validity(
                 lower_bound = val
                 lower_inclusive = True
         elif c.operation == "<":
-            if val < upper_bound or (
-                val == upper_bound and not upper_inclusive
-            ):
+            if val < upper_bound or (val == upper_bound and not upper_inclusive):
                 upper_bound = val
                 upper_inclusive = False
         elif c.operation == "<=":
@@ -156,9 +148,7 @@ def _check_range_validity(
             f"{var_name}: empty range - lower bound {lower_bound} > "
             f"upper bound {upper_bound}"
         )
-    if lower_bound == upper_bound and not (
-        lower_inclusive and upper_inclusive
-    ):
+    if lower_bound == upper_bound and not (lower_inclusive and upper_inclusive):
         raise ConstraintValidationError(
             f"{var_name}: empty range - bounds equal at {lower_bound} "
             "but not both inclusive"
diff --git a/policyengine_us_data/utils/data_upload.py b/policyengine_us_data/utils/data_upload.py
index 42cd8feee..7b7481b3e 100644
--- a/policyengine_us_data/utils/data_upload.py
+++ b/policyengine_us_data/utils/data_upload.py
@@ -116,18 +116,14 @@ def upload_files_to_gcs(
     Upload files to Google Cloud Storage and set metadata with the version.
     """
     credentials, project_id = google.auth.default()
-    storage_client = storage.Client(
-        credentials=credentials, project=project_id
-    )
+    storage_client = storage.Client(credentials=credentials, project=project_id)
     bucket = storage_client.bucket(gcs_bucket_name)
 
     for file_path in files:
         file_path = Path(file_path)
         blob = bucket.blob(file_path.name)
         blob.upload_from_filename(file_path)
-        logging.info(
-            f"Uploaded {file_path.name} to GCS bucket {gcs_bucket_name}."
-        )
+        logging.info(f"Uploaded {file_path.name} to GCS bucket {gcs_bucket_name}.")
 
         # Set metadata
         blob.metadata = {"version": version}
@@ -164,9 +160,7 @@ def upload_local_area_file(
 
     # Upload to GCS with subdirectory
     credentials, project_id = google.auth.default()
-    storage_client = storage.Client(
-        credentials=credentials, project=project_id
-    )
+    storage_client = storage.Client(credentials=credentials, project=project_id)
     bucket = storage_client.bucket(gcs_bucket_name)
 
     blob_name = f"{subdirectory}/{file_path.name}"
@@ -336,9 +330,7 @@ def upload_to_staging_hf(
             f"Uploaded batch {i // batch_size + 1}: {len(operations)} files to staging/"
         )
 
-    logging.info(
-        f"Total: uploaded {total_uploaded} files to staging/ in HuggingFace"
-    )
+    logging.info(f"Total: uploaded {total_uploaded} files to staging/ in HuggingFace")
     return total_uploaded
 
 
diff --git a/policyengine_us_data/utils/db.py b/policyengine_us_data/utils/db.py
index 2d8f134bf..5dc78603c 100644
--- a/policyengine_us_data/utils/db.py
+++ b/policyengine_us_data/utils/db.py
@@ -44,10 +44,7 @@ def etl_argparser(
 
     args = parser.parse_args()
 
-    if (
-        not args.dataset.startswith("hf://")
-        and not Path(args.dataset).exists()
-    ):
+    if not args.dataset.startswith("hf://") and not Path(args.dataset).exists():
         raise FileNotFoundError(
             f"Dataset not found: {args.dataset}\n"
             f"Either build it locally (`make data`) or pass a "
@@ -69,18 +66,14 @@ def get_stratum_by_id(session: Session, stratum_id: int) -> Optional[Stratum]:
     return session.get(Stratum, stratum_id)
 
 
-def get_simple_stratum_by_ucgid(
-    session: Session, ucgid: str
-) -> Optional[Stratum]:
+def get_simple_stratum_by_ucgid(session: Session, ucgid: str) -> Optional[Stratum]:
     """
     Finds a stratum defined *only* by a single ucgid_str constraint.
     """
     constraint_count_subquery = (
         select(
             StratumConstraint.stratum_id,
-            sa.func.count(StratumConstraint.stratum_id).label(
-                "constraint_count"
-            ),
+            sa.func.count(StratumConstraint.stratum_id).label("constraint_count"),
         )
         .group_by(StratumConstraint.stratum_id)
         .subquery()
@@ -137,16 +130,12 @@ def parse_ucgid(ucgid_str: str) -> Dict:
     elif ucgid_str.startswith("0400000US"):
         state_fips = int(ucgid_str[9:])
         return {"type": "state", "state_fips": state_fips}
-    elif ucgid_str.startswith("5001800US") or ucgid_str.startswith(
-        "5001900US"
-    ):
+    elif ucgid_str.startswith("5001800US") or ucgid_str.startswith("5001900US"):
         # 5001800US = 118th Congress, 5001900US = 119th Congress
         state_and_district = ucgid_str[9:]
         state_fips = int(state_and_district[:2])
         district_number = int(state_and_district[2:])
-        if district_number == 0 or (
-            state_fips == 11 and district_number == 98
-        ):
+        if district_number == 0 or (state_fips == 11 and district_number == 98):
             district_number = 1
         cd_geoid = state_fips * 100 + district_number
         return {
@@ -201,9 +190,7 @@ def get_geographic_strata(session: Session) -> Dict:
         if not constraints:
             strata_map["national"] = stratum.stratum_id
         else:
-            constraint_vars = {
-                c.constraint_variable: c.value for c in constraints
-            }
+            constraint_vars = {c.constraint_variable: c.value for c in constraints}
 
             if "congressional_district_geoid" in constraint_vars:
                 cd_geoid = int(constraint_vars["congressional_district_geoid"])
diff --git a/policyengine_us_data/utils/huggingface.py b/policyengine_us_data/utils/huggingface.py
index a312b5240..c6d54af17 100644
--- a/policyengine_us_data/utils/huggingface.py
+++ b/policyengine_us_data/utils/huggingface.py
@@ -10,9 +10,7 @@
     )
 
 
-def download(
-    repo: str, repo_filename: str, local_folder: str, version: str = None
-):
+def download(repo: str, repo_filename: str, local_folder: str, version: str = None):
 
     hf_hub_download(
         repo_id=repo,
diff --git a/policyengine_us_data/utils/l0.py b/policyengine_us_data/utils/l0.py
index 3dd9e0145..a1d1a5a0d 100644
--- a/policyengine_us_data/utils/l0.py
+++ b/policyengine_us_data/utils/l0.py
@@ -191,11 +191,11 @@ def train_with_l0(model, train_loader, epochs=10, l0_lambda=1e-3):
         if epoch % 1 == 0:
             sparsity_stats = model.get_sparsity_stats()
             logging.info(
-                f"Epoch {epoch}: Loss={total_loss/len(train_loader):.4f}, L0={total_l0/len(train_loader):.4f}"
+                f"Epoch {epoch}: Loss={total_loss / len(train_loader):.4f}, L0={total_l0 / len(train_loader):.4f}"
             )
             for layer, stats in sparsity_stats.items():
                 logging.info(
-                    f"  {layer}: {stats['sparsity']*100:.1f}% sparse, {stats['active_params']:.1f} active params"
+                    f"  {layer}: {stats['sparsity'] * 100:.1f}% sparse, {stats['active_params']:.1f} active params"
                 )
 
 
diff --git a/policyengine_us_data/utils/loss.py b/policyengine_us_data/utils/loss.py
index d7410d2eb..5cbb879f8 100644
--- a/policyengine_us_data/utils/loss.py
+++ b/policyengine_us_data/utils/loss.py
@@ -101,10 +101,10 @@ def fmt(x):
     if x < 1e3:
         return f"{x:.0f}"
     if x < 1e6:
-        return f"{x/1e3:.0f}k"
+        return f"{x / 1e3:.0f}k"
     if x < 1e9:
-        return f"{x/1e6:.0f}m"
-    return f"{x/1e9:.1f}bn"
+        return f"{x / 1e6:.0f}m"
+    return f"{x / 1e9:.1f}bn"
 
 
 def build_loss_matrix(dataset: type, time_period):
@@ -164,9 +164,7 @@ def build_loss_matrix(dataset: type, time_period):
             continue
 
         mask = (
-            (agi >= row["AGI lower bound"])
-            * (agi < row["AGI upper bound"])
-            * filer
+            (agi >= row["AGI lower bound"]) * (agi < row["AGI upper bound"]) * filer
         ) > 0
 
         if row["Filing status"] == "Single":
@@ -186,12 +184,8 @@ def build_loss_matrix(dataset: type, time_period):
         if row["Count"]:
             values = (values > 0).astype(float)
 
-        agi_range_label = (
-            f"{fmt(row['AGI lower bound'])}-{fmt(row['AGI upper bound'])}"
-        )
-        taxable_label = (
-            "taxable" if row["Taxable only"] else "all" + " returns"
-        )
+        agi_range_label = f"{fmt(row['AGI lower bound'])}-{fmt(row['AGI upper bound'])}"
+        taxable_label = "taxable" if row["Taxable only"] else "all" + " returns"
         filing_status_label = row["Filing status"]
 
         variable_label = row["Variable"].replace("_", " ")
@@ -270,9 +264,7 @@ def build_loss_matrix(dataset: type, time_period):
 
     for variable_name in CBO_PROGRAMS:
         label = f"nation/cbo/{variable_name}"
-        loss_matrix[label] = sim.calculate(
-            variable_name, map_to="household"
-        ).values
+        loss_matrix[label] = sim.calculate(variable_name, map_to="household").values
         if any(loss_matrix[label].isna()):
             raise ValueError(f"Missing values for {label}")
         param_name = CBO_PARAM_NAME_MAP.get(variable_name, variable_name)
@@ -312,9 +304,9 @@ def build_loss_matrix(dataset: type, time_period):
 
     # National ACA Enrollment (people receiving a PTC)
     label = "nation/gov/aca_enrollment"
-    on_ptc = (
-        sim.calculate("aca_ptc", map_to="person", period=2025).values > 0
-    ).astype(int)
+    on_ptc = (sim.calculate("aca_ptc", map_to="person", period=2025).values > 0).astype(
+        int
+    )
     loss_matrix[label] = sim.map_result(on_ptc, "person", "household")
 
     ACA_PTC_ENROLLMENT_2024 = 19_743_689  # people enrolled
@@ -346,13 +338,9 @@ def build_loss_matrix(dataset: type, time_period):
         eitc_eligible_children = sim.calculate("eitc_child_count").values
         eitc = sim.calculate("eitc").values
         if row["count_children"] < 2:
-            meets_child_criteria = (
-                eitc_eligible_children == row["count_children"]
-            )
+            meets_child_criteria = eitc_eligible_children == row["count_children"]
         else:
-            meets_child_criteria = (
-                eitc_eligible_children >= row["count_children"]
-            )
+            meets_child_criteria = eitc_eligible_children >= row["count_children"]
         loss_matrix[returns_label] = sim.map_result(
             (eitc > 0) * meets_child_criteria,
             "tax_unit",
@@ -406,9 +394,7 @@ def build_loss_matrix(dataset: type, time_period):
     # Hard-coded totals
     for variable_name, target in HARD_CODED_TOTALS.items():
         label = f"nation/census/{variable_name}"
-        loss_matrix[label] = sim.calculate(
-            variable_name, map_to="household"
-        ).values
+        loss_matrix[label] = sim.calculate(variable_name, map_to="household").values
         if any(loss_matrix[label].isna()):
             raise ValueError(f"Missing values for {label}")
         targets_array.append(target)
@@ -416,8 +402,8 @@ def build_loss_matrix(dataset: type, time_period):
     # Negative household market income total rough estimate from the IRS SOI PUF
 
     market_income = sim.calculate("household_market_income").values
-    loss_matrix["nation/irs/negative_household_market_income_total"] = (
-        market_income * (market_income < 0)
+    loss_matrix["nation/irs/negative_household_market_income_total"] = market_income * (
+        market_income < 0
     )
     targets_array.append(-138e9)
 
@@ -439,7 +425,7 @@ def build_loss_matrix(dataset: type, time_period):
             "other_medical_expenses",
             "medicare_part_b_premiums",
         ]:
-            label = f"nation/census/{expense_type}/age_{age_lower_bound}_to_{age_lower_bound+9}"
+            label = f"nation/census/{expense_type}/age_{age_lower_bound}_to_{age_lower_bound + 9}"
             value = sim.calculate(expense_type).values
             loss_matrix[label] = sim.map_result(
                 in_age_range * value, "person", "household"
@@ -448,39 +434,27 @@ def build_loss_matrix(dataset: type, time_period):
 
     # AGI by SPM threshold totals
 
-    spm_threshold_agi = pd.read_csv(
-        CALIBRATION_FOLDER / "spm_threshold_agi.csv"
-    )
+    spm_threshold_agi = pd.read_csv(CALIBRATION_FOLDER / "spm_threshold_agi.csv")
 
     for _, row in spm_threshold_agi.iterrows():
-        spm_unit_agi = sim.calculate(
-            "adjusted_gross_income", map_to="spm_unit"
-        ).values
+        spm_unit_agi = sim.calculate("adjusted_gross_income", map_to="spm_unit").values
         spm_threshold = sim.calculate("spm_unit_spm_threshold").values
         in_threshold_range = (spm_threshold >= row["lower_spm_threshold"]) * (
             spm_threshold < row["upper_spm_threshold"]
         )
-        label = (
-            f"nation/census/agi_in_spm_threshold_decile_{int(row['decile'])}"
-        )
+        label = f"nation/census/agi_in_spm_threshold_decile_{int(row['decile'])}"
         loss_matrix[label] = sim.map_result(
             in_threshold_range * spm_unit_agi, "spm_unit", "household"
         )
         targets_array.append(row["adjusted_gross_income"])
 
-        label = (
-            f"nation/census/count_in_spm_threshold_decile_{int(row['decile'])}"
-        )
-        loss_matrix[label] = sim.map_result(
-            in_threshold_range, "spm_unit", "household"
-        )
+        label = f"nation/census/count_in_spm_threshold_decile_{int(row['decile'])}"
+        loss_matrix[label] = sim.map_result(in_threshold_range, "spm_unit", "household")
         targets_array.append(row["count"])
 
     # Population by state and population under 5 by state
 
-    state_population = pd.read_csv(
-        CALIBRATION_FOLDER / "population_by_state.csv"
-    )
+    state_population = pd.read_csv(CALIBRATION_FOLDER / "population_by_state.csv")
 
     for _, row in state_population.iterrows():
         in_state = sim.calculate("state_code", map_to="person") == row["state"]
@@ -491,9 +465,7 @@ def build_loss_matrix(dataset: type, time_period):
         under_5 = sim.calculate("age").values < 5
         in_state_under_5 = in_state * under_5
         label = f"state/census/population_under_5_by_state/{row['state']}"
-        loss_matrix[label] = sim.map_result(
-            in_state_under_5, "person", "household"
-        )
+        loss_matrix[label] = sim.map_result(in_state_under_5, "person", "household")
         targets_array.append(row["population_under_5"])
 
     age = sim.calculate("age").values
@@ -517,9 +489,7 @@ def build_loss_matrix(dataset: type, time_period):
 
     # SALT tax expenditure targeting
 
-    _add_tax_expenditure_targets(
-        dataset, time_period, sim, loss_matrix, targets_array
-    )
+    _add_tax_expenditure_targets(dataset, time_period, sim, loss_matrix, targets_array)
 
     if any(loss_matrix.isna().sum() > 0):
         raise ValueError("Some targets are missing from the loss matrix")
@@ -533,9 +503,7 @@ def build_loss_matrix(dataset: type, time_period):
 
         # Overall count by SSN card type
         label = f"nation/ssa/ssn_card_type_{card_type_str.lower()}_count"
-        loss_matrix[label] = sim.map_result(
-            ssn_type_mask, "person", "household"
-        )
+        loss_matrix[label] = sim.map_result(ssn_type_mask, "person", "household")
 
         # Target undocumented population by year based on various sources
         if card_type_str == "NONE":
@@ -571,14 +539,11 @@ def build_loss_matrix(dataset: type, time_period):
     for _, row in spending_by_state.iterrows():
         # Households located in this state
         in_state = (
-            sim.calculate("state_code", map_to="household").values
-            == row["state"]
+            sim.calculate("state_code", map_to="household").values == row["state"]
         )
 
         # ACA PTC amounts for every household (2025)
-        aca_value = sim.calculate(
-            "aca_ptc", map_to="household", period=2025
-        ).values
+        aca_value = sim.calculate("aca_ptc", map_to="household", period=2025).values
 
         # Add a loss-matrix entry and matching target
         label = f"nation/irs/aca_spending/{row['state'].lower()}"
@@ -611,9 +576,7 @@ def build_loss_matrix(dataset: type, time_period):
         in_state_enrolled = in_state & is_enrolled
 
         label = f"state/irs/aca_enrollment/{row['state'].lower()}"
-        loss_matrix[label] = sim.map_result(
-            in_state_enrolled, "person", "household"
-        )
+        loss_matrix[label] = sim.map_result(in_state_enrolled, "person", "household")
         if any(loss_matrix[label].isna()):
             raise ValueError(f"Missing values for {label}")
 
@@ -630,9 +593,7 @@ def build_loss_matrix(dataset: type, time_period):
     state_person = sim.calculate("state_code", map_to="person").values
 
     # Flag people in households that actually receive medicaid
-    has_medicaid = sim.calculate(
-        "medicaid_enrolled", map_to="person", period=2025
-    )
+    has_medicaid = sim.calculate("medicaid_enrolled", map_to="person", period=2025)
     is_medicaid_eligible = sim.calculate(
         "is_medicaid_eligible", map_to="person", period=2025
     ).values
@@ -644,9 +605,7 @@ def build_loss_matrix(dataset: type, time_period):
         in_state_enrolled = in_state & is_enrolled
 
         label = f"irs/medicaid_enrollment/{row['state'].lower()}"
-        loss_matrix[label] = sim.map_result(
-            in_state_enrolled, "person", "household"
-        )
+        loss_matrix[label] = sim.map_result(in_state_enrolled, "person", "household")
         if any(loss_matrix[label].isna()):
             raise ValueError(f"Missing values for {label}")
 
@@ -670,9 +629,7 @@ def build_loss_matrix(dataset: type, time_period):
                 age_lower_bound = int(age_range.replace("+", ""))
                 age_upper_bound = np.inf
             else:
-                age_lower_bound, age_upper_bound = map(
-                    int, age_range.split("-")
-                )
+                age_lower_bound, age_upper_bound = map(int, age_range.split("-"))
 
             age_mask = (age >= age_lower_bound) & (age <= age_upper_bound)
             label = f"state/census/age/{state}/{age_range}"
@@ -740,9 +697,7 @@ def apply(self):
         simulation.default_calculation_period = time_period
 
         # Calculate the baseline and reform income tax values.
-        income_tax_r = simulation.calculate(
-            "income_tax", map_to="household"
-        ).values
+        income_tax_r = simulation.calculate("income_tax", map_to="household").values
 
         # Compute the tax expenditure (TE) values.
         te_values = income_tax_r - income_tax_b
@@ -776,9 +731,7 @@ def _add_agi_state_targets():
         + soi_targets["VARIABLE"]
         + "/"
         + soi_targets.apply(
-            lambda r: get_agi_band_label(
-                r["AGI_LOWER_BOUND"], r["AGI_UPPER_BOUND"]
-            ),
+            lambda r: get_agi_band_label(r["AGI_LOWER_BOUND"], r["AGI_UPPER_BOUND"]),
             axis=1,
         )
     )
@@ -799,9 +752,7 @@ def _add_agi_metric_columns(
 
     agi = sim.calculate("adjusted_gross_income").values
     state = sim.calculate("state_code", map_to="person").values
-    state = sim.map_result(
-        state, "person", "tax_unit", how="value_from_first_person"
-    )
+    state = sim.map_result(state, "person", "tax_unit", how="value_from_first_person")
 
     for _, r in soi_targets.iterrows():
         lower, upper = r.AGI_LOWER_BOUND, r.AGI_UPPER_BOUND
@@ -845,13 +796,9 @@ def _add_state_real_estate_taxes(loss_matrix, targets_list, sim):
         rtol=1e-8,
     ), "Real estate tax totals do not sum to national target"
 
-    targets_list.extend(
-        real_estate_taxes_targets["real_estate_taxes_bn"].tolist()
-    )
+    targets_list.extend(real_estate_taxes_targets["real_estate_taxes_bn"].tolist())
 
-    real_estate_taxes = sim.calculate(
-        "real_estate_taxes", map_to="household"
-    ).values
+    real_estate_taxes = sim.calculate("real_estate_taxes", map_to="household").values
     state = sim.calculate("state_code", map_to="household").values
 
     for _, r in real_estate_taxes_targets.iterrows():
@@ -874,22 +821,16 @@ def _add_snap_state_targets(sim):
     ).calibration.gov.cbo._children["snap"]
     ratio = snap_targets[["Cost"]].sum().values[0] / national_cost_target
     snap_targets[["CostAdj"]] = snap_targets[["Cost"]] / ratio
-    assert (
-        np.round(snap_targets[["CostAdj"]].sum().values[0])
-        == national_cost_target
-    )
+    assert np.round(snap_targets[["CostAdj"]].sum().values[0]) == national_cost_target
 
     cost_targets = snap_targets.copy()[["GEO_ID", "CostAdj"]]
-    cost_targets["target_name"] = (
-        cost_targets["GEO_ID"].str[-4:] + "/snap-cost"
-    )
+    cost_targets["target_name"] = cost_targets["GEO_ID"].str[-4:] + "/snap-cost"
 
     hh_targets = snap_targets.copy()[["GEO_ID", "Households"]]
     hh_targets["target_name"] = snap_targets["GEO_ID"].str[-4:] + "/snap-hhs"
 
     target_names = (
-        cost_targets["target_name"].tolist()
-        + hh_targets["target_name"].tolist()
+        cost_targets["target_name"].tolist() + hh_targets["target_name"].tolist()
     )
     target_values = (
         cost_targets["CostAdj"].astype(float).tolist()
@@ -908,14 +849,12 @@ def _add_snap_metric_columns(
     snap_targets = pd.read_csv(CALIBRATION_FOLDER / "snap_state.csv")
 
     snap_cost = sim.calculate("snap_reported", map_to="household").values
-    snap_hhs = (
-        sim.calculate("snap_reported", map_to="household").values > 0
-    ).astype(int)
+    snap_hhs = (sim.calculate("snap_reported", map_to="household").values > 0).astype(
+        int
+    )
 
     state = sim.calculate("state_code", map_to="person").values
-    state = sim.map_result(
-        state, "person", "household", how="value_from_first_person"
-    )
+    state = sim.map_result(state, "person", "household", how="value_from_first_person")
     STATE_ABBR_TO_FIPS["DC"] = 11
     state_fips = pd.Series(state).apply(lambda s: STATE_ABBR_TO_FIPS[s])
 
@@ -934,9 +873,7 @@ def _add_snap_metric_columns(
     return loss_matrix
 
 
-def print_reweighting_diagnostics(
-    optimised_weights, loss_matrix, targets_array, label
-):
+def print_reweighting_diagnostics(optimised_weights, loss_matrix, targets_array, label):
     # Convert all inputs to NumPy arrays right at the start
     optimised_weights_np = (
         optimised_weights.numpy()
@@ -963,9 +900,7 @@ def print_reweighting_diagnostics(
     # All subsequent calculations use the guaranteed NumPy versions
     estimate = optimised_weights_np @ loss_matrix_np
 
-    rel_error = (
-        ((estimate - targets_array_np) + 1) / (targets_array_np + 1)
-    ) ** 2
+    rel_error = (((estimate - targets_array_np) + 1) / (targets_array_np + 1)) ** 2
     within_10_percent_mask = np.abs(estimate - targets_array_np) <= (
         0.10 * np.abs(targets_array_np)
     )
diff --git a/policyengine_us_data/utils/randomness.py b/policyengine_us_data/utils/randomness.py
index eac015227..001dbf2f8 100644
--- a/policyengine_us_data/utils/randomness.py
+++ b/policyengine_us_data/utils/randomness.py
@@ -11,9 +11,7 @@ def _stable_string_hash(s: str) -> np.uint64:
     Ported from policyengine_core.commons.formulas._stable_string_hash.
     """
     with warnings.catch_warnings():
-        warnings.filterwarnings(
-            "ignore", "overflow encountered", RuntimeWarning
-        )
+        warnings.filterwarnings("ignore", "overflow encountered", RuntimeWarning)
         h = np.uint64(0)
         for byte in s.encode("utf-8"):
             h = h * np.uint64(31) + np.uint64(byte)
diff --git a/policyengine_us_data/utils/soi.py b/policyengine_us_data/utils/soi.py
index d9538addb..b9755c30f 100644
--- a/policyengine_us_data/utils/soi.py
+++ b/policyengine_us_data/utils/soi.py
@@ -11,9 +11,7 @@ def pe_to_soi(pe_dataset, year):
     pe_sim.default_calculation_period = year
     df = pd.DataFrame()
 
-    pe = lambda variable: np.array(
-        pe_sim.calculate(variable, map_to="tax_unit")
-    )
+    pe = lambda variable: np.array(pe_sim.calculate(variable, map_to="tax_unit"))
 
     df["adjusted_gross_income"] = pe("adjusted_gross_income")
     df["exemption"] = pe("exemptions")
@@ -51,12 +49,8 @@ def pe_to_soi(pe_dataset, year):
     df["total_pension_income"] = pe("pension_income")
     df["taxable_pension_income"] = pe("taxable_pension_income")
     df["qualified_dividends"] = pe("qualified_dividend_income")
-    df["rent_and_royalty_net_income"] = pe("rental_income") * (
-        pe("rental_income") > 0
-    )
-    df["rent_and_royalty_net_losses"] = -pe("rental_income") * (
-        pe("rental_income") < 0
-    )
+    df["rent_and_royalty_net_income"] = pe("rental_income") * (pe("rental_income") > 0)
+    df["rent_and_royalty_net_losses"] = -pe("rental_income") * (pe("rental_income") < 0)
     df["total_social_security"] = pe("social_security")
     df["taxable_social_security"] = pe("taxable_social_security")
     df["income_tax_before_credits"] = pe("income_tax_before_credits")
@@ -176,8 +170,7 @@ def get_soi(year: int) -> pd.DataFrame:
         pe_name = uprating_map.get(variable)
         if pe_name in uprating.index:
             uprating_factors[variable] = (
-                uprating.loc[pe_name, year]
-                / uprating.loc[pe_name, soi.Year.max()]
+                uprating.loc[pe_name, year] / uprating.loc[pe_name, soi.Year.max()]
             )
         else:
             uprating_factors[variable] = (
@@ -218,9 +211,7 @@ def compare_soi_replication_to_soi(df, soi):
         elif fs == "Head of Household":
             subset = subset[subset.filing_status == "HEAD_OF_HOUSEHOLD"]
         elif fs == "Married Filing Jointly/Surviving Spouse":
-            subset = subset[
-                subset.filing_status.isin(["JOINT", "SURVIVING_SPOUSE"])
-            ]
+            subset = subset[subset.filing_status.isin(["JOINT", "SURVIVING_SPOUSE"])]
         elif fs == "Married Filing Separately":
             subset = subset[subset.filing_status == "SEPARATE"]
 
@@ -258,17 +249,13 @@ def compare_soi_replication_to_soi(df, soi):
         }
     )
 
-    soi_replication["Error"] = (
-        soi_replication["Value"] - soi_replication["SOI Value"]
-    )
+    soi_replication["Error"] = soi_replication["Value"] - soi_replication["SOI Value"]
     soi_replication["Absolute error"] = soi_replication["Error"].abs()
     soi_replication["Relative error"] = (
         (soi_replication["Error"] / soi_replication["SOI Value"])
         .replace([np.inf, -np.inf], np.nan)
         .fillna(0)
     )
-    soi_replication["Absolute relative error"] = soi_replication[
-        "Relative error"
-    ].abs()
+    soi_replication["Absolute relative error"] = soi_replication["Relative error"].abs()
 
     return soi_replication
diff --git a/policyengine_us_data/utils/spm.py b/policyengine_us_data/utils/spm.py
index b2e4538b5..ad3c9e9fb 100644
--- a/policyengine_us_data/utils/spm.py
+++ b/policyengine_us_data/utils/spm.py
@@ -44,9 +44,7 @@ def calculate_spm_thresholds_with_geoadj(
     for i in range(n):
         tenure_str = TENURE_CODE_MAP.get(int(tenure_codes[i]), "renter")
         base = base_thresholds[tenure_str]
-        equiv_scale = spm_equivalence_scale(
-            int(num_adults[i]), int(num_children[i])
-        )
+        equiv_scale = spm_equivalence_scale(int(num_adults[i]), int(num_children[i]))
         thresholds[i] = base * equiv_scale * geoadj[i]
 
     return thresholds
diff --git a/policyengine_us_data/utils/uprating.py b/policyengine_us_data/utils/uprating.py
index 6dd2f89ca..41d223b0b 100644
--- a/policyengine_us_data/utils/uprating.py
+++ b/policyengine_us_data/utils/uprating.py
@@ -23,9 +23,7 @@ def create_policyengine_uprating_factors_table():
             parameter = system.parameters.get_child(variable.uprating)
             start_value = parameter(START_YEAR)
             for year in range(START_YEAR, END_YEAR + 1):
-                population_growth = population_size(year) / population_size(
-                    START_YEAR
-                )
+                population_growth = population_size(year) / population_size(START_YEAR)
                 variable_names.append(variable.name)
                 years.append(year)
                 growth = parameter(year) / start_value
diff --git a/pyproject.toml b/pyproject.toml
index 95ada2a35..b9e309eb2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -55,7 +55,7 @@ l0 = [
 
 [dependency-groups]
 dev = [
-    "black",
+    "ruff>=0.9.0",
     "pytest",
     "quantile-forest",
     "tabulate",
@@ -82,24 +82,6 @@ testpaths = [
     "policyengine_us_data/tests",
 ]
 
-[tool.black]
-line-length = 79
-target-version = ['py311', 'py312', 'py313']
-include = '\.pyi?$'
-extend-exclude = '''
-/(
-  # directories
-  \.eggs
-  | \.git
-  | \.hg
-  | \.mypy_cache
-  | \.tox
-  | \.venv
-  | build
-  | dist
-)/
-'''
-
 [tool.towncrier]
 package = "policyengine_us_data"
 directory = "changelog.d"
diff --git a/scripts/generate_test_data.py b/scripts/generate_test_data.py
index 75025bca6..2f0de3080 100644
--- a/scripts/generate_test_data.py
+++ b/scripts/generate_test_data.py
@@ -46,9 +46,7 @@ def generate_synthetic_cps(n_households=1000, seed=42):
                 "age": age,
                 "sex": np.random.choice([1, 2]),  # 1=male, 2=female
                 "person_weight": np.random.uniform(1000, 3000),
-                "employment_income": (
-                    np.random.lognormal(10, 1.5) if age >= 18 else 0
-                ),
+                "employment_income": (np.random.lognormal(10, 1.5) if age >= 18 else 0),
                 "is_disabled": np.random.random() < 0.15,
                 "role": role,
             }
@@ -82,52 +80,32 @@ def generate_synthetic_puf(n_returns=10000, seed=43):
     for i in range(n_returns):
         # Income components (log-normal distributions)
         wages = np.random.lognormal(10.5, 1.2)
-        interest = (
-            np.random.exponential(500) if np.random.random() < 0.3 else 0
-        )
-        dividends = (
-            np.random.exponential(1000) if np.random.random() < 0.2 else 0
-        )
+        interest = np.random.exponential(500) if np.random.random() < 0.3 else 0
+        dividends = np.random.exponential(1000) if np.random.random() < 0.2 else 0
         business = np.random.lognormal(9, 2) if np.random.random() < 0.1 else 0
-        cap_gains = (
-            np.random.exponential(5000) if np.random.random() < 0.15 else 0
-        )
+        cap_gains = np.random.exponential(5000) if np.random.random() < 0.15 else 0
 
         # Deductions
-        mortgage_int = (
-            np.random.exponential(8000) if np.random.random() < 0.25 else 0
-        )
-        charity = (
-            np.random.exponential(3000) if np.random.random() < 0.3 else 0
-        )
+        mortgage_int = np.random.exponential(8000) if np.random.random() < 0.25 else 0
+        charity = np.random.exponential(3000) if np.random.random() < 0.3 else 0
         salt = min(10000, wages * 0.05 + np.random.normal(0, 1000))
 
         # Demographics (limited in PUF)
-        filing_status = np.random.choice(
-            [1, 2, 3, 4], p=[0.45, 0.40, 0.10, 0.05]
-        )
-        num_deps = np.random.choice(
-            [0, 1, 2, 3, 4], p=[0.6, 0.15, 0.15, 0.08, 0.02]
-        )
+        filing_status = np.random.choice([1, 2, 3, 4], p=[0.45, 0.40, 0.10, 0.05])
+        num_deps = np.random.choice([0, 1, 2, 3, 4], p=[0.6, 0.15, 0.15, 0.08, 0.02])
 
         return_data = {
             "return_id": i,
             "filing_status": filing_status,
             "num_dependents": num_deps,
             "age_primary": np.random.randint(18, 85),
-            "age_secondary": (
-                np.random.randint(18, 85) if filing_status == 2 else 0
-            ),
+            "age_secondary": (np.random.randint(18, 85) if filing_status == 2 else 0),
             "wages": wages,
             "interest": interest,
             "dividends": dividends,
             "business_income": business,
             "capital_gains": cap_gains,
-            "total_income": wages
-            + interest
-            + dividends
-            + business
-            + cap_gains,
+            "total_income": wages + interest + dividends + business + cap_gains,
             "mortgage_interest": mortgage_int,
             "charitable_deduction": charity,
             "salt_deduction": salt,
diff --git a/scripts/migrate_versioned_to_production.py b/scripts/migrate_versioned_to_production.py
index 5f99f74e3..1f2d7f447 100644
--- a/scripts/migrate_versioned_to_production.py
+++ b/scripts/migrate_versioned_to_production.py
@@ -93,9 +93,7 @@ def main():
     parser.add_argument(
         "--execute", action="store_true", help="Actually perform the migration"
     )
-    parser.add_argument(
-        "--gcs-only", action="store_true", help="Only migrate GCS"
-    )
+    parser.add_argument("--gcs-only", action="store_true", help="Only migrate GCS")
     parser.add_argument(
         "--hf-only", action="store_true", help="Only migrate HuggingFace"
     )
diff --git a/tests/test_h6_reform.py b/tests/test_h6_reform.py
index e68ed8db3..2acdd8ccf 100644
--- a/tests/test_h6_reform.py
+++ b/tests/test_h6_reform.py
@@ -27,17 +27,13 @@ def calculate_oasdi_thresholds(year: int) -> tuple[int, int]:
     return oasdi_single, oasdi_joint
 
 
-def get_swapped_thresholds(
-    oasdi_threshold: int, hi_threshold: int
-) -> tuple[int, int]:
+def get_swapped_thresholds(oasdi_threshold: int, hi_threshold: int) -> tuple[int, int]:
     """
     Apply min/max swap to handle threshold crossover.
 
     Returns (base_threshold, adjusted_threshold) where base <= adjusted.
     """
-    return min(oasdi_threshold, hi_threshold), max(
-        oasdi_threshold, hi_threshold
-    )
+    return min(oasdi_threshold, hi_threshold), max(oasdi_threshold, hi_threshold)
 
 
 def needs_crossover_swap(oasdi_threshold: int, hi_threshold: int) -> bool:
@@ -145,9 +141,7 @@ def test_single_crossover_starts_2046(self):
         # 2046+: crossover
         for year in range(2046, 2054):
             oasdi_single, _ = calculate_oasdi_thresholds(year)
-            assert needs_crossover_swap(
-                oasdi_single, HI_SINGLE
-            ), f"Year {year}"
+            assert needs_crossover_swap(oasdi_single, HI_SINGLE), f"Year {year}"
 
 
 class TestH6ThresholdSwapping:
@@ -211,9 +205,9 @@ def test_2045_error_analysis(self):
 
         assert single_error_swapped == pytest.approx(225)
         assert joint_error_default == pytest.approx(3_150)
-        assert joint_error_default / single_error_swapped == pytest.approx(
-            14.0
-        ), "Swapped rates should have 14x less error"
+        assert joint_error_default / single_error_swapped == pytest.approx(14.0), (
+            "Swapped rates should have 14x less error"
+        )
 
     def test_swapped_rates_align_with_tax_cut_intent(self):
         """Swapped rates undertax (not overtax), aligning with reform intent."""
diff --git a/tests/test_no_formula_variables_stored.py b/tests/test_no_formula_variables_stored.py
index 9334a5c78..7c7cb0de5 100644
--- a/tests/test_no_formula_variables_stored.py
+++ b/tests/test_no_formula_variables_stored.py
@@ -109,11 +109,7 @@ def test_stored_values_match_computed(
 
             computed_total = np.sum(computed.astype(float))
             if abs(stored_total) > 0:
-                pct_diff = (
-                    abs(stored_total - computed_total)
-                    / abs(stored_total)
-                    * 100
-                )
+                pct_diff = abs(stored_total - computed_total) / abs(stored_total) * 100
             else:
                 pct_diff = 0
 
@@ -141,23 +137,13 @@ def test_ss_subcomponents_sum_to_computed_total(sim, dataset_path):
     stored in the dataset sum to the simulation's computed total.
     """
     with h5py.File(dataset_path, "r") as f:
-        ss_retirement = f["social_security_retirement"]["2024"][...].astype(
-            float
-        )
-        ss_disability = f["social_security_disability"]["2024"][...].astype(
-            float
-        )
-        ss_survivors = f["social_security_survivors"]["2024"][...].astype(
-            float
-        )
-        ss_dependents = f["social_security_dependents"]["2024"][...].astype(
-            float
-        )
+        ss_retirement = f["social_security_retirement"]["2024"][...].astype(float)
+        ss_disability = f["social_security_disability"]["2024"][...].astype(float)
+        ss_survivors = f["social_security_survivors"]["2024"][...].astype(float)
+        ss_dependents = f["social_security_dependents"]["2024"][...].astype(float)
 
     sub_sum = ss_retirement + ss_disability + ss_survivors + ss_dependents
-    computed_total = np.array(sim.calculate("social_security", 2024)).astype(
-        float
-    )
+    computed_total = np.array(sim.calculate("social_security", 2024)).astype(float)
 
     # Only check records that have any SS income
     has_ss = computed_total > 0
diff --git a/tests/test_reproducibility.py b/tests/test_reproducibility.py
index 1ec097a7b..25755f0a6 100644
--- a/tests/test_reproducibility.py
+++ b/tests/test_reproducibility.py
@@ -144,9 +144,9 @@ def test_output_checksums(self):
             if file_path.exists() and filename != "checksums.txt":
                 with open(file_path, "rb") as f:
                     actual_checksum = hashlib.sha256(f.read()).hexdigest()
-                assert (
-                    actual_checksum == expected_checksum
-                ), f"Checksum mismatch for {filename}"
+                assert actual_checksum == expected_checksum, (
+                    f"Checksum mismatch for {filename}"
+                )
 
     def test_memory_usage(self):
         """Test that memory usage stays within bounds."""
diff --git a/tests/test_weeks_unemployed.py b/tests/test_weeks_unemployed.py
index 18aa47629..d64d8b64c 100644
--- a/tests/test_weeks_unemployed.py
+++ b/tests/test_weeks_unemployed.py
@@ -21,9 +21,9 @@ def test_lkweeks_in_person_columns(self):
 
         # Check for correct variable
         assert '"LKWEEKS"' in content, "LKWEEKS should be in PERSON_COLUMNS"
-        assert (
-            '"WKSUNEM"' not in content
-        ), "WKSUNEM should not be in PERSON_COLUMNS (Census uses LKWEEKS)"
+        assert '"WKSUNEM"' not in content, (
+            "WKSUNEM should not be in PERSON_COLUMNS (Census uses LKWEEKS)"
+        )
 
     def test_cps_uses_lkweeks(self):
         """Test that cps.py uses LKWEEKS, not WKSUNEM."""
diff --git a/uv.lock b/uv.lock
index 11179f708..044161b89 100644
--- a/uv.lock
+++ b/uv.lock
@@ -167,33 +167,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/1a/39/47f9197bdd44df24d67ac8893641e16f386c984a0619ef2ee4c51fbbc019/beautifulsoup4-4.14.3-py3-none-any.whl", hash = "sha256:0918bfe44902e6ad8d57732ba310582e98da931428d231a5ecb9e7c703a735bb", size = 107721, upload-time = "2025-11-30T15:08:24.087Z" },
 ]
 
-[[package]]
-name = "black"
-version = "25.12.0"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "click" },
-    { name = "mypy-extensions" },
-    { name = "packaging" },
-    { name = "pathspec" },
-    { name = "platformdirs" },
-    { name = "pytokens" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/c4/d9/07b458a3f1c525ac392b5edc6b191ff140b596f9d77092429417a54e249d/black-25.12.0.tar.gz", hash = "sha256:8d3dd9cea14bff7ddc0eb243c811cdb1a011ebb4800a5f0335a01a68654796a7", size = 659264, upload-time = "2025-12-08T01:40:52.501Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/d1/bd/26083f805115db17fda9877b3c7321d08c647df39d0df4c4ca8f8450593e/black-25.12.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:31f96b7c98c1ddaeb07dc0f56c652e25bdedaac76d5b68a059d998b57c55594a", size = 1924178, upload-time = "2025-12-08T01:49:51.048Z" },
-    { url = "https://files.pythonhosted.org/packages/89/6b/ea00d6651561e2bdd9231c4177f4f2ae19cc13a0b0574f47602a7519b6ca/black-25.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:05dd459a19e218078a1f98178c13f861fe6a9a5f88fc969ca4d9b49eb1809783", size = 1742643, upload-time = "2025-12-08T01:49:59.09Z" },
-    { url = "https://files.pythonhosted.org/packages/6d/f3/360fa4182e36e9875fabcf3a9717db9d27a8d11870f21cff97725c54f35b/black-25.12.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c1f68c5eff61f226934be6b5b80296cf6939e5d2f0c2f7d543ea08b204bfaf59", size = 1800158, upload-time = "2025-12-08T01:44:27.301Z" },
-    { url = "https://files.pythonhosted.org/packages/f8/08/2c64830cb6616278067e040acca21d4f79727b23077633953081c9445d61/black-25.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:274f940c147ddab4442d316b27f9e332ca586d39c85ecf59ebdea82cc9ee8892", size = 1426197, upload-time = "2025-12-08T01:45:51.198Z" },
-    { url = "https://files.pythonhosted.org/packages/d4/60/a93f55fd9b9816b7432cf6842f0e3000fdd5b7869492a04b9011a133ee37/black-25.12.0-cp312-cp312-win_arm64.whl", hash = "sha256:169506ba91ef21e2e0591563deda7f00030cb466e747c4b09cb0a9dae5db2f43", size = 1237266, upload-time = "2025-12-08T01:45:10.556Z" },
-    { url = "https://files.pythonhosted.org/packages/c8/52/c551e36bc95495d2aa1a37d50566267aa47608c81a53f91daa809e03293f/black-25.12.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a05ddeb656534c3e27a05a29196c962877c83fa5503db89e68857d1161ad08a5", size = 1923809, upload-time = "2025-12-08T01:46:55.126Z" },
-    { url = "https://files.pythonhosted.org/packages/a0/f7/aac9b014140ee56d247e707af8db0aae2e9efc28d4a8aba92d0abd7ae9d1/black-25.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9ec77439ef3e34896995503865a85732c94396edcc739f302c5673a2315e1e7f", size = 1742384, upload-time = "2025-12-08T01:49:37.022Z" },
-    { url = "https://files.pythonhosted.org/packages/74/98/38aaa018b2ab06a863974c12b14a6266badc192b20603a81b738c47e902e/black-25.12.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0e509c858adf63aa61d908061b52e580c40eae0dfa72415fa47ac01b12e29baf", size = 1798761, upload-time = "2025-12-08T01:46:05.386Z" },
-    { url = "https://files.pythonhosted.org/packages/16/3a/a8ac542125f61574a3f015b521ca83b47321ed19bb63fe6d7560f348bfe1/black-25.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:252678f07f5bac4ff0d0e9b261fbb029fa530cfa206d0a636a34ab445ef8ca9d", size = 1429180, upload-time = "2025-12-08T01:45:34.903Z" },
-    { url = "https://files.pythonhosted.org/packages/e6/2d/bdc466a3db9145e946762d52cd55b1385509d9f9004fec1c97bdc8debbfb/black-25.12.0-cp313-cp313-win_arm64.whl", hash = "sha256:bc5b1c09fe3c931ddd20ee548511c64ebf964ada7e6f0763d443947fd1c603ce", size = 1239350, upload-time = "2025-12-08T01:46:09.458Z" },
-    { url = "https://files.pythonhosted.org/packages/68/11/21331aed19145a952ad28fca2756a1433ee9308079bd03bd898e903a2e53/black-25.12.0-py3-none-any.whl", hash = "sha256:48ceb36c16dbc84062740049eef990bb2ce07598272e673c17d1a7720c71c828", size = 206191, upload-time = "2025-12-08T01:40:50.963Z" },
-]
-
 [[package]]
 name = "bleach"
 version = "6.3.0"
@@ -637,6 +610,7 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/f8/0a/a3871375c7b9727edaeeea994bfff7c63ff7804c9829c19309ba2e058807/greenlet-3.3.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:b01548f6e0b9e9784a2c99c5651e5dc89ffcbe870bc5fb2e5ef864e9cc6b5dcb", size = 276379, upload-time = "2025-12-04T14:23:30.498Z" },
     { url = "https://files.pythonhosted.org/packages/43/ab/7ebfe34dce8b87be0d11dae91acbf76f7b8246bf9d6b319c741f99fa59c6/greenlet-3.3.0-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:349345b770dc88f81506c6861d22a6ccd422207829d2c854ae2af8025af303e3", size = 597294, upload-time = "2025-12-04T14:50:06.847Z" },
     { url = "https://files.pythonhosted.org/packages/a4/39/f1c8da50024feecd0793dbd5e08f526809b8ab5609224a2da40aad3a7641/greenlet-3.3.0-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e8e18ed6995e9e2c0b4ed264d2cf89260ab3ac7e13555b8032b25a74c6d18655", size = 607742, upload-time = "2025-12-04T14:57:42.349Z" },
+    { url = "https://files.pythonhosted.org/packages/77/cb/43692bcd5f7a0da6ec0ec6d58ee7cddb606d055ce94a62ac9b1aa481e969/greenlet-3.3.0-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c024b1e5696626890038e34f76140ed1daf858e37496d33f2af57f06189e70d7", size = 622297, upload-time = "2025-12-04T15:07:13.552Z" },
     { url = "https://files.pythonhosted.org/packages/75/b0/6bde0b1011a60782108c01de5913c588cf51a839174538d266de15e4bf4d/greenlet-3.3.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:047ab3df20ede6a57c35c14bf5200fcf04039d50f908270d3f9a7a82064f543b", size = 609885, upload-time = "2025-12-04T14:26:02.368Z" },
     { url = "https://files.pythonhosted.org/packages/49/0e/49b46ac39f931f59f987b7cd9f34bfec8ef81d2a1e6e00682f55be5de9f4/greenlet-3.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2d9ad37fc657b1102ec880e637cccf20191581f75c64087a549e66c57e1ceb53", size = 1567424, upload-time = "2025-12-04T15:04:23.757Z" },
     { url = "https://files.pythonhosted.org/packages/05/f5/49a9ac2dff7f10091935def9165c90236d8f175afb27cbed38fb1d61ab6b/greenlet-3.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:83cd0e36932e0e7f36a64b732a6f60c2fc2df28c351bae79fbaf4f8092fe7614", size = 1636017, upload-time = "2025-12-04T14:27:29.688Z" },
@@ -644,6 +618,7 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/02/2f/28592176381b9ab2cafa12829ba7b472d177f3acc35d8fbcf3673d966fff/greenlet-3.3.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:a1e41a81c7e2825822f4e068c48cb2196002362619e2d70b148f20a831c00739", size = 275140, upload-time = "2025-12-04T14:23:01.282Z" },
     { url = "https://files.pythonhosted.org/packages/2c/80/fbe937bf81e9fca98c981fe499e59a3f45df2a04da0baa5c2be0dca0d329/greenlet-3.3.0-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9f515a47d02da4d30caaa85b69474cec77b7929b2e936ff7fb853d42f4bf8808", size = 599219, upload-time = "2025-12-04T14:50:08.309Z" },
     { url = "https://files.pythonhosted.org/packages/c2/ff/7c985128f0514271b8268476af89aee6866df5eec04ac17dcfbc676213df/greenlet-3.3.0-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7d2d9fd66bfadf230b385fdc90426fcd6eb64db54b40c495b72ac0feb5766c54", size = 610211, upload-time = "2025-12-04T14:57:43.968Z" },
+    { url = "https://files.pythonhosted.org/packages/79/07/c47a82d881319ec18a4510bb30463ed6891f2ad2c1901ed5ec23d3de351f/greenlet-3.3.0-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:30a6e28487a790417d036088b3bcb3f3ac7d8babaa7d0139edbaddebf3af9492", size = 624311, upload-time = "2025-12-04T15:07:14.697Z" },
     { url = "https://files.pythonhosted.org/packages/fd/8e/424b8c6e78bd9837d14ff7df01a9829fc883ba2ab4ea787d4f848435f23f/greenlet-3.3.0-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:087ea5e004437321508a8d6f20efc4cfec5e3c30118e1417ea96ed1d93950527", size = 612833, upload-time = "2025-12-04T14:26:03.669Z" },
     { url = "https://files.pythonhosted.org/packages/b5/ba/56699ff9b7c76ca12f1cdc27a886d0f81f2189c3455ff9f65246780f713d/greenlet-3.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ab97cf74045343f6c60a39913fa59710e4bd26a536ce7ab2397adf8b27e67c39", size = 1567256, upload-time = "2025-12-04T15:04:25.276Z" },
     { url = "https://files.pythonhosted.org/packages/1e/37/f31136132967982d698c71a281a8901daf1a8fbab935dce7c0cf15f942cc/greenlet-3.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5375d2e23184629112ca1ea89a53389dddbffcf417dad40125713d88eb5f96e8", size = 1636483, upload-time = "2025-12-04T14:27:30.804Z" },
@@ -1252,15 +1227,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/e5/db/0314e4e2db56ebcf450f277904ffd84a7988b9e5da8d0d61ab2d057df2b6/msgpack-1.1.2-cp313-cp313-win_arm64.whl", hash = "sha256:e69b39f8c0aa5ec24b57737ebee40be647035158f14ed4b40e6f150077e21a84", size = 64118, upload-time = "2025-10-08T09:15:23.402Z" },
 ]
 
-[[package]]
-name = "mypy-extensions"
-version = "1.1.0"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" },
-]
-
 [[package]]
 name = "mystmd"
 version = "1.7.1"
@@ -1697,15 +1663,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/78/f9/690a8600b93c332de3ab4a344a4ac34f00c8f104917061f779db6a918ed6/pathlib-1.0.1-py3-none-any.whl", hash = "sha256:f35f95ab8b0f59e6d354090350b44a80a80635d22efdedfa84c7ad1cf0a74147", size = 14363, upload-time = "2022-05-04T13:37:20.585Z" },
 ]
 
-[[package]]
-name = "pathspec"
-version = "1.0.1"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/28/2e/83722ece0f6ee24387d6cb830dd562ddbcd6ce0b9d76072c6849670c31b4/pathspec-1.0.1.tar.gz", hash = "sha256:e2769b508d0dd47b09af6ee2c75b2744a2cb1f474ae4b1494fd6a1b7a841613c", size = 129791, upload-time = "2026-01-06T13:02:55.15Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/d2/fe/2257c71721aeab6a6e8aa1f00d01f2a20f58547d249a6c8fef5791f559fc/pathspec-1.0.1-py3-none-any.whl", hash = "sha256:8870061f22c58e6d83463cfce9a7dd6eca0512c772c1001fb09ac64091816721", size = 54584, upload-time = "2026-01-06T13:02:53.601Z" },
-]
-
 [[package]]
 name = "patsy"
 version = "1.0.2"
@@ -1894,7 +1851,6 @@ l0 = [
 
 [package.dev-dependencies]
 dev = [
-    { name = "black" },
     { name = "build" },
     { name = "furo" },
     { name = "itables" },
@@ -1902,6 +1858,7 @@ dev = [
     { name = "mystmd" },
     { name = "pytest" },
     { name = "quantile-forest" },
+    { name = "ruff" },
     { name = "tabulate" },
     { name = "tomli" },
     { name = "towncrier" },
@@ -1939,7 +1896,6 @@ provides-extras = ["calibration", "l0"]
 
 [package.metadata.requires-dev]
 dev = [
-    { name = "black" },
     { name = "build" },
     { name = "furo" },
     { name = "itables" },
@@ -1947,6 +1903,7 @@ dev = [
     { name = "mystmd", specifier = ">=1.7.0" },
     { name = "pytest" },
     { name = "quantile-forest" },
+    { name = "ruff", specifier = ">=0.9.0" },
     { name = "tabulate" },
     { name = "tomli" },
     { name = "towncrier", specifier = ">=24.8.0" },
@@ -2215,15 +2172,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/51/e5/fecf13f06e5e5f67e8837d777d1bc43fac0ed2b77a676804df5c34744727/python_json_logger-4.0.0-py3-none-any.whl", hash = "sha256:af09c9daf6a813aa4cc7180395f50f2a9e5fa056034c9953aec92e381c5ba1e2", size = 15548, upload-time = "2025-10-06T04:15:17.553Z" },
 ]
 
-[[package]]
-name = "pytokens"
-version = "0.3.0"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/4e/8d/a762be14dae1c3bf280202ba3172020b2b0b4c537f94427435f19c413b72/pytokens-0.3.0.tar.gz", hash = "sha256:2f932b14ed08de5fcf0b391ace2642f858f1394c0857202959000b68ed7a458a", size = 17644, upload-time = "2025-11-05T13:36:35.34Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/84/25/d9db8be44e205a124f6c98bc0324b2bb149b7431c53877fc6d1038dddaf5/pytokens-0.3.0-py3-none-any.whl", hash = "sha256:95b2b5eaf832e469d141a378872480ede3f251a5a5041b8ec6e581d3ac71bbf3", size = 12195, upload-time = "2025-11-05T13:36:33.183Z" },
-]
-
 [[package]]
 name = "pytz"
 version = "2025.2"
@@ -2477,6 +2425,31 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762", size = 34696, upload-time = "2025-04-16T09:51:17.142Z" },
 ]
 
+[[package]]
+name = "ruff"
+version = "0.15.5"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/77/9b/840e0039e65fcf12758adf684d2289024d6140cde9268cc59887dc55189c/ruff-0.15.5.tar.gz", hash = "sha256:7c3601d3b6d76dce18c5c824fc8d06f4eef33d6df0c21ec7799510cde0f159a2", size = 4574214, upload-time = "2026-03-05T20:06:34.946Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/47/20/5369c3ce21588c708bcbe517a8fbe1a8dfdb5dfd5137e14790b1da71612c/ruff-0.15.5-py3-none-linux_armv6l.whl", hash = "sha256:4ae44c42281f42e3b06b988e442d344a5b9b72450ff3c892e30d11b29a96a57c", size = 10478185, upload-time = "2026-03-05T20:06:29.093Z" },
+    { url = "https://files.pythonhosted.org/packages/44/ed/e81dd668547da281e5dce710cf0bc60193f8d3d43833e8241d006720e42b/ruff-0.15.5-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6edd3792d408ebcf61adabc01822da687579a1a023f297618ac27a5b51ef0080", size = 10859201, upload-time = "2026-03-05T20:06:32.632Z" },
+    { url = "https://files.pythonhosted.org/packages/c4/8f/533075f00aaf19b07c5cd6aa6e5d89424b06b3b3f4583bfa9c640a079059/ruff-0.15.5-py3-none-macosx_11_0_arm64.whl", hash = "sha256:89f463f7c8205a9f8dea9d658d59eff49db05f88f89cc3047fb1a02d9f344010", size = 10184752, upload-time = "2026-03-05T20:06:40.312Z" },
+    { url = "https://files.pythonhosted.org/packages/66/0e/ba49e2c3fa0395b3152bad634c7432f7edfc509c133b8f4529053ff024fb/ruff-0.15.5-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ba786a8295c6574c1116704cf0b9e6563de3432ac888d8f83685654fe528fd65", size = 10534857, upload-time = "2026-03-05T20:06:19.581Z" },
+    { url = "https://files.pythonhosted.org/packages/59/71/39234440f27a226475a0659561adb0d784b4d247dfe7f43ffc12dd02e288/ruff-0.15.5-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fd4b801e57955fe9f02b31d20375ab3a5c4415f2e5105b79fb94cf2642c91440", size = 10309120, upload-time = "2026-03-05T20:06:00.435Z" },
+    { url = "https://files.pythonhosted.org/packages/f5/87/4140aa86a93df032156982b726f4952aaec4a883bb98cb6ef73c347da253/ruff-0.15.5-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:391f7c73388f3d8c11b794dbbc2959a5b5afe66642c142a6effa90b45f6f5204", size = 11047428, upload-time = "2026-03-05T20:05:51.867Z" },
+    { url = "https://files.pythonhosted.org/packages/5a/f7/4953e7e3287676f78fbe85e3a0ca414c5ca81237b7575bdadc00229ac240/ruff-0.15.5-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8dc18f30302e379fe1e998548b0f5e9f4dff907f52f73ad6da419ea9c19d66c8", size = 11914251, upload-time = "2026-03-05T20:06:22.887Z" },
+    { url = "https://files.pythonhosted.org/packages/77/46/0f7c865c10cf896ccf5a939c3e84e1cfaeed608ff5249584799a74d33835/ruff-0.15.5-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1cc6e7f90087e2d27f98dc34ed1b3ab7c8f0d273cc5431415454e22c0bd2a681", size = 11333801, upload-time = "2026-03-05T20:05:57.168Z" },
+    { url = "https://files.pythonhosted.org/packages/d3/01/a10fe54b653061585e655f5286c2662ebddb68831ed3eaebfb0eb08c0a16/ruff-0.15.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1cb7169f53c1ddb06e71a9aebd7e98fc0fea936b39afb36d8e86d36ecc2636a", size = 11206821, upload-time = "2026-03-05T20:06:03.441Z" },
+    { url = "https://files.pythonhosted.org/packages/7a/0d/2132ceaf20c5e8699aa83da2706ecb5c5dcdf78b453f77edca7fb70f8a93/ruff-0.15.5-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:9b037924500a31ee17389b5c8c4d88874cc6ea8e42f12e9c61a3d754ff72f1ca", size = 11133326, upload-time = "2026-03-05T20:06:25.655Z" },
+    { url = "https://files.pythonhosted.org/packages/72/cb/2e5259a7eb2a0f87c08c0fe5bf5825a1e4b90883a52685524596bfc93072/ruff-0.15.5-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:65bb414e5b4eadd95a8c1e4804f6772bbe8995889f203a01f77ddf2d790929dd", size = 10510820, upload-time = "2026-03-05T20:06:37.79Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/20/b67ce78f9e6c59ffbdb5b4503d0090e749b5f2d31b599b554698a80d861c/ruff-0.15.5-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:d20aa469ae3b57033519c559e9bc9cd9e782842e39be05b50e852c7c981fa01d", size = 10302395, upload-time = "2026-03-05T20:05:54.504Z" },
+    { url = "https://files.pythonhosted.org/packages/5f/e5/719f1acccd31b720d477751558ed74e9c88134adcc377e5e886af89d3072/ruff-0.15.5-py3-none-musllinux_1_2_i686.whl", hash = "sha256:15388dd28c9161cdb8eda68993533acc870aa4e646a0a277aa166de9ad5a8752", size = 10754069, upload-time = "2026-03-05T20:06:06.422Z" },
+    { url = "https://files.pythonhosted.org/packages/c3/9c/d1db14469e32d98f3ca27079dbd30b7b44dbb5317d06ab36718dee3baf03/ruff-0.15.5-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:b30da330cbd03bed0c21420b6b953158f60c74c54c5f4c1dabbdf3a57bf355d2", size = 11304315, upload-time = "2026-03-05T20:06:10.867Z" },
+    { url = "https://files.pythonhosted.org/packages/28/3a/950367aee7c69027f4f422059227b290ed780366b6aecee5de5039d50fa8/ruff-0.15.5-py3-none-win32.whl", hash = "sha256:732e5ee1f98ba5b3679029989a06ca39a950cced52143a0ea82a2102cb592b74", size = 10551676, upload-time = "2026-03-05T20:06:13.705Z" },
+    { url = "https://files.pythonhosted.org/packages/b8/00/bf077a505b4e649bdd3c47ff8ec967735ce2544c8e4a43aba42ee9bf935d/ruff-0.15.5-py3-none-win_amd64.whl", hash = "sha256:821d41c5fa9e19117616c35eaa3f4b75046ec76c65e7ae20a333e9a8696bc7fe", size = 11678972, upload-time = "2026-03-05T20:06:45.379Z" },
+    { url = "https://files.pythonhosted.org/packages/fe/4e/cd76eca6db6115604b7626668e891c9dd03330384082e33662fb0f113614/ruff-0.15.5-py3-none-win_arm64.whl", hash = "sha256:b498d1c60d2fe5c10c45ec3f698901065772730b411f164ae270bb6bfcc4740b", size = 10965572, upload-time = "2026-03-05T20:06:16.984Z" },
+]
+
 [[package]]
 name = "samplics"
 version = "0.4.55"
diff --git a/validation/benefit_validation.py b/validation/benefit_validation.py
index d614ae032..cf4689720 100644
--- a/validation/benefit_validation.py
+++ b/validation/benefit_validation.py
@@ -50,9 +50,7 @@ def analyze_benefit_underreporting():
 
         # Participation
         participants = (benefit > 0).sum()
-        weighted_participants = (
-            (benefit > 0) * weight
-        ).sum() / 1e6  # millions
+        weighted_participants = ((benefit > 0) * weight).sum() / 1e6  # millions
 
         # Underreporting factor
         underreporting = info["admin_total"] / total if total > 0 else np.inf
@@ -168,9 +166,7 @@ def earnings_reform(parameters):
         earnings_change = earnings * pct_increase / 100
         net_change = reformed_net - original_net
 
-        emtr = np.where(
-            earnings_change > 0, 1 - (net_change / earnings_change), 0
-        )
+        emtr = np.where(earnings_change > 0, 1 - (net_change / earnings_change), 0)
 
         # Focus on sample
         sample_emtr = emtr[sample]
@@ -254,9 +250,7 @@ def analyze_aca_subsidies():
             total_ptc = (ptc[mask] * weight[mask]).sum() / 1e9
             recipients = ((ptc > 0) & mask).sum()
             weighted_recipients = (((ptc > 0) & mask) * weight).sum() / 1e6
-            mean_ptc = (
-                ptc[(ptc > 0) & mask].mean() if ((ptc > 0) & mask).any() else 0
-            )
+            mean_ptc = ptc[(ptc > 0) & mask].mean() if ((ptc > 0) & mask).any() else 0
 
             results.append(
                 {
@@ -307,9 +301,7 @@ def generate_benefit_validation_report():
     print("\n\n4. Top 10 States by SNAP Benefits")
     print("-" * 40)
     state_df = validate_state_benefits()
-    top_states = state_df.nlargest(10, "snap_billions")[
-        ["state_code", "snap_billions"]
-    ]
+    top_states = state_df.nlargest(10, "snap_billions")[["state_code", "snap_billions"]]
     print(top_states.to_string(index=False))
 
     # ACA analysis
@@ -319,9 +311,7 @@ def generate_benefit_validation_report():
     print(aca_df.to_string(index=False))
 
     # Save results
-    underreporting_df.to_csv(
-        "validation/benefit_underreporting.csv", index=False
-    )
+    underreporting_df.to_csv("validation/benefit_underreporting.csv", index=False)
     interactions_df.to_csv("validation/program_interactions.csv", index=False)
     emtr_df.to_csv("validation/effective_marginal_tax_rates.csv", index=False)
     state_df.to_csv("validation/state_benefit_totals.csv", index=False)
diff --git a/validation/generate_qrf_statistics.py b/validation/generate_qrf_statistics.py
index 87d43a54a..4015fe1ed 100644
--- a/validation/generate_qrf_statistics.py
+++ b/validation/generate_qrf_statistics.py
@@ -222,18 +222,14 @@
 print(support_df.round(3).to_string())
 
 print("\nSummary:")
-print(
-    f"- Average overlap coefficient: {support_df['overlap_coefficient'].mean():.3f}"
-)
+print(f"- Average overlap coefficient: {support_df['overlap_coefficient'].mean():.3f}")
 print(
     f"- All overlap coefficients > 0.85: {(support_df['overlap_coefficient'] > 0.85).all()}"
 )
 print(
     f"- Variables with SMD > 0.25: {(support_df['standardized_mean_diff'] > 0.25).sum()}"
 )
-print(
-    f"- All SMDs < 0.25: {(support_df['standardized_mean_diff'] < 0.25).all()}"
-)
+print(f"- All SMDs < 0.25: {(support_df['standardized_mean_diff'] < 0.25).all()}")
 print(
     f"- Variables with significant KS test (p<0.05): {(support_df['ks_pvalue'] < 0.05).sum()}"
 )
@@ -243,7 +239,7 @@
 print("\n\n2. VARIANCE EXPLAINED BY PREDICTORS")
 print("-" * 40)
 for var, r2 in variance_explained.items():
-    print(f"- {var.replace('_', ' ').title()}: {r2*100:.0f}%")
+    print(f"- {var.replace('_', ' ').title()}: {r2 * 100:.0f}%")
 
 # 3. Out-of-Sample Accuracy
 print("\n\n3. OUT-OF-SAMPLE PREDICTION ACCURACY")
@@ -279,9 +275,7 @@
 print(
     f"- All correlation differences < 0.05: {(joint_df['correlation_diff'] < 0.05).all()}"
 )
-print(
-    f"- Average correlation difference: {joint_df['correlation_diff'].mean():.3f}"
-)
+print(f"- Average correlation difference: {joint_df['correlation_diff'].mean():.3f}")
 
 # Save all results
 print("\n\nSAVING RESULTS...")
@@ -294,9 +288,7 @@
 )
 
 accuracy_df.to_csv("validation/outputs/qrf_accuracy_metrics.csv")
-print(
-    "✓ Saved accuracy metrics to validation/outputs/qrf_accuracy_metrics.csv"
-)
+print("✓ Saved accuracy metrics to validation/outputs/qrf_accuracy_metrics.csv")
 
 joint_df.to_csv("validation/outputs/joint_distribution_tests.csv", index=False)
 print(
@@ -308,10 +300,8 @@
     f.write("Variance Explained by Predictors (R-squared)\n")
     f.write("=" * 40 + "\n\n")
     for var, r2 in variance_explained.items():
-        f.write(f"{var.replace('_', ' ').title()}: {r2*100:.0f}%\n")
-print(
-    "✓ Saved variance explained to validation/outputs/variance_explained.txt"
-)
+        f.write(f"{var.replace('_', ' ').title()}: {r2 * 100:.0f}%\n")
+print("✓ Saved variance explained to validation/outputs/variance_explained.txt")
 
 # Create summary report
 with open("validation/outputs/qrf_diagnostics_summary.txt", "w") as f:
@@ -327,17 +317,13 @@
     f.write(
         f"All overlap coefficients > 0.85: {(support_df['overlap_coefficient'] > 0.85).all()}\n"
     )
-    f.write(
-        f"All SMDs < 0.25: {(support_df['standardized_mean_diff'] < 0.25).all()}\n"
-    )
-    f.write(
-        f"All KS tests p > 0.05: {(support_df['ks_pvalue'] > 0.05).all()}\n\n"
-    )
+    f.write(f"All SMDs < 0.25: {(support_df['standardized_mean_diff'] < 0.25).all()}\n")
+    f.write(f"All KS tests p > 0.05: {(support_df['ks_pvalue'] > 0.05).all()}\n\n")
 
     f.write("2. VARIANCE EXPLAINED\n")
     f.write("-" * 40 + "\n")
     for var, r2 in variance_explained.items():
-        f.write(f"{var.replace('_', ' ').title()}: {r2*100:.0f}%\n")
+        f.write(f"{var.replace('_', ' ').title()}: {r2 * 100:.0f}%\n")
 
     f.write("\n3. OUT-OF-SAMPLE ACCURACY\n")
     f.write("-" * 40 + "\n")
@@ -361,9 +347,7 @@
     )
 
     f.write("\n" + "=" * 60 + "\n")
-    f.write(
-        "These statistics demonstrate that the QRF methodology successfully:\n"
-    )
+    f.write("These statistics demonstrate that the QRF methodology successfully:\n")
     f.write("- Maintains strong common support between datasets\n")
     f.write("- Achieves high predictive accuracy for imputation\n")
     f.write("- Preserves joint distributions of variables\n")
diff --git a/validation/qrf_diagnostics.py b/validation/qrf_diagnostics.py
index dcd23b5ac..d22f883c1 100644
--- a/validation/qrf_diagnostics.py
+++ b/validation/qrf_diagnostics.py
@@ -28,9 +28,7 @@ def analyze_common_support(cps_data, puf_data, predictors):
 
         # Overlap coefficient (Weitzman 1970)
         # OVL = sum(min(f(x), g(x))) where f,g are densities
-        bins = np.histogram_bin_edges(
-            np.concatenate([cps_dist, puf_dist]), bins=50
-        )
+        bins = np.histogram_bin_edges(np.concatenate([cps_dist, puf_dist]), bins=50)
 
         cps_hist, _ = np.histogram(cps_dist, bins=bins, density=True)
         puf_hist, _ = np.histogram(puf_dist, bins=bins, density=True)
@@ -81,9 +79,7 @@ def validate_qrf_accuracy(puf_data, predictors, target_vars, n_estimators=100):
         )
 
         # Fit QRF
-        qrf = RandomForestQuantileRegressor(
-            n_estimators=n_estimators, random_state=42
-        )
+        qrf = RandomForestQuantileRegressor(n_estimators=n_estimators, random_state=42)
         qrf.fit(X_train, y_train)
 
         # Predictions at multiple quantiles
@@ -92,7 +88,7 @@ def validate_qrf_accuracy(puf_data, predictors, target_vars, n_estimators=100):
 
         for q in quantiles:
             pred = qrf.predict(X_test, quantiles=[q])
-            predictions[f"q{int(q*100)}"] = pred.flatten()
+            predictions[f"q{int(q * 100)}"] = pred.flatten()
 
         # Calculate metrics
         median_pred = predictions["q50"]
@@ -124,9 +120,7 @@ def validate_qrf_accuracy(puf_data, predictors, target_vars, n_estimators=100):
             "qrf_rmse": rmse,
             "hotdeck_mae": hotdeck_mae,
             "linear_mae": lr_mae,
-            "qrf_improvement_vs_hotdeck": (hotdeck_mae - mae)
-            / hotdeck_mae
-            * 100,
+            "qrf_improvement_vs_hotdeck": (hotdeck_mae - mae) / hotdeck_mae * 100,
             "qrf_improvement_vs_linear": (lr_mae - mae) / lr_mae * 100,
             "coverage_90pct": coverage_90,
             "coverage_50pct": coverage_50,
@@ -135,9 +129,7 @@ def validate_qrf_accuracy(puf_data, predictors, target_vars, n_estimators=100):
     return pd.DataFrame(results).T
 
 
-def test_joint_distribution_preservation(
-    original_data, imputed_data, var_pairs
-):
+def test_joint_distribution_preservation(original_data, imputed_data, var_pairs):
     """Test whether joint distributions are preserved in imputation."""
 
     results = []
@@ -159,12 +151,12 @@ def test_joint_distribution_preservation(
 
         # Joint distribution test (2D KS test approximation)
         # Using average of marginal KS statistics
-        ks1 = stats.ks_2samp(
-            original_data[var1].dropna(), imputed_data[var1].dropna()
-        )[0]
-        ks2 = stats.ks_2samp(
-            original_data[var2].dropna(), imputed_data[var2].dropna()
-        )[0]
+        ks1 = stats.ks_2samp(original_data[var1].dropna(), imputed_data[var1].dropna())[
+            0
+        ]
+        ks2 = stats.ks_2samp(original_data[var2].dropna(), imputed_data[var2].dropna())[
+            0
+        ]
         joint_ks = (ks1 + ks2) / 2
 
         results.append(
@@ -281,9 +273,7 @@ def generate_qrf_diagnostic_report(cps_data, puf_data, imputed_data):
     print(
         f"- Average QRF improvement vs linear: {accuracy_df['qrf_improvement_vs_linear'].mean():.1f}%"
     )
-    print(
-        f"- Average 90% coverage: {accuracy_df['coverage_90pct'].mean():.3f}"
-    )
+    print(f"- Average 90% coverage: {accuracy_df['coverage_90pct'].mean():.3f}")
 
     # Joint distribution preservation
     print("\n\n3. Joint Distribution Preservation")
@@ -295,16 +285,12 @@ def generate_qrf_diagnostic_report(cps_data, puf_data, imputed_data):
         ("pension_income", "social_security"),
     ]
 
-    joint_df = test_joint_distribution_preservation(
-        puf_data, imputed_data, var_pairs
-    )
+    joint_df = test_joint_distribution_preservation(puf_data, imputed_data, var_pairs)
     print(joint_df.to_string(index=False))
 
     # Create diagnostic plots
     create_diagnostic_plots(cps_data, puf_data, predictors)
-    print(
-        "\n\nDiagnostic plots saved to validation/common_support_diagnostics.png"
-    )
+    print("\n\nDiagnostic plots saved to validation/common_support_diagnostics.png")
 
     # Save results
     support_df.to_csv("validation/common_support_analysis.csv")
diff --git a/validation/run_qrf_diagnostics.py b/validation/run_qrf_diagnostics.py
index dae400597..b39b16f5b 100644
--- a/validation/run_qrf_diagnostics.py
+++ b/validation/run_qrf_diagnostics.py
@@ -225,7 +225,7 @@ def main():
     for display_name, actual_name in target_map.items():
         if actual_name in variance_results:
             print(
-                f"- {display_name.capitalize()}: {variance_results[actual_name]*100:.0f}%"
+                f"- {display_name.capitalize()}: {variance_results[actual_name] * 100:.0f}%"
             )
 
     # 3. Joint distribution preservation
@@ -281,7 +281,7 @@ def main():
         for display_name, actual_name in target_map.items():
             if actual_name in variance_results:
                 f.write(
-                    f"{display_name.capitalize()}: {variance_results[actual_name]*100:.0f}%\n"
+                    f"{display_name.capitalize()}: {variance_results[actual_name] * 100:.0f}%\n"
                 )
     print(
         "✓ Saved variance explained results to validation/outputs/variance_explained.txt"
@@ -321,7 +321,7 @@ def main():
         for display_name, actual_name in target_map.items():
             if actual_name in variance_results:
                 f.write(
-                    f"{display_name.capitalize()}: {variance_results[actual_name]*100:.0f}%\n"
+                    f"{display_name.capitalize()}: {variance_results[actual_name] * 100:.0f}%\n"
                 )
 
         if valid_pairs:
diff --git a/validation/tax_policy_validation.py b/validation/tax_policy_validation.py
index c7c4f6007..9e04982f1 100644
--- a/validation/tax_policy_validation.py
+++ b/validation/tax_policy_validation.py
@@ -101,9 +101,7 @@ def analyze_high_income_taxpayers():
     for threshold in thresholds:
         count = (weights[agi >= threshold]).sum()
         pct_returns = count / weights.sum() * 100
-        total_agi = (
-            agi[agi >= threshold] * weights[agi >= threshold]
-        ).sum() / 1e9
+        total_agi = (agi[agi >= threshold] * weights[agi >= threshold]).sum() / 1e9
 
         results.append(
             {
@@ -135,9 +133,7 @@ def validate_state_revenues():
 
             results.append({"state_code": state, "revenue_billions": total})
 
-    return pd.DataFrame(results).sort_values(
-        "revenue_billions", ascending=False
-    )
+    return pd.DataFrame(results).sort_values("revenue_billions", ascending=False)
 
 
 def generate_validation_report():
diff --git a/validation/validate_retirement_imputation.py b/validation/validate_retirement_imputation.py
index f57441751..065a82944 100644
--- a/validation/validate_retirement_imputation.py
+++ b/validation/validate_retirement_imputation.py
@@ -54,12 +54,8 @@ def validate_constraints(sim) -> list:
     issues = []
     year = 2024
 
-    emp_income = sim.calculate(
-        "employment_income", year, map_to="person"
-    ).values
-    se_income = sim.calculate(
-        "self_employment_income", year, map_to="person"
-    ).values
+    emp_income = sim.calculate("employment_income", year, map_to="person").values
+    se_income = sim.calculate("self_employment_income", year, map_to="person").values
     age = sim.calculate("age", year, map_to="person").values
 
     catch_up = age >= 50
@@ -79,9 +75,7 @@ def validate_constraints(sim) -> list:
 
         n_over_cap = (vals > max_401k + 1).sum()
         if n_over_cap > 0:
-            issues.append(
-                f"FAIL: {var} has {n_over_cap} values exceeding " f"401k cap"
-            )
+            issues.append(f"FAIL: {var} has {n_over_cap} values exceeding 401k cap")
 
         zero_wage = emp_income == 0
         n_nonzero_no_wage = (vals[zero_wage] > 0).sum()
@@ -110,9 +104,7 @@ def validate_constraints(sim) -> list:
 
         n_over_cap = (vals > max_ira + 1).sum()
         if n_over_cap > 0:
-            issues.append(
-                f"FAIL: {var} has {n_over_cap} values exceeding " f"IRA cap"
-            )
+            issues.append(f"FAIL: {var} has {n_over_cap} values exceeding IRA cap")
 
     # SE pension constraint
     var = "self_employed_pension_contributions"
@@ -141,9 +133,7 @@ def validate_aggregates(sim) -> list:
 
     weight = sim.calculate("person_weight", year).values
 
-    logger.info(
-        "\n%-45s %15s %15s %10s", "Variable", "Weighted Sum", "Target", "Ratio"
-    )
+    logger.info("\n%-45s %15s %15s %10s", "Variable", "Weighted Sum", "Target", "Ratio")
     logger.info("-" * 90)
 
     for var, target in TARGETS.items():
@@ -168,8 +158,8 @@ def validate_aggregates(sim) -> list:
         if ratio < 0.1 or ratio > 5.0:
             issues.append(
                 f"WARNING: {var} weighted sum "
-                f"${weighted_sum/1e9:.1f}B is far from "
-                f"target ${target/1e9:.1f}B "
+                f"${weighted_sum / 1e9:.1f}B is far from "
+                f"target ${target / 1e9:.1f}B "
                 f"(ratio={ratio:.2f})"
             )