From 11b78c3e3a9978a08bf4db794bfb611144e0170b Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Mon, 8 Jun 2026 07:07:12 +0200
Subject: [PATCH] Read US scenario inputs at the benchmark year, not the
 dataset period

load_enhanced_cps_person_frame read promptable inputs at the Enhanced CPS
dataset's native period (sim.default_input_period, e.g. 2024) while scenarios
are built and scored at TAX_YEAR (2026). PolicyEngine uprates monetary inputs
across periods, so benchmark incomes sat several percent below their 2026 level
and out of step with the population weights, which are computed at TAX_YEAR. The
UK loader already reads at TAX_YEAR; this makes the US path consistent.

The dataset's native period is still returned as dataset_year for provenance
(the enhanced_cps_<year> artifact label). Reference outputs must be regenerated
for this to take effect in a published run.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 policybench/scenarios.py | 11 +++++++++--
 tests/test_scenarios.py  | 26 ++++++++++++++++++++++++++
 2 files changed, 35 insertions(+), 2 deletions(-)
diff --git a/policybench/scenarios.py b/policybench/scenarios.py
index 584c2bb..b5c6b40 100644
--- a/policybench/scenarios.py
+++ b/policybench/scenarios.py
@@ -590,6 +590,13 @@ def load_enhanced_cps_person_frame() -> tuple[pd.DataFrame, int]:
     from policybench.policyengine_runtime import make_us_microsimulation
 
     sim = make_us_microsimulation()
+    # Keep the dataset's native period for provenance, but read promptable
+    # inputs at the benchmark year (TAX_YEAR) so households are represented at
+    # the year they are scored. PolicyEngine uprates monetary inputs across
+    # periods, so reading at the dataset period (e.g. 2024) leaves incomes
+    # several percent below their TAX_YEAR level and out of step with the
+    # population weights, which are computed at TAX_YEAR. The UK loader already
+    # reads at TAX_YEAR; this keeps the US path consistent.
     dataset_year = sim.default_input_period
     input_specs = get_promptable_input_specs()
 
@@ -598,7 +605,7 @@ def load_enhanced_cps_person_frame() -> tuple[pd.DataFrame, int]:
         values[output_name] = np.asarray(
             sim.calculate(
                 variable_name,
-                dataset_year,
+                TAX_YEAR,
                 map_to="person",
                 use_weights=False,
             )
@@ -608,7 +615,7 @@ def load_enhanced_cps_person_frame() -> tuple[pd.DataFrame, int]:
         values[spec.output_name] = np.asarray(
             sim.calculate(
                 spec.source_name,
-                dataset_year,
+                TAX_YEAR,
                 map_to="person",
                 use_weights=False,
             )
diff --git a/tests/test_scenarios.py b/tests/test_scenarios.py
index 81287dd..17eefd6 100644
--- a/tests/test_scenarios.py
+++ b/tests/test_scenarios.py
@@ -1073,3 +1073,29 @@ def test_sample_household_ids_requires_enough_positive_weight():
     )
     with pytest.raises(ValueError, match="positive sampling weight"):
         _sample_household_ids(eligible, n=2, seed=0)
+
+
+def test_load_enhanced_cps_person_frame_reads_inputs_at_tax_year(monkeypatch):
+    """Promptable inputs are read at TAX_YEAR; dataset_year stays native."""
+    from unittest.mock import MagicMock
+
+    import numpy as np
+
+    import policybench.policyengine_runtime as runtime
+    from policybench.config import TAX_YEAR
+    from policybench.scenarios import load_enhanced_cps_person_frame
+
+    # Derive the native period from TAX_YEAR so the two can never coincide.
+    native_period = TAX_YEAR - 2
+    sim = MagicMock()
+    sim.default_input_period = native_period
+    sim.calculate.return_value = np.array([1.0])
+    monkeypatch.setattr(runtime, "make_us_microsimulation", lambda: sim)
+
+    _frame, dataset_year = load_enhanced_cps_person_frame()
+
+    # Provenance keeps the dataset's native period ...
+    assert dataset_year == native_period
+    # ... while every promptable input is read at the benchmark year.
+    assert sim.calculate.call_count > 0
+    assert {call.args[1] for call in sim.calculate.call_args_list} == {TAX_YEAR}