diff --git a/policybench/scenarios.py b/policybench/scenarios.py index 584c2bb..b5c6b40 100644 --- a/policybench/scenarios.py +++ b/policybench/scenarios.py @@ -590,6 +590,13 @@ def load_enhanced_cps_person_frame() -> tuple[pd.DataFrame, int]: from policybench.policyengine_runtime import make_us_microsimulation sim = make_us_microsimulation() + # Keep the dataset's native period for provenance, but read promptable + # inputs at the benchmark year (TAX_YEAR) so households are represented at + # the year they are scored. PolicyEngine uprates monetary inputs across + # periods, so reading at the dataset period (e.g. 2024) leaves incomes + # several percent below their TAX_YEAR level and out of step with the + # population weights, which are computed at TAX_YEAR. The UK loader already + # reads at TAX_YEAR; this keeps the US path consistent. dataset_year = sim.default_input_period input_specs = get_promptable_input_specs() @@ -598,7 +605,7 @@ def load_enhanced_cps_person_frame() -> tuple[pd.DataFrame, int]: values[output_name] = np.asarray( sim.calculate( variable_name, - dataset_year, + TAX_YEAR, map_to="person", use_weights=False, ) @@ -608,7 +615,7 @@ def load_enhanced_cps_person_frame() -> tuple[pd.DataFrame, int]: values[spec.output_name] = np.asarray( sim.calculate( spec.source_name, - dataset_year, + TAX_YEAR, map_to="person", use_weights=False, ) diff --git a/tests/test_scenarios.py b/tests/test_scenarios.py index 81287dd..17eefd6 100644 --- a/tests/test_scenarios.py +++ b/tests/test_scenarios.py @@ -1073,3 +1073,29 @@ def test_sample_household_ids_requires_enough_positive_weight(): ) with pytest.raises(ValueError, match="positive sampling weight"): _sample_household_ids(eligible, n=2, seed=0) + + +def test_load_enhanced_cps_person_frame_reads_inputs_at_tax_year(monkeypatch): + from unittest.mock import MagicMock + + import numpy as np + + import policybench.policyengine_runtime as runtime + from policybench.config import TAX_YEAR + from policybench.scenarios import load_enhanced_cps_person_frame + + sim = MagicMock() + # Dataset's native input period differs from the benchmark year. + sim.default_input_period = 2024 + sim.calculate.return_value = np.array([1.0]) + monkeypatch.setattr(runtime, "make_us_microsimulation", lambda: sim) + + _frame, dataset_year = load_enhanced_cps_person_frame() + + # Provenance keeps the dataset's native period ... + assert dataset_year == 2024 + # ... but every promptable input is read at the benchmark year (TAX_YEAR), + # so households are represented at the year they are scored, not 2024. + assert sim.calculate.call_count > 0 + periods = {call.args[1] for call in sim.calculate.call_args_list} + assert periods == {TAX_YEAR}