From abb31767a4d4881fa7b0d8c8fecaa24033c0ec87 Mon Sep 17 00:00:00 2001 From: Daphne Hansell <128793799+daphnehanse11@users.noreply.github.com> Date: Tue, 28 Apr 2026 12:54:10 -0400 Subject: [PATCH 1/8] Residualize modeled health premiums --- changelog.d/8089.fixed.md | 1 + policyengine_us_data/datasets/cps/cps.py | 116 ++++++++++++++++++ .../test_health_premium_residualization.py | 47 +++++++ 3 files changed, 164 insertions(+) create mode 100644 changelog.d/8089.fixed.md create mode 100644 tests/unit/datasets/test_health_premium_residualization.py diff --git a/changelog.d/8089.fixed.md b/changelog.d/8089.fixed.md new file mode 100644 index 000000000..667257e07 --- /dev/null +++ b/changelog.d/8089.fixed.md @@ -0,0 +1 @@ +Added a CPS health-premium residualization path for baseline computed premiums. diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py index d12ba7eef..d97352eda 100644 --- a/policyengine_us_data/datasets/cps/cps.py +++ b/policyengine_us_data/datasets/cps/cps.py @@ -193,6 +193,8 @@ def generate(self): add_takeup(self) logging.info("Imputing Marketplace plan benchmark ratio") add_marketplace_plan_benchmark_ratio(self) + logging.info("Residualizing imputed health premium components") + residualize_modeled_health_premium_components(self) logging.info("Downsampling") # Downsample @@ -519,6 +521,120 @@ def add_marketplace_plan_benchmark_ratio(self): self.save_dataset(data) +MODELED_PREMIUM_RESIDUALIZATION_TARGETS = { + "health_insurance_premiums_without_medicare_part_b": ( + "chip_premium", + "marketplace_net_premium", + "medicaid_premium", + ), + "medicare_part_b_premiums_reported": ("income_adjusted_part_b_premium",), +} + + +def residualize_modeled_health_premium_components(self): + """Subtract baseline computed premiums from imputed premium inputs. + + The model adds computed premiums back in SPM MOOP, so CPS-reported + premium inputs need to carry only the residual not explained by baseline + computed premiums. Variables are version-gated because the data package + may be built against a policyengine-us release before a modeled premium + variable exists. + """ + from policyengine_us import Microsimulation + + data = self.load_dataset() + baseline = Microsimulation(dataset=self) + tbs = baseline.tax_benefit_system + period = self.time_period + changed = False + + for target, premium_variables in MODELED_PREMIUM_RESIDUALIZATION_TARGETS.items(): + if target not in data: + continue + + computed_premium = np.zeros(len(data[target]), dtype=float) + available_variables = [ + variable for variable in premium_variables if variable in tbs.variables + ] + for variable in available_variables: + values = np.asarray( + baseline.calculate(variable, period=period).values, + dtype=float, + ) + computed_premium += _premium_values_to_person( + data=data, + source_entity=tbs.variables[variable].entity.key, + values=values, + ) + + if available_variables: + data[target] = compute_premium_residual( + reported_premium=data[target], + baseline_computed_premium=computed_premium, + ) + logging.info( + "Residualized %s by subtracting baseline computed premiums: %s", + target, + ", ".join(available_variables), + ) + changed = True + + if changed: + self.save_dataset(data) + + +def compute_premium_residual( + reported_premium: np.ndarray, + baseline_computed_premium: np.ndarray, +) -> np.ndarray: + """Return the imputed premium residual after baseline computed premiums.""" + return np.asarray(reported_premium, dtype=float) - np.asarray( + baseline_computed_premium, dtype=float + ) + + +def _premium_values_to_person( + data: dict, + source_entity: str, + values: np.ndarray, +) -> np.ndarray: + """Map computed premiums to person rows for person-level residualization.""" + person_ids = data["person_id"] + if source_entity == "person": + if len(values) != len(person_ids): + raise ValueError( + "Person-level computed premium length does not match person rows: " + f"got {len(values)}, expected {len(person_ids)}." + ) + return values + + entity_id_variable = f"{source_entity}_id" + person_entity_id_variable = f"person_{source_entity}_id" + if entity_id_variable not in data or person_entity_id_variable not in data: + raise ValueError( + f"Cannot allocate {source_entity}-level premiums to people: missing " + f"{entity_id_variable} or {person_entity_id_variable}." + ) + + entity_ids = data[entity_id_variable] + person_entity_ids = data[person_entity_id_variable] + if len(values) != len(entity_ids): + raise ValueError( + f"{source_entity}-level computed premium length does not match " + f"{source_entity} rows: got {len(values)}, expected {len(entity_ids)}." + ) + + entity_position = {entity_id: index for index, entity_id in enumerate(entity_ids)} + allocated = np.zeros(len(person_ids), dtype=float) + seen_entities = set() + for person_index, entity_id in enumerate(person_entity_ids): + if entity_id in seen_entities: + continue + allocated[person_index] = values[entity_position[entity_id]] + seen_entities.add(entity_id) + return allocated + + MARKETPLACE_PLAN_BENCHMARK_RATIO_MIN = 0.5 MARKETPLACE_PLAN_BENCHMARK_RATIO_MAX = 1.5 diff --git a/tests/unit/datasets/test_health_premium_residualization.py b/tests/unit/datasets/test_health_premium_residualization.py new file mode 100644 index 000000000..e142754b5 --- /dev/null +++ b/tests/unit/datasets/test_health_premium_residualization.py @@ -0,0 +1,47 @@ +import numpy as np + +from policyengine_us_data.datasets.cps.cps import ( + _premium_values_to_person, + compute_premium_residual, +) + + +def test_premium_residual_subtracts_computed_premiums() -> None: + reported = np.array([500.0, 200.0, 50.0]) + computed = np.array([125.0, 250.0, 0.0]) + + result = compute_premium_residual( + reported_premium=reported, + baseline_computed_premium=computed, + ) + + np.testing.assert_allclose(result, [375.0, -50.0, 50.0]) + + +def test_tax_unit_premiums_allocate_to_first_person_only() -> None: + data = { + "person_id": np.array([1, 2, 3, 4]), + "tax_unit_id": np.array([10, 20]), + "person_tax_unit_id": np.array([10, 10, 20, 20]), + } + + result = _premium_values_to_person( + data=data, + source_entity="tax_unit", + values=np.array([300.0, 800.0]), + ) + + np.testing.assert_allclose(result, [300.0, 0.0, 800.0, 0.0]) + + +def test_person_premiums_pass_through_to_person_rows() -> None: + data = {"person_id": np.array([1, 2, 3])} + values = np.array([100.0, 200.0, 300.0]) + + result = _premium_values_to_person( + data=data, + source_entity="person", + values=values, + ) + + np.testing.assert_allclose(result, values) From 188f5c1d14f587a9fbe1a7a65b836a9a8f822b77 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Tue, 28 Apr 2026 19:48:32 -0400 Subject: [PATCH 2/8] Store residual health insurance premiums --- changelog.d/8089.fixed.md | 2 +- policyengine_us_data/datasets/cps/cps.py | 59 +++++++++++-------- .../datasets/cps/extended_cps.py | 1 + .../test_health_premium_residualization.py | 12 ++++ 4 files changed, 47 insertions(+), 27 deletions(-) diff --git a/changelog.d/8089.fixed.md b/changelog.d/8089.fixed.md index 667257e07..1942dcfd2 100644 --- a/changelog.d/8089.fixed.md +++ b/changelog.d/8089.fixed.md @@ -1 +1 @@ -Added a CPS health-premium residualization path for baseline computed premiums. +Added a CPS health-premium residual input for baseline computed premiums. diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py index d97352eda..f648a6167 100644 --- a/policyengine_us_data/datasets/cps/cps.py +++ b/policyengine_us_data/datasets/cps/cps.py @@ -522,22 +522,26 @@ def add_marketplace_plan_benchmark_ratio(self): MODELED_PREMIUM_RESIDUALIZATION_TARGETS = { - "health_insurance_premiums_without_medicare_part_b": ( - "chip_premium", - "marketplace_net_premium", - "medicaid_premium", - ), - "medicare_part_b_premiums_reported": ("income_adjusted_part_b_premium",), + "health_insurance_premium_residual": { + "reported_variable": "health_insurance_premiums_without_medicare_part_b", + "modeled_variables": ( + "chip_premium", + "marketplace_net_premium", + "medicaid_premium", + ), + }, } def residualize_modeled_health_premium_components(self): - """Subtract baseline computed premiums from imputed premium inputs. - - The model adds computed premiums back in SPM MOOP, so CPS-reported - premium inputs need to carry only the residual not explained by baseline - computed premiums. Variables are version-gated because the data package - may be built against a policyengine-us release before a modeled premium + """Create residual premium inputs net of baseline computed premiums. + + The SPM model adds computed premiums back explicitly, so it needs a + separate residual premium input for the parts of CPS-reported premiums not + explained by baseline computed premiums. The original CPS-reported premium + inputs remain unchanged for consumers that use reported medical expenses + directly. Variables are version-gated because the data package may be built + against a policyengine-us release before a residual or modeled premium variable exists. """ from policyengine_us import Microsimulation @@ -548,11 +552,14 @@ def residualize_modeled_health_premium_components(self): period = self.time_period changed = False - for target, premium_variables in MODELED_PREMIUM_RESIDUALIZATION_TARGETS.items(): - if target not in data: + for output_variable, config in MODELED_PREMIUM_RESIDUALIZATION_TARGETS.items(): + reported_variable = config["reported_variable"] + premium_variables = config["modeled_variables"] + + if reported_variable not in data or output_variable not in tbs.variables: continue - computed_premium = np.zeros(len(data[target]), dtype=float) + computed_premium = np.zeros(len(data[reported_variable]), dtype=float) available_variables = [ variable for variable in premium_variables if variable in tbs.variables ] @@ -567,17 +574,17 @@ def residualize_modeled_health_premium_components(self): values=values, ) - if available_variables: - data[target] = compute_premium_residual( - reported_premium=data[target], - baseline_computed_premium=computed_premium, - ) - logging.info( - "Residualized %s by subtracting baseline computed premiums: %s", - target, - ", ".join(available_variables), - ) - changed = True + data[output_variable] = compute_premium_residual( + reported_premium=data[reported_variable], + baseline_computed_premium=computed_premium, + ) + logging.info( + "Created %s from %s by subtracting baseline computed premiums: %s", + output_variable, + reported_variable, + ", ".join(available_variables) if available_variables else "none", + ) + changed = True if changed: self.save_dataset(data) diff --git a/policyengine_us_data/datasets/cps/extended_cps.py b/policyengine_us_data/datasets/cps/extended_cps.py index 53a6ceefe..be674f89c 100644 --- a/policyengine_us_data/datasets/cps/extended_cps.py +++ b/policyengine_us_data/datasets/cps/extended_cps.py @@ -151,6 +151,7 @@ def _supports_structural_mortgage_inputs() -> bool: "spm_unit_pre_subsidy_childcare_expenses", # Medical expenses "health_insurance_premiums_without_medicare_part_b", + "health_insurance_premium_residual", "over_the_counter_health_expenses", "other_medical_expenses", "child_support_expense", diff --git a/tests/unit/datasets/test_health_premium_residualization.py b/tests/unit/datasets/test_health_premium_residualization.py index e142754b5..92080b562 100644 --- a/tests/unit/datasets/test_health_premium_residualization.py +++ b/tests/unit/datasets/test_health_premium_residualization.py @@ -18,6 +18,18 @@ def test_premium_residual_subtracts_computed_premiums() -> None: np.testing.assert_allclose(result, [375.0, -50.0, 50.0]) +def test_premium_residual_preserves_reported_input() -> None: + reported = np.array([500.0, 200.0]) + computed = np.array([125.0, 250.0]) + + _ = compute_premium_residual( + reported_premium=reported, + baseline_computed_premium=computed, + ) + + np.testing.assert_allclose(reported, [500.0, 200.0]) + + def test_tax_unit_premiums_allocate_to_first_person_only() -> None: data = { "person_id": np.array([1, 2, 3, 4]), From af238d8142fa30cb3e489f0acac049bf026854d3 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Wed, 29 Apr 2026 06:01:54 -0400 Subject: [PATCH 3/8] Rename other health insurance premium input --- changelog.d/8089.fixed.md | 2 +- .../calibration/target_config.yaml | 2 +- policyengine_us_data/datasets/cps/cps.py | 41 ++++++++----------- .../datasets/cps/extended_cps.py | 8 +--- policyengine_us_data/datasets/puf/puf.py | 2 +- .../db/etl_national_targets.py | 2 +- .../pull_hardcoded_targets.py | 2 +- policyengine_us_data/utils/loss.py | 4 +- policyengine_us_data/utils/policyengine.py | 6 --- ...> test_other_health_insurance_premiums.py} | 10 ++--- tests/unit/test_medical_expense_inputs.py | 18 ++++++++ tests/unit/test_medicare_part_b_inputs.py | 27 ------------ 12 files changed, 49 insertions(+), 75 deletions(-) rename tests/unit/datasets/{test_health_premium_residualization.py => test_other_health_insurance_premiums.py} (81%) create mode 100644 tests/unit/test_medical_expense_inputs.py delete mode 100644 tests/unit/test_medicare_part_b_inputs.py diff --git a/changelog.d/8089.fixed.md b/changelog.d/8089.fixed.md index 1942dcfd2..7828e3505 100644 --- a/changelog.d/8089.fixed.md +++ b/changelog.d/8089.fixed.md @@ -1 +1 @@ -Added a CPS health-premium residual input for baseline computed premiums. +Added other health insurance premiums as the non-Medicare premium category not covered by modeled Marketplace, CHIP, or Medicaid premiums. diff --git a/policyengine_us_data/calibration/target_config.yaml b/policyengine_us_data/calibration/target_config.yaml index 154dcf878..4b0702898 100644 --- a/policyengine_us_data/calibration/target_config.yaml +++ b/policyengine_us_data/calibration/target_config.yaml @@ -114,7 +114,7 @@ include: geo_level: national - variable: medicaid geo_level: national - - variable: medicare_part_b_premiums + - variable: medicare_part_b_premium geo_level: national - variable: other_medical_expenses geo_level: national diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py index f648a6167..e3099bf56 100644 --- a/policyengine_us_data/datasets/cps/cps.py +++ b/policyengine_us_data/datasets/cps/cps.py @@ -52,7 +52,6 @@ ) from policyengine_us_data.utils.policyengine import ( supports_medicare_enrollment_input, - supports_modeled_medicare_part_b_inputs, ) @@ -193,8 +192,8 @@ def generate(self): add_takeup(self) logging.info("Imputing Marketplace plan benchmark ratio") add_marketplace_plan_benchmark_ratio(self) - logging.info("Residualizing imputed health premium components") - residualize_modeled_health_premium_components(self) + logging.info("Deriving other health insurance premiums") + derive_other_health_insurance_premiums(self) logging.info("Downsampling") # Downsample @@ -521,8 +520,8 @@ def add_marketplace_plan_benchmark_ratio(self): self.save_dataset(data) -MODELED_PREMIUM_RESIDUALIZATION_TARGETS = { - "health_insurance_premium_residual": { +OTHER_HEALTH_INSURANCE_PREMIUM_TARGETS = { + "other_health_insurance_premiums": { "reported_variable": "health_insurance_premiums_without_medicare_part_b", "modeled_variables": ( "chip_premium", @@ -533,16 +532,16 @@ def add_marketplace_plan_benchmark_ratio(self): } -def residualize_modeled_health_premium_components(self): - """Create residual premium inputs net of baseline computed premiums. +def derive_other_health_insurance_premiums(self): + """Create other premium inputs net of baseline computed premiums. - The SPM model adds computed premiums back explicitly, so it needs a - separate residual premium input for the parts of CPS-reported premiums not - explained by baseline computed premiums. The original CPS-reported premium - inputs remain unchanged for consumers that use reported medical expenses - directly. Variables are version-gated because the data package may be built - against a policyengine-us release before a residual or modeled premium - variable exists. + The model adds computed premiums back explicitly, so it needs a separate + other-premium input for the parts of CPS-reported non-Medicare premiums + not explained by baseline computed Marketplace, CHIP, or Medicaid + premiums. The original CPS-reported premium inputs remain unchanged as raw + source fields. Variables are version-gated because the data package may be + built against a policyengine-us release before a modeled premium variable + exists. """ from policyengine_us import Microsimulation @@ -552,7 +551,7 @@ def residualize_modeled_health_premium_components(self): period = self.time_period changed = False - for output_variable, config in MODELED_PREMIUM_RESIDUALIZATION_TARGETS.items(): + for output_variable, config in OTHER_HEALTH_INSURANCE_PREMIUM_TARGETS.items(): reported_variable = config["reported_variable"] premium_variables = config["modeled_variables"] @@ -574,7 +573,7 @@ def residualize_modeled_health_premium_components(self): values=values, ) - data[output_variable] = compute_premium_residual( + data[output_variable] = compute_other_health_insurance_premiums( reported_premium=data[reported_variable], baseline_computed_premium=computed_premium, ) @@ -590,11 +589,11 @@ def residualize_modeled_health_premium_components(self): self.save_dataset(data) -def compute_premium_residual( +def compute_other_health_insurance_premiums( reported_premium: np.ndarray, baseline_computed_premium: np.ndarray, ) -> np.ndarray: - """Return the imputed premium residual after baseline computed premiums.""" + """Return other premiums after subtracting baseline computed premiums.""" return np.asarray(reported_premium, dtype=float) - np.asarray( baseline_computed_premium, dtype=float ) @@ -605,7 +604,7 @@ def _premium_values_to_person( source_entity: str, values: np.ndarray, ) -> np.ndarray: - """Map computed premiums to person rows for person-level residualization.""" + """Map computed premiums to person rows for person-level premium accounting.""" person_ids = data["person_id"] if source_entity == "person": if len(values) != len(person_ids): @@ -1134,10 +1133,6 @@ def add_personal_income_variables(cps: h5py.File, person: DataFrame, year: int): cps["other_medical_expenses"] = person.PMED_VAL if supports_medicare_enrollment_input(): cps["medicare_enrolled"] = person.MCARE == 1 - if supports_modeled_medicare_part_b_inputs(): - cps["medicare_part_b_premiums_reported"] = person.PEMCPREM - else: - cps["medicare_part_b_premiums"] = person.PEMCPREM # Get QBI simulation parameters --- yamlfilename = ( diff --git a/policyengine_us_data/datasets/cps/extended_cps.py b/policyengine_us_data/datasets/cps/extended_cps.py index be674f89c..061dc5d9d 100644 --- a/policyengine_us_data/datasets/cps/extended_cps.py +++ b/policyengine_us_data/datasets/cps/extended_cps.py @@ -19,9 +19,6 @@ impute_tax_unit_mortgage_balance_hints, ) from policyengine_us_data.utils.policyengine import has_policyengine_us_variables -from policyengine_us_data.utils.policyengine import ( - supports_modeled_medicare_part_b_inputs, -) from policyengine_us_data.utils.retirement_limits import ( get_retirement_limits, get_se_pension_limits, @@ -151,7 +148,7 @@ def _supports_structural_mortgage_inputs() -> bool: "spm_unit_pre_subsidy_childcare_expenses", # Medical expenses "health_insurance_premiums_without_medicare_part_b", - "health_insurance_premium_residual", + "other_health_insurance_premiums", "over_the_counter_health_expenses", "other_medical_expenses", "child_support_expense", @@ -167,9 +164,6 @@ def _supports_structural_mortgage_inputs() -> bool: "self_employment_income_last_year", ] -if not supports_modeled_medicare_part_b_inputs(): - CPS_ONLY_IMPUTED_VARIABLES.append("medicare_part_b_premiums") - # Set for O(1) lookup in the splice loop. _CPS_ONLY_SET = set(CPS_ONLY_IMPUTED_VARIABLES) diff --git a/policyengine_us_data/datasets/puf/puf.py b/policyengine_us_data/datasets/puf/puf.py index dc89c4a9a..12c09fc66 100644 --- a/policyengine_us_data/datasets/puf/puf.py +++ b/policyengine_us_data/datasets/puf/puf.py @@ -984,7 +984,7 @@ class PUF_2024(PUF): "health_insurance_premiums_without_medicare_part_b": 0.453, "other_medical_expenses": 0.325, "over_the_counter_health_expenses": 0.085, - "medicare_part_b_premiums": 0.137, + "medicare_part_b_premium": 0.137, } if __name__ == "__main__": diff --git a/policyengine_us_data/db/etl_national_targets.py b/policyengine_us_data/db/etl_national_targets.py index a671daedb..9c882cf1b 100644 --- a/policyengine_us_data/db/etl_national_targets.py +++ b/policyengine_us_data/db/etl_national_targets.py @@ -155,7 +155,7 @@ def extract_national_targets(year: int = DEFAULT_YEAR): "year": 2024, }, { - "variable": "medicare_part_b_premiums", + "variable": "medicare_part_b_premium", "value": get_beneficiary_paid_medicare_part_b_premiums_target(2024), "source": get_beneficiary_paid_medicare_part_b_premiums_source(2024), "notes": get_beneficiary_paid_medicare_part_b_premiums_notes(2024), diff --git a/policyengine_us_data/storage/calibration_targets/pull_hardcoded_targets.py b/policyengine_us_data/storage/calibration_targets/pull_hardcoded_targets.py index 16e92ea01..cf37f9496 100644 --- a/policyengine_us_data/storage/calibration_targets/pull_hardcoded_targets.py +++ b/policyengine_us_data/storage/calibration_targets/pull_hardcoded_targets.py @@ -9,7 +9,7 @@ HARD_CODED_TOTALS = { "health_insurance_premiums_without_medicare_part_b": 385e9, "other_medical_expenses": 278e9, - "medicare_part_b_premiums": 112e9, + "medicare_part_b_premium": 112e9, "over_the_counter_health_expenses": 72e9, "spm_unit_spm_threshold": 3_945e9, "child_support_expense": 33e9, diff --git a/policyengine_us_data/utils/loss.py b/policyengine_us_data/utils/loss.py index ce71696cc..177935b7f 100644 --- a/policyengine_us_data/utils/loss.py +++ b/policyengine_us_data/utils/loss.py @@ -29,7 +29,7 @@ HARD_CODED_TOTALS = { "health_insurance_premiums_without_medicare_part_b": 385e9, "other_medical_expenses": 278e9, - "medicare_part_b_premiums": get_beneficiary_paid_medicare_part_b_premiums_target( + "medicare_part_b_premium": get_beneficiary_paid_medicare_part_b_premiums_target( 2024 ), "over_the_counter_health_expenses": 72e9, @@ -855,7 +855,7 @@ def build_loss_matrix(dataset: type, time_period): "health_insurance_premiums_without_medicare_part_b", "over_the_counter_health_expenses", "other_medical_expenses", - "medicare_part_b_premiums", + "medicare_part_b_premium", ]: label = f"nation/census/{expense_type}/{label_suffix}" value = sim.calculate(expense_type).values diff --git a/policyengine_us_data/utils/policyengine.py b/policyengine_us_data/utils/policyengine.py index 869b95d9a..ab870b1f7 100644 --- a/policyengine_us_data/utils/policyengine.py +++ b/policyengine_us_data/utils/policyengine.py @@ -138,9 +138,3 @@ def has_policyengine_us_variables(*variables: str) -> bool: def supports_medicare_enrollment_input() -> bool: return has_policyengine_us_variables("medicare_enrolled") - - -def supports_modeled_medicare_part_b_inputs() -> bool: - return has_policyengine_us_variables( - "medicare_part_b_premiums_reported", - ) diff --git a/tests/unit/datasets/test_health_premium_residualization.py b/tests/unit/datasets/test_other_health_insurance_premiums.py similarity index 81% rename from tests/unit/datasets/test_health_premium_residualization.py rename to tests/unit/datasets/test_other_health_insurance_premiums.py index 92080b562..97e28ef59 100644 --- a/tests/unit/datasets/test_health_premium_residualization.py +++ b/tests/unit/datasets/test_other_health_insurance_premiums.py @@ -2,15 +2,15 @@ from policyengine_us_data.datasets.cps.cps import ( _premium_values_to_person, - compute_premium_residual, + compute_other_health_insurance_premiums, ) -def test_premium_residual_subtracts_computed_premiums() -> None: +def test_other_health_insurance_premiums_subtracts_computed_premiums() -> None: reported = np.array([500.0, 200.0, 50.0]) computed = np.array([125.0, 250.0, 0.0]) - result = compute_premium_residual( + result = compute_other_health_insurance_premiums( reported_premium=reported, baseline_computed_premium=computed, ) @@ -18,11 +18,11 @@ def test_premium_residual_subtracts_computed_premiums() -> None: np.testing.assert_allclose(result, [375.0, -50.0, 50.0]) -def test_premium_residual_preserves_reported_input() -> None: +def test_other_health_insurance_premiums_preserves_reported_input() -> None: reported = np.array([500.0, 200.0]) computed = np.array([125.0, 250.0]) - _ = compute_premium_residual( + _ = compute_other_health_insurance_premiums( reported_premium=reported, baseline_computed_premium=computed, ) diff --git a/tests/unit/test_medical_expense_inputs.py b/tests/unit/test_medical_expense_inputs.py new file mode 100644 index 000000000..f71c0bf93 --- /dev/null +++ b/tests/unit/test_medical_expense_inputs.py @@ -0,0 +1,18 @@ +from policyengine_us_data.datasets.puf.puf import ( + MEDICAL_EXPENSE_CATEGORY_BREAKDOWNS, +) +from policyengine_us_data.utils import policyengine as policyengine_utils + + +def test_puf_medical_breakdown_still_sums_to_one(): + assert sum(MEDICAL_EXPENSE_CATEGORY_BREAKDOWNS.values()) == 1.0 + + +def test_supports_medicare_enrollment_input_allows_partial_support(monkeypatch): + monkeypatch.setattr( + policyengine_utils, + "has_policyengine_us_variables", + lambda *variables: variables == ("medicare_enrolled",), + ) + + assert policyengine_utils.supports_medicare_enrollment_input() is True diff --git a/tests/unit/test_medicare_part_b_inputs.py b/tests/unit/test_medicare_part_b_inputs.py deleted file mode 100644 index c69e88789..000000000 --- a/tests/unit/test_medicare_part_b_inputs.py +++ /dev/null @@ -1,27 +0,0 @@ -from policyengine_us_data.datasets.cps.extended_cps import ( - CPS_ONLY_IMPUTED_VARIABLES, - supports_modeled_medicare_part_b_inputs, -) -from policyengine_us_data.datasets.puf.puf import MEDICAL_EXPENSE_CATEGORY_BREAKDOWNS -from policyengine_us_data.utils import policyengine as policyengine_utils - - -def test_medicare_part_b_clone_imputation_matches_installed_model_support(): - assert ("medicare_part_b_premiums" in set(CPS_ONLY_IMPUTED_VARIABLES)) is ( - not supports_modeled_medicare_part_b_inputs() - ) - - -def test_puf_medical_breakdown_still_sums_to_one(): - assert sum(MEDICAL_EXPENSE_CATEGORY_BREAKDOWNS.values()) == 1.0 - - -def test_supports_medicare_enrollment_input_allows_partial_support(monkeypatch): - monkeypatch.setattr( - policyengine_utils, - "has_policyengine_us_variables", - lambda *variables: variables == ("medicare_enrolled",), - ) - - assert policyengine_utils.supports_medicare_enrollment_input() is True - assert policyengine_utils.supports_modeled_medicare_part_b_inputs() is False From f21f0f91c575f4e02b76933639e8ec89253e9dd8 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Wed, 29 Apr 2026 06:17:25 -0400 Subject: [PATCH 4/8] Gate Part B target name on installed model --- .../calibration/target_config.yaml | 2 ++ policyengine_us_data/datasets/puf/puf.py | 7 +++-- .../db/etl_national_targets.py | 5 +++- .../pull_hardcoded_targets.py | 5 +++- policyengine_us_data/utils/loss.py | 25 ++++++++++++------ policyengine_us_data/utils/policyengine.py | 6 +++++ tests/unit/test_medical_expense_inputs.py | 26 +++++++++++++++++++ 7 files changed, 64 insertions(+), 12 deletions(-) diff --git a/policyengine_us_data/calibration/target_config.yaml b/policyengine_us_data/calibration/target_config.yaml index 4b0702898..e1a916aab 100644 --- a/policyengine_us_data/calibration/target_config.yaml +++ b/policyengine_us_data/calibration/target_config.yaml @@ -116,6 +116,8 @@ include: geo_level: national - variable: medicare_part_b_premium geo_level: national + - variable: medicare_part_b_premiums + geo_level: national - variable: other_medical_expenses geo_level: national - variable: over_the_counter_health_expenses diff --git a/policyengine_us_data/datasets/puf/puf.py b/policyengine_us_data/datasets/puf/puf.py index 12c09fc66..8c0587bbc 100644 --- a/policyengine_us_data/datasets/puf/puf.py +++ b/policyengine_us_data/datasets/puf/puf.py @@ -17,7 +17,10 @@ STRUCTURAL_MORTGAGE_VARIABLES, convert_mortgage_interest_to_structural_inputs, ) -from policyengine_us_data.utils.policyengine import has_policyengine_us_variables +from policyengine_us_data.utils.policyengine import ( + has_policyengine_us_variables, + medicare_part_b_premium_variable_name, +) from policyengine_us_data.utils.uprating import ( create_policyengine_uprating_factors_table, ) @@ -984,7 +987,7 @@ class PUF_2024(PUF): "health_insurance_premiums_without_medicare_part_b": 0.453, "other_medical_expenses": 0.325, "over_the_counter_health_expenses": 0.085, - "medicare_part_b_premium": 0.137, + medicare_part_b_premium_variable_name(): 0.137, } if __name__ == "__main__": diff --git a/policyengine_us_data/db/etl_national_targets.py b/policyengine_us_data/db/etl_national_targets.py index 9c882cf1b..57cbe8451 100644 --- a/policyengine_us_data/db/etl_national_targets.py +++ b/policyengine_us_data/db/etl_national_targets.py @@ -21,6 +21,9 @@ DEFAULT_YEAR, etl_argparser, ) +from policyengine_us_data.utils.policyengine import ( + medicare_part_b_premium_variable_name, +) def extract_national_targets(year: int = DEFAULT_YEAR): @@ -155,7 +158,7 @@ def extract_national_targets(year: int = DEFAULT_YEAR): "year": 2024, }, { - "variable": "medicare_part_b_premium", + "variable": medicare_part_b_premium_variable_name(), "value": get_beneficiary_paid_medicare_part_b_premiums_target(2024), "source": get_beneficiary_paid_medicare_part_b_premiums_source(2024), "notes": get_beneficiary_paid_medicare_part_b_premiums_notes(2024), diff --git a/policyengine_us_data/storage/calibration_targets/pull_hardcoded_targets.py b/policyengine_us_data/storage/calibration_targets/pull_hardcoded_targets.py index cf37f9496..0f74dbcd0 100644 --- a/policyengine_us_data/storage/calibration_targets/pull_hardcoded_targets.py +++ b/policyengine_us_data/storage/calibration_targets/pull_hardcoded_targets.py @@ -1,6 +1,9 @@ import pandas as pd import numpy as np from policyengine_us_data.storage import CALIBRATION_FOLDER +from policyengine_us_data.utils.policyengine import ( + medicare_part_b_premium_variable_name, +) """ Hardcoded targets for the year 2024 from CPS-derived statistics and other sources. Include medical expenses, sum of SPM thresholds, and child support expenses. @@ -9,7 +12,7 @@ HARD_CODED_TOTALS = { "health_insurance_premiums_without_medicare_part_b": 385e9, "other_medical_expenses": 278e9, - "medicare_part_b_premium": 112e9, + medicare_part_b_premium_variable_name(): 112e9, "over_the_counter_health_expenses": 72e9, "spm_unit_spm_threshold": 3_945e9, "child_support_expense": 33e9, diff --git a/policyengine_us_data/utils/loss.py b/policyengine_us_data/utils/loss.py index 177935b7f..904bc54c4 100644 --- a/policyengine_us_data/utils/loss.py +++ b/policyengine_us_data/utils/loss.py @@ -16,9 +16,15 @@ get_beneficiary_paid_medicare_part_b_premiums_target, ) from policyengine_us_data.db.etl_irs_soi import get_national_geography_soi_target +from policyengine_us_data.utils.policyengine import ( + medicare_part_b_premium_variable_name, +) from policyengine_core.reforms import Reform from policyengine_us_data.utils.soi import pe_to_soi, get_soi + +MEDICARE_PART_B_PREMIUM_VARIABLE = medicare_part_b_premium_variable_name() + # National calibration targets consumed by build_loss_matrix(). # These values are specific to 2024 — they should NOT be applied to # other years without re-sourcing. They are duplicated in @@ -29,8 +35,8 @@ HARD_CODED_TOTALS = { "health_insurance_premiums_without_medicare_part_b": 385e9, "other_medical_expenses": 278e9, - "medicare_part_b_premium": get_beneficiary_paid_medicare_part_b_premiums_target( - 2024 + MEDICARE_PART_B_PREMIUM_VARIABLE: ( + get_beneficiary_paid_medicare_part_b_premiums_target(2024) ), "over_the_counter_health_expenses": 72e9, "spm_unit_spm_threshold": 3_945e9, @@ -851,18 +857,21 @@ def build_loss_matrix(dataset: type, time_period): else: in_age_range = (age >= age_lower_bound) * (age < age_lower_bound + 10) label_suffix = f"age_{age_lower_bound}_to_{age_lower_bound + 9}" - for expense_type in [ - "health_insurance_premiums_without_medicare_part_b", - "over_the_counter_health_expenses", - "other_medical_expenses", - "medicare_part_b_premium", + for expense_type, target_column in [ + ( + "health_insurance_premiums_without_medicare_part_b", + "health_insurance_premiums_without_medicare_part_b", + ), + ("over_the_counter_health_expenses", "over_the_counter_health_expenses"), + ("other_medical_expenses", "other_medical_expenses"), + (MEDICARE_PART_B_PREMIUM_VARIABLE, "medicare_part_b_premiums"), ]: label = f"nation/census/{expense_type}/{label_suffix}" value = sim.calculate(expense_type).values loss_matrix[label] = sim.map_result( in_age_range * value, "person", "household" ) - targets_array.append(row[expense_type]) + targets_array.append(row[target_column]) # AGI by SPM threshold totals diff --git a/policyengine_us_data/utils/policyengine.py b/policyengine_us_data/utils/policyengine.py index ab870b1f7..3542eb82d 100644 --- a/policyengine_us_data/utils/policyengine.py +++ b/policyengine_us_data/utils/policyengine.py @@ -138,3 +138,9 @@ def has_policyengine_us_variables(*variables: str) -> bool: def supports_medicare_enrollment_input() -> bool: return has_policyengine_us_variables("medicare_enrolled") + + +def medicare_part_b_premium_variable_name() -> str: + if has_policyengine_us_variables("medicare_part_b_premium"): + return "medicare_part_b_premium" + return "medicare_part_b_premiums" diff --git a/tests/unit/test_medical_expense_inputs.py b/tests/unit/test_medical_expense_inputs.py index f71c0bf93..383fb9a10 100644 --- a/tests/unit/test_medical_expense_inputs.py +++ b/tests/unit/test_medical_expense_inputs.py @@ -16,3 +16,29 @@ def test_supports_medicare_enrollment_input_allows_partial_support(monkeypatch): ) assert policyengine_utils.supports_medicare_enrollment_input() is True + + +def test_medicare_part_b_premium_variable_name_prefers_clean_name(monkeypatch): + monkeypatch.setattr( + policyengine_utils, + "has_policyengine_us_variables", + lambda *variables: variables == ("medicare_part_b_premium",), + ) + + assert ( + policyengine_utils.medicare_part_b_premium_variable_name() + == "medicare_part_b_premium" + ) + + +def test_medicare_part_b_premium_variable_name_falls_back(monkeypatch): + monkeypatch.setattr( + policyengine_utils, + "has_policyengine_us_variables", + lambda *variables: False, + ) + + assert ( + policyengine_utils.medicare_part_b_premium_variable_name() + == "medicare_part_b_premiums" + ) From 4661ee8703ed3bf71e0955b3646bf3c2fd88dc13 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Wed, 29 Apr 2026 07:00:35 -0400 Subject: [PATCH 5/8] Keep legacy Part B premium input for current model --- policyengine_us_data/datasets/cps/cps.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py index e3099bf56..fd0265670 100644 --- a/policyengine_us_data/datasets/cps/cps.py +++ b/policyengine_us_data/datasets/cps/cps.py @@ -51,6 +51,7 @@ build_household_vehicle_receiver, ) from policyengine_us_data.utils.policyengine import ( + medicare_part_b_premium_variable_name, supports_medicare_enrollment_input, ) @@ -1133,6 +1134,8 @@ def add_personal_income_variables(cps: h5py.File, person: DataFrame, year: int): cps["other_medical_expenses"] = person.PMED_VAL if supports_medicare_enrollment_input(): cps["medicare_enrolled"] = person.MCARE == 1 + if medicare_part_b_premium_variable_name() == "medicare_part_b_premiums": + cps["medicare_part_b_premiums"] = person.PEMCPREM # Get QBI simulation parameters --- yamlfilename = ( From 94fba6c36c6bf4f4584613f38e9e23cc0aaee17a Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Wed, 29 Apr 2026 07:39:07 -0400 Subject: [PATCH 6/8] Emit decomposed premium input for current builds --- .../calibration/target_config.yaml | 2 + policyengine_us_data/datasets/cps/cps.py | 9 ++-- tests/unit/calibration/test_target_config.py | 18 +++++++ .../test_other_health_insurance_premiums.py | 50 +++++++++++++++++++ 4 files changed, 75 insertions(+), 4 deletions(-) diff --git a/policyengine_us_data/calibration/target_config.yaml b/policyengine_us_data/calibration/target_config.yaml index e1a916aab..217f770ae 100644 --- a/policyengine_us_data/calibration/target_config.yaml +++ b/policyengine_us_data/calibration/target_config.yaml @@ -114,6 +114,8 @@ include: geo_level: national - variable: medicaid geo_level: national + # Target ETL emits only the Part B variable name supported by the installed + # policyengine-us version; keep both names so training config matches either. - variable: medicare_part_b_premium geo_level: national - variable: medicare_part_b_premiums diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py index fd0265670..ef9a33660 100644 --- a/policyengine_us_data/datasets/cps/cps.py +++ b/policyengine_us_data/datasets/cps/cps.py @@ -540,9 +540,10 @@ def derive_other_health_insurance_premiums(self): other-premium input for the parts of CPS-reported non-Medicare premiums not explained by baseline computed Marketplace, CHIP, or Medicaid premiums. The original CPS-reported premium inputs remain unchanged as raw - source fields. Variables are version-gated because the data package may be - built against a policyengine-us release before a modeled premium variable - exists. + source fields. Computed premium variables are version-gated because the + data package may be built against a policyengine-us release before a + modeled premium variable exists. The derived output is still emitted so + datasets built on current releases are ready for the decomposed MOOP model. """ from policyengine_us import Microsimulation @@ -556,7 +557,7 @@ def derive_other_health_insurance_premiums(self): reported_variable = config["reported_variable"] premium_variables = config["modeled_variables"] - if reported_variable not in data or output_variable not in tbs.variables: + if reported_variable not in data: continue computed_premium = np.zeros(len(data[reported_variable]), dtype=float) diff --git a/tests/unit/calibration/test_target_config.py b/tests/unit/calibration/test_target_config.py index c698e83db..01e64795a 100644 --- a/tests/unit/calibration/test_target_config.py +++ b/tests/unit/calibration/test_target_config.py @@ -14,6 +14,9 @@ save_calibration_package, load_calibration_package, ) +from policyengine_us_data.utils.policyengine import ( + medicare_part_b_premium_variable_name, +) @pytest.fixture @@ -206,6 +209,21 @@ def test_training_config_includes_national_ctc_agi_targets(self): "domain_variable": "adjusted_gross_income,non_refundable_ctc", } in include_rules + def test_training_config_includes_current_medicare_part_b_target(self): + config = load_target_config( + str( + Path(__file__).resolve().parents[3] + / "policyengine_us_data" + / "calibration" + / "target_config.yaml" + ) + ) + + assert { + "variable": medicare_part_b_premium_variable_name(), + "geo_level": "national", + } in config["include"] + def test_training_config_includes_district_non_refundable_ctc_target(self): config = load_target_config( str( diff --git a/tests/unit/datasets/test_other_health_insurance_premiums.py b/tests/unit/datasets/test_other_health_insurance_premiums.py index 97e28ef59..5cb7ad53f 100644 --- a/tests/unit/datasets/test_other_health_insurance_premiums.py +++ b/tests/unit/datasets/test_other_health_insurance_premiums.py @@ -1,8 +1,11 @@ +from types import SimpleNamespace + import numpy as np from policyengine_us_data.datasets.cps.cps import ( _premium_values_to_person, compute_other_health_insurance_premiums, + derive_other_health_insurance_premiums, ) @@ -57,3 +60,50 @@ def test_person_premiums_pass_through_to_person_rows() -> None: ) np.testing.assert_allclose(result, values) + + +def test_derive_other_health_insurance_premiums_emits_future_output( + monkeypatch, +) -> None: + class FakeDataset: + time_period = 2024 + + def __init__(self): + self.saved_data = None + self.data = { + "person_id": np.array([1, 2]), + "health_insurance_premiums_without_medicare_part_b": np.array( + [500.0, 200.0] + ), + } + + def load_dataset(self): + return self.data.copy() + + def save_dataset(self, data): + self.saved_data = data + + class FakeMicrosimulation: + tax_benefit_system = SimpleNamespace( + variables={ + "chip_premium": SimpleNamespace(entity=SimpleNamespace(key="person")), + } + ) + + def __init__(self, dataset): + pass + + def calculate(self, variable, period): + assert variable == "chip_premium" + return SimpleNamespace(values=np.array([50.0, 75.0])) + + monkeypatch.setattr("policyengine_us.Microsimulation", FakeMicrosimulation) + + dataset = FakeDataset() + derive_other_health_insurance_premiums(dataset) + + assert dataset.saved_data is not None + np.testing.assert_allclose( + dataset.saved_data["other_health_insurance_premiums"], + [450.0, 125.0], + ) From 0a90922b1f0c2685da0d49348c485e957ce61ed3 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Wed, 29 Apr 2026 09:15:06 -0400 Subject: [PATCH 7/8] Require clean MOOP policyengine-us version --- .../calibration/target_config.yaml | 4 -- policyengine_us_data/datasets/cps/cps.py | 24 +++--------- policyengine_us_data/datasets/puf/puf.py | 3 +- .../db/etl_national_targets.py | 5 +-- .../pull_hardcoded_targets.py | 5 +-- policyengine_us_data/utils/loss.py | 5 +-- policyengine_us_data/utils/policyengine.py | 10 ----- pyproject.toml | 2 +- tests/unit/calibration/test_target_config.py | 7 +--- .../test_other_health_insurance_premiums.py | 18 +++++++-- tests/unit/test_medical_expense_inputs.py | 37 ------------------- 11 files changed, 27 insertions(+), 93 deletions(-) diff --git a/policyengine_us_data/calibration/target_config.yaml b/policyengine_us_data/calibration/target_config.yaml index 217f770ae..4b0702898 100644 --- a/policyengine_us_data/calibration/target_config.yaml +++ b/policyengine_us_data/calibration/target_config.yaml @@ -114,12 +114,8 @@ include: geo_level: national - variable: medicaid geo_level: national - # Target ETL emits only the Part B variable name supported by the installed - # policyengine-us version; keep both names so training config matches either. - variable: medicare_part_b_premium geo_level: national - - variable: medicare_part_b_premiums - geo_level: national - variable: other_medical_expenses geo_level: national - variable: over_the_counter_health_expenses diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py index ef9a33660..7c2ee0409 100644 --- a/policyengine_us_data/datasets/cps/cps.py +++ b/policyengine_us_data/datasets/cps/cps.py @@ -50,11 +50,6 @@ from policyengine_us_data.utils.asset_imputation import ( build_household_vehicle_receiver, ) -from policyengine_us_data.utils.policyengine import ( - medicare_part_b_premium_variable_name, - supports_medicare_enrollment_input, -) - CURRENT_HEALTH_COVERAGE_REPORTED_VAR_MAP = { "reported_has_direct_purchase_health_coverage_at_interview": "NOW_DIR", @@ -540,10 +535,9 @@ def derive_other_health_insurance_premiums(self): other-premium input for the parts of CPS-reported non-Medicare premiums not explained by baseline computed Marketplace, CHIP, or Medicaid premiums. The original CPS-reported premium inputs remain unchanged as raw - source fields. Computed premium variables are version-gated because the - data package may be built against a policyengine-us release before a - modeled premium variable exists. The derived output is still emitted so - datasets built on current releases are ready for the decomposed MOOP model. + source fields. The data package requires a policyengine-us release with + these modeled premium variables, so missing variables fail fast instead of + silently producing an incomplete decomposition. """ from policyengine_us import Microsimulation @@ -561,10 +555,7 @@ def derive_other_health_insurance_premiums(self): continue computed_premium = np.zeros(len(data[reported_variable]), dtype=float) - available_variables = [ - variable for variable in premium_variables if variable in tbs.variables - ] - for variable in available_variables: + for variable in premium_variables: values = np.asarray( baseline.calculate(variable, period=period).values, dtype=float, @@ -583,7 +574,7 @@ def derive_other_health_insurance_premiums(self): "Created %s from %s by subtracting baseline computed premiums: %s", output_variable, reported_variable, - ", ".join(available_variables) if available_variables else "none", + ", ".join(premium_variables), ) changed = True @@ -1133,10 +1124,7 @@ def add_personal_income_variables(cps: h5py.File, person: DataFrame, year: int): cps["health_insurance_premiums_without_medicare_part_b"] = person.PHIP_VAL cps["over_the_counter_health_expenses"] = person.POTC_VAL cps["other_medical_expenses"] = person.PMED_VAL - if supports_medicare_enrollment_input(): - cps["medicare_enrolled"] = person.MCARE == 1 - if medicare_part_b_premium_variable_name() == "medicare_part_b_premiums": - cps["medicare_part_b_premiums"] = person.PEMCPREM + cps["medicare_enrolled"] = person.MCARE == 1 # Get QBI simulation parameters --- yamlfilename = ( diff --git a/policyengine_us_data/datasets/puf/puf.py b/policyengine_us_data/datasets/puf/puf.py index 8c0587bbc..18a2083ab 100644 --- a/policyengine_us_data/datasets/puf/puf.py +++ b/policyengine_us_data/datasets/puf/puf.py @@ -19,7 +19,6 @@ ) from policyengine_us_data.utils.policyengine import ( has_policyengine_us_variables, - medicare_part_b_premium_variable_name, ) from policyengine_us_data.utils.uprating import ( create_policyengine_uprating_factors_table, @@ -987,7 +986,7 @@ class PUF_2024(PUF): "health_insurance_premiums_without_medicare_part_b": 0.453, "other_medical_expenses": 0.325, "over_the_counter_health_expenses": 0.085, - medicare_part_b_premium_variable_name(): 0.137, + "medicare_part_b_premium": 0.137, } if __name__ == "__main__": diff --git a/policyengine_us_data/db/etl_national_targets.py b/policyengine_us_data/db/etl_national_targets.py index 57cbe8451..9c882cf1b 100644 --- a/policyengine_us_data/db/etl_national_targets.py +++ b/policyengine_us_data/db/etl_national_targets.py @@ -21,9 +21,6 @@ DEFAULT_YEAR, etl_argparser, ) -from policyengine_us_data.utils.policyengine import ( - medicare_part_b_premium_variable_name, -) def extract_national_targets(year: int = DEFAULT_YEAR): @@ -158,7 +155,7 @@ def extract_national_targets(year: int = DEFAULT_YEAR): "year": 2024, }, { - "variable": medicare_part_b_premium_variable_name(), + "variable": "medicare_part_b_premium", "value": get_beneficiary_paid_medicare_part_b_premiums_target(2024), "source": get_beneficiary_paid_medicare_part_b_premiums_source(2024), "notes": get_beneficiary_paid_medicare_part_b_premiums_notes(2024), diff --git a/policyengine_us_data/storage/calibration_targets/pull_hardcoded_targets.py b/policyengine_us_data/storage/calibration_targets/pull_hardcoded_targets.py index 0f74dbcd0..cf37f9496 100644 --- a/policyengine_us_data/storage/calibration_targets/pull_hardcoded_targets.py +++ b/policyengine_us_data/storage/calibration_targets/pull_hardcoded_targets.py @@ -1,9 +1,6 @@ import pandas as pd import numpy as np from policyengine_us_data.storage import CALIBRATION_FOLDER -from policyengine_us_data.utils.policyengine import ( - medicare_part_b_premium_variable_name, -) """ Hardcoded targets for the year 2024 from CPS-derived statistics and other sources. Include medical expenses, sum of SPM thresholds, and child support expenses. @@ -12,7 +9,7 @@ HARD_CODED_TOTALS = { "health_insurance_premiums_without_medicare_part_b": 385e9, "other_medical_expenses": 278e9, - medicare_part_b_premium_variable_name(): 112e9, + "medicare_part_b_premium": 112e9, "over_the_counter_health_expenses": 72e9, "spm_unit_spm_threshold": 3_945e9, "child_support_expense": 33e9, diff --git a/policyengine_us_data/utils/loss.py b/policyengine_us_data/utils/loss.py index 904bc54c4..7150496ac 100644 --- a/policyengine_us_data/utils/loss.py +++ b/policyengine_us_data/utils/loss.py @@ -16,14 +16,11 @@ get_beneficiary_paid_medicare_part_b_premiums_target, ) from policyengine_us_data.db.etl_irs_soi import get_national_geography_soi_target -from policyengine_us_data.utils.policyengine import ( - medicare_part_b_premium_variable_name, -) from policyengine_core.reforms import Reform from policyengine_us_data.utils.soi import pe_to_soi, get_soi -MEDICARE_PART_B_PREMIUM_VARIABLE = medicare_part_b_premium_variable_name() +MEDICARE_PART_B_PREMIUM_VARIABLE = "medicare_part_b_premium" # National calibration targets consumed by build_loss_matrix(). # These values are specific to 2024 — they should NOT be applied to diff --git a/policyengine_us_data/utils/policyengine.py b/policyengine_us_data/utils/policyengine.py index 3542eb82d..1d150ee97 100644 --- a/policyengine_us_data/utils/policyengine.py +++ b/policyengine_us_data/utils/policyengine.py @@ -134,13 +134,3 @@ def has_policyengine_us_variables(*variables: str) -> bool: return False return set(variables).issubset(available_variables) - - -def supports_medicare_enrollment_input() -> bool: - return has_policyengine_us_variables("medicare_enrolled") - - -def medicare_part_b_premium_variable_name() -> str: - if has_policyengine_us_variables("medicare_part_b_premium"): - return "medicare_part_b_premium" - return "medicare_part_b_premiums" diff --git a/pyproject.toml b/pyproject.toml index 96f069460..03019f339 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ classifiers = [ "Programming Language :: Python :: 3.14", ] dependencies = [ - "policyengine-us>=1.637.0", + "policyengine-us>=1.673.0", "policyengine-core>=3.23.6", "pandas>=2.3.1", "requests>=2.25.0", diff --git a/tests/unit/calibration/test_target_config.py b/tests/unit/calibration/test_target_config.py index 01e64795a..8245c190e 100644 --- a/tests/unit/calibration/test_target_config.py +++ b/tests/unit/calibration/test_target_config.py @@ -14,9 +14,6 @@ save_calibration_package, load_calibration_package, ) -from policyengine_us_data.utils.policyengine import ( - medicare_part_b_premium_variable_name, -) @pytest.fixture @@ -209,7 +206,7 @@ def test_training_config_includes_national_ctc_agi_targets(self): "domain_variable": "adjusted_gross_income,non_refundable_ctc", } in include_rules - def test_training_config_includes_current_medicare_part_b_target(self): + def test_training_config_includes_medicare_part_b_target(self): config = load_target_config( str( Path(__file__).resolve().parents[3] @@ -220,7 +217,7 @@ def test_training_config_includes_current_medicare_part_b_target(self): ) assert { - "variable": medicare_part_b_premium_variable_name(), + "variable": "medicare_part_b_premium", "geo_level": "national", } in config["include"] diff --git a/tests/unit/datasets/test_other_health_insurance_premiums.py b/tests/unit/datasets/test_other_health_insurance_premiums.py index 5cb7ad53f..3d6b8ce59 100644 --- a/tests/unit/datasets/test_other_health_insurance_premiums.py +++ b/tests/unit/datasets/test_other_health_insurance_premiums.py @@ -62,7 +62,7 @@ def test_person_premiums_pass_through_to_person_rows() -> None: np.testing.assert_allclose(result, values) -def test_derive_other_health_insurance_premiums_emits_future_output( +def test_derive_other_health_insurance_premiums_emits_output( monkeypatch, ) -> None: class FakeDataset: @@ -87,6 +87,12 @@ class FakeMicrosimulation: tax_benefit_system = SimpleNamespace( variables={ "chip_premium": SimpleNamespace(entity=SimpleNamespace(key="person")), + "marketplace_net_premium": SimpleNamespace( + entity=SimpleNamespace(key="person") + ), + "medicaid_premium": SimpleNamespace( + entity=SimpleNamespace(key="person") + ), } ) @@ -94,8 +100,12 @@ def __init__(self, dataset): pass def calculate(self, variable, period): - assert variable == "chip_premium" - return SimpleNamespace(values=np.array([50.0, 75.0])) + values = { + "chip_premium": np.array([50.0, 75.0]), + "marketplace_net_premium": np.array([25.0, 0.0]), + "medicaid_premium": np.array([0.0, 10.0]), + } + return SimpleNamespace(values=values[variable]) monkeypatch.setattr("policyengine_us.Microsimulation", FakeMicrosimulation) @@ -105,5 +115,5 @@ def calculate(self, variable, period): assert dataset.saved_data is not None np.testing.assert_allclose( dataset.saved_data["other_health_insurance_premiums"], - [450.0, 125.0], + [425.0, 115.0], ) diff --git a/tests/unit/test_medical_expense_inputs.py b/tests/unit/test_medical_expense_inputs.py index 383fb9a10..5581054d7 100644 --- a/tests/unit/test_medical_expense_inputs.py +++ b/tests/unit/test_medical_expense_inputs.py @@ -1,44 +1,7 @@ from policyengine_us_data.datasets.puf.puf import ( MEDICAL_EXPENSE_CATEGORY_BREAKDOWNS, ) -from policyengine_us_data.utils import policyengine as policyengine_utils def test_puf_medical_breakdown_still_sums_to_one(): assert sum(MEDICAL_EXPENSE_CATEGORY_BREAKDOWNS.values()) == 1.0 - - -def test_supports_medicare_enrollment_input_allows_partial_support(monkeypatch): - monkeypatch.setattr( - policyengine_utils, - "has_policyengine_us_variables", - lambda *variables: variables == ("medicare_enrolled",), - ) - - assert policyengine_utils.supports_medicare_enrollment_input() is True - - -def test_medicare_part_b_premium_variable_name_prefers_clean_name(monkeypatch): - monkeypatch.setattr( - policyengine_utils, - "has_policyengine_us_variables", - lambda *variables: variables == ("medicare_part_b_premium",), - ) - - assert ( - policyengine_utils.medicare_part_b_premium_variable_name() - == "medicare_part_b_premium" - ) - - -def test_medicare_part_b_premium_variable_name_falls_back(monkeypatch): - monkeypatch.setattr( - policyengine_utils, - "has_policyengine_us_variables", - lambda *variables: False, - ) - - assert ( - policyengine_utils.medicare_part_b_premium_variable_name() - == "medicare_part_b_premiums" - ) From 4f71a3f6ecb00be8c30259e2029c577413516871 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Wed, 29 Apr 2026 17:02:14 -0400 Subject: [PATCH 8/8] Pin policyengine-us release for MOOP decomposition --- pyproject.toml | 2 +- uv.lock | 15 ++++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 03019f339..4cad830c5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ classifiers = [ "Programming Language :: Python :: 3.14", ] dependencies = [ - "policyengine-us>=1.673.0", + "policyengine-us>=1.674.1", "policyengine-core>=3.23.6", "pandas>=2.3.1", "requests>=2.25.0", diff --git a/uv.lock b/uv.lock index f62f55f99..f5f7c340d 100644 --- a/uv.lock +++ b/uv.lock @@ -2095,7 +2095,7 @@ wheels = [ [[package]] name = "policyengine-core" -version = "3.23.6" +version = "3.25.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "dpath" }, @@ -2115,25 +2115,26 @@ dependencies = [ { name = "standard-imghdr" }, { name = "wheel" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5d/de/5bc5b02626703ea7d288c84c474ec51e823aa726d55ebabafe7c85e7285f/policyengine_core-3.23.6.tar.gz", hash = "sha256:81bb4057f5d6380f2d7f1af2fe4932bd3bd37fdfda7b841f7ee38b30aa5cc8e6", size = 163499, upload-time = "2026-01-25T14:04:43.233Z" } +sdist = { url = "https://files.pythonhosted.org/packages/66/a6/46a316ef534adbedffbdfb8b2b9cfc89be572e3d75fa79c61103c771000e/policyengine_core-3.25.3.tar.gz", hash = "sha256:bf6a22cc49eeeaba310531321cb932c41a2f10c6a5f4cc20fd7677641f60055d", size = 466467, upload-time = "2026-04-28T00:36:10.153Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/82/7a/b47b239fb0a85a36b36b47e7665db981800fcac3384aeec6dadf92a9e548/policyengine_core-3.23.6-py3-none-any.whl", hash = "sha256:f0834107335de6f2452d39e53db7a72a57088ed26d3703a4c4eaded55a4e7bce", size = 225309, upload-time = "2026-01-25T14:04:41.844Z" }, + { url = "https://files.pythonhosted.org/packages/e2/e1/d3451e5c279bcea5da49c5cab3b3eec9e7fa35aafd62289394b769619b7e/policyengine_core-3.25.3-py3-none-any.whl", hash = "sha256:5b11ef29db4275121b58664a9c5ebd6478eeff5001e9f55b71e13716bbd9085f", size = 231186, upload-time = "2026-04-28T00:36:08.854Z" }, ] [[package]] name = "policyengine-us" -version = "1.637.0" +version = "1.674.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "microdf-python" }, { name = "pandas" }, { name = "policyengine-core" }, + { name = "spm-calculator" }, { name = "tables" }, { name = "tqdm" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d7/3f/d72af00833f1e9dffed558ee6c5e3e74561ea582badb241fdc9b524af49d/policyengine_us-1.637.0.tar.gz", hash = "sha256:d1dbd2aba6dfd5fb1083f4deb2c75ae3bf10ba6933330cfcde137e6abb76714a", size = 8962293, upload-time = "2026-04-17T01:46:12.824Z" } +sdist = { url = "https://files.pythonhosted.org/packages/6c/d0/cef3e52ecd087d6446f8761ddc91f8dd8ab56b2b320b95b54f43d63c0604/policyengine_us-1.674.1.tar.gz", hash = "sha256:e3141a11e3036713850fdc39d07793df336a74a3fb50c8989f70cecde1ecb556", size = 9341088, upload-time = "2026-04-29T20:53:36.457Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/2d/19/358ef7d32f0dbf701b1a5d91b352ad939984eca4d81908b534009b020989/policyengine_us-1.637.0-py3-none-any.whl", hash = "sha256:292f8c3e8c2c4b29336ef2fe045f8d3f333fdd130303e7f1df3a11bf333be24f", size = 8963845, upload-time = "2026-04-17T01:46:09.269Z" }, + { url = "https://files.pythonhosted.org/packages/18/5e/d12fdb3dbaadc1dcc770d799c25d5265859967ce25fd59ae783af56f8604/policyengine_us-1.674.1-py3-none-any.whl", hash = "sha256:2d705e108255fae257dea54cafb324bed6d36fe597c9351987abf01a1c1eb097", size = 9663547, upload-time = "2026-04-29T20:53:32.207Z" }, ] [[package]] @@ -2202,7 +2203,7 @@ requires-dist = [ { name = "pandas", specifier = ">=2.3.1" }, { name = "pip-system-certs", specifier = ">=3.0" }, { name = "policyengine-core", specifier = ">=3.23.6" }, - { name = "policyengine-us", specifier = ">=1.637.0" }, + { name = "policyengine-us", specifier = ">=1.674.1" }, { name = "requests", specifier = ">=2.25.0" }, { name = "samplics", marker = "extra == 'calibration'" }, { name = "scipy", specifier = ">=1.15.3" },