PolicyEngine · MaxGhenis · Apr 26, 2026
diff --git a/.gitignore b/.gitignore
@@ -22,6 +22,7 @@ node_modules
 !population_by_state.csv
 !aca_spending_and_enrollment_2024.csv
 !aca_spending_and_enrollment_2025.csv
+!policyengine_us_data/storage/calibration_targets/acs_housing_costs_2024.csv
 !real_estate_taxes_by_state_acs.csv
 !snap_state.csv
 !age_state.csv

diff --git a/policyengine_us_data/datasets/cps/enhanced_cps.py b/policyengine_us_data/datasets/cps/enhanced_cps.py
@@ -1,7 +1,9 @@
 from policyengine_core.data import Dataset
 import pandas as pd
 from policyengine_us_data.utils import (
+    ABSOLUTE_ERROR_SCALE_TARGETS,
     build_loss_matrix,
+    get_target_error_normalisation,
     HardConcrete,
     print_reweighting_diagnostics,
     set_seeds,
@@ -113,6 +115,10 @@ def reweight(
 ):
     target_names = np.array(loss_matrix.columns)
     is_national = loss_matrix.columns.str.startswith("nation/")
+    numerator_shift_np, error_denominator_np = get_target_error_normalisation(
+        target_names,
+        targets_array,
+    )
     loss_matrix = torch.tensor(loss_matrix.values, dtype=torch.float32)
     nation_normalisation_factor = is_national * (1 / is_national.sum())
     state_normalisation_factor = ~is_national * (1 / (~is_national).sum())
@@ -121,6 +127,8 @@ def reweight(
     )
     normalisation_factor = torch.tensor(normalisation_factor, dtype=torch.float32)
     targets_array = torch.tensor(targets_array, dtype=torch.float32)
+    numerator_shift = torch.tensor(numerator_shift_np, dtype=torch.float32)
+    error_denominator = torch.tensor(error_denominator_np, dtype=torch.float32)
 
     inv_mean_normalisation = 1 / np.mean(normalisation_factor.numpy())
 
@@ -132,7 +140,9 @@ def loss(weights):
         estimate = weights @ loss_matrix
         if torch.isnan(estimate).any():
             raise ValueError("Estimate contains NaNs")
-        rel_error = (((estimate - targets_array) + 1) / (targets_array + 1)) ** 2
+        rel_error = (
+            (estimate - targets_array + numerator_shift) / error_denominator
+        ) ** 2
         rel_error_normalized = inv_mean_normalisation * rel_error * normalisation_factor
         if torch.isnan(rel_error_normalized).any():
             raise ValueError("Relative error contains NaNs")
@@ -176,7 +186,10 @@ def loss(weights):
             )
             df["epoch"] = i
             df["error"] = df.estimate - df.target
-            df["rel_error"] = df.error / df.target
+            df["error_denominator"] = error_denominator.detach().numpy()
+            df["rel_error"] = (
+                df.error + numerator_shift.detach().numpy()
+            ) / df.error_denominator
             df["abs_error"] = df.error.abs()
             df["rel_abs_error"] = df.rel_error.abs()
             df["loss"] = df.rel_abs_error**2
@@ -203,6 +216,7 @@ def loss(weights):
         loss_matrix,
         targets_array,
         "L0 Sparse Solution",
+        target_names=target_names,
     )
 
     return final_weights_sparse
@@ -248,7 +262,12 @@ def generate(self):
         # Run the optimization procedure to get (close to) minimum loss weights
         for year in range(self.start_year, self.end_year + 1):
             loss_matrix, targets_array = build_loss_matrix(self.input_dataset, year)
-            zero_mask = np.isclose(targets_array, 0.0, atol=0.1)
+            scaled_zero_target_mask = loss_matrix.columns.isin(
+                ABSOLUTE_ERROR_SCALE_TARGETS.keys()
+            )
+            zero_mask = np.isclose(targets_array, 0.0, atol=0.1) & (
+                ~scaled_zero_target_mask
+            )
             bad_mask = loss_matrix.columns.isin(bad_targets)
             keep_mask_bool = ~(zero_mask | bad_mask)
             keep_idx = np.where(keep_mask_bool)[0]

diff --git a/policyengine_us_data/db/etl_irs_soi.py b/policyengine_us_data/db/etl_irs_soi.py
@@ -450,6 +450,49 @@ def get_national_geography_soi_target(
     return _get_national_geography_soi_target_from_year(variable, geography_year)
 
 
+def _get_state_geography_soi_targets_from_year(
+    variable: str, geography_year: int
+) -> list[dict]:
+    """Return state-level SOI count/amount targets, sorted by state code.
+
+    Keeps rows with ``agi_stub == 0`` and ``STATE != "US"`` (plus, when the
+    column exists, ``CONG_DISTRICT == 0``); raises ``ValueError`` if none remain.
+    """
+    code = _get_geography_file_aggregate_target_spec(variable)["code"]
+    soi = extract_soi_data(geography_year)
+    keep = (soi["STATE"] != "US") & (soi["agi_stub"] == 0)
+    if "CONG_DISTRICT" in soi.columns:
+        keep = keep & (soi["CONG_DISTRICT"] == 0)
+    state_rows = soi[keep]
+    if state_rows.empty:
+        raise ValueError(
+            f"IRS geography SOI file for {geography_year} is missing state rows "
+            f"for {variable}"
+        )
+    targets = [
+        {
+            "variable": variable,
+            "source_year": geography_year,
+            "state_code": row.STATE,
+            "count": float(getattr(row, f"N{code}")),
+            "amount": float(getattr(row, f"A{code}")) * 1_000,  # presumably $1,000s
+        }
+        for row in state_rows.itertuples(index=False)
+    ]
+    return sorted(targets, key=lambda entry: entry["state_code"])
+
+
+def get_state_geography_soi_targets(
+    variable: str, dataset_year: int, *, lag: int = IRS_SOI_LAG_YEARS
+) -> list[dict]:
+    """Return state count and amount targets from the IRS geography file.
+
+    The file year is resolved by ``get_geography_soi_year`` from ``lag``.
+    """
+    source_year = get_geography_soi_year(dataset_year, lag=lag)
+    return _get_state_geography_soi_targets_from_year(variable, source_year)
+
+
 def get_national_geography_soi_agi_targets(
     variable: str,
     dataset_year: int,

diff --git a/policyengine_us_data/db/etl_national_targets.py b/policyengine_us_data/db/etl_national_targets.py
@@ -112,20 +112,6 @@ def extract_national_targets(year: int = DEFAULT_YEAR):
     tax_expenditure_targets = [{**target} for target in raw_tax_expenditure_targets]
 
     direct_sum_targets = [
-        {
-            "variable": "alimony_income",
-            "value": 13e9,
-            "source": "Survey-reported (post-TCJA grandfathered)",
-            "notes": "Alimony received - survey reported, not tax-filer restricted",
-            "year": 2024,
-        },
-        {
-            "variable": "alimony_expense",
-            "value": 13e9,
-            "source": "Survey-reported (post-TCJA grandfathered)",
-            "notes": "Alimony paid - survey reported, not tax-filer restricted",
-            "year": 2024,
-        },
         {
             "variable": "medicaid",
             "value": 871.7e9,
@@ -140,20 +126,6 @@ def extract_national_targets(year: int = DEFAULT_YEAR):
             "notes": "Total household net worth",
             "year": 2024,
         },
-        {
-            "variable": "health_insurance_premiums_without_medicare_part_b",
-            "value": 385e9,
-            "source": "MEPS/NHEA",
-            "notes": "Health insurance premiums excluding Medicare Part B",
-            "year": 2024,
-        },
-        {
-            "variable": "other_medical_expenses",
-            "value": 278e9,
-            "source": "MEPS/NHEA",
-            "notes": "Out-of-pocket medical expenses",
-            "year": 2024,
-        },
         {
             "variable": "medicare_part_b_premiums",
             "value": get_beneficiary_paid_medicare_part_b_premiums_target(2024),
@@ -162,52 +134,24 @@ def extract_national_targets(year: int = DEFAULT_YEAR):
             "year": 2024,
         },
         {
-            "variable": "over_the_counter_health_expenses",
-            "value": 72e9,
-            "source": "Consumer Expenditure Survey",
-            "notes": "OTC health products and supplies",
-            "year": 2024,
-        },
-        {
-            "variable": "child_support_expense",
-            "value": 33e9,
-            "source": "Census Bureau",
-            "notes": "Child support payments",
-            "year": 2024,
-        },
-        {
-            "variable": "child_support_received",
-            "value": 33e9,
-            "source": "Census Bureau",
-            "notes": "Child support received",
-            "year": 2024,
-        },
-        {
-            "variable": "spm_unit_capped_work_childcare_expenses",
-            "value": 348e9,
-            "source": "Census Bureau SPM",
-            "notes": "Work and childcare expenses for SPM",
-            "year": 2024,
-        },
-        {
-            "variable": "spm_unit_capped_housing_subsidy",
-            "value": 35e9,
-            "source": "HUD/Census",
-            "notes": "Housing subsidies",
+            "variable": "rent",
+            "value": 764_925_694_800,
+            "source": "Census ACS 2024 1-year table B25060",
+            "notes": "Sum of state aggregate contract rent, annualized from monthly ACS aggregate contract rent",
             "year": 2024,
         },
         {
             "variable": "real_estate_taxes",
-            "value": 500e9,
-            "source": "Census Bureau",
-            "notes": "Property taxes paid",
+            "value": 370_014_207_400,
+            "source": "Census ACS 2024 1-year table B25090",
+            "notes": "Sum of state aggregate real estate taxes paid by owner-occupied housing units",
             "year": 2024,
         },
         {
-            "variable": "rent",
-            "value": 735e9,
-            "source": "Census Bureau/BLS",
-            "notes": "Rental payments",
+            "variable": "childcare_expenses",
+            "value": 63_092e6,
+            "source": "BLS Consumer Expenditure Surveys CE LABSTAT",
+            "notes": "Series CXU670320LB0101M aggregate expenditure: babysitting, childcare, daycare, preschool",
             "year": 2024,
         },
         {

diff --git a/policyengine_us_data/storage/calibration_targets/acs_housing_costs_2024.csv b/policyengine_us_data/storage/calibration_targets/acs_housing_costs_2024.csv
@@ -0,0 +1,52 @@
+state_code,state_fips,annual_contract_rent,real_estate_taxes
+AK,02,1350681600,664772900
+AL,01,5761773600,1537253700
+AR,05,3760575600,1167041400
+AZ,04,16849603200,4320807000
+CA,06,143291068800,52872735400
+CO,08,17072544000,5750527500
+CT,09,8116260000,7275184600
+DC,11,4602276000,778233300
+DE,10,1652836800,656213100
+FL,12,57303682800,24312484700
+GA,13,21304225200,8707748600
+HI,15,4073208000,981165300
+IA,19,4069554000,3234507400
+ID,16,3091480800,1222009800
+IL,17,24729199200,21262263300
+IN,18,9115561200,4242347000
+KS,20,4246785600,2863525400
+KY,21,5821017600,2434868700
+LA,22,5928199200,1822794700
+MA,25,21342618000,12097297000
+MD,24,14212159200,7520628800
+ME,23,2153030400,1668939000
+MI,26,13242972000,10402220500
+MN,27,9724164000,6501643100
+MO,29,8718777600,4428280300
+MS,28,3018102000,1026895200
+MT,30,1873186800,1018759800
+NC,37,20318032800,7550042500
+ND,38,1474936800,608757100
+NE,31,3199722000,2283083400
+NH,33,2585438400,2900421200
+NJ,34,25845276000,22119447000
+NM,35,2917616400,1218092800
+NV,32,8914724400,2031449700
+NY,36,71916831600,32203085100
+OH,39,17617650000,12129649100
+OK,40,5521292400,2206132700
+OR,41,10933761600,4917685900
+PA,42,22028415600,14303332700
+RI,44,2401389600,1519517700
+SC,45,7908846000,2768317200
+SD,46,1274104800,825527300
+TN,47,12780411600,3724735100
+TX,48,67268908800,34936256600
+UT,49,6183264000,2346772700
+VA,51,20114900400,8760836100
+VT,50,1119537600,1171089500
+WA,53,23878054800,10671295800
+WI,55,10165308000,6958356700
+WV,54,1337834400,584045200
+WY,56,793893600,505130800
diff --git a/policyengine_us_data/storage/calibration_targets/pull_hardcoded_targets.py b/policyengine_us_data/storage/calibration_targets/pull_hardcoded_targets.py
@@ -3,26 +3,13 @@
 from policyengine_us_data.storage import CALIBRATION_FOLDER
 
 """
-Hardcoded targets for the year 2024 from CPS-derived statistics and other sources. Include medical expenses, sum of SPM thresholds, and child support expenses.
+Hardcoded targets for the year 2024 from administrative and
+authoritative aggregate sources.
 """
 
 HARD_CODED_TOTALS = {
-    "health_insurance_premiums_without_medicare_part_b": 385e9,
-    "other_medical_expenses": 278e9,
     "medicare_part_b_premiums": 112e9,
-    "over_the_counter_health_expenses": 72e9,
-    "spm_unit_spm_threshold": 3_945e9,
-    "child_support_expense": 33e9,
-    "child_support_received": 33e9,
-    "spm_unit_capped_work_childcare_expenses": 348e9,
-    "spm_unit_capped_housing_subsidy": 35e9,
     "tanf": 7_788_317_474.55,
-    # Alimony could be targeted via SOI
-    "alimony_income": 13e9,
-    "alimony_expense": 13e9,
-    # Rough estimate, not CPS derived
-    "real_estate_taxes": 500e9,  # Rough estimate between 350bn and 600bn total property tax collections
-    "rent": 735e9,  # ACS total uprated by CPI
     # Table 5A from https://www.irs.gov/statistics/soi-tax-stats-individual-information-return-form-w2-statistics
     # shows $38,316,190,000 in Box 7: Social security tips (2018)
     # Wages and salaries grew 32% from 2018 to 2023: https://fred.stlouisfed.org/graph/?g=1J0CC

diff --git a/policyengine_us_data/storage/calibration_targets/refresh_acs_housing_cost_targets.py b/policyengine_us_data/storage/calibration_targets/refresh_acs_housing_cost_targets.py
@@ -0,0 +1,76 @@
+import csv
+import json
+from urllib.request import urlopen
+
+from policyengine_us_data.storage import CALIBRATION_FOLDER
+from policyengine_us_data.storage.calibration_targets.pull_soi_targets import (
+    STATE_ABBR_TO_FIPS,
+)
+
+
+# Default ACS vintage and the dataset path segment of the Census API URL.
+YEAR = 2024
+ACS_DATASET = "acs/acs1"
+# Inverse of STATE_ABBR_TO_FIPS, for looking up abbreviations by FIPS code.
+STATE_FIPS_TO_ABBR = {fips: abbr for abbr, fips in STATE_ABBR_TO_FIPS.items()}
+
+
+def fetch_acs_housing_cost_targets(year: int = YEAR) -> list[dict]:
+    """Fetch ACS state rent and property-tax aggregates.
+
+    B25060 is aggregate monthly contract rent for renter-occupied units
+    paying cash rent, annualized here to match the yearly `rent` variable.
+    B25090 is aggregate real estate taxes paid by owner-occupied units.
+    Returns one dict per state in STATE_FIPS_TO_ABBR, sorted by state code;
+    raises ValueError when an estimate is null or negative (unavailable).
+    """
+    url = (
+        f"https://api.census.gov/data/{year}/{ACS_DATASET}"
+        "?get=NAME,B25060_001E,B25090_001E&for=state:*"
+    )
+    with urlopen(url, timeout=60) as response:  # never hang on a stalled socket
+        rows = json.load(response)
+    column_index = {column: index for index, column in enumerate(rows[0])}
+
+    targets = []
+    for row in rows[1:]:
+        state_fips = row[column_index["state"]]
+        state_code = STATE_FIPS_TO_ABBR.get(state_fips)
+        if state_code is None:
+            continue
+        monthly = row[column_index["B25060_001E"]]
+        taxes = row[column_index["B25090_001E"]]
+        # Census reports unavailable estimates as null or negative sentinels.
+        if monthly is None or taxes is None or min(float(monthly), float(taxes)) < 0:
+            raise ValueError(f"ACS {year} estimate unavailable for {state_code}")
+        targets.append(
+            {
+                "state_code": state_code,
+                "state_fips": state_fips,
+                "annual_contract_rent": int(float(monthly) * 12),
+                "real_estate_taxes": int(float(taxes)),
+            }
+        )
+    return sorted(targets, key=lambda target: target["state_code"])
+
+
+def main() -> None:
+    """Fetch the ACS housing-cost targets and save them as a calibration CSV.
+
+    The rows from ``fetch_acs_housing_cost_targets()`` are written, after a
+    header row, to ``acs_housing_costs_{YEAR}.csv`` in ``CALIBRATION_FOLDER``.
+    """
+    # Column order of the output file.
+    columns = ["state_code", "state_fips", "annual_contract_rent", "real_estate_taxes"]
+    rows = fetch_acs_housing_cost_targets()
+
+    destination = CALIBRATION_FOLDER / f"acs_housing_costs_{YEAR}.csv"
+    with destination.open("w", newline="") as handle:
+        # Fixed "\n" terminator keeps the file identical across platforms.
+        writer = csv.DictWriter(handle, fieldnames=columns, lineterminator="\n")
+        writer.writeheader()
+        writer.writerows(rows)
+
+
+if __name__ == "__main__":
+    main()