From 3cd3d3c89dc63979378368f1ac88d27f328672d0 Mon Sep 17 00:00:00 2001
From: Max Ghenis
Date: Sat, 25 Apr 2026 17:18:34 -0400
Subject: [PATCH] Map CPS farm income to farm operations

---

Notes (reviewer commentary; sits after the "---" separator, outside the commit message):
    * cps.py: inside add_personal_income_variables, the value read from
      person.FRSE_VAL is now stored under cps["farm_operations_income"];
      the cps["farm_income"] assignment is removed.
    * The new unit test builds a two-row, all-zero person frame, sets
      FRSE_VAL to [1_000.0, -500.0], calls add_personal_income_variables
      with a plain dict and year 2024, and asserts that the values appear
      under "farm_operations_income" and that "farm_income" is not written.
    * NOTE(review): any other reader of the old "farm_income" key is not
      visible in this patch - confirm none remain before merging.
    * NOTE(review): indentation of the context lines below was
      reconstructed from a whitespace-collapsed copy - verify against the
      target tree before applying.

 changelog.d/cps-farm-operations-income.fixed  |  1 +
 policyengine_us_data/datasets/cps/cps.py      |  2 +-
 .../datasets/test_cps_income_variables.py     | 68 +++++++++++++++++++
 3 files changed, 70 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/cps-farm-operations-income.fixed
 create mode 100644 tests/unit/datasets/test_cps_income_variables.py

diff --git a/changelog.d/cps-farm-operations-income.fixed b/changelog.d/cps-farm-operations-income.fixed
new file mode 100644
index 000000000..b54206125
--- /dev/null
+++ b/changelog.d/cps-farm-operations-income.fixed
@@ -0,0 +1 @@
+Map CPS farm self-employment income to farm operations income.
diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
index d12ba7eef..877eed849 100644
--- a/policyengine_us_data/datasets/cps/cps.py
+++ b/policyengine_us_data/datasets/cps/cps.py
@@ -798,7 +798,7 @@ def add_personal_income_variables(cps: h5py.File, person: DataFrame, year: int):
         1 - p["taxable_interest_fraction"]
     )
     cps["self_employment_income"] = person.SEMP_VAL
-    cps["farm_income"] = person.FRSE_VAL
+    cps["farm_operations_income"] = person.FRSE_VAL
     cps["qualified_dividend_income"] = (
         person.DIV_VAL * (p["qualified_dividend_fraction"])
     )
diff --git a/tests/unit/datasets/test_cps_income_variables.py b/tests/unit/datasets/test_cps_income_variables.py
new file mode 100644
index 000000000..b85639858
--- /dev/null
+++ b/tests/unit/datasets/test_cps_income_variables.py
@@ -0,0 +1,68 @@
+import numpy as np
+import pandas as pd
+
+from policyengine_us_data.datasets.cps.cps import add_personal_income_variables
+
+
+def _minimal_person_income_frame() -> pd.DataFrame:
+    columns = [
+        "WSAL_VAL",
+        "HRSWK",
+        "A_HRS1",
+        "INT_VAL",
+        "SEMP_VAL",
+        "FRSE_VAL",
+        "DIV_VAL",
+        "RNT_VAL",
+        "RESNSS1",
+        "RESNSS2",
+        "SS_VAL",
+        "A_AGE",
+        "UC_VAL",
+        "LKWEEKS",
+        "PNSN_VAL",
+        "ANN_VAL",
+        "DST_SC1",
+        "DST_VAL1",
+        "DST_SC2",
+        "DST_VAL2",
+        "DST_SC1_YNG",
+        "DST_VAL1_YNG",
+        "DST_SC2_YNG",
+        "DST_VAL2_YNG",
+        "OI_OFF",
+        "OI_VAL",
+        "CSP_VAL",
+        "PAW_VAL",
+        "SSI_VAL",
+        "RETCB_VAL",
+        "CAP_VAL",
+        "WICYN",
+        "VET_VAL",
+        "WC_VAL",
+        "DIS_VAL1",
+        "DIS_SC1",
+        "DIS_VAL2",
+        "DIS_SC2",
+        "CHSP_VAL",
+        "PHIP_VAL",
+        "POTC_VAL",
+        "PMED_VAL",
+        "MCARE",
+        "PEMCPREM",
+    ]
+    person = pd.DataFrame({column: [0.0, 0.0] for column in columns})
+    person["A_AGE"] = [30, 45]
+    person["LKWEEKS"] = [0, 0]
+    return person
+
+
+def test_add_personal_income_variables_maps_farm_self_employment_to_operations():
+    person = _minimal_person_income_frame()
+    person["FRSE_VAL"] = [1_000.0, -500.0]
+    cps = {}
+
+    add_personal_income_variables(cps, person, 2024)
+
+    np.testing.assert_array_equal(cps["farm_operations_income"], [1_000.0, -500.0])
+    assert "farm_income" not in cps