Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions policyengine_us_data/datasets/cps/extended_cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,31 @@ def _drop_formula_variables(cls, data):
Variables with formulas, ``adds``, or ``subtracts`` are
recomputed by the simulation engine, so storing them wastes
space and can mislead validation.

Before dropping, any stored aggregate variable whose ``adds``
include a behavioral-response input (a name ending in
``_before_lsr`` or ``_before_response``, e.g.
``employment_income_before_lsr``) is renamed to that input, so
the raw data is preserved under the correct input-variable name.
"""
from policyengine_us import CountryTaxBenefitSystem

tbs = CountryTaxBenefitSystem()

_RESPONSE_SUFFIXES = ("_before_lsr", "_before_response")
for name, var in tbs.variables.items():
if name not in data:
continue
for add_var in getattr(var, "adds", None) or []:
if any(add_var.endswith(s) for s in _RESPONSE_SUFFIXES):
if add_var not in data:
logger.info(
"Renaming %s -> %s before drop",
name,
add_var,
)
data[add_var] = data.pop(name)
break

formula_vars = {
name
for name, var in tbs.variables.items()
Expand Down
6 changes: 3 additions & 3 deletions policyengine_us_data/datasets/cps/small_enhanced_cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,13 +125,13 @@ def create_sparse_ecps():
if values is not None:
data[variable][time_period] = values

if len(data[variable]) == 0:
del data[variable]
if len(data[variable]) == 0:
del data[variable]

# Validate critical variables exist before writing
critical_vars = [
"household_weight",
"employment_income",
"employment_income_before_lsr",
"household_id",
"person_id",
]
Expand Down