diff --git a/changelog.d/841.fixed.md b/changelog.d/841.fixed.md new file mode 100644 index 000000000..b3f10bd7a --- /dev/null +++ b/changelog.d/841.fixed.md @@ -0,0 +1 @@ +Populate American Opportunity Credit eligibility inputs in Enhanced CPS from the PUF-imputed AOTC signal. diff --git a/policyengine_us_data/datasets/cps/extended_cps.py b/policyengine_us_data/datasets/cps/extended_cps.py index 53a6ceefe..ee3b66a1d 100644 --- a/policyengine_us_data/datasets/cps/extended_cps.py +++ b/policyengine_us_data/datasets/cps/extended_cps.py @@ -13,6 +13,10 @@ ) from policyengine_us_data.datasets.puf import PUF, PUF_2024 from policyengine_us_data.storage import STORAGE_FOLDER +from policyengine_us_data.utils.aotc import ( + maximum_american_opportunity_credit_per_student, + qualifying_expenses_from_american_opportunity_credit, +) from policyengine_us_data.utils.mortgage_interest import ( STRUCTURAL_MORTGAGE_VARIABLES, convert_mortgage_interest_to_structural_inputs, @@ -30,6 +34,32 @@ logger = logging.getLogger(__name__) +AOTC_ELIGIBILITY_INPUTS = ( + "is_pursuing_credential_for_american_opportunity_credit", + "attends_eligible_educational_institution_for_american_opportunity_credit", + "is_enrolled_at_least_half_time_for_american_opportunity_credit", + "has_american_opportunity_credit_1098_t_or_exception", + "has_american_opportunity_credit_institution_ein", + "has_completed_first_four_years_of_postsecondary_education", + "has_felony_drug_conviction", + "american_opportunity_credit_claimed_prior_years", +) + + +LLC_ELIGIBILITY_INPUTS = ( + "attends_eligible_educational_institution_for_lifetime_learning_credit", + "has_lifetime_learning_credit_1098_t_or_exception", +) + + +def _supports_aotc_eligibility_inputs() -> bool: + return has_policyengine_us_variables(*AOTC_ELIGIBILITY_INPUTS) + + +def _supports_llc_eligibility_inputs() -> bool: + return has_policyengine_us_variables(*LLC_ELIGIBILITY_INPUTS) + + def _supports_structural_mortgage_inputs() -> bool: return 
@classmethod
def _impute_aotc_eligibility_inputs(cls, data, time_period):
    """Convert imputed tax-unit AOTC amounts to person eligibility inputs.

    For every tax unit with a positive ``american_opportunity_credit`` the
    credit is handed out to unit members in priority order: members who
    already report positive tuition, then full-time college students, then
    dependents, then any remaining member. Each selected person absorbs up
    to the per-student maximum credit, is flagged as an AOTC student, and
    has ``qualified_tuition_expenses`` overwritten with the expenses that
    ``qualifying_expenses_from_american_opportunity_credit`` returns for
    that credit.

    Args:
        data: Mapping of variable name to ``{period: array}``; updated in
            place and returned.
        time_period: Period key to read and write.

    Returns:
        ``data``. It is returned unchanged when a required input is
        missing, entity lengths disagree, or no tax unit has a positive
        credit.
    """

    def period_values(variable):
        return data.get(variable, {}).get(time_period)

    def store(variable, values):
        # Merge instead of replacing the mapping so that values stored for
        # other periods are not silently dropped.
        data[variable] = {**data.get(variable, {}), time_period: values}

    credit = period_values("american_opportunity_credit")
    tax_unit_ids = period_values("tax_unit_id")
    person_tax_unit_ids = period_values("person_tax_unit_id")
    tuition = period_values("qualified_tuition_expenses")
    if (
        credit is None
        or tax_unit_ids is None
        or person_tax_unit_ids is None
        or tuition is None
    ):
        return data

    credit = np.asarray(credit)
    tax_unit_ids = np.asarray(tax_unit_ids)
    person_tax_unit_ids = np.asarray(person_tax_unit_ids)
    # Work on a copy: tuition is rewritten for every selected student.
    tuition = np.array(tuition, copy=True)
    n_people = len(person_tax_unit_ids)
    if len(credit) != len(tax_unit_ids) or len(tuition) != n_people:
        logger.warning(
            "Skipping AOTC eligibility imputation due to entity length mismatch"
        )
        return data

    positive_credit = credit > 0
    if not positive_credit.any():
        return data

    def flags(variable):
        """Return a writable boolean copy of ``variable`` (all False if absent)."""
        existing = period_values(variable)
        if existing is None:
            return np.zeros(n_people, dtype=bool)
        return np.asarray(existing, dtype=bool).copy()

    full_time = flags("is_full_time_college_student")
    dependent = flags("is_tax_unit_dependent")
    aotc_student = np.zeros(n_people, dtype=bool)
    adjusted_tuition_count = 0

    positive_credit_units = tax_unit_ids[positive_credit]
    credit_by_tax_unit_id = dict(zip(tax_unit_ids, credit))
    max_student_credit = maximum_american_opportunity_credit_per_student(
        time_period
    )

    # With a non-positive per-student maximum nobody can be allocated any
    # credit, so the whole allocation loop is skipped.
    if max_student_credit > 0:
        # Group people by tax unit once instead of rescanning every person
        # for every credited unit. The stable sort keeps members of a unit
        # in ascending person order.
        order = np.argsort(person_tax_unit_ids, kind="stable")
        sorted_ids = person_tax_unit_ids[order]
        starts = np.searchsorted(sorted_ids, positive_credit_units, side="left")
        stops = np.searchsorted(sorted_ids, positive_credit_units, side="right")

        for tax_unit_id, start, stop in zip(positive_credit_units, starts, stops):
            member_indices = order[start:stop]
            if member_indices.size == 0:
                continue

            # Priority order; later groups only contribute people that were
            # not already listed by an earlier group.
            candidates = np.concatenate(
                (
                    member_indices[tuition[member_indices] > 0],
                    member_indices[full_time[member_indices]],
                    member_indices[dependent[member_indices]],
                    member_indices,
                )
            )

            remaining_credit = float(credit_by_tax_unit_id[tax_unit_id])
            # dict.fromkeys de-duplicates while preserving first occurrence.
            for selected in dict.fromkeys(candidates.tolist()):
                if remaining_credit <= 0:
                    break
                student_credit = min(remaining_credit, max_student_credit)
                target_tuition = qualifying_expenses_from_american_opportunity_credit(
                    student_credit,
                    time_period,
                )
                if tuition[selected] != target_tuition:
                    adjusted_tuition_count += 1
                aotc_student[selected] = True
                tuition[selected] = target_tuition
                remaining_credit -= student_credit

    if not _supports_aotc_eligibility_inputs():
        # Installed policyengine-us lacks the granular inputs: fall back to
        # the single legacy eligibility flag.
        eligible = flags("is_eligible_for_american_opportunity_credit")
        eligible[aotc_student] = True
        store("is_eligible_for_american_opportunity_credit", eligible)
        store("qualified_tuition_expenses", tuition)
        logger.info(
            "AOTC eligibility imputation populated the legacy "
            "eligibility input for %d people across %d tax units "
            "and adjusted tuition for %d people",
            int(aotc_student.sum()),
            int(positive_credit.sum()),
            adjusted_tuition_count,
        )
        return data

    # Facts that must be true for an AOTC student.
    for variable in (
        "is_pursuing_credential_for_american_opportunity_credit",
        "attends_eligible_educational_institution_for_american_opportunity_credit",
        "is_enrolled_at_least_half_time_for_american_opportunity_credit",
        "has_american_opportunity_credit_1098_t_or_exception",
        "has_american_opportunity_credit_institution_ein",
    ):
        values = flags(variable)
        values[aotc_student] = True
        store(variable, values)

    # Disqualifying facts that must be false for an AOTC student.
    for variable in (
        "has_completed_first_four_years_of_postsecondary_education",
        "has_felony_drug_conviction",
    ):
        values = flags(variable)
        values[aotc_student] = False
        store(variable, values)

    existing_prior_years = period_values(
        "american_opportunity_credit_claimed_prior_years"
    )
    prior_years = (
        np.asarray(existing_prior_years).copy()
        if existing_prior_years is not None
        else np.zeros(n_people, dtype=np.int8)
    )
    # Cap prior claims at 3 for imputed students.
    prior_years[aotc_student] = np.minimum(prior_years[aotc_student], 3)
    store("american_opportunity_credit_claimed_prior_years", prior_years)
    store("qualified_tuition_expenses", tuition)
    logger.info(
        "AOTC eligibility imputation populated inputs for %d people "
        "across %d tax units and adjusted tuition for %d people",
        int(aotc_student.sum()),
        int(positive_credit.sum()),
        adjusted_tuition_count,
    )
    return data


@classmethod
def _impute_llc_eligibility_inputs(cls, data, time_period):
    """Populate LLC factual eligibility inputs for non-AOTC tuition records.

    A person is treated as an LLC student when they have positive
    ``qualified_tuition_expenses`` and are not flagged as an AOTC student
    (granular flag first, legacy eligibility flag as fallback).

    Args:
        data: Mapping of variable name to ``{period: array}``; updated in
            place and returned.
        time_period: Period key to read and write.

    Returns:
        ``data``. It is returned unchanged when the installed
        policyengine-us lacks the LLC inputs, a required input is missing,
        lengths disagree, or nobody qualifies.
    """

    if not _supports_llc_eligibility_inputs():
        return data

    def period_values(variable):
        return data.get(variable, {}).get(time_period)

    person_tax_unit_ids = period_values("person_tax_unit_id")
    tuition = period_values("qualified_tuition_expenses")
    if person_tax_unit_ids is None or tuition is None:
        return data

    person_tax_unit_ids = np.asarray(person_tax_unit_ids)
    tuition = np.asarray(tuition)
    n_people = len(person_tax_unit_ids)
    if len(tuition) != n_people:
        logger.warning(
            "Skipping LLC eligibility imputation due to entity length mismatch"
        )
        return data

    aotc_student = period_values(
        "is_pursuing_credential_for_american_opportunity_credit"
    )
    if aotc_student is None:
        aotc_student = period_values("is_eligible_for_american_opportunity_credit")
    aotc_student = (
        np.asarray(aotc_student, dtype=bool)
        if aotc_student is not None
        else np.zeros(n_people, dtype=bool)
    )

    llc_student = (tuition > 0) & ~aotc_student
    if not llc_student.any():
        return data

    for variable in LLC_ELIGIBILITY_INPUTS:
        existing = period_values(variable)
        values = (
            np.asarray(existing, dtype=bool).copy()
            if existing is not None
            else np.zeros(n_people, dtype=bool)
        )
        values[llc_student] = True
        # Merge instead of replacing the mapping so that values stored for
        # other periods are not silently dropped.
        data[variable] = {**data.get(variable, {}), time_period: values}

    logger.info(
        "LLC eligibility imputation populated inputs for %d people "
        "across %d tax units",
        int(llc_student.sum()),
        int(np.unique(person_tax_unit_ids[llc_student]).size),
    )
    return data
PUF_LLC_ELIGIBILITY_INPUTS = (
    "attends_eligible_educational_institution_for_lifetime_learning_credit",
    "has_lifetime_learning_credit_1098_t_or_exception",
)


def _qualified_tuition_expenses_from_puf(puf: pd.DataFrame) -> pd.Series:
    """Return qualified tuition expenses from the most specific PUF fields.

    Uses ``E03230`` alone when the frame has no ``E87530`` column; otherwise
    returns the element-wise maximum of the two columns.
    """

    if "E87530" not in puf:
        return puf.E03230
    return np.maximum(puf.E03230, puf.E87530)


def _lifetime_learning_credit_student_from_puf(puf: pd.DataFrame) -> pd.Series:
    """Infer which PUF records have LLC-specific qualified expenses.

    When ``E87530`` is present a record counts only if that field is
    positive; otherwise any positive qualified tuition counts.
    """

    if "E87530" in puf:
        return puf.E87530 > 0
    return _qualified_tuition_expenses_from_puf(puf) > 0


def _with_lifetime_learning_credit_inputs(
    arrays: dict[str, np.ndarray],
) -> dict[str, np.ndarray]:
    """Populate PUF LLC factual eligibility inputs when PE-US supports them.

    Inputs already present in ``arrays`` are left untouched. Missing ones
    are set to ``qualified_tuition_expenses > 0``. ``arrays`` is updated in
    place and returned.
    """

    if not has_policyengine_us_variables(*PUF_LLC_ELIGIBILITY_INPUTS):
        return arrays
    tuition = arrays.get("qualified_tuition_expenses")
    if tuition is None:
        return arrays

    has_tuition = np.asarray(tuition) > 0
    for variable in PUF_LLC_ELIGIBILITY_INPUTS:
        if variable not in arrays:
            # Give every input its own array so a later in-place edit of one
            # input cannot leak into the other.
            arrays[variable] = has_tuition.copy()
    return arrays


def _person_financial_value_from_puf_row(variable: str, row, share: float):
    """Return the person-level value of ``variable`` for one PUF row.

    Financial variables are scaled by ``share``. The LLC eligibility inputs
    are boolean instead: they are true only when the row's flag is set and
    the person's share of qualified tuition is positive.
    """
    if variable in PUF_LLC_ELIGIBILITY_INPUTS:
        # Wrap the comparison in bool() so the result is always a Python bool,
        # never a NumPy scalar, regardless of which operand decides the result.
        return bool(row[variable]) and bool(
            row["qualified_tuition_expenses"] * share > 0
        )
    return row[variable] * share
pd.DataFrame) -> pd.DataFrame: # Add variable renames puf.S006 = puf.S006 / 100 @@ -346,7 +390,10 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame: puf["unreimbursed_business_employee_expenses"] = puf.E20400 puf["non_qualified_dividend_income"] = puf.E00600 - puf.E00650 puf["qualified_dividend_income"] = puf.E00650 - puf["qualified_tuition_expenses"] = puf.E03230 + puf["qualified_tuition_expenses"] = _qualified_tuition_expenses_from_puf(puf) + llc_student = _lifetime_learning_credit_student_from_puf(puf) + for variable in PUF_LLC_ELIGIBILITY_INPUTS: + puf[variable] = llc_student puf["real_estate_taxes"] = puf.E18500 # Schedule E rent and royalty puf["rental_income"] = puf.E25850 - puf.E25860 @@ -388,7 +435,6 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame: puf["american_opportunity_credit"] = puf.E87521 puf["energy_efficient_home_improvement_credit"] = puf.E07260 puf["early_withdrawal_penalty"] = puf.E09900 - # puf["qualified_tuition_expenses"] = puf.E87530 # PE uses the same variable for qualified tuition (general) and qualified tuition (Lifetime Learning Credit). Revisit here. 
puf["other_credits"] = puf.P08000 puf["savers_credit"] = puf.E07240 puf["recapture_of_investment_credit"] = puf.E09700 @@ -549,6 +595,8 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame: "partnership_se_income", "qualified_reit_and_ptp_income", "qualified_bdc_income", + "attends_eligible_educational_institution_for_lifetime_learning_credit", + "has_lifetime_learning_credit_1098_t_or_exception", ] @@ -738,6 +786,7 @@ def generate(self): ] growth = current_index / start_index arrays[variable] = arrays[variable] * growth + arrays = _with_lifetime_learning_credit_inputs(arrays) self.save_dataset(arrays) return @@ -836,6 +885,7 @@ def generate(self): self.holder = { variable: values[self.time_period] for variable, values in holder_tp.items() } + self.holder = _with_lifetime_learning_credit_inputs(self.holder) self.save_dataset(self.holder) def add_tax_unit(self, row, tax_unit_id): @@ -886,7 +936,13 @@ def add_filer(self, row, tax_unit_id): # Skip this one- we are adding it artificially at the filer level. continue if self.variable_to_entity[key] == "person": - self.holder[key].append(row[key] * self.earn_splits[-1]) + self.holder[key].append( + _person_financial_value_from_puf_row( + key, + row, + self.earn_splits[-1], + ) + ) def add_spouse(self, row, tax_unit_id): person_id = int(tax_unit_id * 1e2 + 2) @@ -919,7 +975,13 @@ def add_spouse(self, row, tax_unit_id): # Skip this one- we are adding it artificially at the filer level. continue if self.variable_to_entity[key] == "person": - self.holder[key].append(row[key] * (1 - self.earn_splits[-1])) + self.holder[key].append( + _person_financial_value_from_puf_row( + key, + row, + 1 - self.earn_splits[-1], + ) + ) def add_dependent(self, row, tax_unit_id, dependent_id): person_id = int(tax_unit_id * 1e2 + 3 + dependent_id) @@ -943,7 +1005,9 @@ def add_dependent(self, row, tax_unit_id, dependent_id): # Skip this one- we are adding it artificially at the filer level. 
continue if self.variable_to_entity[key] == "person": - self.holder[key].append(0) + self.holder[key].append( + False if key in PUF_LLC_ELIGIBILITY_INPUTS else 0 + ) self.holder["is_male"].append(rng.choice([0, 1])) diff --git a/policyengine_us_data/datasets/puf/uprate_puf.py b/policyengine_us_data/datasets/puf/uprate_puf.py index 574b0133a..0f2d04bd3 100644 --- a/policyengine_us_data/datasets/puf/uprate_puf.py +++ b/policyengine_us_data/datasets/puf/uprate_puf.py @@ -64,6 +64,7 @@ "E20400", "E26270", "E03230", + "E87530", "E25850", "E25860", "E00900", @@ -181,6 +182,8 @@ def uprate_puf(puf, from_year, to_year): # (for now, because I'm not sure how to handle the deductions, # credits, and incomes separately) for variable in REMAINING_VARIABLES: + if variable not in puf: + continue growth = get_growth("adjusted_gross_income", from_year, to_year) puf[variable] *= growth diff --git a/policyengine_us_data/db/etl_irs_soi.py b/policyengine_us_data/db/etl_irs_soi.py index 2c386420b..705ebc722 100644 --- a/policyengine_us_data/db/etl_irs_soi.py +++ b/policyengine_us_data/db/etl_irs_soi.py @@ -151,8 +151,12 @@ def _skip_coarse_state_agi_person_count_target(geo_type: str, agi_stub: int) -> # +40.7% / +26.1% / +3.1% definitional mismatch at 2023 values. 
WORKBOOK_NATIONAL_DOMAIN_TARGETS = { "dividend_income": "ordinary_dividends", + "education_tax_credits": "education_tax_credits", "income_tax_before_credits": "income_tax_before_credits", "qualified_dividend_income": "qualified_dividends", + "refundable_american_opportunity_credit": ( + "refundable_american_opportunity_credit" + ), "rental_income": "rent_and_royalty_net_income", "tax_exempt_interest_income": "exempt_interest", "taxable_interest_income": "taxable_interest_income", diff --git a/policyengine_us_data/storage/calibration_targets/README.md b/policyengine_us_data/storage/calibration_targets/README.md index c43362467..169bb8f46 100644 --- a/policyengine_us_data/storage/calibration_targets/README.md +++ b/policyengine_us_data/storage/calibration_targets/README.md @@ -12,9 +12,11 @@ workbooks, run: `make refresh-soi-targets SOI_SOURCE_YEAR=2021 SOI_TARGET_YEAR=2023` This refresh path covers the tracked workbook-based national SOI table targets -in `soi_targets.csv`. The refresh code now rewrites the active Table 1.4 / -Table 2.1 targets with explicit semantic mappings for the current Publication -1304 layouts instead of reusing stale stored column letters. +in `soi_targets.csv`, including the Table 3.3 refundable American Opportunity +Credit count and amount and the combined nonrefundable education-credit count +and amount. The refresh code now rewrites the active Table 1.4 / Table 2.1 +targets with explicit semantic mappings for the current Publication 1304 +layouts instead of reusing stale stored column letters. 
`get_soi()` now selects the best available tracked year per variable for the requested simulation year, so TY2024 uses TY2023 where available, TY2022 uses diff --git a/policyengine_us_data/storage/calibration_targets/refresh_soi_table_targets.py b/policyengine_us_data/storage/calibration_targets/refresh_soi_table_targets.py index e4e8e5fc0..45e91d219 100644 --- a/policyengine_us_data/storage/calibration_targets/refresh_soi_table_targets.py +++ b/policyengine_us_data/storage/calibration_targets/refresh_soi_table_targets.py @@ -25,6 +25,7 @@ "Table 1.2": "in12ms.xls", "Table 1.4": "in14ar.xls", "Table 2.1": "in21id.xls", + "Table 3.3": "in33ar.xls", "Table 4.3": "in43ts.xls", } diff --git a/policyengine_us_data/storage/calibration_targets/soi_targets.csv b/policyengine_us_data/storage/calibration_targets/soi_targets.csv index 9a9c9aa63..0101b6ead 100644 --- a/policyengine_us_data/storage/calibration_targets/soi_targets.csv +++ b/policyengine_us_data/storage/calibration_targets/soi_targets.csv @@ -11911,3 +11911,15 @@ Year,SOI table,XLSX column,XLSX row,Variable,Filing status,AGI lower bound,AGI u 2023,Table 4.3,13,13,capital_gains_gross,All,675602.0,3100950.0,False,True,False,196881345000 2023,Table 4.3,17,13,business_net_profits,All,675602.0,3100950.0,False,True,False,51448953000 2023,Table 4.3,21,13,partnership_and_s_corp_income,All,675602.0,3100950.0,False,True,False,380628023000 +2021,Table 3.3,I,10,education_tax_credits,All,-inf,inf,True,False,True,8122952 +2021,Table 3.3,J,10,education_tax_credits,All,-inf,inf,False,False,True,8279606000 +2022,Table 3.3,I,10,education_tax_credits,All,-inf,inf,True,False,True,7528175 +2022,Table 3.3,J,10,education_tax_credits,All,-inf,inf,False,False,True,7850773000 +2023,Table 3.3,I,10,education_tax_credits,All,-inf,inf,True,False,True,7211349 +2023,Table 3.3,J,10,education_tax_credits,All,-inf,inf,False,False,True,7554668000 +2021,Table 3.3,AO,10,refundable_american_opportunity_credit,All,-inf,inf,True,False,True,6027056 
"""American Opportunity Credit helpers backed by policyengine-us parameters."""

import math
from functools import lru_cache

import numpy as np


@lru_cache(maxsize=16)
def get_american_opportunity_credit_amount_scale(year: int):
    """Return the policyengine-us AOTC amount scale for a tax year.

    The scale is read from the parameter tree as of 1 January of ``year``.
    Results are memoised per year (up to 16 distinct years), so the
    tax-benefit system is built at most once for each year requested.
    """
    # Imported inside the function, so importing this module does not itself
    # import policyengine-us.
    from policyengine_us import CountryTaxBenefitSystem

    education = CountryTaxBenefitSystem().parameters.gov.irs.credits.education
    return education.american_opportunity_credit.amount(f"{year}-01-01")


def qualifying_expenses_from_american_opportunity_credit(
    credit: float,
    year: int,
) -> float:
    """Return the minimum expenses that generate ``credit`` under PE-US."""
    amount_scale = get_american_opportunity_credit_amount_scale(year)
    return _minimum_base_for_marginal_amount(credit, amount_scale)


def maximum_american_opportunity_credit_per_student(year: int) -> float:
    """Return the maximum AOTC generated by one student under PE-US.

    The schedule is evaluated at its highest threshold; a schedule without
    thresholds yields ``0.0``.
    """
    amount_scale = get_american_opportunity_credit_amount_scale(year)
    if len(amount_scale.thresholds) == 0:
        return 0.0
    # NOTE(review): this is the true maximum only if the bracket starting at
    # the highest threshold has a zero rate - confirm against the parameter.
    terminal_threshold = max(amount_scale.thresholds)
    return float(amount_scale.calc(np.array([terminal_threshold], dtype=float))[0])


def _minimum_base_for_marginal_amount(amount: float, scale) -> float:
    """Invert a marginal amount schedule using the schedule brackets.

    Args:
        amount: Target amount produced by the schedule.
        scale: Object exposing parallel ``thresholds`` and ``rates``
            sequences; they are sorted by threshold before use.

    Returns:
        The smallest base whose accumulated marginal amount reaches
        ``amount``. Non-positive or NaN amounts and empty schedules give
        ``0.0``. Amounts the schedule can never reach give the highest
        threshold.
    """
    amount = float(amount)
    # NaN fails every ordering comparison; without this guard it would fall
    # through every bracket and be reported as the highest threshold.
    if math.isnan(amount) or amount <= 0:
        return 0.0

    thresholds = np.asarray(scale.thresholds, dtype=float)
    rates = np.asarray(scale.rates, dtype=float)
    if thresholds.size == 0:
        return 0.0

    order = np.argsort(thresholds)
    thresholds = thresholds[order]
    rates = rates[order]
    # Each bracket runs to the next threshold; the last one is open-ended.
    uppers = np.append(thresholds[1:], math.inf)

    accrued = 0.0
    for lower, upper, rate in zip(
        thresholds.tolist(), uppers.tolist(), rates.tolist()
    ):
        if rate <= 0:
            # A non-accruing bracket cannot move us closer to the target.
            continue
        if math.isinf(upper):
            return lower + (amount - accrued) / rate

        bracket_amount = (upper - lower) * rate
        if amount <= accrued + bracket_amount:
            return lower + (amount - accrued) / rate
        accrued += bracket_amount

    return float(thresholds[-1])
def _get_tracked_soi_amount_and_count(
    variable: str, time_period: int, description: str
) -> dict:
    """Return the tracked SOI amount and count rows for ``variable``.

    Args:
        variable: SOI variable name passed to ``get_tracked_soi_row``.
        time_period: Requested simulation year.
        description: Prefix used in the error message when the two rows
            come from different source years.

    Returns:
        Dict with ``source_year``, ``amount`` and ``count``.

    Raises:
        ValueError: If the amount and count rows have different ``Year``
            values.
    """
    amount_row = get_tracked_soi_row(variable, time_period, count=False)
    count_row = get_tracked_soi_row(variable, time_period, count=True)
    amount_year = int(amount_row["Year"])
    count_year = int(count_row["Year"])
    if amount_year != count_year:
        raise ValueError(
            f"{description} count and amount source years differ: "
            f"{count_year} vs {amount_year}"
        )
    return {
        "source_year": amount_year,
        "amount": float(amount_row["Value"]),
        "count": float(count_row["Value"]),
    }


def _add_amount_and_count_targets(
    loss_matrix, targets_list, sim, time_period, variable: str, target: dict
):
    """Append a national amount column and a positive-recipient count column.

    The amount column is ``variable`` mapped to households. The count column
    flags tax units with a positive value and maps that flag to households.
    Targets are appended in the same order: amount, then count.
    """
    label = f"nation/irs/{variable}"
    loss_matrix[label] = sim.calculate(
        variable, map_to="household", period=time_period
    ).values
    targets_list.append(target["amount"])

    tax_unit_values = sim.calculate(variable, period=time_period).values
    loss_matrix[f"{label}_count"] = sim.map_result(
        (tax_unit_values > 0).astype(float),
        "tax_unit",
        "household",
    )
    targets_list.append(target["count"])

    return targets_list, loss_matrix


def _get_refundable_aotc_target(time_period: int) -> dict:
    """Return national refundable AOTC amount and count from IRS SOI Table 3.3."""

    return _get_tracked_soi_amount_and_count(
        "refundable_american_opportunity_credit", time_period, "AOTC"
    )


def _add_aotc_targets(loss_matrix, targets_list, sim, time_period):
    """Add legacy national refundable AOTC amount and recipient-count targets."""

    # Resolve the target first so a source-year mismatch fails before any
    # simulation work is done.
    target = _get_refundable_aotc_target(time_period)
    return _add_amount_and_count_targets(
        loss_matrix,
        targets_list,
        sim,
        time_period,
        "refundable_american_opportunity_credit",
        target,
    )


def _get_education_credit_target(time_period: int) -> dict:
    """Return national nonrefundable education credit target from IRS SOI Table 3.3."""

    return _get_tracked_soi_amount_and_count(
        "education_tax_credits", time_period, "Education credit"
    )


def _add_education_credit_targets(loss_matrix, targets_list, sim, time_period):
    """Add legacy national nonrefundable education credit amount and count targets."""

    # Resolve the target first so a source-year mismatch fails before any
    # simulation work is done.
    target = _get_education_credit_target(time_period)
    return _add_amount_and_count_targets(
        loss_matrix,
        targets_list,
        sim,
        time_period,
        "education_tax_credits",
        target,
    )
Calibrates total # filers (not just taxable returns), with granular bands sourced # from the latest SOI year <= calibration year to avoid hardcoding diff --git a/tests/unit/calibration/test_loss_targets.py b/tests/unit/calibration/test_loss_targets.py index a6d030ca8..457d605b9 100644 --- a/tests/unit/calibration/test_loss_targets.py +++ b/tests/unit/calibration/test_loss_targets.py @@ -4,6 +4,8 @@ from policyengine_us_data.utils.loss import ( _get_aca_national_targets, + _add_education_credit_targets, + _add_aotc_targets, _add_ctc_targets, _get_medicaid_national_targets, _load_aca_spending_and_enrollment_targets, @@ -72,6 +74,8 @@ def __init__(self): def calculate(self, variable, map_to=None, period=None): self.calculate_calls.append((variable, map_to, period)) values = { + "education_tax_credits": [500.0, 0.0, 300.0], + "refundable_american_opportunity_credit": [400.0, 0.0, 250.0], "refundable_ctc": [100.0, 0.0, 50.0], "non_refundable_ctc": [80.0, 10.0, 0.0], } @@ -126,5 +130,77 @@ def test_add_ctc_targets(monkeypatch): ) +def test_add_aotc_targets(monkeypatch): + def fake_get_tracked_soi_row(variable, requested_year, *, count, **kwargs): + assert variable == "refundable_american_opportunity_credit" + assert requested_year == 2024 + return pd.Series( + { + "Year": 2023, + "Value": 5_821_688.0 if count else 5_090_364_000.0, + "SOI table": "Table 3.3", + } + ) + + monkeypatch.setattr( + "policyengine_us_data.utils.loss.get_tracked_soi_row", + fake_get_tracked_soi_row, + ) + sim = _FakeSimulation() + + targets, loss_matrix = _add_aotc_targets( + pd.DataFrame(), + [], + sim, + 2024, + ) + + assert targets == [5_090_364_000.0, 5_821_688.0] + np.testing.assert_array_equal( + loss_matrix["nation/irs/refundable_american_opportunity_credit"], + np.array([400.0, 0.0, 250.0], dtype=np.float32), + ) + np.testing.assert_array_equal( + loss_matrix["nation/irs/refundable_american_opportunity_credit_count"], + np.array([1.0, 0.0, 1.0], dtype=np.float32), + ) + + +def 
test_add_education_credit_targets(monkeypatch): + def fake_get_tracked_soi_row(variable, requested_year, *, count, **kwargs): + assert variable == "education_tax_credits" + assert requested_year == 2024 + return pd.Series( + { + "Year": 2023, + "Value": 7_211_349.0 if count else 7_554_668_000.0, + "SOI table": "Table 3.3", + } + ) + + monkeypatch.setattr( + "policyengine_us_data.utils.loss.get_tracked_soi_row", + fake_get_tracked_soi_row, + ) + sim = _FakeSimulation() + + targets, loss_matrix = _add_education_credit_targets( + pd.DataFrame(), + [], + sim, + 2024, + ) + + assert targets == [7_554_668_000.0, 7_211_349.0] + np.testing.assert_array_equal( + loss_matrix["nation/irs/education_tax_credits"], + np.array([500.0, 0.0, 300.0], dtype=np.float32), + ) + np.testing.assert_array_equal( + loss_matrix["nation/irs/education_tax_credits_count"], + np.array([1.0, 0.0, 1.0], dtype=np.float32), + ) + + def test_tanf_hardcoded_target_uses_fy2024_basic_assistance_total(): assert HARD_CODED_TOTALS["tanf"] == pytest.approx(7_788_317_474.55) diff --git a/tests/unit/datasets/test_puf_tuition.py b/tests/unit/datasets/test_puf_tuition.py new file mode 100644 index 000000000..942048581 --- /dev/null +++ b/tests/unit/datasets/test_puf_tuition.py @@ -0,0 +1,78 @@ +import numpy as np +import pandas as pd + +from policyengine_us_data.datasets.puf import puf as puf_module +from policyengine_us_data.datasets.puf.puf import ( + _lifetime_learning_credit_student_from_puf, + _person_financial_value_from_puf_row, + _qualified_tuition_expenses_from_puf, + _with_lifetime_learning_credit_inputs, +) + + +def test_qualified_tuition_expenses_prefer_form_8863_llc_expenses(): + puf = pd.DataFrame( + { + "E03230": [1_000.0, 3_000.0, 0.0], + "E87530": [2_000.0, 1_500.0, 4_000.0], + } + ) + + result = _qualified_tuition_expenses_from_puf(puf) + + assert result.tolist() == [2_000.0, 3_000.0, 4_000.0] + + +def test_lifetime_learning_credit_student_uses_form_8863_when_available(): + puf = pd.DataFrame( 
+ { + "E03230": [1_000.0, 0.0], + "E87530": [0.0, 2_000.0], + } + ) + + result = _lifetime_learning_credit_student_from_puf(puf) + + assert result.tolist() == [False, True] + + +def test_puf_arrays_add_lifetime_learning_credit_inputs(monkeypatch): + monkeypatch.setattr( + puf_module, + "has_policyengine_us_variables", + lambda *variables: True, + ) + arrays = {"qualified_tuition_expenses": np.array([0.0, 1_000.0])} + + result = _with_lifetime_learning_credit_inputs(arrays) + + for variable in puf_module.PUF_LLC_ELIGIBILITY_INPUTS: + np.testing.assert_array_equal(result[variable], np.array([False, True])) + + +def test_person_financial_value_keeps_llc_inputs_boolean(): + row = pd.Series( + { + "qualified_tuition_expenses": 1_000.0, + "attends_eligible_educational_institution_for_lifetime_learning_credit": True, + "employment_income": 100.0, + } + ) + + assert ( + _person_financial_value_from_puf_row( + "attends_eligible_educational_institution_for_lifetime_learning_credit", + row, + 0.25, + ) + is True + ) + assert ( + _person_financial_value_from_puf_row( + "attends_eligible_educational_institution_for_lifetime_learning_credit", + row, + 0, + ) + is False + ) + assert _person_financial_value_from_puf_row("employment_income", row, 0.25) == 25 diff --git a/tests/unit/datasets/test_uprate_puf.py b/tests/unit/datasets/test_uprate_puf.py index 38249e861..a4675fdf3 100644 --- a/tests/unit/datasets/test_uprate_puf.py +++ b/tests/unit/datasets/test_uprate_puf.py @@ -164,6 +164,37 @@ def test_chained_indexing_pattern_is_a_no_op_silent_under_cow(): assert puf["E00900"].equals(original["E00900"]) +def test_uprate_puf_scales_form_8863_llc_expenses(monkeypatch, tmp_path: Path): + with load_uprate_puf_module(tmp_path) as module: + monkeypatch.setattr(module, "SOI_TO_PUF_STRAIGHT_RENAMES", {}) + monkeypatch.setattr(module, "SOI_TO_PUF_POS_ONLY_RENAMES", {}) + monkeypatch.setattr(module, "SOI_TO_PUF_NEG_ONLY_RENAMES", {}) + monkeypatch.setattr( + module, + "REMAINING_VARIABLES", + 
["E03230", "E87530", "OPTIONAL_MISSING_FIELD"], + ) + monkeypatch.setattr(module, "get_growth", lambda *args: 2.0) + monkeypatch.setattr( + module, + "get_soi_aggregate", + lambda variable, year, is_count: 10.0 if year == 2015 else 20.0, + ) + puf = pd.DataFrame( + { + "E03230": [1_000.0], + "E87530": [2_000.0], + "S006": [100.0], + } + ) + + result = module.uprate_puf(puf, 2015, 2021) + + assert result.loc[0, "E03230"] == pytest.approx(2_000.0) + assert result.loc[0, "E87530"] == pytest.approx(4_000.0) + assert result.loc[0, "S006"] == pytest.approx(200.0) + + def test_uprate_puf_pos_neg_split_module_helpers_intact(): """Verify the module's POS/NEG rename dicts still cover the SOI variables that trigger the chained-indexing path.""" diff --git a/tests/unit/test_etl_irs_soi_overlay.py b/tests/unit/test_etl_irs_soi_overlay.py index 230438aa1..64950f08b 100644 --- a/tests/unit/test_etl_irs_soi_overlay.py +++ b/tests/unit/test_etl_irs_soi_overlay.py @@ -191,6 +191,136 @@ def fake_get_tracked_soi_row(variable, requested_year, **kwargs): assert float(count_rows.iloc[0]["value"]) == 50.0 +def test_workbook_overlay_loads_refundable_aotc_target(monkeypatch, tmp_path): + db_uri, engine = _create_test_engine(tmp_path) + + monkeypatch.setattr( + "policyengine_us_data.db.etl_irs_soi.WORKBOOK_NATIONAL_DOMAIN_TARGETS", + { + "refundable_american_opportunity_credit": ( + "refundable_american_opportunity_credit" + ) + }, + ) + + def fake_get_tracked_soi_row(variable, requested_year, **kwargs): + count = kwargs["count"] + rows = { + ("adjusted_gross_income", False): { + "Year": 2023, + "Value": 1_000_000.0, + "SOI table": "Table 1.1", + }, + ("refundable_american_opportunity_credit", True): { + "Year": 2023, + "Value": 5_821_688.0, + "SOI table": "Table 3.3", + }, + ("refundable_american_opportunity_credit", False): { + "Year": 2023, + "Value": 5_090_364_000.0, + "SOI table": "Table 3.3", + }, + } + return pd.Series(rows[(variable, count)]) + + monkeypatch.setattr( + 
"policyengine_us_data.db.etl_irs_soi.get_tracked_soi_row", + fake_get_tracked_soi_row, + ) + + with Session(engine) as session: + national_filer_stratum = _create_national_filer_stratum(session) + load_national_workbook_soi_targets( + session, + national_filer_stratum.stratum_id, + 2024, + ) + session.commit() + + builder = UnifiedMatrixBuilder(db_uri=db_uri, time_period=2024) + rows = builder._query_targets( + { + "geo_level": "national", + "variables": [ + "tax_unit_count", + "refundable_american_opportunity_credit", + ], + "domain_variables": ["refundable_american_opportunity_credit"], + } + ) + + assert set(rows["variable"]) == { + "tax_unit_count", + "refundable_american_opportunity_credit", + } + assert set(rows["period"].astype(int)) == {2023} + assert set(rows["value"].astype(float)) == {5_821_688.0, 5_090_364_000.0} + + +def test_workbook_overlay_loads_education_tax_credit_target(monkeypatch, tmp_path): + db_uri, engine = _create_test_engine(tmp_path) + + monkeypatch.setattr( + "policyengine_us_data.db.etl_irs_soi.WORKBOOK_NATIONAL_DOMAIN_TARGETS", + {"education_tax_credits": "education_tax_credits"}, + ) + + def fake_get_tracked_soi_row(variable, requested_year, **kwargs): + count = kwargs["count"] + rows = { + ("adjusted_gross_income", False): { + "Year": 2023, + "Value": 1_000_000.0, + "SOI table": "Table 1.1", + }, + ("education_tax_credits", True): { + "Year": 2023, + "Value": 7_211_349.0, + "SOI table": "Table 3.3", + }, + ("education_tax_credits", False): { + "Year": 2023, + "Value": 7_554_668_000.0, + "SOI table": "Table 3.3", + }, + } + return pd.Series(rows[(variable, count)]) + + monkeypatch.setattr( + "policyengine_us_data.db.etl_irs_soi.get_tracked_soi_row", + fake_get_tracked_soi_row, + ) + + with Session(engine) as session: + national_filer_stratum = _create_national_filer_stratum(session) + load_national_workbook_soi_targets( + session, + national_filer_stratum.stratum_id, + 2024, + ) + session.commit() + + builder = 
UnifiedMatrixBuilder(db_uri=db_uri, time_period=2024) + rows = builder._query_targets( + { + "geo_level": "national", + "variables": [ + "tax_unit_count", + "education_tax_credits", + ], + "domain_variables": ["education_tax_credits"], + } + ) + + assert set(rows["variable"]) == { + "tax_unit_count", + "education_tax_credits", + } + assert set(rows["period"].astype(int)) == {2023} + assert set(rows["value"].astype(float)) == {7_211_349.0, 7_554_668_000.0} + + def test_skip_coarse_state_agi_person_count_target_only_for_state_stub_9(): assert _skip_coarse_state_agi_person_count_target("state", 9) is True assert _skip_coarse_state_agi_person_count_target("state", 8) is False diff --git a/tests/unit/test_extended_cps.py b/tests/unit/test_extended_cps.py index 2d4e2727a..5d85b8c02 100644 --- a/tests/unit/test_extended_cps.py +++ b/tests/unit/test_extended_cps.py @@ -15,6 +15,7 @@ IMPUTED_VARIABLES, OVERRIDDEN_IMPUTED_VARIABLES, ) +from policyengine_us_data.datasets.cps import extended_cps as extended_cps_module from policyengine_us_data.datasets.cps.extended_cps import ( CPS_CLONE_FEATURE_VARIABLES, CPS_ONLY_IMPUTED_VARIABLES, @@ -32,6 +33,10 @@ derive_treasury_tipped_occupation_code, ) from policyengine_us_data.datasets.org import ORG_IMPUTED_VARIABLES +from policyengine_us_data.utils.aotc import ( + get_american_opportunity_credit_amount_scale, + qualifying_expenses_from_american_opportunity_credit, +) class TestVariableListConsistency: @@ -158,6 +163,239 @@ def test_positive_mortgage_input_detects_positive_deductible_interest(self): assert ExtendedCPS._has_positive_mortgage_input(data, 2024) is True +class TestAOTCEligibilityInputImputation: + @pytest.fixture + def pe_us_supports_aotc_inputs(self, monkeypatch): + monkeypatch.setattr( + extended_cps_module, + "_supports_aotc_eligibility_inputs", + lambda: True, + ) + + def test_aotc_expense_fill_uses_policyengine_us_amount_scale(self): + amount_scale = get_american_opportunity_credit_amount_scale(2024) + max_credit = 
amount_scale.calc( + np.array([amount_scale.thresholds[-1]], dtype=float) + )[0] + + expenses = qualifying_expenses_from_american_opportunity_credit( + max_credit, + 2024, + ) + + np.testing.assert_allclose( + amount_scale.calc(np.array([expenses], dtype=float))[0], + max_credit, + ) + + def test_leaves_data_unchanged_without_positive_aotc_signal( + self, + pe_us_supports_aotc_inputs, + ): + data = { + "american_opportunity_credit": {2024: np.array([0.0])}, + "tax_unit_id": {2024: np.array([1])}, + "person_tax_unit_id": {2024: np.array([1])}, + "qualified_tuition_expenses": {2024: np.array([1_200.0])}, + } + + result = ExtendedCPS._impute_aotc_eligibility_inputs(data, 2024) + + assert "is_pursuing_credential_for_american_opportunity_credit" not in result + np.testing.assert_array_equal( + result["qualified_tuition_expenses"][2024], + np.array([1_200.0]), + ) + + def test_marks_tuition_members_in_positive_aotc_tax_units( + self, + pe_us_supports_aotc_inputs, + ): + target_credit = 1_000.0 + data = { + "american_opportunity_credit": {2024: np.array([target_credit, 0.0])}, + "tax_unit_id": {2024: np.array([1, 2])}, + "person_tax_unit_id": {2024: np.array([1, 1, 2])}, + "qualified_tuition_expenses": {2024: np.array([1_200.0, 0.0, 1_200.0])}, + "is_full_time_college_student": {2024: np.array([False, True, True])}, + } + + result = ExtendedCPS._impute_aotc_eligibility_inputs(data, 2024) + + expected = np.array([True, False, False]) + for variable in ( + "is_pursuing_credential_for_american_opportunity_credit", + "attends_eligible_educational_institution_for_american_opportunity_credit", + "is_enrolled_at_least_half_time_for_american_opportunity_credit", + "has_american_opportunity_credit_1098_t_or_exception", + "has_american_opportunity_credit_institution_ein", + ): + np.testing.assert_array_equal(result[variable][2024], expected) + for variable in ( + "has_completed_first_four_years_of_postsecondary_education", + "has_felony_drug_conviction", + ): + 
np.testing.assert_array_equal(result[variable][2024], np.zeros(3, bool)) + np.testing.assert_array_equal( + result["american_opportunity_credit_claimed_prior_years"][2024], + np.zeros(3, dtype=np.int8), + ) + np.testing.assert_array_equal( + result["qualified_tuition_expenses"][2024], + np.array( + [ + qualifying_expenses_from_american_opportunity_credit( + target_credit, + 2024, + ), + 0.0, + 1_200.0, + ] + ), + ) + + def test_fills_tuition_when_positive_aotc_unit_has_no_tuition( + self, + pe_us_supports_aotc_inputs, + ): + amount_scale = get_american_opportunity_credit_amount_scale(2024) + max_credit = amount_scale.calc( + np.array([amount_scale.thresholds[-1]], dtype=float) + )[0] + data = { + "american_opportunity_credit": {2024: np.array([max_credit])}, + "tax_unit_id": {2024: np.array([1])}, + "person_tax_unit_id": {2024: np.array([1, 1])}, + "qualified_tuition_expenses": {2024: np.array([0.0, 0.0])}, + "is_full_time_college_student": {2024: np.array([False, True])}, + } + + result = ExtendedCPS._impute_aotc_eligibility_inputs(data, 2024) + + expected = np.array([False, True]) + for variable in ( + "is_pursuing_credential_for_american_opportunity_credit", + "attends_eligible_educational_institution_for_american_opportunity_credit", + "is_enrolled_at_least_half_time_for_american_opportunity_credit", + "has_american_opportunity_credit_1098_t_or_exception", + "has_american_opportunity_credit_institution_ein", + ): + np.testing.assert_array_equal(result[variable][2024], expected) + expected_expenses = qualifying_expenses_from_american_opportunity_credit( + data["american_opportunity_credit"][2024][0], + 2024, + ) + np.testing.assert_array_equal( + result["qualified_tuition_expenses"][2024], + np.array([0.0, expected_expenses]), + ) + + def test_splits_multi_student_credit_across_multiple_candidates( + self, + pe_us_supports_aotc_inputs, + ): + amount_scale = get_american_opportunity_credit_amount_scale(2024) + max_credit = amount_scale.calc( + 
np.array([amount_scale.thresholds[-1]], dtype=float) + )[0] + data = { + "american_opportunity_credit": {2024: np.array([max_credit * 2])}, + "tax_unit_id": {2024: np.array([1])}, + "person_tax_unit_id": {2024: np.array([1, 1])}, + "qualified_tuition_expenses": {2024: np.array([0.0, 0.0])}, + "is_full_time_college_student": {2024: np.array([True, True])}, + } + + result = ExtendedCPS._impute_aotc_eligibility_inputs(data, 2024) + + expected = np.array([True, True]) + np.testing.assert_array_equal( + result["is_pursuing_credential_for_american_opportunity_credit"][2024], + expected, + ) + expected_expenses = qualifying_expenses_from_american_opportunity_credit( + max_credit, + 2024, + ) + np.testing.assert_array_equal( + result["qualified_tuition_expenses"][2024], + np.array([expected_expenses, expected_expenses]), + ) + + def test_uses_legacy_eligibility_input_when_pe_us_lacks_new_inputs( + self, + monkeypatch, + ): + monkeypatch.setattr( + extended_cps_module, + "_supports_aotc_eligibility_inputs", + lambda: False, + ) + data = { + "american_opportunity_credit": {2024: np.array([1_000.0])}, + "tax_unit_id": {2024: np.array([1])}, + "person_tax_unit_id": {2024: np.array([1])}, + "qualified_tuition_expenses": {2024: np.array([0.0])}, + } + + result = ExtendedCPS._impute_aotc_eligibility_inputs(data, 2024) + + np.testing.assert_array_equal( + result["is_eligible_for_american_opportunity_credit"][2024], + np.array([True]), + ) + assert "is_pursuing_credential_for_american_opportunity_credit" not in result + + +class TestLLCEligibilityInputImputation: + @pytest.fixture + def pe_us_supports_llc_inputs(self, monkeypatch): + monkeypatch.setattr( + extended_cps_module, + "_supports_llc_eligibility_inputs", + lambda: True, + ) + + def test_marks_non_aotc_tuition_people_as_llc_eligible( + self, + pe_us_supports_llc_inputs, + ): + data = { + "person_tax_unit_id": {2024: np.array([1, 1, 2])}, + "qualified_tuition_expenses": {2024: np.array([1_000.0, 2_000.0, 0.0])}, + 
"is_pursuing_credential_for_american_opportunity_credit": { + 2024: np.array([True, False, False]) + }, + } + + result = ExtendedCPS._impute_llc_eligibility_inputs(data, 2024) + + expected = np.array([False, True, False]) + for variable in ( + "attends_eligible_educational_institution_for_lifetime_learning_credit", + "has_lifetime_learning_credit_1098_t_or_exception", + ): + np.testing.assert_array_equal(result[variable][2024], expected) + + def test_leaves_data_unchanged_when_pe_us_lacks_llc_inputs(self, monkeypatch): + monkeypatch.setattr( + extended_cps_module, + "_supports_llc_eligibility_inputs", + lambda: False, + ) + data = { + "person_tax_unit_id": {2024: np.array([1])}, + "qualified_tuition_expenses": {2024: np.array([1_000.0])}, + } + + result = ExtendedCPS._impute_llc_eligibility_inputs(data, 2024) + + assert ( + "attends_eligible_educational_institution_for_lifetime_learning_credit" + not in result + ) + + class TestCloneChildcareDerivation: """Clone-half capped childcare should be derived deterministically.""" diff --git a/tests/unit/test_refresh_soi_table_targets.py b/tests/unit/test_refresh_soi_table_targets.py index c8057c289..5a86d06d7 100644 --- a/tests/unit/test_refresh_soi_table_targets.py +++ b/tests/unit/test_refresh_soi_table_targets.py @@ -101,6 +101,77 @@ def test_build_target_year_rows_reads_standard_table_cells(monkeypatch): assert refreshed["Value"].tolist() == [123_000.0, 789.0] +def test_build_target_year_rows_reads_table_3_3_education_credit_cells(monkeypatch): + module = load_module() + assert module.TABLE_FILE_SUFFIX["Table 3.3"] == "in33ar.xls" + + workbook = make_workbook(cols=45) + workbook.iat[9, module._column_index("I")] = 7_211_349.0 + workbook.iat[9, module._column_index("J")] = 7_554_668.0 + workbook.iat[9, module._column_index("AO")] = 5_821_688.0 + workbook.iat[9, module._column_index("AP")] = 5_090_364.0 + + targets = pd.DataFrame( + [ + make_target_row( + **{ + "SOI table": "Table 3.3", + "XLSX column": "I", + "XLSX 
row": 10, + "Year": 2023, + "Variable": "education_tax_credits", + "Count": True, + } + ), + make_target_row( + **{ + "SOI table": "Table 3.3", + "XLSX column": "J", + "XLSX row": 10, + "Year": 2023, + "Variable": "education_tax_credits", + "Count": False, + } + ), + make_target_row( + **{ + "SOI table": "Table 3.3", + "XLSX column": "AO", + "XLSX row": 10, + "Year": 2023, + "Variable": "refundable_american_opportunity_credit", + "Count": True, + } + ), + make_target_row( + **{ + "SOI table": "Table 3.3", + "XLSX column": "AP", + "XLSX row": 10, + "Year": 2023, + "Variable": "refundable_american_opportunity_credit", + "Count": False, + } + ), + ], + columns=TARGET_COLUMNS, + ) + + monkeypatch.setattr(module, "_load_workbook", lambda table, year: workbook) + + refreshed = module.build_target_year_rows( + targets, source_year=2023, target_year=2024 + ) + + assert refreshed["Year"].tolist() == [2024, 2024, 2024, 2024] + assert refreshed["Value"].tolist() == [ + 7_211_349.0, + 7_554_668_000.0, + 5_821_688.0, + 5_090_364_000.0, + ] + + def test_build_target_year_rows_uses_semantic_table_1_4_columns(monkeypatch): module = load_module() workbook = make_workbook(cols=80)