From 651956dbab1e66765ad658a9ddcaafcd8a8c0d3b Mon Sep 17 00:00:00 2001 From: David Trimmer Date: Fri, 6 Mar 2026 11:47:59 -0500 Subject: [PATCH] Add affordability rebates analysis notebooks Add microsimulation analysis of a proposed Affordability Rebates reform that provides $3,000 per adult and $3,000 per dependent via the Recovery Rebate Credit mechanism. Includes: - Main analysis notebook with cost estimates, benefit distribution by income bracket, and household composition breakdown - Data exploration notebook examining enhanced_cps_2024 dataset structure and demographics Co-Authored-By: Claude Opus 4.5 --- .../affordability_rebates_analysis.ipynb | 489 ++++++++++++++++++ us/federal/data_exploration.ipynb | 319 ++++++++++++ 2 files changed, 808 insertions(+) create mode 100644 us/federal/affordability_rebates_analysis.ipynb create mode 100644 us/federal/data_exploration.ipynb diff --git a/us/federal/affordability_rebates_analysis.ipynb b/us/federal/affordability_rebates_analysis.ipynb new file mode 100644 index 0000000..353335e --- /dev/null +++ b/us/federal/affordability_rebates_analysis.ipynb @@ -0,0 +1,489 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Affordability Rebates Analysis\n", + "\n", + "This notebook analyzes the impact of an \"Affordability Rebates\" reform on the enhanced_cps_2024 dataset.\n", + "\n", + "The reform provides $3,000 per adult and $3,000 per dependent through the Recovery Rebate Credit mechanism for 2026." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from policyengine_us import Microsimulation\n", + "from policyengine_core.reforms import Reform\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "ENHANCED_CPS = \"hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Define Reform" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Reform defined: Affordability Rebates\n", + " - $3,000 per adult\n", + " - $3,000 per dependent\n", + " - Phase-out thresholds: $75k (Single), $112.5k (HoH), $150k (Joint)\n" + ] + } + ], + "source": [ + "reform = Reform.from_dict({\n", + " \"gov.irs.credits.recovery_rebate_credit.arpa.max.adult\": {\n", + " \"2026-01-01.2026-12-31\": 3000,\n", + " \"2027-01-01.2035-12-31\": 0\n", + " },\n", + " \"gov.irs.credits.recovery_rebate_credit.arpa.max.dependent\": {\n", + " \"2026-01-01.2026-12-31\": 3000,\n", + " \"2027-01-01.2100-12-31\": 0\n", + " },\n", + " \"gov.irs.credits.recovery_rebate_credit.arpa.phase_out.threshold.JOINT\": {\n", + " \"2026-01-01.2026-12-31\": 150000,\n", + " \"2027-01-01.2035-12-31\": 0\n", + " },\n", + " \"gov.irs.credits.recovery_rebate_credit.arpa.phase_out.threshold.SINGLE\": {\n", + " \"2026-01-01.2026-12-31\": 75000,\n", + " \"2027-01-01.2035-12-31\": 0\n", + " },\n", + " \"gov.irs.credits.recovery_rebate_credit.arpa.phase_out.threshold.SEPARATE\": {\n", + " \"2026-01-01.2026-12-31\": 75000,\n", + " \"2027-01-01.2035-12-31\": 0\n", + " },\n", + " \"gov.irs.credits.recovery_rebate_credit.arpa.phase_out.threshold.SURVIVING_SPOUSE\": {\n", + " \"2026-01-01.2026-12-31\": 75000,\n", + " \"2027-01-01.2035-12-31\": 0\n", + " },\n", + " \"gov.irs.credits.recovery_rebate_credit.arpa.phase_out.threshold.HEAD_OF_HOUSEHOLD\": {\n", + " \"2026-01-01.2026-12-31\": 112500,\n", + " \"2027-01-01.2035-12-31\": 0\n", + " }\n", + "}, country_id=\"us\")\n", + "\n", + "print(\"Reform defined: Affordability Rebates\")\n", + "print(\" - $3,000 per adult\")\n", + "print(\" - $3,000 per dependent\")\n", + "print(\" - Phase-out thresholds: $75k (Single), $112.5k (HoH), $150k (Joint)\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Reform Impact Analysis" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Create baseline and reformed simulations\n", + "baseline = Microsimulation(dataset=ENHANCED_CPS)\n", + "reformed = Microsimulation(reform=reform, dataset=ENHANCED_CPS)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Household Net Income Impact:\n", + " Baseline mean: $116,539\n", + " Reformed mean: $121,742\n", + " Average gain per household: $5,203\n" + ] + } + ], + "source": [ + "# Calculate household net income\n", + "baseline_income = baseline.calc(\"household_net_income\", period=2026, map_to=\"household\")\n", + "reformed_income = reformed.calc(\"household_net_income\", period=2026, map_to=\"household\")\n", + "difference_income = reformed_income - baseline_income\n", + "\n", + "print(\"Household Net Income Impact:\")\n", + "print(f\" Baseline mean: ${baseline_income.mean():,.0f}\")\n", + "print(f\" Reformed mean: ${reformed_income.mean():,.0f}\")\n", + "print(f\" Average gain per household: ${difference_income.mean():,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Total Program Cost:\n", + " $750,756,963,002\n", + " $750.76 billion\n" + ] + } + ], + "source": [ + "# Total cost of the program\n", + "total_cost = difference_income.sum()\n", + "print(f\"\\nTotal Program Cost:\")\n", + "print(f\" ${total_cost:,.0f}\")\n", + "print(f\" ${total_cost / 1e9:.2f} billion\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Beneficiary Analysis:\n", + " Households receiving benefits: 118,223,733\n", + " Share of households benefiting: 81.9%\n" + ] + } + ], + "source": [ + "# Number of households receiving benefits\n", + "beneficiaries = (difference_income > 0)\n", + "beneficiary_count = beneficiaries.sum()\n", + "beneficiary_share = beneficiaries.mean()\n", + "\n", + "print(f\"\\nBeneficiary Analysis:\")\n", + "print(f\" Households receiving benefits: {beneficiary_count:,.0f}\")\n", + "print(f\" Share of households benefiting: {beneficiary_share:.1%}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Average benefit among beneficiaries: $6,350\n" + ] + } + ], + "source": [ + "# Average benefit among those receiving\n", + "difference_values = difference_income.values\n", + "weights_hh = baseline.calc(\"household_weight\", period=2026).values\n", + "\n", + "beneficiary_mask = difference_values > 0\n", + "avg_benefit_among_beneficiaries = np.average(\n", + " difference_values[beneficiary_mask], \n", + " weights=weights_hh[beneficiary_mask]\n", + ")\n", + "\n", + "print(f\"\\nAverage benefit among beneficiaries: ${avg_benefit_among_beneficiaries:,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "================================================================================\n", + "BENEFIT DISTRIBUTION BY INCOME BRACKET\n", + "================================================================================\n", + "Income Bracket Total Benefit Avg Benefit % Benefiting\n", + " $0-$10k $141.82B $5,042 98.3%\n", + " $10k-$25k $88.38B $5,884 98.9%\n", + " $25k-$50k $129.80B $5,362 94.1%\n", + " $50k-$75k $108.19B $6,195 97.1%\n", + " $75k-$100k $96.14B $6,404 80.8%\n", + " $100k-$150k $129.86B $7,150 83.0%\n", + " $150k-$200k $22.53B $2,231 29.4%\n", + " $200k+ $23.30B $1,570 30.2%\n", + "================================================================================\n" + ] + } + ], + "source": [ + "# Distribution of benefits by income bracket\n", + "agi_baseline = baseline.calc(\"adjusted_gross_income\", period=2026, map_to=\"household\").values\n", + "\n", + "income_brackets = [\n", + " (0, 10000, \"$0-$10k\"),\n", + " (10000, 25000, \"$10k-$25k\"),\n", + " (25000, 50000, \"$25k-$50k\"),\n", + " (50000, 75000, \"$50k-$75k\"),\n", + " (75000, 100000, \"$75k-$100k\"),\n", + " (100000, 150000, \"$100k-$150k\"),\n", + " (150000, 200000, \"$150k-$200k\"),\n", + " (200000, float('inf'), \"$200k+\")\n", + "]\n", + "\n", + "print(\"\\n\" + \"=\"*80)\n", + "print(\"BENEFIT DISTRIBUTION BY INCOME BRACKET\")\n", + "print(\"=\"*80)\n", + "\n", + "benefit_by_bracket = []\n", + "for lower, upper, label in income_brackets:\n", + " mask = (agi_baseline >= lower) & (agi_baseline < upper)\n", + " if mask.sum() > 0:\n", + " bracket_benefit = np.sum(difference_values[mask] * weights_hh[mask])\n", + " bracket_households = weights_hh[mask].sum()\n", + " avg_benefit = bracket_benefit / bracket_households if bracket_households > 0 else 0\n", + " benefiting_in_bracket = weights_hh[mask & beneficiary_mask].sum()\n", + " pct_benefiting = benefiting_in_bracket / bracket_households * 100 if bracket_households > 0 else 0\n", + " \n", + " benefit_by_bracket.append({\n", + " \"Income Bracket\": label,\n", + " \"Total Benefit\": f\"${bracket_benefit/1e9:.2f}B\",\n", + " \"Avg Benefit\": f\"${avg_benefit:,.0f}\",\n", + " \"% Benefiting\": f\"{pct_benefiting:.1f}%\"\n", + " })\n", + "\n", + "benefit_df = pd.DataFrame(benefit_by_bracket)\n", + "print(benefit_df.to_string(index=False))\n", + "print(\"=\"*80)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "Variable tax_unit_net_income does not exist.", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[9], line 5\u001b[0m\n\u001b[0;32m 2\u001b[0m filing_status \u001b[38;5;241m=\u001b[39m baseline\u001b[38;5;241m.\u001b[39mcalc(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfiling_status\u001b[39m\u001b[38;5;124m\"\u001b[39m, period\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m2026\u001b[39m, map_to\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtax_unit\u001b[39m\u001b[38;5;124m\"\u001b[39m)\u001b[38;5;241m.\u001b[39mvalues\n\u001b[0;32m 3\u001b[0m tax_unit_weight \u001b[38;5;241m=\u001b[39m baseline\u001b[38;5;241m.\u001b[39mcalc(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtax_unit_weight\u001b[39m\u001b[38;5;124m\"\u001b[39m, period\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m2026\u001b[39m)\u001b[38;5;241m.\u001b[39mvalues\n\u001b[1;32m----> 5\u001b[0m baseline_tax_unit_income \u001b[38;5;241m=\u001b[39m \u001b[43mbaseline\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcalc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtax_unit_net_income\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m2026\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mvalues\n\u001b[0;32m 6\u001b[0m reformed_tax_unit_income \u001b[38;5;241m=\u001b[39m reformed\u001b[38;5;241m.\u001b[39mcalc(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtax_unit_net_income\u001b[39m\u001b[38;5;124m\"\u001b[39m, period\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m2026\u001b[39m)\u001b[38;5;241m.\u001b[39mvalues\n\u001b[0;32m 7\u001b[0m tax_unit_difference \u001b[38;5;241m=\u001b[39m reformed_tax_unit_income \u001b[38;5;241m-\u001b[39m baseline_tax_unit_income\n", + "File \u001b[1;32mc:\\Users\\dtsax\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\policyengine_core\\simulations\\microsimulation.py:54\u001b[0m, in \u001b[0;36mMicrosimulation.calculate\u001b[1;34m(self, variable_name, period, map_to, use_weights, decode_enums)\u001b[0m\n\u001b[0;32m 52\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m period \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefault_calculation_period \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 53\u001b[0m period \u001b[38;5;241m=\u001b[39m get_period(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefault_calculation_period)\n\u001b[1;32m---> 54\u001b[0m values \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcalculate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvariable_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmap_to\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdecode_enums\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 55\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m use_weights:\n\u001b[0;32m 56\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m values\n", + "File \u001b[1;32mc:\\Users\\dtsax\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\policyengine_core\\simulations\\simulation.py:491\u001b[0m, in \u001b[0;36mSimulation.calculate\u001b[1;34m(self, variable_name, period, map_to, decode_enums)\u001b[0m\n\u001b[0;32m 488\u001b[0m np\u001b[38;5;241m.\u001b[39mrandom\u001b[38;5;241m.\u001b[39mseed(\u001b[38;5;28mhash\u001b[39m(variable_name \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mstr\u001b[39m(period)) \u001b[38;5;241m%\u001b[39m \u001b[38;5;241m1000000\u001b[39m)\n\u001b[0;32m 490\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 491\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_calculate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvariable_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiod\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 492\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(result, EnumArray) \u001b[38;5;129;01mand\u001b[39;00m decode_enums:\n\u001b[0;32m 493\u001b[0m result \u001b[38;5;241m=\u001b[39m result\u001b[38;5;241m.\u001b[39mdecode_to_str()\n", + "File \u001b[1;32mc:\\Users\\dtsax\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\policyengine_core\\simulations\\simulation.py:616\u001b[0m, in \u001b[0;36mSimulation._calculate\u001b[1;34m(self, variable_name, period)\u001b[0m\n\u001b[0;32m 605\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 606\u001b[0m \u001b[38;5;124;03mCalculate the variable ``variable_name`` for the period ``period``, using the variable formula if it exists.\u001b[39;00m\n\u001b[0;32m 607\u001b[0m \n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 613\u001b[0m \u001b[38;5;124;03m ArrayLike: The calculated variable.\u001b[39;00m\n\u001b[0;32m 614\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 615\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m variable_name \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtax_benefit_system\u001b[38;5;241m.\u001b[39mvariables:\n\u001b[1;32m--> 616\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mVariable \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mvariable_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m does not exist.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 617\u001b[0m population \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_variable_population(variable_name)\n\u001b[0;32m 618\u001b[0m holder \u001b[38;5;241m=\u001b[39m population\u001b[38;5;241m.\u001b[39mget_holder(variable_name)\n", + "\u001b[1;31mValueError\u001b[0m: Variable tax_unit_net_income does not exist." + ] + } + ], + "source": [ + "# Distribution by filing status\n", + "filing_status = baseline.calc(\"filing_status\", period=2026, map_to=\"tax_unit\").values\n", + "tax_unit_weight = baseline.calc(\"tax_unit_weight\", period=2026).values\n", + "\n", + "baseline_tax_unit_income = baseline.calc(\"tax_unit_net_income\", period=2026).values\n", + "reformed_tax_unit_income = reformed.calc(\"tax_unit_net_income\", period=2026).values\n", + "tax_unit_difference = reformed_tax_unit_income - baseline_tax_unit_income\n", + "\n", + "filing_status_names = {\n", + " 1: \"SINGLE\",\n", + " 2: \"JOINT\",\n", + " 3: \"SEPARATE\",\n", + " 4: \"HEAD_OF_HOUSEHOLD\",\n", + " 5: \"SURVIVING_SPOUSE\"\n", + "}\n", + "\n", + "print(\"\\n\" + \"=\"*80)\n", + "print(\"BENEFIT DISTRIBUTION BY FILING STATUS\")\n", + "print(\"=\"*80)\n", + "\n", + "filing_data = []\n", + "for status_code, status_name in filing_status_names.items():\n", + " mask = filing_status == status_code\n", + " if mask.sum() > 0:\n", + " status_benefit = np.sum(tax_unit_difference[mask] * tax_unit_weight[mask])\n", + " status_count = tax_unit_weight[mask].sum()\n", + " avg_benefit = status_benefit / status_count if status_count > 0 else 0\n", + " benefiting = tax_unit_weight[mask & (tax_unit_difference > 0)].sum()\n", + " pct_benefiting = benefiting / status_count * 100 if status_count > 0 else 0\n", + " \n", + " filing_data.append({\n", + " \"Filing Status\": status_name,\n", + " \"Tax Units\": f\"{status_count:,.0f}\",\n", + " \"Total Benefit\": f\"${status_benefit/1e9:.2f}B\",\n", + " \"Avg Benefit\": f\"${avg_benefit:,.0f}\",\n", + " \"% Benefiting\": f\"{pct_benefiting:.1f}%\"\n", + " })\n", + "\n", + "filing_df = pd.DataFrame(filing_data)\n", + "print(filing_df.to_string(index=False))\n", + "print(\"=\"*80)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Impact on households with/without children\n", + "is_child = baseline.calc(\"is_child\", period=2026, map_to=\"person\")\n", + "household_id = baseline.calc(\"household_id\", period=2026, map_to=\"person\")\n", + "household_weight_person = baseline.calc(\"household_weight\", period=2026, map_to=\"person\")\n", + "\n", + "df_households = pd.DataFrame({\n", + " 'household_id': household_id.values,\n", + " 'is_child': is_child.values,\n", + " 'household_weight': household_weight_person.values\n", + "})\n", + "\n", + "children_per_household = df_households.groupby('household_id').agg({\n", + " 'is_child': 'sum',\n", + " 'household_weight': 'first'\n", + "}).reset_index()\n", + "\n", + "has_children = children_per_household.set_index('household_id')['is_child'] > 0\n", + "household_ids = baseline.calc(\"household_id\", period=2026, map_to=\"household\").values\n", + "\n", + "# Map has_children to households\n", + "has_children_map = has_children.reindex(household_ids).fillna(False).values\n", + "\n", + "# Households with children\n", + "with_children_mask = has_children_map\n", + "without_children_mask = ~has_children_map\n", + "\n", + "benefit_with_children = np.sum(difference_values[with_children_mask] * weights_hh[with_children_mask])\n", + "benefit_without_children = np.sum(difference_values[without_children_mask] * weights_hh[without_children_mask])\n", + "\n", + "count_with_children = weights_hh[with_children_mask].sum()\n", + "count_without_children = weights_hh[without_children_mask].sum()\n", + "\n", + "avg_with_children = benefit_with_children / count_with_children if count_with_children > 0 else 0\n", + "avg_without_children = benefit_without_children / count_without_children if count_without_children > 0 else 0\n", + "\n", + "print(\"\\n\" + \"=\"*80)\n", + "print(\"IMPACT BY HOUSEHOLD COMPOSITION\")\n", + "print(\"=\"*80)\n", + "print(f\"Households WITH children:\")\n", + "print(f\" Count: {count_with_children:,.0f}\")\n", + "print(f\" Total benefit: ${benefit_with_children/1e9:.2f}B\")\n", + "print(f\" Average benefit: ${avg_with_children:,.0f}\")\n", + "print(f\"\\nHouseholds WITHOUT children:\")\n", + "print(f\" Count: {count_without_children:,.0f}\")\n", + "print(f\" Total benefit: ${benefit_without_children/1e9:.2f}B\")\n", + "print(f\" Average benefit: ${avg_without_children:,.0f}\")\n", + "print(\"=\"*80)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Summary" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create summary table\n", + "summary_data = {\n", + " 'Metric': [\n", + " 'Total Program Cost',\n", + " 'Households Benefiting',\n", + " 'Share of Households Benefiting',\n", + " 'Average Benefit (all households)',\n", + " 'Average Benefit (beneficiaries only)',\n", + " 'Benefit to HH with Children',\n", + " 'Benefit to HH without Children',\n", + " 'Avg Benefit - HH with Children',\n", + " 'Avg Benefit - HH without Children'\n", + " ],\n", + " 'Value': [\n", + " f\"${total_cost/1e9:.2f}B\",\n", + " f\"{beneficiary_count:,.0f}\",\n", + " f\"{beneficiary_share:.1%}\",\n", + " f\"${difference_income.mean():,.0f}\",\n", + " f\"${avg_benefit_among_beneficiaries:,.0f}\",\n", + " f\"${benefit_with_children/1e9:.2f}B\",\n", + " f\"${benefit_without_children/1e9:.2f}B\",\n", + " f\"${avg_with_children:,.0f}\",\n", + " f\"${avg_without_children:,.0f}\"\n", + " ]\n", + "}\n", + "\n", + "summary_df = pd.DataFrame(summary_data)\n", + "\n", + "print(\"\\n\" + \"=\"*70)\n", + "print(\"AFFORDABILITY REBATES - SUMMARY\")\n", + "print(\"=\"*70)\n", + "print(summary_df.to_string(index=False))\n", + "print(\"=\"*70)\n", + "\n", + "# Save summary\n", + "summary_df.to_csv('affordability_rebates_summary.csv', index=False)\n", + "print(\"\\nSummary saved to: affordability_rebates_summary.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Save benefit by income bracket\n", + "benefit_df.to_csv('affordability_rebates_by_income.csv', index=False)\n", + "print(\"Income bracket analysis saved to: affordability_rebates_by_income.csv\")\n", + "\n", + "# Save benefit by filing status\n", + "filing_df.to_csv('affordability_rebates_by_filing_status.csv', index=False)\n", + "print(\"Filing status analysis saved to: affordability_rebates_by_filing_status.csv\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/us/federal/data_exploration.ipynb b/us/federal/data_exploration.ipynb new file mode 100644 index 0000000..6f7fbcc --- /dev/null +++ b/us/federal/data_exploration.ipynb @@ -0,0 +1,319 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Enhanced CPS 2024 Data Exploration\n", + "\n", + "This notebook explores the enhanced_cps_2024 dataset to understand its structure, demographics, and income distribution." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from policyengine_us import Microsimulation\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "ENHANCED_CPS = \"hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Dataset Overview" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Load enhanced_cps_2024 dataset\n", + "sim = Microsimulation(dataset=ENHANCED_CPS)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of households in dataset: 15,929\n", + "Household count (weighted): 144,287,680\n", + "Person count (weighted): 339,597,307\n" + ] + } + ], + "source": [ + "# Check dataset size\n", + "household_weight = sim.calc(\"household_weight\", period=2026)\n", + "household_count = sim.calc(\"household_count\", period=2026, map_to=\"household\")\n", + "person_count = sim.calc(\"person_count\", period=2026, map_to=\"household\")\n", + "\n", + "print(f\"Number of households in dataset: {len(household_weight):,}\")\n", + "print(f\"Household count (weighted): {household_count.sum():,.0f}\")\n", + "print(f\"Person count (weighted): {person_count.sum():,.0f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Income Distribution" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Income distribution:\n", + " Median AGI: $54,423\n", + " 75th percentile: $117,905\n", + " 90th percentile: $202,534\n", + " 95th percentile: $324,016\n", + " Max AGI: $349,003,552\n" + ] + } + ], + "source": [ + "# Check household income distribution\n", + "agi = sim.calc(\"adjusted_gross_income\", period=2026, map_to=\"household\")\n", + "print(f\"Income distribution:\")\n", + "print(f\" Median AGI: ${agi.median():,.0f}\")\n", + "print(f\" 75th percentile: ${agi.quantile(0.75):,.0f}\")\n", + "print(f\" 90th percentile: ${agi.quantile(0.90):,.0f}\")\n", + "print(f\" 95th percentile: ${agi.quantile(0.95):,.0f}\")\n", + "print(f\" Max AGI: ${agi.max():,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Employment Income Analysis:\n", + " Total employment income (weighted): $10,943,358,507,618\n", + " Total employment income before LSR (weighted): $10,943,358,507,618\n", + " Mean employment income: $32,225\n", + " Mean employment income before LSR: $32,225\n", + " Median employment income: $0\n", + " Median employment income before LSR: $0\n" + ] + } + ], + "source": [ + "# Employment income analysis\n", + "employment_income = sim.calc(\"employment_income\", period=2026, map_to=\"person\")\n", + "employment_income_before_lsr = sim.calc(\"employment_income_before_lsr\", period=2026, map_to=\"person\")\n", + "\n", + "print(f\"\\nEmployment Income Analysis:\")\n", + "print(f\" Total employment income (weighted): ${employment_income.sum():,.0f}\")\n", + "print(f\" Total employment income before LSR (weighted): ${employment_income_before_lsr.sum():,.0f}\")\n", + "print(f\" Mean employment income: ${employment_income.mean():,.0f}\")\n", + "print(f\" Mean employment income before LSR: ${employment_income_before_lsr.mean():,.0f}\")\n", + "print(f\" Median employment income: ${employment_income.median():,.0f}\")\n", + "print(f\" Median employment income before LSR: ${employment_income_before_lsr.median():,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "======================================================================\n", + "HOUSEHOLD COUNTS BY INCOME BRACKET\n", + "======================================================================\n", + "Income Bracket Households % of All Households\n", + " $0-$10k 28,130,220 19.50%\n", + " $10k-$25k 15,020,060 10.41%\n", + " $25k-$50k 24,209,866 16.78%\n", + " $50k-$75k 17,464,564 12.10%\n", + " $75k-$100k 15,011,586 10.40%\n", + " $100k-$150k 18,161,620 12.59%\n", + " $150k-$200k 10,102,187 7.00%\n", + " $200k+ 14,839,484 10.28%\n", + "======================================================================\n" + ] + } + ], + "source": [ + "# Household counts by income brackets\n", + "agi_hh = np.array(sim.calc(\"adjusted_gross_income\", period=2026, map_to=\"household\").values)\n", + "weights = np.array(sim.calc(\"household_weight\", period=2026).values)\n", + "total_households = weights.sum()\n", + "\n", + "income_brackets = [\n", + " (0, 10000, \"$0-$10k\"),\n", + " (10000, 25000, \"$10k-$25k\"),\n", + " (25000, 50000, \"$25k-$50k\"),\n", + " (50000, 75000, \"$50k-$75k\"),\n", + " (75000, 100000, \"$75k-$100k\"),\n", + " (100000, 150000, \"$100k-$150k\"),\n", + " (150000, 200000, \"$150k-$200k\"),\n", + " (200000, float('inf'), \"$200k+\")\n", + "]\n", + "\n", + "bracket_data = []\n", + "for lower, upper, label in income_brackets:\n", + " mask = (agi_hh >= lower) & (agi_hh < upper)\n", + " count = weights[mask].sum()\n", + " pct_of_total = (count / total_households) * 100\n", + " \n", + " bracket_data.append({\n", + " \"Income Bracket\": label,\n", + " \"Households\": f\"{count:,.0f}\",\n", + " \"% of All Households\": f\"{pct_of_total:.2f}%\"\n", + " })\n", + "\n", + "income_df = pd.DataFrame(bracket_data)\n", + "\n", + "print(\"\\n\" + \"=\"*70)\n", + "print(\"HOUSEHOLD COUNTS BY INCOME BRACKET\")\n", + "print(\"=\"*70)\n", + "print(income_df.to_string(index=False))\n", + "print(\"=\"*70)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Household Composition" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Households with children (weighted):\n", + " Total households with children: 43,798,516\n", + " Households with 1 child: 24,479,972\n", + " Households with 2 children: 12,298,578\n", + " Households with 3+ children: 7,019,965\n" + ] + } + ], + "source": [ + "# Check households with children\n", + "is_child = sim.calc(\"is_child\", period=2026, map_to=\"person\")\n", + "household_id = sim.calc(\"household_id\", period=2026, map_to=\"person\")\n", + "person_weight = sim.calc(\"person_weight\", period=2026, map_to=\"person\")\n", + "household_weight_person = sim.calc(\"household_weight\", period=2026, map_to=\"person\")\n", + "\n", + "# Create DataFrame\n", + "df_households = pd.DataFrame({\n", + " 'household_id': household_id.values,\n", + " 'is_child': is_child.values,\n", + " 'household_weight': household_weight_person.values\n", + "})\n", + "\n", + "# Count children per household\n", + "children_per_household = df_households.groupby('household_id').agg({\n", + " 'is_child': 'sum',\n", + " 'household_weight': 'first'\n", + "}).reset_index()\n", + "\n", + "# Calculate weighted household counts\n", + "total_households_with_children = children_per_household[children_per_household['is_child'] > 0]['household_weight'].sum()\n", + "households_with_1_child = children_per_household[children_per_household['is_child'] == 1]['household_weight'].sum()\n", + "households_with_2_children = children_per_household[children_per_household['is_child'] == 2]['household_weight'].sum()\n", + "households_with_3plus_children = children_per_household[children_per_household['is_child'] >= 3]['household_weight'].sum()\n", + "\n", + "print(f\"\\nHouseholds with children (weighted):\")\n", + "print(f\" Total households with children: {total_households_with_children:,.0f}\")\n", + "print(f\" Households with 1 child: {households_with_1_child:,.0f}\")\n", + "print(f\" Households with 2 children: {households_with_2_children:,.0f}\")\n", + "print(f\" Households with 3+ children: {households_with_3plus_children:,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Children by age:\n", + " Total children under 18: 73,113,736\n", + " Children under 6: 22,428,570\n", + " Children under 3: 11,172,289\n" + ] + } + ], + "source": [ + "# Check children by age groups\n", + "age = sim.calc(\"age\", period=2026, map_to=\"person\")\n", + "\n", + "df_age = pd.DataFrame({\n", + " \"age\": age.values,\n", + " \"person_weight\": person_weight.values\n", + "})\n", + "\n", + "# Calculate weighted totals\n", + "total_children = df_age[df_age['age'] < 18]['person_weight'].sum()\n", + "children_under_6 = df_age[df_age['age'] < 6]['person_weight'].sum()\n", + "children_under_3 = df_age[df_age['age'] < 3]['person_weight'].sum()\n", + "\n", + "print(f\"\\nChildren by age:\")\n", + "print(f\" Total children under 18: {total_children:,.0f}\")\n", + "print(f\" Children under 6: {children_under_6:,.0f}\")\n", + "print(f\" Children under 3: {children_under_3:,.0f}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}