From 651956dbab1e66765ad658a9ddcaafcd8a8c0d3b Mon Sep 17 00:00:00 2001
From: David Trimmer <david@policyengine.org>
Date: Fri, 6 Mar 2026 11:47:59 -0500
Subject: [PATCH] Add affordability rebates analysis notebooks

Add microsimulation analysis of a proposed Affordability Rebates reform that provides $3,000 per adult and $3,000 per dependent via the Recovery Rebate Credit mechanism. Includes:
- Main analysis notebook with cost estimates, benefit distribution by income bracket, and household composition breakdown
- Data exploration notebook examining enhanced_cps_2024 dataset structure and demographics

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 .../affordability_rebates_analysis.ipynb      | 489 ++++++++++++++++++
 us/federal/data_exploration.ipynb             | 319 ++++++++++++
 2 files changed, 808 insertions(+)
 create mode 100644 us/federal/affordability_rebates_analysis.ipynb
 create mode 100644 us/federal/data_exploration.ipynb

diff --git a/us/federal/affordability_rebates_analysis.ipynb b/us/federal/affordability_rebates_analysis.ipynb
new file mode 100644
index 0000000..353335e
--- /dev/null
+++ b/us/federal/affordability_rebates_analysis.ipynb
@@ -0,0 +1,489 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Affordability Rebates Analysis\n",
+    "\n",
+    "This notebook analyzes the impact of an \"Affordability Rebates\" reform on the enhanced_cps_2024 dataset.\n",
+    "\n",
+    "The reform provides $3,000 per adult and $3,000 per dependent through the Recovery Rebate Credit mechanism for 2026."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from policyengine_us import Microsimulation\n",
+    "from policyengine_core.reforms import Reform\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "\n",
+    "ENHANCED_CPS = \"hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1. Define Reform"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Reform defined: Affordability Rebates\n",
+      "  - $3,000 per adult\n",
+      "  - $3,000 per dependent\n",
+      "  - Phase-out thresholds: $75k (Single), $112.5k (HoH), $150k (Joint)\n"
+     ]
+    }
+   ],
+   "source": [
+    "reform = Reform.from_dict({\n",
+    "    \"gov.irs.credits.recovery_rebate_credit.arpa.max.adult\": {\n",
+    "        \"2026-01-01.2026-12-31\": 3000,\n",
+    "        \"2027-01-01.2035-12-31\": 0\n",
+    "    },\n",
+    "    \"gov.irs.credits.recovery_rebate_credit.arpa.max.dependent\": {\n",
+    "        \"2026-01-01.2026-12-31\": 3000,\n",
+    "        \"2027-01-01.2100-12-31\": 0\n",
+    "    },\n",
+    "    \"gov.irs.credits.recovery_rebate_credit.arpa.phase_out.threshold.JOINT\": {\n",
+    "        \"2026-01-01.2026-12-31\": 150000,\n",
+    "        \"2027-01-01.2035-12-31\": 0\n",
+    "    },\n",
+    "    \"gov.irs.credits.recovery_rebate_credit.arpa.phase_out.threshold.SINGLE\": {\n",
+    "        \"2026-01-01.2026-12-31\": 75000,\n",
+    "        \"2027-01-01.2035-12-31\": 0\n",
+    "    },\n",
+    "    \"gov.irs.credits.recovery_rebate_credit.arpa.phase_out.threshold.SEPARATE\": {\n",
+    "        \"2026-01-01.2026-12-31\": 75000,\n",
+    "        \"2027-01-01.2035-12-31\": 0\n",
+    "    },\n",
+    "    \"gov.irs.credits.recovery_rebate_credit.arpa.phase_out.threshold.SURVIVING_SPOUSE\": {\n",
+    "        \"2026-01-01.2026-12-31\": 75000,\n",
+    "        \"2027-01-01.2035-12-31\": 0\n",
+    "    },\n",
+    "    \"gov.irs.credits.recovery_rebate_credit.arpa.phase_out.threshold.HEAD_OF_HOUSEHOLD\": {\n",
+    "        \"2026-01-01.2026-12-31\": 112500,\n",
+    "        \"2027-01-01.2035-12-31\": 0\n",
+    "    }\n",
+    "}, country_id=\"us\")\n",
+    "\n",
+    "print(\"Reform defined: Affordability Rebates\")\n",
+    "print(\"  - $3,000 per adult\")\n",
+    "print(\"  - $3,000 per dependent\")\n",
+    "print(\"  - Phase-out thresholds: $75k (Single), $112.5k (HoH), $150k (Joint)\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. Reform Impact Analysis"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create baseline and reformed simulations\n",
+    "baseline = Microsimulation(dataset=ENHANCED_CPS)\n",
+    "reformed = Microsimulation(reform=reform, dataset=ENHANCED_CPS)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Household Net Income Impact:\n",
+      "  Baseline mean: $116,539\n",
+      "  Reformed mean: $121,742\n",
+      "  Average gain per household: $5,203\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Calculate household net income\n",
+    "baseline_income = baseline.calc(\"household_net_income\", period=2026, map_to=\"household\")\n",
+    "reformed_income = reformed.calc(\"household_net_income\", period=2026, map_to=\"household\")\n",
+    "difference_income = reformed_income - baseline_income\n",
+    "\n",
+    "print(\"Household Net Income Impact:\")\n",
+    "print(f\"  Baseline mean: ${baseline_income.mean():,.0f}\")\n",
+    "print(f\"  Reformed mean: ${reformed_income.mean():,.0f}\")\n",
+    "print(f\"  Average gain per household: ${difference_income.mean():,.0f}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Total Program Cost:\n",
+      "  $750,756,963,002\n",
+      "  $750.76 billion\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Total cost of the program\n",
+    "total_cost = difference_income.sum()\n",
+    "print(f\"\\nTotal Program Cost:\")\n",
+    "print(f\"  ${total_cost:,.0f}\")\n",
+    "print(f\"  ${total_cost / 1e9:.2f} billion\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Beneficiary Analysis:\n",
+      "  Households receiving benefits: 118,223,733\n",
+      "  Share of households benefiting: 81.9%\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Number of households receiving benefits\n",
+    "beneficiaries = (difference_income > 0)\n",
+    "beneficiary_count = beneficiaries.sum()\n",
+    "beneficiary_share = beneficiaries.mean()\n",
+    "\n",
+    "print(f\"\\nBeneficiary Analysis:\")\n",
+    "print(f\"  Households receiving benefits: {beneficiary_count:,.0f}\")\n",
+    "print(f\"  Share of households benefiting: {beneficiary_share:.1%}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Average benefit among beneficiaries: $6,350\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Average benefit among those receiving\n",
+    "difference_values = difference_income.values\n",
+    "weights_hh = baseline.calc(\"household_weight\", period=2026).values\n",
+    "\n",
+    "beneficiary_mask = difference_values > 0\n",
+    "avg_benefit_among_beneficiaries = np.average(\n",
+    "    difference_values[beneficiary_mask], \n",
+    "    weights=weights_hh[beneficiary_mask]\n",
+    ")\n",
+    "\n",
+    "print(f\"\\nAverage benefit among beneficiaries: ${avg_benefit_among_beneficiaries:,.0f}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "================================================================================\n",
+      "BENEFIT DISTRIBUTION BY INCOME BRACKET\n",
+      "================================================================================\n",
+      "Income Bracket Total Benefit Avg Benefit % Benefiting\n",
+      "       $0-$10k      $141.82B      $5,042        98.3%\n",
+      "     $10k-$25k       $88.38B      $5,884        98.9%\n",
+      "     $25k-$50k      $129.80B      $5,362        94.1%\n",
+      "     $50k-$75k      $108.19B      $6,195        97.1%\n",
+      "    $75k-$100k       $96.14B      $6,404        80.8%\n",
+      "   $100k-$150k      $129.86B      $7,150        83.0%\n",
+      "   $150k-$200k       $22.53B      $2,231        29.4%\n",
+      "        $200k+       $23.30B      $1,570        30.2%\n",
+      "================================================================================\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Distribution of benefits by income bracket\n",
+    "agi_baseline = baseline.calc(\"adjusted_gross_income\", period=2026, map_to=\"household\").values\n",
+    "\n",
+    "income_brackets = [\n",
+    "    (0, 10000, \"$0-$10k\"),\n",
+    "    (10000, 25000, \"$10k-$25k\"),\n",
+    "    (25000, 50000, \"$25k-$50k\"),\n",
+    "    (50000, 75000, \"$50k-$75k\"),\n",
+    "    (75000, 100000, \"$75k-$100k\"),\n",
+    "    (100000, 150000, \"$100k-$150k\"),\n",
+    "    (150000, 200000, \"$150k-$200k\"),\n",
+    "    (200000, float('inf'), \"$200k+\")\n",
+    "]\n",
+    "\n",
+    "print(\"\\n\" + \"=\"*80)\n",
+    "print(\"BENEFIT DISTRIBUTION BY INCOME BRACKET\")\n",
+    "print(\"=\"*80)\n",
+    "\n",
+    "benefit_by_bracket = []\n",
+    "for lower, upper, label in income_brackets:\n",
+    "    mask = (agi_baseline >= lower) & (agi_baseline < upper)\n",
+    "    if mask.sum() > 0:\n",
+    "        bracket_benefit = np.sum(difference_values[mask] * weights_hh[mask])\n",
+    "        bracket_households = weights_hh[mask].sum()\n",
+    "        avg_benefit = bracket_benefit / bracket_households if bracket_households > 0 else 0\n",
+    "        benefiting_in_bracket = weights_hh[mask & beneficiary_mask].sum()\n",
+    "        pct_benefiting = benefiting_in_bracket / bracket_households * 100 if bracket_households > 0 else 0\n",
+    "        \n",
+    "        benefit_by_bracket.append({\n",
+    "            \"Income Bracket\": label,\n",
+    "            \"Total Benefit\": f\"${bracket_benefit/1e9:.2f}B\",\n",
+    "            \"Avg Benefit\": f\"${avg_benefit:,.0f}\",\n",
+    "            \"% Benefiting\": f\"{pct_benefiting:.1f}%\"\n",
+    "        })\n",
+    "\n",
+    "benefit_df = pd.DataFrame(benefit_by_bracket)\n",
+    "print(benefit_df.to_string(index=False))\n",
+    "print(\"=\"*80)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "ValueError",
+     "evalue": "Variable tax_unit_net_income does not exist.",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
+      "Cell \u001b[1;32mIn[9], line 5\u001b[0m\n\u001b[0;32m      2\u001b[0m filing_status \u001b[38;5;241m=\u001b[39m baseline\u001b[38;5;241m.\u001b[39mcalc(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfiling_status\u001b[39m\u001b[38;5;124m\"\u001b[39m, period\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m2026\u001b[39m, map_to\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtax_unit\u001b[39m\u001b[38;5;124m\"\u001b[39m)\u001b[38;5;241m.\u001b[39mvalues\n\u001b[0;32m      3\u001b[0m tax_unit_weight \u001b[38;5;241m=\u001b[39m baseline\u001b[38;5;241m.\u001b[39mcalc(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtax_unit_weight\u001b[39m\u001b[38;5;124m\"\u001b[39m, period\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m2026\u001b[39m)\u001b[38;5;241m.\u001b[39mvalues\n\u001b[1;32m----> 5\u001b[0m baseline_tax_unit_income \u001b[38;5;241m=\u001b[39m \u001b[43mbaseline\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcalc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtax_unit_net_income\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m2026\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mvalues\n\u001b[0;32m      6\u001b[0m reformed_tax_unit_income \u001b[38;5;241m=\u001b[39m reformed\u001b[38;5;241m.\u001b[39mcalc(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtax_unit_net_income\u001b[39m\u001b[38;5;124m\"\u001b[39m, period\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m2026\u001b[39m)\u001b[38;5;241m.\u001b[39mvalues\n\u001b[0;32m      7\u001b[0m tax_unit_difference \u001b[38;5;241m=\u001b[39m reformed_tax_unit_income \u001b[38;5;241m-\u001b[39m baseline_tax_unit_income\n",
+      "File \u001b[1;32mc:\\Users\\dtsax\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\policyengine_core\\simulations\\microsimulation.py:54\u001b[0m, in \u001b[0;36mMicrosimulation.calculate\u001b[1;34m(self, variable_name, period, map_to, use_weights, decode_enums)\u001b[0m\n\u001b[0;32m     52\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m period \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefault_calculation_period \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m     53\u001b[0m     period \u001b[38;5;241m=\u001b[39m get_period(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefault_calculation_period)\n\u001b[1;32m---> 54\u001b[0m values \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcalculate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvariable_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmap_to\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdecode_enums\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m     55\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m use_weights:\n\u001b[0;32m     56\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m values\n",
+      "File \u001b[1;32mc:\\Users\\dtsax\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\policyengine_core\\simulations\\simulation.py:491\u001b[0m, in \u001b[0;36mSimulation.calculate\u001b[1;34m(self, variable_name, period, map_to, decode_enums)\u001b[0m\n\u001b[0;32m    488\u001b[0m np\u001b[38;5;241m.\u001b[39mrandom\u001b[38;5;241m.\u001b[39mseed(\u001b[38;5;28mhash\u001b[39m(variable_name \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mstr\u001b[39m(period)) \u001b[38;5;241m%\u001b[39m \u001b[38;5;241m1000000\u001b[39m)\n\u001b[0;32m    490\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 491\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_calculate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvariable_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiod\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    492\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(result, EnumArray) \u001b[38;5;129;01mand\u001b[39;00m decode_enums:\n\u001b[0;32m    493\u001b[0m         result \u001b[38;5;241m=\u001b[39m result\u001b[38;5;241m.\u001b[39mdecode_to_str()\n",
+      "File \u001b[1;32mc:\\Users\\dtsax\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\policyengine_core\\simulations\\simulation.py:616\u001b[0m, in \u001b[0;36mSimulation._calculate\u001b[1;34m(self, variable_name, period)\u001b[0m\n\u001b[0;32m    605\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m    606\u001b[0m \u001b[38;5;124;03mCalculate the variable ``variable_name`` for the period ``period``, using the variable formula if it exists.\u001b[39;00m\n\u001b[0;32m    607\u001b[0m \n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m    613\u001b[0m \u001b[38;5;124;03m    ArrayLike: The calculated variable.\u001b[39;00m\n\u001b[0;32m    614\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m    615\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m variable_name \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtax_benefit_system\u001b[38;5;241m.\u001b[39mvariables:\n\u001b[1;32m--> 616\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mVariable \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mvariable_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m does not exist.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m    617\u001b[0m population \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_variable_population(variable_name)\n\u001b[0;32m    618\u001b[0m holder \u001b[38;5;241m=\u001b[39m population\u001b[38;5;241m.\u001b[39mget_holder(variable_name)\n",
+      "\u001b[1;31mValueError\u001b[0m: Variable tax_unit_net_income does not exist."
+     ]
+    }
+   ],
+   "source": [
+    "# Distribution by filing status\n",
+    "filing_status = baseline.calc(\"filing_status\", period=2026, map_to=\"tax_unit\").values\n",
+    "tax_unit_weight = baseline.calc(\"tax_unit_weight\", period=2026).values\n",
+    "\n",
+    "baseline_tax_unit_income = baseline.calc(\"tax_unit_net_income\", period=2026).values\n",
+    "reformed_tax_unit_income = reformed.calc(\"tax_unit_net_income\", period=2026).values\n",
+    "tax_unit_difference = reformed_tax_unit_income - baseline_tax_unit_income\n",
+    "\n",
+    "filing_status_names = {\n",
+    "    1: \"SINGLE\",\n",
+    "    2: \"JOINT\",\n",
+    "    3: \"SEPARATE\",\n",
+    "    4: \"HEAD_OF_HOUSEHOLD\",\n",
+    "    5: \"SURVIVING_SPOUSE\"\n",
+    "}\n",
+    "\n",
+    "print(\"\\n\" + \"=\"*80)\n",
+    "print(\"BENEFIT DISTRIBUTION BY FILING STATUS\")\n",
+    "print(\"=\"*80)\n",
+    "\n",
+    "filing_data = []\n",
+    "for status_code, status_name in filing_status_names.items():\n",
+    "    mask = filing_status == status_code\n",
+    "    if mask.sum() > 0:\n",
+    "        status_benefit = np.sum(tax_unit_difference[mask] * tax_unit_weight[mask])\n",
+    "        status_count = tax_unit_weight[mask].sum()\n",
+    "        avg_benefit = status_benefit / status_count if status_count > 0 else 0\n",
+    "        benefiting = tax_unit_weight[mask & (tax_unit_difference > 0)].sum()\n",
+    "        pct_benefiting = benefiting / status_count * 100 if status_count > 0 else 0\n",
+    "        \n",
+    "        filing_data.append({\n",
+    "            \"Filing Status\": status_name,\n",
+    "            \"Tax Units\": f\"{status_count:,.0f}\",\n",
+    "            \"Total Benefit\": f\"${status_benefit/1e9:.2f}B\",\n",
+    "            \"Avg Benefit\": f\"${avg_benefit:,.0f}\",\n",
+    "            \"% Benefiting\": f\"{pct_benefiting:.1f}%\"\n",
+    "        })\n",
+    "\n",
+    "filing_df = pd.DataFrame(filing_data)\n",
+    "print(filing_df.to_string(index=False))\n",
+    "print(\"=\"*80)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Impact on households with/without children\n",
+    "is_child = baseline.calc(\"is_child\", period=2026, map_to=\"person\")\n",
+    "household_id = baseline.calc(\"household_id\", period=2026, map_to=\"person\")\n",
+    "household_weight_person = baseline.calc(\"household_weight\", period=2026, map_to=\"person\")\n",
+    "\n",
+    "df_households = pd.DataFrame({\n",
+    "    'household_id': household_id.values,\n",
+    "    'is_child': is_child.values,\n",
+    "    'household_weight': household_weight_person.values\n",
+    "})\n",
+    "\n",
+    "children_per_household = df_households.groupby('household_id').agg({\n",
+    "    'is_child': 'sum',\n",
+    "    'household_weight': 'first'\n",
+    "}).reset_index()\n",
+    "\n",
+    "has_children = children_per_household.set_index('household_id')['is_child'] > 0\n",
+    "household_ids = baseline.calc(\"household_id\", period=2026, map_to=\"household\").values\n",
+    "\n",
+    "# Map has_children to households\n",
+    "has_children_map = has_children.reindex(household_ids).fillna(False).values\n",
+    "\n",
+    "# Households with children\n",
+    "with_children_mask = has_children_map\n",
+    "without_children_mask = ~has_children_map\n",
+    "\n",
+    "benefit_with_children = np.sum(difference_values[with_children_mask] * weights_hh[with_children_mask])\n",
+    "benefit_without_children = np.sum(difference_values[without_children_mask] * weights_hh[without_children_mask])\n",
+    "\n",
+    "count_with_children = weights_hh[with_children_mask].sum()\n",
+    "count_without_children = weights_hh[without_children_mask].sum()\n",
+    "\n",
+    "avg_with_children = benefit_with_children / count_with_children if count_with_children > 0 else 0\n",
+    "avg_without_children = benefit_without_children / count_without_children if count_without_children > 0 else 0\n",
+    "\n",
+    "print(\"\\n\" + \"=\"*80)\n",
+    "print(\"IMPACT BY HOUSEHOLD COMPOSITION\")\n",
+    "print(\"=\"*80)\n",
+    "print(f\"Households WITH children:\")\n",
+    "print(f\"  Count: {count_with_children:,.0f}\")\n",
+    "print(f\"  Total benefit: ${benefit_with_children/1e9:.2f}B\")\n",
+    "print(f\"  Average benefit: ${avg_with_children:,.0f}\")\n",
+    "print(f\"\\nHouseholds WITHOUT children:\")\n",
+    "print(f\"  Count: {count_without_children:,.0f}\")\n",
+    "print(f\"  Total benefit: ${benefit_without_children/1e9:.2f}B\")\n",
+    "print(f\"  Average benefit: ${avg_without_children:,.0f}\")\n",
+    "print(\"=\"*80)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3. Summary"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create summary table\n",
+    "summary_data = {\n",
+    "    'Metric': [\n",
+    "        'Total Program Cost',\n",
+    "        'Households Benefiting',\n",
+    "        'Share of Households Benefiting',\n",
+    "        'Average Benefit (all households)',\n",
+    "        'Average Benefit (beneficiaries only)',\n",
+    "        'Benefit to HH with Children',\n",
+    "        'Benefit to HH without Children',\n",
+    "        'Avg Benefit - HH with Children',\n",
+    "        'Avg Benefit - HH without Children'\n",
+    "    ],\n",
+    "    'Value': [\n",
+    "        f\"${total_cost/1e9:.2f}B\",\n",
+    "        f\"{beneficiary_count:,.0f}\",\n",
+    "        f\"{beneficiary_share:.1%}\",\n",
+    "        f\"${difference_income.mean():,.0f}\",\n",
+    "        f\"${avg_benefit_among_beneficiaries:,.0f}\",\n",
+    "        f\"${benefit_with_children/1e9:.2f}B\",\n",
+    "        f\"${benefit_without_children/1e9:.2f}B\",\n",
+    "        f\"${avg_with_children:,.0f}\",\n",
+    "        f\"${avg_without_children:,.0f}\"\n",
+    "    ]\n",
+    "}\n",
+    "\n",
+    "summary_df = pd.DataFrame(summary_data)\n",
+    "\n",
+    "print(\"\\n\" + \"=\"*70)\n",
+    "print(\"AFFORDABILITY REBATES - SUMMARY\")\n",
+    "print(\"=\"*70)\n",
+    "print(summary_df.to_string(index=False))\n",
+    "print(\"=\"*70)\n",
+    "\n",
+    "# Save summary\n",
+    "summary_df.to_csv('affordability_rebates_summary.csv', index=False)\n",
+    "print(\"\\nSummary saved to: affordability_rebates_summary.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Save benefit by income bracket\n",
+    "benefit_df.to_csv('affordability_rebates_by_income.csv', index=False)\n",
+    "print(\"Income bracket analysis saved to: affordability_rebates_by_income.csv\")\n",
+    "\n",
+    "# Save benefit by filing status\n",
+    "filing_df.to_csv('affordability_rebates_by_filing_status.csv', index=False)\n",
+    "print(\"Filing status analysis saved to: affordability_rebates_by_filing_status.csv\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/us/federal/data_exploration.ipynb b/us/federal/data_exploration.ipynb
new file mode 100644
index 0000000..6f7fbcc
--- /dev/null
+++ b/us/federal/data_exploration.ipynb
@@ -0,0 +1,319 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Enhanced CPS 2024 Data Exploration\n",
+    "\n",
+    "This notebook explores the enhanced_cps_2024 dataset to understand its structure, demographics, and income distribution."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from policyengine_us import Microsimulation\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "\n",
+    "ENHANCED_CPS = \"hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1. Dataset Overview"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load enhanced_cps_2024 dataset\n",
+    "sim = Microsimulation(dataset=ENHANCED_CPS)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Number of households in dataset: 15,929\n",
+      "Household count (weighted): 144,287,680\n",
+      "Person count (weighted): 339,597,307\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Check dataset size\n",
+    "household_weight = sim.calc(\"household_weight\", period=2026)\n",
+    "household_count = sim.calc(\"household_count\", period=2026, map_to=\"household\")\n",
+    "person_count = sim.calc(\"person_count\", period=2026, map_to=\"household\")\n",
+    "\n",
+    "print(f\"Number of households in dataset: {len(household_weight):,}\")\n",
+    "print(f\"Household count (weighted): {household_count.sum():,.0f}\")\n",
+    "print(f\"Person count (weighted): {person_count.sum():,.0f}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. Income Distribution"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Income distribution:\n",
+      "  Median AGI: $54,423\n",
+      "  75th percentile: $117,905\n",
+      "  90th percentile: $202,534\n",
+      "  95th percentile: $324,016\n",
+      "  Max AGI: $349,003,552\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Check household income distribution\n",
+    "agi = sim.calc(\"adjusted_gross_income\", period=2026, map_to=\"household\")\n",
+    "print(f\"Income distribution:\")\n",
+    "print(f\"  Median AGI: ${agi.median():,.0f}\")\n",
+    "print(f\"  75th percentile: ${agi.quantile(0.75):,.0f}\")\n",
+    "print(f\"  90th percentile: ${agi.quantile(0.90):,.0f}\")\n",
+    "print(f\"  95th percentile: ${agi.quantile(0.95):,.0f}\")\n",
+    "print(f\"  Max AGI: ${agi.max():,.0f}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Employment Income Analysis:\n",
+      "  Total employment income (weighted): $10,943,358,507,618\n",
+      "  Total employment income before LSR (weighted): $10,943,358,507,618\n",
+      "  Mean employment income: $32,225\n",
+      "  Mean employment income before LSR: $32,225\n",
+      "  Median employment income: $0\n",
+      "  Median employment income before LSR: $0\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Employment income analysis\n",
+    "employment_income = sim.calc(\"employment_income\", period=2026, map_to=\"person\")\n",
+    "employment_income_before_lsr = sim.calc(\"employment_income_before_lsr\", period=2026, map_to=\"person\")\n",
+    "\n",
+    "print(f\"\\nEmployment Income Analysis:\")\n",
+    "print(f\"  Total employment income (weighted): ${employment_income.sum():,.0f}\")\n",
+    "print(f\"  Total employment income before LSR (weighted): ${employment_income_before_lsr.sum():,.0f}\")\n",
+    "print(f\"  Mean employment income: ${employment_income.mean():,.0f}\")\n",
+    "print(f\"  Mean employment income before LSR: ${employment_income_before_lsr.mean():,.0f}\")\n",
+    "print(f\"  Median employment income: ${employment_income.median():,.0f}\")\n",
+    "print(f\"  Median employment income before LSR: ${employment_income_before_lsr.median():,.0f}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "======================================================================\n",
+      "HOUSEHOLD COUNTS BY INCOME BRACKET\n",
+      "======================================================================\n",
+      "Income Bracket Households % of All Households\n",
+      "       $0-$10k 28,130,220              19.50%\n",
+      "     $10k-$25k 15,020,060              10.41%\n",
+      "     $25k-$50k 24,209,866              16.78%\n",
+      "     $50k-$75k 17,464,564              12.10%\n",
+      "    $75k-$100k 15,011,586              10.40%\n",
+      "   $100k-$150k 18,161,620              12.59%\n",
+      "   $150k-$200k 10,102,187               7.00%\n",
+      "        $200k+ 14,839,484              10.28%\n",
+      "======================================================================\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Household counts by income brackets\n",
+    "agi_hh = np.array(sim.calc(\"adjusted_gross_income\", period=2026, map_to=\"household\").values)\n",
+    "weights = np.array(sim.calc(\"household_weight\", period=2026).values)\n",
+    "total_households = weights.sum()\n",
+    "\n",
+    "income_brackets = [\n",
+    "    (0, 10000, \"$0-$10k\"),\n",
+    "    (10000, 25000, \"$10k-$25k\"),\n",
+    "    (25000, 50000, \"$25k-$50k\"),\n",
+    "    (50000, 75000, \"$50k-$75k\"),\n",
+    "    (75000, 100000, \"$75k-$100k\"),\n",
+    "    (100000, 150000, \"$100k-$150k\"),\n",
+    "    (150000, 200000, \"$150k-$200k\"),\n",
+    "    (200000, float('inf'), \"$200k+\")\n",
+    "]\n",
+    "\n",
+    "bracket_data = []\n",
+    "for lower, upper, label in income_brackets:\n",
+    "    mask = (agi_hh >= lower) & (agi_hh < upper)\n",
+    "    count = weights[mask].sum()\n",
+    "    pct_of_total = (count / total_households) * 100\n",
+    "    \n",
+    "    bracket_data.append({\n",
+    "        \"Income Bracket\": label,\n",
+    "        \"Households\": f\"{count:,.0f}\",\n",
+    "        \"% of All Households\": f\"{pct_of_total:.2f}%\"\n",
+    "    })\n",
+    "\n",
+    "income_df = pd.DataFrame(bracket_data)\n",
+    "\n",
+    "print(\"\\n\" + \"=\"*70)\n",
+    "print(\"HOUSEHOLD COUNTS BY INCOME BRACKET\")\n",
+    "print(\"=\"*70)\n",
+    "print(income_df.to_string(index=False))\n",
+    "print(\"=\"*70)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3. Household Composition"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Households with children (weighted):\n",
+      "  Total households with children: 43,798,516\n",
+      "  Households with 1 child: 24,479,972\n",
+      "  Households with 2 children: 12,298,578\n",
+      "  Households with 3+ children: 7,019,965\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Check households with children\n",
+    "is_child = sim.calc(\"is_child\", period=2026, map_to=\"person\")\n",
+    "household_id = sim.calc(\"household_id\", period=2026, map_to=\"person\")\n",
+    "person_weight = sim.calc(\"person_weight\", period=2026, map_to=\"person\")\n",
+    "household_weight_person = sim.calc(\"household_weight\", period=2026, map_to=\"person\")\n",
+    "\n",
+    "# Create DataFrame\n",
+    "df_households = pd.DataFrame({\n",
+    "    'household_id': household_id.values,\n",
+    "    'is_child': is_child.values,\n",
+    "    'household_weight': household_weight_person.values\n",
+    "})\n",
+    "\n",
+    "# Count children per household\n",
+    "children_per_household = df_households.groupby('household_id').agg({\n",
+    "    'is_child': 'sum',\n",
+    "    'household_weight': 'first'\n",
+    "}).reset_index()\n",
+    "\n",
+    "# Calculate weighted household counts\n",
+    "total_households_with_children = children_per_household[children_per_household['is_child'] > 0]['household_weight'].sum()\n",
+    "households_with_1_child = children_per_household[children_per_household['is_child'] == 1]['household_weight'].sum()\n",
+    "households_with_2_children = children_per_household[children_per_household['is_child'] == 2]['household_weight'].sum()\n",
+    "households_with_3plus_children = children_per_household[children_per_household['is_child'] >= 3]['household_weight'].sum()\n",
+    "\n",
+    "print(f\"\\nHouseholds with children (weighted):\")\n",
+    "print(f\"  Total households with children: {total_households_with_children:,.0f}\")\n",
+    "print(f\"  Households with 1 child: {households_with_1_child:,.0f}\")\n",
+    "print(f\"  Households with 2 children: {households_with_2_children:,.0f}\")\n",
+    "print(f\"  Households with 3+ children: {households_with_3plus_children:,.0f}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Children by age:\n",
+      "  Total children under 18: 73,113,736\n",
+      "  Children under 6: 22,428,570\n",
+      "  Children under 3: 11,172,289\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Check children by age groups\n",
+    "age = sim.calc(\"age\", period=2026, map_to=\"person\")\n",
+    "\n",
+    "df_age = pd.DataFrame({\n",
+    "    \"age\": age.values,\n",
+    "    \"person_weight\": person_weight.values\n",
+    "})\n",
+    "\n",
+    "# Calculate weighted totals\n",
+    "total_children = df_age[df_age['age'] < 18]['person_weight'].sum()\n",
+    "children_under_6 = df_age[df_age['age'] < 6]['person_weight'].sum()\n",
+    "children_under_3 = df_age[df_age['age'] < 3]['person_weight'].sum()\n",
+    "\n",
+    "print(f\"\\nChildren by age:\")\n",
+    "print(f\"  Total children under 18: {total_children:,.0f}\")\n",
+    "print(f\"  Children under 6: {children_under_6:,.0f}\")\n",
+    "print(f\"  Children under 3: {children_under_3:,.0f}\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}