diff --git a/.github/workflows/reusable_lint.yaml b/.github/workflows/reusable_lint.yaml index f5fa02cf7..8d15bd450 100644 --- a/.github/workflows/reusable_lint.yaml +++ b/.github/workflows/reusable_lint.yaml @@ -8,7 +8,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - name: Install uv + uses: astral-sh/setup-uv@v5 - name: Check formatting - uses: "lgeiger/black-action@master" - with: - args: ". -l 79 --check" \ No newline at end of file + run: uvx ruff format --check . diff --git a/CLAUDE.md b/CLAUDE.md index 804b82f71..1c5316fcb 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -11,8 +11,8 @@ - `make test` - Also runs all tests ## Formatting -- `make format` - Format all code using Black with 79 char line length -- `black . -l 79 --check` - Check formatting without changing files +- `make format` - Format all code using ruff with 79 char line length +- `ruff format --check .` - Check formatting without changing files ## Code Style Guidelines - **Imports**: Standard libraries first, then third-party, then internal @@ -20,7 +20,7 @@ - **Naming**: Classes: PascalCase, Functions/Variables: snake_case, Constants: UPPER_SNAKE_CASE - **Documentation**: Google-style docstrings with Args and Returns sections - **Error Handling**: Use validation checks with specific error messages -- **Line Length**: 79 characters max (Black configured in pyproject.toml) +- **Line Length**: 79 characters max (ruff configured in pyproject.toml) - **Python Version**: Targeting Python 3.11 ## Git and PR Guidelines diff --git a/Makefile b/Makefile index b34b8eb60..ce38e165e 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ HF_CLONE_DIR ?= $(HOME)/huggingface/policyengine-us-data all: data test format: - black . -l 79 + ruff format . test: pytest diff --git a/changelog.d/changed/switch-to-ruff.md b/changelog.d/changed/switch-to-ruff.md new file mode 100644 index 000000000..aeb771eb8 --- /dev/null +++ b/changelog.d/changed/switch-to-ruff.md @@ -0,0 +1 @@ +Switched code formatter from Black to Ruff. diff --git a/docs/calibration_matrix.ipynb b/docs/calibration_matrix.ipynb index 41497b1e8..65d79fb79 100644 --- a/docs/calibration_matrix.ipynb +++ b/docs/calibration_matrix.ipynb @@ -27,7 +27,28 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": "import numpy as np\nimport pandas as pd\nfrom policyengine_us import Microsimulation\nfrom policyengine_us_data.storage import STORAGE_FOLDER\nfrom policyengine_us_data.calibration.unified_matrix_builder import (\n UnifiedMatrixBuilder,\n)\nfrom policyengine_us_data.calibration.clone_and_assign import (\n assign_random_geography,\n)\nfrom policyengine_us_data.datasets.cps.local_area_calibration.calibration_utils import (\n create_target_groups,\n drop_target_groups,\n get_geo_level,\n STATE_CODES,\n)\n\ndb_path = STORAGE_FOLDER / \"calibration\" / \"policy_data.db\"\ndb_uri = f\"sqlite:///{db_path}\"\ndataset_path = STORAGE_FOLDER / \"stratified_extended_cps_2024.h5\"" + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from policyengine_us import Microsimulation\n", + "from policyengine_us_data.storage import STORAGE_FOLDER\n", + "from policyengine_us_data.calibration.unified_matrix_builder import (\n", + " UnifiedMatrixBuilder,\n", + ")\n", + "from policyengine_us_data.calibration.clone_and_assign import (\n", + " assign_random_geography,\n", + ")\n", + "from policyengine_us_data.datasets.cps.local_area_calibration.calibration_utils import (\n", + " create_target_groups,\n", + " drop_target_groups,\n", + " get_geo_level,\n", + " STATE_CODES,\n", + ")\n", + "\n", + "db_path = STORAGE_FOLDER / \"calibration\" / \"policy_data.db\"\n", + "db_uri = f\"sqlite:///{db_path}\"\n", + "dataset_path = STORAGE_FOLDER / \"stratified_extended_cps_2024.h5\"" + ] }, { "cell_type": "code", @@ -65,7 +86,9 @@ ")\n", "\n", "n_total = n_records * N_CLONES\n", - "print(f\"Records: {n_records:,}, Clones: {N_CLONES}, Total columns: {n_total:,}\")\n", + "print(\n", + " f\"Records: {n_records:,}, Clones: {N_CLONES}, Total columns: {n_total:,}\"\n", + ")\n", "print(f\"Matrix shape: {X_sparse.shape}\")\n", "print(f\"Non-zero entries: {X_sparse.nnz:,}\")" ] @@ -82,7 +105,21 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": "print(f\"Targets: {X_sparse.shape[0]}\")\nprint(f\"Columns: {X_sparse.shape[1]:,} ({N_CLONES} clones x {n_records:,} records)\")\nprint(f\"Non-zeros: {X_sparse.nnz:,}\")\nprint(f\"Density: {X_sparse.nnz / (X_sparse.shape[0] * X_sparse.shape[1]):.6f}\")\n\ngeo_levels = targets_df[\"geographic_id\"].apply(get_geo_level)\nlevel_names = {0: \"National\", 1: \"State\", 2: \"District\"}\nfor level in [0, 1, 2]:\n n = (geo_levels == level).sum()\n if n > 0:\n print(f\" {level_names[level]}: {n} targets\")" + "source": [ + "print(f\"Targets: {X_sparse.shape[0]}\")\n", + "print(\n", + " f\"Columns: {X_sparse.shape[1]:,} ({N_CLONES} clones x {n_records:,} records)\"\n", + ")\n", + "print(f\"Non-zeros: {X_sparse.nnz:,}\")\n", + "print(f\"Density: {X_sparse.nnz / (X_sparse.shape[0] * X_sparse.shape[1]):.6f}\")\n", + "\n", + "geo_levels = targets_df[\"geographic_id\"].apply(get_geo_level)\n", + "level_names = {0: \"National\", 1: \"State\", 2: \"District\"}\n", + "for level in [0, 1, 2]:\n", + " n = (geo_levels == level).sum()\n", + " if n > 0:\n", + " print(f\" {level_names[level]}: {n} targets\")" + ] }, { "cell_type": "markdown", @@ -294,14 +331,16 @@ "for gid, info in enumerate(group_info):\n", " mask = target_groups == gid\n", " vals = targets_df.loc[mask, \"value\"]\n", - " records.append({\n", - " \"group_id\": gid,\n", - " \"description\": info,\n", - " \"n_targets\": mask.sum(),\n", - " \"min_value\": vals.min(),\n", - " \"median_value\": vals.median(),\n", - " \"max_value\": vals.max(),\n", - " })\n", + " records.append(\n", + " {\n", + " \"group_id\": gid,\n", + " \"description\": info,\n", + " \"n_targets\": mask.sum(),\n", + " \"min_value\": vals.min(),\n", + " \"median_value\": vals.median(),\n", + " \"max_value\": vals.max(),\n", + " }\n", + " )\n", "\n", "group_df = pd.DataFrame(records)\n", "print(group_df.to_string(index=False))" @@ -400,7 +439,9 @@ " col_vec = X_sparse[:, col]\n", " nnz = col_vec.nnz\n", " abbr = STATE_CODES.get(state, \"??\")\n", - " print(f\" col {col}: {abbr} (state={state}, CD={cd}) — {nnz} non-zero rows\")" + " print(\n", + " f\" col {col}: {abbr} (state={state}, CD={cd}) — {nnz} non-zero rows\"\n", + " )" ] }, { @@ -475,7 +516,28 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": "nnz_per_row = np.diff(X_sparse.indptr)\nprint(f\"Non-zeros per row:\")\nprint(f\" min: {nnz_per_row.min():,}\")\nprint(f\" median: {int(np.median(nnz_per_row)):,}\")\nprint(f\" mean: {nnz_per_row.mean():,.0f}\")\nprint(f\" max: {nnz_per_row.max():,}\")\n\ngeo_levels = targets_df[\"geographic_id\"].apply(get_geo_level)\nlevel_names = {0: \"National\", 1: \"State\", 2: \"District\"}\nprint(\"\\nBy geographic level:\")\nfor level in [0, 1, 2]:\n mask = (geo_levels == level).values\n if mask.any():\n vals = nnz_per_row[mask]\n print(\n f\" {level_names[level]:10s}: \"\n f\"n={mask.sum():>4d}, \"\n f\"median nnz={int(np.median(vals)):>7,}, \"\n f\"range=[{vals.min():,}, {vals.max():,}]\"\n )" + "source": [ + "nnz_per_row = np.diff(X_sparse.indptr)\n", + "print(f\"Non-zeros per row:\")\n", + "print(f\" min: {nnz_per_row.min():,}\")\n", + "print(f\" median: {int(np.median(nnz_per_row)):,}\")\n", + "print(f\" mean: {nnz_per_row.mean():,.0f}\")\n", + "print(f\" max: {nnz_per_row.max():,}\")\n", + "\n", + "geo_levels = targets_df[\"geographic_id\"].apply(get_geo_level)\n", + "level_names = {0: \"National\", 1: \"State\", 2: \"District\"}\n", + "print(\"\\nBy geographic level:\")\n", + "for level in [0, 1, 2]:\n", + " mask = (geo_levels == level).values\n", + " if mask.any():\n", + " vals = nnz_per_row[mask]\n", + " print(\n", + " f\" {level_names[level]:10s}: \"\n", + " f\"n={mask.sum():>4d}, \"\n", + " f\"median nnz={int(np.median(vals)):>7,}, \"\n", + " f\"range=[{vals.min():,}, {vals.max():,}]\"\n", + " )" + ] }, { "cell_type": "code", @@ -498,12 +560,16 @@ "clone_nnz = []\n", "for ci in range(N_CLONES):\n", " block = X_sparse[:, ci * n_records : (ci + 1) * n_records]\n", - " n_states = len(np.unique(geography.state_fips[ci * n_records : (ci + 1) * n_records]))\n", - " clone_nnz.append({\n", - " \"clone\": ci,\n", - " \"nnz\": block.nnz,\n", - " \"unique_states\": n_states,\n", - " })\n", + " n_states = len(\n", + " np.unique(geography.state_fips[ci * n_records : (ci + 1) * n_records])\n", + " )\n", + " clone_nnz.append(\n", + " {\n", + " \"clone\": ci,\n", + " \"nnz\": block.nnz,\n", + " \"unique_states\": n_states,\n", + " }\n", + " )\n", "\n", "clone_df = pd.DataFrame(clone_nnz)\n", "print(\"Non-zeros per clone block:\")\n", @@ -666,7 +732,10 @@ } ], "source": [ - "ratios = row_sums[achievable_mask] / targets_filtered.loc[achievable_mask, \"value\"].values\n", + "ratios = (\n", + " row_sums[achievable_mask]\n", + " / targets_filtered.loc[achievable_mask, \"value\"].values\n", + ")\n", "ratio_df = targets_filtered[achievable_mask].copy()\n", "ratio_df[\"row_sum\"] = row_sums[achievable_mask]\n", "ratio_df[\"ratio\"] = ratios\n", @@ -704,7 +773,9 @@ "X_final = X_filtered[achievable_mask, :]\n", "print(f\"Final matrix shape: {X_final.shape}\")\n", "print(f\"Final non-zero entries: {X_final.nnz:,}\")\n", - "print(f\"Final density: {X_final.nnz / (X_final.shape[0] * X_final.shape[1]):.6f}\")\n", + "print(\n", + " f\"Final density: {X_final.nnz / (X_final.shape[0] * X_final.shape[1]):.6f}\"\n", + ")\n", "print(\"\\nThis is what the optimizer receives.\")" ] }, diff --git a/docs/hierarchical_uprating.ipynb b/docs/hierarchical_uprating.ipynb index 4da30d82c..0115b3d87 100644 --- a/docs/hierarchical_uprating.ipynb +++ b/docs/hierarchical_uprating.ipynb @@ -264,8 +264,7 @@ ], "source": [ "snap_hh = raw[\n", - " (raw[\"domain_variable\"] == \"snap\")\n", - " & (raw[\"variable\"] == \"household_count\")\n", + " (raw[\"domain_variable\"] == \"snap\") & (raw[\"variable\"] == \"household_count\")\n", "]\n", "for level in [\"state\", \"district\"]:\n", " total = snap_hh[snap_hh[\"geo_level\"] == level][\"value\"].sum()\n", @@ -377,8 +376,7 @@ "\n", "for fips, abbr in sample_states.items():\n", " rows = raw[\n", - " (raw[\"geo_level\"] == \"state\")\n", - " & (raw[\"geographic_id\"] == str(fips))\n", + " (raw[\"geo_level\"] == \"state\") & (raw[\"geographic_id\"] == str(fips))\n", " ]\n", " for _, r in rows.iterrows():\n", " print(\n", @@ -412,9 +410,7 @@ "metadata": {}, "outputs": [], "source": [ - "result = builder._apply_hierarchical_uprating(\n", - " raw, DOMAINS, uprating_factors\n", - ")" + "result = builder._apply_hierarchical_uprating(raw, DOMAINS, uprating_factors)" ] }, { @@ -455,9 +451,7 @@ " cd_state = cd_domain[\n", " cd_domain[\"geographic_id\"].apply(\n", " lambda g, s=fips: (\n", - " int(g) // 100 == s\n", - " if g not in (\"US\",)\n", - " else False\n", + " int(g) // 100 == s if g not in (\"US\",) else False\n", " )\n", " )\n", " ]\n", @@ -474,11 +468,7 @@ " & (raw[\"variable\"] == var)\n", " & (raw[\"domain_variable\"] == domain)\n", " ]\n", - " uprated_state = (\n", - " st_row[\"value\"].iloc[0]\n", - " if len(st_row)\n", - " else np.nan\n", - " )\n", + " uprated_state = st_row[\"value\"].iloc[0] if len(st_row) else np.nan\n", " print(\n", " f\" {abbr} {var:20s} \"\n", " f\"hif={hif:.6f} \"\n", @@ -487,6 +477,7 @@ " f\"uprated_state={uprated_state:>14,.0f}\"\n", " )\n", "\n", + "\n", "show_reconciliation(result, raw, \"aca_ptc\", sample_states)" ] }, @@ -527,9 +518,9 @@ "]\n", "\n", "state_ufs = (\n", - " aca_cds.assign(state_fips=aca_cds[\"geographic_id\"].apply(\n", - " lambda g: int(g) // 100\n", - " ))\n", + " aca_cds.assign(\n", + " state_fips=aca_cds[\"geographic_id\"].apply(lambda g: int(g) // 100)\n", + " )\n", " .groupby(\"state_fips\")[\"state_uprating_factor\"]\n", " .first()\n", " .sort_values()\n", @@ -537,7 +528,7 @@ "\n", "print(\"ACA PTC uprating factors (aca_ptc = vol_mult * val_mult):\")\n", "print(f\" {'State FIPS':>12s} {'Factor':>8s}\")\n", - "print(f\" {'─'*12} {'─'*8}\")\n", + "print(f\" {'─' * 12} {'─' * 8}\")\n", "for fips in list(state_ufs.index[:5]) + [\"...\"] + list(state_ufs.index[-5:]):\n", " if fips == \"...\":\n", " print(f\" {'...':>12s}\")\n", @@ -749,9 +740,7 @@ "checks = 0\n", "for domain in DOMAINS:\n", " domain_result = result[result[\"domain_variable\"] == domain]\n", - " cd_result = domain_result[\n", - " domain_result[\"geo_level\"] == \"district\"\n", - " ]\n", + " cd_result = domain_result[domain_result[\"geo_level\"] == \"district\"]\n", " if cd_result.empty:\n", " continue\n", "\n", @@ -759,9 +748,7 @@ " cd_rows = cd_result[\n", " cd_result[\"geographic_id\"].apply(\n", " lambda g, s=fips: (\n", - " int(g) // 100 == s\n", - " if g not in (\"US\",)\n", - " else False\n", + " int(g) // 100 == s if g not in (\"US\",) else False\n", " )\n", " )\n", " ]\n", diff --git a/docs/local_area_calibration_setup.ipynb b/docs/local_area_calibration_setup.ipynb index 2e8614aa9..ce97f4ec4 100644 --- a/docs/local_area_calibration_setup.ipynb +++ b/docs/local_area_calibration_setup.ipynb @@ -576,9 +576,7 @@ " f\"{col in cd_to_cols.get(cd, [])}\"\n", " )\n", " # Check an unrelated state\n", - " print(\n", - " f\" Visible to NC (37) targets: \" f\"{col in state_to_cols.get(37, [])}\"\n", - " )\n", + " print(f\" Visible to NC (37) targets: {col in state_to_cols.get(37, [])}\")\n", " print()" ] }, @@ -639,8 +637,7 @@ " else f\"dict ({len(rate)} entries)\"\n", " )\n", " print(\n", - " f\" {spec['variable']:40s} \"\n", - " f\"entity={spec['entity']:10s} rate={rate_str}\"\n", + " f\" {spec['variable']:40s} entity={spec['entity']:10s} rate={rate_str}\"\n", " )" ] }, @@ -966,8 +963,7 @@ "output_path = os.path.join(output_dir, \"results.h5\")\n", "\n", "print(\n", - " f\"Weight vector: {len(w):,} entries \"\n", - " f\"({n_demo_cds} CDs x {n_records:,} HH)\"\n", + " f\"Weight vector: {len(w):,} entries ({n_demo_cds} CDs x {n_records:,} HH)\"\n", ")\n", "print(f\"Non-zero weights: {(w > 0).sum()}\")\n", "print(\n", @@ -1124,7 +1120,7 @@ "example_mapping = mapping_df.loc[\n", " mapping_df.original_household_id == example_hh_id\n", "]\n", - "print(f\"Example household (original_id={example_hh_id}) \" f\"in mapping:\\n\")\n", + "print(f\"Example household (original_id={example_hh_id}) in mapping:\\n\")\n", "print(example_mapping.to_string(index=False))\n", "\n", "new_ids = example_mapping.new_household_id\n", diff --git a/modal_app/data_build.py b/modal_app/data_build.py index 131e7f0bf..b27750af4 100644 --- a/modal_app/data_build.py +++ b/modal_app/data_build.py @@ -391,8 +391,7 @@ def build_datasets( # GROUP 3: After extended_cps - run in parallel # enhanced_cps and stratified_cps both depend on extended_cps print( - "=== Phase 4: Building enhanced and stratified CPS (parallel)" - " ===" + "=== Phase 4: Building enhanced and stratified CPS (parallel) ===" ) with ThreadPoolExecutor(max_workers=2) as executor: futures = [ diff --git a/paper/scripts/calculate_distributional_metrics.py b/paper/scripts/calculate_distributional_metrics.py index 4afdc67d9..61de771b9 100644 --- a/paper/scripts/calculate_distributional_metrics.py +++ b/paper/scripts/calculate_distributional_metrics.py @@ -82,7 +82,7 @@ def calculate_top_shares(values, weights, percentiles=[90, 99]): threshold = weighted_percentile(values, weights, p) mask = values >= threshold top_income = np.sum(values[mask] * weights[mask]) - shares[f"top_{100-p}%"] = top_income / total_income + shares[f"top_{100 - p}%"] = top_income / total_income return shares diff --git a/paper/scripts/calculate_target_performance.py b/paper/scripts/calculate_target_performance.py index 1a50ab3c4..a3435de24 100644 --- a/paper/scripts/calculate_target_performance.py +++ b/paper/scripts/calculate_target_performance.py @@ -79,8 +79,9 @@ def compare_dataset_performance( # Calculate average improvement by target category categories = { - "IRS Income": lambda x: "employment_income" in x - or "capital_gains" in x, + "IRS Income": lambda x: ( + "employment_income" in x or "capital_gains" in x + ), "Demographics": lambda x: "age_" in x or "population" in x, "Programs": lambda x: "snap" in x or "social_security" in x, "Tax Expenditures": lambda x: "salt" in x or "charitable" in x, diff --git a/policyengine_us_data/calibration/unified_calibration.py b/policyengine_us_data/calibration/unified_calibration.py index 1fb7a6b34..9481f5978 100644 --- a/policyengine_us_data/calibration/unified_calibration.py +++ b/policyengine_us_data/calibration/unified_calibration.py @@ -308,8 +308,7 @@ def fit_l0_weights( initial_weights = np.ones(n_total) * 100 logger.info( - "L0 calibration: %d targets, %d features, " - "lambda_l0=%.1e, epochs=%d", + "L0 calibration: %d targets, %d features, lambda_l0=%.1e, epochs=%d", X_sparse.shape[0], n_total, lambda_l0, diff --git a/policyengine_us_data/calibration/unified_matrix_builder.py b/policyengine_us_data/calibration/unified_matrix_builder.py index ac31c34e1..c1b3d000a 100644 --- a/policyengine_us_data/calibration/unified_matrix_builder.py +++ b/policyengine_us_data/calibration/unified_matrix_builder.py @@ -430,14 +430,12 @@ def print_uprating_summary(self, targets_df: pd.DataFrame) -> None: print("\n" + "=" * 60) print("UPRATING SUMMARY") print("=" * 60) - print(f"Uprated {len(uprated)} of " f"{len(targets_df)} targets") + print(f"Uprated {len(uprated)} of {len(targets_df)} targets") period_counts = uprated["period"].value_counts().sort_index() for period, count in period_counts.items(): print(f" Period {period}: {count} targets") factors = eff[eff != 1.0] - print( - f" Factor range: [{factors.min():.4f}, " f"{factors.max():.4f}]" - ) + print(f" Factor range: [{factors.min():.4f}, {factors.max():.4f}]") # --------------------------------------------------------------- # Target naming @@ -745,7 +743,7 @@ def build_matrix( clone_states = geography.state_fips[col_start:col_end] logger.info( - "Processing clone %d/%d " "(cols %d-%d, %d unique states)...", + "Processing clone %d/%d (cols %d-%d, %d unique states)...", clone_idx + 1, n_clones, col_start, diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py index bbc7f4fba..d1a7c38ed 100644 --- a/policyengine_us_data/datasets/cps/cps.py +++ b/policyengine_us_data/datasets/cps/cps.py @@ -419,7 +419,9 @@ def add_id_variables( # by the index within household (of each person, or their spouse if # one exists earlier in the survey). - marital_unit_id = Series(marital_unit_id).rank( + marital_unit_id = Series( + marital_unit_id + ).rank( method="dense" ) # Simplify to a natural number sequence with repetitions [0, 1, 1, 2, 3, ...] @@ -533,16 +535,16 @@ def add_personal_income_variables( cps["weekly_hours_worked"] = person.HRSWK * person.WKSWORK / 52 cps["hours_worked_last_week"] = person.A_HRS1 * person.WKSWORK / 52 - cps["taxable_interest_income"] = person.INT_VAL * ( - p["taxable_interest_fraction"] + cps["taxable_interest_income"] = ( + person.INT_VAL * (p["taxable_interest_fraction"]) ) cps["tax_exempt_interest_income"] = person.INT_VAL * ( 1 - p["taxable_interest_fraction"] ) cps["self_employment_income"] = person.SEMP_VAL cps["farm_income"] = person.FRSE_VAL - cps["qualified_dividend_income"] = person.DIV_VAL * ( - p["qualified_dividend_fraction"] + cps["qualified_dividend_income"] = ( + person.DIV_VAL * (p["qualified_dividend_fraction"]) ) cps["non_qualified_dividend_income"] = person.DIV_VAL * ( 1 - p["qualified_dividend_fraction"] @@ -740,8 +742,8 @@ def add_personal_income_variables( cps["traditional_ira_contributions"] = ira_capped * trad_ira_share cps["roth_ira_contributions"] = ira_capped * (1 - trad_ira_share) # Allocate capital gains into long-term and short-term based on aggregate split. - cps["long_term_capital_gains"] = person.CAP_VAL * ( - p["long_term_capgain_fraction"] + cps["long_term_capital_gains"] = ( + person.CAP_VAL * (p["long_term_capgain_fraction"]) ) cps["short_term_capital_gains"] = person.CAP_VAL * ( 1 - p["long_term_capgain_fraction"] @@ -1749,25 +1751,63 @@ def _update_documentation_with_numbers(log_df, docs_dir): # Define replacements based on our logging structure replacements = { - "- **Step 0 - Initial**: Code 0 people = *[Run cps.py to populate]*": lambda: f"- **Step 0 - Initial**: Code 0 people = {data_map.get(('Step 0 - Initial', 'Code 0 people'), 0):,.0f}", - "- **Step 1 - Citizens**: Moved to Code 1 = *[Run cps.py to populate]*": lambda: f"- **Step 1 - Citizens**: Moved to Code 1 = {data_map.get(('Step 1 - Citizens', 'Moved to Code 1'), 0):,.0f}", - "- **ASEC Conditions**: Current Code 0 people = *[Run cps.py to populate]*": lambda: f"- **ASEC Conditions**: Current Code 0 people = {data_map.get(('ASEC Conditions', 'Current Code 0 people'), 0):,.0f}", - "- **After conditions**: Code 0 people = *[Run cps.py to populate]*": lambda: f"- **After conditions**: Code 0 people = {data_map.get(('After conditions', 'Code 0 people'), 0):,.0f}", - "- **Before adjustment**: Undocumented workers = *[Run cps.py to populate]*": lambda: f"- **Before adjustment**: Undocumented workers = {data_map.get(('Before adjustment', 'Undocumented workers'), 0):,.0f}", - "- **Target**: Undocumented workers target = *[Run cps.py to populate]*": lambda: f"- **Target**: Undocumented workers target = {data_map.get(('Target', 'Undocumented workers target'), 0):,.0f}", - "- **Before adjustment**: Undocumented students = *[Run cps.py to populate]*": lambda: f"- **Before adjustment**: Undocumented students = {data_map.get(('Before adjustment', 'Undocumented students'), 0):,.0f}", - "- **Target**: Undocumented students target = *[Run cps.py to populate]*": lambda: f"- **Target**: Undocumented students target = {data_map.get(('Target', 'Undocumented students target'), 0):,.0f}", - "- **Step 3 - EAD workers**: Moved from Code 0 to Code 2 = *[Run cps.py to populate]*": lambda: f"- **Step 3 - EAD workers**: Moved from Code 0 to Code 2 = {data_map.get(('Step 3 - EAD workers', 'Moved from Code 0 to Code 2'), 0):,.0f}", - "- **Step 4 - EAD students**: Moved from Code 0 to Code 2 = *[Run cps.py to populate]*": lambda: f"- **Step 4 - EAD students**: Moved from Code 0 to Code 2 = {data_map.get(('Step 4 - EAD students', 'Moved from Code 0 to Code 2'), 0):,.0f}", - "- **After EAD assignment**: Code 0 people = *[Run cps.py to populate]*": lambda: f"- **After EAD assignment**: Code 0 people = {data_map.get(('After EAD assignment', 'Code 0 people'), 0):,.0f}", - "- **Step 5 - Family correlation**: Changed from Code 3 to Code 0 = *[Run cps.py to populate]*": lambda: f"- **Step 5 - Family correlation**: Changed from Code 3 to Code 0 = {data_map.get(('Step 5 - Family correlation', 'Changed from Code 3 to Code 0'), 0):,.0f}", - "- **After family correlation**: Code 0 people = *[Run cps.py to populate]*": lambda: f"- **After family correlation**: Code 0 people = {data_map.get(('After family correlation', 'Code 0 people'), 0):,.0f}", - "- **Final**: Code 0 (NONE) = *[Run cps.py to populate]*": lambda: f"- **Final**: Code 0 (NONE) = {data_map.get(('Final', 'Code 0 (NONE)'), 0):,.0f}", - "- **Final**: Code 1 (CITIZEN) = *[Run cps.py to populate]*": lambda: f"- **Final**: Code 1 (CITIZEN) = {data_map.get(('Final', 'Code 1 (CITIZEN)'), 0):,.0f}", - "- **Final**: Code 2 (NON_CITIZEN_VALID_EAD) = *[Run cps.py to populate]*": lambda: f"- **Final**: Code 2 (NON_CITIZEN_VALID_EAD) = {data_map.get(('Final', 'Code 2 (NON_CITIZEN_VALID_EAD)'), 0):,.0f}", - "- **Final**: Code 3 (OTHER_NON_CITIZEN) = *[Run cps.py to populate]*": lambda: f"- **Final**: Code 3 (OTHER_NON_CITIZEN) = {data_map.get(('Final', 'Code 3 (OTHER_NON_CITIZEN)'), 0):,.0f}", - "- **Final**: Total undocumented (Code 0) = *[Run cps.py to populate]*": lambda: f"- **Final**: Total undocumented (Code 0) = {data_map.get(('Final', 'Total undocumented (Code 0)'), 0):,.0f}", - "- **Final**: Undocumented target = *[Run cps.py to populate]*": lambda: f"- **Final**: Undocumented target = {data_map.get(('Final', 'Undocumented target'), 0):,.0f}", + "- **Step 0 - Initial**: Code 0 people = *[Run cps.py to populate]*": lambda: ( + f"- **Step 0 - Initial**: Code 0 people = {data_map.get(('Step 0 - Initial', 'Code 0 people'), 0):,.0f}" + ), + "- **Step 1 - Citizens**: Moved to Code 1 = *[Run cps.py to populate]*": lambda: ( + f"- **Step 1 - Citizens**: Moved to Code 1 = {data_map.get(('Step 1 - Citizens', 'Moved to Code 1'), 0):,.0f}" + ), + "- **ASEC Conditions**: Current Code 0 people = *[Run cps.py to populate]*": lambda: ( + f"- **ASEC Conditions**: Current Code 0 people = {data_map.get(('ASEC Conditions', 'Current Code 0 people'), 0):,.0f}" + ), + "- **After conditions**: Code 0 people = *[Run cps.py to populate]*": lambda: ( + f"- **After conditions**: Code 0 people = {data_map.get(('After conditions', 'Code 0 people'), 0):,.0f}" + ), + "- **Before adjustment**: Undocumented workers = *[Run cps.py to populate]*": lambda: ( + f"- **Before adjustment**: Undocumented workers = {data_map.get(('Before adjustment', 'Undocumented workers'), 0):,.0f}" + ), + "- **Target**: Undocumented workers target = *[Run cps.py to populate]*": lambda: ( + f"- **Target**: Undocumented workers target = {data_map.get(('Target', 'Undocumented workers target'), 0):,.0f}" + ), + "- **Before adjustment**: Undocumented students = *[Run cps.py to populate]*": lambda: ( + f"- **Before adjustment**: Undocumented students = {data_map.get(('Before adjustment', 'Undocumented students'), 0):,.0f}" + ), + "- **Target**: Undocumented students target = *[Run cps.py to populate]*": lambda: ( + f"- **Target**: Undocumented students target = {data_map.get(('Target', 'Undocumented students target'), 0):,.0f}" + ), + "- **Step 3 - EAD workers**: Moved from Code 0 to Code 2 = *[Run cps.py to populate]*": lambda: ( + f"- **Step 3 - EAD workers**: Moved from Code 0 to Code 2 = {data_map.get(('Step 3 - EAD workers', 'Moved from Code 0 to Code 2'), 0):,.0f}" + ), + "- **Step 4 - EAD students**: Moved from Code 0 to Code 2 = *[Run cps.py to populate]*": lambda: ( + f"- **Step 4 - EAD students**: Moved from Code 0 to Code 2 = {data_map.get(('Step 4 - EAD students', 'Moved from Code 0 to Code 2'), 0):,.0f}" + ), + "- **After EAD assignment**: Code 0 people = *[Run cps.py to populate]*": lambda: ( + f"- **After EAD assignment**: Code 0 people = {data_map.get(('After EAD assignment', 'Code 0 people'), 0):,.0f}" + ), + "- **Step 5 - Family correlation**: Changed from Code 3 to Code 0 = *[Run cps.py to populate]*": lambda: ( + f"- **Step 5 - Family correlation**: Changed from Code 3 to Code 0 = {data_map.get(('Step 5 - Family correlation', 'Changed from Code 3 to Code 0'), 0):,.0f}" + ), + "- **After family correlation**: Code 0 people = *[Run cps.py to populate]*": lambda: ( + f"- **After family correlation**: Code 0 people = {data_map.get(('After family correlation', 'Code 0 people'), 0):,.0f}" + ), + "- **Final**: Code 0 (NONE) = *[Run cps.py to populate]*": lambda: ( + f"- **Final**: Code 0 (NONE) = {data_map.get(('Final', 'Code 0 (NONE)'), 0):,.0f}" + ), + "- **Final**: Code 1 (CITIZEN) = *[Run cps.py to populate]*": lambda: ( + f"- **Final**: Code 1 (CITIZEN) = {data_map.get(('Final', 'Code 1 (CITIZEN)'), 0):,.0f}" + ), + "- **Final**: Code 2 (NON_CITIZEN_VALID_EAD) = *[Run cps.py to populate]*": lambda: ( + f"- **Final**: Code 2 (NON_CITIZEN_VALID_EAD) = {data_map.get(('Final', 'Code 2 (NON_CITIZEN_VALID_EAD)'), 0):,.0f}" + ), + "- **Final**: Code 3 (OTHER_NON_CITIZEN) = *[Run cps.py to populate]*": lambda: ( + f"- **Final**: Code 3 (OTHER_NON_CITIZEN) = {data_map.get(('Final', 'Code 3 (OTHER_NON_CITIZEN)'), 0):,.0f}" + ), + "- **Final**: Total undocumented (Code 0) = *[Run cps.py to populate]*": lambda: ( + f"- **Final**: Total undocumented (Code 0) = {data_map.get(('Final', 'Total undocumented (Code 0)'), 0):,.0f}" + ), + "- **Final**: Undocumented target = *[Run cps.py to populate]*": lambda: ( + f"- **Final**: Undocumented target = {data_map.get(('Final', 'Undocumented target'), 0):,.0f}" + ), } # Apply replacements diff --git a/policyengine_us_data/datasets/cps/local_area_calibration/calibration_utils.py b/policyengine_us_data/datasets/cps/local_area_calibration/calibration_utils.py index 97c82360d..9facfb27c 100644 --- a/policyengine_us_data/datasets/cps/local_area_calibration/calibration_utils.py +++ b/policyengine_us_data/datasets/cps/local_area_calibration/calibration_utils.py @@ -440,9 +440,7 @@ def drop_target_groups( drop_ids.add(gid) matched = True if not matched: - print( - f" WARNING: no match for " f"({label_substr!r}, {geo_name!r})" - ) + print(f" WARNING: no match for ({label_substr!r}, {geo_name!r})") keep_mask = ~np.isin(target_groups, list(drop_ids)) diff --git a/policyengine_us_data/datasets/cps/local_area_calibration/create_stratified_cps.py b/policyengine_us_data/datasets/cps/local_area_calibration/create_stratified_cps.py index 54d9a959f..e2632366c 100644 --- a/policyengine_us_data/datasets/cps/local_area_calibration/create_stratified_cps.py +++ b/policyengine_us_data/datasets/cps/local_area_calibration/create_stratified_cps.py @@ -57,7 +57,7 @@ def create_stratified_cps_dataset( print(f"Original dataset: {n_households_orig:,} households") print(f"Target dataset: {target_households:,} households") - print(f"Reduction ratio: {target_households/n_households_orig:.1%}") + print(f"Reduction ratio: {target_households / n_households_orig:.1%}") # Show income distribution print("\nAGI Percentiles (original):") @@ -88,7 +88,7 @@ def create_stratified_cps_dataset( remaining_quota = target_households - n_top if remaining_quota <= 0: raise ValueError( - f"Target ({target_households:,}) is less than top {100-high_income_percentile}% " + f"Target ({target_households:,}) is less than top {100 - high_income_percentile}% " f"count ({n_top:,}). Increase target_households." ) @@ -176,7 +176,7 @@ def create_stratified_cps_dataset( n_selected = np.sum(selected_mask) print( - f"\nTotal selected: {n_selected:,} households ({n_selected/n_households_orig:.1%} of original)" + f"\nTotal selected: {n_selected:,} households ({n_selected / n_households_orig:.1%} of original)" ) # Verify high earners are preserved diff --git a/policyengine_us_data/datasets/cps/local_area_calibration/publish_local_area.py b/policyengine_us_data/datasets/cps/local_area_calibration/publish_local_area.py index 4963f3979..e49b411bb 100644 --- a/policyengine_us_data/datasets/cps/local_area_calibration/publish_local_area.py +++ b/policyengine_us_data/datasets/cps/local_area_calibration/publish_local_area.py @@ -113,9 +113,9 @@ def build_state_h5( states_dir.mkdir(parents=True, exist_ok=True) output_path = states_dir / f"{state_code}.h5" - print(f"\n{'='*60}") + print(f"\n{'=' * 60}") print(f"Building {state_code} ({len(cd_subset)} CDs)") - print(f"{'='*60}") + print(f"{'=' * 60}") create_sparse_cd_stacked_dataset( weights, @@ -158,9 +158,9 @@ def build_district_h5( districts_dir.mkdir(parents=True, exist_ok=True) output_path = districts_dir / f"{friendly_name}.h5" - print(f"\n{'='*60}") + print(f"\n{'=' * 60}") print(f"Building {friendly_name}") - print(f"{'='*60}") + print(f"{'=' * 60}") create_sparse_cd_stacked_dataset( weights, @@ -208,9 +208,9 @@ def build_city_h5( cities_dir.mkdir(parents=True, exist_ok=True) output_path = cities_dir / "NYC.h5" - print(f"\n{'='*60}") + print(f"\n{'=' * 60}") print(f"Building NYC ({len(cd_subset)} CDs)") - print(f"{'='*60}") + print(f"{'=' * 60}") create_sparse_cd_stacked_dataset( weights, @@ -264,9 +264,9 @@ def build_and_upload_states( continue output_path = states_dir / f"{state_code}.h5" - print(f"\n{'='*60}") + print(f"\n{'=' * 60}") print(f"Building {state_code} ({len(cd_subset)} CDs)") - print(f"{'='*60}") + print(f"{'=' * 60}") try: create_sparse_cd_stacked_dataset( @@ -336,9 +336,9 @@ def build_and_upload_districts( continue output_path = districts_dir / f"{friendly_name}.h5" - print(f"\n{'='*60}") - print(f"[{i+1}/{len(cds_to_calibrate)}] Building {friendly_name}") - print(f"{'='*60}") + print(f"\n{'=' * 60}") + print(f"[{i + 1}/{len(cds_to_calibrate)}] Building {friendly_name}") + print(f"{'=' * 60}") try: create_sparse_cd_stacked_dataset( @@ -405,9 +405,9 @@ def build_and_upload_cities( print("No NYC-related CDs found, skipping") else: output_path = cities_dir / "NYC.h5" - print(f"\n{'='*60}") + print(f"\n{'=' * 60}") print(f"Building NYC ({len(cd_subset)} CDs)") - print(f"{'='*60}") + print(f"{'=' * 60}") try: create_sparse_cd_stacked_dataset( diff --git a/policyengine_us_data/datasets/cps/local_area_calibration/stacked_dataset_builder.py b/policyengine_us_data/datasets/cps/local_area_calibration/stacked_dataset_builder.py index 010e151f3..a7c28c4ef 100644 --- a/policyengine_us_data/datasets/cps/local_area_calibration/stacked_dataset_builder.py +++ b/policyengine_us_data/datasets/cps/local_area_calibration/stacked_dataset_builder.py @@ -588,9 +588,9 @@ def create_sparse_cd_stacked_dataset( # Check weights in combined_df AFTER reindexing print(f"\nWeights in combined_df AFTER reindexing:") - print(f" HH weight sum: {combined_df[hh_weight_col].sum()/1e6:.2f}M") + print(f" HH weight sum: {combined_df[hh_weight_col].sum() / 1e6:.2f}M") print( - f" Person weight sum: {combined_df[person_weight_col].sum()/1e6:.2f}M" + f" Person weight sum: {combined_df[person_weight_col].sum() / 1e6:.2f}M" ) print( f" Ratio: {combined_df[person_weight_col].sum() / combined_df[hh_weight_col].sum():.2f}" @@ -852,7 +852,7 @@ def create_sparse_cd_stacked_dataset( output_path = f"{output_dir}/{friendly_name}.h5" print( - f"\n[{i+1}/{len(cds_to_calibrate)}] Creating {friendly_name}.h5 (GEOID {cd_geoid})" + f"\n[{i + 1}/{len(cds_to_calibrate)}] Creating {friendly_name}.h5 (GEOID {cd_geoid})" ) create_sparse_cd_stacked_dataset( w, diff --git a/policyengine_us_data/datasets/cps/long_term/check_calibrated_estimates_interactive.py b/policyengine_us_data/datasets/cps/long_term/check_calibrated_estimates_interactive.py index af0414841..28bdfd3ec 100644 --- a/policyengine_us_data/datasets/cps/long_term/check_calibrated_estimates_interactive.py +++ b/policyengine_us_data/datasets/cps/long_term/check_calibrated_estimates_interactive.py @@ -295,7 +295,7 @@ def create_h6_reform(): # Calculate impact revenue_impact = reform_revenue - baseline_revenue -print(f"revenue_impact (B): {revenue_impact / 1E9:.2f}") +print(f"revenue_impact (B): {revenue_impact / 1e9:.2f}") # Calculate taxable payroll taxable_ss_earnings = baseline.calculate( diff --git a/policyengine_us_data/datasets/cps/long_term/run_household_projection.py b/policyengine_us_data/datasets/cps/long_term/run_household_projection.py index 651f7b504..30d1857ad 100644 --- a/policyengine_us_data/datasets/cps/long_term/run_household_projection.py +++ b/policyengine_us_data/datasets/cps/long_term/run_household_projection.py @@ -341,7 +341,7 @@ def create_h6_reform(): idx = y - START_YEAR if idx < n_years: pop = target_matrix[:, idx].sum() - print(f" {y}: {pop/1e6:6.1f}M") + print(f" {y}: {pop / 1e6:6.1f}M") # ========================================================================= # STEP 2: BUILD HOUSEHOLD AGE MATRIX @@ -413,7 +413,7 @@ def create_h6_reform(): if year in display_years: ss_baseline = np.sum(ss_values * baseline_weights) print( - f" [DEBUG {year}] SS baseline: ${ss_baseline/1e9:.1f}B, target: ${ss_target/1e9:.1f}B" + f" [DEBUG {year}] SS baseline: ${ss_baseline / 1e9:.1f}B, target: ${ss_target / 1e9:.1f}B" ) payroll_values = None @@ -435,7 +435,7 @@ def create_h6_reform(): if year in display_years: payroll_baseline = np.sum(payroll_values * baseline_weights) print( - f" [DEBUG {year}] Payroll baseline: ${payroll_baseline/1e9:.1f}B, target: ${payroll_target/1e9:.1f}B" + f" [DEBUG {year}] Payroll baseline: ${payroll_baseline / 1e9:.1f}B, target: ${payroll_target / 1e9:.1f}B" ) h6_income_values = None @@ -476,10 +476,10 @@ def create_h6_reform(): h6_income_values * baseline_weights ) print( - f" [DEBUG {year}] H6 baseline revenue: ${h6_impact_baseline/1e9:.3f}B, target: ${h6_revenue_target/1e9:.3f}B" + f" [DEBUG {year}] H6 baseline revenue: ${h6_impact_baseline / 1e9:.3f}B, target: ${h6_revenue_target / 1e9:.3f}B" ) print( - f" [DEBUG {year}] H6 target ratio: {h6_target_ratio:.4f} × payroll ${payroll_target_year/1e9:.1f}B" + f" [DEBUG {year}] H6 target ratio: {h6_target_ratio:.4f} × payroll ${payroll_target_year / 1e9:.1f}B" ) del reform_sim @@ -506,10 +506,10 @@ def create_h6_reform(): oasdi_baseline = np.sum(oasdi_tob_values * baseline_weights) hi_baseline = np.sum(hi_tob_values * baseline_weights) print( - f" [DEBUG {year}] OASDI TOB baseline: ${oasdi_baseline/1e9:.1f}B, target: ${oasdi_tob_target/1e9:.1f}B" + f" [DEBUG {year}] OASDI TOB baseline: ${oasdi_baseline / 1e9:.1f}B, target: ${oasdi_tob_target / 1e9:.1f}B" ) print( - f" [DEBUG {year}] HI TOB baseline: ${hi_baseline/1e9:.1f}B, target: ${hi_tob_target/1e9:.1f}B" + f" [DEBUG {year}] HI TOB baseline: ${hi_baseline / 1e9:.1f}B, target: ${hi_tob_target / 1e9:.1f}B" ) y_target = target_matrix[:, year_idx] @@ -557,12 +557,12 @@ def create_h6_reform(): if USE_SS: ss_achieved = np.sum(ss_values * w_new) print( - f" [DEBUG {year}] SS achieved: ${ss_achieved/1e9:.1f}B (error: ${abs(ss_achieved - ss_target)/1e6:.1f}M, {(ss_achieved - ss_target)/ss_target*100:.3f}%)" + f" [DEBUG {year}] SS achieved: ${ss_achieved / 1e9:.1f}B (error: ${abs(ss_achieved - ss_target) / 1e6:.1f}M, {(ss_achieved - ss_target) / ss_target * 100:.3f}%)" ) if USE_PAYROLL: payroll_achieved = np.sum(payroll_values * w_new) print( - f" [DEBUG {year}] Payroll achieved: ${payroll_achieved/1e9:.1f}B (error: ${abs(payroll_achieved - payroll_target)/1e6:.1f}M, {(payroll_achieved - payroll_target)/payroll_target*100:.3f}%)" + f" [DEBUG {year}] Payroll achieved: ${payroll_achieved / 1e9:.1f}B (error: ${abs(payroll_achieved - payroll_target) / 1e6:.1f}M, {(payroll_achieved - payroll_target) / payroll_target * 100:.3f}%)" ) if USE_H6_REFORM and h6_revenue_target is not None: h6_revenue_achieved = np.sum(h6_income_values * w_new) @@ -574,16 +574,16 @@ def create_h6_reform(): else 0 ) print( - f" [DEBUG {year}] H6 achieved revenue: ${h6_revenue_achieved/1e9:.3f}B (error: ${abs(h6_revenue_achieved - h6_revenue_target)/1e6:.1f}M, {error_pct:.3f}%)" + f" [DEBUG {year}] H6 achieved revenue: ${h6_revenue_achieved / 1e9:.3f}B (error: ${abs(h6_revenue_achieved - h6_revenue_target) / 1e6:.1f}M, {error_pct:.3f}%)" ) if USE_TOB: oasdi_achieved = np.sum(oasdi_tob_values * w_new) hi_achieved = np.sum(hi_tob_values * w_new) print( - f" [DEBUG {year}] OASDI TOB achieved: ${oasdi_achieved/1e9:.1f}B (error: ${abs(oasdi_achieved - oasdi_tob_target)/1e6:.1f}M, {(oasdi_achieved - oasdi_tob_target)/oasdi_tob_target*100:.3f}%)" + f" [DEBUG {year}] OASDI TOB achieved: ${oasdi_achieved / 1e9:.1f}B (error: ${abs(oasdi_achieved - oasdi_tob_target) / 1e6:.1f}M, {(oasdi_achieved - oasdi_tob_target) / oasdi_tob_target * 100:.3f}%)" ) print( - f" [DEBUG {year}] HI TOB achieved: ${hi_achieved/1e9:.1f}B (error: ${abs(hi_achieved - hi_tob_target)/1e6:.1f}M, {(hi_achieved - hi_tob_target)/hi_tob_target*100:.3f}%)" + f" [DEBUG {year}] HI TOB achieved: ${hi_achieved / 1e9:.1f}B (error: ${abs(hi_achieved - hi_tob_target) / 1e6:.1f}M, {(hi_achieved - hi_tob_target) / hi_tob_target * 100:.3f}%)" ) weights_matrix[:, year_idx] = w_new @@ -613,5 +613,5 @@ def create_h6_reform(): ) elif year_idx % 5 == 0: print( - f"{year} Processing... ({year_idx+1}/{n_years}) {mem_gb:.2f}GB" + f"{year} Processing... ({year_idx + 1}/{n_years}) {mem_gb:.2f}GB" ) diff --git a/policyengine_us_data/datasets/cps/small_enhanced_cps.py b/policyengine_us_data/datasets/cps/small_enhanced_cps.py index c84181eae..1481bb046 100644 --- a/policyengine_us_data/datasets/cps/small_enhanced_cps.py +++ b/policyengine_us_data/datasets/cps/small_enhanced_cps.py @@ -22,8 +22,7 @@ def create_small_ecps(): weights = simulation.calculate("household_weight").values if np.all(weights == 0): raise ValueError( - "create_small_ecps: all household weights are zero " - "after subsample" + "create_small_ecps: all household weights are zero after subsample" ) logging.info( f"create_small_ecps: subsample has " @@ -156,8 +155,7 @@ def create_sparse_ecps(): f"(expected > 1MB)" ) logging.info( - f"create_sparse_ecps: wrote {file_size / 1e6:.1f}MB to " - f"{output_path}" + f"create_sparse_ecps: wrote {file_size / 1e6:.1f}MB to {output_path}" ) diff --git a/policyengine_us_data/datasets/puf/puf.py b/policyengine_us_data/datasets/puf/puf.py index f52153e38..ae8cf4fe8 100644 --- a/policyengine_us_data/datasets/puf/puf.py +++ b/policyengine_us_data/datasets/puf/puf.py @@ -155,9 +155,9 @@ def simulate_w2_and_ubia_from_puf(puf, *, seed=None, diagnostics=True): print(f"Share with QBI > 0: {share_qbi_pos:6.2%}") print(f"Among those, share with W-2 wages: {share_wages:6.2%}") if np.any(w2_wages > 0): - print(f"Mean W-2 (if >0): ${np.mean(w2_wages[w2_wages>0]):,.0f}") + print(f"Mean W-2 (if >0): ${np.mean(w2_wages[w2_wages > 0]):,.0f}") if np.any(ubia > 0): - print(f"Median UBIA (if >0): ${np.median(ubia[ubia>0]):,.0f}") + print(f"Median UBIA (if >0): ${np.median(ubia[ubia > 0]):,.0f}") return w2_wages, ubia diff --git a/policyengine_us_data/db/create_database_tables.py b/policyengine_us_data/db/create_database_tables.py index 8590f79e8..be22fcbbb 100644 --- a/policyengine_us_data/db/create_database_tables.py +++ b/policyengine_us_data/db/create_database_tables.py @@ -306,8 +306,7 @@ def validate_parent_child_constraints(mapper, connection, target: Stratum): if any(int(cv) == int(val) for cv in child_vals): continue raise ValueError( - f"Child stratum must include parent constraint " - f"({var} {op} {val})" + f"Child stratum must include parent constraint ({var} {op} {val})" ) diff --git a/policyengine_us_data/db/etl_irs_soi.py b/policyengine_us_data/db/etl_irs_soi.py index aa8122a59..3b155bee9 100644 --- a/policyengine_us_data/db/etl_irs_soi.py +++ b/policyengine_us_data/db/etl_irs_soi.py @@ -492,9 +492,9 @@ def load_soi_data(long_dfs, year): session.add(district_filer_stratum) session.flush() - filer_strata["district"][ - district_geoid - ] = district_filer_stratum.stratum_id + filer_strata["district"][district_geoid] = ( + district_filer_stratum.stratum_id + ) session.commit() @@ -636,9 +636,9 @@ def load_soi_data(long_dfs, year): # Store lookup for later use if geo_info["type"] == "national": - eitc_stratum_lookup["national"][ - n_children - ] = new_stratum.stratum_id + eitc_stratum_lookup["national"][n_children] = ( + new_stratum.stratum_id + ) elif geo_info["type"] == "state": key = (geo_info["state_fips"], n_children) eitc_stratum_lookup["state"][key] = new_stratum.stratum_id @@ -1000,9 +1000,9 @@ def load_soi_data(long_dfs, year): session.flush() if geo_info["type"] == "state": - agi_stratum_lookup["state"][ - geo_info["state_fips"] - ] = new_stratum.stratum_id + agi_stratum_lookup["state"][geo_info["state_fips"]] = ( + new_stratum.stratum_id + ) elif geo_info["type"] == "district": agi_stratum_lookup["district"][ geo_info["congressional_district_geoid"] diff --git a/policyengine_us_data/db/etl_medicaid.py b/policyengine_us_data/db/etl_medicaid.py index dfc19cdcc..83aa28d17 100644 --- a/policyengine_us_data/db/etl_medicaid.py +++ b/policyengine_us_data/db/etl_medicaid.py @@ -222,9 +222,9 @@ def load_medicaid_data(long_state, long_cd, year): ) session.add(new_stratum) session.flush() - medicaid_stratum_lookup["state"][ - state_fips - ] = new_stratum.stratum_id + medicaid_stratum_lookup["state"][state_fips] = ( + new_stratum.stratum_id + ) # District ------------------- if long_cd is None: diff --git a/policyengine_us_data/db/etl_pregnancy.py b/policyengine_us_data/db/etl_pregnancy.py index de3fec9dc..479bcdd2b 100644 --- a/policyengine_us_data/db/etl_pregnancy.py +++ b/policyengine_us_data/db/etl_pregnancy.py @@ -246,9 +246,7 @@ def load_pregnancy_data( df: From transform_pregnancy_data. year: Target year for the calibration targets. """ - db_url = ( - f"sqlite:///" f"{STORAGE_FOLDER / 'calibration' / 'policy_data.db'}" - ) + db_url = f"sqlite:///{STORAGE_FOLDER / 'calibration' / 'policy_data.db'}" engine = create_engine(db_url) with Session(engine) as session: @@ -274,8 +272,7 @@ def load_pregnancy_data( state_fips = int(row["state_fips"]) if state_fips not in geo_strata["state"]: logger.warning( - f"No geographic stratum for FIPS " - f"{state_fips}, skipping" + f"No geographic stratum for FIPS {state_fips}, skipping" ) continue @@ -369,7 +366,7 @@ def main(): logger.warning(f"ACS {acs_year} not available: {e}") if pop_df is None: raise RuntimeError( - f"No ACS population data for " f"{year - 1} or {year - 2}" + f"No ACS population data for {year - 1} or {year - 2}" ) df = transform_pregnancy_data(births_df, pop_df) @@ -377,7 +374,7 @@ def main(): total_births = df["births"].sum() total_target = df["pregnancy_target"].sum() print(f"Total births: {total_births:,.0f}") - print(f"Pregnancy target (point-in-time): " f"{total_target:,.0f}") + print(f"Pregnancy target (point-in-time): {total_target:,.0f}") load_pregnancy_data(df, year) print("Pregnancy calibration targets loaded.") diff --git a/policyengine_us_data/db/validate_hierarchy.py b/policyengine_us_data/db/validate_hierarchy.py index 69a176f2e..21ef7d46f 100644 --- a/policyengine_us_data/db/validate_hierarchy.py +++ b/policyengine_us_data/db/validate_hierarchy.py @@ -238,7 +238,7 @@ def validate_demographic_strata(session): print(f"✓ {domain}: {actual} strata") elif actual == 0: errors.append( - f"ERROR: {domain} has no strata, " f"expected {expected_total}" + f"ERROR: {domain} has no strata, expected {expected_total}" ) else: errors.append( @@ -291,11 +291,9 @@ def validate_demographic_strata(session): ) else: no_parents += 1 - errors.append( - f"ERROR: Stratum {stratum.stratum_id} " f"has no parent" - ) + errors.append(f"ERROR: Stratum {stratum.stratum_id} has no parent") - print(f" Sample of {len(sample_strata)} " f"demographic strata:") + print(f" Sample of {len(sample_strata)} demographic strata:") print(f" - With geographic parent: {correct_parents}") print(f" - With wrong parent: {wrong_parents}") print(f" - With no parent: {no_parents}") diff --git a/policyengine_us_data/storage/calibration_targets/pull_soi_targets.py b/policyengine_us_data/storage/calibration_targets/pull_soi_targets.py index 59050a1b3..ddb1bfee5 100644 --- a/policyengine_us_data/storage/calibration_targets/pull_soi_targets.py +++ b/policyengine_us_data/storage/calibration_targets/pull_soi_targets.py @@ -308,20 +308,20 @@ def pull_district_soi_variable( # Check that all GEO_IDs are valid produced_codes = set(result["GEO_ID"]) invalid_codes = produced_codes - valid_district_codes - assert ( - not invalid_codes - ), f"Invalid district codes after redistricting: {invalid_codes}" + assert not invalid_codes, ( + f"Invalid district codes after redistricting: {invalid_codes}" + ) # Check we have exactly 436 districts - assert ( - len(produced_codes) == 436 - ), f"Expected 436 districts after redistricting, got {len(produced_codes)}" + assert len(produced_codes) == 436, ( + f"Expected 436 districts after redistricting, got {len(produced_codes)}" + ) # Check that all GEO_IDs successfully mapped to names missing_names = result[result["GEO_NAME"].isna()]["GEO_ID"].unique() - assert ( - len(missing_names) == 0 - ), f"GEO_IDs without names in ID_TO_NAME mapping: {missing_names}" + assert len(missing_names) == 0, ( + f"GEO_IDs without names in ID_TO_NAME mapping: {missing_names}" + ) # final column order result = result[ diff --git a/policyengine_us_data/tests/test_calibration/test_clone_and_assign.py b/policyengine_us_data/tests/test_calibration/test_clone_and_assign.py index 0ba330549..f556ccdbf 100644 --- a/policyengine_us_data/tests/test_calibration/test_clone_and_assign.py +++ b/policyengine_us_data/tests/test_calibration/test_clone_and_assign.py @@ -66,8 +66,7 @@ def test_loads_and_normalizes(self, tmp_path): csv_path = tmp_path / "block_cd_distributions.csv.gz" MOCK_BLOCKS.to_csv(csv_path, index=False, compression="gzip") with patch( - "policyengine_us_data.calibration" - ".clone_and_assign.STORAGE_FOLDER", + "policyengine_us_data.calibration.clone_and_assign.STORAGE_FOLDER", tmp_path, ): blocks, cds, states, probs = ( @@ -80,8 +79,7 @@ def test_state_fips_extracted(self, tmp_path): csv_path = tmp_path / "block_cd_distributions.csv.gz" MOCK_BLOCKS.to_csv(csv_path, index=False, compression="gzip") with patch( - "policyengine_us_data.calibration" - ".clone_and_assign.STORAGE_FOLDER", + "policyengine_us_data.calibration.clone_and_assign.STORAGE_FOLDER", tmp_path, ): _, _, states, _ = load_global_block_distribution.__wrapped__() @@ -137,8 +135,7 @@ def test_missing_file_raises(self, tmp_path): fake = tmp_path / "nonexistent" fake.mkdir() with patch( - "policyengine_us_data.calibration" - ".clone_and_assign.STORAGE_FOLDER", + "policyengine_us_data.calibration.clone_and_assign.STORAGE_FOLDER", fake, ): with pytest.raises(FileNotFoundError): diff --git a/policyengine_us_data/tests/test_calibration/test_retirement_imputation.py b/policyengine_us_data/tests/test_calibration/test_retirement_imputation.py index ce261a02b..856c9ab02 100644 --- a/policyengine_us_data/tests/test_calibration/test_retirement_imputation.py +++ b/policyengine_us_data/tests/test_calibration/test_retirement_imputation.py @@ -139,9 +139,9 @@ class TestConstants: def test_retirement_vars_not_in_imputed(self): """Retirement vars must NOT be in IMPUTED_VARIABLES.""" for var in CPS_RETIREMENT_VARIABLES: - assert ( - var not in IMPUTED_VARIABLES - ), f"{var} should not be in IMPUTED_VARIABLES" + assert var not in IMPUTED_VARIABLES, ( + f"{var} should not be in IMPUTED_VARIABLES" + ) def test_retirement_vars_not_in_overridden(self): for var in CPS_RETIREMENT_VARIABLES: @@ -171,9 +171,9 @@ def test_retirement_predictors_include_demographics(self): def test_income_predictors_in_imputed_variables(self): """All income predictors must be available from PUF QRF.""" for var in RETIREMENT_INCOME_PREDICTORS: - assert ( - var in IMPUTED_VARIABLES - ), f"{var} not in IMPUTED_VARIABLES — won't be in puf_imputations" + assert var in IMPUTED_VARIABLES, ( + f"{var} not in IMPUTED_VARIABLES — won't be in puf_imputations" + ) def test_predictors_are_combined_lists(self): expected = ( @@ -367,9 +367,9 @@ def test_401k_zero_when_no_wages(self): "traditional_401k_contributions", "roth_401k_contributions", ): - assert np.all( - result[var][zero_wage] == 0 - ), f"{var} should be 0 when employment_income is 0" + assert np.all(result[var][zero_wage] == 0), ( + f"{var} should be 0 when employment_income is 0" + ) def test_se_pension_zero_when_no_se_income(self): result = self._call_with_mocks(self._uniform_preds(5_000.0)) @@ -707,6 +707,6 @@ def test_401k_ira_from_policyengine_us(self): ours = _get_retirement_limits(year) pe = pe_limits(year) for key in ["401k", "401k_catch_up", "ira", "ira_catch_up"]: - assert ( - ours[key] == pe[key] - ), f"Year {year} key {key}: {ours[key]} != {pe[key]}" + assert ours[key] == pe[key], ( + f"Year {year} key {key}: {ours[key]} != {pe[key]}" + ) diff --git a/policyengine_us_data/tests/test_database.py b/policyengine_us_data/tests/test_database.py index c9cf14c7c..e0e329e53 100644 --- a/policyengine_us_data/tests/test_database.py +++ b/policyengine_us_data/tests/test_database.py @@ -14,7 +14,7 @@ @pytest.fixture def engine(tmp_path): - db_uri = f"sqlite:///{tmp_path/'test.db'}" + db_uri = f"sqlite:///{tmp_path / 'test.db'}" return create_database(db_uri) diff --git a/policyengine_us_data/tests/test_database_build.py b/policyengine_us_data/tests/test_database_build.py index 3c0e4fb3f..36562149a 100644 --- a/policyengine_us_data/tests/test_database_build.py +++ b/policyengine_us_data/tests/test_database_build.py @@ -126,8 +126,7 @@ def test_national_targets_loaded(built_db): variables = {r[0] for r in rows} for expected in ["snap", "social_security", "ssi"]: assert expected in variables, ( - f"National target '{expected}' missing. " - f"Found: {sorted(variables)}" + f"National target '{expected}' missing. Found: {sorted(variables)}" ) @@ -153,8 +152,7 @@ def test_state_income_tax_targets(built_db): ca_val = state_totals.get("06") or state_totals.get("6") assert ca_val is not None, "California (FIPS 06) target missing" assert ca_val > 100e9, ( - f"California income tax should be > $100B, " - f"got ${ca_val / 1e9:.1f}B" + f"California income tax should be > $100B, got ${ca_val / 1e9:.1f}B" ) diff --git a/policyengine_us_data/tests/test_datasets/test_county_fips.py b/policyengine_us_data/tests/test_datasets/test_county_fips.py index d692cf559..0414aa55f 100644 --- a/policyengine_us_data/tests/test_datasets/test_county_fips.py +++ b/policyengine_us_data/tests/test_datasets/test_county_fips.py @@ -104,7 +104,6 @@ def test_download_failure(): patch("requests.get", return_value=failed_response), pytest.raises(ValueError) as excinfo, ): - # Run the function, expect ValueError generate_county_fips_2020_dataset() diff --git a/policyengine_us_data/tests/test_datasets/test_dataset_sanity.py b/policyengine_us_data/tests/test_datasets/test_dataset_sanity.py index 4aeb13e6f..30c9f3155 100644 --- a/policyengine_us_data/tests/test_datasets/test_dataset_sanity.py +++ b/policyengine_us_data/tests/test_datasets/test_dataset_sanity.py @@ -41,17 +41,17 @@ def test_ecps_employment_income_positive(ecps_sim): def test_ecps_self_employment_income_positive(ecps_sim): total = ecps_sim.calculate("self_employment_income").sum() - assert ( - total > 50e9 - ), f"self_employment_income sum is {total:.2e}, expected > 50B." + assert total > 50e9, ( + f"self_employment_income sum is {total:.2e}, expected > 50B." + ) def test_ecps_household_count(ecps_sim): """Household count should be roughly 130-160M.""" total_hh = ecps_sim.calculate("household_weight").values.sum() - assert ( - 100e6 < total_hh < 200e6 - ), f"Total households = {total_hh:.2e}, expected 100M-200M." + assert 100e6 < total_hh < 200e6, ( + f"Total households = {total_hh:.2e}, expected 100M-200M." + ) def test_ecps_person_count(ecps_sim): @@ -59,9 +59,9 @@ def test_ecps_person_count(ecps_sim): total_people = ecps_sim.calculate( "household_weight", map_to="person" ).values.sum() - assert ( - 250e6 < total_people < 400e6 - ), f"Total people = {total_people:.2e}, expected 250M-400M." + assert 250e6 < total_people < 400e6, ( + f"Total people = {total_people:.2e}, expected 250M-400M." + ) def test_ecps_poverty_rate_reasonable(ecps_sim): @@ -85,7 +85,7 @@ def test_ecps_mean_employment_income_reasonable(ecps_sim): income = ecps_sim.calculate("employment_income", map_to="person") mean = income.mean() assert 15_000 < mean < 80_000, ( - f"Mean employment income = ${mean:,.0f}, " "expected $15k-$80k." + f"Mean employment income = ${mean:,.0f}, expected $15k-$80k." ) @@ -95,7 +95,7 @@ def test_ecps_mean_employment_income_reasonable(ecps_sim): def test_cps_employment_income_positive(cps_sim): total = cps_sim.calculate("employment_income").sum() assert total > 5e12, ( - f"CPS employment_income sum is {total:.2e}, " "expected > 5T." + f"CPS employment_income sum is {total:.2e}, expected > 5T." ) @@ -122,24 +122,24 @@ def sparse_sim(): def test_sparse_employment_income_positive(sparse_sim): """Sparse dataset employment income must be in the trillions.""" total = sparse_sim.calculate("employment_income").sum() - assert ( - total > 5e12 - ), f"Sparse employment_income sum is {total:.2e}, expected > 5T." + assert total > 5e12, ( + f"Sparse employment_income sum is {total:.2e}, expected > 5T." + ) def test_sparse_household_count(sparse_sim): total_hh = sparse_sim.calculate("household_weight").values.sum() - assert ( - 100e6 < total_hh < 200e6 - ), f"Sparse total households = {total_hh:.2e}, expected 100M-200M." + assert 100e6 < total_hh < 200e6, ( + f"Sparse total households = {total_hh:.2e}, expected 100M-200M." + ) def test_sparse_poverty_rate_reasonable(sparse_sim): in_poverty = sparse_sim.calculate("person_in_poverty", map_to="person") rate = in_poverty.mean() - assert ( - 0.05 < rate < 0.25 - ), f"Sparse poverty rate = {rate:.1%}, expected 5-25%." + assert 0.05 < rate < 0.25, ( + f"Sparse poverty rate = {rate:.1%}, expected 5-25%." + ) # ── File size checks ─────────────────────────────────────────── @@ -153,6 +153,6 @@ def test_ecps_file_size(): if not path.exists(): pytest.skip("enhanced_cps_2024.h5 not found") size_mb = path.stat().st_size / (1024 * 1024) - assert ( - size_mb > 100 - ), f"enhanced_cps_2024.h5 is only {size_mb:.1f}MB, expected >100MB" + assert size_mb > 100, ( + f"enhanced_cps_2024.h5 is only {size_mb:.1f}MB, expected >100MB" + ) diff --git a/policyengine_us_data/tests/test_datasets/test_enhanced_cps.py b/policyengine_us_data/tests/test_datasets/test_enhanced_cps.py index b3edbc9e3..36be76a54 100644 --- a/policyengine_us_data/tests/test_datasets/test_enhanced_cps.py +++ b/policyengine_us_data/tests/test_datasets/test_enhanced_cps.py @@ -50,10 +50,10 @@ def test_ecps_replicates_jct_tax_expenditures(): & (calibration_log["epoch"] == calibration_log["epoch"].max()) ] - assert ( - jct_rows.rel_abs_error.max() < 0.5 - ), "JCT tax expenditure targets not met (see the calibration log for details). Max relative error: {:.2%}".format( - jct_rows.rel_abs_error.max() + assert jct_rows.rel_abs_error.max() < 0.5, ( + "JCT tax expenditure targets not met (see the calibration log for details). Max relative error: {:.2%}".format( + jct_rows.rel_abs_error.max() + ) ) @@ -95,7 +95,7 @@ def apply(self): TOLERANCE = 0.4 print( - f"{deduction} tax expenditure {tax_expenditure/1e9:.1f}bn differs from target {target/1e9:.1f}bn by {pct_error:.2%}" + f"{deduction} tax expenditure {tax_expenditure / 1e9:.1f}bn differs from target {target / 1e9:.1f}bn by {pct_error:.2%}" ) assert pct_error < TOLERANCE, deduction @@ -137,9 +137,9 @@ def test_undocumented_matches_ssn_none(): # 1. Per-person equivalence mismatches = np.where(ssn_type_none_mask != undocumented_mask)[0] - assert ( - mismatches.size == 0 - ), f"{mismatches.size} mismatches between 'NONE' SSN and 'UNDOCUMENTED' status" + assert mismatches.size == 0, ( + f"{mismatches.size} mismatches between 'NONE' SSN and 'UNDOCUMENTED' status" + ) # 2. Optional aggregate sanity-check count = undocumented_mask.sum() @@ -181,17 +181,17 @@ def test_aca_calibration(): pct_error = abs(simulated - target_spending) / target_spending print( - f"{state}: simulated ${simulated/1e9:.2f} bn " - f"target ${target_spending/1e9:.2f} bn " + f"{state}: simulated ${simulated / 1e9:.2f} bn " + f"target ${target_spending / 1e9:.2f} bn " f"error {pct_error:.2%}" ) if pct_error > TOLERANCE: failed = True - assert ( - not failed - ), f"One or more states exceeded tolerance of {TOLERANCE:.0%}." + assert not failed, ( + f"One or more states exceeded tolerance of {TOLERANCE:.0%}." + ) def test_immigration_status_diversity(): @@ -227,15 +227,15 @@ def test_immigration_status_diversity(): ) # Also check that we have a reasonable percentage of citizens (should be 85-90%) - assert ( - 80 < citizen_pct < 95 - ), f"Citizen percentage ({citizen_pct:.1f}%) outside expected range (80-95%)" + assert 80 < citizen_pct < 95, ( + f"Citizen percentage ({citizen_pct:.1f}%) outside expected range (80-95%)" + ) # Check that we have some non-citizens non_citizen_pct = 100 - citizen_pct - assert ( - non_citizen_pct > 5 - ), f"Too few non-citizens ({non_citizen_pct:.1f}%) - expected at least 5%" + assert non_citizen_pct > 5, ( + f"Too few non-citizens ({non_citizen_pct:.1f}%) - expected at least 5%" + ) print( f"Immigration status diversity test passed: {citizen_pct:.1f}% citizens" @@ -269,14 +269,14 @@ def test_medicaid_calibration(): pct_error = abs(simulated - target_enrollment) / target_enrollment print( - f"{state}: simulated ${simulated/1e9:.2f} bn " - f"target ${target_enrollment/1e9:.2f} bn " + f"{state}: simulated ${simulated / 1e9:.2f} bn " + f"target ${target_enrollment / 1e9:.2f} bn " f"error {pct_error:.2%}" ) if pct_error > TOLERANCE: failed = True - assert ( - not failed - ), f"One or more states exceeded tolerance of {TOLERANCE:.0%}." + assert not failed, ( + f"One or more states exceeded tolerance of {TOLERANCE:.0%}." + ) diff --git a/policyengine_us_data/tests/test_datasets/test_sipp_assets.py b/policyengine_us_data/tests/test_datasets/test_sipp_assets.py index c8780d847..36f637e64 100644 --- a/policyengine_us_data/tests/test_datasets/test_sipp_assets.py +++ b/policyengine_us_data/tests/test_datasets/test_sipp_assets.py @@ -59,12 +59,12 @@ def test_ecps_has_liquid_assets(): MAXIMUM_TOTAL = 30e12 # $30 trillion ceiling assert total > MINIMUM_TOTAL, ( - f"Total liquid assets ${total/1e12:.1f}T below " - f"minimum ${MINIMUM_TOTAL/1e12:.0f}T" + f"Total liquid assets ${total / 1e12:.1f}T below " + f"minimum ${MINIMUM_TOTAL / 1e12:.0f}T" ) assert total < MAXIMUM_TOTAL, ( - f"Total liquid assets ${total/1e12:.1f}T above " - f"maximum ${MAXIMUM_TOTAL/1e12:.0f}T" + f"Total liquid assets ${total / 1e12:.1f}T above " + f"maximum ${MAXIMUM_TOTAL / 1e12:.0f}T" ) @@ -129,9 +129,9 @@ def test_asset_categories_exist(): assert bonds >= 0, "Bond assets should be non-negative" # Bank accounts typically largest category of liquid assets - assert ( - bank > stocks * 0.3 - ), "Bank accounts should be substantial relative to stocks" + assert bank > stocks * 0.3, ( + "Bank accounts should be substantial relative to stocks" + ) def test_low_asset_households(): diff --git a/policyengine_us_data/tests/test_datasets/test_small_enhanced_cps.py b/policyengine_us_data/tests/test_datasets/test_small_enhanced_cps.py index 23b7b2dcb..c16829db9 100644 --- a/policyengine_us_data/tests/test_datasets/test_small_enhanced_cps.py +++ b/policyengine_us_data/tests/test_datasets/test_small_enhanced_cps.py @@ -19,12 +19,12 @@ def test_small_ecps_loads(year: int): # Employment income should be positive (not zero from missing vars) emp_income = sim.calculate("employment_income", 2025).sum() - assert ( - emp_income > 0 - ), f"Small ECPS employment_income sum is {emp_income}, expected > 0." + assert emp_income > 0, ( + f"Small ECPS employment_income sum is {emp_income}, expected > 0." + ) # Should have a reasonable number of households hh_count = len(sim.calculate("household_net_income", 2025)) - assert ( - hh_count > 100 - ), f"Small ECPS has only {hh_count} households, expected > 100." + assert hh_count > 100, ( + f"Small ECPS has only {hh_count} households, expected > 100." + ) diff --git a/policyengine_us_data/tests/test_datasets/test_sparse_enhanced_cps.py b/policyengine_us_data/tests/test_datasets/test_sparse_enhanced_cps.py index 6a690f0cc..f1b3f4de2 100644 --- a/policyengine_us_data/tests/test_datasets/test_sparse_enhanced_cps.py +++ b/policyengine_us_data/tests/test_datasets/test_sparse_enhanced_cps.py @@ -115,10 +115,10 @@ def test_sparse_ecps_replicates_jct_tax_expenditures(): & (calibration_log["epoch"] == calibration_log["epoch"].max()) ] - assert ( - jct_rows.rel_abs_error.max() < 0.5 - ), "JCT tax expenditure targets not met (see the calibration log for details). Max relative error: {:.2%}".format( - jct_rows.rel_abs_error.max() + assert jct_rows.rel_abs_error.max() < 0.5, ( + "JCT tax expenditure targets not met (see the calibration log for details). Max relative error: {:.2%}".format( + jct_rows.rel_abs_error.max() + ) ) @@ -155,8 +155,8 @@ def apply(self): TOLERANCE = 0.4 logging.info( - f"{deduction} tax expenditure {tax_expenditure/1e9:.1f}bn " - f"differs from target {target/1e9:.1f}bn by {pct_error:.2%}" + f"{deduction} tax expenditure {tax_expenditure / 1e9:.1f}bn " + f"differs from target {target / 1e9:.1f}bn by {pct_error:.2%}" ) assert pct_error < TOLERANCE, deduction @@ -204,17 +204,17 @@ def test_sparse_aca_calibration(sim): pct_error = abs(simulated - target_spending) / target_spending logging.info( - f"{state}: simulated ${simulated/1e9:.2f} bn " - f"target ${target_spending/1e9:.2f} bn " + f"{state}: simulated ${simulated / 1e9:.2f} bn " + f"target ${target_spending / 1e9:.2f} bn " f"error {pct_error:.2%}" ) if pct_error > TOLERANCE: failed = True - assert ( - not failed - ), f"One or more states exceeded tolerance of {TOLERANCE:.0%}." + assert not failed, ( + f"One or more states exceeded tolerance of {TOLERANCE:.0%}." + ) def test_sparse_medicaid_calibration(sim): @@ -238,14 +238,14 @@ def test_sparse_medicaid_calibration(sim): pct_error = abs(simulated - target_enrollment) / target_enrollment logging.info( - f"{state}: simulated ${simulated/1e9:.2f} bn " - f"target ${target_enrollment/1e9:.2f} bn " + f"{state}: simulated ${simulated / 1e9:.2f} bn " + f"target ${target_enrollment / 1e9:.2f} bn " f"error {pct_error:.2%}" ) if pct_error > TOLERANCE: failed = True - assert ( - not failed - ), f"One or more states exceeded tolerance of {TOLERANCE:.0%}." + assert not failed, ( + f"One or more states exceeded tolerance of {TOLERANCE:.0%}." + ) diff --git a/policyengine_us_data/tests/test_local_area_calibration/test_county_assignment.py b/policyengine_us_data/tests/test_local_area_calibration/test_county_assignment.py index 158e0ca68..d37b30138 100644 --- a/policyengine_us_data/tests/test_local_area_calibration/test_county_assignment.py +++ b/policyengine_us_data/tests/test_local_area_calibration/test_county_assignment.py @@ -47,9 +47,9 @@ def test_ny_cd_gets_ny_counties(self): for idx in result: county_name = County._member_names_[idx] # Should end with _NY - assert county_name.endswith( - "_NY" - ), f"Got non-NY county: {county_name}" + assert county_name.endswith("_NY"), ( + f"Got non-NY county: {county_name}" + ) def test_ca_cd_gets_ca_counties(self): """Verify CA CDs get CA counties.""" @@ -58,9 +58,9 @@ def test_ca_cd_gets_ca_counties(self): for idx in result: county_name = County._member_names_[idx] - assert county_name.endswith( - "_CA" - ), f"Got non-CA county: {county_name}" + assert county_name.endswith("_CA"), ( + f"Got non-CA county: {county_name}" + ) class TestCountyIndex: diff --git a/policyengine_us_data/tests/test_local_area_calibration/test_stacked_dataset_builder.py b/policyengine_us_data/tests/test_local_area_calibration/test_stacked_dataset_builder.py index 2900eec19..6e0710f2b 100644 --- a/policyengine_us_data/tests/test_local_area_calibration/test_stacked_dataset_builder.py +++ b/policyengine_us_data/tests/test_local_area_calibration/test_stacked_dataset_builder.py @@ -141,13 +141,13 @@ def test_counties_match_state(self, stacked_result): state_fips = row["state_fips"] if state_fips == 37: - assert county.endswith( - "_NC" - ), f"NC county should end with _NC: {county}" + assert county.endswith("_NC"), ( + f"NC county should end with _NC: {county}" + ) elif state_fips == 2: - assert county.endswith( - "_AK" - ), f"AK county should end with _AK: {county}" + assert county.endswith("_AK"), ( + f"AK county should end with _AK: {county}" + ) def test_household_count_matches_weights( self, stacked_result, test_weights @@ -205,27 +205,27 @@ class TestEntityReindexing: def test_family_ids_are_unique(self, stacked_sim): """Family IDs should be globally unique across all CDs.""" family_ids = stacked_sim.calculate("family_id", map_to="family").values - assert len(family_ids) == len( - set(family_ids) - ), "Family IDs should be unique" + assert len(family_ids) == len(set(family_ids)), ( + "Family IDs should be unique" + ) def test_tax_unit_ids_are_unique(self, stacked_sim): """Tax unit IDs should be globally unique.""" tax_unit_ids = stacked_sim.calculate( "tax_unit_id", map_to="tax_unit" ).values - assert len(tax_unit_ids) == len( - set(tax_unit_ids) - ), "Tax unit IDs should be unique" + assert len(tax_unit_ids) == len(set(tax_unit_ids)), ( + "Tax unit IDs should be unique" + ) def test_spm_unit_ids_are_unique(self, stacked_sim): """SPM unit IDs should be globally unique.""" spm_unit_ids = stacked_sim.calculate( "spm_unit_id", map_to="spm_unit" ).values - assert len(spm_unit_ids) == len( - set(spm_unit_ids) - ), "SPM unit IDs should be unique" + assert len(spm_unit_ids) == len(set(spm_unit_ids)), ( + "SPM unit IDs should be unique" + ) def test_person_family_id_matches_family_id(self, stacked_sim): """person_family_id should reference valid family_ids.""" @@ -236,9 +236,9 @@ def test_person_family_id_matches_family_id(self, stacked_sim): stacked_sim.calculate("family_id", map_to="family").values ) for pf_id in person_family_ids: - assert ( - pf_id in family_ids - ), f"person_family_id {pf_id} not in family_ids" + assert pf_id in family_ids, ( + f"person_family_id {pf_id} not in family_ids" + ) def test_family_ids_unique_across_cds(self, stacked_sim_with_overlap): """Same household in different CDs should have different family_ids.""" diff --git a/policyengine_us_data/tests/test_schema_views_and_lookups.py b/policyengine_us_data/tests/test_schema_views_and_lookups.py index 14521a214..b2a73214a 100644 --- a/policyengine_us_data/tests/test_schema_views_and_lookups.py +++ b/policyengine_us_data/tests/test_schema_views_and_lookups.py @@ -246,7 +246,7 @@ def test_geographic_stratum_excluded(self): domain_stratum_ids = {r[0] for r in rows} self.assertTrue( domain_stratum_ids.isdisjoint(geo_ids), - "Geographic strata should not appear in " "stratum_domain", + "Geographic strata should not appear in stratum_domain", ) def test_single_domain_variable(self): @@ -280,7 +280,7 @@ def test_geographic_constraints_filtered(self): } self.assertTrue( all_domain_vars.isdisjoint(excluded), - f"Found excluded vars: " f"{all_domain_vars & excluded}", + f"Found excluded vars: {all_domain_vars & excluded}", ) # ---------------------------------------------------------------- diff --git a/policyengine_us_data/tests/test_stochastic_variables.py b/policyengine_us_data/tests/test_stochastic_variables.py index 172260784..b9ab13466 100644 --- a/policyengine_us_data/tests/test_stochastic_variables.py +++ b/policyengine_us_data/tests/test_stochastic_variables.py @@ -10,7 +10,6 @@ class TestTakeUpRateParameters: - def test_eitc_rate_loads(self): rates = load_take_up_rate("eitc", 2022) assert isinstance(rates, dict) @@ -52,7 +51,6 @@ def test_ssi_takeup_rate_loads(self): class TestStableStringHash: - def test_deterministic(self): h1 = _stable_string_hash("takes_up_snap_if_eligible") h2 = _stable_string_hash("takes_up_snap_if_eligible") @@ -69,7 +67,6 @@ def test_returns_uint64(self): class TestSeededRng: - def test_same_name_same_results(self): rng1 = seeded_rng("takes_up_snap_if_eligible") result1 = rng1.random(1000) @@ -103,7 +100,6 @@ def test_order_independence(self): class TestTakeUpProportions: - def test_take_up_produces_expected_proportion(self): rate = 0.7 n = 10_000 diff --git a/policyengine_us_data/utils/l0.py b/policyengine_us_data/utils/l0.py index 3dd9e0145..a1d1a5a0d 100644 --- a/policyengine_us_data/utils/l0.py +++ b/policyengine_us_data/utils/l0.py @@ -191,11 +191,11 @@ def train_with_l0(model, train_loader, epochs=10, l0_lambda=1e-3): if epoch % 1 == 0: sparsity_stats = model.get_sparsity_stats() logging.info( - f"Epoch {epoch}: Loss={total_loss/len(train_loader):.4f}, L0={total_l0/len(train_loader):.4f}" + f"Epoch {epoch}: Loss={total_loss / len(train_loader):.4f}, L0={total_l0 / len(train_loader):.4f}" ) for layer, stats in sparsity_stats.items(): logging.info( - f" {layer}: {stats['sparsity']*100:.1f}% sparse, {stats['active_params']:.1f} active params" + f" {layer}: {stats['sparsity'] * 100:.1f}% sparse, {stats['active_params']:.1f} active params" ) diff --git a/policyengine_us_data/utils/loss.py b/policyengine_us_data/utils/loss.py index d7410d2eb..c188e71e0 100644 --- a/policyengine_us_data/utils/loss.py +++ b/policyengine_us_data/utils/loss.py @@ -101,10 +101,10 @@ def fmt(x): if x < 1e3: return f"{x:.0f}" if x < 1e6: - return f"{x/1e3:.0f}k" + return f"{x / 1e3:.0f}k" if x < 1e9: - return f"{x/1e6:.0f}m" - return f"{x/1e9:.1f}bn" + return f"{x / 1e6:.0f}m" + return f"{x / 1e9:.1f}bn" def build_loss_matrix(dataset: type, time_period): @@ -325,18 +325,14 @@ def build_loss_matrix(dataset: type, time_period): loss_matrix["nation/treasury/eitc"] = sim.calculate( "eitc", map_to="household" ).values - eitc_spending = ( - sim.tax_benefit_system.parameters.calibration.gov.treasury.tax_expenditures.eitc - ) + eitc_spending = sim.tax_benefit_system.parameters.calibration.gov.treasury.tax_expenditures.eitc targets_array.append(eitc_spending(time_period)) # IRS EITC filers and totals by child counts eitc_stats = pd.read_csv(CALIBRATION_FOLDER / "eitc.csv") eitc_spending_uprating = eitc_spending(time_period) / eitc_spending(2021) - population = ( - sim.tax_benefit_system.parameters.calibration.gov.census.populations.total - ) + population = sim.tax_benefit_system.parameters.calibration.gov.census.populations.total population_uprating = population(time_period) / population(2021) for _, row in eitc_stats.iterrows(): @@ -439,7 +435,7 @@ def build_loss_matrix(dataset: type, time_period): "other_medical_expenses", "medicare_part_b_premiums", ]: - label = f"nation/census/{expense_type}/age_{age_lower_bound}_to_{age_lower_bound+9}" + label = f"nation/census/{expense_type}/age_{age_lower_bound}_to_{age_lower_bound + 9}" value = sim.calculate(expense_type).values loss_matrix[label] = sim.map_result( in_age_range * value, "person", "household" diff --git a/pyproject.toml b/pyproject.toml index 95ada2a35..22ca72dc5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,7 +55,7 @@ l0 = [ [dependency-groups] dev = [ - "black", + "ruff>=0.9.0", "pytest", "quantile-forest", "tabulate", @@ -82,23 +82,8 @@ testpaths = [ "policyengine_us_data/tests", ] -[tool.black] +[tool.ruff] line-length = 79 -target-version = ['py311', 'py312', 'py313'] -include = '\.pyi?$' -extend-exclude = ''' -/( - # directories - \.eggs - | \.git - | \.hg - | \.mypy_cache - | \.tox - | \.venv - | build - | dist -)/ -''' [tool.towncrier] package = "policyengine_us_data" diff --git a/tests/test_h6_reform.py b/tests/test_h6_reform.py index e68ed8db3..4a25581c1 100644 --- a/tests/test_h6_reform.py +++ b/tests/test_h6_reform.py @@ -145,9 +145,9 @@ def test_single_crossover_starts_2046(self): # 2046+: crossover for year in range(2046, 2054): oasdi_single, _ = calculate_oasdi_thresholds(year) - assert needs_crossover_swap( - oasdi_single, HI_SINGLE - ), f"Year {year}" + assert needs_crossover_swap(oasdi_single, HI_SINGLE), ( + f"Year {year}" + ) class TestH6ThresholdSwapping: diff --git a/tests/test_reproducibility.py b/tests/test_reproducibility.py index 1ec097a7b..25755f0a6 100644 --- a/tests/test_reproducibility.py +++ b/tests/test_reproducibility.py @@ -144,9 +144,9 @@ def test_output_checksums(self): if file_path.exists() and filename != "checksums.txt": with open(file_path, "rb") as f: actual_checksum = hashlib.sha256(f.read()).hexdigest() - assert ( - actual_checksum == expected_checksum - ), f"Checksum mismatch for {filename}" + assert actual_checksum == expected_checksum, ( + f"Checksum mismatch for {filename}" + ) def test_memory_usage(self): """Test that memory usage stays within bounds.""" diff --git a/tests/test_weeks_unemployed.py b/tests/test_weeks_unemployed.py index 18aa47629..d64d8b64c 100644 --- a/tests/test_weeks_unemployed.py +++ b/tests/test_weeks_unemployed.py @@ -21,9 +21,9 @@ def test_lkweeks_in_person_columns(self): # Check for correct variable assert '"LKWEEKS"' in content, "LKWEEKS should be in PERSON_COLUMNS" - assert ( - '"WKSUNEM"' not in content - ), "WKSUNEM should not be in PERSON_COLUMNS (Census uses LKWEEKS)" + assert '"WKSUNEM"' not in content, ( + "WKSUNEM should not be in PERSON_COLUMNS (Census uses LKWEEKS)" + ) def test_cps_uses_lkweeks(self): """Test that cps.py uses LKWEEKS, not WKSUNEM.""" diff --git a/uv.lock b/uv.lock index 11179f708..044161b89 100644 --- a/uv.lock +++ b/uv.lock @@ -167,33 +167,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1a/39/47f9197bdd44df24d67ac8893641e16f386c984a0619ef2ee4c51fbbc019/beautifulsoup4-4.14.3-py3-none-any.whl", hash = "sha256:0918bfe44902e6ad8d57732ba310582e98da931428d231a5ecb9e7c703a735bb", size = 107721, upload-time = "2025-11-30T15:08:24.087Z" }, ] -[[package]] -name = "black" -version = "25.12.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "click" }, - { name = "mypy-extensions" }, - { name = "packaging" }, - { name = "pathspec" }, - { name = "platformdirs" }, - { name = "pytokens" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/c4/d9/07b458a3f1c525ac392b5edc6b191ff140b596f9d77092429417a54e249d/black-25.12.0.tar.gz", hash = "sha256:8d3dd9cea14bff7ddc0eb243c811cdb1a011ebb4800a5f0335a01a68654796a7", size = 659264, upload-time = "2025-12-08T01:40:52.501Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d1/bd/26083f805115db17fda9877b3c7321d08c647df39d0df4c4ca8f8450593e/black-25.12.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:31f96b7c98c1ddaeb07dc0f56c652e25bdedaac76d5b68a059d998b57c55594a", size = 1924178, upload-time = "2025-12-08T01:49:51.048Z" }, - { url = "https://files.pythonhosted.org/packages/89/6b/ea00d6651561e2bdd9231c4177f4f2ae19cc13a0b0574f47602a7519b6ca/black-25.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:05dd459a19e218078a1f98178c13f861fe6a9a5f88fc969ca4d9b49eb1809783", size = 1742643, upload-time = "2025-12-08T01:49:59.09Z" }, - { url = "https://files.pythonhosted.org/packages/6d/f3/360fa4182e36e9875fabcf3a9717db9d27a8d11870f21cff97725c54f35b/black-25.12.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c1f68c5eff61f226934be6b5b80296cf6939e5d2f0c2f7d543ea08b204bfaf59", size = 1800158, upload-time = "2025-12-08T01:44:27.301Z" }, - { url = "https://files.pythonhosted.org/packages/f8/08/2c64830cb6616278067e040acca21d4f79727b23077633953081c9445d61/black-25.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:274f940c147ddab4442d316b27f9e332ca586d39c85ecf59ebdea82cc9ee8892", size = 1426197, upload-time = "2025-12-08T01:45:51.198Z" }, - { url = "https://files.pythonhosted.org/packages/d4/60/a93f55fd9b9816b7432cf6842f0e3000fdd5b7869492a04b9011a133ee37/black-25.12.0-cp312-cp312-win_arm64.whl", hash = "sha256:169506ba91ef21e2e0591563deda7f00030cb466e747c4b09cb0a9dae5db2f43", size = 1237266, upload-time = "2025-12-08T01:45:10.556Z" }, - { url = "https://files.pythonhosted.org/packages/c8/52/c551e36bc95495d2aa1a37d50566267aa47608c81a53f91daa809e03293f/black-25.12.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a05ddeb656534c3e27a05a29196c962877c83fa5503db89e68857d1161ad08a5", size = 1923809, upload-time = "2025-12-08T01:46:55.126Z" }, - { url = "https://files.pythonhosted.org/packages/a0/f7/aac9b014140ee56d247e707af8db0aae2e9efc28d4a8aba92d0abd7ae9d1/black-25.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9ec77439ef3e34896995503865a85732c94396edcc739f302c5673a2315e1e7f", size = 1742384, upload-time = "2025-12-08T01:49:37.022Z" }, - { url = "https://files.pythonhosted.org/packages/74/98/38aaa018b2ab06a863974c12b14a6266badc192b20603a81b738c47e902e/black-25.12.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0e509c858adf63aa61d908061b52e580c40eae0dfa72415fa47ac01b12e29baf", size = 1798761, upload-time = "2025-12-08T01:46:05.386Z" }, - { url = "https://files.pythonhosted.org/packages/16/3a/a8ac542125f61574a3f015b521ca83b47321ed19bb63fe6d7560f348bfe1/black-25.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:252678f07f5bac4ff0d0e9b261fbb029fa530cfa206d0a636a34ab445ef8ca9d", size = 1429180, upload-time = "2025-12-08T01:45:34.903Z" }, - { url = "https://files.pythonhosted.org/packages/e6/2d/bdc466a3db9145e946762d52cd55b1385509d9f9004fec1c97bdc8debbfb/black-25.12.0-cp313-cp313-win_arm64.whl", hash = "sha256:bc5b1c09fe3c931ddd20ee548511c64ebf964ada7e6f0763d443947fd1c603ce", size = 1239350, upload-time = "2025-12-08T01:46:09.458Z" }, - { url = "https://files.pythonhosted.org/packages/68/11/21331aed19145a952ad28fca2756a1433ee9308079bd03bd898e903a2e53/black-25.12.0-py3-none-any.whl", hash = "sha256:48ceb36c16dbc84062740049eef990bb2ce07598272e673c17d1a7720c71c828", size = 206191, upload-time = "2025-12-08T01:40:50.963Z" }, -] - [[package]] name = "bleach" version = "6.3.0" @@ -637,6 +610,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f8/0a/a3871375c7b9727edaeeea994bfff7c63ff7804c9829c19309ba2e058807/greenlet-3.3.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:b01548f6e0b9e9784a2c99c5651e5dc89ffcbe870bc5fb2e5ef864e9cc6b5dcb", size = 276379, upload-time = "2025-12-04T14:23:30.498Z" }, { url = "https://files.pythonhosted.org/packages/43/ab/7ebfe34dce8b87be0d11dae91acbf76f7b8246bf9d6b319c741f99fa59c6/greenlet-3.3.0-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:349345b770dc88f81506c6861d22a6ccd422207829d2c854ae2af8025af303e3", size = 597294, upload-time = "2025-12-04T14:50:06.847Z" }, { url = "https://files.pythonhosted.org/packages/a4/39/f1c8da50024feecd0793dbd5e08f526809b8ab5609224a2da40aad3a7641/greenlet-3.3.0-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e8e18ed6995e9e2c0b4ed264d2cf89260ab3ac7e13555b8032b25a74c6d18655", size = 607742, upload-time = "2025-12-04T14:57:42.349Z" }, + { url = "https://files.pythonhosted.org/packages/77/cb/43692bcd5f7a0da6ec0ec6d58ee7cddb606d055ce94a62ac9b1aa481e969/greenlet-3.3.0-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c024b1e5696626890038e34f76140ed1daf858e37496d33f2af57f06189e70d7", size = 622297, upload-time = "2025-12-04T15:07:13.552Z" }, { url = "https://files.pythonhosted.org/packages/75/b0/6bde0b1011a60782108c01de5913c588cf51a839174538d266de15e4bf4d/greenlet-3.3.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:047ab3df20ede6a57c35c14bf5200fcf04039d50f908270d3f9a7a82064f543b", size = 609885, upload-time = "2025-12-04T14:26:02.368Z" }, { url = "https://files.pythonhosted.org/packages/49/0e/49b46ac39f931f59f987b7cd9f34bfec8ef81d2a1e6e00682f55be5de9f4/greenlet-3.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2d9ad37fc657b1102ec880e637cccf20191581f75c64087a549e66c57e1ceb53", size = 1567424, upload-time = "2025-12-04T15:04:23.757Z" }, { url = "https://files.pythonhosted.org/packages/05/f5/49a9ac2dff7f10091935def9165c90236d8f175afb27cbed38fb1d61ab6b/greenlet-3.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:83cd0e36932e0e7f36a64b732a6f60c2fc2df28c351bae79fbaf4f8092fe7614", size = 1636017, upload-time = "2025-12-04T14:27:29.688Z" }, @@ -644,6 +618,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/02/2f/28592176381b9ab2cafa12829ba7b472d177f3acc35d8fbcf3673d966fff/greenlet-3.3.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:a1e41a81c7e2825822f4e068c48cb2196002362619e2d70b148f20a831c00739", size = 275140, upload-time = "2025-12-04T14:23:01.282Z" }, { url = "https://files.pythonhosted.org/packages/2c/80/fbe937bf81e9fca98c981fe499e59a3f45df2a04da0baa5c2be0dca0d329/greenlet-3.3.0-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9f515a47d02da4d30caaa85b69474cec77b7929b2e936ff7fb853d42f4bf8808", size = 599219, upload-time = "2025-12-04T14:50:08.309Z" }, { url = "https://files.pythonhosted.org/packages/c2/ff/7c985128f0514271b8268476af89aee6866df5eec04ac17dcfbc676213df/greenlet-3.3.0-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7d2d9fd66bfadf230b385fdc90426fcd6eb64db54b40c495b72ac0feb5766c54", size = 610211, upload-time = "2025-12-04T14:57:43.968Z" }, + { url = "https://files.pythonhosted.org/packages/79/07/c47a82d881319ec18a4510bb30463ed6891f2ad2c1901ed5ec23d3de351f/greenlet-3.3.0-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:30a6e28487a790417d036088b3bcb3f3ac7d8babaa7d0139edbaddebf3af9492", size = 624311, upload-time = "2025-12-04T15:07:14.697Z" }, { url = "https://files.pythonhosted.org/packages/fd/8e/424b8c6e78bd9837d14ff7df01a9829fc883ba2ab4ea787d4f848435f23f/greenlet-3.3.0-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:087ea5e004437321508a8d6f20efc4cfec5e3c30118e1417ea96ed1d93950527", size = 612833, upload-time = "2025-12-04T14:26:03.669Z" }, { url = "https://files.pythonhosted.org/packages/b5/ba/56699ff9b7c76ca12f1cdc27a886d0f81f2189c3455ff9f65246780f713d/greenlet-3.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ab97cf74045343f6c60a39913fa59710e4bd26a536ce7ab2397adf8b27e67c39", size = 1567256, upload-time = "2025-12-04T15:04:25.276Z" }, { url = "https://files.pythonhosted.org/packages/1e/37/f31136132967982d698c71a281a8901daf1a8fbab935dce7c0cf15f942cc/greenlet-3.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5375d2e23184629112ca1ea89a53389dddbffcf417dad40125713d88eb5f96e8", size = 1636483, upload-time = "2025-12-04T14:27:30.804Z" }, @@ -1252,15 +1227,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e5/db/0314e4e2db56ebcf450f277904ffd84a7988b9e5da8d0d61ab2d057df2b6/msgpack-1.1.2-cp313-cp313-win_arm64.whl", hash = "sha256:e69b39f8c0aa5ec24b57737ebee40be647035158f14ed4b40e6f150077e21a84", size = 64118, upload-time = "2025-10-08T09:15:23.402Z" }, ] -[[package]] -name = "mypy-extensions" -version = "1.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, -] - [[package]] name = "mystmd" version = "1.7.1" @@ -1697,15 +1663,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/78/f9/690a8600b93c332de3ab4a344a4ac34f00c8f104917061f779db6a918ed6/pathlib-1.0.1-py3-none-any.whl", hash = "sha256:f35f95ab8b0f59e6d354090350b44a80a80635d22efdedfa84c7ad1cf0a74147", size = 14363, upload-time = "2022-05-04T13:37:20.585Z" }, ] -[[package]] -name = "pathspec" -version = "1.0.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/28/2e/83722ece0f6ee24387d6cb830dd562ddbcd6ce0b9d76072c6849670c31b4/pathspec-1.0.1.tar.gz", hash = "sha256:e2769b508d0dd47b09af6ee2c75b2744a2cb1f474ae4b1494fd6a1b7a841613c", size = 129791, upload-time = "2026-01-06T13:02:55.15Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d2/fe/2257c71721aeab6a6e8aa1f00d01f2a20f58547d249a6c8fef5791f559fc/pathspec-1.0.1-py3-none-any.whl", hash = "sha256:8870061f22c58e6d83463cfce9a7dd6eca0512c772c1001fb09ac64091816721", size = 54584, upload-time = "2026-01-06T13:02:53.601Z" }, -] - [[package]] name = "patsy" version = "1.0.2" @@ -1894,7 +1851,6 @@ l0 = [ [package.dev-dependencies] dev = [ - { name = "black" }, { name = "build" }, { name = "furo" }, { name = "itables" }, @@ -1902,6 +1858,7 @@ dev = [ { name = "mystmd" }, { name = "pytest" }, { name = "quantile-forest" }, + { name = "ruff" }, { name = "tabulate" }, { name = "tomli" }, { name = "towncrier" }, @@ -1939,7 +1896,6 @@ provides-extras = ["calibration", "l0"] [package.metadata.requires-dev] dev = [ - { name = "black" }, { name = "build" }, { name = "furo" }, { name = "itables" }, @@ -1947,6 +1903,7 @@ dev = [ { name = "mystmd", specifier = ">=1.7.0" }, { name = "pytest" }, { name = "quantile-forest" }, + { name = "ruff", specifier = ">=0.9.0" }, { name = "tabulate" }, { name = "tomli" }, { name = "towncrier", specifier = ">=24.8.0" }, @@ -2215,15 +2172,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/51/e5/fecf13f06e5e5f67e8837d777d1bc43fac0ed2b77a676804df5c34744727/python_json_logger-4.0.0-py3-none-any.whl", hash = "sha256:af09c9daf6a813aa4cc7180395f50f2a9e5fa056034c9953aec92e381c5ba1e2", size = 15548, upload-time = "2025-10-06T04:15:17.553Z" }, ] -[[package]] -name = "pytokens" -version = "0.3.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/4e/8d/a762be14dae1c3bf280202ba3172020b2b0b4c537f94427435f19c413b72/pytokens-0.3.0.tar.gz", hash = "sha256:2f932b14ed08de5fcf0b391ace2642f858f1394c0857202959000b68ed7a458a", size = 17644, upload-time = "2025-11-05T13:36:35.34Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/84/25/d9db8be44e205a124f6c98bc0324b2bb149b7431c53877fc6d1038dddaf5/pytokens-0.3.0-py3-none-any.whl", hash = "sha256:95b2b5eaf832e469d141a378872480ede3f251a5a5041b8ec6e581d3ac71bbf3", size = 12195, upload-time = "2025-11-05T13:36:33.183Z" }, -] - [[package]] name = "pytz" version = "2025.2" @@ -2477,6 +2425,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762", size = 34696, upload-time = "2025-04-16T09:51:17.142Z" }, ] +[[package]] +name = "ruff" +version = "0.15.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/77/9b/840e0039e65fcf12758adf684d2289024d6140cde9268cc59887dc55189c/ruff-0.15.5.tar.gz", hash = "sha256:7c3601d3b6d76dce18c5c824fc8d06f4eef33d6df0c21ec7799510cde0f159a2", size = 4574214, upload-time = "2026-03-05T20:06:34.946Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/47/20/5369c3ce21588c708bcbe517a8fbe1a8dfdb5dfd5137e14790b1da71612c/ruff-0.15.5-py3-none-linux_armv6l.whl", hash = "sha256:4ae44c42281f42e3b06b988e442d344a5b9b72450ff3c892e30d11b29a96a57c", size = 10478185, upload-time = "2026-03-05T20:06:29.093Z" }, + { url = "https://files.pythonhosted.org/packages/44/ed/e81dd668547da281e5dce710cf0bc60193f8d3d43833e8241d006720e42b/ruff-0.15.5-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6edd3792d408ebcf61adabc01822da687579a1a023f297618ac27a5b51ef0080", size = 10859201, upload-time = "2026-03-05T20:06:32.632Z" }, + { url = "https://files.pythonhosted.org/packages/c4/8f/533075f00aaf19b07c5cd6aa6e5d89424b06b3b3f4583bfa9c640a079059/ruff-0.15.5-py3-none-macosx_11_0_arm64.whl", hash = "sha256:89f463f7c8205a9f8dea9d658d59eff49db05f88f89cc3047fb1a02d9f344010", size = 10184752, upload-time = "2026-03-05T20:06:40.312Z" }, + { url = "https://files.pythonhosted.org/packages/66/0e/ba49e2c3fa0395b3152bad634c7432f7edfc509c133b8f4529053ff024fb/ruff-0.15.5-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ba786a8295c6574c1116704cf0b9e6563de3432ac888d8f83685654fe528fd65", size = 10534857, upload-time = "2026-03-05T20:06:19.581Z" }, + { url = "https://files.pythonhosted.org/packages/59/71/39234440f27a226475a0659561adb0d784b4d247dfe7f43ffc12dd02e288/ruff-0.15.5-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fd4b801e57955fe9f02b31d20375ab3a5c4415f2e5105b79fb94cf2642c91440", size = 10309120, upload-time = "2026-03-05T20:06:00.435Z" }, + { url = "https://files.pythonhosted.org/packages/f5/87/4140aa86a93df032156982b726f4952aaec4a883bb98cb6ef73c347da253/ruff-0.15.5-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:391f7c73388f3d8c11b794dbbc2959a5b5afe66642c142a6effa90b45f6f5204", size = 11047428, upload-time = "2026-03-05T20:05:51.867Z" }, + { url = "https://files.pythonhosted.org/packages/5a/f7/4953e7e3287676f78fbe85e3a0ca414c5ca81237b7575bdadc00229ac240/ruff-0.15.5-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8dc18f30302e379fe1e998548b0f5e9f4dff907f52f73ad6da419ea9c19d66c8", size = 11914251, upload-time = "2026-03-05T20:06:22.887Z" }, + { url = "https://files.pythonhosted.org/packages/77/46/0f7c865c10cf896ccf5a939c3e84e1cfaeed608ff5249584799a74d33835/ruff-0.15.5-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1cc6e7f90087e2d27f98dc34ed1b3ab7c8f0d273cc5431415454e22c0bd2a681", size = 11333801, upload-time = "2026-03-05T20:05:57.168Z" }, + { url = "https://files.pythonhosted.org/packages/d3/01/a10fe54b653061585e655f5286c2662ebddb68831ed3eaebfb0eb08c0a16/ruff-0.15.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1cb7169f53c1ddb06e71a9aebd7e98fc0fea936b39afb36d8e86d36ecc2636a", size = 11206821, upload-time = "2026-03-05T20:06:03.441Z" }, + { url = "https://files.pythonhosted.org/packages/7a/0d/2132ceaf20c5e8699aa83da2706ecb5c5dcdf78b453f77edca7fb70f8a93/ruff-0.15.5-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:9b037924500a31ee17389b5c8c4d88874cc6ea8e42f12e9c61a3d754ff72f1ca", size = 11133326, upload-time = "2026-03-05T20:06:25.655Z" }, + { url = "https://files.pythonhosted.org/packages/72/cb/2e5259a7eb2a0f87c08c0fe5bf5825a1e4b90883a52685524596bfc93072/ruff-0.15.5-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:65bb414e5b4eadd95a8c1e4804f6772bbe8995889f203a01f77ddf2d790929dd", size = 10510820, upload-time = "2026-03-05T20:06:37.79Z" }, + { url = "https://files.pythonhosted.org/packages/ff/20/b67ce78f9e6c59ffbdb5b4503d0090e749b5f2d31b599b554698a80d861c/ruff-0.15.5-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:d20aa469ae3b57033519c559e9bc9cd9e782842e39be05b50e852c7c981fa01d", size = 10302395, upload-time = "2026-03-05T20:05:54.504Z" }, + { url = "https://files.pythonhosted.org/packages/5f/e5/719f1acccd31b720d477751558ed74e9c88134adcc377e5e886af89d3072/ruff-0.15.5-py3-none-musllinux_1_2_i686.whl", hash = "sha256:15388dd28c9161cdb8eda68993533acc870aa4e646a0a277aa166de9ad5a8752", size = 10754069, upload-time = "2026-03-05T20:06:06.422Z" }, + { url = "https://files.pythonhosted.org/packages/c3/9c/d1db14469e32d98f3ca27079dbd30b7b44dbb5317d06ab36718dee3baf03/ruff-0.15.5-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:b30da330cbd03bed0c21420b6b953158f60c74c54c5f4c1dabbdf3a57bf355d2", size = 11304315, upload-time = "2026-03-05T20:06:10.867Z" }, + { url = "https://files.pythonhosted.org/packages/28/3a/950367aee7c69027f4f422059227b290ed780366b6aecee5de5039d50fa8/ruff-0.15.5-py3-none-win32.whl", hash = "sha256:732e5ee1f98ba5b3679029989a06ca39a950cced52143a0ea82a2102cb592b74", size = 10551676, upload-time = "2026-03-05T20:06:13.705Z" }, + { url = "https://files.pythonhosted.org/packages/b8/00/bf077a505b4e649bdd3c47ff8ec967735ce2544c8e4a43aba42ee9bf935d/ruff-0.15.5-py3-none-win_amd64.whl", hash = "sha256:821d41c5fa9e19117616c35eaa3f4b75046ec76c65e7ae20a333e9a8696bc7fe", size = 11678972, upload-time = "2026-03-05T20:06:45.379Z" }, + { url = "https://files.pythonhosted.org/packages/fe/4e/cd76eca6db6115604b7626668e891c9dd03330384082e33662fb0f113614/ruff-0.15.5-py3-none-win_arm64.whl", hash = "sha256:b498d1c60d2fe5c10c45ec3f698901065772730b411f164ae270bb6bfcc4740b", size = 10965572, upload-time = "2026-03-05T20:06:16.984Z" }, +] + [[package]] name = "samplics" version = "0.4.55" diff --git a/validation/generate_qrf_statistics.py b/validation/generate_qrf_statistics.py index 87d43a54a..4a026dea4 100644 --- a/validation/generate_qrf_statistics.py +++ b/validation/generate_qrf_statistics.py @@ -243,7 +243,7 @@ print("\n\n2. VARIANCE EXPLAINED BY PREDICTORS") print("-" * 40) for var, r2 in variance_explained.items(): - print(f"- {var.replace('_', ' ').title()}: {r2*100:.0f}%") + print(f"- {var.replace('_', ' ').title()}: {r2 * 100:.0f}%") # 3. Out-of-Sample Accuracy print("\n\n3. OUT-OF-SAMPLE PREDICTION ACCURACY") @@ -308,7 +308,7 @@ f.write("Variance Explained by Predictors (R-squared)\n") f.write("=" * 40 + "\n\n") for var, r2 in variance_explained.items(): - f.write(f"{var.replace('_', ' ').title()}: {r2*100:.0f}%\n") + f.write(f"{var.replace('_', ' ').title()}: {r2 * 100:.0f}%\n") print( "✓ Saved variance explained to validation/outputs/variance_explained.txt" ) @@ -337,7 +337,7 @@ f.write("2. VARIANCE EXPLAINED\n") f.write("-" * 40 + "\n") for var, r2 in variance_explained.items(): - f.write(f"{var.replace('_', ' ').title()}: {r2*100:.0f}%\n") + f.write(f"{var.replace('_', ' ').title()}: {r2 * 100:.0f}%\n") f.write("\n3. OUT-OF-SAMPLE ACCURACY\n") f.write("-" * 40 + "\n") diff --git a/validation/qrf_diagnostics.py b/validation/qrf_diagnostics.py index dcd23b5ac..4e572916e 100644 --- a/validation/qrf_diagnostics.py +++ b/validation/qrf_diagnostics.py @@ -92,7 +92,7 @@ def validate_qrf_accuracy(puf_data, predictors, target_vars, n_estimators=100): for q in quantiles: pred = qrf.predict(X_test, quantiles=[q]) - predictions[f"q{int(q*100)}"] = pred.flatten() + predictions[f"q{int(q * 100)}"] = pred.flatten() # Calculate metrics median_pred = predictions["q50"] diff --git a/validation/run_qrf_diagnostics.py b/validation/run_qrf_diagnostics.py index dae400597..b39b16f5b 100644 --- a/validation/run_qrf_diagnostics.py +++ b/validation/run_qrf_diagnostics.py @@ -225,7 +225,7 @@ def main(): for display_name, actual_name in target_map.items(): if actual_name in variance_results: print( - f"- {display_name.capitalize()}: {variance_results[actual_name]*100:.0f}%" + f"- {display_name.capitalize()}: {variance_results[actual_name] * 100:.0f}%" ) # 3. Joint distribution preservation @@ -281,7 +281,7 @@ def main(): for display_name, actual_name in target_map.items(): if actual_name in variance_results: f.write( - f"{display_name.capitalize()}: {variance_results[actual_name]*100:.0f}%\n" + f"{display_name.capitalize()}: {variance_results[actual_name] * 100:.0f}%\n" ) print( "✓ Saved variance explained results to validation/outputs/variance_explained.txt" @@ -321,7 +321,7 @@ def main(): for display_name, actual_name in target_map.items(): if actual_name in variance_results: f.write( - f"{display_name.capitalize()}: {variance_results[actual_name]*100:.0f}%\n" + f"{display_name.capitalize()}: {variance_results[actual_name] * 100:.0f}%\n" ) if valid_pairs: diff --git a/validation/validate_retirement_imputation.py b/validation/validate_retirement_imputation.py index f57441751..6a11eafd2 100644 --- a/validation/validate_retirement_imputation.py +++ b/validation/validate_retirement_imputation.py @@ -80,7 +80,7 @@ def validate_constraints(sim) -> list: n_over_cap = (vals > max_401k + 1).sum() if n_over_cap > 0: issues.append( - f"FAIL: {var} has {n_over_cap} values exceeding " f"401k cap" + f"FAIL: {var} has {n_over_cap} values exceeding 401k cap" ) zero_wage = emp_income == 0 @@ -111,7 +111,7 @@ def validate_constraints(sim) -> list: n_over_cap = (vals > max_ira + 1).sum() if n_over_cap > 0: issues.append( - f"FAIL: {var} has {n_over_cap} values exceeding " f"IRA cap" + f"FAIL: {var} has {n_over_cap} values exceeding IRA cap" ) # SE pension constraint @@ -168,8 +168,8 @@ def validate_aggregates(sim) -> list: if ratio < 0.1 or ratio > 5.0: issues.append( f"WARNING: {var} weighted sum " - f"${weighted_sum/1e9:.1f}B is far from " - f"target ${target/1e9:.1f}B " + f"${weighted_sum / 1e9:.1f}B is far from " + f"target ${target / 1e9:.1f}B " f"(ratio={ratio:.2f})" )