From 850f6be476212e0a1b6fd3c12c746e5007f37fdd Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Fri, 6 Mar 2026 07:53:05 -0500 Subject: [PATCH 1/2] Switch code formatter from Black to Ruff Replace Black with Ruff for code formatting across pyproject.toml, Makefile, and all Python source files. Uses Ruff defaults (88 char line length) with no custom configuration. Co-Authored-By: Claude Opus 4.6 --- .github/bump_version.py | 4 +- Makefile | 2 +- changelog.d/switch-to-ruff.changed | 1 + docs/LA_methodology.ipynb | 15 +- docs/constituency_methodology.ipynb | 15 +- docs/methodology.ipynb | 161 +++++++++--------- docs/validation/constituencies.ipynb | 71 ++++++-- docs/validation/local_authorities.ipynb | 68 ++++++-- docs/validation/national.ipynb | 7 +- .../datasets/childcare/takeup_rate.py | 27 +-- .../datasets/create_datasets.py | 4 +- policyengine_uk_data/datasets/frs.py | 148 ++++------------ .../datasets/imputations/capital_gains.py | 16 +- .../datasets/imputations/consumption.py | 26 +-- .../datasets/imputations/salary_sacrifice.py | 28 +-- .../datasets/imputations/services/etb.py | 8 +- .../datasets/imputations/services/nhs.py | 14 +- .../datasets/imputations/services/services.py | 4 +- .../boundary_changes/mapping_matrix.py | 4 +- .../local_areas/constituencies/calibrate.py | 16 +- .../local_areas/constituencies/loss.py | 16 +- .../targets/create_employment_incomes.py | 43 ++--- .../targets/fill_missing_age_demographics.py | 4 +- .../local_authorities/calibrate.py | 22 +-- .../local_areas/local_authorities/loss.py | 22 +-- .../targets/create_employment_incomes.py | 54 ++---- policyengine_uk_data/datasets/spi.py | 13 +- policyengine_uk_data/parameters/__init__.py | 8 +- .../targets/build_loss_matrix.py | 16 +- .../targets/compute/benefits.py | 32 +--- .../targets/compute/demographics.py | 10 +- .../targets/compute/households.py | 26 ++- .../targets/compute/income.py | 20 +-- policyengine_uk_data/targets/registry.py | 4 +- .../targets/sources/_common.py | 2 +- .../targets/sources/hmrc_salary_sacrifice.py | 11 +- .../targets/sources/hmrc_spi.py | 8 +- .../targets/sources/local_age.py | 12 +- .../targets/sources/local_income.py | 12 +- .../targets/sources/local_la_extras.py | 4 +- policyengine_uk_data/targets/sources/obr.py | 25 +-- .../targets/sources/ons_demographics.py | 8 +- .../targets/sources/ons_savings.py | 8 +- .../microsimulation/test_reform_impacts.py | 16 +- .../microsimulation/update_reform_impacts.py | 12 +- policyengine_uk_data/tests/test_aggregates.py | 10 +- .../tests/test_child_limit.py | 33 ++-- policyengine_uk_data/tests/test_childcare.py | 24 +-- .../tests/test_low_income_deciles.py | 4 +- .../tests/test_non_negative_incomes.py | 4 +- ...sion_contributions_via_salary_sacrifice.py | 16 +- policyengine_uk_data/tests/test_population.py | 6 +- .../tests/test_property_purchased.py | 20 +-- .../tests/test_salary_sacrifice_headcount.py | 12 +- .../tests/test_scotland_babies.py | 4 +- .../tests/test_scotland_uc_babies.py | 12 +- .../tests/test_target_registry.py | 8 +- .../tests/test_uc_by_children.py | 12 +- .../tests/test_vehicle_ownership.py | 8 +- policyengine_uk_data/utils/calibrate.py | 27 +-- policyengine_uk_data/utils/data_upload.py | 8 +- policyengine_uk_data/utils/datasets.py | 20 +-- policyengine_uk_data/utils/huggingface.py | 4 +- .../utils/incomes_projection.py | 30 +--- policyengine_uk_data/utils/loss.py | 4 +- policyengine_uk_data/utils/progress.py | 12 +- policyengine_uk_data/utils/qrf.py | 4 +- policyengine_uk_data/utils/spi.py | 8 +- policyengine_uk_data/utils/stack.py | 4 +- policyengine_uk_data/utils/uc_data.py | 12 +- pyproject.toml | 22 +-- uv.lock | 108 ++++++------ 72 files changed, 576 insertions(+), 907 deletions(-) create mode 100644 changelog.d/switch-to-ruff.changed diff --git a/.github/bump_version.py b/.github/bump_version.py index bb0fd6dd3..779a82e38 100644 --- a/.github/bump_version.py +++ b/.github/bump_version.py @@ -19,9 +19,7 @@ def get_current_version(pyproject_path: Path) -> str: def infer_bump(changelog_dir: Path) -> str: fragments = [ - f - for f in changelog_dir.iterdir() - if f.is_file() and f.name != ".gitkeep" + f for f in changelog_dir.iterdir() if f.is_file() and f.name != ".gitkeep" ] if not fragments: print("No changelog fragments found", file=sys.stderr) diff --git a/Makefile b/Makefile index 327895d36..64699a0d0 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ all: data test format: - black . -l 79 + ruff format . test: pytest . diff --git a/changelog.d/switch-to-ruff.changed b/changelog.d/switch-to-ruff.changed new file mode 100644 index 000000000..aeb771eb8 --- /dev/null +++ b/changelog.d/switch-to-ruff.changed @@ -0,0 +1 @@ +Switched code formatter from Black to Ruff. diff --git a/docs/LA_methodology.ipynb b/docs/LA_methodology.ipynb index 4b81035ac..46774f4b5 100644 --- a/docs/LA_methodology.ipynb +++ b/docs/LA_methodology.ipynb @@ -374,10 +374,13 @@ "import pandas as pd\n", "from itables import init_notebook_mode, show\n", "import itables.options as opt\n", + "\n", "opt.maxBytes = \"1MB\"\n", "init_notebook_mode(all_interactive=True)\n", "\n", - "pd.read_csv(\"../policyengine_uk_data/datasets/frs/local_areas/local_authorities/targets/total_income.csv\")" + "pd.read_csv(\n", + " \"../policyengine_uk_data/datasets/frs/local_areas/local_authorities/targets/total_income.csv\"\n", + ")" ] }, { @@ -818,10 +821,13 @@ "import pandas as pd\n", "from itables import init_notebook_mode, show\n", "import itables.options as opt\n", + "\n", "opt.maxBytes = \"1MB\"\n", "init_notebook_mode(all_interactive=True)\n", "\n", - "pd.read_csv(\"../policyengine_uk_data/datasets/frs/local_areas/local_authorities/targets/age.csv\")" + "pd.read_csv(\n", + " \"../policyengine_uk_data/datasets/frs/local_areas/local_authorities/targets/age.csv\"\n", + ")" ] }, { @@ -1188,10 +1194,13 @@ "import pandas as pd\n", "from itables import init_notebook_mode, show\n", "import itables.options as opt\n", + "\n", "opt.maxBytes = \"1MB\"\n", "init_notebook_mode(all_interactive=True)\n", "\n", - "pd.read_csv(\"../policyengine_uk_data/datasets/frs/local_areas/local_authorities/targets/employment_income.csv\")" + "pd.read_csv(\n", + " \"../policyengine_uk_data/datasets/frs/local_areas/local_authorities/targets/employment_income.csv\"\n", + ")" ] }, { diff --git a/docs/constituency_methodology.ipynb b/docs/constituency_methodology.ipynb index 8166105a7..9cacc36d3 100644 --- a/docs/constituency_methodology.ipynb +++ b/docs/constituency_methodology.ipynb @@ -374,10 +374,13 @@ "import pandas as pd\n", "from itables import init_notebook_mode, show\n", "import itables.options as opt\n", + "\n", "opt.maxBytes = \"1MB\"\n", "init_notebook_mode(all_interactive=True)\n", "\n", - "pd.read_csv(\"../policyengine_uk_data/datasets/frs/local_areas/constituencies/targets/total_income.csv\")" + "pd.read_csv(\n", + " \"../policyengine_uk_data/datasets/frs/local_areas/constituencies/targets/total_income.csv\"\n", + ")" ] }, { @@ -818,10 +821,13 @@ "import pandas as pd\n", "from itables import init_notebook_mode, show\n", "import itables.options as opt\n", + "\n", "opt.maxBytes = \"1MB\"\n", "init_notebook_mode(all_interactive=True)\n", "\n", - "pd.read_csv(\"../policyengine_uk_data/datasets/frs/local_areas/constituencies/targets/age.csv\")" + "pd.read_csv(\n", + " \"../policyengine_uk_data/datasets/frs/local_areas/constituencies/targets/age.csv\"\n", + ")" ] }, { @@ -1302,10 +1308,13 @@ "import pandas as pd\n", "from itables import init_notebook_mode, show\n", "import itables.options as opt\n", + "\n", "opt.maxBytes = \"1MB\"\n", "init_notebook_mode(all_interactive=True)\n", "\n", - "pd.read_csv(\"../policyengine_uk_data/datasets/frs/local_areas/constituencies/targets/employment_income.csv\")" + "pd.read_csv(\n", + " \"../policyengine_uk_data/datasets/frs/local_areas/constituencies/targets/employment_income.csv\"\n", + ")" ] }, { diff --git a/docs/methodology.ipynb b/docs/methodology.ipynb index 019359af7..892371e33 100644 --- a/docs/methodology.ipynb +++ b/docs/methodology.ipynb @@ -343,10 +343,12 @@ "import pandas as pd\n", "from itables import init_notebook_mode\n", "import itables.options as opt\n", + "\n", "opt.maxBytes = \"1MB\"\n", "\n", "init_notebook_mode(all_interactive=True)\n", "\n", + "\n", "def get_loss(dataset, reform, time_period):\n", " loss_results = get_loss_results(dataset, time_period, reform)\n", "\n", @@ -362,6 +364,7 @@ " loss_results[\"type\"] = loss_results.name.apply(get_type)\n", " return loss_results\n", "\n", + "\n", "reported_benefits = Reform.from_dict(\n", " {\n", " \"gov.contrib.policyengine.disable_simulated_benefits\": True,\n", @@ -414,10 +417,10 @@ 0.5348574279909283, 0.33769630678188683, 0.4724366479253982, - 0.056382436859752076, + 0.05638243685975208, 0.6323673982942971, 0.9366950828549806, - 0.11071012537714943, + 0.11071012537714944, 0.43224912766430945, 0.4932054498037529, 0.4262926636695862, @@ -428,7 +431,7 @@ 0.2753207784816197, 0.2723132074858013, 0.8419023088645935, - 0.38591469862268324, + 0.3859146986226832, 1, 1, 0.238984410722884, @@ -438,7 +441,7 @@ 0.07523280073668749, 0.6813040233247071, 1, - 0.09093881776916021, + 0.0909388177691602, 0.7953899467026746, 0.46345942912091304, 1, @@ -482,7 +485,7 @@ 0.056645307043117855, 0.04613438039833396, 0.17900131626571955, - 0.10437807305849049, + 0.10437807305849048, 0.04133579034001572, 0.01689458524887367, 0.07304306223253616, @@ -492,17 +495,17 @@ 0.02570792373305158, 0.2117702613060218, 0.06261644764053753, - 0.11048854967581091, + 0.11048854967581093, 0.05986189620431352, 0.0734813370478334, 0.14993575315772975, - 0.09216128695626061, + 0.0921612869562606, 0.06654101281860526, - 0.030242342070154266, + 0.030242342070154263, 0.20055057386109623, - 0.09490778457907527, + 0.09490778457907528, 0.15890381361993214, - 0.09147390139813617, + 0.09147390139813616, 0.07256124815551596, 0.14006199032920222, 0.06918929372173468, @@ -518,7 +521,7 @@ 0.05198270343522084, 0.02894483338415261, 0.19421519077802243, - 0.10227911889430563, + 0.10227911889430565, 0.022966625631099497, 0.04223796065100079, 0.07170636880139063, @@ -529,7 +532,7 @@ 0.29116400964006, 0.12510157647671732, 0.04460071124716505, - 0.13148586282369953, + 0.1314858628236995, 0.12155024410317292, 0.1461505005795304, 0.09404538078799742, @@ -537,14 +540,14 @@ 0.07684637616907224, 0.1739255359932635, 0.06968768763060447, - 0.048645930535396516, + 0.04864593053539651, 0.04715293440571787, - 0.061179568577858875, + 0.06117956857785887, 0.14599437481067512, 0.07423160697853332, 0.08014078307669949, 0.04069199366965248, - 0.18721575205594373, + 0.1872157520559437, 0.14720678189147138, 0.021828844225395293, 0.0625035269825679, @@ -556,8 +559,8 @@ 0.2078691963062574, 0.15441863025620098, 0.10737865871453003, - 0.09454279295816925, - 0.09633845930037417, + 0.09454279295816924, + 0.09633845930037416, 0.15611969585317312, 0.06873017587493258, 0.0791181851399526, @@ -567,11 +570,11 @@ 0.06731946150876865, 0.14050146457241625, 0.08162092155759072, - 0.11667759541165895, + 0.11667759541165897, 0.030289924549114444, 0.06339486495990165, 0.05731838222169315, - 0.18678826188260914, + 0.1867882618826091, 0.13891870679655027, 0.11545889612594176, 0.09001345414783932, @@ -617,14 +620,14 @@ 1, 1, 1, - 0.19149475966528307, - 0.24673403495070823, + 0.1914947596652831, + 0.24673403495070825, 0.3072794892096273, 0.379747974710697, 0.31039305362432584, 0.10848112754405202, 0.010619302099698998, - 0.13888253105203519, + 0.1388825310520352, 0.4073900806470958, 0.2531205286437236, 0.792085840046357, @@ -636,7 +639,7 @@ 0.03973531361317136, 0.2350261871873867, 0.4310117757696614, - 0.24612705047281858, + 0.24612705047281855, 0.12926913086657413, 0.21946877869613568, 0.5242635218702594, @@ -646,7 +649,7 @@ 0.7638985610220846, 0.41669987859417645, 0.06992062037206717, - 0.038954754430174716, + 0.03895475443017472, 0.1702228614236586, 0.3644700400430666, 0.40331728828616015, @@ -662,7 +665,7 @@ 0.054489850288550065, 0.12854107496184566, 0.11753069172373803, - 0.11321576973743849, + 0.11321576973743848, 0.34453613181731424, 0.1244811487104074, 0.06072357901250304, @@ -673,23 +676,23 @@ 0.30226424119880596, 0.8856712808994389, 0.4473604762734464, - 0.11850271000359837, + 0.11850271000359835, 0.003773473791717633, 0.5013148425482626, 0.032331919418836653, 0.3379801252340519, 0.1101123244337878, 0.002039042612175903, - 0.10686992109637393, + 0.10686992109637392, 0.529566917828135, 0.364533754611501, 0.8714741500070979, 0.3758462425849252, - 0.9160445175597087, + 0.9160445175597088, 0.4321594002439528, 0.06604500940315498, 0.03876573963794215, - 0.9211680206617687, + 0.9211680206617688, 0.22674879162547168, 0.3174237803524217, 0.10323686418719458, @@ -700,28 +703,28 @@ 1, 0.5029385157743501, 0.8471689821257619, - 0.21622933403097414, + 0.21622933403097416, 0.1315637600363954, - 0.014207504312205523, + 0.014207504312205525, 1, 0.43231937365869, 0.17936817815314637, 0.09069845844777394, 0.21653261782151073, - 0.10904328492252735, + 0.10904328492252736, 0.45696603359696897, 0.10027631605492449, - 0.14383375266562679, + 0.1438337526656268, 0.7256394043973807, 0.8423948571490725, 0.2589492388346111, 0.2353889384491187, - 0.14548208834407161, - 0.9851234327091203, + 0.1454820883440716, + 0.9851234327091204, 0.3397268942949406, - 0.39379184896953845, + 0.3937918489695385, 0.0786704963046369, - 0.09609641144440331, + 0.09609641144440333, 0.0074050587750516175, 0.6175878341381192, 0.2383574886847478, @@ -741,7 +744,7 @@ 0.273695744109254, 0.10090010598312445, 0.5747793559128609, - 0.9107437140452885, + 0.9107437140452884, 0.34401959095740303, 1, 1, @@ -750,7 +753,7 @@ 0.8684502342014089, 0.7344750160682946, 0.8686393732736275, - 0.43612221537997764, + 0.4361222153799776, 0.6769238453303257, 0.20200886652112943, 0.0266550847359111, @@ -760,10 +763,10 @@ 0.987640762587488, 0.9835036985633404, 0.14278799841519346, - 0.22361630855449383, + 0.22361630855449385, 0.9296698695284026, 0.827944905065973, - 0.9949206266575741, + 0.994920626657574, 0.8286463118940774, 0.7054238995836947, 0.6591252642520408, @@ -772,7 +775,7 @@ 0.9789237514645788, 0.7658415543956806, 0.9979477775731906, - 0.9599693030243193, + 0.9599693030243192, 0.8309056529775222, 0.7063448234599843, 0.025177568881478127, @@ -781,14 +784,14 @@ 1, 0.9926826451871978, 0.9128835701758315, - 0.9947484624313153, + 0.9947484624313152, 0.8693660641248387, 1, 1, 0.035836087718205054, 0.03891346894228955, 0.13385158796068683, - 0.12169435256029607, + 0.12169435256029608, 0.2836104416464377, 0.05094982968902384, 0.034294312830338104, @@ -797,7 +800,7 @@ 0.257589360854839, 0.6548370775766441, 0.2932462480888995, - 0.9040908751204909, + 0.9040908751204908, 0.33043668625756617 ], "xaxis": "x", @@ -1687,8 +1690,9 @@ "def format_fig(fig):\n", " return fig\n", "\n", + "\n", "loss_results.abs_rel_error = loss_results.abs_rel_error.clip(0, 1)\n", - " \n", + "\n", "fig = px.histogram(\n", " loss_results,\n", " x=\"abs_rel_error\",\n", @@ -1704,7 +1708,8 @@ " legend_title=\"Category\",\n", " xaxis_title=\"Absolute relative error\",\n", " yaxis_title=\"Number of variables\",\n", - " xaxis_tickformat=\".0%\",)\n", + " xaxis_tickformat=\".0%\",\n", + ")\n", "format_fig(fig)" ] }, @@ -1818,7 +1823,7 @@ "xaxis": "x", "y": [ 1033786.8507080078, - 1849527.0676879883, + 1849527.0676879885, 6388252.871826172, 5427908.585449219, 3428882.373565674, @@ -1826,7 +1831,7 @@ 1826260.506072998, 993297.5881958008, 313195.4216308594, - 204920.73791503906, + 204920.73791503903, 218245.16729736328, 845.8313598632812, 1075.3846435546875, @@ -1875,7 +1880,7 @@ -505763.6264343262, -13854.41845703125, -73651.49392700195, - 13914.588195800781, + 13914.58819580078, 39777.421630859375, 5029.7379150390625, 111758.16729736328, @@ -1920,18 +1925,18 @@ "xaxis": "x", "y": [ 1.5997536783990056, - -0.24673403495070823, + -0.24673403495070825, -0.05017349529511451, - -0.038954754430174716, + -0.03895475443017472, -0.12854107496184566, -0.003773473791717633, -0.03876573963794215, - 0.014207504312205523, - 0.14548208834407161, + 0.014207504312205525, + 0.1454820883440716, 0.02516240308487657, 1.0495005709369527, -0.9835036985633404, - -0.9599693030243193, + -0.9599693030243192, -0.03891346894228955 ], "yaxis": "y" @@ -1977,7 +1982,7 @@ 505763.6264343262, 13854.41845703125, 73651.49392700195, - 13914.588195800781, + 13914.58819580078, 39777.421630859375, 5029.7379150390625, 111758.16729736328, @@ -2022,18 +2027,18 @@ "xaxis": "x", "y": [ 1.5997536783990056, - 0.24673403495070823, + 0.24673403495070825, 0.05017349529511451, - 0.038954754430174716, + 0.03895475443017472, 0.12854107496184566, 0.003773473791717633, 0.03876573963794215, - 0.014207504312205523, - 0.14548208834407161, + 0.014207504312205525, + 0.1454820883440716, 0.02516240308487657, 1.0495005709369527, 0.9835036985633404, - 0.9599693030243193, + 0.9599693030243192, 0.03891346894228955 ], "yaxis": "y" @@ -2908,9 +2913,7 @@ "\n", "variable = \"employment_income\"\n", "count = True\n", - "variable_df = incomes[\n", - " (incomes.variable == variable) & (incomes[\"count\"] == count)\n", - "]\n", + "variable_df = incomes[(incomes.variable == variable) & (incomes[\"count\"] == count)]\n", "\n", "fig = px.bar(\n", " variable_df,\n", @@ -3141,7 +3144,7 @@ -0.3647842083382897, -0.4473604762734464, -0.4321594002439528, - -0.21622933403097414, + -0.21622933403097416, -0.2589492388346111, -0.2627782706474406, -0.34401959095740303, @@ -3243,7 +3246,7 @@ 0.3647842083382897, 0.4473604762734464, 0.4321594002439528, - 0.21622933403097414, + 0.21622933403097416, 0.2589492388346111, 0.2627782706474406, 0.34401959095740303, @@ -4115,9 +4118,7 @@ "source": [ "variable = \"dividend_income\"\n", "count = True\n", - "variable_df = incomes[\n", - " (incomes.variable == variable) & (incomes[\"count\"] == count)\n", - "]\n", + "variable_df = incomes[(incomes.variable == variable) & (incomes[\"count\"] == count)]\n", "\n", "fig = px.bar(\n", " variable_df,\n", @@ -5086,13 +5087,13 @@ ], "xaxis": "x", "y": [ - 1023954.3214297295, + 1023954.3214297296, 1839525.9950885773, 6387366.710470676, 5448517.7086930275, 3420158.3231887817, 3675436.0024147034, - 1825305.0654945374, + 1825305.0654945376, 995032.6799602509, 313195.4208712578, 204920.7386112213, @@ -5201,7 +5202,7 @@ 1.0495005534866122, -0.9835036991585266, -0.9599693047283234, - -0.038545385074422164 + -0.03854538507442216 ], "yaxis": "y" }, @@ -5303,7 +5304,7 @@ 1.0495005534866122, 0.9835036991585266, 0.9599693047283234, - 0.038545385074422164 + 0.03854538507442216 ], "yaxis": "y" } @@ -6174,9 +6175,7 @@ "\n", "variable = \"employment_income\"\n", "count = True\n", - "variable_df = incomes[\n", - " (incomes.variable == variable) & (incomes[\"count\"] == count)\n", - "]\n", + "variable_df = incomes[(incomes.variable == variable) & (incomes[\"count\"] == count)]\n", "\n", "fig = px.bar(\n", " variable_df,\n", @@ -6544,7 +6543,7 @@ 1606618.6615459488, 304110.76789712394, 217559.91823041387, - 115103.32235789967, + 115103.32235789968, 49768.71192988932, 21825.629729905864, 27469371.2800173 @@ -6647,7 +6646,7 @@ 0.11225584232612314, 0.0883927652091083, 0.08091431214983678, - -0.029357726530223446, + -0.02935772653022345, -0.18755100767175908, 0.04168935771146878 ], @@ -6749,7 +6748,7 @@ 0.11225584232612314, 0.0883927652091083, 0.08091431214983678, - 0.029357726530223446, + 0.02935772653022345, 0.18755100767175908, 0.04168935771146878 ], @@ -7622,9 +7621,7 @@ "\n", "variable = \"employment_income\"\n", "count = True\n", - "variable_df = incomes[\n", - " (incomes.variable == variable) & (incomes[\"count\"] == count)\n", - "]\n", + "variable_df = incomes[(incomes.variable == variable) & (incomes[\"count\"] == count)]\n", "\n", "fig = px.bar(\n", " variable_df,\n", diff --git a/docs/validation/constituencies.ipynb b/docs/validation/constituencies.ipynb index 782613f0d..bb6768724 100644 --- a/docs/validation/constituencies.ipynb +++ b/docs/validation/constituencies.ipynb @@ -177,35 +177,72 @@ "\n", "weights_file_path = STORAGE_FOLDER / \"parliamentary_constituency_weights.h5\"\n", "with h5py.File(weights_file_path, \"r\") as f:\n", - " weights = f[str(2025)][...]\n", + " weights = f[str(2025)][...]\n", "\n", "constituencies = pd.read_csv(STORAGE_FOLDER / \"constituencies_2024.csv\")\n", "\n", "baseline = Microsimulation()\n", "household_weights = baseline.calculate(\"household_weight\", 2025).values\n", "\n", - "from policyengine_uk_data.datasets.frs.local_areas.constituencies.loss import create_constituency_target_matrix, create_national_target_matrix\n", + "from policyengine_uk_data.datasets.frs.local_areas.constituencies.loss import (\n", + " create_constituency_target_matrix,\n", + " create_national_target_matrix,\n", + ")\n", "from policyengine_uk_data.datasets import EnhancedFRS_2022_23\n", - "constituency_target_matrix, constituency_actuals, _ = create_constituency_target_matrix(EnhancedFRS_2022_23, 2025, None)\n", - "national_target_matrix, national_actuals = create_national_target_matrix(EnhancedFRS_2022_23, 2025, None)\n", + "\n", + "constituency_target_matrix, constituency_actuals, _ = create_constituency_target_matrix(\n", + " EnhancedFRS_2022_23, 2025, None\n", + ")\n", + "national_target_matrix, national_actuals = create_national_target_matrix(\n", + " EnhancedFRS_2022_23, 2025, None\n", + ")\n", "\n", "constituency_wide = weights @ constituency_target_matrix\n", "constituency_wide.index = constituencies.code.values\n", "constituency_wide[\"name\"] = constituencies.name.values\n", "\n", - "constituency_results = pd.melt(constituency_wide.reset_index(), id_vars=[\"index\", \"name\"], var_name=\"variable\", value_name=\"value\")\n", + "constituency_results = pd.melt(\n", + " constituency_wide.reset_index(),\n", + " id_vars=[\"index\", \"name\"],\n", + " var_name=\"variable\",\n", + " value_name=\"value\",\n", + ")\n", "\n", "constituency_actuals.index = constituencies.code.values\n", "constituency_actuals[\"name\"] = constituencies.name.values\n", - "constituency_actuals_long = pd.melt(constituency_actuals.reset_index(), id_vars=[\"index\", \"name\"], var_name=\"variable\", value_name=\"value\")\n", + "constituency_actuals_long = pd.melt(\n", + " constituency_actuals.reset_index(),\n", + " id_vars=[\"index\", \"name\"],\n", + " var_name=\"variable\",\n", + " value_name=\"value\",\n", + ")\n", "\n", - "constituency_target_validation = pd.merge(constituency_results, constituency_actuals_long, on=[\"index\", \"variable\"], suffixes=(\"_target\", \"_actual\"))\n", + "constituency_target_validation = pd.merge(\n", + " constituency_results,\n", + " constituency_actuals_long,\n", + " on=[\"index\", \"variable\"],\n", + " suffixes=(\"_target\", \"_actual\"),\n", + ")\n", "constituency_target_validation.drop(\"name_actual\", axis=1, inplace=True)\n", - "constituency_target_validation.columns = [\"index\", \"name\", \"metric\", \"estimate\", \"target\"]\n", + "constituency_target_validation.columns = [\n", + " \"index\",\n", + " \"name\",\n", + " \"metric\",\n", + " \"estimate\",\n", + " \"target\",\n", + "]\n", "\n", - "constituency_target_validation[\"error\"] = constituency_target_validation[\"estimate\"] - constituency_target_validation[\"target\"]\n", - "constituency_target_validation[\"abs_error\"] = constituency_target_validation[\"error\"].abs()\n", - "constituency_target_validation[\"rel_abs_error\"] = constituency_target_validation[\"abs_error\"] / constituency_target_validation[\"target\"]" + "constituency_target_validation[\"error\"] = (\n", + " constituency_target_validation[\"estimate\"]\n", + " - constituency_target_validation[\"target\"]\n", + ")\n", + "constituency_target_validation[\"abs_error\"] = constituency_target_validation[\n", + " \"error\"\n", + "].abs()\n", + "constituency_target_validation[\"rel_abs_error\"] = (\n", + " constituency_target_validation[\"abs_error\"]\n", + " / constituency_target_validation[\"target\"]\n", + ")" ] }, { @@ -398,12 +435,18 @@ "outputs": [], "source": [ "national_performance = household_weights @ national_target_matrix\n", - "national_target_validation = pd.DataFrame({\"metric\": national_performance.index, \"estimate\": national_performance.values})\n", + "national_target_validation = pd.DataFrame(\n", + " {\"metric\": national_performance.index, \"estimate\": national_performance.values}\n", + ")\n", "national_target_validation[\"target\"] = national_actuals.values\n", "\n", - "national_target_validation[\"error\"] = national_target_validation[\"estimate\"] - national_target_validation[\"target\"]\n", + "national_target_validation[\"error\"] = (\n", + " national_target_validation[\"estimate\"] - national_target_validation[\"target\"]\n", + ")\n", "national_target_validation[\"abs_error\"] = national_target_validation[\"error\"].abs()\n", - "national_target_validation[\"rel_abs_error\"] = national_target_validation[\"abs_error\"] / national_target_validation[\"target\"]" + "national_target_validation[\"rel_abs_error\"] = (\n", + " national_target_validation[\"abs_error\"] / national_target_validation[\"target\"]\n", + ")" ] }, { diff --git a/docs/validation/local_authorities.ipynb b/docs/validation/local_authorities.ipynb index 10998700f..1a580330f 100644 --- a/docs/validation/local_authorities.ipynb +++ b/docs/validation/local_authorities.ipynb @@ -196,29 +196,65 @@ "baseline = Microsimulation()\n", "household_weights = baseline.calculate(\"household_weight\", 2025).values\n", "\n", - "from policyengine_uk_data.datasets.frs.local_areas.local_authorities.loss import create_local_authority_target_matrix, create_national_target_matrix\n", + "from policyengine_uk_data.datasets.frs.local_areas.local_authorities.loss import (\n", + " create_local_authority_target_matrix,\n", + " create_national_target_matrix,\n", + ")\n", "from policyengine_uk_data.datasets import EnhancedFRS_2022_23\n", "\n", - "local_authority_target_matrix, local_authority_actuals, _ = create_local_authority_target_matrix(EnhancedFRS_2022_23, 2025, None)\n", - "national_target_matrix, national_actuals = create_national_target_matrix(EnhancedFRS_2022_23, 2025, None)\n", + "local_authority_target_matrix, local_authority_actuals, _ = (\n", + " create_local_authority_target_matrix(EnhancedFRS_2022_23, 2025, None)\n", + ")\n", + "national_target_matrix, national_actuals = create_national_target_matrix(\n", + " EnhancedFRS_2022_23, 2025, None\n", + ")\n", "\n", "local_authority_wide = weights @ local_authority_target_matrix\n", "local_authority_wide.index = constituencies_2024.code.values\n", "local_authority_wide[\"name\"] = constituencies_2024.name.values\n", "\n", - "local_authority_results = pd.melt(local_authority_wide.reset_index(), id_vars=[\"index\", \"name\"], var_name=\"variable\", value_name=\"value\")\n", + "local_authority_results = pd.melt(\n", + " local_authority_wide.reset_index(),\n", + " id_vars=[\"index\", \"name\"],\n", + " var_name=\"variable\",\n", + " value_name=\"value\",\n", + ")\n", "\n", "local_authority_actuals.index = constituencies_2024.code.values\n", "local_authority_actuals[\"name\"] = constituencies_2024.name.values\n", - "local_authority_actuals_long = pd.melt(local_authority_actuals.reset_index(), id_vars=[\"index\", \"name\"], var_name=\"variable\", value_name=\"value\")\n", + "local_authority_actuals_long = pd.melt(\n", + " local_authority_actuals.reset_index(),\n", + " id_vars=[\"index\", \"name\"],\n", + " var_name=\"variable\",\n", + " value_name=\"value\",\n", + ")\n", "\n", - "local_authority_target_validation = pd.merge(local_authority_results, local_authority_actuals_long, on=[\"index\", \"variable\"], suffixes=(\"_target\", \"_actual\"))\n", + "local_authority_target_validation = pd.merge(\n", + " local_authority_results,\n", + " local_authority_actuals_long,\n", + " on=[\"index\", \"variable\"],\n", + " suffixes=(\"_target\", \"_actual\"),\n", + ")\n", "local_authority_target_validation.drop(\"name_actual\", axis=1, inplace=True)\n", - "local_authority_target_validation.columns = [\"index\", \"name\", \"metric\", \"estimate\", \"target\"]\n", + "local_authority_target_validation.columns = [\n", + " \"index\",\n", + " \"name\",\n", + " \"metric\",\n", + " \"estimate\",\n", + " \"target\",\n", + "]\n", "\n", - "local_authority_target_validation[\"error\"] = local_authority_target_validation[\"estimate\"] - local_authority_target_validation[\"target\"]\n", - "local_authority_target_validation[\"abs_error\"] = local_authority_target_validation[\"error\"].abs()\n", - "local_authority_target_validation[\"rel_abs_error\"] = local_authority_target_validation[\"abs_error\"] / local_authority_target_validation[\"target\"]" + "local_authority_target_validation[\"error\"] = (\n", + " local_authority_target_validation[\"estimate\"]\n", + " - local_authority_target_validation[\"target\"]\n", + ")\n", + "local_authority_target_validation[\"abs_error\"] = local_authority_target_validation[\n", + " \"error\"\n", + "].abs()\n", + "local_authority_target_validation[\"rel_abs_error\"] = (\n", + " local_authority_target_validation[\"abs_error\"]\n", + " / local_authority_target_validation[\"target\"]\n", + ")" ] }, { @@ -400,12 +436,18 @@ "outputs": [], "source": [ "national_performance = household_weights @ national_target_matrix\n", - "national_target_validation = pd.DataFrame({\"metric\": national_performance.index, \"estimate\": national_performance.values})\n", + "national_target_validation = pd.DataFrame(\n", + " {\"metric\": national_performance.index, \"estimate\": national_performance.values}\n", + ")\n", "national_target_validation[\"target\"] = national_actuals.values\n", "\n", - "national_target_validation[\"error\"] = national_target_validation[\"estimate\"] - national_target_validation[\"target\"]\n", + "national_target_validation[\"error\"] = (\n", + " national_target_validation[\"estimate\"] - national_target_validation[\"target\"]\n", + ")\n", "national_target_validation[\"abs_error\"] = national_target_validation[\"error\"].abs()\n", - "national_target_validation[\"rel_abs_error\"] = national_target_validation[\"abs_error\"] / national_target_validation[\"target\"]" + "national_target_validation[\"rel_abs_error\"] = (\n", + " national_target_validation[\"abs_error\"] / national_target_validation[\"target\"]\n", + ")" ] }, { diff --git a/docs/validation/national.ipynb b/docs/validation/national.ipynb index 3ade912a3..ad9b8e3be 100644 --- a/docs/validation/national.ipynb +++ b/docs/validation/national.ipynb @@ -170,10 +170,12 @@ "import pandas as pd\n", "from itables import init_notebook_mode\n", "import itables.options as opt\n", + "\n", "opt.maxBytes = \"1MB\"\n", "\n", "init_notebook_mode(all_interactive=True)\n", "\n", + "\n", "def get_validation():\n", " df = pd.DataFrame()\n", " for dataset in [FRS_2022_23, EnhancedFRS_2022_23]:\n", @@ -185,6 +187,7 @@ " df = df.reset_index(drop=True)\n", " return df\n", "\n", + "\n", "df = get_validation()\n", "truth_df = df[df.dataset == df.dataset.unique()[0]].reset_index()\n", "truth_df[\"estimate\"] = truth_df[\"target\"]\n", @@ -617,7 +620,9 @@ " on=[\"time_period\", \"name\"],\n", " suffixes=(\"_frs\", \"_efrs\"),\n", ")\n", - "merged[\"rel_error_change_under_efrs\"] = merged[\"abs_rel_error_efrs\"] - merged[\"abs_rel_error_frs\"]\n", + "merged[\"rel_error_change_under_efrs\"] = (\n", + " merged[\"abs_rel_error_efrs\"] - merged[\"abs_rel_error_frs\"]\n", + ")\n", "# Sort columns\n", "merged = merged[\n", " [\n", diff --git a/policyengine_uk_data/datasets/childcare/takeup_rate.py b/policyengine_uk_data/datasets/childcare/takeup_rate.py index dcb3ae188..db643e93f 100644 --- a/policyengine_uk_data/datasets/childcare/takeup_rate.py +++ b/policyengine_uk_data/datasets/childcare/takeup_rate.py @@ -59,9 +59,7 @@ def simulate_childcare_programs( np.random.seed(seed) # Take-up flags - sim.set_input( - "would_claim_tfc", 2024, np.random.random(benunit_count) < tfc - ) + sim.set_input("would_claim_tfc", 2024, np.random.random(benunit_count) < tfc) sim.set_input( "would_claim_extended_childcare", 2024, @@ -87,9 +85,7 @@ def simulate_childcare_programs( extended_hours_values = np.clip(extended_hours_values, 0, 30) # Set the maximum extended childcare hours usage variable - sim.set_input( - "maximum_extended_childcare_hours_usage", 2024, extended_hours_values - ) + sim.set_input("maximum_extended_childcare_hours_usage", 2024, extended_hours_values) # Calculate outputs df = sim.calculate_dataframe( @@ -114,14 +110,9 @@ def simulate_childcare_programs( spending = { "tfc": sim.calculate("tax_free_childcare", 2024).sum() / 1e9, - "extended": sim.calculate("extended_childcare_entitlement", 2024).sum() - / 1e9, - "targeted": sim.calculate("targeted_childcare_entitlement", 2024).sum() - / 1e9, - "universal": sim.calculate( - "universal_childcare_entitlement", 2024 - ).sum() - / 1e9, + "extended": sim.calculate("extended_childcare_entitlement", 2024).sum() / 1e9, + "targeted": sim.calculate("targeted_childcare_entitlement", 2024).sum() / 1e9, + "universal": sim.calculate("universal_childcare_entitlement", 2024).sum() / 1e9, } caseload = { @@ -161,13 +152,13 @@ def objective(params: list[float]) -> float: print("\nSpending (£ billion):") for key in targets["spending"]: print( - f" {key.upper()}: {spending[key]:.3f} (Target: {targets['spending'][key]:.3f}, Ratio: {spending[key]/targets['spending'][key]:.3f})" + f" {key.upper()}: {spending[key]:.3f} (Target: {targets['spending'][key]:.3f}, Ratio: {spending[key] / targets['spending'][key]:.3f})" ) print("\nCaseload (thousands):") for key in targets["caseload"]: print( - f" {key.upper()}: {caseload[key]:.1f} (Target: {targets['caseload'][key]:.1f}, Ratio: {caseload[key]/targets['caseload'][key]:.3f})" + f" {key.upper()}: {caseload[key]:.1f} (Target: {targets['caseload'][key]:.1f}, Ratio: {caseload[key] / targets['caseload'][key]:.3f})" ) return loss @@ -211,11 +202,11 @@ def objective(params: list[float]) -> float: print("\nSpending (£ billion):") for key in targets["spending"]: print( - f" {key.upper()}: {final_spending[key]:.3f} (Target: {targets['spending'][key]:.3f}, Ratio: {final_spending[key]/targets['spending'][key]:.3f})" + f" {key.upper()}: {final_spending[key]:.3f} (Target: {targets['spending'][key]:.3f}, Ratio: {final_spending[key] / targets['spending'][key]:.3f})" ) print("\nCaseload (thousands):") for key in targets["caseload"]: print( - f" {key.upper()}: {final_caseload[key]:.1f} (Target: {targets['caseload'][key]:.1f}, Ratio: {final_caseload[key]/targets['caseload'][key]:.3f})" + f" {key.upper()}: {final_caseload[key]:.1f} (Target: {targets['caseload'][key]:.1f}, Ratio: {final_caseload[key] / targets['caseload'][key]:.3f})" ) diff --git a/policyengine_uk_data/datasets/create_datasets.py b/policyengine_uk_data/datasets/create_datasets.py index 07f6b362f..2396946e3 100644 --- a/policyengine_uk_data/datasets/create_datasets.py +++ b/policyengine_uk_data/datasets/create_datasets.py @@ -165,9 +165,7 @@ def main(): # Downrate and save update_dataset("Downrate to 2023", "processing") - frs_calibrated = uprate_dataset( - frs_calibrated_constituencies, 2023 - ) + frs_calibrated = uprate_dataset(frs_calibrated_constituencies, 2023) update_dataset("Downrate to 2023", "completed") update_dataset("Save final dataset", "processing") diff --git a/policyengine_uk_data/datasets/frs.py b/policyengine_uk_data/datasets/frs.py index 37ce70dcd..7cc05cd36 100644 --- a/policyengine_uk_data/datasets/frs.py +++ b/policyengine_uk_data/datasets/frs.py @@ -74,23 +74,17 @@ def create_frs( if "benunit" in df.columns: # In the tables, benunit is the index of the benefit unit *within* the household. df.rename(columns={"benunit": "benunit_id"}, inplace=True) - df["benunit_id"] = ( - df["household_id"] * 1e2 + df["benunit_id"] - ).astype(int) + df["benunit_id"] = (df["household_id"] * 1e2 + df["benunit_id"]).astype(int) if "person" in df.columns: df.rename(columns={"person": "person_id"}, inplace=True) - df["person_id"] = ( - df["household_id"] * 1e3 + df["person_id"] - ).astype(int) + df["person_id"] = (df["household_id"] * 1e3 + df["person_id"]).astype(int) frs[table_name] = df # Combine adult and child tables for convenience - frs["person"] = ( - pd.concat([frs["adult"], frs["child"]]).sort_index().fillna(0) - ) + frs["person"] = pd.concat([frs["adult"], frs["child"]]).sort_index().fillna(0) person = frs["person"] benunit = frs["benunit"] @@ -165,12 +159,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): elif ( typeed2_val in (2, 4) or (typeed2_val in (3, 8) and age_val < 11) - or ( - typeed2_val == 0 - and fted_val == 1 - and age_val > 5 - and age_val < 11 - ) + or (typeed2_val == 0 and fted_val == 1 and age_val > 5 and age_val < 11) ): return "PRIMARY" # In lower secondary @@ -191,19 +180,14 @@ def determine_education_level(fted_val, typeed2_val, age_val): elif typeed2_val in (7, 8) and age_val >= 19: return "POST_SECONDARY" # In tertiary - elif typeed2_val == 9 or ( - typeed2_val == 0 and fted_val == 1 and age_val >= 19 - ): + elif typeed2_val == 9 or (typeed2_val == 0 and fted_val == 1 and age_val >= 19): return "TERTIARY" else: return "NOT_IN_EDUCATION" # Apply the function to determine education level pe_person["current_education"] = pd.Series( - [ - determine_education_level(f, t, a) - for f, t, a in zip(fted, typeed2, age) - ], + [determine_education_level(f, t, a) for f, t, a in zip(fted, typeed2, age)], index=pe_person.index, ) @@ -326,9 +310,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): ctannual = household.ctannual[CT_valid] # Build the table - ct_mean = ctannual.groupby( - [region, band, single_person], dropna=False - ).mean() + ct_mean = ctannual.groupby([region, band, single_person], dropna=False).mean() ct_mean = ct_mean.replace(-1, ct_mean.mean()) # For every household consult the table to find the imputed @@ -357,9 +339,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): BANDS = ["A", "B", "C", "D", "E", "F", "G", "H", "I"] # Band 1 is the most common pe_household["council_tax_band"] = ( - categorical(household.ctband, 1, range(1, 10), BANDS) - .fillna("D") - .values + categorical(household.ctband, 1, range(1, 10), BANDS).fillna("D").values ) # Domestic rates variables are all weeklyised, unlike Council Tax variables (despite the variable name suggesting otherwise) if year < 2021: @@ -384,9 +364,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): WEEKS_IN_YEAR = 365.25 / 7 - pe_person["employment_income"] = ( - np.maximum(0, person.inearns) * WEEKS_IN_YEAR - ) + pe_person["employment_income"] = np.maximum(0, person.inearns) * WEEKS_IN_YEAR pension_payment = sum_to_entity( pension.penpay * (pension.penpay > 0), @@ -400,10 +378,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): ) pension_deductions_removed = sum_to_entity( pension.poamt - * ( - ((pension.poinc == 2) | (pension.penoth == 1)) - & (pension.poamt > 0) - ), + * (((pension.poinc == 2) | (pension.penoth == 1)) & (pension.poamt > 0)), pension.person_id, person.person_id, ) @@ -412,9 +387,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): pension_payment + pension_tax_paid + pension_deductions_removed ) * WEEKS_IN_YEAR - pe_person["self_employment_income"] = ( - np.maximum(0, person.seincam2) * WEEKS_IN_YEAR - ) + pe_person["self_employment_income"] = np.maximum(0, person.seincam2) * WEEKS_IN_YEAR INVERTED_BASIC_RATE = 1.25 @@ -429,10 +402,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): ) taxable_savings_interest = ( sum_to_entity( - ( - account.accint - * np.where(account.acctax == 1, INVERTED_BASIC_RATE, 1) - ) + (account.accint * np.where(account.acctax == 1, INVERTED_BASIC_RATE, 1)) * (account.account.isin((1, 3, 5, 27, 28))), account.person_id, person.person_id, @@ -446,10 +416,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): pe_person["dividend_income"] = np.maximum( 0, sum_to_entity( - ( - account.accint - * np.where(account.invtax == 1, INVERTED_BASIC_RATE, 1) - ) + (account.accint * np.where(account.invtax == 1, INVERTED_BASIC_RATE, 1)) * ( ((account.account == 6) & (account.invtax == 1)) # GGES | account.account.isin((7, 8)) # Stocks/shares/UITs @@ -474,16 +441,14 @@ def determine_education_level(fted_val, typeed2_val, age_val): pe_person["property_income"] = ( np.maximum( 0, - is_head * persons_household_property_income - + person.cvpay - + person.royyr1, + is_head * persons_household_property_income + person.cvpay + person.royyr1, ) * WEEKS_IN_YEAR ) maintenance_to_self = np.maximum( - pd.Series( - np.where(person.mntus1 == 2, person.mntusam1, person.mntamt1) - ).fillna(0), + pd.Series(np.where(person.mntus1 == 2, person.mntusam1, person.mntamt1)).fillna( + 0 + ), 0, ) maintenance_from_dwp = person.mntamt2 @@ -519,8 +484,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): ] pe_person["private_transfer_income"] = ( - sum_from_positive_fields(person, PRIVATE_TRANSFER_INCOME_FIELDS) - * WEEKS_IN_YEAR + sum_from_positive_fields(person, PRIVATE_TRANSFER_INCOME_FIELDS) * WEEKS_IN_YEAR ) pe_person["lump_sum_income"] = person.redamt @@ -561,9 +525,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): pe_person["jsa_contrib_reported"] = ( sum_to_entity( - benefits.benamt - * (benefits.var2.isin((1, 3))) - * (benefits.benefit == 14), + benefits.benamt * (benefits.var2.isin((1, 3))) * (benefits.benefit == 14), benefits.person_id, person.person_id, ) @@ -571,9 +533,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): ) pe_person["jsa_income_reported"] = ( sum_to_entity( - benefits.benamt - * (benefits.var2.isin((2, 4))) - * (benefits.benefit == 14), + benefits.benamt * (benefits.var2.isin((2, 4))) * (benefits.benefit == 14), benefits.person_id, person.person_id, ) @@ -581,9 +541,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): ) pe_person["esa_contrib_reported"] = ( sum_to_entity( - benefits.benamt - * (benefits.var2.isin((1, 3))) - * (benefits.benefit == 16), + benefits.benamt * (benefits.var2.isin((1, 3))) * (benefits.benefit == 16), benefits.person_id, person.person_id, ) @@ -591,9 +549,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): ) pe_person["esa_income_reported"] = ( sum_to_entity( - benefits.benamt - * (benefits.var2.isin((2, 4))) - * (benefits.benefit == 16), + benefits.benamt * (benefits.var2.isin((2, 4))) * (benefits.benefit == 16), benefits.person_id, person.person_id, ) @@ -647,9 +603,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): pe_person["maintenance_expenses"] = ( pd.Series( - np.where( - maintenance.mrus == 2, maintenance.mruamt, maintenance.mramt - ) + np.where(maintenance.mrus == 2, maintenance.mruamt, maintenance.mramt) ) .groupby(maintenance.person_id) .sum() @@ -662,9 +616,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): pe_household["mortgage_interest_repayment"] = ( household.mortint.fillna(0).values * WEEKS_IN_YEAR ) - mortgage_capital = np.where( - mortgage.rmort == 1, mortgage.rmamt, mortgage.borramt - ) + mortgage_capital = np.where(mortgage.rmort == 1, mortgage.rmamt, mortgage.borramt) mortgage_capital_repayment = sum_to_entity( mortgage_capital / mortgage.mortend, mortgage.household_id, @@ -674,9 +626,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): pe_person["childcare_expenses"] = ( sum_to_entity( - childcare.chamt - * (childcare.cost == 1) - * (childcare.registrd == 1), + childcare.chamt * (childcare.cost == 1) * (childcare.registrd == 1), childcare.person_id, person.person_id, ) @@ -721,10 +671,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): # respondents who were not asked the question (imputation candidates) if "salsac_raw" in job.columns: salsac_numeric = ( - job["salsac_raw"] - .map({"1": 1, "2": 0, " ": -1}) - .fillna(-1) - .astype(int) + job["salsac_raw"].map({"1": 1, "2": 0, " ": -1}).fillna(-1).astype(int) ) # Aggregate to person level: take max (any job with SS = person has SS) pe_person["salary_sacrifice_reported"] = np.clip( @@ -795,17 +742,13 @@ def determine_education_level(fted_val, typeed2_val, age_val): from policyengine_uk import Microsimulation sim = Microsimulation(dataset=dataset) - region = sim.populations["benunit"].household( - "region", dataset.time_period - ) + region = sim.populations["benunit"].household("region", dataset.time_period) lha_category = sim.calculate("LHA_category", year) brma = np.empty(len(region), dtype=object) # Sample from a random BRMA in the region, weighted by the number of observations in each BRMA - lha_list_of_rents = pd.read_csv( - STORAGE_FOLDER / "lha_list_of_rents.csv.gz" - ) + lha_list_of_rents = pd.read_csv(STORAGE_FOLDER / "lha_list_of_rents.csv.gz") lha_list_of_rents = lha_list_of_rents.copy() for possible_region in lha_list_of_rents.region.unique(): @@ -813,9 +756,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): lor_mask = (lha_list_of_rents.region == possible_region) & ( lha_list_of_rents.lha_category == possible_lha_category ) - mask = (region == possible_region) & ( - lha_category == possible_lha_category - ) + mask = (region == possible_region) & (lha_category == possible_lha_category) brma[mask] = lha_list_of_rents[lor_mask].brma.sample( n=len(region[mask]), replace=True ) @@ -831,9 +772,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): } ) - df = df.groupby("household_id").brma.aggregate( - lambda x: x.sample(n=1).iloc[0] - ) + df = df.groupby("household_id").brma.aggregate(lambda x: x.sample(n=1).iloc[0]) brmas = df[sim.calculate("household_id")].values pe_household["brma"] = brmas @@ -862,8 +801,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): ) paragraph_4 = ( pe_person.pip_dl_reported - >= benefit.pip.daily_living.enhanced * WEEKS_IN_YEAR - - THRESHOLD_SAFETY_GAP + >= benefit.pip.daily_living.enhanced * WEEKS_IN_YEAR - THRESHOLD_SAFETY_GAP ) paragraph_5 = pe_person.afcs_reported > 0 pe_person["is_severely_disabled_for_benefits"] = ( @@ -882,9 +820,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): pension_credit_rate = load_take_up_rate("pension_credit", year) universal_credit_rate = load_take_up_rate("universal_credit", year) marriage_allowance_rate = load_take_up_rate("marriage_allowance", year) - child_benefit_opts_out_rate = load_take_up_rate( - "child_benefit_opts_out_rate", year - ) + child_benefit_opts_out_rate = load_take_up_rate("child_benefit_opts_out_rate", year) tfc_rate = load_take_up_rate("tax_free_childcare", year) extended_childcare_rate = load_take_up_rate("extended_childcare", year) universal_childcare_rate = load_take_up_rate("universal_childcare", year) @@ -911,9 +847,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): pe_benunit["would_claim_uc"] = ( generator.random(len(pe_benunit)) < universal_credit_rate ) - pe_benunit["would_claim_tfc"] = ( - generator.random(len(pe_benunit)) < tfc_rate - ) + pe_benunit["would_claim_tfc"] = generator.random(len(pe_benunit)) < tfc_rate pe_benunit["would_claim_extended_childcare"] = ( generator.random(len(pe_benunit)) < extended_childcare_rate ) @@ -937,12 +871,8 @@ def determine_education_level(fted_val, typeed2_val, age_val): # Generate other stochastic variables using rates from parameter files tv_ownership_rate = load_parameter("stochastic", "tv_ownership_rate", year) - tv_evasion_rate = load_parameter( - "stochastic", "tv_licence_evasion_rate", year - ) - first_time_buyer_rate = load_parameter( - "stochastic", "first_time_buyer_rate", year - ) + tv_evasion_rate = load_parameter("stochastic", "tv_licence_evasion_rate", year) + first_time_buyer_rate = load_parameter("stochastic", "first_time_buyer_rate", year) # Household-level: TV ownership pe_household["household_owns_tv"] = ( @@ -963,9 +893,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): pe_person["higher_earner_tie_break"] = generator.random(len(pe_person)) # Person-level: Private school attendance random draw - pe_person["attends_private_school_random_draw"] = generator.random( - len(pe_person) - ) + pe_person["attends_private_school_random_draw"] = generator.random(len(pe_person)) # Generate extended childcare hours usage values with mean 15.019 and sd # 4.972 @@ -974,9 +902,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): extended_hours_values = np.clip(extended_hours_values, 0, 30) # Add the maximum extended childcare hours usage - pe_benunit["maximum_extended_childcare_hours_usage"] = ( - extended_hours_values - ) + pe_benunit["maximum_extended_childcare_hours_usage"] = extended_hours_values # Add marital status at the benefit unit level diff --git a/policyengine_uk_data/datasets/imputations/capital_gains.py b/policyengine_uk_data/datasets/imputations/capital_gains.py index 7408af5a6..65dfb9b8b 100644 --- a/policyengine_uk_data/datasets/imputations/capital_gains.py +++ b/policyengine_uk_data/datasets/imputations/capital_gains.py @@ -20,9 +20,9 @@ capital_gains = pd.read_csv( STORAGE_FOLDER / "capital_gains_distribution_advani_summers.csv.gz" ) -capital_gains["maximum_total_income"] = ( - capital_gains.minimum_total_income.shift(-1).fillna(np.inf) -) +capital_gains["maximum_total_income"] = capital_gains.minimum_total_income.shift( + -1 +).fillna(np.inf) # Silence verbose logging logging.getLogger("root").setLevel(logging.WARNING) @@ -82,9 +82,7 @@ def loss(blend_factor): sim.map_result(ti_in_range, "person", "household", how="sum") ) household_cg_in_income_range_count = torch.tensor( - sim.map_result( - cg_in_income_range, "person", "household", how="sum" - ) + sim.map_result(cg_in_income_range, "person", "household", how="sum") ) pred_ti_in_range = ( blended_household_weight * household_ti_in_range_count @@ -116,9 +114,9 @@ def loss(blend_factor): new_household_weight[first_half] = ( blend_factor * original_household_weight[first_half] ) - new_household_weight[~first_half] = ( - 1 - blend_factor - ) * original_household_weight[first_half] + new_household_weight[~first_half] = (1 - blend_factor) * original_household_weight[ + first_half + ] # Impute actual capital gains amounts given gains new_cg = np.zeros(len(ti)) diff --git a/policyengine_uk_data/datasets/imputations/consumption.py b/policyengine_uk_data/datasets/imputations/consumption.py index 34cbdc8bf..639e95d47 100644 --- a/policyengine_uk_data/datasets/imputations/consumption.py +++ b/policyengine_uk_data/datasets/imputations/consumption.py @@ -213,16 +213,14 @@ def impute_has_fuel_to_lcfs(household: pd.DataFrame) -> pd.DataFrame: output_df = model.predict(input_df) # Clip to [0, 1] as it's a probability - household["has_fuel_consumption"] = output_df[ - "has_fuel_consumption" - ].values.clip(0, 1) + household["has_fuel_consumption"] = output_df["has_fuel_consumption"].values.clip( + 0, 1 + ) return household -def generate_lcfs_table( - lcfs_person: pd.DataFrame, lcfs_household: pd.DataFrame -): +def generate_lcfs_table(lcfs_person: pd.DataFrame, lcfs_household: pd.DataFrame): """ Generate LCFS training table for consumption imputation. @@ -247,14 +245,10 @@ def generate_lcfs_table( # This bridges WAS (has vehicles) to LCFS (has fuel spending) household = impute_has_fuel_to_lcfs(household) - return household[ - PREDICTOR_VARIABLES + IMPUTATIONS + ["household_weight"] - ].dropna() + return household[PREDICTOR_VARIABLES + IMPUTATIONS + ["household_weight"]].dropna() -def uprate_lcfs_table( - household: pd.DataFrame, time_period: str -) -> pd.DataFrame: +def uprate_lcfs_table(household: pd.DataFrame, time_period: str) -> pd.DataFrame: from policyengine_uk.system import system start_period = 2021 @@ -262,9 +256,7 @@ def uprate_lcfs_table( household["petrol_spending"] *= fuel_uprating household["diesel_spending"] *= fuel_uprating - cpi = ( - system.parameters.gov.economic_assumptions.indices.obr.consumer_price_index - ) + cpi = system.parameters.gov.economic_assumptions.indices.obr.consumer_price_index cpi_uprating = cpi(time_period) / cpi(start_period) for variable in IMPUTATIONS: @@ -300,9 +292,7 @@ def save_imputation_models(): def create_consumption_model(overwrite_existing: bool = False): from policyengine_uk_data.utils.qrf import QRF - if ( - STORAGE_FOLDER / "consumption.pkl" - ).exists() and not overwrite_existing: + if (STORAGE_FOLDER / "consumption.pkl").exists() and not overwrite_existing: return QRF(file_path=STORAGE_FOLDER / "consumption.pkl") return save_imputation_models() diff --git a/policyengine_uk_data/datasets/imputations/salary_sacrifice.py b/policyengine_uk_data/datasets/imputations/salary_sacrifice.py index 7769f7a5c..ff7cf4e2b 100644 --- a/policyengine_uk_data/datasets/imputations/salary_sacrifice.py +++ b/policyengine_uk_data/datasets/imputations/salary_sacrifice.py @@ -62,9 +62,7 @@ def save_salary_sacrifice_model(): employment_income = sim.calculate("employment_income").values # Get SS amounts and indicator for who was asked - ss_amount = ( - dataset.person.pension_contributions_via_salary_sacrifice.values - ) + ss_amount = dataset.person.pension_contributions_via_salary_sacrifice.values if "salary_sacrifice_asked" not in dataset.person.columns: raise ValueError( "Dataset missing salary_sacrifice_asked field. " @@ -77,17 +75,13 @@ def save_salary_sacrifice_model(): training_mask = ss_asked == 1 if training_mask.sum() == 0: - raise ValueError( - "No training data found - no respondents were asked SALSAC." - ) + raise ValueError("No training data found - no respondents were asked SALSAC.") train_df = pd.DataFrame( { "age": age[training_mask], "employment_income": employment_income[training_mask], - "pension_contributions_via_salary_sacrifice": ss_amount[ - training_mask - ], + "pension_contributions_via_salary_sacrifice": ss_amount[training_mask], } ) @@ -141,9 +135,7 @@ def impute_salary_sacrifice( # Get variables needed for imputation age = sim.calculate("age").values employment_income = sim.calculate("employment_income").values - current_ss = ( - dataset.person.pension_contributions_via_salary_sacrifice.values - ) + current_ss = dataset.person.pension_contributions_via_salary_sacrifice.values # Get indicator for who was asked if "salary_sacrifice_asked" not in dataset.person.columns: @@ -164,9 +156,7 @@ def impute_salary_sacrifice( predictions = model.predict(pred_df) # Get imputed amounts (QRF predicts continuous values) - imputed_ss = predictions[ - "pension_contributions_via_salary_sacrifice" - ].values + imputed_ss = predictions["pension_contributions_via_salary_sacrifice"].values # Ensure non-negative imputed_ss = np.maximum(0, imputed_ss) @@ -187,9 +177,7 @@ def impute_salary_sacrifice( # 4.3mn below 2k). Donors keep their full employee pension amount # so those above 2k become above-cap records and the rest below-cap. person_weight = sim.calculate("person_weight").values - employee_pension = dataset.person[ - "employee_pension_contributions" - ].values.copy() + employee_pension = dataset.person["employee_pension_contributions"].values.copy() has_ss = final_ss > 0 # Donor pool: employed pension contributors not already SS users @@ -207,9 +195,7 @@ def impute_salary_sacrifice( if donor_weighted > 0: imputation_rate = min(0.5, shortfall / donor_weighted) rng = np.random.default_rng(seed=2024) - newly_imputed = is_donor & ( - rng.random(len(final_ss)) < imputation_rate - ) + newly_imputed = is_donor & (rng.random(len(final_ss)) < imputation_rate) # Move full employee pension to SS so the above/below # 2k split reflects the natural pension distribution diff --git a/policyengine_uk_data/datasets/imputations/services/etb.py b/policyengine_uk_data/datasets/imputations/services/etb.py index cc9e28fda..31678e42c 100644 --- a/policyengine_uk_data/datasets/imputations/services/etb.py +++ b/policyengine_uk_data/datasets/imputations/services/etb.py @@ -48,9 +48,7 @@ def create_public_services_model(overwrite_existing: bool = False) -> None: overwrite_existing: Whether to overwrite an existing model file. """ # Check if model already exists and we're not overwriting - if ( - STORAGE_FOLDER / "public_services.pkl" - ).exists() and not overwrite_existing: + if (STORAGE_FOLDER / "public_services.pkl").exists() and not overwrite_existing: return etb_path = STORAGE_FOLDER / "etb_1977_21" / "householdv2_1977-2021.tab" @@ -161,9 +159,7 @@ def create_efrs_input_dataset(dataset: UKSingleYearDataset) -> pd.DataFrame: df["count_primary_education"] = education == "PRIMARY" df["count_secondary_education"] = education == "LOWER_SECONDARY" - df["count_further_education"] = education.isin( - ["UPPER_SECONDARY", "TERTIARY"] - ) + df["count_further_education"] = education.isin(["UPPER_SECONDARY", "TERTIARY"]) df["hbai_household_net_income"] = ( df["hbai_household_net_income"] / df["household_count_people"] ) diff --git a/policyengine_uk_data/datasets/imputations/services/nhs.py b/policyengine_uk_data/datasets/imputations/services/nhs.py index fa25d7ca5..09e73b539 100644 --- a/policyengine_uk_data/datasets/imputations/services/nhs.py +++ b/policyengine_uk_data/datasets/imputations/services/nhs.py @@ -66,9 +66,9 @@ def get_age_bounds(age_group: str): + nhs[nhs["Lower age"] > 80].groupby(["Gender", "Service"]).sum() ).reset_index() - nhs[nhs["Lower age"] == 80][["Activity Count", "Total Cost"]] = ( - over_80_values[["Activity Count", "Total Cost"]] - ) + nhs[nhs["Lower age"] == 80][["Activity Count", "Total Cost"]] = over_80_values[ + ["Activity Count", "Total Cost"] + ] nhs = nhs[nhs["Lower age"] <= 80] nhs[nhs["Lower age"] == 80]["Upper age"] = 120 @@ -85,12 +85,8 @@ def get_age_bounds(age_group: str): ].household_weight.values.sum() nhs.loc[i, "Total people"] = count - nhs["Per-person average units"] = ( - nhs["Activity Count"] / nhs["Total people"] - ) - nhs["Per-person average spending"] = ( - nhs["Total Cost"] / nhs["Total people"] - ) + nhs["Per-person average units"] = nhs["Activity Count"] / nhs["Total people"] + nhs["Per-person average spending"] = nhs["Total Cost"] / nhs["Total people"] indirect_cost_adjustment_factor = ( 202e9 / nhs["Total Cost"].sum() ) # £202 billion 2025/26 budget diff --git a/policyengine_uk_data/datasets/imputations/services/services.py b/policyengine_uk_data/datasets/imputations/services/services.py index 22717e076..2b7892f2a 100644 --- a/policyengine_uk_data/datasets/imputations/services/services.py +++ b/policyengine_uk_data/datasets/imputations/services/services.py @@ -86,8 +86,6 @@ def impute_services( ] for person_imputations in visit_variables + spending_variables: - dataset.person[person_imputations] = input_data[ - person_imputations - ].values + dataset.person[person_imputations] = input_data[person_imputations].values return dataset diff --git a/policyengine_uk_data/datasets/local_areas/constituencies/boundary_changes/mapping_matrix.py b/policyengine_uk_data/datasets/local_areas/constituencies/boundary_changes/mapping_matrix.py index c493f92fb..457133881 100644 --- a/policyengine_uk_data/datasets/local_areas/constituencies/boundary_changes/mapping_matrix.py +++ b/policyengine_uk_data/datasets/local_areas/constituencies/boundary_changes/mapping_matrix.py @@ -15,9 +15,7 @@ # 4. Calculate proportion of old constituency's population in each new constituency # First, compute total population of each old constituency using groupby and transform -total_old_pop = df.groupby("code_2010")["old_population_present"].transform( - "sum" -) +total_old_pop = df.groupby("code_2010")["old_population_present"].transform("sum") # Then compute the proportion for each row df["proportion"] = df["old_population_present"] / total_old_pop diff --git a/policyengine_uk_data/datasets/local_areas/constituencies/calibrate.py b/policyengine_uk_data/datasets/local_areas/constituencies/calibrate.py index 6ea99677d..24aa3c302 100644 --- a/policyengine_uk_data/datasets/local_areas/constituencies/calibrate.py +++ b/policyengine_uk_data/datasets/local_areas/constituencies/calibrate.py @@ -73,9 +73,9 @@ def get_performance(weights, m_c, y_c, m_n, y_n, excluded_targets): constituency_target_validation["estimate"] - constituency_target_validation["target"] ) - constituency_target_validation["abs_error"] = ( - constituency_target_validation["error"].abs() - ) + constituency_target_validation["abs_error"] = constituency_target_validation[ + "error" + ].abs() constituency_target_validation["rel_abs_error"] = ( constituency_target_validation["abs_error"] / constituency_target_validation["target"] @@ -91,15 +91,11 @@ def get_performance(weights, m_c, y_c, m_n, y_n, excluded_targets): national_target_validation["target"] = national_actuals.values national_target_validation["error"] = ( - national_target_validation["estimate"] - - national_target_validation["target"] + national_target_validation["estimate"] - national_target_validation["target"] ) - national_target_validation["abs_error"] = national_target_validation[ - "error" - ].abs() + national_target_validation["abs_error"] = national_target_validation["error"].abs() national_target_validation["rel_abs_error"] = ( - national_target_validation["abs_error"] - / national_target_validation["target"] + national_target_validation["abs_error"] / national_target_validation["target"] ) df = pd.concat( diff --git a/policyengine_uk_data/datasets/local_areas/constituencies/loss.py b/policyengine_uk_data/datasets/local_areas/constituencies/loss.py index bad839f4b..3ea6e12a0 100644 --- a/policyengine_uk_data/datasets/local_areas/constituencies/loss.py +++ b/policyengine_uk_data/datasets/local_areas/constituencies/loss.py @@ -50,9 +50,7 @@ def create_constituency_target_matrix( # ── Income targets ───────────────────────────────────────────── incomes = get_constituency_income_targets() - national_incomes = get_national_income_projections( - int(dataset.time_period) - ) + national_incomes = get_national_income_projections(int(dataset.time_period)) for income_variable in INCOME_VARIABLES: income_values = sim.calculate(income_variable).values @@ -107,16 +105,10 @@ def create_constituency_target_matrix( children_per_hh = sim.map_result(is_child, "person", "household") on_uc_hh = sim.map_result(on_uc, "benunit", "household") > 0 - matrix["uc_hh_0_children"] = (on_uc_hh & (children_per_hh == 0)).astype( - float - ) + matrix["uc_hh_0_children"] = (on_uc_hh & (children_per_hh == 0)).astype(float) matrix["uc_hh_1_child"] = (on_uc_hh & (children_per_hh == 1)).astype(float) - matrix["uc_hh_2_children"] = (on_uc_hh & (children_per_hh == 2)).astype( - float - ) - matrix["uc_hh_3plus_children"] = ( - on_uc_hh & (children_per_hh >= 3) - ).astype(float) + matrix["uc_hh_2_children"] = (on_uc_hh & (children_per_hh == 2)).astype(float) + matrix["uc_hh_3plus_children"] = (on_uc_hh & (children_per_hh >= 3)).astype(float) uc_by_children = get_constituency_uc_by_children_targets() for col in uc_by_children.columns: diff --git a/policyengine_uk_data/datasets/local_areas/constituencies/targets/create_employment_incomes.py b/policyengine_uk_data/datasets/local_areas/constituencies/targets/create_employment_incomes.py index a502fa781..a99c8dc9d 100644 --- a/policyengine_uk_data/datasets/local_areas/constituencies/targets/create_employment_incomes.py +++ b/policyengine_uk_data/datasets/local_areas/constituencies/targets/create_employment_incomes.py @@ -131,33 +131,23 @@ def fill_missing_percentiles(row): # If this percentile is missing in the row if pd.isna(row[col]): # Find the closest lower and upper known percentiles - lower = max( - [p for p in known_percentiles if p < percentile], default=None - ) - upper = min( - [p for p in known_percentiles if p > percentile], default=None - ) + lower = max([p for p in known_percentiles if p < percentile], default=None) + upper = min([p for p in known_percentiles if p > percentile], default=None) # If both lower and upper bounds exist, interpolate if lower is not None and upper is not None: # Ratio between the target percentile and the lower bound - lower_ratio = ( - reference_values[percentile] / reference_values[lower] - ) + lower_ratio = reference_values[percentile] / reference_values[lower] row[col] = row[f"{lower} percentile"] * lower_ratio # If only the lower bound exists, extrapolate upwards elif lower is not None: - lower_ratio = ( - reference_values[percentile] / reference_values[lower] - ) + lower_ratio = reference_values[percentile] / reference_values[lower] row[col] = row[f"{lower} percentile"] * lower_ratio # If only the upper bound exists, extrapolate downwards elif upper is not None: - upper_ratio = ( - reference_values[percentile] / reference_values[upper] - ) + upper_ratio = reference_values[percentile] / reference_values[upper] row[col] = row[f"{upper} percentile"] * upper_ratio return row @@ -311,9 +301,7 @@ def calculate_band_population(row): # Ensure lower_percentile is less than upper_percentile if lower_percentile < upper_percentile: # Integrate to get proportion in this income band - proportion_in_band, _ = quad( - spline, lower_percentile, upper_percentile - ) + proportion_in_band, _ = quad(spline, lower_percentile, upper_percentile) proportion_in_band = proportion_in_band / spline( filtered_percentiles[-1] ) # Normalize by max spline value @@ -340,9 +328,7 @@ def calculate_band_population(row): income_bands, columns=["income_lower_bound", "income_upper_bound"] ) band_df["population_count"] = band_population_counts - band_df["parliamentary constituency 2010"] = row[ - "parliamentary constituency 2010" - ] + band_df["parliamentary constituency 2010"] = row["parliamentary constituency 2010"] band_df["constituency_code"] = row["constituency_code"] return band_df @@ -435,9 +421,7 @@ def calculate_band_population(row): import numpy as np -def find_and_replace_zero_populations( - result_df_copy, total_income -) -> pd.DataFrame: +def find_and_replace_zero_populations(result_df_copy, total_income) -> pd.DataFrame: # Step 1: Find constituencies with all zero populations constituencies_with_zero_population = ( result_df_copy.groupby("constituency_code") @@ -480,15 +464,12 @@ def find_and_replace_zero_populations( # Calculate absolute differences differences = np.abs( - other_constituencies["total_income_count"] - - current_total_income + other_constituencies["total_income_count"] - current_total_income ) # Get the index of the minimum difference min_diff_idx = differences.values.argmin() - nearest_constituency = other_constituencies.iloc[min_diff_idx][ - "code" - ] + nearest_constituency = other_constituencies.iloc[min_diff_idx]["code"] # Step 3: Copy population and earnings data from nearest constituency # For each income band of the zero constituency @@ -543,9 +524,7 @@ def find_and_replace_zero_populations( ].values[0] except Exception as e: - print( - f"Error processing constituency {zero_constituency}: {str(e)}" - ) + print(f"Error processing constituency {zero_constituency}: {str(e)}") continue return result_df diff --git a/policyengine_uk_data/datasets/local_areas/constituencies/targets/fill_missing_age_demographics.py b/policyengine_uk_data/datasets/local_areas/constituencies/targets/fill_missing_age_demographics.py index b95d79385..9bacfb71a 100644 --- a/policyengine_uk_data/datasets/local_areas/constituencies/targets/fill_missing_age_demographics.py +++ b/policyengine_uk_data/datasets/local_areas/constituencies/targets/fill_missing_age_demographics.py @@ -29,9 +29,7 @@ missing_constituencies = pd.DataFrame( { "code": missing_constituencies.values, - "name": incomes.set_index("code") - .loc[missing_constituencies] - .name.values, + "name": incomes.set_index("code").loc[missing_constituencies].name.values, } ) for col in ages.columns[2:]: diff --git a/policyengine_uk_data/datasets/local_areas/local_authorities/calibrate.py b/policyengine_uk_data/datasets/local_areas/local_authorities/calibrate.py index 588f29555..746d94e73 100644 --- a/policyengine_uk_data/datasets/local_areas/local_authorities/calibrate.py +++ b/policyengine_uk_data/datasets/local_areas/local_authorities/calibrate.py @@ -18,12 +18,8 @@ def calibrate( ): return calibrate_local_areas( dataset=dataset, - matrix_fn=lambda ds: create_local_authority_target_matrix( - ds, ds.time_period - ), - national_matrix_fn=lambda ds: create_national_target_matrix( - ds, ds.time_period - ), + matrix_fn=lambda ds: create_local_authority_target_matrix(ds, ds.time_period), + national_matrix_fn=lambda ds: create_national_target_matrix(ds, ds.time_period), area_count=360, weight_file="local_authority_weights.h5", excluded_training_targets=excluded_training_targets, @@ -37,9 +33,7 @@ def calibrate( def get_performance(weights, m_c, y_c, m_n, y_n, excluded_targets): la_target_matrix, la_actuals = m_c, y_c national_target_matrix, national_actuals = m_n, y_n - local_authorities = pd.read_csv( - STORAGE_FOLDER / "local_authorities_2021.csv" - ) + local_authorities = pd.read_csv(STORAGE_FOLDER / "local_authorities_2021.csv") la_wide = weights @ la_target_matrix la_wide.index = local_authorities.code.values la_wide["name"] = local_authorities.name.values @@ -93,15 +87,11 @@ def get_performance(weights, m_c, y_c, m_n, y_n, excluded_targets): national_target_validation["target"] = national_actuals.values national_target_validation["error"] = ( - national_target_validation["estimate"] - - national_target_validation["target"] + national_target_validation["estimate"] - national_target_validation["target"] ) - national_target_validation["abs_error"] = national_target_validation[ - "error" - ].abs() + national_target_validation["abs_error"] = national_target_validation["error"].abs() national_target_validation["rel_abs_error"] = ( - national_target_validation["abs_error"] - / national_target_validation["target"] + national_target_validation["abs_error"] / national_target_validation["target"] ) df = pd.concat( diff --git a/policyengine_uk_data/datasets/local_areas/local_authorities/loss.py b/policyengine_uk_data/datasets/local_areas/local_authorities/loss.py index 1b4e113e3..446329baf 100644 --- a/policyengine_uk_data/datasets/local_areas/local_authorities/loss.py +++ b/policyengine_uk_data/datasets/local_areas/local_authorities/loss.py @@ -60,9 +60,7 @@ def create_local_authority_target_matrix( # ── Income targets ───────────────────────────────────────────── incomes = get_la_income_targets() - national_incomes = get_national_income_projections( - int(dataset.time_period) - ) + national_incomes = get_national_income_projections(int(dataset.time_period)) for income_variable in INCOME_VARIABLES: income_values = sim.calculate(income_variable).values @@ -127,9 +125,7 @@ def create_local_authority_target_matrix( ) hbai_net_income = sim.calculate("equiv_hbai_household_net_income").values - hbai_net_income_ahc = sim.calculate( - "equiv_hbai_household_net_income_ahc" - ).values + hbai_net_income_ahc = sim.calculate("equiv_hbai_household_net_income_ahc").values housing_costs = hbai_net_income - hbai_net_income_ahc matrix["ons/equiv_net_income_bhc"] = hbai_net_income @@ -194,15 +190,11 @@ def create_local_authority_target_matrix( ) tenure_type = sim.calculate("tenure_type").values - matrix["tenure/owned_outright"] = (tenure_type == "OWNED_OUTRIGHT").astype( - float - ) - matrix["tenure/owned_mortgage"] = ( - tenure_type == "OWNED_WITH_MORTGAGE" - ).astype(float) - matrix["tenure/private_rent"] = (tenure_type == "RENT_PRIVATELY").astype( + matrix["tenure/owned_outright"] = (tenure_type == "OWNED_OUTRIGHT").astype(float) + matrix["tenure/owned_mortgage"] = (tenure_type == "OWNED_WITH_MORTGAGE").astype( float ) + matrix["tenure/private_rent"] = (tenure_type == "RENT_PRIVATELY").astype(float) matrix["tenure/social_rent"] = ( (tenure_type == "RENT_FROM_COUNCIL") | (tenure_type == "RENT_FROM_HA") ).astype(float) @@ -219,9 +211,7 @@ def create_local_authority_target_matrix( ("social_rent", "social_rent_pct"), ]: targets = tenure_merged[pct_col] / 100 * tenure_merged["households"] - national = ( - original_weights * matrix[f"tenure/{tenure_key}"].values - ).sum() + national = (original_weights * matrix[f"tenure/{tenure_key}"].values).sum() y[f"tenure/{tenure_key}"] = np.where( has_tenure, targets.values, national * la_household_share ) diff --git a/policyengine_uk_data/datasets/local_areas/local_authorities/targets/create_employment_incomes.py b/policyengine_uk_data/datasets/local_areas/local_authorities/targets/create_employment_incomes.py index 4e34dd3b8..3f789442e 100644 --- a/policyengine_uk_data/datasets/local_areas/local_authorities/targets/create_employment_incomes.py +++ b/policyengine_uk_data/datasets/local_areas/local_authorities/targets/create_employment_incomes.py @@ -131,33 +131,23 @@ def fill_missing_percentiles(row): # If this percentile is missing in the row if pd.isna(row[col]): # Find the closest lower and upper known percentiles - lower = max( - [p for p in known_percentiles if p < percentile], default=None - ) - upper = min( - [p for p in known_percentiles if p > percentile], default=None - ) + lower = max([p for p in known_percentiles if p < percentile], default=None) + upper = min([p for p in known_percentiles if p > percentile], default=None) # If both lower and upper bounds exist, interpolate if lower is not None and upper is not None: # Ratio between the target percentile and the lower bound - lower_ratio = ( - reference_values[percentile] / reference_values[lower] - ) + lower_ratio = reference_values[percentile] / reference_values[lower] row[col] = row[f"{lower} percentile"] * lower_ratio # If only the lower bound exists, extrapolate upwards elif lower is not None: - lower_ratio = ( - reference_values[percentile] / reference_values[lower] - ) + lower_ratio = reference_values[percentile] / reference_values[lower] row[col] = row[f"{lower} percentile"] * lower_ratio # If only the upper bound exists, extrapolate downwards elif upper is not None: - upper_ratio = ( - reference_values[percentile] / reference_values[upper] - ) + upper_ratio = reference_values[percentile] / reference_values[upper] row[col] = row[f"{upper} percentile"] * upper_ratio return row @@ -280,9 +270,9 @@ def calculate_band_population(row): income_bands, columns=["income_lower_bound", "income_upper_bound"] ) band_df["population_count"] = [0] * len(income_bands) - band_df["local authority: district / unitary (as of April 2023)"] = ( - row["local authority: district / unitary (as of April 2023)"] - ) + band_df["local authority: district / unitary (as of April 2023)"] = row[ + "local authority: district / unitary (as of April 2023)" + ] band_df["LA_code"] = row["LA_code"] return band_df @@ -311,9 +301,7 @@ def calculate_band_population(row): # Ensure lower_percentile is less than upper_percentile if lower_percentile < upper_percentile: # Integrate to get proportion in this income band - proportion_in_band, _ = quad( - spline, lower_percentile, upper_percentile - ) + proportion_in_band, _ = quad(spline, lower_percentile, upper_percentile) proportion_in_band = proportion_in_band / spline( filtered_percentiles[-1] ) # Normalize by max spline value @@ -435,15 +423,11 @@ def calculate_band_population(row): import numpy as np -def find_and_replace_zero_populations( - result_df_copy, total_income -) -> pd.DataFrame: +def find_and_replace_zero_populations(result_df_copy, total_income) -> pd.DataFrame: # Step 1: Find local authorities with all zero populations LA_with_zero_population = ( result_df_copy.groupby("LA_code") - .filter(lambda group: (group["population_count"] == 0).all())[ - "LA_code" - ] + .filter(lambda group: (group["population_count"] == 0).all())["LA_code"] .unique() ) @@ -462,26 +446,18 @@ def find_and_replace_zero_populations( ) continue - current_total_income = current_LA_data[ - "total_income_count" - ].values[0] + current_total_income = current_LA_data["total_income_count"].values[0] # Find the nearest local authority by total_income_count # Exclude both the current local authority and other zero population local authorities - other_LA = total_income[ - ~total_income["code"].isin(LA_with_zero_population) - ] + other_LA = total_income[~total_income["code"].isin(LA_with_zero_population)] if other_LA.empty: - print( - f"Warning: No valid local authorities found to copy from" - ) + print(f"Warning: No valid local authorities found to copy from") continue # Calculate absolute differences - differences = np.abs( - other_LA["total_income_count"] - current_total_income - ) + differences = np.abs(other_LA["total_income_count"] - current_total_income) # Get the index of the minimum difference min_diff_idx = differences.values.argmin() diff --git a/policyengine_uk_data/datasets/spi.py b/policyengine_uk_data/datasets/spi.py index 253b14554..f0c08626c 100644 --- a/policyengine_uk_data/datasets/spi.py +++ b/policyengine_uk_data/datasets/spi.py @@ -76,19 +76,14 @@ def create_spi( percent_along_age_range = np.random.rand(len(df)) min_age = np.array([AGE_RANGES[age][0] for age in age_range]) max_age = np.array([AGE_RANGES[age][1] for age in age_range]) - person["age"] = ( - min_age + (max_age - min_age) * percent_along_age_range - ).astype(int) + person["age"] = (min_age + (max_age - min_age) * percent_along_age_range).astype( + int + ) person["state_pension_reported"] = df.SRP person["other_tax_credits"] = df.TAX_CRED person["miscellaneous_income"] = ( - df.MOTHINC - + df.INCPBEN - + df.OSSBEN - + df.TAXTERM - + df.UBISJA - + df.OTHERINC + df.MOTHINC + df.INCPBEN + df.OSSBEN + df.TAXTERM + df.UBISJA + df.OTHERINC ) person["gift_aid"] = df.GIFTAID + df.GIFTINV person["other_investment_income"] = df.OTHERINV diff --git a/policyengine_uk_data/parameters/__init__.py b/policyengine_uk_data/parameters/__init__.py index dcf981ba4..06f3b5582 100644 --- a/policyengine_uk_data/parameters/__init__.py +++ b/policyengine_uk_data/parameters/__init__.py @@ -11,9 +11,7 @@ PARAMETERS_DIR = Path(__file__).parent -def load_parameter( - category: str, variable_name: str, year: int = 2015 -) -> float: +def load_parameter(category: str, variable_name: str, year: int = 2015) -> float: """Load parameter from YAML files in a specific category. Args: @@ -48,9 +46,7 @@ def load_parameter( break if applicable_value is None: - raise ValueError( - f"No value found for {category}/{variable_name} in {year}" - ) + raise ValueError(f"No value found for {category}/{variable_name} in {year}") return applicable_value diff --git a/policyengine_uk_data/targets/build_loss_matrix.py b/policyengine_uk_data/targets/build_loss_matrix.py index 8af391a3d..3358a646b 100644 --- a/policyengine_uk_data/targets/build_loss_matrix.py +++ b/policyengine_uk_data/targets/build_loss_matrix.py @@ -161,9 +161,7 @@ def pe(self, variable: str): """Calculate variable mapped to household level.""" key = ("pe", variable) if key not in self._cache: - self._cache[key] = self.sim.calculate( - variable, map_to="household" - ).values + self._cache[key] = self.sim.calculate(variable, map_to="household").values return self._cache[key] def pe_person(self, variable: str): @@ -194,9 +192,7 @@ def household_from_family(self, values): @property def region(self): if "region" not in self._cache: - self._cache["region"] = self.sim.calculate( - "region", map_to="person" - ) + self._cache["region"] = self.sim.calculate("region", map_to="person") return self._cache["region"] @property @@ -225,9 +221,7 @@ def counterfactual_sim(self): if "counterfactual_sim" not in self._cache: from policyengine_uk import Microsimulation - ss = self.sim.calculate( - "pension_contributions_via_salary_sacrifice" - ) + ss = self.sim.calculate("pension_contributions_via_salary_sacrifice") emp = self.sim.calculate("employment_income") cf_sim = Microsimulation(dataset=self.dataset, reform=self.reform) cf_sim.set_input( @@ -247,9 +241,7 @@ def counterfactual_sim(self): # ── Column computation dispatch ────────────────────────────────────── -def _compute_column( - target: Target, ctx: _SimContext, year: int -) -> np.ndarray | None: +def _compute_column(target: Target, ctx: _SimContext, year: int) -> np.ndarray | None: """Compute the household-level column for a target. Dispatches to domain-specific compute modules. diff --git a/policyengine_uk_data/targets/compute/benefits.py b/policyengine_uk_data/targets/compute/benefits.py index 140eab68f..0a6d291be 100644 --- a/policyengine_uk_data/targets/compute/benefits.py +++ b/policyengine_uk_data/targets/compute/benefits.py @@ -17,9 +17,7 @@ def compute_benefit_cap(target, ctx) -> np.ndarray: return ctx.sim.calculate( "benefit_cap_reduction", map_to="household" ).values.astype(float) - reduction = ctx.sim.calculate( - "benefit_cap_reduction", map_to="household" - ).values + reduction = ctx.sim.calculate("benefit_cap_reduction", map_to="household").values return (reduction > 0).astype(float) @@ -29,9 +27,7 @@ def compute_scotland_uc_child(target, ctx) -> np.ndarray: on_uc = ctx.household_from_family(uc > 0) > 0 child_u1 = ctx.pe_person("is_child") & (ctx.age < 1) has_child_u1 = ctx.household_from_person(child_u1) > 0 - return ( - (ctx.household_region == "SCOTLAND") & on_uc & has_child_u1 - ).astype(float) + return ((ctx.household_region == "SCOTLAND") & on_uc & has_child_u1).astype(float) def compute_uc_by_children(target, ctx) -> np.ndarray: @@ -74,9 +70,7 @@ def ft_hh(value): if ft_str == "single_no_children": match = ft_hh("SINGLE") & (children_per_hh == 0) elif ft_str == "single_with_children": - match = (ft_hh("SINGLE") | ft_hh("LONE_PARENT")) & ( - children_per_hh > 0 - ) + match = (ft_hh("SINGLE") | ft_hh("LONE_PARENT")) & (children_per_hh > 0) elif ft_str == "couple_no_children": match = ft_hh("COUPLE_NO_CHILDREN") elif ft_str == "couple_with_children": @@ -95,15 +89,11 @@ def compute_uc_payment_dist(target, ctx) -> np.ndarray: lower = target.lower_bound upper = target.upper_bound - uc_payments = ctx.sim.calculate( - "universal_credit", map_to="benunit" - ).values + uc_payments = ctx.sim.calculate("universal_credit", map_to="benunit").values uc_family_type = ctx.sim.calculate("family_type", map_to="benunit").values in_band = ( - (uc_payments >= lower) - & (uc_payments < upper) - & (uc_family_type == family_type) + (uc_payments >= lower) & (uc_payments < upper) & (uc_family_type == family_type) ) return ctx.household_from_family(in_band) @@ -113,9 +103,7 @@ def compute_uc_jobseeker(target, ctx) -> np.ndarray: family = ctx.sim.populations["benunit"] uc = ctx.sim.calculate("universal_credit") on_uc = uc > 0 - unemployed = family.any( - ctx.sim.calculate("employment_status") == "UNEMPLOYED" - ) + unemployed = family.any(ctx.sim.calculate("employment_status") == "UNEMPLOYED") if "non_jobseekers" in target.name: mask = on_uc * ~unemployed @@ -164,17 +152,13 @@ def compute_two_child_limit(target, ctx) -> np.ndarray | None: if name == "dwp/uc/two_child_limit/children_affected": return children_in_capped if name == "dwp/uc/two_child_limit/children_in_affected_households": - total_children = sim.map_result( - is_child * child_in_uc, "person", "household" - ) + total_children = sim.map_result(is_child * child_in_uc, "person", "household") return total_children * capped_hh if "_children_households_total_children" in name: n = int(name.split("/")[-1].split("_")[0]) children_count = sim.map_result(is_child, "person", "household") - return (capped_hh * (children_count == n) * children_count).astype( - float - ) + return (capped_hh * (children_count == n) * children_count).astype(float) if "_children_households" in name and "total" not in name: n = int(name.split("/")[-1].split("_")[0]) children_count = sim.map_result(is_child, "person", "household") diff --git a/policyengine_uk_data/targets/compute/demographics.py b/policyengine_uk_data/targets/compute/demographics.py index 670b80723..4d0b80923 100644 --- a/policyengine_uk_data/targets/compute/demographics.py +++ b/policyengine_uk_data/targets/compute/demographics.py @@ -33,9 +33,7 @@ def compute_regional_age(target, ctx) -> np.ndarray | None: return None person_match = ( - (ctx.region.values == pe_region) - & (ctx.age >= lower) - & (ctx.age <= upper) + (ctx.region.values == pe_region) & (ctx.age >= lower) & (ctx.age <= upper) ) return ctx.household_from_person(person_match) @@ -73,7 +71,7 @@ def compute_scotland_demographics(target, ctx) -> np.ndarray | None: if name == "ons/scotland_households_3plus_children": is_child = ctx.pe_person("is_child") children_per_hh = ctx.household_from_person(is_child) - return ( - (ctx.household_region == "SCOTLAND") & (children_per_hh >= 3) - ).astype(float) + return ((ctx.household_region == "SCOTLAND") & (children_per_hh >= 3)).astype( + float + ) return None diff --git a/policyengine_uk_data/targets/compute/households.py b/policyengine_uk_data/targets/compute/households.py index be7686ac8..be51f2d01 100644 --- a/policyengine_uk_data/targets/compute/households.py +++ b/policyengine_uk_data/targets/compute/households.py @@ -15,18 +15,18 @@ def ft_hh(value): return ctx.household_from_family(ft == value) > 0 if name == "lone_households_under_65": - return ( - ft_hh("SINGLE") & (children_per_hh == 0) & (age_hh_head < 65) - ).astype(float) + return (ft_hh("SINGLE") & (children_per_hh == 0) & (age_hh_head < 65)).astype( + float + ) if name == "lone_households_over_65": - return ( - ft_hh("SINGLE") & (children_per_hh == 0) & (age_hh_head >= 65) - ).astype(float) + return (ft_hh("SINGLE") & (children_per_hh == 0) & (age_hh_head >= 65)).astype( + float + ) if name == "unrelated_adult_households": people_per_hh = ctx.household_from_person(np.ones_like(is_child)) - return ( - ft_hh("SINGLE") & (children_per_hh == 0) & (people_per_hh > 1) - ).astype(float) + return (ft_hh("SINGLE") & (children_per_hh == 0) & (people_per_hh > 1)).astype( + float + ) if name == "couple_no_children_households": return ft_hh("COUPLE_NO_CHILDREN").astype(float) if name == "couple_under_3_children_households": @@ -36,14 +36,10 @@ def ft_hh(value): & (children_per_hh <= 2) ).astype(float) if name == "couple_3_plus_children_households": - return (ft_hh("COUPLE_WITH_CHILDREN") & (children_per_hh >= 3)).astype( - float - ) + return (ft_hh("COUPLE_WITH_CHILDREN") & (children_per_hh >= 3)).astype(float) if name == "couple_non_dependent_children_only_households": people_per_hh = ctx.household_from_person(np.ones_like(is_child)) - return (ft_hh("COUPLE_NO_CHILDREN") & (people_per_hh > 2)).astype( - float - ) + return (ft_hh("COUPLE_NO_CHILDREN") & (people_per_hh > 2)).astype(float) if name == "lone_parent_dependent_children_households": return (ft_hh("LONE_PARENT") & (children_per_hh > 0)).astype(float) if name == "lone_parent_non_dependent_children_households": diff --git a/policyengine_uk_data/targets/compute/income.py b/policyengine_uk_data/targets/compute/income.py index c2f286c40..1953be0ee 100644 --- a/policyengine_uk_data/targets/compute/income.py +++ b/policyengine_uk_data/targets/compute/income.py @@ -12,9 +12,7 @@ def compute_income_band(target, ctx) -> np.ndarray: upper = target.upper_bound income_df = ctx.sim.calculate_dataframe(["total_income", variable]) - in_band = (income_df.total_income >= lower) & ( - income_df.total_income < upper - ) + in_band = (income_df.total_income >= lower) & (income_df.total_income < upper) if target.is_count: return ctx.household_from_person((income_df[variable] > 0) * in_band) @@ -39,9 +37,7 @@ def compute_ss_it_relief(target, ctx) -> np.ndarray: name = target.name if "basic" in name: - mask = (adj_net_income_cf > basic_thresh) & ( - adj_net_income_cf <= higher_thresh - ) + mask = (adj_net_income_cf > basic_thresh) & (adj_net_income_cf <= higher_thresh) elif "higher" in name: mask = (adj_net_income_cf > higher_thresh) & ( adj_net_income_cf <= additional_thresh @@ -65,14 +61,10 @@ def compute_ss_ni_relief(target, ctx) -> np.ndarray: name = target.name if "employee" in name: ni_base = ctx.sim.calculate("ni_employee") - ni_cf = ctx.counterfactual_sim.calculate( - "ni_employee", ctx.time_period - ) + ni_cf = ctx.counterfactual_sim.calculate("ni_employee", ctx.time_period) else: ni_base = ctx.sim.calculate("ni_employer") - ni_cf = ctx.counterfactual_sim.calculate( - "ni_employer", ctx.time_period - ) + ni_cf = ctx.counterfactual_sim.calculate("ni_employer", ctx.time_period) return ctx.household_from_person(ni_cf - ni_base) @@ -90,9 +82,7 @@ def compute_ss_headcount(target, ctx) -> np.ndarray: "Variable" ) row = "pension_contributions_via_salary_sacrifice" - price_adj = ( - uprating.loc[row, "2023"] / uprating.loc[row, str(ctx.time_period)] - ) + price_adj = uprating.loc[row, "2023"] / uprating.loc[row, str(ctx.time_period)] ss_base = ss * price_adj name = target.name diff --git a/policyengine_uk_data/targets/registry.py b/policyengine_uk_data/targets/registry.py index ebd0af4f1..b4c971084 100644 --- a/policyengine_uk_data/targets/registry.py +++ b/policyengine_uk_data/targets/registry.py @@ -25,9 +25,7 @@ def discover_source_modules() -> list: modules = [] package_path = Path(sources_pkg.__file__).parent for importer, modname, ispkg in pkgutil.iter_modules([str(package_path)]): - mod = importlib.import_module( - f"policyengine_uk_data.targets.sources.{modname}" - ) + mod = importlib.import_module(f"policyengine_uk_data.targets.sources.{modname}") if hasattr(mod, "get_targets"): modules.append(mod) return modules diff --git a/policyengine_uk_data/targets/sources/_common.py b/policyengine_uk_data/targets/sources/_common.py index 69fb69887..110bc8195 100644 --- a/policyengine_uk_data/targets/sources/_common.py +++ b/policyengine_uk_data/targets/sources/_common.py @@ -9,7 +9,7 @@ HEADERS = { "User-Agent": ( - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)" " AppleWebKit/537.36" + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36" ), } diff --git a/policyengine_uk_data/targets/sources/hmrc_salary_sacrifice.py b/policyengine_uk_data/targets/sources/hmrc_salary_sacrifice.py index 97ec499a7..f9865e6f7 100644 --- a/policyengine_uk_data/targets/sources/hmrc_salary_sacrifice.py +++ b/policyengine_uk_data/targets/sources/hmrc_salary_sacrifice.py @@ -32,9 +32,7 @@ def get_targets() -> list[Target]: targets = [] try: - r = requests.get( - ref, headers=HEADERS, allow_redirects=True, timeout=30 - ) + r = requests.get(ref, headers=HEADERS, allow_redirects=True, timeout=30) r.raise_for_status() df = pd.read_csv(io.StringIO(r.content.decode("utf-8-sig"))) @@ -108,14 +106,11 @@ def get_targets() -> list[Target]: ) except Exception as e: - logger.error( - "Failed to download/parse HMRC salary sacrifice CSV: %s", e - ) + logger.error("Failed to download/parse HMRC salary sacrifice CSV: %s", e) # Total salary sacrifice contributions (SPP Review 2025: £24bn base) _SS_CONTRIBUTIONS = { - y: 24e9 * _GROWTH ** max(0, y - _BASE_YEAR) - for y in range(_BASE_YEAR, 2030) + y: 24e9 * _GROWTH ** max(0, y - _BASE_YEAR) for y in range(_BASE_YEAR, 2030) } targets.append( Target( diff --git a/policyengine_uk_data/targets/sources/hmrc_spi.py b/policyengine_uk_data/targets/sources/hmrc_spi.py index de993d493..72d792e92 100644 --- a/policyengine_uk_data/targets/sources/hmrc_spi.py +++ b/policyengine_uk_data/targets/sources/hmrc_spi.py @@ -154,9 +154,7 @@ def get_targets() -> list[Target]: for idx, row in merged.iterrows(): lower = int(row["lower_bound"]) - upper = ( - _BAND_UPPER[idx] if idx < len(_BAND_UPPER) else float("inf") - ) + upper = _BAND_UPPER[idx] if idx < len(_BAND_UPPER) else float("inf") band_label = f"{lower:_}_to_{upper:_}" for variable in INCOME_VARIABLES: @@ -182,9 +180,7 @@ def get_targets() -> list[Target]: if count_col in row.index and row[count_col] > 0: targets.append( Target( - name=( - f"hmrc/{variable}_count_income_band_{band_label}" - ), + name=(f"hmrc/{variable}_count_income_band_{band_label}"), variable=variable, source="hmrc_spi", unit=Unit.COUNT, diff --git a/policyengine_uk_data/targets/sources/local_age.py b/policyengine_uk_data/targets/sources/local_age.py index 0f74bc446..f48799772 100644 --- a/policyengine_uk_data/targets/sources/local_age.py +++ b/policyengine_uk_data/targets/sources/local_age.py @@ -17,16 +17,8 @@ logger = logging.getLogger(__name__) -_CONST_DIR = ( - STORAGE.parent / "datasets" / "local_areas" / "constituencies" / "targets" -) -_LA_DIR = ( - STORAGE.parent - / "datasets" - / "local_areas" - / "local_authorities" - / "targets" -) +_CONST_DIR = STORAGE.parent / "datasets" / "local_areas" / "constituencies" / "targets" +_LA_DIR = STORAGE.parent / "datasets" / "local_areas" / "local_authorities" / "targets" _REF = ( "https://www.ons.gov.uk/peoplepopulationandcommunity/" diff --git a/policyengine_uk_data/targets/sources/local_income.py b/policyengine_uk_data/targets/sources/local_income.py index 695f790c9..8fd7adb32 100644 --- a/policyengine_uk_data/targets/sources/local_income.py +++ b/policyengine_uk_data/targets/sources/local_income.py @@ -19,16 +19,8 @@ logger = logging.getLogger(__name__) -_CONST_DIR = ( - STORAGE.parent / "datasets" / "local_areas" / "constituencies" / "targets" -) -_LA_DIR = ( - STORAGE.parent - / "datasets" - / "local_areas" - / "local_authorities" - / "targets" -) +_CONST_DIR = STORAGE.parent / "datasets" / "local_areas" / "constituencies" / "targets" +_LA_DIR = STORAGE.parent / "datasets" / "local_areas" / "local_authorities" / "targets" _REF = ( "https://www.gov.uk/government/statistics/" diff --git a/policyengine_uk_data/targets/sources/local_la_extras.py b/policyengine_uk_data/targets/sources/local_la_extras.py index 76bcf06d8..6bc20b6ff 100644 --- a/policyengine_uk_data/targets/sources/local_la_extras.py +++ b/policyengine_uk_data/targets/sources/local_la_extras.py @@ -28,9 +28,7 @@ "earningsandworkinghours/datasets/" "smallareaincomeestimatesformiddlelayersuperoutputareasenglandandwales" ) -_REF_TENURE = ( - "https://www.gov.uk/government/statistics/english-housing-survey-2023" -) +_REF_TENURE = "https://www.gov.uk/government/statistics/english-housing-survey-2023" _REF_RENT = ( "https://www.ons.gov.uk/peoplepopulationandcommunity/housing/datasets/" "privaterentalmarketsummarystatisticsinengland" diff --git a/policyengine_uk_data/targets/sources/obr.py b/policyengine_uk_data/targets/sources/obr.py index c4f92db35..3268f4282 100644 --- a/policyengine_uk_data/targets/sources/obr.py +++ b/policyengine_uk_data/targets/sources/obr.py @@ -45,9 +45,7 @@ def _download_workbook(url: str) -> openpyxl.Workbook: return openpyxl.load_workbook(io.BytesIO(r.content), data_only=False) -def _read_row_values( - ws, row_num: int, col_letters: list[str] -) -> dict[int, float]: +def _read_row_values(ws, row_num: int, col_letters: list[str]) -> dict[int, float]: """Read numeric values from a row, mapped to calendar years.""" result = {} for col in col_letters: @@ -353,9 +351,7 @@ def read_49(row_num: int) -> dict[int, float]: # Find the second UC row (outside cap section) for row in range(uc_outside_row + 1, 55): cell_val = ws[f"B{row}"].value - if cell_val and str(cell_val).strip().startswith( - "Universal credit" - ): + if cell_val and str(cell_val).strip().startswith("Universal credit"): values = read_49(row) if values: targets.append( @@ -427,27 +423,20 @@ def _parse_tv_licence(wb: openpyxl.Workbook) -> list[Target]: _PRIVATE_SCHOOL = {y: 557_000 for y in range(2018, 2032)} # SPP Review: salary sacrifice NI relief (uprated 3% pa from 2024 base) -_SS_EMPLOYEE_NI = { - y: 1.2e9 * 1.03 ** max(0, y - 2024) for y in range(2024, 2032) -} -_SS_EMPLOYER_NI = { - y: 2.9e9 * 1.03 ** max(0, y - 2024) for y in range(2024, 2032) -} +_SS_EMPLOYEE_NI = {y: 1.2e9 * 1.03 ** max(0, y - 2024) for y in range(2024, 2032)} +_SS_EMPLOYER_NI = {y: 2.9e9 * 1.03 ** max(0, y - 2024) for y in range(2024, 2032)} # Salary sacrifice headcount: 7.7m total (3.3m above £2k, 4.3m below) # OBR para 1.7: SS population grows 0.9% faster than employees (~2.4%/yr) _SS_HEADCOUNT_GROWTH = 1.024 _SS_TOTAL_USERS = { - y: 7_700_000 * _SS_HEADCOUNT_GROWTH ** max(0, y - 2024) - for y in range(2024, 2032) + y: 7_700_000 * _SS_HEADCOUNT_GROWTH ** max(0, y - 2024) for y in range(2024, 2032) } _SS_BELOW_CAP_USERS = { - y: 4_300_000 * _SS_HEADCOUNT_GROWTH ** max(0, y - 2024) - for y in range(2024, 2032) + y: 4_300_000 * _SS_HEADCOUNT_GROWTH ** max(0, y - 2024) for y in range(2024, 2032) } _SS_ABOVE_CAP_USERS = { - y: 3_300_000 * _SS_HEADCOUNT_GROWTH ** max(0, y - 2024) - for y in range(2024, 2032) + y: 3_300_000 * _SS_HEADCOUNT_GROWTH ** max(0, y - 2024) for y in range(2024, 2032) } diff --git a/policyengine_uk_data/targets/sources/ons_demographics.py b/policyengine_uk_data/targets/sources/ons_demographics.py index a51adf39b..dba77671d 100644 --- a/policyengine_uk_data/targets/sources/ons_demographics.py +++ b/policyengine_uk_data/targets/sources/ons_demographics.py @@ -78,9 +78,7 @@ @lru_cache(maxsize=1) def _download_uk_projection() -> pd.DataFrame: """Download and parse the UK principal population projection.""" - r = requests.get( - _UK_ZIP_URL, headers=HEADERS, allow_redirects=True, timeout=120 - ) + r = requests.get(_UK_ZIP_URL, headers=HEADERS, allow_redirects=True, timeout=120) r.raise_for_status() z = zipfile.ZipFile(io.BytesIO(r.content)) with z.open("uk/uk_ppp_machine_readable.xlsx") as f: @@ -182,9 +180,7 @@ def _parse_regional_from_csv() -> list[Target]: for _, row in demographics.iterrows(): name = row["name"] - if name in _SKIP_NAMES or any( - name.startswith(p) for p in _SKIP_PREFIXES - ): + if name in _SKIP_NAMES or any(name.startswith(p) for p in _SKIP_PREFIXES): continue values = {} for y in _YEARS: diff --git a/policyengine_uk_data/targets/sources/ons_savings.py b/policyengine_uk_data/targets/sources/ons_savings.py index 3764f22e0..2046a4d34 100644 --- a/policyengine_uk_data/targets/sources/ons_savings.py +++ b/policyengine_uk_data/targets/sources/ons_savings.py @@ -18,15 +18,15 @@ logger = logging.getLogger(__name__) -_API_URL = "https://www.ons.gov.uk/economy/grossdomesticproductgdp/timeseries/haxv/ukea/data" +_API_URL = ( + "https://www.ons.gov.uk/economy/grossdomesticproductgdp/timeseries/haxv/ukea/data" +) _REF = "https://www.ons.gov.uk/economy/grossdomesticproductgdp/timeseries/haxv/ukea" def get_targets() -> list[Target]: try: - r = requests.get( - _API_URL, headers=HEADERS, allow_redirects=True, timeout=30 - ) + r = requests.get(_API_URL, headers=HEADERS, allow_redirects=True, timeout=30) r.raise_for_status() data = r.json() diff --git a/policyengine_uk_data/tests/microsimulation/test_reform_impacts.py b/policyengine_uk_data/tests/microsimulation/test_reform_impacts.py index 19baa19f4..1e266ca9e 100644 --- a/policyengine_uk_data/tests/microsimulation/test_reform_impacts.py +++ b/policyengine_uk_data/tests/microsimulation/test_reform_impacts.py @@ -59,9 +59,9 @@ def test_reform_fiscal_impacts( """Test that each reform produces the expected fiscal impact.""" impact = get_fiscal_impact(baseline, enhanced_frs, reform) - assert ( - abs(impact - expected_impact) < tolerance - ), f"Impact for {reform_name} is {impact:.1f} billion, expected {expected_impact:.1f} billion" + assert abs(impact - expected_impact) < tolerance, ( + f"Impact for {reform_name} is {impact:.1f} billion, expected {expected_impact:.1f} billion" + ) def test_config_file_exists(): @@ -77,11 +77,9 @@ def test_all_reforms_have_required_fields(): for i, reform in enumerate(reforms_data): for field in required_fields: - assert ( - field in reform - ), f"Reform {i} missing required field: {field}" + assert field in reform, f"Reform {i} missing required field: {field}" - assert isinstance( - reform["parameters"], dict - ), f"Reform {i} parameters must be a dictionary" + assert isinstance(reform["parameters"], dict), ( + f"Reform {i} parameters must be a dictionary" + ) assert len(reform["parameters"]) > 0, f"Reform {i} has no parameters" diff --git a/policyengine_uk_data/tests/microsimulation/update_reform_impacts.py b/policyengine_uk_data/tests/microsimulation/update_reform_impacts.py index 56a23c55c..e98151b64 100644 --- a/policyengine_uk_data/tests/microsimulation/update_reform_impacts.py +++ b/policyengine_uk_data/tests/microsimulation/update_reform_impacts.py @@ -33,9 +33,7 @@ def get_fiscal_impact(reform: dict) -> float: return float((reform_revenue - baseline_revenue) / 1e9) -def update_impacts( - config_path: Path, dry_run: bool = False, verbose: bool = True -): +def update_impacts(config_path: Path, dry_run: bool = False, verbose: bool = True): """ Update the expected impacts in the configuration file with current model values. @@ -61,9 +59,7 @@ def update_impacts( old_impact = reform["expected_impact"] new_impact = round(get_fiscal_impact(reform["parameters"]), 1) - if ( - abs(old_impact - new_impact) > 0.01 - ): # Only record meaningful changes + if abs(old_impact - new_impact) > 0.01: # Only record meaningful changes changes.append( { "name": reform["name"], @@ -126,9 +122,7 @@ def main(): parser.add_argument( "--config", type=Path, - default=Path( - "policyengine_uk_data/tests/microsimulation/reforms_config.yaml" - ), + default=Path("policyengine_uk_data/tests/microsimulation/reforms_config.yaml"), help="Path to the reforms configuration file (default: reforms_config.yaml)", ) parser.add_argument( diff --git a/policyengine_uk_data/tests/test_aggregates.py b/policyengine_uk_data/tests/test_aggregates.py index 8a76d37f8..c8326a20b 100644 --- a/policyengine_uk_data/tests/test_aggregates.py +++ b/policyengine_uk_data/tests/test_aggregates.py @@ -10,10 +10,8 @@ @pytest.mark.parametrize("variable", AGGREGATES.keys()) def test_aggregates(baseline, variable: str): - estimate = baseline.calculate( - variable, map_to="household", period=2025 - ).sum() + estimate = baseline.calculate(variable, map_to="household", period=2025).sum() - assert ( - abs(estimate / AGGREGATES[variable] - 1) < 0.7 - ), f"Expected {AGGREGATES[variable]/1e9:.1f} billion for {variable}, got {estimate/1e9:.1f} billion (relative error = {abs(estimate / AGGREGATES[variable] - 1):.1%})." + assert abs(estimate / AGGREGATES[variable] - 1) < 0.7, ( + f"Expected {AGGREGATES[variable] / 1e9:.1f} billion for {variable}, got {estimate / 1e9:.1f} billion (relative error = {abs(estimate / AGGREGATES[variable] - 1):.1%})." + ) diff --git a/policyengine_uk_data/tests/test_child_limit.py b/policyengine_uk_data/tests/test_child_limit.py index e01039824..76c065dbc 100644 --- a/policyengine_uk_data/tests/test_child_limit.py +++ b/policyengine_uk_data/tests/test_child_limit.py @@ -10,34 +10,23 @@ def test_child_limit(baseline): > 0 ) * baseline.calculate("is_child", map_to="person").values child_in_uc_household = ( - baseline.calculate( - "universal_credit", map_to="person", period=2025 - ).values - > 0 + baseline.calculate("universal_credit", map_to="person", period=2025).values > 0 ) children_in_capped_households = baseline.map_result( child_is_affected * child_in_uc_household, "person", "household" ) capped_households = (children_in_capped_households > 0) * 1.0 - household_weight = baseline.calculate( - "household_weight", period=2025 - ).values - children_affected = ( - children_in_capped_households * household_weight - ).sum() + household_weight = baseline.calculate("household_weight", period=2025).values + children_affected = (children_in_capped_households * household_weight).sum() households_affected = (capped_households * household_weight).sum() UPRATING_24_25 = 1.12 # https://ifs.org.uk/articles/two-child-limit-poverty-incentives-and-cost, table at the end - child_target = ( - 1.6e6 * UPRATING_24_25 - ) # Expected number of affected children - household_target = ( - 440e3 * UPRATING_24_25 - ) # Expected number of affected households + child_target = 1.6e6 * UPRATING_24_25 # Expected number of affected children + household_target = 440e3 * UPRATING_24_25 # Expected number of affected households - assert ( - abs(children_affected / child_target - 1) < 0.3 - ), f"Expected {child_target/1e6:.1f} million affected children, got {children_affected/1e6:.1f} million." - assert ( - abs(households_affected / household_target - 1) < 0.3 - ), f"Expected {household_target/1e3:.0f} thousand affected households, got {households_affected/1e3:.0f} thousand." + assert abs(children_affected / child_target - 1) < 0.3, ( + f"Expected {child_target / 1e6:.1f} million affected children, got {children_affected / 1e6:.1f} million." + ) + assert abs(households_affected / household_target - 1) < 0.3, ( + f"Expected {household_target / 1e3:.0f} thousand affected households, got {households_affected / 1e3:.0f} thousand." + ) diff --git a/policyengine_uk_data/tests/test_childcare.py b/policyengine_uk_data/tests/test_childcare.py index 7e5e16e64..72842946f 100644 --- a/policyengine_uk_data/tests/test_childcare.py +++ b/policyengine_uk_data/tests/test_childcare.py @@ -41,17 +41,11 @@ def test_childcare(baseline, enhanced_frs): # Calculate actual spending values spending = { "tfc": baseline.calculate("tax_free_childcare", 2024).sum() / 1e9, - "extended": baseline.calculate( - "extended_childcare_entitlement", 2024 - ).sum() + "extended": baseline.calculate("extended_childcare_entitlement", 2024).sum() / 1e9, - "targeted": baseline.calculate( - "targeted_childcare_entitlement", 2024 - ).sum() + "targeted": baseline.calculate("targeted_childcare_entitlement", 2024).sum() / 1e9, - "universal": baseline.calculate( - "universal_childcare_entitlement", 2024 - ).sum() + "universal": baseline.calculate("universal_childcare_entitlement", 2024).sum() / 1e9, } @@ -82,14 +76,10 @@ def test_childcare(baseline, enhanced_frs): for key, rate in take_up_rates.items(): print(f"{key.upper():<12} {rate:.3f}") - print( - f"\nEXTENDED HOURS: Mean = {hours_mean:.2f}, Std Dev = {hours_std:.2f}" - ) + print(f"\nEXTENDED HOURS: Mean = {hours_mean:.2f}, Std Dev = {hours_std:.2f}") print("\nSPENDING (£ billion):") - print( - f"{'PROGRAM':<12} {'ACTUAL':<10} {'TARGET':<10} {'RATIO':<10} {'PASS?':<10}" - ) + print(f"{'PROGRAM':<12} {'ACTUAL':<10} {'TARGET':<10} {'RATIO':<10} {'PASS?':<10}") print("-" * 55) failed_any = False @@ -106,9 +96,7 @@ def test_childcare(baseline, enhanced_frs): failed_any = True print("\nCASELOAD (thousands):") - print( - f"{'PROGRAM':<12} {'ACTUAL':<10} {'TARGET':<10} {'RATIO':<10} {'PASS?':<10}" - ) + print(f"{'PROGRAM':<12} {'ACTUAL':<10} {'TARGET':<10} {'RATIO':<10} {'PASS?':<10}") print("-" * 55) # Test caseload for each program diff --git a/policyengine_uk_data/tests/test_low_income_deciles.py b/policyengine_uk_data/tests/test_low_income_deciles.py index 189ac0719..c093ddf61 100644 --- a/policyengine_uk_data/tests/test_low_income_deciles.py +++ b/policyengine_uk_data/tests/test_low_income_deciles.py @@ -42,8 +42,8 @@ def test_first_decile_tax_rate_reasonable(baseline): d1_tax_rate = d1_tax / d1_market assert d1_tax_rate < 1.75, ( f"First decile tax rate is {d1_tax_rate:.0%}, which exceeds 175%. " - f"Total D1 tax: £{d1_tax/1e9:.1f}bn, " - f"Total D1 market income: £{d1_market/1e9:.1f}bn. " + f"Total D1 tax: £{d1_tax / 1e9:.1f}bn, " + f"Total D1 market income: £{d1_market / 1e9:.1f}bn. " "This likely indicates a bug in property_purchased or similar variable." ) diff --git a/policyengine_uk_data/tests/test_non_negative_incomes.py b/policyengine_uk_data/tests/test_non_negative_incomes.py index 4d9f671a7..762d8fb0e 100644 --- a/policyengine_uk_data/tests/test_non_negative_incomes.py +++ b/policyengine_uk_data/tests/test_non_negative_incomes.py @@ -18,6 +18,4 @@ def test_income_non_negative(frs, variable: str): """Test that income variables have no negative values.""" values = frs.person[variable] min_value = values.min() - assert ( - min_value >= 0 - ), f"{variable} has negative values (min = {min_value:.2f})" + assert min_value >= 0, f"{variable} has negative values (min = {min_value:.2f})" diff --git a/policyengine_uk_data/tests/test_pension_contributions_via_salary_sacrifice.py b/policyengine_uk_data/tests/test_pension_contributions_via_salary_sacrifice.py index 57c5edf89..9ed48abfe 100644 --- a/policyengine_uk_data/tests/test_pension_contributions_via_salary_sacrifice.py +++ b/policyengine_uk_data/tests/test_pension_contributions_via_salary_sacrifice.py @@ -5,20 +5,18 @@ def test_pension_contributions_via_salary_sacrifice(baseline): ) # Basic validation: all values should be non-negative - assert ( - values >= 0 - ).all(), "Salary sacrifice pension contributions must be non-negative" + assert (values >= 0).all(), ( + "Salary sacrifice pension contributions must be non-negative" + ) # Should have some non-zero values (not everyone uses salary sacrifice, but some do) total = values.sum() - assert ( - total > 0 - ), f"Expected some salary sacrifice contributions, got {total}" + assert total > 0, f"Expected some salary sacrifice contributions, got {total}" # Reasonableness check: total should be less than total employment income # This is a very loose check just to catch major issues employment_income = baseline.calculate("employment_income", period=2025) total_employment = employment_income.sum() - assert ( - total < total_employment - ), f"Salary sacrifice contributions ({total/1e9:.1f}B) cannot exceed total employment income ({total_employment/1e9:.1f}B)" + assert total < total_employment, ( + f"Salary sacrifice contributions ({total / 1e9:.1f}B) cannot exceed total employment income ({total_employment / 1e9:.1f}B)" + ) diff --git a/policyengine_uk_data/tests/test_population.py b/policyengine_uk_data/tests/test_population.py index 321c377c6..43645791e 100644 --- a/policyengine_uk_data/tests/test_population.py +++ b/policyengine_uk_data/tests/test_population.py @@ -2,6 +2,6 @@ def test_population(baseline): population = baseline.calculate("people", 2025).sum() / 1e6 POPULATION_TARGET = 69.5 # Expected UK population in millions, per ONS 2022-based estimate here: https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/populationprojections/bulletins/nationalpopulationprojections/2022based # Tolerance temporarily relaxed to 7% due to calibration inflation issue #217 - assert ( - abs(population / POPULATION_TARGET - 1) < 0.07 - ), f"Expected UK population of {POPULATION_TARGET:.1f} million, got {population:.1f} million." + assert abs(population / POPULATION_TARGET - 1) < 0.07, ( + f"Expected UK population of {POPULATION_TARGET:.1f} million, got {population:.1f} million." + ) diff --git a/policyengine_uk_data/tests/test_property_purchased.py b/policyengine_uk_data/tests/test_property_purchased.py index 709e86bfc..0c742180c 100644 --- a/policyengine_uk_data/tests/test_property_purchased.py +++ b/policyengine_uk_data/tests/test_property_purchased.py @@ -34,9 +34,9 @@ def test_property_purchased_rate(baseline): target_rate = PROPERTY_PURCHASE_RATE tolerance = 0.02 - assert ( - abs(actual_rate - target_rate) < tolerance - ), f"property_purchased rate {actual_rate:.2%} is not close to target {target_rate:.2%}" + assert abs(actual_rate - target_rate) < tolerance, ( + f"property_purchased rate {actual_rate:.2%} is not close to target {target_rate:.2%}" + ) def test_property_purchased_not_all_true(baseline): @@ -47,9 +47,9 @@ def test_property_purchased_not_all_true(baseline): n_households = len(property_purchased) # Should NOT be 100% True (the bug we fixed) - assert ( - true_count < n_households * 0.1 - ), f"Too many households have property_purchased=True ({true_count}/{n_households})" + assert true_count < n_households * 0.1, ( + f"Too many households have property_purchased=True ({true_count}/{n_households})" + ) def test_property_purchased_has_some_true(baseline): @@ -80,13 +80,13 @@ def test_sdlt_total_reasonable(baseline): max_sdlt = 50e9 # £50bn maximum (official is ~£14bn) assert total_sdlt > min_sdlt, ( - f"Total SDLT £{total_sdlt/1e9:.1f}bn is too low " - f"(minimum expected: £{min_sdlt/1e9:.1f}bn)" + f"Total SDLT £{total_sdlt / 1e9:.1f}bn is too low " + f"(minimum expected: £{min_sdlt / 1e9:.1f}bn)" ) assert total_sdlt < max_sdlt, ( - f"Total SDLT £{total_sdlt/1e9:.1f}bn is unrealistically high " - f"(maximum expected: £{max_sdlt/1e9:.1f}bn). " + f"Total SDLT £{total_sdlt / 1e9:.1f}bn is unrealistically high " + f"(maximum expected: £{max_sdlt / 1e9:.1f}bn). " f"Official SDLT is ~£14bn. " "This suggests property_purchased may be incorrectly set to True for all households." ) diff --git a/policyengine_uk_data/tests/test_salary_sacrifice_headcount.py b/policyengine_uk_data/tests/test_salary_sacrifice_headcount.py index af0fdfd4c..09f3ab258 100644 --- a/policyengine_uk_data/tests/test_salary_sacrifice_headcount.py +++ b/policyengine_uk_data/tests/test_salary_sacrifice_headcount.py @@ -23,8 +23,8 @@ def test_salary_sacrifice_total_users(baseline): TARGET = 7_700_000 assert abs(total_users / TARGET - 1) < TOLERANCE, ( - f"Expected ~{TARGET/1e6:.1f}mn SS users, " - f"got {total_users/1e6:.1f}mn ({total_users/TARGET*100:.0f}% of target)" + f"Expected ~{TARGET / 1e6:.1f}mn SS users, " + f"got {total_users / 1e6:.1f}mn ({total_users / TARGET * 100:.0f}% of target)" ) @@ -44,8 +44,8 @@ def test_salary_sacrifice_below_cap_users(baseline): TARGET = 4_300_000 assert abs(total_below_cap / TARGET - 1) < TOLERANCE, ( - f"Expected ~{TARGET/1e6:.1f}mn below-cap SS users, " - f"got {total_below_cap/1e6:.1f}mn ({total_below_cap/TARGET*100:.0f}% of target)" + f"Expected ~{TARGET / 1e6:.1f}mn below-cap SS users, " + f"got {total_below_cap / 1e6:.1f}mn ({total_below_cap / TARGET * 100:.0f}% of target)" ) @@ -65,6 +65,6 @@ def test_salary_sacrifice_above_cap_users(baseline): TARGET = 3_300_000 assert abs(total_above_cap / TARGET - 1) < TOLERANCE, ( - f"Expected ~{TARGET/1e6:.1f}mn above-cap SS users, " - f"got {total_above_cap/1e6:.1f}mn ({total_above_cap/TARGET*100:.0f}% of target)" + f"Expected ~{TARGET / 1e6:.1f}mn above-cap SS users, " + f"got {total_above_cap / 1e6:.1f}mn ({total_above_cap / TARGET * 100:.0f}% of target)" ) diff --git a/policyengine_uk_data/tests/test_scotland_babies.py b/policyengine_uk_data/tests/test_scotland_babies.py index a6674018d..b5eba325a 100644 --- a/policyengine_uk_data/tests/test_scotland_babies.py +++ b/policyengine_uk_data/tests/test_scotland_babies.py @@ -30,6 +30,6 @@ def test_scotland_babies_under_1(baseline): TOLERANCE = 0.15 # 15% tolerance assert abs(total_babies / TARGET - 1) < TOLERANCE, ( - f"Expected ~{TARGET/1000:.0f}k babies under 1 in Scotland, " - f"got {total_babies/1000:.0f}k ({total_babies/TARGET*100:.0f}% of target)" + f"Expected ~{TARGET / 1000:.0f}k babies under 1 in Scotland, " + f"got {total_babies / 1000:.0f}k ({total_babies / TARGET * 100:.0f}% of target)" ) diff --git a/policyengine_uk_data/tests/test_scotland_uc_babies.py b/policyengine_uk_data/tests/test_scotland_uc_babies.py index 36246b48f..143948385 100644 --- a/policyengine_uk_data/tests/test_scotland_uc_babies.py +++ b/policyengine_uk_data/tests/test_scotland_uc_babies.py @@ -25,15 +25,11 @@ def test_scotland_uc_households_child_under_1(baseline): ).values # Check if household has child under 1 - is_child = baseline.calculate( - "is_child", map_to="person", period=2025 - ).values + is_child = baseline.calculate("is_child", map_to="person", period=2025).values age = baseline.calculate("age", map_to="person", period=2025).values child_under_1 = is_child & (age < 1) - has_child_under_1 = ( - baseline.map_result(child_under_1, "person", "household") > 0 - ) + has_child_under_1 = baseline.map_result(child_under_1, "person", "household") > 0 scotland_uc_child_under_1 = ( (region.values == "SCOTLAND") & (uc > 0) & has_child_under_1 @@ -44,6 +40,6 @@ def test_scotland_uc_households_child_under_1(baseline): TOLERANCE = 0.15 # 15% tolerance assert abs(total / TARGET - 1) < TOLERANCE, ( - f"Expected ~{TARGET/1000:.0f}k UC households with child under 1 in Scotland, " - f"got {total/1000:.0f}k ({total/TARGET*100:.0f}% of target)" + f"Expected ~{TARGET / 1000:.0f}k UC households with child under 1 in Scotland, " + f"got {total / 1000:.0f}k ({total / TARGET * 100:.0f}% of target)" ) diff --git a/policyengine_uk_data/tests/test_target_registry.py b/policyengine_uk_data/tests/test_target_registry.py index ccc49e005..bc0854c5d 100644 --- a/policyengine_uk_data/tests/test_target_registry.py +++ b/policyengine_uk_data/tests/test_target_registry.py @@ -52,9 +52,7 @@ def test_hmrc_spi_targets_exist(): targets = get_all_targets(year=2025) spi_targets = [t for t in targets if t.source == "hmrc_spi"] # 13 bands × 6 income types × 2 (count + amount) = 156 per year - assert ( - len(spi_targets) >= 100 - ), f"Expected 100+ SPI targets, got {len(spi_targets)}" + assert len(spi_targets) >= 100, f"Expected 100+ SPI targets, got {len(spi_targets)}" def test_dwp_pip_targets(): @@ -76,9 +74,7 @@ def test_voa_council_tax_targets(): def test_core_target_count(): """Total target count should be substantial.""" targets = get_all_targets(year=2025) - assert ( - len(targets) >= 200 - ), f"Expected 200+ targets for 2025, got {len(targets)}" + assert len(targets) >= 200, f"Expected 200+ targets for 2025, got {len(targets)}" def test_two_child_limit_targets(): diff --git a/policyengine_uk_data/tests/test_uc_by_children.py b/policyengine_uk_data/tests/test_uc_by_children.py index 3878d99bd..8de5d5587 100644 --- a/policyengine_uk_data/tests/test_uc_by_children.py +++ b/policyengine_uk_data/tests/test_uc_by_children.py @@ -35,9 +35,7 @@ def test_uc_households_by_children(baseline, bucket, target): uc = baseline.calculate("universal_credit", period=2025).values on_uc = baseline.map_result(uc > 0, "benunit", "household") > 0 - is_child = baseline.calculate( - "is_child", map_to="person", period=2025 - ).values + is_child = baseline.calculate("is_child", map_to="person", period=2025).values children_per_hh = baseline.map_result(is_child, "person", "household") if bucket == "0_children": @@ -49,12 +47,10 @@ def test_uc_households_by_children(baseline, bucket, target): else: # 3plus_children match = on_uc & (children_per_hh >= 3) - household_weight = baseline.calculate( - "household_weight", period=2025 - ).values + household_weight = baseline.calculate("household_weight", period=2025).values actual = (household_weight * match).sum() assert abs(actual / target - 1) < TOLERANCE, ( - f"UC households with {bucket}: expected {target/1e3:.0f}k, " - f"got {actual/1e3:.0f}k ({actual/target*100:.0f}% of target)" + f"UC households with {bucket}: expected {target / 1e3:.0f}k, " + f"got {actual / 1e3:.0f}k ({actual / target * 100:.0f}% of target)" ) diff --git a/policyengine_uk_data/tests/test_vehicle_ownership.py b/policyengine_uk_data/tests/test_vehicle_ownership.py index d2fbc73b8..6e9f6923a 100644 --- a/policyengine_uk_data/tests/test_vehicle_ownership.py +++ b/policyengine_uk_data/tests/test_vehicle_ownership.py @@ -9,9 +9,7 @@ def test_vehicle_ownership(baseline): """Test that vehicle ownership distribution matches NTS 2024 targets.""" - num_vehicles = baseline.calculate( - "num_vehicles", map_to="household", period=2025 - ) + num_vehicles = baseline.calculate("num_vehicles", map_to="household", period=2025) weights = baseline.calculate("household_weight", period=2025) total_hh = weights.sum() @@ -28,9 +26,7 @@ def test_vehicle_ownership(baseline): f"Expected {NTS_ONE_VEHICLE_RATE:.0%} households with one vehicle, " f"got {one_vehicle_rate:.0%}" ) - assert ( - abs(two_plus_rate - NTS_TWO_PLUS_VEHICLE_RATE) < ABSOLUTE_TOLERANCE - ), ( + assert abs(two_plus_rate - NTS_TWO_PLUS_VEHICLE_RATE) < ABSOLUTE_TOLERANCE, ( f"Expected {NTS_TWO_PLUS_VEHICLE_RATE:.0%} households with two+ vehicles, " f"got {two_plus_rate:.0%}" ) diff --git a/policyengine_uk_data/utils/calibrate.py b/policyengine_uk_data/utils/calibrate.py index 6e31402c6..c9fc5a92f 100644 --- a/policyengine_uk_data/utils/calibrate.py +++ b/policyengine_uk_data/utils/calibrate.py @@ -53,13 +53,10 @@ def calibrate_local_areas( areas_per_household = r.sum( axis=0 ) # number of areas each household can contribute to - areas_per_household = np.maximum( - areas_per_household, 1 - ) # avoid division by zero + areas_per_household = np.maximum(areas_per_household, 1) # avoid division by zero original_weights = np.log( dataset.household.household_weight.values / areas_per_household - + np.random.random(len(dataset.household.household_weight.values)) - * 0.01 + + np.random.random(len(dataset.household.household_weight.values)) * 0.01 ) weights = torch.tensor( np.ones((area_count, len(original_weights))) * original_weights, @@ -85,9 +82,7 @@ def calibrate_local_areas( matrix.values if hasattr(matrix, "values") else matrix, dtype=torch.float32, ) - y = torch.tensor( - y.values if hasattr(y, "values") else y, dtype=torch.float32 - ) + y = torch.tensor(y.values if hasattr(y, "values") else y, dtype=torch.float32) matrix_national = torch.tensor( m_national.values if hasattr(m_national, "values") else m_national, dtype=torch.float32, @@ -135,9 +130,7 @@ def pct_close(w, t=0.1, local=True, national=True): if local: pred_local = (w.unsqueeze(-1) * metrics.unsqueeze(0)).sum(dim=1) - e_local = torch.sum( - torch.abs((pred_local / (1 + y) - 1)) < t - ).item() + e_local = torch.sum(torch.abs((pred_local / (1 + y) - 1)) < t).item() c_local = pred_local.shape[0] * pred_local.shape[1] numerator += e_local denominator += c_local @@ -183,9 +176,7 @@ def dropout_weights(weights, p): optimizer.step() local_close = pct_close(weights_, local=True, national=False) - national_close = pct_close( - weights_, local=False, national=True - ) + national_close = pct_close(weights_, local=False, national=True) if dropout_targets: validation_loss = loss(weights_, validation=True) @@ -213,9 +204,7 @@ def dropout_weights(weights, p): excluded_training_targets, ) performance_step["epoch"] = epoch - performance_step["loss"] = ( - performance_step.rel_abs_error**2 - ) + performance_step["loss"] = performance_step.rel_abs_error**2 performance_step["target_name"] = [ f"{area}/{metric}" for area, metric in zip( @@ -231,9 +220,7 @@ def dropout_weights(weights, p): with h5py.File(STORAGE_FOLDER / weight_file, "w") as f: f.create_dataset(dataset_key, data=final_weights) - dataset.household.household_weight = final_weights.sum( - axis=0 - ) + dataset.household.household_weight = final_weights.sum(axis=0) else: for epoch in range(epochs): optimizer.zero_grad() diff --git a/policyengine_uk_data/utils/data_upload.py b/policyengine_uk_data/utils/data_upload.py index 89445ad96..342a02f7a 100644 --- a/policyengine_uk_data/utils/data_upload.py +++ b/policyengine_uk_data/utils/data_upload.py @@ -89,18 +89,14 @@ def upload_files_to_gcs( Upload files to Google Cloud Storage and set metadata with the version. """ credentials, project_id = google.auth.default() - storage_client = storage.Client( - credentials=credentials, project=project_id - ) + storage_client = storage.Client(credentials=credentials, project=project_id) bucket = storage_client.bucket(gcs_bucket_name) for file_path in files: file_path = Path(file_path) blob = bucket.blob(file_path.name) blob.upload_from_filename(file_path) - logging.info( - f"Uploaded {file_path.name} to GCS bucket {gcs_bucket_name}." - ) + logging.info(f"Uploaded {file_path.name} to GCS bucket {gcs_bucket_name}.") # Set metadata blob.metadata = {"version": version} diff --git a/policyengine_uk_data/utils/datasets.py b/policyengine_uk_data/utils/datasets.py index 82c9e2733..2f4cd1b46 100644 --- a/policyengine_uk_data/utils/datasets.py +++ b/policyengine_uk_data/utils/datasets.py @@ -10,9 +10,7 @@ warnings.filterwarnings("ignore") -def sum_to_entity( - values: pd.Series, foreign_key: pd.Series, primary_key -) -> np.ndarray: +def sum_to_entity(values: pd.Series, foreign_key: pd.Series, primary_key) -> np.ndarray: """Sums values by joining foreign and primary keys. Args: @@ -23,14 +21,10 @@ def sum_to_entity( Returns: pd.Series: A value for each person. """ - return ( - values.groupby(foreign_key).sum().reindex(primary_key).fillna(0).values - ) + return values.groupby(foreign_key).sum().reindex(primary_key).fillna(0).values -def categorical( - values: pd.Series, default: int, left: list, right: list -) -> pd.Series: +def categorical(values: pd.Series, default: int, left: list, right: list) -> pd.Series: """Maps a categorical input to an output using given left and right arrays. Args: @@ -45,9 +39,7 @@ def categorical( return values.fillna(default).map({i: j for i, j in zip(left, right)}) -def sum_from_positive_fields( - table: pd.DataFrame, fields: List[str] -) -> np.array: +def sum_from_positive_fields(table: pd.DataFrame, fields: List[str]) -> np.array: """Sum from fields in table, ignoring negative values. Args: @@ -57,9 +49,7 @@ def sum_from_positive_fields( Returns: np.array """ - return np.where( - table[fields].sum(axis=1) > 0, table[fields].sum(axis=1), 0 - ) + return np.where(table[fields].sum(axis=1) > 0, table[fields].sum(axis=1), 0) def sum_positive_variables(variables: List[str]) -> np.array: diff --git a/policyengine_uk_data/utils/huggingface.py b/policyengine_uk_data/utils/huggingface.py index 1ed8de25d..7fc5d9e9e 100644 --- a/policyengine_uk_data/utils/huggingface.py +++ b/policyengine_uk_data/utils/huggingface.py @@ -2,9 +2,7 @@ import os -def download( - repo: str, repo_filename: str, local_folder: str, version: str = None -): +def download(repo: str, repo_filename: str, local_folder: str, version: str = None): token = os.environ.get( "HUGGING_FACE_TOKEN", ) diff --git a/policyengine_uk_data/utils/incomes_projection.py b/policyengine_uk_data/utils/incomes_projection.py index 1f62b1cb2..302dea9d6 100644 --- a/policyengine_uk_data/utils/incomes_projection.py +++ b/policyengine_uk_data/utils/incomes_projection.py @@ -20,9 +20,9 @@ MAX_YEAR = 2029 for time_period in range(MIN_YEAR, MAX_YEAR + 1): - time_period_df = statistics[ - ["name", "unit", "reference", str(time_period)] - ].rename(columns={str(time_period): "value"}) + time_period_df = statistics[["name", "unit", "reference", str(time_period)]].rename( + columns={str(time_period): "value"} + ) time_period_df["time_period"] = time_period dfs.append(time_period_df) @@ -49,9 +49,7 @@ def create_target_matrix( sim = Microsimulation(dataset=dataset, reform=reform) sim.default_calculation_period = time_period - household_from_person = lambda values: sim.map_result( - values, "person", "household" - ) + household_from_person = lambda values: sim.map_result(values, "person", "household") df = pd.DataFrame() @@ -99,9 +97,7 @@ def create_target_matrix( target_values.append(row[variable + "_amount"]) target_names.append(name_amount) name_count = ( - "hmrc/" - + variable - + f"_count_income_band_{i}_{lower:_}_to_{upper:_}" + "hmrc/" + variable + f"_count_income_band_{i}_{lower:_}_to_{upper:_}" ) df[name_count] = household_from_person( (income_df[variable] > 0) * in_income_band @@ -184,16 +180,10 @@ def create_income_projections(): for variable in INCOME_VARIABLES: count_values = [] amount_values = [] - for i, (lower, upper) in enumerate( - zip(lower_bounds, upper_bounds) - ): - in_band = sim.calculate("total_income", year).between( - lower, upper - ) + for i, (lower, upper) in enumerate(zip(lower_bounds, upper_bounds)): + in_band = sim.calculate("total_income", year).between(lower, upper) value = sim.calculate(variable, year) - count_in_band_with_nonzero_value = round( - ((value > 0) * in_band).sum() - ) + count_in_band_with_nonzero_value = round(((value > 0) * in_band).sum()) amount_in_band = round(value[in_band].sum()) count_values.append(count_in_band_with_nonzero_value) amount_values.append(amount_in_band) @@ -202,9 +192,7 @@ def create_income_projections(): year_df["year"] = year projection_df = pd.concat([projection_df, year_df]) - projection_df.to_csv( - STORAGE_FOLDER / "incomes_projection.csv", index=False - ) + projection_df.to_csv(STORAGE_FOLDER / "incomes_projection.csv", index=False) if __name__ == "__main__": diff --git a/policyengine_uk_data/utils/loss.py b/policyengine_uk_data/utils/loss.py index 18d30bed0..27eb919fd 100644 --- a/policyengine_uk_data/utils/loss.py +++ b/policyengine_uk_data/utils/loss.py @@ -11,9 +11,7 @@ ) -def get_loss_results( - dataset, time_period, reform=None, household_weights=None -): +def get_loss_results(dataset, time_period, reform=None, household_weights=None): """Calculate loss metrics comparing model outputs to targets. Args: diff --git a/policyengine_uk_data/utils/progress.py b/policyengine_uk_data/utils/progress.py index ccb5efcf6..e6a70f89d 100644 --- a/policyengine_uk_data/utils/progress.py +++ b/policyengine_uk_data/utils/progress.py @@ -285,9 +285,7 @@ def update_calibration( description=f"[yellow]●[/yellow] Calibration epoch {iteration}/{iterations} • calculating loss", ) else: - loss_text = ( - f" • loss: {loss_value:.6f}" if loss_value else "" - ) + loss_text = f" • loss: {loss_value:.6f}" if loss_value else "" nested_progress.update_task( calibration_task, description=f"[blue]●[/blue] Calibration epoch {iteration}/{iterations}{loss_text}", @@ -317,9 +315,7 @@ def update_calibration( description=f"Calibration iteration {iteration}/{iterations} • [yellow]calculating loss[/yellow]", ) else: - loss_text = ( - f" • loss: {loss_value:.6f}" if loss_value else "" - ) + loss_text = f" • loss: {loss_value:.6f}" if loss_value else "" progress.update_task( main_task, description=f"Calibration iteration {iteration}/{iterations}{loss_text}", @@ -329,9 +325,7 @@ def update_calibration( yield update_calibration @contextmanager - def track_file_processing( - self, files: List[str], operation: str = "processing" - ): + def track_file_processing(self, files: List[str], operation: str = "processing"): """Track file processing operations. Args: diff --git a/policyengine_uk_data/utils/qrf.py b/policyengine_uk_data/utils/qrf.py index d99e5a257..05c0ba661 100644 --- a/policyengine_uk_data/utils/qrf.py +++ b/policyengine_uk_data/utils/qrf.py @@ -74,6 +74,4 @@ def save(self, file_path: str): file_path: Path where model should be saved. """ with open(file_path, "wb") as f: - pickle.dump( - {"model": self.model, "input_columns": self.input_columns}, f - ) + pickle.dump({"model": self.model, "input_columns": self.input_columns}, f) diff --git a/policyengine_uk_data/utils/spi.py b/policyengine_uk_data/utils/spi.py index fae6b451f..5e94bcffb 100644 --- a/policyengine_uk_data/utils/spi.py +++ b/policyengine_uk_data/utils/spi.py @@ -70,12 +70,8 @@ def parse_value(value): import numpy as np -income["total_income_lower_bound"] = list(income["income_range"][:-1]) + [ - 12_570 -] -income["total_income_upper_bound"] = ( - list(income["income_range"][1:-1]) + [np.inf] * 2 -) +income["total_income_lower_bound"] = list(income["income_range"][:-1]) + [12_570] +income["total_income_upper_bound"] = list(income["income_range"][1:-1]) + [np.inf] * 2 # Order the income bound columns first income = income[ [ diff --git a/policyengine_uk_data/utils/stack.py b/policyengine_uk_data/utils/stack.py index 2ded165cb..2fe9df828 100644 --- a/policyengine_uk_data/utils/stack.py +++ b/policyengine_uk_data/utils/stack.py @@ -17,8 +17,6 @@ def stack_datasets( return UKSingleYearDataset( person=pd.concat([data_1.person, data_2.person], ignore_index=True), benunit=pd.concat([data_1.benunit, data_2.benunit], ignore_index=True), - household=pd.concat( - [data_1.household, data_2.household], ignore_index=True - ), + household=pd.concat([data_1.household, data_2.household], ignore_index=True), fiscal_year=data_1.time_period, ) diff --git a/policyengine_uk_data/utils/uc_data.py b/policyengine_uk_data/utils/uc_data.py index 1e2282cb0..7bacebc57 100644 --- a/policyengine_uk_data/utils/uc_data.py +++ b/policyengine_uk_data/utils/uc_data.py @@ -63,9 +63,7 @@ def parse_band(band): "Couple, no children": "COUPLE_NO_CHILDREN", "Couple, with children": "COUPLE_WITH_CHILDREN", } - result_df["family_type"] = result_df["family_type"].map( - family_type_mapping - ) + result_df["family_type"] = result_df["family_type"].map(family_type_mapping) # Reorder columns and drop monthly band result_df = result_df[ @@ -111,9 +109,7 @@ def _parse_uc_pc_households(): # Parse NI data ni_file_path = storage_path / "dfc-ni-uc-stats-supp-tables-may-2025.ods" - df_ni = pd.read_excel( - ni_file_path, sheet_name="5b", engine="odf", header=None - ) + df_ni = pd.read_excel(ni_file_path, sheet_name="5b", engine="odf", header=None) # Get constituency names from row 2, columns 1-18 ni_constituencies = df_ni.iloc[2, 1:19].tolist() @@ -179,9 +175,7 @@ def _parse_uc_la_households(): # Parse NI data ni_file_path = storage_path / "dfc-ni-uc-stats-supp-tables-may-2025.ods" - df_ni = pd.read_excel( - ni_file_path, sheet_name="5c", engine="odf", header=None - ) + df_ni = pd.read_excel(ni_file_path, sheet_name="5c", engine="odf", header=None) # Get LGD names from row 2, columns 1-11 ni_lgd_names = df_ni.iloc[2, 1:12].tolist() diff --git a/pyproject.toml b/pyproject.toml index 72df5b5a6..9e4f10308 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ dependencies = [ "policyengine-uk>=2.43.5", "microcalibrate>=0.18.0", "microimpute>=1.0.1", - "black>=25.1.0", + "ruff>=0.9.0", "rich>=13.0.0", "odfpy", "pandas", @@ -35,7 +35,7 @@ dependencies = [ [project.optional-dependencies] dev = [ - "black", + "ruff>=0.9.0", "pytest", "torch", "tables", @@ -70,24 +70,6 @@ filterwarnings = [ "ignore::PendingDeprecationWarning", ] -[tool.black] -line-length = 79 -target-version = ['py311'] -include = '\.pyi?$' -extend-exclude = ''' -/( - # directories - \.eggs - | \.git - | \.hg - | \.mypy_cache - | \.tox - | \.venv - | build - | dist -)/ -''' - [tool.towncrier] package = "policyengine_uk_data" directory = "changelog.d" diff --git a/uv.lock b/uv.lock index 8c63a74ec..9e03803ab 100644 --- a/uv.lock +++ b/uv.lock @@ -99,31 +99,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/94/fe/3aed5d0be4d404d12d36ab97e2f1791424d9ca39c2f754a6285d59a3b01d/beautifulsoup4-4.14.2-py3-none-any.whl", hash = "sha256:5ef6fa3a8cbece8488d66985560f97ed091e22bbc4e9c2338508a9d5de6d4515", size = 106392, upload-time = "2025-09-29T10:05:43.771Z" }, ] -[[package]] -name = "black" -version = "25.11.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "click" }, - { name = "mypy-extensions" }, - { name = "packaging" }, - { name = "pathspec" }, - { name = "platformdirs" }, - { name = "pytokens" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/8c/ad/33adf4708633d047950ff2dfdea2e215d84ac50ef95aff14a614e4b6e9b2/black-25.11.0.tar.gz", hash = "sha256:9a323ac32f5dc75ce7470501b887250be5005a01602e931a15e45593f70f6e08", size = 655669, upload-time = "2025-11-10T01:53:50.558Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ad/47/3378d6a2ddefe18553d1115e36aea98f4a90de53b6a3017ed861ba1bd3bc/black-25.11.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0a1d40348b6621cc20d3d7530a5b8d67e9714906dfd7346338249ad9c6cedf2b", size = 1772446, upload-time = "2025-11-10T02:02:16.181Z" }, - { url = "https://files.pythonhosted.org/packages/ba/4b/0f00bfb3d1f7e05e25bfc7c363f54dc523bb6ba502f98f4ad3acf01ab2e4/black-25.11.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:51c65d7d60bb25429ea2bf0731c32b2a2442eb4bd3b2afcb47830f0b13e58bfd", size = 1607983, upload-time = "2025-11-10T02:02:52.502Z" }, - { url = "https://files.pythonhosted.org/packages/99/fe/49b0768f8c9ae57eb74cc10a1f87b4c70453551d8ad498959721cc345cb7/black-25.11.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:936c4dd07669269f40b497440159a221ee435e3fddcf668e0c05244a9be71993", size = 1682481, upload-time = "2025-11-10T01:57:12.35Z" }, - { url = "https://files.pythonhosted.org/packages/55/17/7e10ff1267bfa950cc16f0a411d457cdff79678fbb77a6c73b73a5317904/black-25.11.0-cp313-cp313-win_amd64.whl", hash = "sha256:f42c0ea7f59994490f4dccd64e6b2dd49ac57c7c84f38b8faab50f8759db245c", size = 1363869, upload-time = "2025-11-10T01:58:24.608Z" }, - { url = "https://files.pythonhosted.org/packages/67/c0/cc865ce594d09e4cd4dfca5e11994ebb51604328489f3ca3ae7bb38a7db5/black-25.11.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:35690a383f22dd3e468c85dc4b915217f87667ad9cce781d7b42678ce63c4170", size = 1771358, upload-time = "2025-11-10T02:03:33.331Z" }, - { url = "https://files.pythonhosted.org/packages/37/77/4297114d9e2fd2fc8ab0ab87192643cd49409eb059e2940391e7d2340e57/black-25.11.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:dae49ef7369c6caa1a1833fd5efb7c3024bb7e4499bf64833f65ad27791b1545", size = 1612902, upload-time = "2025-11-10T01:59:33.382Z" }, - { url = "https://files.pythonhosted.org/packages/de/63/d45ef97ada84111e330b2b2d45e1dd163e90bd116f00ac55927fb6bf8adb/black-25.11.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5bd4a22a0b37401c8e492e994bce79e614f91b14d9ea911f44f36e262195fdda", size = 1680571, upload-time = "2025-11-10T01:57:04.239Z" }, - { url = "https://files.pythonhosted.org/packages/ff/4b/5604710d61cdff613584028b4cb4607e56e148801ed9b38ee7970799dab6/black-25.11.0-cp314-cp314-win_amd64.whl", hash = "sha256:aa211411e94fdf86519996b7f5f05e71ba34835d8f0c0f03c00a26271da02664", size = 1382599, upload-time = "2025-11-10T01:57:57.427Z" }, - { url = "https://files.pythonhosted.org/packages/00/5d/aed32636ed30a6e7f9efd6ad14e2a0b0d687ae7c8c7ec4e4a557174b895c/black-25.11.0-py3-none-any.whl", hash = "sha256:e3f562da087791e96cefcd9dda058380a442ab322a02e222add53736451f604b", size = 204918, upload-time = "2025-11-10T01:53:48.917Z" }, -] - [[package]] name = "blosc2" version = "3.11.1" @@ -471,6 +446,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/49/e8/58c7f85958bda41dafea50497cbd59738c5c43dbbea5ee83d651234398f4/greenlet-3.2.4-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:1a921e542453fe531144e91e1feedf12e07351b1cf6c9e8a3325ea600a715a31", size = 272814, upload-time = "2025-08-07T13:15:50.011Z" }, { url = "https://files.pythonhosted.org/packages/62/dd/b9f59862e9e257a16e4e610480cfffd29e3fae018a68c2332090b53aac3d/greenlet-3.2.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd3c8e693bff0fff6ba55f140bf390fa92c994083f838fece0f63be121334945", size = 641073, upload-time = "2025-08-07T13:42:57.23Z" }, { url = "https://files.pythonhosted.org/packages/f7/0b/bc13f787394920b23073ca3b6c4a7a21396301ed75a655bcb47196b50e6e/greenlet-3.2.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:710638eb93b1fa52823aa91bf75326f9ecdfd5e0466f00789246a5280f4ba0fc", size = 655191, upload-time = "2025-08-07T13:45:29.752Z" }, + { url = "https://files.pythonhosted.org/packages/f2/d6/6adde57d1345a8d0f14d31e4ab9c23cfe8e2cd39c3baf7674b4b0338d266/greenlet-3.2.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:c5111ccdc9c88f423426df3fd1811bfc40ed66264d35aa373420a34377efc98a", size = 649516, upload-time = "2025-08-07T13:53:16.314Z" }, { url = "https://files.pythonhosted.org/packages/7f/3b/3a3328a788d4a473889a2d403199932be55b1b0060f4ddd96ee7cdfcad10/greenlet-3.2.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d76383238584e9711e20ebe14db6c88ddcedc1829a9ad31a584389463b5aa504", size = 652169, upload-time = "2025-08-07T13:18:32.861Z" }, { url = "https://files.pythonhosted.org/packages/ee/43/3cecdc0349359e1a527cbf2e3e28e5f8f06d3343aaf82ca13437a9aa290f/greenlet-3.2.4-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:23768528f2911bcd7e475210822ffb5254ed10d71f4028387e5a99b4c6699671", size = 610497, upload-time = "2025-08-07T13:18:31.636Z" }, { url = "https://files.pythonhosted.org/packages/b8/19/06b6cf5d604e2c382a6f31cafafd6f33d5dea706f4db7bdab184bad2b21d/greenlet-3.2.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:00fadb3fedccc447f517ee0d3fd8fe49eae949e1cd0f6a611818f4f6fb7dc83b", size = 1121662, upload-time = "2025-08-07T13:42:41.117Z" }, @@ -481,6 +457,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/22/5c/85273fd7cc388285632b0498dbbab97596e04b154933dfe0f3e68156c68c/greenlet-3.2.4-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:49a30d5fda2507ae77be16479bdb62a660fa51b1eb4928b524975b3bde77b3c0", size = 273586, upload-time = "2025-08-07T13:16:08.004Z" }, { url = "https://files.pythonhosted.org/packages/d1/75/10aeeaa3da9332c2e761e4c50d4c3556c21113ee3f0afa2cf5769946f7a3/greenlet-3.2.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:299fd615cd8fc86267b47597123e3f43ad79c9d8a22bebdce535e53550763e2f", size = 686346, upload-time = "2025-08-07T13:42:59.944Z" }, { url = "https://files.pythonhosted.org/packages/c0/aa/687d6b12ffb505a4447567d1f3abea23bd20e73a5bed63871178e0831b7a/greenlet-3.2.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:c17b6b34111ea72fc5a4e4beec9711d2226285f0386ea83477cbb97c30a3f3a5", size = 699218, upload-time = "2025-08-07T13:45:30.969Z" }, + { url = "https://files.pythonhosted.org/packages/dc/8b/29aae55436521f1d6f8ff4e12fb676f3400de7fcf27fccd1d4d17fd8fecd/greenlet-3.2.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b4a1870c51720687af7fa3e7cda6d08d801dae660f75a76f3845b642b4da6ee1", size = 694659, upload-time = "2025-08-07T13:53:17.759Z" }, { url = "https://files.pythonhosted.org/packages/92/2e/ea25914b1ebfde93b6fc4ff46d6864564fba59024e928bdc7de475affc25/greenlet-3.2.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:061dc4cf2c34852b052a8620d40f36324554bc192be474b9e9770e8c042fd735", size = 695355, upload-time = "2025-08-07T13:18:34.517Z" }, { url = "https://files.pythonhosted.org/packages/72/60/fc56c62046ec17f6b0d3060564562c64c862948c9d4bc8aa807cf5bd74f4/greenlet-3.2.4-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:44358b9bf66c8576a9f57a590d5f5d6e72fa4228b763d0e43fee6d3b06d3a337", size = 657512, upload-time = "2025-08-07T13:18:33.969Z" }, { url = "https://files.pythonhosted.org/packages/23/6e/74407aed965a4ab6ddd93a7ded3180b730d281c77b765788419484cdfeef/greenlet-3.2.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2917bdf657f5859fbf3386b12d68ede4cf1f04c90c3a6bc1f013dd68a22e2269", size = 1612508, upload-time = "2025-11-04T12:42:23.427Z" }, @@ -914,15 +891,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/81/f2/08ace4142eb281c12701fc3b93a10795e4d4dc7f753911d836675050f886/msgpack-1.1.2-cp314-cp314t-win_arm64.whl", hash = "sha256:d99ef64f349d5ec3293688e91486c5fdb925ed03807f64d98d205d2713c60b46", size = 70868, upload-time = "2025-10-08T09:15:44.959Z" }, ] -[[package]] -name = "mypy-extensions" -version = "1.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, -] - [[package]] name = "ndindex" version = "1.10.1" @@ -1283,15 +1251,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/78/f9/690a8600b93c332de3ab4a344a4ac34f00c8f104917061f779db6a918ed6/pathlib-1.0.1-py3-none-any.whl", hash = "sha256:f35f95ab8b0f59e6d354090350b44a80a80635d22efdedfa84c7ad1cf0a74147", size = 14363, upload-time = "2022-05-04T13:37:20.585Z" }, ] -[[package]] -name = "pathspec" -version = "0.12.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ca/bc/f35b8446f4531a7cb215605d100cd88b7ac6f44ab3fc94870c120ab3adbf/pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712", size = 51043, upload-time = "2023-12-10T22:30:45Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191, upload-time = "2023-12-10T22:30:43.14Z" }, -] - [[package]] name = "patsy" version = "1.0.2" @@ -1407,10 +1366,9 @@ wheels = [ [[package]] name = "policyengine-uk-data" -version = "1.35.0" +version = "1.40.3" source = { editable = "." } dependencies = [ - { name = "black" }, { name = "google-auth" }, { name = "google-cloud-storage" }, { name = "huggingface-hub" }, @@ -1422,29 +1380,31 @@ dependencies = [ { name = "policyengine" }, { name = "policyengine-core" }, { name = "policyengine-uk" }, + { name = "pydantic" }, + { name = "pyyaml" }, { name = "requests" }, { name = "rich" }, + { name = "ruff" }, { name = "tabulate" }, { name = "tqdm" }, ] [package.optional-dependencies] dev = [ - { name = "black" }, { name = "build" }, { name = "furo" }, { name = "itables" }, { name = "pytest" }, { name = "quantile-forest" }, + { name = "ruff" }, { name = "tables" }, { name = "torch" }, + { name = "towncrier" }, { name = "yaml-changelog" }, ] [package.metadata] requires-dist = [ - { name = "black", specifier = ">=25.1.0" }, - { name = "black", marker = "extra == 'dev'" }, { name = "build", marker = "extra == 'dev'" }, { name = "furo", marker = "extra == 'dev'" }, { name = "google-auth" }, @@ -1459,13 +1419,18 @@ requires-dist = [ { name = "policyengine" }, { name = "policyengine-core", specifier = ">=3.19.4" }, { name = "policyengine-uk", specifier = ">=2.43.5" }, + { name = "pydantic", specifier = ">=2.0" }, { name = "pytest", marker = "extra == 'dev'" }, + { name = "pyyaml" }, { name = "quantile-forest", marker = "extra == 'dev'" }, { name = "requests" }, { name = "rich", specifier = ">=13.0.0" }, + { name = "ruff", specifier = ">=0.9.0" }, + { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.9.0" }, { name = "tables", marker = "extra == 'dev'" }, { name = "tabulate" }, { name = "torch", marker = "extra == 'dev'" }, + { name = "towncrier", marker = "extra == 'dev'", specifier = ">=24.8.0" }, { name = "tqdm" }, { name = "yaml-changelog", marker = "extra == 'dev'", specifier = ">=0.1.7" }, ] @@ -1687,15 +1652,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, ] -[[package]] -name = "pytokens" -version = "0.3.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/4e/8d/a762be14dae1c3bf280202ba3172020b2b0b4c537f94427435f19c413b72/pytokens-0.3.0.tar.gz", hash = "sha256:2f932b14ed08de5fcf0b391ace2642f858f1394c0857202959000b68ed7a458a", size = 17644, upload-time = "2025-11-05T13:36:35.34Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/84/25/d9db8be44e205a124f6c98bc0324b2bb149b7431c53877fc6d1038dddaf5/pytokens-0.3.0-py3-none-any.whl", hash = "sha256:95b2b5eaf832e469d141a378872480ede3f251a5a5041b8ec6e581d3ac71bbf3", size = 12195, upload-time = "2025-11-05T13:36:33.183Z" }, -] - [[package]] name = "pytz" version = "2025.2" @@ -1822,6 +1778,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762", size = 34696, upload-time = "2025-04-16T09:51:17.142Z" }, ] +[[package]] +name = "ruff" +version = "0.15.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/77/9b/840e0039e65fcf12758adf684d2289024d6140cde9268cc59887dc55189c/ruff-0.15.5.tar.gz", hash = "sha256:7c3601d3b6d76dce18c5c824fc8d06f4eef33d6df0c21ec7799510cde0f159a2", size = 4574214, upload-time = "2026-03-05T20:06:34.946Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/47/20/5369c3ce21588c708bcbe517a8fbe1a8dfdb5dfd5137e14790b1da71612c/ruff-0.15.5-py3-none-linux_armv6l.whl", hash = "sha256:4ae44c42281f42e3b06b988e442d344a5b9b72450ff3c892e30d11b29a96a57c", size = 10478185, upload-time = "2026-03-05T20:06:29.093Z" }, + { url = "https://files.pythonhosted.org/packages/44/ed/e81dd668547da281e5dce710cf0bc60193f8d3d43833e8241d006720e42b/ruff-0.15.5-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6edd3792d408ebcf61adabc01822da687579a1a023f297618ac27a5b51ef0080", size = 10859201, upload-time = "2026-03-05T20:06:32.632Z" }, + { url = "https://files.pythonhosted.org/packages/c4/8f/533075f00aaf19b07c5cd6aa6e5d89424b06b3b3f4583bfa9c640a079059/ruff-0.15.5-py3-none-macosx_11_0_arm64.whl", hash = "sha256:89f463f7c8205a9f8dea9d658d59eff49db05f88f89cc3047fb1a02d9f344010", size = 10184752, upload-time = "2026-03-05T20:06:40.312Z" }, + { url = "https://files.pythonhosted.org/packages/66/0e/ba49e2c3fa0395b3152bad634c7432f7edfc509c133b8f4529053ff024fb/ruff-0.15.5-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ba786a8295c6574c1116704cf0b9e6563de3432ac888d8f83685654fe528fd65", size = 10534857, upload-time = "2026-03-05T20:06:19.581Z" }, + { url = "https://files.pythonhosted.org/packages/59/71/39234440f27a226475a0659561adb0d784b4d247dfe7f43ffc12dd02e288/ruff-0.15.5-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fd4b801e57955fe9f02b31d20375ab3a5c4415f2e5105b79fb94cf2642c91440", size = 10309120, upload-time = "2026-03-05T20:06:00.435Z" }, + { url = "https://files.pythonhosted.org/packages/f5/87/4140aa86a93df032156982b726f4952aaec4a883bb98cb6ef73c347da253/ruff-0.15.5-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:391f7c73388f3d8c11b794dbbc2959a5b5afe66642c142a6effa90b45f6f5204", size = 11047428, upload-time = "2026-03-05T20:05:51.867Z" }, + { url = "https://files.pythonhosted.org/packages/5a/f7/4953e7e3287676f78fbe85e3a0ca414c5ca81237b7575bdadc00229ac240/ruff-0.15.5-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8dc18f30302e379fe1e998548b0f5e9f4dff907f52f73ad6da419ea9c19d66c8", size = 11914251, upload-time = "2026-03-05T20:06:22.887Z" }, + { url = "https://files.pythonhosted.org/packages/77/46/0f7c865c10cf896ccf5a939c3e84e1cfaeed608ff5249584799a74d33835/ruff-0.15.5-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1cc6e7f90087e2d27f98dc34ed1b3ab7c8f0d273cc5431415454e22c0bd2a681", size = 11333801, upload-time = "2026-03-05T20:05:57.168Z" }, + { url = "https://files.pythonhosted.org/packages/d3/01/a10fe54b653061585e655f5286c2662ebddb68831ed3eaebfb0eb08c0a16/ruff-0.15.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1cb7169f53c1ddb06e71a9aebd7e98fc0fea936b39afb36d8e86d36ecc2636a", size = 11206821, upload-time = "2026-03-05T20:06:03.441Z" }, + { url = "https://files.pythonhosted.org/packages/7a/0d/2132ceaf20c5e8699aa83da2706ecb5c5dcdf78b453f77edca7fb70f8a93/ruff-0.15.5-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:9b037924500a31ee17389b5c8c4d88874cc6ea8e42f12e9c61a3d754ff72f1ca", size = 11133326, upload-time = "2026-03-05T20:06:25.655Z" }, + { url = "https://files.pythonhosted.org/packages/72/cb/2e5259a7eb2a0f87c08c0fe5bf5825a1e4b90883a52685524596bfc93072/ruff-0.15.5-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:65bb414e5b4eadd95a8c1e4804f6772bbe8995889f203a01f77ddf2d790929dd", size = 10510820, upload-time = "2026-03-05T20:06:37.79Z" }, + { url = "https://files.pythonhosted.org/packages/ff/20/b67ce78f9e6c59ffbdb5b4503d0090e749b5f2d31b599b554698a80d861c/ruff-0.15.5-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:d20aa469ae3b57033519c559e9bc9cd9e782842e39be05b50e852c7c981fa01d", size = 10302395, upload-time = "2026-03-05T20:05:54.504Z" }, + { url = "https://files.pythonhosted.org/packages/5f/e5/719f1acccd31b720d477751558ed74e9c88134adcc377e5e886af89d3072/ruff-0.15.5-py3-none-musllinux_1_2_i686.whl", hash = "sha256:15388dd28c9161cdb8eda68993533acc870aa4e646a0a277aa166de9ad5a8752", size = 10754069, upload-time = "2026-03-05T20:06:06.422Z" }, + { url = "https://files.pythonhosted.org/packages/c3/9c/d1db14469e32d98f3ca27079dbd30b7b44dbb5317d06ab36718dee3baf03/ruff-0.15.5-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:b30da330cbd03bed0c21420b6b953158f60c74c54c5f4c1dabbdf3a57bf355d2", size = 11304315, upload-time = "2026-03-05T20:06:10.867Z" }, + { url = "https://files.pythonhosted.org/packages/28/3a/950367aee7c69027f4f422059227b290ed780366b6aecee5de5039d50fa8/ruff-0.15.5-py3-none-win32.whl", hash = "sha256:732e5ee1f98ba5b3679029989a06ca39a950cced52143a0ea82a2102cb592b74", size = 10551676, upload-time = "2026-03-05T20:06:13.705Z" }, + { url = "https://files.pythonhosted.org/packages/b8/00/bf077a505b4e649bdd3c47ff8ec967735ce2544c8e4a43aba42ee9bf935d/ruff-0.15.5-py3-none-win_amd64.whl", hash = "sha256:821d41c5fa9e19117616c35eaa3f4b75046ec76c65e7ae20a333e9a8696bc7fe", size = 11678972, upload-time = "2026-03-05T20:06:45.379Z" }, + { url = "https://files.pythonhosted.org/packages/fe/4e/cd76eca6db6115604b7626668e891c9dd03330384082e33662fb0f113614/ruff-0.15.5-py3-none-win_arm64.whl", hash = "sha256:b498d1c60d2fe5c10c45ec3f698901065772730b411f164ae270bb6bfcc4740b", size = 10965572, upload-time = "2026-03-05T20:06:16.984Z" }, +] + [[package]] name = "scikit-learn" version = "1.7.2" @@ -2238,6 +2219,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/db/2b/f7818f6ec88758dfd21da46b6cd46af9d1b3433e53ddbb19ad1e0da17f9b/torch-2.9.1-cp314-cp314t-win_amd64.whl", hash = "sha256:c88d3299ddeb2b35dcc31753305612db485ab6f1823e37fb29451c8b2732b87e", size = 111163659, upload-time = "2025-11-12T15:23:20.009Z" }, ] +[[package]] +name = "towncrier" +version = "25.8.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "jinja2" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c2/eb/5bf25a34123698d3bbab39c5bc5375f8f8bcbcc5a136964ade66935b8b9d/towncrier-25.8.0.tar.gz", hash = "sha256:eef16d29f831ad57abb3ae32a0565739866219f1ebfbdd297d32894eb9940eb1", size = 76322, upload-time = "2025-08-30T11:41:55.393Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/42/06/8ba22ec32c74ac1be3baa26116e3c28bc0e76a5387476921d20b6fdade11/towncrier-25.8.0-py3-none-any.whl", hash = "sha256:b953d133d98f9aeae9084b56a3563fd2519dfc6ec33f61c9cd2c61ff243fb513", size = 65101, upload-time = "2025-08-30T11:41:53.644Z" }, +] + [[package]] name = "tqdm" version = "4.67.1" From d906ca6b84b5511483b2c66ee437a7e9033e2a39 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Fri, 6 Mar 2026 08:33:37 -0500 Subject: [PATCH 2/2] Replace black with ruff in CI workflow files Co-Authored-By: Claude Opus 4.6 --- .github/workflows/pull_request.yaml | 4 ++-- .github/workflows/push.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pull_request.yaml b/.github/workflows/pull_request.yaml index e7be9e6f1..0b8e9d645 100644 --- a/.github/workflows/pull_request.yaml +++ b/.github/workflows/pull_request.yaml @@ -24,9 +24,9 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install black + pip install "ruff>=0.9.0" - name: Check formatting - run: black . -l 79 --check + run: ruff format --check . test: name: Test runs-on: ubuntu-latest diff --git a/.github/workflows/push.yaml b/.github/workflows/push.yaml index d4575eb6b..5ca271170 100644 --- a/.github/workflows/push.yaml +++ b/.github/workflows/push.yaml @@ -23,9 +23,9 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install black + pip install "ruff>=0.9.0" - name: Check formatting - run: black . -l 79 --check + run: ruff format --check . test: name: Build and test runs-on: ubuntu-latest