From 2359c8394501799fcab52fbbe200ed8248ef1bc8 Mon Sep 17 00:00:00 2001
From: PGijsbers <p.gijsbers@tue.nl>
Date: Sat, 11 Apr 2026 20:40:02 +0200
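Subject: [PATCH 1/3] Be consistent about comparison order in asserts

Put the value under test (function output / Python API response) on the
left-hand side of each comparison and the reference value (expected
literal / PHP API response) on the right-hand side.

Also drop a `# type: ignore[arg-type]` comment in
datasets_list_datasets_test.py and collapse the nested quality-value
loop in task_list_test.py into a single `all(...)` assertion.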
---
 tests/config_test.py                          |  6 ++--
 tests/dependencies/fetch_user_test.py         |  4 +--
 .../openml/datasets_list_datasets_test.py     |  6 ++--
 .../routers/openml/datasets_qualities_test.py | 12 ++++----
 .../migration/datasets_migration_test.py      | 22 +++++++--------
 .../openml/migration/flows_migration_test.py  |  2 +-
 .../openml/migration/runs_migration_test.py   |  2 +-
 .../openml/migration/setups_migration_test.py | 28 +++++++++----------
 .../openml/migration/tasks_migration_test.py  |  2 +-
 tests/routers/openml/qualities_list_test.py   |  8 +++---
 tests/routers/openml/setups_tag_test.py       |  2 +-
 tests/routers/openml/setups_untag_test.py     |  2 +-
 tests/routers/openml/task_list_test.py        |  5 ++--
 13 files changed, 50 insertions(+), 51 deletions(-)

diff --git a/tests/config_test.py b/tests/config_test.py
index 287b128d..3218f802 100644
--- a/tests/config_test.py
+++ b/tests/config_test.py
@@ -9,21 +9,21 @@ def test_apply_defaults_to_siblings_applies_defaults() -> None:
     input_ = {"defaults": {1: 1}, "other": {}}
     expected = {"other": {1: 1}}
     output = _apply_defaults_to_siblings(input_)
-    assert expected == output
+    assert output == expected
 
 
 def test_apply_defaults_to_siblings_does_not_override() -> None:
     input_ = {"defaults": {1: 1}, "other": {1: 2}}
     expected = {"other": {1: 2}}
     output = _apply_defaults_to_siblings(input_)
-    assert expected == output
+    assert output == expected
 
 
 def test_apply_defaults_to_siblings_ignores_nontables() -> None:
     input_ = {"defaults": {1: 1}, "other": {1: 2}, "not-a-table": 3}
     expected = {"other": {1: 2}, "not-a-table": 3}
     output = _apply_defaults_to_siblings(input_)
-    assert expected == output
+    assert output == expected
 
 
 def test_load_configuration_adds_environment_variables(default_configuration_file: Path) -> None:
diff --git a/tests/dependencies/fetch_user_test.py b/tests/dependencies/fetch_user_test.py
index faae3d53..116bbdd9 100644
--- a/tests/dependencies/fetch_user_test.py
+++ b/tests/dependencies/fetch_user_test.py
@@ -21,8 +21,8 @@ async def test_fetch_user(api_key: str, user: User, user_test: AsyncConnection)
     async with aclosing(fetch_user(api_key, user_data=user_test)) as agen:
         db_user = await anext(agen)
     assert isinstance(db_user, User)
-    assert user.user_id == db_user.user_id
-    assert set(await user.get_groups()) == set(await db_user.get_groups())
+    assert db_user.user_id == user.user_id
+    assert set(await db_user.get_groups()) == set(await user.get_groups())
 
 
 async def test_fetch_user_no_key_no_user() -> None:
diff --git a/tests/routers/openml/datasets_list_datasets_test.py b/tests/routers/openml/datasets_list_datasets_test.py
index d8fb5735..a6abf6ca 100644
--- a/tests/routers/openml/datasets_list_datasets_test.py
+++ b/tests/routers/openml/datasets_list_datasets_test.py
@@ -105,8 +105,8 @@ async def test_list_data_identical(
 
     # PHP API has a double nested dictionary that never has other entries
     php_json = original.json()["data"]["dataset"]
-    assert len(php_json) == len(new_json)
-    assert php_json == new_json
+    assert len(new_json) == len(php_json)
+    assert new_json == php_json
     return None
 
 
@@ -341,6 +341,6 @@ async def test_list_data_quality(
         status=DatasetStatusFilter.ALL,
         user=None,
         expdb_db=expdb_test,
-        **{quality: range_},  # type: ignore[arg-type]
+        **{quality: range_},
     )
     assert len(result) == count
diff --git a/tests/routers/openml/datasets_qualities_test.py b/tests/routers/openml/datasets_qualities_test.py
index 841c320f..44e42834 100644
--- a/tests/routers/openml/datasets_qualities_test.py
+++ b/tests/routers/openml/datasets_qualities_test.py
@@ -119,7 +119,7 @@ async def test_get_quality(py_api: httpx.AsyncClient) -> None:
         {"name": "kNN1NErrRate", "value": 0.06347438752783964},
         {"name": "kNN1NKappa", "value": 0.8261102938928316},
     ]
-    difference = deepdiff.DeepDiff(expected, response.json(), ignore_order=True)
+    difference = deepdiff.DeepDiff(response.json(), expected, ignore_order=True)
     assert not difference
 
 
@@ -171,7 +171,7 @@ def _assert_get_quality_error_dataset_not_found(
     php_error = php_response.json()["error"]
     py_error = python_response.json()
 
-    assert php_error["code"] == py_error["code"]
+    assert py_error["code"] == php_error["code"]
     assert php_error["message"] == "Unknown dataset"
     assert re.match(r"Dataset with id \d+ not found.", py_error["detail"])
 
@@ -179,14 +179,14 @@ def _assert_get_quality_error_dataset_not_found(
 def _assert_get_quality_error_dataset_process_error(
     python_response: httpx.Response, php_response: httpx.Response
 ) -> None:
-    assert php_response.status_code == python_response.status_code
+    assert python_response.status_code == php_response.status_code
 
     php_error = php_response.json()["error"]
     py_error = python_response.json()
 
-    assert php_error["code"] == py_error["code"]
+    assert py_error["code"] == php_error["code"]
     assert php_error["message"] == "Dataset processed with error"
     assert py_error["title"] == "Dataset Processing Error"
     # The PHP can add some additional unnecessary escapes.
-    assert php_error["additional_information"][:30] == py_error["detail"][:30]
-    assert php_error["additional_information"][-30:] == py_error["detail"][-30:]
+    assert py_error["detail"][:30] == php_error["additional_information"][:30]
+    assert py_error["detail"][-30:] == php_error["additional_information"][-30:]
diff --git a/tests/routers/openml/migration/datasets_migration_test.py b/tests/routers/openml/migration/datasets_migration_test.py
index 73874a39..6feecd1f 100644
--- a/tests/routers/openml/migration/datasets_migration_test.py
+++ b/tests/routers/openml/migration/datasets_migration_test.py
@@ -28,13 +28,13 @@ async def test_dataset_response_is_identical(  # noqa: C901, PLR0912
     if new.status_code == HTTPStatus.FORBIDDEN:
         assert original.status_code == HTTPStatus.PRECONDITION_FAILED
     else:
-        assert original.status_code == new.status_code
+        assert new.status_code == original.status_code
 
     if new.status_code != HTTPStatus.OK:
         # RFC 9457: Python API now returns problem+json format
         assert new.headers["content-type"] == "application/problem+json"
         # Both APIs should return error responses in the same cases
-        assert original.json()["error"]["code"] == new.json()["code"]
+        assert new.json()["code"] == original.json()["error"]["code"]
         old_error_message = original.json()["error"]["message"]
         assert new.json()["detail"].startswith(old_error_message)
         return
@@ -95,7 +95,7 @@ async def test_dataset_response_is_identical(  # noqa: C901, PLR0912
     if "description" not in new_body:
         new_body["description"] = []
 
-    assert original_json == new_body
+    assert new_body == original_json
 
 
 @pytest.mark.parametrize(
@@ -146,7 +146,7 @@ async def test_private_dataset_owner_access(
         php_api.get(f"/data/{private_dataset}?api_key={api_key}"),
     )
     assert old_response.status_code == HTTPStatus.OK
-    assert old_response.status_code == new_response.status_code
+    assert new_response.status_code == old_response.status_code
     assert new_response.json()["id"] == private_dataset
 
 
@@ -201,7 +201,7 @@ async def test_dataset_tag_response_is_identical(
     # RFC 9457: Tag conflict now returns 409 instead of 500
     if original.status_code == HTTPStatus.INTERNAL_SERVER_ERROR and already_tagged:
         assert new.status_code == HTTPStatus.CONFLICT
-        assert original.json()["error"]["code"] == new.json()["code"]
+        assert new.json()["code"] == original.json()["error"]["code"]
         assert original.json()["error"]["message"] == "Entity already tagged by this tag."
         assert re.match(
             pattern=r"Dataset \d+ already tagged with " + f"'{tag}'.",
@@ -209,16 +209,16 @@ async def test_dataset_tag_response_is_identical(
         )
         return
 
-    assert original.status_code == new.status_code, original.json()
+    assert new.status_code == original.status_code, original.json()
     if new.status_code != HTTPStatus.OK:
-        assert original.json()["error"]["code"] == new.json()["code"]
-        assert original.json()["error"]["message"] == new.json()["detail"]
+        assert new.json()["code"] == original.json()["error"]["code"]
+        assert new.json()["detail"] == original.json()["error"]["message"]
         return
 
     original = original.json()
     new = new.json()
     new = nested_remove_single_element_list(new)
-    assert original == new
+    assert new == original
 
 
 @pytest.mark.parametrize(
@@ -238,12 +238,12 @@ async def test_datasets_feature_is_identical(
 
     if new.status_code != HTTPStatus.OK:
         error = original.json()["error"]
-        assert error["code"] == new.json()["code"]
+        assert new.json()["code"] == error["code"]
         if error["message"] == "No features found. Additionally, dataset processed with error":
             pattern = r"No features found. Additionally, dataset \d+ processed with error\."
             assert re.match(pattern, new.json()["detail"])
         else:
-            assert error["message"] == new.json()["detail"]
+            assert new.json()["detail"] == error["message"]
         return
 
     python_body = new.json()
diff --git a/tests/routers/openml/migration/flows_migration_test.py b/tests/routers/openml/migration/flows_migration_test.py
index 0061c34e..6636dd46 100644
--- a/tests/routers/openml/migration/flows_migration_test.py
+++ b/tests/routers/openml/migration/flows_migration_test.py
@@ -94,8 +94,8 @@ def convert_flow_naming_and_defaults(flow: dict[str, Any]) -> dict[str, Any]:
     # differences (e.g., '1.0' vs '1')
     expected = nested_str_to_num(expected)
     difference = deepdiff.diff.DeepDiff(
-        expected,
         new,
+        expected,
         ignore_order=True,
         ignore_numeric_type_changes=True,
     )
diff --git a/tests/routers/openml/migration/runs_migration_test.py b/tests/routers/openml/migration/runs_migration_test.py
index dff9f60b..590cc23b 100644
--- a/tests/routers/openml/migration/runs_migration_test.py
+++ b/tests/routers/openml/migration/runs_migration_test.py
@@ -33,7 +33,7 @@ async def test_get_run_trace_equal(
 
     php_error = php_response.json()["error"]
     py_error = py_response.json()
-    assert php_error["code"] == py_error["code"]
+    assert py_error["code"] == php_error["code"]
     if php_error["code"] == "571":
         assert php_error["message"] == "Run not found."
         assert py_error["detail"] == f"Run {run_id} not found."
diff --git a/tests/routers/openml/migration/setups_migration_test.py b/tests/routers/openml/migration/setups_migration_test.py
index 37c8bc1a..b4b1f110 100644
--- a/tests/routers/openml/migration/setups_migration_test.py
+++ b/tests/routers/openml/migration/setups_migration_test.py
@@ -85,15 +85,15 @@ async def test_setup_untag_response_is_identical_when_tag_exists(
         )
 
     if new.status_code == HTTPStatus.OK:
-        assert original.status_code == new.status_code
+        assert new.status_code == original.status_code
         original_untag = original.json()["setup_untag"]
         new_untag = new.json()["setup_untag"]
-        assert original_untag["id"] == new_untag["id"]
+        assert new_untag["id"] == original_untag["id"]
         if tags := original_untag.get("tag"):
             if isinstance(tags, str):
-                assert tags == new_untag["tag"][0]
+                assert new_untag["tag"][0] == tags
             else:
-                assert tags == new_untag["tag"]
+                assert new_untag["tag"] == tags
         else:
             assert new_untag["tag"] == []
         return
@@ -101,7 +101,7 @@ async def test_setup_untag_response_is_identical_when_tag_exists(
     code, message = original.json()["error"].values()
     assert original.status_code == HTTPStatus.PRECONDITION_FAILED
     assert new.status_code == HTTPStatus.FORBIDDEN
-    assert code == new.json()["code"]
+    assert new.json()["code"] == code
     assert message == "Tag is not owned by you"
     assert re.match(
         r"You may not remove tag \S+ of setup \d+ because it was not created by you.",
@@ -131,7 +131,7 @@ async def test_setup_untag_response_is_identical_setup_doesnt_exist(
     assert original.status_code == HTTPStatus.PRECONDITION_FAILED
     assert new.status_code == HTTPStatus.NOT_FOUND
     assert original.json()["error"]["message"] == "Entity not found."
-    assert original.json()["error"]["code"] == new.json()["code"]
+    assert new.json()["code"] == original.json()["error"]["code"]
     assert re.match(
         r"Setup \d+ not found.",
         new.json()["detail"],
@@ -159,7 +159,7 @@ async def test_setup_untag_response_is_identical_tag_doesnt_exist(
 
     assert original.status_code == HTTPStatus.PRECONDITION_FAILED
     assert new.status_code == HTTPStatus.NOT_FOUND
-    assert original.json()["error"]["code"] == new.json()["code"]
+    assert new.json()["code"] == original.json()["error"]["code"]
     assert original.json()["error"]["message"] == "Tag not found."
     assert re.match(
         r"Setup \d+ does not have tag '\S+'.",
@@ -208,15 +208,15 @@ async def test_setup_tag_response_is_identical_when_tag_doesnt_exist(  # noqa: P
         )
 
     assert new.status_code == HTTPStatus.OK
-    assert original.status_code == new.status_code
+    assert new.status_code == original.status_code
     original_tag = original.json()["setup_tag"]
     new_tag = new.json()["setup_tag"]
-    assert original_tag["id"] == new_tag["id"]
+    assert new_tag["id"] == original_tag["id"]
     if tags := original_tag.get("tag"):
         if isinstance(tags, str):
-            assert tags == new_tag["tag"][0]
+            assert new_tag["tag"][0] == tags
         else:
-            assert set(tags) == set(new_tag["tag"])
+            assert set(new_tag["tag"]) == set(tags)
     else:
         assert new_tag["tag"] == []
 
@@ -243,7 +243,7 @@ async def test_setup_tag_response_is_identical_setup_doesnt_exist(
     assert original.status_code == HTTPStatus.PRECONDITION_FAILED
     assert new.status_code == HTTPStatus.NOT_FOUND
     assert original.json()["error"]["message"] == "Entity not found."
-    assert original.json()["error"]["code"] == new.json()["code"]
+    assert new.json()["code"] == original.json()["error"]["code"]
     assert re.match(
         r"Setup \d+ not found.",
         new.json()["detail"],
@@ -293,7 +293,7 @@ async def test_get_setup_response_is_identical_setup_doesnt_exist(
     assert original.status_code == HTTPStatus.PRECONDITION_FAILED
     assert new.status_code == HTTPStatus.NOT_FOUND
     assert original.json()["error"]["message"] == "Unknown setup"
-    assert original.json()["error"]["code"] == new.json()["code"]
+    assert new.json()["code"] == original.json()["error"]["code"]
     assert new.json()["detail"] == f"Setup {setup_id} not found."
 
 
@@ -326,4 +326,4 @@ async def test_get_setup_response_is_identical(
     new_json = nested_str_to_num(new.json())
     new_json = nested_remove_values(new_json, values=[[], None])
 
-    assert original_json == new_json
+    assert new_json == original_json
diff --git a/tests/routers/openml/migration/tasks_migration_test.py b/tests/routers/openml/migration/tasks_migration_test.py
index 8957b78a..2aab7483 100644
--- a/tests/routers/openml/migration/tasks_migration_test.py
+++ b/tests/routers/openml/migration/tasks_migration_test.py
@@ -168,7 +168,7 @@ async def test_list_tasks_equal(
     php_ids = {int(t["task_id"]) for t in php_tasks}
     py_ids = {int(t["task_id"]) for t in py_tasks}
 
-    assert php_ids == py_ids, (
+    assert py_ids == php_ids, (
         f"PHP and Python must return the exact same task IDs: {php_ids ^ py_ids}"
     )
 
diff --git a/tests/routers/openml/qualities_list_test.py b/tests/routers/openml/qualities_list_test.py
index 6ca21ec0..1f12ceab 100644
--- a/tests/routers/openml/qualities_list_test.py
+++ b/tests/routers/openml/qualities_list_test.py
@@ -35,8 +35,8 @@ async def test_list_qualities_identical(
         py_api.get("/datasets/qualities/list"),
         php_api.get("/data/qualities/list"),
     )
-    assert original.status_code == new.status_code
-    assert original.json() == new.json()
+    assert new.status_code == original.status_code
+    assert new.json() == original.json()
     # To keep the test idempotent, we cannot test if reaction to database changes is identical
 
 
@@ -157,11 +157,11 @@ async def test_list_qualities(py_api: httpx.AsyncClient, expdb_test: AsyncConnec
             ],
         },
     }
-    assert expected == response.json()
+    assert response.json() == expected
 
     deleted = expected["data_qualities_list"]["quality"].pop()
     await _remove_quality_from_database(quality_name=deleted, expdb_test=expdb_test)
 
     response = await py_api.get("/datasets/qualities/list")
     assert response.status_code == HTTPStatus.OK
-    assert expected == response.json()
+    assert response.json() == expected
diff --git a/tests/routers/openml/setups_tag_test.py b/tests/routers/openml/setups_tag_test.py
index 12011a40..db629bc3 100644
--- a/tests/routers/openml/setups_tag_test.py
+++ b/tests/routers/openml/setups_tag_test.py
@@ -29,7 +29,7 @@ async def test_setup_tag_api_success(
 
     assert response.status_code == HTTPStatus.OK
     expected = {"setup_tag": {"id": "1", "tag": ["setup_tag_via_http"]}}
-    assert expected == response.json()
+    assert response.json() == expected
 
     rows = await expdb_test.execute(
         text("SELECT * FROM setup_tag WHERE id = 1 AND tag = :tag"),
diff --git a/tests/routers/openml/setups_untag_test.py b/tests/routers/openml/setups_untag_test.py
index 3adc37f0..b96671e5 100644
--- a/tests/routers/openml/setups_untag_test.py
+++ b/tests/routers/openml/setups_untag_test.py
@@ -34,7 +34,7 @@ async def test_setup_untag_api_success(
 
     assert response.status_code == HTTPStatus.OK
     expected = {"setup_untag": {"id": "1", "tag": []}}
-    assert expected == response.json()
+    assert response.json() == expected
 
     rows = await expdb_test.execute(
         text("SELECT * FROM setup_tag WHERE id = 1 AND tag = :tag"),
diff --git a/tests/routers/openml/task_list_test.py b/tests/routers/openml/task_list_test.py
index 0136d201..46679675 100644
--- a/tests/routers/openml/task_list_test.py
+++ b/tests/routers/openml/task_list_test.py
@@ -299,9 +299,8 @@ async def test_list_tasks_quality_values_are_strings(expdb_test: AsyncConnection
     """Quality values must be strings (to match PHP API behaviour)."""
     tasks = await list_tasks(pagination=Pagination(limit=5, offset=0), expdb=expdb_test)
     assert any(task["quality"] for task in tasks), "Expected at least one task to have qualities"
-    for task in tasks:
-        for quality in task["quality"]:
-            assert isinstance(quality["value"], str)
+    qualities = [quality for task in tasks for quality in task["quality"]]
+    assert all(isinstance(quality["value"], str) for quality in qualities)
 
 
 @pytest.mark.parametrize(

From 6ee1ded17d1ecad361e160750b9fd28af7665bfd Mon Sep 17 00:00:00 2001
From: PGijsbers <p.gijsbers@tue.nl>
Date: Sat, 11 Apr 2026 20:54:34 +0200
Subject: [PATCH 2/3] Be consistent about naming response variables

---
 tests/routers/openml/dataset_tag_test.py      |   6 +-
 .../openml/datasets_list_datasets_test.py     |  24 ++--
 .../routers/openml/datasets_qualities_test.py |  26 ++--
 .../migration/datasets_migration_test.py      | 132 ++++++++---------
 .../migration/evaluations_migration_test.py   |  16 +--
 .../openml/migration/flows_migration_test.py  |  20 +--
 .../openml/migration/runs_migration_test.py   |  10 +-
 .../openml/migration/setups_migration_test.py | 134 +++++++++---------
 .../migration/studies_migration_test.py       |  32 ++---
 .../openml/migration/tasks_migration_test.py  |  28 ++--
 tests/routers/openml/qualities_list_test.py   |   6 +-
 tests/routers/openml/study_post_test.py       |   6 +-
 tests/routers/openml/task_type_get_test.py    |   8 +-
 tests/routers/openml/task_type_list_test.py   |   6 +-
 14 files changed, 227 insertions(+), 227 deletions(-)

diff --git a/tests/routers/openml/dataset_tag_test.py b/tests/routers/openml/dataset_tag_test.py
index 41746f83..cddd0d84 100644
--- a/tests/routers/openml/dataset_tag_test.py
+++ b/tests/routers/openml/dataset_tag_test.py
@@ -36,13 +36,13 @@ async def test_dataset_tag_invalid_tag_is_rejected(
     tag: str,
     py_api: httpx.AsyncClient,
 ) -> None:
-    new = await py_api.post(
+    response = await py_api.post(
         f"/datasets/tag?api_key={ApiKey.ADMIN}",
         json={"data_id": 1, "tag": tag},
     )
 
-    assert new.status_code == HTTPStatus.UNPROCESSABLE_ENTITY
-    assert new.json()["detail"][0]["loc"] == ["body", "tag"]
+    assert response.status_code == HTTPStatus.UNPROCESSABLE_ENTITY
+    assert response.json()["detail"][0]["loc"] == ["body", "tag"]
 
 
 # ── Direct call tests: tag_dataset ──
diff --git a/tests/routers/openml/datasets_list_datasets_test.py b/tests/routers/openml/datasets_list_datasets_test.py
index a6abf6ca..5086ac50 100644
--- a/tests/routers/openml/datasets_list_datasets_test.py
+++ b/tests/routers/openml/datasets_list_datasets_test.py
@@ -74,39 +74,39 @@ async def test_list_data_identical(
         uri += f"/{'/'.join([str(v) for q in query for v in q])}"
     uri += api_key_query
 
-    new, original = await asyncio.gather(
+    py_response, php_response = await asyncio.gather(
         py_api.post(f"/datasets/list{api_key_query}", json=new_style),
         php_api.get(uri),
     )
 
     # Note: RFC 9457 changed some status codes (PRECONDITION_FAILED -> NOT_FOUND for no results)
     # and the error response format, so we can't compare error responses directly.
-    php_is_error = original.status_code == HTTPStatus.PRECONDITION_FAILED
-    py_is_error = new.status_code == HTTPStatus.NOT_FOUND
+    php_is_error = php_response.status_code == HTTPStatus.PRECONDITION_FAILED
+    py_is_error = py_response.status_code == HTTPStatus.NOT_FOUND
 
     if php_is_error or py_is_error:
         # Both should be errors in the same cases
         assert php_is_error == py_is_error, (
-            f"PHP status={original.status_code}, Python status={new.status_code}"
+            f"PHP status={php_response.status_code}, Python status={py_response.status_code}"
         )
         # Verify Python API returns RFC 9457 format
-        assert new.headers["content-type"] == "application/problem+json"
-        error = new.json()
+        assert py_response.headers["content-type"] == "application/problem+json"
+        error = py_response.json()
         assert error["type"] == NoResultsError.uri
         assert error["code"] == "372"
-        assert original.json()["error"]["message"] == "No results"
+        assert php_response.json()["error"]["message"] == "No results"
         assert error["detail"] == "No datasets match the search criteria."
         return None
-    new_json = new.json()
+    py_json = py_response.json()
     # Qualities in new response are typed
-    for dataset in new_json:
+    for dataset in py_json:
         for quality in dataset["quality"]:
             quality["value"] = str(quality["value"])
 
     # PHP API has a double nested dictionary that never has other entries
-    php_json = original.json()["data"]["dataset"]
-    assert len(new_json) == len(php_json)
-    assert new_json == php_json
+    php_json = php_response.json()["data"]["dataset"]
+    assert len(py_json) == len(php_json)
+    assert py_json == php_json
     return None
 
 
diff --git a/tests/routers/openml/datasets_qualities_test.py b/tests/routers/openml/datasets_qualities_test.py
index 44e42834..fb3559ce 100644
--- a/tests/routers/openml/datasets_qualities_test.py
+++ b/tests/routers/openml/datasets_qualities_test.py
@@ -130,28 +130,28 @@ async def test_get_quality(py_api: httpx.AsyncClient) -> None:
 async def test_get_quality_identical(
     data_id: int, py_api: httpx.AsyncClient, php_api: httpx.AsyncClient
 ) -> None:
-    python_response, php_response = await asyncio.gather(
+    py_response, php_response = await asyncio.gather(
         py_api.get(f"/datasets/qualities/{data_id}"),
         php_api.get(f"/data/qualities/{data_id}"),
     )
     if php_response.status_code == HTTPStatus.OK:
-        _assert_get_quality_success_equal(python_response, php_response)
+        _assert_get_quality_success_equal(py_response, php_response)
         return
 
     php_error_code = int(php_response.json()["error"]["code"])
     if php_error_code == 361:  # noqa: PLR2004
-        _assert_get_quality_error_dataset_not_found(python_response, php_response)
+        _assert_get_quality_error_dataset_not_found(py_response, php_response)
     elif php_error_code == 364:  # noqa: PLR2004
-        _assert_get_quality_error_dataset_process_error(python_response, php_response)
+        _assert_get_quality_error_dataset_process_error(py_response, php_response)
     else:
         msg = f"Dataset {data_id} response not under test:", php_response.json()
         raise AssertionError(msg)
 
 
 def _assert_get_quality_success_equal(
-    python_response: httpx.Response, php_response: httpx.Response
+    py_response: httpx.Response, php_response: httpx.Response
 ) -> None:
-    assert python_response.status_code == php_response.status_code
+    assert py_response.status_code == php_response.status_code
     expected = [
         {
             "name": quality["name"],
@@ -159,17 +159,17 @@ def _assert_get_quality_success_equal(
         }
         for quality in php_response.json()["data_qualities"]["quality"]
     ]
-    assert python_response.json() == expected
+    assert py_response.json() == expected
 
 
 def _assert_get_quality_error_dataset_not_found(
-    python_response: httpx.Response, php_response: httpx.Response
+    py_response: httpx.Response, php_response: httpx.Response
 ) -> None:
     assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED
-    assert python_response.status_code == HTTPStatus.NOT_FOUND
+    assert py_response.status_code == HTTPStatus.NOT_FOUND
 
     php_error = php_response.json()["error"]
-    py_error = python_response.json()
+    py_error = py_response.json()
 
     assert py_error["code"] == php_error["code"]
     assert php_error["message"] == "Unknown dataset"
@@ -177,12 +177,12 @@ def _assert_get_quality_error_dataset_not_found(
 
 
 def _assert_get_quality_error_dataset_process_error(
-    python_response: httpx.Response, php_response: httpx.Response
+    py_response: httpx.Response, php_response: httpx.Response
 ) -> None:
-    assert python_response.status_code == php_response.status_code
+    assert py_response.status_code == php_response.status_code
 
     php_error = php_response.json()["error"]
-    py_error = python_response.json()
+    py_error = py_response.json()
 
     assert py_error["code"] == php_error["code"]
     assert php_error["message"] == "Dataset processed with error"
diff --git a/tests/routers/openml/migration/datasets_migration_test.py b/tests/routers/openml/migration/datasets_migration_test.py
index 6feecd1f..718c06fb 100644
--- a/tests/routers/openml/migration/datasets_migration_test.py
+++ b/tests/routers/openml/migration/datasets_migration_test.py
@@ -20,39 +20,39 @@ async def test_dataset_response_is_identical(  # noqa: C901, PLR0912
     py_api: httpx.AsyncClient,
     php_api: httpx.AsyncClient,
 ) -> None:
-    new, original = await asyncio.gather(
+    py_response, php_response = await asyncio.gather(
         py_api.get(f"/datasets/{dataset_id}"),
         php_api.get(f"/data/{dataset_id}"),
     )
 
-    if new.status_code == HTTPStatus.FORBIDDEN:
-        assert original.status_code == HTTPStatus.PRECONDITION_FAILED
+    if py_response.status_code == HTTPStatus.FORBIDDEN:
+        assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED
     else:
-        assert new.status_code == original.status_code
+        assert py_response.status_code == php_response.status_code
 
-    if new.status_code != HTTPStatus.OK:
+    if py_response.status_code != HTTPStatus.OK:
         # RFC 9457: Python API now returns problem+json format
-        assert new.headers["content-type"] == "application/problem+json"
+        assert py_response.headers["content-type"] == "application/problem+json"
         # Both APIs should return error responses in the same cases
-        assert new.json()["code"] == original.json()["error"]["code"]
-        old_error_message = original.json()["error"]["message"]
-        assert new.json()["detail"].startswith(old_error_message)
+        assert py_response.json()["code"] == php_response.json()["error"]["code"]
+        old_error_message = php_response.json()["error"]["message"]
+        assert py_response.json()["detail"].startswith(old_error_message)
         return
 
     try:
-        original_json = original.json()["data_set_description"]
+        php_json = php_response.json()["data_set_description"]
     except json.decoder.JSONDecodeError:
         pytest.skip("A PHP error occurred on the test server.")
 
-    if "div" in original_json:
+    if "div" in php_json:
         pytest.skip("A PHP error occurred on the test server.")
 
     # There are a few changes between the old API and the new API, so we convert here:
     # The new API has normalized `format` field:
-    original_json["format"] = original_json["format"].lower()
+    php_json["format"] = php_json["format"].lower()
 
     # Pydantic HttpURL serialization omits port 80 for HTTP urls.
-    original_json["url"] = original_json["url"].replace(":80", "")
+    php_json["url"] = php_json["url"].replace(":80", "")
 
     # There is odd behavior in the live server that I don't want to recreate:
     # when the creator is a list of csv names, it can either be a str or a list
@@ -60,42 +60,42 @@ async def test_dataset_response_is_identical(  # noqa: C901, PLR0912
     # '"Alice", "Bob"' -> ["Alice", "Bob"]
     # 'Alice, Bob' -> 'Alice, Bob'
     if (
-        "creator" in original_json
-        and isinstance(original_json["creator"], str)
-        and len(original_json["creator"].split(",")) > 1
+        "creator" in php_json
+        and isinstance(php_json["creator"], str)
+        and len(php_json["creator"].split(",")) > 1
     ):
-        original_json["creator"] = [name.strip() for name in original_json["creator"].split(",")]
+        php_json["creator"] = [name.strip() for name in php_json["creator"].split(",")]
 
-    new_body = new.json()
-    if processing_data := new_body.get("processing_date"):
-        new_body["processing_date"] = str(processing_data).replace("T", " ")
+    py_json = py_response.json()
+    if processing_data := py_json.get("processing_date"):
+        py_json["processing_date"] = str(processing_data).replace("T", " ")
 
     manual = []
     # ref test.openml.org/d/33 (contributor) and d/34 (creator)
     #   contributor/creator in database is '""'
     #   json content is []
     for field in ["contributor", "creator"]:
-        if new_body[field] == [""]:
-            new_body[field] = []
+        if py_json[field] == [""]:
+            py_json[field] = []
             manual.append(field)
 
-    if isinstance(new_body["original_data_url"], list):
-        new_body["original_data_url"] = ", ".join(str(url) for url in new_body["original_data_url"])
+    if isinstance(py_json["original_data_url"], list):
+        py_json["original_data_url"] = ", ".join(str(url) for url in py_json["original_data_url"])
 
-    for field, value in list(new_body.items()):
+    for field, value in list(py_json.items()):
         if field in manual:
             continue
         if isinstance(value, int):
-            new_body[field] = str(value)
+            py_json[field] = str(value)
         elif isinstance(value, list) and len(value) == 1:
-            new_body[field] = str(value[0])
-        if not new_body[field]:
-            del new_body[field]
+            py_json[field] = str(value[0])
+        if not py_json[field]:
+            del py_json[field]
 
-    if "description" not in new_body:
-        new_body["description"] = []
+    if "description" not in py_json:
+        py_json["description"] = []
 
-    assert new_body == original_json
+    assert py_json == php_json
 
 
 @pytest.mark.parametrize(
@@ -141,13 +141,13 @@ async def test_private_dataset_owner_access(
     api_key: str,
 ) -> None:
     [private_dataset] = tests.constants.PRIVATE_DATASET_ID
-    new_response, old_response = await asyncio.gather(
+    py_response, php_response = await asyncio.gather(
         py_api.get(f"/datasets/{private_dataset}?api_key={api_key}"),
         php_api.get(f"/data/{private_dataset}?api_key={api_key}"),
     )
-    assert old_response.status_code == HTTPStatus.OK
-    assert new_response.status_code == old_response.status_code
-    assert new_response.json()["id"] == private_dataset
+    assert php_response.status_code == HTTPStatus.OK
+    assert py_response.status_code == php_response.status_code
+    assert py_response.json()["id"] == private_dataset
 
 
 @pytest.mark.mut
@@ -173,13 +173,13 @@ async def test_dataset_tag_response_is_identical(
     php_api: httpx.AsyncClient,
 ) -> None:
     # PHP request must happen first to check state, can't parallelize
-    original = await php_api.post(
+    php_response = await php_api.post(
         "/data/tag",
         data={"api_key": api_key, "tag": tag, "data_id": dataset_id},
     )
     already_tagged = (
-        original.status_code == HTTPStatus.INTERNAL_SERVER_ERROR
-        and "already tagged" in original.json()["error"]["message"]
+        php_response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR
+        and "already tagged" in php_response.json()["error"]["message"]
     )
     if not already_tagged:
         # undo the tag, because we don't want to persist this change to the database
@@ -189,36 +189,36 @@ async def test_dataset_tag_response_is_identical(
             data={"api_key": api_key, "tag": tag, "data_id": dataset_id},
         )
     if (
-        original.status_code != HTTPStatus.OK
-        and original.json()["error"]["message"] == "An Elastic Search Exception occured."
+        php_response.status_code != HTTPStatus.OK
+        and php_response.json()["error"]["message"] == "An Elastic Search Exception occured."
     ):
         pytest.skip("Encountered Elastic Search error.")
-    new = await py_api.post(
+    py_response = await py_api.post(
         f"/datasets/tag?api_key={api_key}",
         json={"data_id": dataset_id, "tag": tag},
     )
 
     # RFC 9457: Tag conflict now returns 409 instead of 500
-    if original.status_code == HTTPStatus.INTERNAL_SERVER_ERROR and already_tagged:
-        assert new.status_code == HTTPStatus.CONFLICT
-        assert new.json()["code"] == original.json()["error"]["code"]
-        assert original.json()["error"]["message"] == "Entity already tagged by this tag."
+    if php_response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR and already_tagged:
+        assert py_response.status_code == HTTPStatus.CONFLICT
+        assert py_response.json()["code"] == php_response.json()["error"]["code"]
+        assert php_response.json()["error"]["message"] == "Entity already tagged by this tag."
         assert re.match(
             pattern=r"Dataset \d+ already tagged with " + f"'{tag}'.",
-            string=new.json()["detail"],
+            string=py_response.json()["detail"],
         )
         return
 
-    assert new.status_code == original.status_code, original.json()
-    if new.status_code != HTTPStatus.OK:
-        assert new.json()["code"] == original.json()["error"]["code"]
-        assert new.json()["detail"] == original.json()["error"]["message"]
+    assert py_response.status_code == php_response.status_code, php_response.json()
+    if py_response.status_code != HTTPStatus.OK:
+        assert py_response.json()["code"] == php_response.json()["error"]["code"]
+        assert py_response.json()["detail"] == php_response.json()["error"]["message"]
         return
 
-    original = original.json()
-    new = new.json()
-    new = nested_remove_single_element_list(new)
-    assert new == original
+    php_json = php_response.json()
+    py_json = py_response.json()
+    py_json = nested_remove_single_element_list(py_json)
+    assert py_json == php_json
 
 
 @pytest.mark.parametrize(
@@ -230,24 +230,24 @@ async def test_datasets_feature_is_identical(
     py_api: httpx.AsyncClient,
     php_api: httpx.AsyncClient,
 ) -> None:
-    new, original = await asyncio.gather(
+    py_response, php_response = await asyncio.gather(
         py_api.get(f"/datasets/features/{data_id}"),
         php_api.get(f"/data/features/{data_id}"),
     )
-    assert new.status_code == original.status_code
+    assert py_response.status_code == php_response.status_code
 
-    if new.status_code != HTTPStatus.OK:
-        error = original.json()["error"]
-        assert new.json()["code"] == error["code"]
+    if py_response.status_code != HTTPStatus.OK:
+        error = php_response.json()["error"]
+        assert py_response.json()["code"] == error["code"]
         if error["message"] == "No features found. Additionally, dataset processed with error":
             pattern = r"No features found. Additionally, dataset \d+ processed with error\."
-            assert re.match(pattern, new.json()["detail"])
+            assert re.match(pattern, py_response.json()["detail"])
         else:
-            assert new.json()["detail"] == error["message"]
+            assert py_response.json()["detail"] == error["message"]
         return
 
-    python_body = new.json()
-    for feature in python_body:
+    py_json = py_response.json()
+    for feature in py_json:
         for key, value in list(feature.items()):
             if key == "nominal_values":
                 # The old API uses `nominal_value` instead of `nominal_values`
@@ -261,5 +261,5 @@ async def test_datasets_feature_is_identical(
             else:
                 # The old API formats bool as string in lower-case
                 feature[key] = str(value) if not isinstance(value, bool) else str(value).lower()
-    original_features = original.json()["data_features"]["feature"]
-    assert python_body == original_features
+    php_features = php_response.json()["data_features"]["feature"]
+    assert py_json == php_features
diff --git a/tests/routers/openml/migration/evaluations_migration_test.py b/tests/routers/openml/migration/evaluations_migration_test.py
index 074284fa..08fb75fb 100644
--- a/tests/routers/openml/migration/evaluations_migration_test.py
+++ b/tests/routers/openml/migration/evaluations_migration_test.py
@@ -7,29 +7,29 @@
 async def test_evaluationmeasure_list(
     py_api: httpx.AsyncClient, php_api: httpx.AsyncClient
 ) -> None:
-    new, original = await asyncio.gather(
+    py_response, php_response = await asyncio.gather(
         py_api.get("/evaluationmeasure/list"),
         php_api.get("/evaluationmeasure/list"),
     )
-    assert new.status_code == original.status_code
-    assert new.json() == original.json()["evaluation_measures"]["measures"]["measure"]
+    assert py_response.status_code == php_response.status_code
+    assert py_response.json() == php_response.json()["evaluation_measures"]["measures"]["measure"]
 
 
 async def test_estimation_procedure_list(
     py_api: httpx.AsyncClient, php_api: httpx.AsyncClient
 ) -> None:
-    new, original = await asyncio.gather(
+    py_response, php_response = await asyncio.gather(
         py_api.get("/estimationprocedure/list"),
         php_api.get("/estimationprocedure/list"),
     )
-    assert new.status_code == original.status_code
-    expected = original.json()["estimationprocedures"]["estimationprocedure"]
+    assert py_response.status_code == php_response.status_code
+    expected = php_response.json()["estimationprocedures"]["estimationprocedure"]
 
-    def new_to_old(procedure: dict[str, Any]) -> dict[str, Any]:
+    def py_to_php(procedure: dict[str, Any]) -> dict[str, Any]:
         procedure = {k: str(v) for k, v in procedure.items()}
         if "stratified_sampling" in procedure:
             procedure["stratified_sampling"] = procedure["stratified_sampling"].lower()
         procedure["ttid"] = procedure.pop("task_type_id")
         return procedure
 
-    assert [new_to_old(procedure) for procedure in new.json()] == expected
+    assert [py_to_php(procedure) for procedure in py_response.json()] == expected
diff --git a/tests/routers/openml/migration/flows_migration_test.py b/tests/routers/openml/migration/flows_migration_test.py
index 6636dd46..2ef7da19 100644
--- a/tests/routers/openml/migration/flows_migration_test.py
+++ b/tests/routers/openml/migration/flows_migration_test.py
@@ -62,13 +62,13 @@ async def test_flow_exists(
 async def test_get_flow_equal(
     flow_id: int, py_api: httpx.AsyncClient, php_api: httpx.AsyncClient
 ) -> None:
-    response, php_response = await asyncio.gather(
+    py_response, php_response = await asyncio.gather(
         py_api.get(f"/flows/{flow_id}"),
         php_api.get(f"/flow/{flow_id}"),
     )
-    assert response.status_code == HTTPStatus.OK
+    assert py_response.status_code == HTTPStatus.OK
 
-    new = response.json()
+    py_json = py_response.json()
 
     # PHP sets parameter default value to [], None is more appropriate, omission is considered
     # Similar for the default "identifier" of subflows.
@@ -86,16 +86,16 @@ def convert_flow_naming_and_defaults(flow: dict[str, Any]) -> dict[str, Any]:
             flow.pop("component")
         return flow
 
-    new = convert_flow_naming_and_defaults(new)
-    new = nested_remove_single_element_list(new)
+    py_json = convert_flow_naming_and_defaults(py_json)
+    py_json = nested_remove_single_element_list(py_json)
 
-    expected = php_response.json()["flow"]
-    # The reason we don't transform "new" to str is that it becomes harder to ignore numeric type
+    php_json = php_response.json()["flow"]
+    # We don't convert py_json to str because that would make it harder to ignore numeric type
     # differences (e.g., '1.0' vs '1')
-    expected = nested_str_to_num(expected)
+    php_json = nested_str_to_num(php_json)
     difference = deepdiff.diff.DeepDiff(
-        new,
-        expected,
+        py_json,
+        php_json,
         ignore_order=True,
         ignore_numeric_type_changes=True,
     )
diff --git a/tests/routers/openml/migration/runs_migration_test.py b/tests/routers/openml/migration/runs_migration_test.py
index 590cc23b..826aa189 100644
--- a/tests/routers/openml/migration/runs_migration_test.py
+++ b/tests/routers/openml/migration/runs_migration_test.py
@@ -51,16 +51,16 @@ def _assert_trace_response_success(
     assert py_response.status_code == HTTPStatus.OK
     assert php_response.status_code == HTTPStatus.OK
 
-    new_json = py_response.json()
+    py_json = py_response.json()
 
     # PHP nests response under "trace" key — match that structure
-    new_json = {"trace": new_json}
+    py_json = {"trace": py_json}
 
     # PHP uses "trace_iteration" key, Python uses "trace"
-    new_json["trace"]["trace_iteration"] = new_json["trace"].pop("trace")
+    py_json["trace"]["trace_iteration"] = py_json["trace"].pop("trace")
 
     # PHP returns all numeric values as strings — normalize Python response
-    new_json = nested_num_to_str(new_json)
+    py_json = nested_num_to_str(py_json)
 
     def _sort_trace(payload: dict[str, Any]) -> dict[str, Any]:
         """Sort trace iterations by (repeat, fold, iteration) for order-sensitive comparison."""
@@ -73,7 +73,7 @@ def _sort_trace(payload: dict[str, Any]) -> dict[str, Any]:
         return copied
 
     differences = deepdiff.diff.DeepDiff(
-        _sort_trace(new_json),
+        _sort_trace(py_json),
         _sort_trace(php_response.json()),
         ignore_order=False,
     )
diff --git a/tests/routers/openml/migration/setups_migration_test.py b/tests/routers/openml/migration/setups_migration_test.py
index b4b1f110..34613fc4 100644
--- a/tests/routers/openml/migration/setups_migration_test.py
+++ b/tests/routers/openml/migration/setups_migration_test.py
@@ -71,7 +71,7 @@ async def test_setup_untag_response_is_identical_when_tag_exists(
 
     all_tags = [tag, *other_tags]
     async with temporary_tags(tags=all_tags, setup_id=setup_id, persist=True):
-        original = await php_api.post(
+        php_response = await php_api.post(
             "/setup/untag",
             data={"api_key": api_key, "tag": tag, "setup_id": setup_id},
         )
@@ -79,33 +79,33 @@ async def test_setup_untag_response_is_identical_when_tag_exists(
     # expdb_test transaction shared with Python API,
     # no commit needed and rolled back at the end of the test
     async with temporary_tags(tags=all_tags, setup_id=setup_id):
-        new = await py_api.post(
+        py_response = await py_api.post(
             f"/setup/untag?api_key={api_key}",
             json={"setup_id": setup_id, "tag": tag},
         )
 
-    if new.status_code == HTTPStatus.OK:
-        assert new.status_code == original.status_code
-        original_untag = original.json()["setup_untag"]
-        new_untag = new.json()["setup_untag"]
-        assert new_untag["id"] == original_untag["id"]
-        if tags := original_untag.get("tag"):
+    if py_response.status_code == HTTPStatus.OK:
+        assert py_response.status_code == php_response.status_code
+        php_untag = php_response.json()["setup_untag"]
+        py_untag = py_response.json()["setup_untag"]
+        assert py_untag["id"] == php_untag["id"]
+        if tags := php_untag.get("tag"):
             if isinstance(tags, str):
-                assert new_untag["tag"][0] == tags
+                assert py_untag["tag"][0] == tags
             else:
-                assert new_untag["tag"] == tags
+                assert py_untag["tag"] == tags
         else:
-            assert new_untag["tag"] == []
+            assert py_untag["tag"] == []
         return
 
-    code, message = original.json()["error"].values()
-    assert original.status_code == HTTPStatus.PRECONDITION_FAILED
-    assert new.status_code == HTTPStatus.FORBIDDEN
-    assert new.json()["code"] == code
+    code, message = php_response.json()["error"].values()
+    assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED
+    assert py_response.status_code == HTTPStatus.FORBIDDEN
+    assert py_response.json()["code"] == code
     assert message == "Tag is not owned by you"
     assert re.match(
         r"You may not remove tag \S+ of setup \d+ because it was not created by you.",
-        new.json()["detail"],
+        py_response.json()["detail"],
     )
 
 
@@ -117,7 +117,7 @@ async def test_setup_untag_response_is_identical_setup_doesnt_exist(
     tag = "totally_new_tag_for_migration_testing"
     api_key = ApiKey.SOME_USER
 
-    original, new = await asyncio.gather(
+    php_response, py_response = await asyncio.gather(
         php_api.post(
             "/setup/untag",
             data={"api_key": api_key, "tag": tag, "setup_id": setup_id},
@@ -128,13 +128,13 @@ async def test_setup_untag_response_is_identical_setup_doesnt_exist(
         ),
     )
 
-    assert original.status_code == HTTPStatus.PRECONDITION_FAILED
-    assert new.status_code == HTTPStatus.NOT_FOUND
-    assert original.json()["error"]["message"] == "Entity not found."
-    assert new.json()["code"] == original.json()["error"]["code"]
+    assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED
+    assert py_response.status_code == HTTPStatus.NOT_FOUND
+    assert php_response.json()["error"]["message"] == "Entity not found."
+    assert py_response.json()["code"] == php_response.json()["error"]["code"]
     assert re.match(
         r"Setup \d+ not found.",
-        new.json()["detail"],
+        py_response.json()["detail"],
     )
 
 
@@ -146,7 +146,7 @@ async def test_setup_untag_response_is_identical_tag_doesnt_exist(
     tag = "totally_new_tag_for_migration_testing"
     api_key = ApiKey.SOME_USER
 
-    original, new = await asyncio.gather(
+    php_response, py_response = await asyncio.gather(
         php_api.post(
             "/setup/untag",
             data={"api_key": api_key, "tag": tag, "setup_id": setup_id},
@@ -157,13 +157,13 @@ async def test_setup_untag_response_is_identical_tag_doesnt_exist(
         ),
     )
 
-    assert original.status_code == HTTPStatus.PRECONDITION_FAILED
-    assert new.status_code == HTTPStatus.NOT_FOUND
-    assert new.json()["code"] == original.json()["error"]["code"]
-    assert original.json()["error"]["message"] == "Tag not found."
+    assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED
+    assert py_response.status_code == HTTPStatus.NOT_FOUND
+    assert py_response.json()["code"] == php_response.json()["error"]["code"]
+    assert php_response.json()["error"]["message"] == "Tag not found."
     assert re.match(
         r"Setup \d+ does not have tag '\S+'.",
-        new.json()["detail"],
+        py_response.json()["detail"],
     )
 
 
@@ -190,7 +190,7 @@ async def test_setup_tag_response_is_identical_when_tag_doesnt_exist(  # noqa: P
     tag = "totally_new_tag_for_migration_testing"
 
     async with temporary_tags(tags=other_tags, setup_id=setup_id, persist=True):
-        original = await php_api.post(
+        php_response = await php_api.post(
             "/setup/tag",
             data={"api_key": api_key, "tag": tag, "setup_id": setup_id},
         )
@@ -202,23 +202,23 @@ async def test_setup_tag_response_is_identical_when_tag_doesnt_exist(  # noqa: P
         await expdb_test.commit()
 
     async with temporary_tags(tags=other_tags, setup_id=setup_id):
-        new = await py_api.post(
+        py_response = await py_api.post(
             f"/setup/tag?api_key={api_key}",
             json={"setup_id": setup_id, "tag": tag},
         )
 
-    assert new.status_code == HTTPStatus.OK
-    assert new.status_code == original.status_code
-    original_tag = original.json()["setup_tag"]
-    new_tag = new.json()["setup_tag"]
-    assert new_tag["id"] == original_tag["id"]
-    if tags := original_tag.get("tag"):
+    assert py_response.status_code == HTTPStatus.OK
+    assert py_response.status_code == php_response.status_code
+    php_tag = php_response.json()["setup_tag"]
+    py_tag = py_response.json()["setup_tag"]
+    assert py_tag["id"] == php_tag["id"]
+    if tags := php_tag.get("tag"):
         if isinstance(tags, str):
-            assert new_tag["tag"][0] == tags
+            assert py_tag["tag"][0] == tags
         else:
-            assert set(new_tag["tag"]) == set(tags)
+            assert set(py_tag["tag"]) == set(tags)
     else:
-        assert new_tag["tag"] == []
+        assert py_tag["tag"] == []
 
 
 async def test_setup_tag_response_is_identical_setup_doesnt_exist(
@@ -229,7 +229,7 @@ async def test_setup_tag_response_is_identical_setup_doesnt_exist(
     tag = "totally_new_tag_for_migration_testing"
     api_key = ApiKey.SOME_USER
 
-    original, new = await asyncio.gather(
+    php_response, py_response = await asyncio.gather(
         php_api.post(
             "/setup/tag",
             data={"api_key": api_key, "tag": tag, "setup_id": setup_id},
@@ -240,13 +240,13 @@ async def test_setup_tag_response_is_identical_setup_doesnt_exist(
         ),
     )
 
-    assert original.status_code == HTTPStatus.PRECONDITION_FAILED
-    assert new.status_code == HTTPStatus.NOT_FOUND
-    assert original.json()["error"]["message"] == "Entity not found."
-    assert new.json()["code"] == original.json()["error"]["code"]
+    assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED
+    assert py_response.status_code == HTTPStatus.NOT_FOUND
+    assert php_response.json()["error"]["message"] == "Entity not found."
+    assert py_response.json()["code"] == php_response.json()["error"]["code"]
     assert re.match(
         r"Setup \d+ not found.",
-        new.json()["detail"],
+        py_response.json()["detail"],
     )
 
 
@@ -262,7 +262,7 @@ async def test_setup_tag_response_is_identical_tag_already_exists(
 
     async with temporary_tags(tags=[tag], setup_id=setup_id, persist=True):
         # Both APIs can be tested in parallel since the tag is already persisted
-        original, new = await asyncio.gather(
+        php_response, py_response = await asyncio.gather(
             php_api.post(
                 "/setup/tag",
                 data={"api_key": api_key, "tag": tag, "setup_id": setup_id},
@@ -273,10 +273,10 @@ async def test_setup_tag_response_is_identical_tag_already_exists(
             ),
         )
 
-    assert original.status_code == HTTPStatus.INTERNAL_SERVER_ERROR
-    assert new.status_code == HTTPStatus.CONFLICT
-    assert original.json()["error"]["message"] == "Entity already tagged by this tag."
-    assert new.json()["detail"] == f"Setup {setup_id} already has tag {tag!r}."
+    assert php_response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR
+    assert py_response.status_code == HTTPStatus.CONFLICT
+    assert php_response.json()["error"]["message"] == "Entity already tagged by this tag."
+    assert py_response.json()["detail"] == f"Setup {setup_id} already has tag {tag!r}."
 
 
 async def test_get_setup_response_is_identical_setup_doesnt_exist(
@@ -285,16 +285,16 @@ async def test_get_setup_response_is_identical_setup_doesnt_exist(
 ) -> None:
     setup_id = 999999
 
-    original, new = await asyncio.gather(
+    php_response, py_response = await asyncio.gather(
         php_api.get(f"/setup/{setup_id}"),
         py_api.get(f"/setup/{setup_id}"),
     )
 
-    assert original.status_code == HTTPStatus.PRECONDITION_FAILED
-    assert new.status_code == HTTPStatus.NOT_FOUND
-    assert original.json()["error"]["message"] == "Unknown setup"
-    assert new.json()["code"] == original.json()["error"]["code"]
-    assert new.json()["detail"] == f"Setup {setup_id} not found."
+    assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED
+    assert py_response.status_code == HTTPStatus.NOT_FOUND
+    assert php_response.json()["error"]["message"] == "Unknown setup"
+    assert py_response.json()["code"] == php_response.json()["error"]["code"]
+    assert py_response.json()["detail"] == f"Setup {setup_id} not found."
 
 
 @pytest.mark.parametrize("setup_id", range(1, 125))
@@ -303,27 +303,27 @@ async def test_get_setup_response_is_identical(
     py_api: httpx.AsyncClient,
     php_api: httpx.AsyncClient,
 ) -> None:
-    original, new = await asyncio.gather(
+    php_response, py_response = await asyncio.gather(
         php_api.get(f"/setup/{setup_id}"),
         py_api.get(f"/setup/{setup_id}"),
     )
 
-    if original.status_code == HTTPStatus.PRECONDITION_FAILED:
-        assert new.status_code == HTTPStatus.NOT_FOUND
+    if php_response.status_code == HTTPStatus.PRECONDITION_FAILED:
+        assert py_response.status_code == HTTPStatus.NOT_FOUND
         return
 
-    assert original.status_code == HTTPStatus.OK
-    assert new.status_code == HTTPStatus.OK
+    assert php_response.status_code == HTTPStatus.OK
+    assert py_response.status_code == HTTPStatus.OK
 
-    original_json = original.json()
+    php_json = php_response.json()
 
     # PHP returns integer fields as strings. To compare, we recursively convert string digits
     # to integers.
     # PHP also returns `[]` instead of null for empty string optional fields, which Python omits.
-    original_json = nested_str_to_num(original_json)
-    original_json = nested_remove_values(original_json, values=[[], None])
+    php_json = nested_str_to_num(php_json)
+    php_json = nested_remove_values(php_json, values=[[], None])
 
-    new_json = nested_str_to_num(new.json())
-    new_json = nested_remove_values(new_json, values=[[], None])
+    py_json = nested_str_to_num(py_response.json())
+    py_json = nested_remove_values(py_json, values=[[], None])
 
-    assert new_json == original_json
+    assert py_json == php_json
diff --git a/tests/routers/openml/migration/studies_migration_test.py b/tests/routers/openml/migration/studies_migration_test.py
index 07cdd0cb..fc1340cc 100644
--- a/tests/routers/openml/migration/studies_migration_test.py
+++ b/tests/routers/openml/migration/studies_migration_test.py
@@ -7,31 +7,31 @@
 
 
 async def test_get_study_equal(py_api: httpx.AsyncClient, php_api: httpx.AsyncClient) -> None:
-    new, old = await asyncio.gather(
+    py_response, php_response = await asyncio.gather(
         py_api.get("/studies/1"),
         php_api.get("/study/1"),
     )
-    assert new.status_code == old.status_code
+    assert py_response.status_code == php_response.status_code
 
-    new_json = new.json()
+    py_json = py_response.json()
     # New implementation is typed
-    new_json = nested_num_to_str(new_json)
+    py_json = nested_num_to_str(py_json)
     # New implementation has same fields even if empty
-    new_json = nested_remove_values(new_json, values=[None])
-    new_json["tasks"] = {"task_id": new_json.pop("task_ids")}
-    new_json["data"] = {"data_id": new_json.pop("data_ids")}
-    if runs := new_json.pop("run_ids", None):
-        new_json["runs"] = {"run_id": runs}
-    if flows := new_json.pop("flow_ids", None):
-        new_json["flows"] = {"flow_id": flows}
-    if setups := new_json.pop("setup_ids", None):
-        new_json["setup"] = {"setup_id": setups}
+    py_json = nested_remove_values(py_json, values=[None])
+    py_json["tasks"] = {"task_id": py_json.pop("task_ids")}
+    py_json["data"] = {"data_id": py_json.pop("data_ids")}
+    if runs := py_json.pop("run_ids", None):
+        py_json["runs"] = {"run_id": runs}
+    if flows := py_json.pop("flow_ids", None):
+        py_json["flows"] = {"flow_id": flows}
+    if setups := py_json.pop("setup_ids", None):
+        py_json["setup"] = {"setup_id": setups}
 
     # New implementation is not nested
-    new_json = {"study": new_json}
+    py_json = {"study": py_json}
     difference = deepdiff.diff.DeepDiff(
-        new_json,
-        old.json(),
+        py_json,
+        php_response.json(),
         ignore_order=True,
         ignore_numeric_type_changes=True,
     )
diff --git a/tests/routers/openml/migration/tasks_migration_test.py b/tests/routers/openml/migration/tasks_migration_test.py
index 2aab7483..a11f1a54 100644
--- a/tests/routers/openml/migration/tasks_migration_test.py
+++ b/tests/routers/openml/migration/tasks_migration_test.py
@@ -20,25 +20,25 @@
 async def test_get_task_equal(
     task_id: int, py_api: httpx.AsyncClient, php_api: httpx.AsyncClient
 ) -> None:
-    response, php_response = await asyncio.gather(
+    py_response, php_response = await asyncio.gather(
         py_api.get(f"/tasks/{task_id}"),
         php_api.get(f"/task/{task_id}"),
     )
-    assert response.status_code == HTTPStatus.OK
+    assert py_response.status_code == HTTPStatus.OK
     assert php_response.status_code == HTTPStatus.OK
 
-    new_json = response.json()
+    py_json = py_response.json()
     # Some fields are renamed (old = tag, new = tags)
-    new_json["tag"] = new_json.pop("tags")
-    new_json["task_id"] = new_json.pop("id")
-    new_json["task_name"] = new_json.pop("name")
+    py_json["tag"] = py_json.pop("tags")
+    py_json["task_id"] = py_json.pop("id")
+    py_json["task_name"] = py_json.pop("name")
     # PHP is not typed *and* automatically removes None values
-    new_json = nested_remove_values(new_json, values=[None])
-    new_json = nested_num_to_str(new_json)
+    py_json = nested_remove_values(py_json, values=[None])
+    py_json = nested_num_to_str(py_json)
     # It also removes "value" entries for parameters if the list is empty,
     # it does not remove *all* empty lists, e.g., for cost_matrix input they are kept
     estimation_procedure = next(
-        v["estimation_procedure"] for v in new_json["input"] if "estimation_procedure" in v
+        v["estimation_procedure"] for v in py_json["input"] if "estimation_procedure" in v
     )
     if "parameter" in estimation_procedure:
         estimation_procedure["parameter"] = [
@@ -46,16 +46,16 @@ async def test_get_task_equal(
             for parameter in estimation_procedure["parameter"]
         ]
     # Fields that may return in a list now always return a list
-    new_json = nested_remove_single_element_list(new_json)
+    py_json = nested_remove_single_element_list(py_json)
     # Tags are not returned if they are an empty list:
-    if new_json["tag"] == []:
-        new_json.pop("tag")
+    if py_json["tag"] == []:
+        py_json.pop("tag")
 
     # The response is no longer nested
-    new_json = {"task": new_json}
+    py_json = {"task": py_json}
 
     differences = deepdiff.diff.DeepDiff(
-        new_json,
+        py_json,
         php_response.json(),
         ignore_order=True,
     )
diff --git a/tests/routers/openml/qualities_list_test.py b/tests/routers/openml/qualities_list_test.py
index 1f12ceab..8eb51a58 100644
--- a/tests/routers/openml/qualities_list_test.py
+++ b/tests/routers/openml/qualities_list_test.py
@@ -31,12 +31,12 @@ async def _remove_quality_from_database(quality_name: str, expdb_test: AsyncConn
 async def test_list_qualities_identical(
     py_api: httpx.AsyncClient, php_api: httpx.AsyncClient
 ) -> None:
-    new, original = await asyncio.gather(
+    py_response, php_response = await asyncio.gather(
         py_api.get("/datasets/qualities/list"),
         php_api.get("/data/qualities/list"),
     )
-    assert new.status_code == original.status_code
-    assert new.json() == original.json()
+    assert py_response.status_code == php_response.status_code
+    assert py_response.json() == php_response.json()
     # To keep the test idempotent, we cannot test if reaction to database changes is identical
 
 
diff --git a/tests/routers/openml/study_post_test.py b/tests/routers/openml/study_post_test.py
index df0e5813..0cb00fdb 100644
--- a/tests/routers/openml/study_post_test.py
+++ b/tests/routers/openml/study_post_test.py
@@ -21,9 +21,9 @@ async def test_create_task_study(py_api: httpx.AsyncClient) -> None:
         },
     )
     assert response.status_code == HTTPStatus.OK
-    new = response.json()
-    assert "study_id" in new
-    study_id = new["study_id"]
+    body = response.json()
+    assert "study_id" in body
+    study_id = body["study_id"]
     assert isinstance(study_id, int)
 
     study = await py_api.get(f"/studies/{study_id}")
diff --git a/tests/routers/openml/task_type_get_test.py b/tests/routers/openml/task_type_get_test.py
index ef8e5549..61bd0c91 100644
--- a/tests/routers/openml/task_type_get_test.py
+++ b/tests/routers/openml/task_type_get_test.py
@@ -15,14 +15,14 @@
 async def test_get_task_type(
     ttype_id: int, py_api: httpx.AsyncClient, php_api: httpx.AsyncClient
 ) -> None:
-    response, original = await asyncio.gather(
+    py_response, php_response = await asyncio.gather(
         py_api.get(f"/tasktype/{ttype_id}"),
         php_api.get(f"/tasktype/{ttype_id}"),
     )
-    assert response.status_code == original.status_code
+    assert py_response.status_code == php_response.status_code
 
-    py_json = response.json()
-    php_json = original.json()
+    py_json = py_response.json()
+    php_json = php_response.json()
 
     # The PHP types distinguish between single (str) or multiple (list) creator/contrib
     for field in ["contributor", "creator"]:
diff --git a/tests/routers/openml/task_type_list_test.py b/tests/routers/openml/task_type_list_test.py
index d562838b..871def3b 100644
--- a/tests/routers/openml/task_type_list_test.py
+++ b/tests/routers/openml/task_type_list_test.py
@@ -4,9 +4,9 @@
 
 
 async def test_list_task_type(py_api: httpx.AsyncClient, php_api: httpx.AsyncClient) -> None:
-    response, original = await asyncio.gather(
+    py_response, php_response = await asyncio.gather(
         py_api.get("/tasktype/list"),
         php_api.get("/tasktype/list"),
     )
-    assert response.status_code == original.status_code
-    assert response.json() == original.json()
+    assert py_response.status_code == php_response.status_code
+    assert py_response.json() == php_response.json()

From 530061bd3653b4fde20830d408f853cfa22a10f4 Mon Sep 17 00:00:00 2001
From: PGijsbers <p.gijsbers@tue.nl>
Date: Sat, 11 Apr 2026 21:13:13 +0200
Subject: [PATCH 3/3] Make mypy aware of src directory

---
 pyproject.toml                                      | 1 +
 tests/routers/openml/datasets_list_datasets_test.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 2c53b9bc..565f58d7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -108,6 +108,7 @@ ignore = [
 
 [tool.mypy]
 strict = true
+mypy_path = "src"
 plugins = [
     "pydantic.mypy"
 ]
diff --git a/tests/routers/openml/datasets_list_datasets_test.py b/tests/routers/openml/datasets_list_datasets_test.py
index 5086ac50..be08927d 100644
--- a/tests/routers/openml/datasets_list_datasets_test.py
+++ b/tests/routers/openml/datasets_list_datasets_test.py
@@ -341,6 +341,6 @@ async def test_list_data_quality(
         status=DatasetStatusFilter.ALL,
         user=None,
         expdb_db=expdb_test,
-        **{quality: range_},
+        **{quality: range_},  # type: ignore[arg-type]
     )
     assert len(result) == count