From ac80151a301b4d35a7a494a3ae2a76e89e154532 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Mon, 22 Dec 2025 19:26:14 +0100
Subject: [PATCH 1/2] Revert the removal of subflow context
The extra nesting provides the identifier (nickname) by which the
parent flow refers to each of its subflows.
---
src/routers/openml/flows.py | 10 +-
src/schemas/flows.py | 9 +-
tests/routers/openml/flows_test.py | 279 +++++++++---------
.../openml/migration/flows_migration_test.py | 6 +-
4 files changed, 161 insertions(+), 143 deletions(-)
diff --git a/src/routers/openml/flows.py b/src/routers/openml/flows.py
index 083916b..981b546 100644
--- a/src/routers/openml/flows.py
+++ b/src/routers/openml/flows.py
@@ -49,8 +49,14 @@ def get_flow(flow_id: int, expdb: Annotated[Connection, Depends(expdb_connection
]
tags = database.flows.get_tags(flow_id, expdb)
- flow_rows = database.flows.get_subflows(flow_id, expdb)
- subflows = [get_flow(flow_id=flow.child_id, expdb=expdb) for flow in flow_rows]
+ subflow_rows = database.flows.get_subflows(flow_id, expdb)
+ subflows = [
+ {
+ "identifier": subflow.identifier,
+ "flow": get_flow(flow_id=subflow.child_id, expdb=expdb),
+ }
+ for subflow in subflow_rows
+ ]
return Flow(
id_=flow.id,
diff --git a/src/schemas/flows.py b/src/schemas/flows.py
index 33dc081..a6cd479 100644
--- a/src/schemas/flows.py
+++ b/src/schemas/flows.py
@@ -1,7 +1,7 @@
from __future__ import annotations
from datetime import datetime
-from typing import Any, Self
+from typing import Any, TypedDict
from pydantic import BaseModel, ConfigDict, Field
@@ -25,7 +25,12 @@ class Flow(BaseModel):
language: str | None = Field(max_length=128)
dependencies: str | None
parameter: list[Parameter]
- subflows: list[Self]
+ subflows: list[Subflow]
tag: list[str]
model_config = ConfigDict(arbitrary_types_allowed=True)
+
+
+class Subflow(TypedDict):  # one child flow plus its nickname in the parent flow
+ identifier: str | None  # nickname of the subflow within the parent flow, if any
+ flow: Flow  # the nested child flow itself
diff --git a/tests/routers/openml/flows_test.py b/tests/routers/openml/flows_test.py
index 611e478..d5188d0 100644
--- a/tests/routers/openml/flows_test.py
+++ b/tests/routers/openml/flows_test.py
@@ -221,142 +221,149 @@ def test_get_flow_with_subflow(py_api: TestClient) -> None:
],
"subflows": [
{
- "id": 4,
- "uploader": 16,
- "name": "weka.J48",
- "class_name": "weka.classifiers.trees.J48",
- "version": 1,
- "external_version": "Weka_3.9.0_11194",
- "description": (
- "Ross Quinlan (1993). C4.5: Programs for Machine Learning. "
- "Morgan Kaufmann Publishers, San Mateo, CA."
- ),
- "upload_date": "2017-03-24T14:26:40",
- "language": "English",
- "dependencies": "Weka_3.9.0",
- "parameter": [
- {
- "name": "-do-not-check-capabilities",
- "data_type": "flag",
- "default_value": None,
- "description": (
- "If set, classifier capabilities are not checked"
- " before classifier is built\n\t(use with caution)."
- ),
- },
- {
- "name": "-doNotMakeSplitPointActualValue",
- "data_type": "flag",
- "default_value": None,
- "description": "Do not make split point actual value.",
- },
- {
- "name": "A",
- "data_type": "flag",
- "default_value": None,
- "description": "Laplace smoothing for predicted probabilities.",
- },
- {
- "name": "B",
- "data_type": "flag",
- "default_value": None,
- "description": "Use binary splits only.",
- },
- {
- "name": "C",
- "data_type": "option",
- "default_value": 0.25,
- "description": ("Set confidence threshold for pruning.\n\t(default 0.25)"),
- },
- {
- "name": "J",
- "data_type": "flag",
- "default_value": None,
- "description": (
- "Do not use MDL correction for info gain on numeric attributes."
- ),
- },
- {
- "name": "L",
- "data_type": "flag",
- "default_value": None,
- "description": "Do not clean up after the tree has been built.",
- },
- {
- "name": "M",
- "data_type": "option",
- "default_value": 2,
- "description": ("Set minimum number of instances per leaf.\n\t(default 2)"),
- },
- {
- "name": "N",
- "data_type": "option",
- "default_value": None,
- "description": (
- "Set number of folds for reduced error\n\t"
- "pruning. One fold is used as pruning set.\n\t(default 3)"
- ),
- },
- {
- "name": "O",
- "data_type": "flag",
- "default_value": None,
- "description": "Do not collapse tree.",
- },
- {
- "name": "Q",
- "data_type": "option",
- "default_value": None,
- "description": "Seed for random data shuffling (default 1).",
- },
- {
- "name": "R",
- "data_type": "flag",
- "default_value": None,
- "description": "Use reduced error pruning.",
- },
- {
- "name": "S",
- "data_type": "flag",
- "default_value": None,
- "description": "Do not perform subtree raising.",
- },
- {
- "name": "U",
- "data_type": "flag",
- "default_value": None,
- "description": "Use unpruned tree.",
- },
- {
- "name": "batch-size",
- "data_type": "option",
- "default_value": None,
- "description": (
- "The desired batch size for batch prediction (default 100)."
- ),
- },
- {
- "name": "num-decimal-places",
- "data_type": "option",
- "default_value": None,
- "description": (
- "The number of decimal places for the output of numbers"
- " in the model (default 2)."
- ),
- },
- {
- "name": "output-debug-info",
- "data_type": "flag",
- "default_value": None,
- "description": (
- "If set, classifier is run in debug mode and\n\t"
- "may output additional info to the console"
- ),
- },
- ],
- "tag": ["OpenmlWeka", "weka"],
- "subflows": [],
- },
+ "identifier": None,
+ "flow": {
+ "id": 4,
+ "uploader": 16,
+ "name": "weka.J48",
+ "class_name": "weka.classifiers.trees.J48",
+ "version": 1,
+ "external_version": "Weka_3.9.0_11194",
+ "description": (
+ "Ross Quinlan (1993). C4.5: Programs for Machine Learning. "
+ "Morgan Kaufmann Publishers, San Mateo, CA."
+ ),
+ "upload_date": "2017-03-24T14:26:40",
+ "language": "English",
+ "dependencies": "Weka_3.9.0",
+ "parameter": [
+ {
+ "name": "-do-not-check-capabilities",
+ "data_type": "flag",
+ "default_value": None,
+ "description": (
+ "If set, classifier capabilities are not checked"
+ " before classifier is built\n\t(use with caution)."
+ ),
+ },
+ {
+ "name": "-doNotMakeSplitPointActualValue",
+ "data_type": "flag",
+ "default_value": None,
+ "description": "Do not make split point actual value.",
+ },
+ {
+ "name": "A",
+ "data_type": "flag",
+ "default_value": None,
+ "description": "Laplace smoothing for predicted probabilities.",
+ },
+ {
+ "name": "B",
+ "data_type": "flag",
+ "default_value": None,
+ "description": "Use binary splits only.",
+ },
+ {
+ "name": "C",
+ "data_type": "option",
+ "default_value": 0.25,
+ "description": (
+ "Set confidence threshold for pruning.\n\t(default 0.25)"
+ ),
+ },
+ {
+ "name": "J",
+ "data_type": "flag",
+ "default_value": None,
+ "description": (
+ "Do not use MDL correction for info gain on numeric attributes."
+ ),
+ },
+ {
+ "name": "L",
+ "data_type": "flag",
+ "default_value": None,
+ "description": "Do not clean up after the tree has been built.",
+ },
+ {
+ "name": "M",
+ "data_type": "option",
+ "default_value": 2,
+ "description": (
+ "Set minimum number of instances per leaf.\n\t(default 2)"
+ ),
+ },
+ {
+ "name": "N",
+ "data_type": "option",
+ "default_value": None,
+ "description": (
+ "Set number of folds for reduced error\n\t"
+ "pruning. One fold is used as pruning set.\n\t(default 3)"
+ ),
+ },
+ {
+ "name": "O",
+ "data_type": "flag",
+ "default_value": None,
+ "description": "Do not collapse tree.",
+ },
+ {
+ "name": "Q",
+ "data_type": "option",
+ "default_value": None,
+ "description": "Seed for random data shuffling (default 1).",
+ },
+ {
+ "name": "R",
+ "data_type": "flag",
+ "default_value": None,
+ "description": "Use reduced error pruning.",
+ },
+ {
+ "name": "S",
+ "data_type": "flag",
+ "default_value": None,
+ "description": "Do not perform subtree raising.",
+ },
+ {
+ "name": "U",
+ "data_type": "flag",
+ "default_value": None,
+ "description": "Use unpruned tree.",
+ },
+ {
+ "name": "batch-size",
+ "data_type": "option",
+ "default_value": None,
+ "description": (
+ "The desired batch size for batch prediction (default 100)."
+ ),
+ },
+ {
+ "name": "num-decimal-places",
+ "data_type": "option",
+ "default_value": None,
+ "description": (
+ "The number of decimal places for the output of numbers"
+ " in the model (default 2)."
+ ),
+ },
+ {
+ "name": "output-debug-info",
+ "data_type": "flag",
+ "default_value": None,
+ "description": (
+ "If set, classifier is run in debug mode and\n\t"
+ "may output additional info to the console"
+ ),
+ },
+ ],
+ "tag": ["OpenmlWeka", "weka"],
+ "subflows": [],
+ },
+ }
],
"tag": ["OpenmlWeka", "weka"],
}
diff --git a/tests/routers/openml/migration/flows_migration_test.py b/tests/routers/openml/migration/flows_migration_test.py
index 14d8088..674bc43 100644
--- a/tests/routers/openml/migration/flows_migration_test.py
+++ b/tests/routers/openml/migration/flows_migration_test.py
@@ -65,7 +65,9 @@ def convert_flow_naming_and_defaults(flow: dict[str, Any]) -> dict[str, Any]:
if parameter["default_value"] is None:
parameter["default_value"] = []
for subflow in flow["subflows"]:
- convert_flow_naming_and_defaults(subflow)
+ subflow["flow"] = convert_flow_naming_and_defaults(subflow["flow"])
+ if subflow["identifier"] is None:
+ subflow["identifier"] = []
flow["component"] = flow.pop("subflows")
if flow["component"] == []:
flow.pop("component")
@@ -75,8 +77,6 @@ def convert_flow_naming_and_defaults(flow: dict[str, Any]) -> dict[str, Any]:
new = nested_remove_single_element_list(new)
expected = php_api.get(f"/flow/{flow_id}").json()["flow"]
- if subflow := expected.get("component"):
- expected["component"] = subflow["flow"]
# The reason we don't transform "new" to str is that it becomes harder to ignore numeric type
# differences (e.g., '1.0' vs '1')
expected = nested_str_to_num(expected)
From 0ebac7d1fbb9ad16b0350ff447e2ebb29318d941 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Mon, 22 Dec 2025 19:40:59 +0100
Subject: [PATCH 2/2] Use Subflow type for subflows
---
src/routers/openml/flows.py | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/src/routers/openml/flows.py b/src/routers/openml/flows.py
index 981b546..cb6df5d 100644
--- a/src/routers/openml/flows.py
+++ b/src/routers/openml/flows.py
@@ -7,7 +7,7 @@
import database.flows
from core.conversions import _str_to_num
from routers.dependencies import expdb_connection
-from schemas.flows import Flow, Parameter
+from schemas.flows import Flow, Parameter, Subflow
router = APIRouter(prefix="/flows", tags=["flows"])
@@ -51,10 +51,10 @@ def get_flow(flow_id: int, expdb: Annotated[Connection, Depends(expdb_connection
tags = database.flows.get_tags(flow_id, expdb)
subflow_rows = database.flows.get_subflows(flow_id, expdb)
subflows = [
- {
- "identifier": subflow.identifier,
- "flow": get_flow(flow_id=subflow.child_id, expdb=expdb),
- }
+ Subflow(
+ identifier=subflow.identifier,
+ flow=get_flow(flow_id=subflow.child_id, expdb=expdb),
+ )
for subflow in subflow_rows
]