# Patch summary (free-text preamble; patch tools skip text before the first "diff --git" header).
# - src/routers/openml/flows.py: get_flow now wraps every child flow in
#   Subflow(identifier=..., flow=...) instead of returning the nested Flow directly.
# - src/schemas/flows.py: Flow.subflows becomes list[Subflow]; Subflow is a TypedDict with the
#   keys "identifier" (str | None) and "flow" (Flow). The typing.Self import is replaced by
#   typing.TypedDict.
# - tests/routers/openml/flows_test.py: the expected subflow entry is nested under
#   {"identifier": None, "flow": {...}}; two parameter descriptions are re-wrapped because of the
#   deeper indentation.
# - tests/routers/openml/migration/flows_migration_test.py: the converter recurses into
#   subflow["flow"], maps a None identifier to [], and the PHP response's "component" entry is no
#   longer unwrapped before the comparison.
# NOTE(review): line breaks and indentation were reconstructed from a newline-collapsed copy and
# cross-checked against the hunk line counts - confirm against the original commit before applying.
diff --git a/src/routers/openml/flows.py b/src/routers/openml/flows.py
index 083916b..cb6df5d 100644
--- a/src/routers/openml/flows.py
+++ b/src/routers/openml/flows.py
@@ -7,7 +7,7 @@
 import database.flows
 from core.conversions import _str_to_num
 from routers.dependencies import expdb_connection
-from schemas.flows import Flow, Parameter
+from schemas.flows import Flow, Parameter, Subflow
 
 router = APIRouter(prefix="/flows", tags=["flows"])
 
@@ -49,8 +49,14 @@ def get_flow(flow_id: int, expdb: Annotated[Connection, Depends(expdb_connection
     ]
 
     tags = database.flows.get_tags(flow_id, expdb)
-    flow_rows = database.flows.get_subflows(flow_id, expdb)
-    subflows = [get_flow(flow_id=flow.child_id, expdb=expdb) for flow in flow_rows]
+    subflow_rows = database.flows.get_subflows(flow_id, expdb)
+    subflows = [
+        Subflow(
+            identifier=subflow.identifier,
+            flow=get_flow(flow_id=subflow.child_id, expdb=expdb),
+        )
+        for subflow in subflow_rows
+    ]
 
     return Flow(
         id_=flow.id,
diff --git a/src/schemas/flows.py b/src/schemas/flows.py
index 33dc081..a6cd479 100644
--- a/src/schemas/flows.py
+++ b/src/schemas/flows.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 from datetime import datetime
-from typing import Any, Self
+from typing import Any, TypedDict
 
 from pydantic import BaseModel, ConfigDict, Field
 
@@ -25,7 +25,12 @@ class Flow(BaseModel):
     language: str | None = Field(max_length=128)
     dependencies: str | None
     parameter: list[Parameter]
-    subflows: list[Self]
+    subflows: list[Subflow]
     tag: list[str]
 
     model_config = ConfigDict(arbitrary_types_allowed=True)
+
+
+class Subflow(TypedDict):
+    identifier: str | None
+    flow: Flow
diff --git a/tests/routers/openml/flows_test.py b/tests/routers/openml/flows_test.py
index 611e478..d5188d0 100644
--- a/tests/routers/openml/flows_test.py
+++ b/tests/routers/openml/flows_test.py
@@ -221,142 +221,149 @@ def test_get_flow_with_subflow(py_api: TestClient) -> None:
         ],
         "subflows": [
             {
-                "id": 4,
-                "uploader": 16,
-                "name": "weka.J48",
-                "class_name": "weka.classifiers.trees.J48",
-                "version": 1,
-                "external_version": "Weka_3.9.0_11194",
-                "description": (
-                    "Ross Quinlan (1993). C4.5: Programs for Machine Learning. "
-                    "Morgan Kaufmann Publishers, San Mateo, CA."
-                ),
-                "upload_date": "2017-03-24T14:26:40",
-                "language": "English",
-                "dependencies": "Weka_3.9.0",
-                "parameter": [
-                    {
-                        "name": "-do-not-check-capabilities",
-                        "data_type": "flag",
-                        "default_value": None,
-                        "description": (
-                            "If set, classifier capabilities are not checked"
-                            " before classifier is built\n\t(use with caution)."
-                        ),
-                    },
-                    {
-                        "name": "-doNotMakeSplitPointActualValue",
-                        "data_type": "flag",
-                        "default_value": None,
-                        "description": "Do not make split point actual value.",
-                    },
-                    {
-                        "name": "A",
-                        "data_type": "flag",
-                        "default_value": None,
-                        "description": "Laplace smoothing for predicted probabilities.",
-                    },
-                    {
-                        "name": "B",
-                        "data_type": "flag",
-                        "default_value": None,
-                        "description": "Use binary splits only.",
-                    },
-                    {
-                        "name": "C",
-                        "data_type": "option",
-                        "default_value": 0.25,
-                        "description": ("Set confidence threshold for pruning.\n\t(default 0.25)"),
-                    },
-                    {
-                        "name": "J",
-                        "data_type": "flag",
-                        "default_value": None,
-                        "description": (
-                            "Do not use MDL correction for info gain on numeric attributes."
-                        ),
-                    },
-                    {
-                        "name": "L",
-                        "data_type": "flag",
-                        "default_value": None,
-                        "description": "Do not clean up after the tree has been built.",
-                    },
-                    {
-                        "name": "M",
-                        "data_type": "option",
-                        "default_value": 2,
-                        "description": ("Set minimum number of instances per leaf.\n\t(default 2)"),
-                    },
-                    {
-                        "name": "N",
-                        "data_type": "option",
-                        "default_value": None,
-                        "description": (
-                            "Set number of folds for reduced error\n\t"
-                            "pruning. One fold is used as pruning set.\n\t(default 3)"
-                        ),
-                    },
-                    {
-                        "name": "O",
-                        "data_type": "flag",
-                        "default_value": None,
-                        "description": "Do not collapse tree.",
-                    },
-                    {
-                        "name": "Q",
-                        "data_type": "option",
-                        "default_value": None,
-                        "description": "Seed for random data shuffling (default 1).",
-                    },
-                    {
-                        "name": "R",
-                        "data_type": "flag",
-                        "default_value": None,
-                        "description": "Use reduced error pruning.",
-                    },
-                    {
-                        "name": "S",
-                        "data_type": "flag",
-                        "default_value": None,
-                        "description": "Do not perform subtree raising.",
-                    },
-                    {
-                        "name": "U",
-                        "data_type": "flag",
-                        "default_value": None,
-                        "description": "Use unpruned tree.",
-                    },
-                    {
-                        "name": "batch-size",
-                        "data_type": "option",
-                        "default_value": None,
-                        "description": (
-                            "The desired batch size for batch prediction (default 100)."
-                        ),
-                    },
-                    {
-                        "name": "num-decimal-places",
-                        "data_type": "option",
-                        "default_value": None,
-                        "description": (
-                            "The number of decimal places for the output of numbers"
-                            " in the model (default 2)."
-                        ),
-                    },
-                    {
-                        "name": "output-debug-info",
-                        "data_type": "flag",
-                        "default_value": None,
-                        "description": (
-                            "If set, classifier is run in debug mode and\n\t"
-                            "may output additional info to the console"
-                        ),
-                    },
-                ],
-                "tag": ["OpenmlWeka", "weka"],
-                "subflows": [],
-            },
+                "identifier": None,
+                "flow": {
+                    "id": 4,
+                    "uploader": 16,
+                    "name": "weka.J48",
+                    "class_name": "weka.classifiers.trees.J48",
+                    "version": 1,
+                    "external_version": "Weka_3.9.0_11194",
+                    "description": (
+                        "Ross Quinlan (1993). C4.5: Programs for Machine Learning. "
+                        "Morgan Kaufmann Publishers, San Mateo, CA."
+                    ),
+                    "upload_date": "2017-03-24T14:26:40",
+                    "language": "English",
+                    "dependencies": "Weka_3.9.0",
+                    "parameter": [
+                        {
+                            "name": "-do-not-check-capabilities",
+                            "data_type": "flag",
+                            "default_value": None,
+                            "description": (
+                                "If set, classifier capabilities are not checked"
+                                " before classifier is built\n\t(use with caution)."
+                            ),
+                        },
+                        {
+                            "name": "-doNotMakeSplitPointActualValue",
+                            "data_type": "flag",
+                            "default_value": None,
+                            "description": "Do not make split point actual value.",
+                        },
+                        {
+                            "name": "A",
+                            "data_type": "flag",
+                            "default_value": None,
+                            "description": "Laplace smoothing for predicted probabilities.",
+                        },
+                        {
+                            "name": "B",
+                            "data_type": "flag",
+                            "default_value": None,
+                            "description": "Use binary splits only.",
+                        },
+                        {
+                            "name": "C",
+                            "data_type": "option",
+                            "default_value": 0.25,
+                            "description": (
+                                "Set confidence threshold for pruning.\n\t(default 0.25)"
+                            ),
+                        },
+                        {
+                            "name": "J",
+                            "data_type": "flag",
+                            "default_value": None,
+                            "description": (
+                                "Do not use MDL correction for info gain on numeric attributes."
+                            ),
+                        },
+                        {
+                            "name": "L",
+                            "data_type": "flag",
+                            "default_value": None,
+                            "description": "Do not clean up after the tree has been built.",
+                        },
+                        {
+                            "name": "M",
+                            "data_type": "option",
+                            "default_value": 2,
+                            "description": (
+                                "Set minimum number of instances per leaf.\n\t(default 2)"
+                            ),
+                        },
+                        {
+                            "name": "N",
+                            "data_type": "option",
+                            "default_value": None,
+                            "description": (
+                                "Set number of folds for reduced error\n\t"
+                                "pruning. One fold is used as pruning set.\n\t(default 3)"
+                            ),
+                        },
+                        {
+                            "name": "O",
+                            "data_type": "flag",
+                            "default_value": None,
+                            "description": "Do not collapse tree.",
+                        },
+                        {
+                            "name": "Q",
+                            "data_type": "option",
+                            "default_value": None,
+                            "description": "Seed for random data shuffling (default 1).",
+                        },
+                        {
+                            "name": "R",
+                            "data_type": "flag",
+                            "default_value": None,
+                            "description": "Use reduced error pruning.",
+                        },
+                        {
+                            "name": "S",
+                            "data_type": "flag",
+                            "default_value": None,
+                            "description": "Do not perform subtree raising.",
+                        },
+                        {
+                            "name": "U",
+                            "data_type": "flag",
+                            "default_value": None,
+                            "description": "Use unpruned tree.",
+                        },
+                        {
+                            "name": "batch-size",
+                            "data_type": "option",
+                            "default_value": None,
+                            "description": (
+                                "The desired batch size for batch prediction (default 100)."
+                            ),
+                        },
+                        {
+                            "name": "num-decimal-places",
+                            "data_type": "option",
+                            "default_value": None,
+                            "description": (
+                                "The number of decimal places for the output of numbers"
+                                " in the model (default 2)."
+                            ),
+                        },
+                        {
+                            "name": "output-debug-info",
+                            "data_type": "flag",
+                            "default_value": None,
+                            "description": (
+                                "If set, classifier is run in debug mode and\n\t"
+                                "may output additional info to the console"
+                            ),
+                        },
+                    ],
+                    "tag": ["OpenmlWeka", "weka"],
+                    "subflows": [],
+                },
+            }
         ],
         "tag": ["OpenmlWeka", "weka"],
     }
diff --git a/tests/routers/openml/migration/flows_migration_test.py b/tests/routers/openml/migration/flows_migration_test.py
index 14d8088..674bc43 100644
--- a/tests/routers/openml/migration/flows_migration_test.py
+++ b/tests/routers/openml/migration/flows_migration_test.py
@@ -65,7 +65,9 @@ def convert_flow_naming_and_defaults(flow: dict[str, Any]) -> dict[str, Any]:
             if parameter["default_value"] is None:
                 parameter["default_value"] = []
         for subflow in flow["subflows"]:
-            convert_flow_naming_and_defaults(subflow)
+            subflow["flow"] = convert_flow_naming_and_defaults(subflow["flow"])
+            if subflow["identifier"] is None:
+                subflow["identifier"] = []
         flow["component"] = flow.pop("subflows")
         if flow["component"] == []:
             flow.pop("component")
@@ -75,8 +77,6 @@ def convert_flow_naming_and_defaults(flow: dict[str, Any]) -> dict[str, Any]:
     new = nested_remove_single_element_list(new)
 
     expected = php_api.get(f"/flow/{flow_id}").json()["flow"]
-    if subflow := expected.get("component"):
-        expected["component"] = subflow["flow"]
     # The reason we don't transform "new" to str is that it becomes harder to ignore numeric type
     # differences (e.g., '1.0' vs '1')
     expected = nested_str_to_num(expected)