Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions src/routers/openml/flows.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import database.flows
from core.conversions import _str_to_num
from routers.dependencies import expdb_connection
from schemas.flows import Flow, Parameter
from schemas.flows import Flow, Parameter, Subflow

router = APIRouter(prefix="/flows", tags=["flows"])

Expand Down Expand Up @@ -49,8 +49,14 @@ def get_flow(flow_id: int, expdb: Annotated[Connection, Depends(expdb_connection
]

tags = database.flows.get_tags(flow_id, expdb)
flow_rows = database.flows.get_subflows(flow_id, expdb)
subflows = [get_flow(flow_id=flow.child_id, expdb=expdb) for flow in flow_rows]
subflow_rows = database.flows.get_subflows(flow_id, expdb)
subflows = [
Subflow(
identifier=subflow.identifier,
flow=get_flow(flow_id=subflow.child_id, expdb=expdb),
)
for subflow in subflow_rows
]

return Flow(
id_=flow.id,
Expand Down
9 changes: 7 additions & 2 deletions src/schemas/flows.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

from datetime import datetime
from typing import Any, Self
from typing import Any, TypedDict

from pydantic import BaseModel, ConfigDict, Field

Expand All @@ -25,7 +25,12 @@ class Flow(BaseModel):
language: str | None = Field(max_length=128)
dependencies: str | None
parameter: list[Parameter]
subflows: list[Self]
subflows: list[Subflow]
tag: list[str]

model_config = ConfigDict(arbitrary_types_allowed=True)


class Subflow(TypedDict):
    # One element of `Flow.subflows`: pairs a nested flow with the identifier
    # read from its subflow row (the router builds it as
    # `Subflow(identifier=subflow.identifier, flow=get_flow(...))`).
    #
    # NOTE(review): pydantic documents that `typing.TypedDict` is only
    # supported on Python >= 3.12 and asks for `typing_extensions.TypedDict`
    # on older versions — confirm the project's minimum Python version.

    # Identifier taken from the subflow row; may be None (the existing
    # router test expects `"identifier": None` for its only subflow).
    identifier: str | None
    # The fully resolved child flow; being a `Flow`, it carries its own
    # `subflows` list, so the structure is recursive.
    flow: Flow
279 changes: 143 additions & 136 deletions tests/routers/openml/flows_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,142 +221,149 @@ def test_get_flow_with_subflow(py_api: TestClient) -> None:
],
"subflows": [
{
"id": 4,
"uploader": 16,
"name": "weka.J48",
"class_name": "weka.classifiers.trees.J48",
"version": 1,
"external_version": "Weka_3.9.0_11194",
"description": (
"Ross Quinlan (1993). C4.5: Programs for Machine Learning. "
"Morgan Kaufmann Publishers, San Mateo, CA."
),
"upload_date": "2017-03-24T14:26:40",
"language": "English",
"dependencies": "Weka_3.9.0",
"parameter": [
{
"name": "-do-not-check-capabilities",
"data_type": "flag",
"default_value": None,
"description": (
"If set, classifier capabilities are not checked"
" before classifier is built\n\t(use with caution)."
),
},
{
"name": "-doNotMakeSplitPointActualValue",
"data_type": "flag",
"default_value": None,
"description": "Do not make split point actual value.",
},
{
"name": "A",
"data_type": "flag",
"default_value": None,
"description": "Laplace smoothing for predicted probabilities.",
},
{
"name": "B",
"data_type": "flag",
"default_value": None,
"description": "Use binary splits only.",
},
{
"name": "C",
"data_type": "option",
"default_value": 0.25,
"description": ("Set confidence threshold for pruning.\n\t(default 0.25)"),
},
{
"name": "J",
"data_type": "flag",
"default_value": None,
"description": (
"Do not use MDL correction for info gain on numeric attributes."
),
},
{
"name": "L",
"data_type": "flag",
"default_value": None,
"description": "Do not clean up after the tree has been built.",
},
{
"name": "M",
"data_type": "option",
"default_value": 2,
"description": ("Set minimum number of instances per leaf.\n\t(default 2)"),
},
{
"name": "N",
"data_type": "option",
"default_value": None,
"description": (
"Set number of folds for reduced error\n\t"
"pruning. One fold is used as pruning set.\n\t(default 3)"
),
},
{
"name": "O",
"data_type": "flag",
"default_value": None,
"description": "Do not collapse tree.",
},
{
"name": "Q",
"data_type": "option",
"default_value": None,
"description": "Seed for random data shuffling (default 1).",
},
{
"name": "R",
"data_type": "flag",
"default_value": None,
"description": "Use reduced error pruning.",
},
{
"name": "S",
"data_type": "flag",
"default_value": None,
"description": "Do not perform subtree raising.",
},
{
"name": "U",
"data_type": "flag",
"default_value": None,
"description": "Use unpruned tree.",
},
{
"name": "batch-size",
"data_type": "option",
"default_value": None,
"description": (
"The desired batch size for batch prediction (default 100)."
),
},
{
"name": "num-decimal-places",
"data_type": "option",
"default_value": None,
"description": (
"The number of decimal places for the output of numbers"
" in the model (default 2)."
),
},
{
"name": "output-debug-info",
"data_type": "flag",
"default_value": None,
"description": (
"If set, classifier is run in debug mode and\n\t"
"may output additional info to the console"
),
},
],
"tag": ["OpenmlWeka", "weka"],
"subflows": [],
},
"identifier": None,
"flow": {
"id": 4,
"uploader": 16,
"name": "weka.J48",
"class_name": "weka.classifiers.trees.J48",
"version": 1,
"external_version": "Weka_3.9.0_11194",
"description": (
"Ross Quinlan (1993). C4.5: Programs for Machine Learning. "
Comment on lines +224 to +233
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (testing): Add a test case that covers subflows with a non-None identifier

This test only exercises identifier=None, while the expectation now treats each subflow as an object with both identifier and a nested flow. Please add or extend a test to cover a subflow with a non-empty identifier so we verify it is returned correctly and not ignored or mis-serialized.

Suggested implementation:

                "identifier": "weka.J48",

If you also want to explicitly keep coverage for identifier=None, you can either:

  1. Duplicate this subflow entry and keep one with "identifier": None and another with a non-empty identifier, or
  2. Add a separate test case that asserts the identifier is None for a different subflow in the response.

Those additional changes would need to align with the actual API response data your fixtures/mocks are producing.

"Morgan Kaufmann Publishers, San Mateo, CA."
),
"upload_date": "2017-03-24T14:26:40",
"language": "English",
"dependencies": "Weka_3.9.0",
"parameter": [
{
"name": "-do-not-check-capabilities",
"data_type": "flag",
"default_value": None,
"description": (
"If set, classifier capabilities are not checked"
" before classifier is built\n\t(use with caution)."
),
},
{
"name": "-doNotMakeSplitPointActualValue",
"data_type": "flag",
"default_value": None,
"description": "Do not make split point actual value.",
},
{
"name": "A",
"data_type": "flag",
"default_value": None,
"description": "Laplace smoothing for predicted probabilities.",
},
{
"name": "B",
"data_type": "flag",
"default_value": None,
"description": "Use binary splits only.",
},
{
"name": "C",
"data_type": "option",
"default_value": 0.25,
"description": (
"Set confidence threshold for pruning.\n\t(default 0.25)"
),
},
{
"name": "J",
"data_type": "flag",
"default_value": None,
"description": (
"Do not use MDL correction for info gain on numeric attributes."
),
},
{
"name": "L",
"data_type": "flag",
"default_value": None,
"description": "Do not clean up after the tree has been built.",
},
{
"name": "M",
"data_type": "option",
"default_value": 2,
"description": (
"Set minimum number of instances per leaf.\n\t(default 2)"
),
},
{
"name": "N",
"data_type": "option",
"default_value": None,
"description": (
"Set number of folds for reduced error\n\t"
"pruning. One fold is used as pruning set.\n\t(default 3)"
),
},
{
"name": "O",
"data_type": "flag",
"default_value": None,
"description": "Do not collapse tree.",
},
{
"name": "Q",
"data_type": "option",
"default_value": None,
"description": "Seed for random data shuffling (default 1).",
},
{
"name": "R",
"data_type": "flag",
"default_value": None,
"description": "Use reduced error pruning.",
},
{
"name": "S",
"data_type": "flag",
"default_value": None,
"description": "Do not perform subtree raising.",
},
{
"name": "U",
"data_type": "flag",
"default_value": None,
"description": "Use unpruned tree.",
},
{
"name": "batch-size",
"data_type": "option",
"default_value": None,
"description": (
"The desired batch size for batch prediction (default 100)."
),
},
{
"name": "num-decimal-places",
"data_type": "option",
"default_value": None,
"description": (
"The number of decimal places for the output of numbers"
" in the model (default 2)."
),
},
{
"name": "output-debug-info",
"data_type": "flag",
"default_value": None,
"description": (
"If set, classifier is run in debug mode and\n\t"
"may output additional info to the console"
),
},
],
"tag": ["OpenmlWeka", "weka"],
"subflows": [],
},
}
],
"tag": ["OpenmlWeka", "weka"],
}
Expand Down
6 changes: 3 additions & 3 deletions tests/routers/openml/migration/flows_migration_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,9 @@ def convert_flow_naming_and_defaults(flow: dict[str, Any]) -> dict[str, Any]:
if parameter["default_value"] is None:
parameter["default_value"] = []
for subflow in flow["subflows"]:
convert_flow_naming_and_defaults(subflow)
subflow["flow"] = convert_flow_naming_and_defaults(subflow["flow"])
if subflow["identifier"] is None:
subflow["identifier"] = []
flow["component"] = flow.pop("subflows")
if flow["component"] == []:
flow.pop("component")
Expand All @@ -75,8 +77,6 @@ def convert_flow_naming_and_defaults(flow: dict[str, Any]) -> dict[str, Any]:
new = nested_remove_single_element_list(new)

expected = php_api.get(f"/flow/{flow_id}").json()["flow"]
if subflow := expected.get("component"):
expected["component"] = subflow["flow"]
# The reason we don't transform "new" to str is that it becomes harder to ignore numeric type
# differences (e.g., '1.0' vs '1')
expected = nested_str_to_num(expected)
Expand Down