From 48b49abe7c52246b8eb88b23b8cea017da0f7f85 Mon Sep 17 00:00:00 2001 From: fangyangci <133664123+fangyangci@users.noreply.github.com> Date: Mon, 22 Jun 2026 19:05:39 -0700 Subject: [PATCH 1/4] fix analyze e2e test (#926) --- tests/e2e/test_analyze_e2e.py | 75 +++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 30 deletions(-) diff --git a/tests/e2e/test_analyze_e2e.py b/tests/e2e/test_analyze_e2e.py index 9f436b27e..babcf38fb 100644 --- a/tests/e2e/test_analyze_e2e.py +++ b/tests/e2e/test_analyze_e2e.py @@ -44,6 +44,7 @@ import pytest from click.testing import CliRunner +from tests.e2e.require_ep import require_ep from winml.modelkit.commands.analyze import analyze from winml.modelkit.utils.constants import EP_ALIASES as _EP_ALIASES from winml.modelkit.utils.constants import SUPPORTED_EPS @@ -66,30 +67,44 @@ def _invoke(args: list[str]): return CliRunner().invoke(analyze, args, obj={}, catch_exceptions=False) +def _build_rule_parquet_path(rules_dir: Path, ep: str, device: str, op: str) -> Path: + """Build parquet path using standard ``_/.parquet`` layout.""" + provider_dir = rules_dir / f"{ep}_{device.upper()}" + provider_dir.mkdir(parents=True, exist_ok=True) + return provider_dir / f"{op}_{ep}_{device.upper()}_ai.onnx_opset13.parquet" + + +def _write_rule_with_result( + rules_dir: Path, + ep: str, + device: str, + compile_run_success: tuple[bool, bool], + op: str = "MatMul", +) -> Path: + """Write a parquet rule with the given compile/run tuple.""" + parquet = _build_rule_parquet_path(rules_dir, ep, device, op) + pd.DataFrame([{"compile_run_success": compile_run_success}]).to_parquet(parquet, index=False) + return parquet + + def _write_supported_rule(rules_dir: Path, ep: str, device: str, op: str = "MatMul") -> Path: """Write a minimally-valid "always supported" parquet rule. The rule has no condition columns — only the ``compile_run_success`` tuple — so it unconditionally matches every node of the named op. """ - parquet = rules_dir / f"{op}_{ep}_{device}_ai.onnx_opset13.parquet" - pd.DataFrame([{"compile_run_success": (True, True)}]).to_parquet(parquet, index=False) - return parquet + return _write_rule_with_result(rules_dir, ep, device, (True, True), op) def _write_unsupported_rule(rules_dir: Path, ep: str, device: str, op: str = "MatMul") -> Path: """Write a parquet rule that classifies the op as unsupported (compile fails).""" - parquet = rules_dir / f"{op}_{ep}_{device}_ai.onnx_opset13.parquet" - pd.DataFrame([{"compile_run_success": (False, False)}]).to_parquet(parquet, index=False) - return parquet + return _write_rule_with_result(rules_dir, ep, device, (False, False), op) def _write_partial_rule(rules_dir: Path, ep: str, device: str, op: str = "MatMul") -> Path: """Write a parquet rule that classifies the op as partially supported (compile succeeds, run fails). No condition columns → unconditional match.""" - parquet = rules_dir / f"{op}_{ep}_{device}_ai.onnx_opset13.parquet" - pd.DataFrame([{"compile_run_success": (True, False)}]).to_parquet(parquet, index=False) - return parquet + return _write_rule_with_result(rules_dir, ep, device, (True, False), op) @pytest.fixture @@ -325,42 +340,42 @@ def test_default_device_auto_resolves_single_best_device_for_pinned_ep( self, onnx_model_path: Path, rules_dir: Path, - monkeypatch: pytest.MonkeyPatch, ) -> None: """Omitting ``--device`` resolves a single best device for the pinned EP. - ``auto`` now picks one target via the shared sysinfo helpers (like - build/run): for ``qnn`` locally available on NPU and GPU, the - highest-priority device (NPU) is chosen — a single ``(qnn, NPU)`` run. + ``auto`` picks one target via the shared sysinfo helpers (like + build/run). On a QNN-capable host the highest-priority device is NPU, + so ``--ep qnn`` with no ``--device`` resolves to a single ``(qnn, NPU)`` + run that is fully supported. - The test is hardware-agnostic: local availability is controlled via the - ORT device->EP map monkeypatch rather than real machine capabilities. + Real end-to-end: gated on actual QNN availability via ``require_ep`` + rather than monkeypatching local capabilities. The auto-resolution + logic itself is covered hardware-agnostically by the unit-level + selection-matrix test. """ - monkeypatch.setattr( - "winml.modelkit.sysinfo.device._get_device_ep_map_from_ort", - lambda: { - "npu": ("QNNExecutionProvider",), - "gpu": ("QNNExecutionProvider", "DmlExecutionProvider"), - "cpu": ("CPUExecutionProvider",), - }, - ) + require_ep("qnn") _write_supported_rule(rules_dir, "QNNExecutionProvider", "NPU") result = _invoke(["-m", str(onnx_model_path), "--ep", "qnn", "--quiet"]) assert result.exit_code == 0 def test_default_auto_selects_single_ep_when_ep_omitted( - self, onnx_model_path: Path, rules_dir: Path + self, + onnx_model_path: Path, + rules_dir: Path, ) -> None: """Omitting ``--ep`` resolves a single best EP from local availability. - With a synthetic rule present the run must complete cleanly; the auto - axis resolves from the real ORT device map (CPU EP is always available - as a fallback), so only documented exit codes are asserted.""" + On a QNN-capable host the highest-priority device (NPU) and its + highest-priority EP (QNN) win, so bare ``auto`` resolves to ``(qnn, + NPU)`` and should be fully supported. + + Real end-to-end: gated on actual QNN availability via ``require_ep`` + rather than monkeypatching local capabilities. + """ + require_ep("qnn") _write_supported_rule(rules_dir, "QNNExecutionProvider", "NPU") result = _invoke(["-m", str(onnx_model_path), "--quiet"]) - # Aggregate result depends on whether the resolved EP is fully - # supported; only assert documented exit codes. - assert result.exit_code in {0, 1, 2} + assert result.exit_code == 0 # =========================================================================== From 6df7dbac9e19a781719e86cd0e663cf0a0b88bac Mon Sep 17 00:00:00 2001 From: fangyangci <133664123+fangyangci@users.noreply.github.com> Date: Mon, 22 Jun 2026 19:45:11 -0700 Subject: [PATCH 2/4] fix analyze coverage bugs (#922) --- CHANGELOG.md | 2 +- .../modelkit/analyze/core/runtime_checker.py | 5 +- .../analyze/core/runtime_checker_query.py | 34 +++---- .../rules/runtime_check_rules/README.md | 17 ++-- .../modelkit/analyze/utils/rule_loader.py | 88 +++++++++--------- src/winml/modelkit/commands/analyze.py | 89 +++++++++++-------- .../op_input_gen/pad_input_generator.py | 5 ++ tests/unit/analyze/core/test_qdq.py | 4 +- .../test_runtime_checker_query_parquet.py | 31 +++++++ tests/unit/analyze/models/test_rule_loader.py | 65 ++++++++------ .../unit/analyze/test_static_analyzer_cli.py | 15 ++++ 11 files changed, 223 insertions(+), 132 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e6ab75d9..9d2f5bfb0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -112,7 +112,7 @@ Expand-Archive -Path .\rules-v0.0.3.zip -DestinationPath src\winml\modelkit\anal `gh release download` skips pre-releases unless you pass `--tag`, so the explicit `v0.0.3` is required. -If you set `MODELKIT_RULES_DIR` anywhere (shell profile, CI pipeline, user env), rename it to `WINMLCLI_RULES_DIR`. Same `os.pathsep`-separated multi-directory semantics; relative paths still resolve from `src/winml/modelkit/analyze/utils/`. +If you set `MODELKIT_RULES_DIR` anywhere (shell profile, CI pipeline, user env), rename it to `WINMLCLI_RULES_DIR`. It points to a single rules directory (not split on `os.pathsep`); relative paths still resolve from `src/winml/modelkit/analyze/utils/`. Related PRs: #411 (Parquet migration), #600 (rules zip in release), #627 (versioned filename), #587 (env var rename as part of ModelKit → WinML CLI Wave 1). diff --git a/src/winml/modelkit/analyze/core/runtime_checker.py b/src/winml/modelkit/analyze/core/runtime_checker.py index 62c5d3d34..4a766c56b 100644 --- a/src/winml/modelkit/analyze/core/runtime_checker.py +++ b/src/winml/modelkit/analyze/core/runtime_checker.py @@ -221,12 +221,11 @@ def op_support( run_for_node_total_ms = 0 callback_total_ms = 0 - # Get all nodes from model - model_proto = self._model.get_model() # Get cached RuntimeCheckerQuery query = self._get_query() + # Use the same graph snapshot as RuntimeCheckerQuery (post shape inference). + nodes = query.model_proto.graph.node # Use tqdm for progress unless caller provides a callback - nodes = model_proto.graph.node iterator = nodes if on_node_result else tqdm.tqdm(nodes) for node in iterator: node_start = time.perf_counter() diff --git a/src/winml/modelkit/analyze/core/runtime_checker_query.py b/src/winml/modelkit/analyze/core/runtime_checker_query.py index 86e98e25a..e58928b34 100644 --- a/src/winml/modelkit/analyze/core/runtime_checker_query.py +++ b/src/winml/modelkit/analyze/core/runtime_checker_query.py @@ -62,7 +62,9 @@ shape_and_dtype_from_valueinfo, ) from ..utils.node_key_utils import build_node_key_by_node_id, resolve_stable_node_key -from ..utils.rule_loader import resolve_rule_parquet_path +from ..utils.rule_loader import ( + resolve_rule_parquet_path, +) from ..utils.timing_utils import make_timing_logger from .node_checkers.base import NodeChecker from .node_checkers.registry import NodeCheckerRegistry @@ -1935,13 +1937,13 @@ def _load_parquet_rule_table( op_since_version: int, is_qdq: bool, for_debug: bool = False, - ) -> tuple[pd.DataFrame | None, Path | None, _ParquetConditionTree | None]: + ) -> tuple[Path, pd.DataFrame | None, _ParquetConditionTree | None]: """Load per-op parquet rule table with cache. Returns: - tuple[pd.DataFrame | None, Path | None, _ParquetConditionTree | None]: - Loaded dataframe when available, otherwise None, - the resolved parquet path used for lookup when found, + tuple[Path, pd.DataFrame | None, _ParquetConditionTree | None]: + The resolved or expected parquet path for lookup, + loaded dataframe when available, otherwise None, and optional pre-built condition tree. """ parquet_name = ( @@ -1950,26 +1952,30 @@ def _load_parquet_rule_table( ) parquet_path = resolve_rule_parquet_path(parquet_name, for_debug=for_debug) + # This per-instance cache assumes a stable rules location for the query's + # lifetime: the rule-dir env vars must not change between calls. The path + # is recomputed each call (so reporting reflects the current location), + # but a cached None is reused without re-probing the filesystem. cache_key = (op_name, op_domain.value, op_since_version, is_qdq) if cache_key in self._parquet_rule_table_cache: - if parquet_path is not None: + if self._parquet_rule_table_cache[cache_key] is not None: _log_parquet_cache_hit(parquet_path, scope="instance") return ( - self._parquet_rule_table_cache[cache_key], parquet_path, + self._parquet_rule_table_cache[cache_key], self._parquet_condition_tree_cache.get(cache_key), ) - if parquet_path is None: + if not parquet_path.exists(): self._parquet_rule_table_cache[cache_key] = None self._parquet_condition_tree_cache[cache_key] = None - return None, None, None + return parquet_path, None, None table_df = _get_or_load_parquet_table_global(parquet_path) condition_tree = _build_condition_tree(table_df) self._parquet_rule_table_cache[cache_key] = table_df self._parquet_condition_tree_cache[cache_key] = condition_tree - return table_df, parquet_path, condition_tree + return parquet_path, table_df, condition_tree def _run_for_node_with_parquet_rules( self, @@ -2023,7 +2029,7 @@ def _finish(result: PatternRuntime, outcome: str, **extra: Any) -> PatternRuntim since_version_ms = _elapsed_ms(since_version_start) load_table_start = time.perf_counter() - table_df, parquet_path, condition_tree = self._load_parquet_rule_table( + parquet_path, table_df, condition_tree = self._load_parquet_rule_table( node.op_type, op_domain, op_since_version, @@ -2031,10 +2037,8 @@ def _finish(result: PatternRuntime, outcome: str, **extra: Any) -> PatternRuntim for_debug=for_debug, ) load_table_ms = _elapsed_ms(load_table_start) - parquet_file = parquet_path.name if parquet_path is not None else None - parquet_path_norm = ( - _normalize_table_path(parquet_path) if parquet_path is not None else None - ) + parquet_file = parquet_path.name + parquet_path_norm = _normalize_table_path(parquet_path) if table_df is None: if run_unknown_op: diff --git a/src/winml/modelkit/analyze/rules/runtime_check_rules/README.md b/src/winml/modelkit/analyze/rules/runtime_check_rules/README.md index 698a60a15..5f90fdbf9 100644 --- a/src/winml/modelkit/analyze/rules/runtime_check_rules/README.md +++ b/src/winml/modelkit/analyze/rules/runtime_check_rules/README.md @@ -56,7 +56,7 @@ Copy all runtime rule parquet files from: ### Option 4: Use external rules directories via environment variable -Set `WINMLCLI_RULES_DIR` to one or more directories containing parquet rule artifacts. +Set `WINMLCLI_RULES_DIR` to a single directory containing parquet rule artifacts. Important: relative paths are resolved from `src/winml/modelkit/analyze/utils/` (the directory of `rule_loader.py`), not from the current terminal working directory. @@ -64,17 +64,18 @@ directory of `rule_loader.py`), not from the current terminal working directory. - Windows (PowerShell, user-level absolute path): `[Environment]::SetEnvironmentVariable("WINMLCLI_RULES_DIR", "C:\*path*\rules", "User")` - Windows (PowerShell, user-level repo-relative path): `[Environment]::SetEnvironmentVariable("WINMLCLI_RULES_DIR", "..\..\..\..\..\..\ModelKitArtifacts\rules", "User")` -Multiple directories are supported using `os.pathsep` (`;` on Windows, `:` on Unix-like systems). +Only one directory is supported. The value is not split on `os.pathsep`; it is treated +as a single literal directory path. ## Rule lookup order -The analyzer searches directories in this order: +`WINMLCLI_RULES_DIR` overrides — it does not augment — the embedded default: -1. Directories listed in `WINMLCLI_RULES_DIR` (left to right) -2. Embedded default directory: `src/winml/modelkit/analyze/rules/runtime_check_rules/` - -`WINMLCLI_RULES_DIR` takes precedence over the embedded default when the same parquet file -exists in multiple locations. +- If `WINMLCLI_RULES_DIR` is set, only that single directory is searched. The embedded + default directory is **not** consulted, so that directory must contain every parquet + rule you need. +- If `WINMLCLI_RULES_DIR` is unset or empty, only the embedded default directory is searched: + `src/winml/modelkit/analyze/rules/runtime_check_rules/`. ## What happens if parquet rules are missing diff --git a/src/winml/modelkit/analyze/utils/rule_loader.py b/src/winml/modelkit/analyze/utils/rule_loader.py index a0f08ca0c..84f5dd94e 100644 --- a/src/winml/modelkit/analyze/utils/rule_loader.py +++ b/src/winml/modelkit/analyze/utils/rule_loader.py @@ -17,12 +17,12 @@ logger = logging.getLogger(__name__) -#: Environment variable for additional runtime check rules directories. -#: Use ``os.pathsep`` (`;` on Windows, `:` on Unix) to separate multiple paths. +#: Environment variable for the runtime check rules directory. +#: Holds a single directory path; it is not split on ``os.pathsep``. WINMLCLI_RULES_DIR_ENV = "WINMLCLI_RULES_DIR" -#: Environment variable for additional runtime debug rule directories. -#: Use ``os.pathsep`` (`;` on Windows, `:` on Unix) to separate multiple paths. +#: Environment variable for the runtime debug rule directory. +#: Holds a single directory path; it is not split on ``os.pathsep``. WINMLCLI_RULES_DIR_FOR_DEBUG_ENV = "WINMLCLI_RULES_DIR_FOR_DEBUG" # Directory containing this module file. Relative env-var entries are resolved from here. @@ -46,54 +46,59 @@ def _resolve_env_rules_dir_entry(entry: str) -> Path: return (_RULE_LOADER_DIR / entry_path).resolve() -def _get_env_rules_dirs(env_name: str) -> list[Path]: - """Parse ``os.pathsep``-separated env var values into absolute paths.""" - dirs: list[Path] = [] +def _get_env_rules_dir(env_name: str) -> Path | None: + """Resolve the single directory configured in ``env_name``. + + The value is treated as one directory path and is intentionally not split + on ``os.pathsep`` -- only a single rules directory is supported. Returns + ``None`` when the env var is unset or blank. + """ env_val = os.environ.get(env_name, "").strip() - if env_val: - for entry in env_val.split(os.pathsep): - entry = entry.strip() - if entry: - dirs.append(_resolve_env_rules_dir_entry(entry)) - return dirs + if not env_val: + return None + return _resolve_env_rules_dir_entry(env_val) def get_runtime_rules_search_dirs() -> list[Path]: - """Return ordered list of directories to search for runtime rule artifacts. + """Return the directory to search for runtime rule artifacts. - The search order is: - 1. Any extra directories listed in the :data:`WINMLCLI_RULES_DIR` env var - (separated by ``os.pathsep``). Absolute paths are used directly; - relative paths are resolved relative to this module file directory. - 2. Default embedded directory (``src/winml/modelkit/analyze/rules/runtime_check_rules/``) + Selection behavior: + 1. If :data:`WINMLCLI_RULES_DIR` is set, use only that directory. + Absolute paths are used directly; a relative path is resolved + relative to this module file directory. + 2. If :data:`WINMLCLI_RULES_DIR` is unset/empty, use the embedded default + directory (``src/winml/modelkit/analyze/rules/runtime_check_rules/``). Returns: - List of directory Paths (may include non-existent ones; callers filter). + Single-element list with the selected directory (the embedded default + when the env var is unset). The directory may not exist; callers filter. """ - dirs = _get_env_rules_dirs(WINMLCLI_RULES_DIR_ENV) - dirs.append(_DEFAULT_RUNTIME_RULES_DIR) - return dirs + env_dir = _get_env_rules_dir(WINMLCLI_RULES_DIR_ENV) + if env_dir is not None: + return [env_dir] + return [_DEFAULT_RUNTIME_RULES_DIR] def get_runtime_rules_debug_search_dirs() -> list[Path]: - """Return ordered debug-rule directories from env var only. + """Return the debug-rule directory from the env var only. Unlike :func:`get_runtime_rules_search_dirs`, this intentionally has no - embedded default fallback directory. + embedded default fallback: an empty list is returned when + :data:`WINMLCLI_RULES_DIR_FOR_DEBUG` is unset. """ - return _get_env_rules_dirs(WINMLCLI_RULES_DIR_FOR_DEBUG_ENV) + env_dir = _get_env_rules_dir(WINMLCLI_RULES_DIR_FOR_DEBUG_ENV) + return [env_dir] if env_dir is not None else [] -def resolve_rule_parquet_path(parquet_filename: str, for_debug: bool = False) -> Path | None: - """Resolve a parquet runtime-rule artifact from ``_/`` subdirs. +def resolve_rule_parquet_path(parquet_filename: str, for_debug: bool = False) -> Path: + """Resolve preferred parquet runtime-rule path from ``_/`` subdirs. Args: parquet_filename: Bare file name, e.g. ``Split_QNNExecutionProvider_NPU_ai.onnx_opset13.parquet`` Returns: - Resolved Path to the parquet file if found in provider subdirectories; - otherwise ``None``. + Preferred candidate Path in search order. Existence is not checked here. """ def _infer_ep_device_subdir(filename: str) -> str | None: @@ -108,21 +113,22 @@ def _infer_ep_device_subdir(filename: str) -> str | None: return f"{match.group('ep')}_{match.group('device')}" ep_device_subdir = _infer_ep_device_subdir(parquet_filename) - if ep_device_subdir is None: - return None + relative_path = ( + Path(ep_device_subdir) / parquet_filename + if ep_device_subdir is not None + else Path(parquet_filename) + ) if for_debug: - for debug_dir in get_runtime_rules_debug_search_dirs(): - candidate_in_subdir = debug_dir / ep_device_subdir / parquet_filename - if candidate_in_subdir.exists(): - return candidate_in_subdir + debug_dirs = get_runtime_rules_debug_search_dirs() + if debug_dirs: + return debug_dirs[0] / relative_path - for search_dir in get_runtime_rules_search_dirs(): - candidate_in_subdir = search_dir / ep_device_subdir / parquet_filename - if candidate_in_subdir.exists(): - return candidate_in_subdir + search_dirs = get_runtime_rules_search_dirs() + if search_dirs: + return search_dirs[0] / relative_path - return None + return relative_path class RuleLoader: diff --git a/src/winml/modelkit/commands/analyze.py b/src/winml/modelkit/commands/analyze.py index a8dc5f289..07c618f6e 100644 --- a/src/winml/modelkit/commands/analyze.py +++ b/src/winml/modelkit/commands/analyze.py @@ -928,45 +928,60 @@ def analyze( devices = [] devices = sorted(d.upper() for d in devices) - eps: list[EPName | None] - if ep == "all": - eps = list(SUPPORTED_EPS) - elif ep == "auto": - # Single highest-priority EP available on the target device. With - # device == "all" there is no single device context, so fall back to - # the best available device purely for EP selection. - if device == "all": - try: - ref_device, _ = resolve_device(device="auto") - except (ValueError, RuntimeError) as e: - logger.error("Could not auto-select an execution provider: %s", e) + execution_pairs: list[tuple[EPName, str]] + if ep == "auto" and device == "all": + # auto + all: resolve the best available EP per device rather than + # picking a single EP from one ref device and fanning it across + # unrelated devices. resolve_eps() already returns only EPs that are + # valid and locally available for the given device, so the resulting + # pairs need no further EP_SUPPORTED_DEVICES filtering. + execution_pairs = _sort_ep_device_pairs( + [ + (device_eps[0], target_device) + for target_device in devices + if (device_eps := resolve_eps(target_device)) + ] + ) + else: + eps: list[EPName | None] + if ep == "all": + eps = list(SUPPORTED_EPS) + elif ep == "auto": + # Single highest-priority EP available on the target device. + # device == "all" is handled above, so a concrete device context + # exists here -- but guard against an empty device list (e.g. a + # programmatic ``device=None`` call) so we exit cleanly instead + # of raising an unguarded IndexError on ``devices[0]``. + ref_device = devices[0] if devices else None + if not ref_device: + logger.error("No device context available for EP auto-resolution.") + sys.exit(2) + compatible_eps = resolve_eps(ref_device) + if not compatible_eps: + logger.error( + "No execution provider is available for device '%s'.", ref_device + ) sys.exit(2) + eps = [compatible_eps[0]] else: - ref_device = devices[0] - compatible_eps = resolve_eps(ref_device) - if not compatible_eps: - logger.error("No execution provider is available for device '%s'.", ref_device) - sys.exit(2) - eps = [compatible_eps[0]] - else: - # ep is a specific EP or alias - eps = [normalize_ep_name(ep)] - - # Build with a for-loop rather than a single nested comprehension so - # the `candidate_ep is not None and ... in EP_SUPPORTED_DEVICES` - # narrowing carries through to the appended tuple's type (EPName, - # not str). The inner generator stays a comprehension to satisfy - # ruff PERF401. - execution_pairs: list[tuple[EPName, str]] = [] - for candidate_ep in eps: - if candidate_ep is None or candidate_ep not in EP_SUPPORTED_DEVICES: - continue - execution_pairs.extend( - (candidate_ep, candidate_device) - for candidate_device in devices - if candidate_device.lower() in EP_SUPPORTED_DEVICES[candidate_ep] - ) - execution_pairs = _sort_ep_device_pairs(execution_pairs) + # ep is a specific EP or alias + eps = [normalize_ep_name(ep)] + + # Build with a for-loop rather than a single nested comprehension so + # the `candidate_ep is not None and ... in EP_SUPPORTED_DEVICES` + # narrowing carries through to the appended tuple's type (EPName, + # not str). The inner generator stays a comprehension to satisfy + # ruff PERF401. + execution_pairs = [] + for candidate_ep in eps: + if candidate_ep is None or candidate_ep not in EP_SUPPORTED_DEVICES: + continue + execution_pairs.extend( + (candidate_ep, candidate_device) + for candidate_device in devices + if candidate_device.lower() in EP_SUPPORTED_DEVICES[candidate_ep] + ) + execution_pairs = _sort_ep_device_pairs(execution_pairs) # Local pairs are still needed to gate --run-unknown-op probing # (_resolve_run_unknown_op). Single-target `auto` selection is already diff --git a/src/winml/modelkit/pattern/op_input_gen/pad_input_generator.py b/src/winml/modelkit/pattern/op_input_gen/pad_input_generator.py index 86bd49ffc..37bca04cd 100644 --- a/src/winml/modelkit/pattern/op_input_gen/pad_input_generator.py +++ b/src/winml/modelkit/pattern/op_input_gen/pad_input_generator.py @@ -86,6 +86,11 @@ def get_input_and_infinite_attribute_combinations( "pads": InputValueConstraint(np.array([0, 0, 1, 1, 0, 0, 1, 1], dtype=np.int64)), "constant_value": InputValueConstraint(np.array(0.0, dtype=np.float32)), }, + { + "data": InputShapeConstraint((2, 3, 4, 5)), + "pads": InputValueConstraint(np.array([0, 1, 2, 0, 1, 0, 0, 2], dtype=np.int64)), + "constant_value": InputValueConstraint(np.array(0.0, dtype=np.float32)), + }, # ===== 5D Input ===== { "data": InputShapeConstraint((2, 3, 4, 4, 5)), diff --git a/tests/unit/analyze/core/test_qdq.py b/tests/unit/analyze/core/test_qdq.py index 4f3a5a76e..cfda4b4fa 100644 --- a/tests/unit/analyze/core/test_qdq.py +++ b/tests/unit/analyze/core/test_qdq.py @@ -1178,8 +1178,8 @@ class TestIterQDQCombinations: ), # shape 3 * finite attributes 2 * 2 * 2 * optional combinations 2 * 2 * 2 * 4 ( "Pad", - 1152, - ), # shape 9 * mode 4 * QDQ 4 * is_constant pads 2 * constant_value present/absent 2 + 1280, + ), # shape 10 * mode 4 * QDQ 4 * is_constant pads 2 * constant_value present/absent 2 # * Tind 2 (axes not used) # All Reduce* use this and it is enough ( diff --git a/tests/unit/analyze/core/test_runtime_checker_query_parquet.py b/tests/unit/analyze/core/test_runtime_checker_query_parquet.py index 6fd4746d1..f8331ac17 100644 --- a/tests/unit/analyze/core/test_runtime_checker_query_parquet.py +++ b/tests/unit/analyze/core/test_runtime_checker_query_parquet.py @@ -167,6 +167,37 @@ def test_parquet_lookup_omits_debug_details_without_for_debug( assert result_parquet.result.run is False assert result_parquet.result.debug_details is None + def test_rules_not_found_reports_expected_table_path_and_file( + self, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + patched_query_conditions, + ): + """Missing parquet should still report the expected lookup path and file name.""" + del patched_query_conditions + + monkeypatch.setenv("WINMLCLI_RULES_DIR", str(tmp_path)) + + model = _build_add_model() + node = model.graph.node[0] + + query_parquet = RuntimeCheckerQuery(model, "QNNExecutionProvider", "NPU") + query_parquet.node_checkers = [] + result = query_parquet.run_for_node(node, for_debug=True, run_unknown_op=False) + + assert result.result.no_data is True + assert result.result.reason == "rules_not_found" + + debug_details = result.result.debug_details + assert isinstance(debug_details, dict) + + expected_file = "Add_QNNExecutionProvider_NPU_ai.onnx_opset13.parquet" + expected_suffix = f"QNNExecutionProvider_NPU/{expected_file}" + + assert debug_details.get("table_file") == expected_file + table_path = str(debug_details.get("table_path", "")).replace("\\", "/") + assert table_path.endswith(expected_suffix) + def test_parquet_lookup_prefers_debug_dir_when_for_debug( self, tmp_path: Path, diff --git a/tests/unit/analyze/models/test_rule_loader.py b/tests/unit/analyze/models/test_rule_loader.py index 263da39e5..a81d73c23 100644 --- a/tests/unit/analyze/models/test_rule_loader.py +++ b/tests/unit/analyze/models/test_rule_loader.py @@ -466,20 +466,30 @@ class TestRuntimeRulesSearchDirs: """Test get_runtime_rules_search_dirs behavior.""" def test_default_search_dir_included(self, monkeypatch): - """Default embedded dir is always in the search list.""" + """Default embedded dir is used when env var is unset.""" monkeypatch.delenv("WINMLCLI_RULES_DIR", raising=False) dirs = get_runtime_rules_search_dirs() - assert len(dirs) >= 1 - assert dirs[0].name == "runtime_check_rules" + assert len(dirs) == 1 + assert dirs[0] == _DEFAULT_RUNTIME_RULES_DIR - def test_env_var_adds_dirs(self, monkeypatch): - """WINMLCLI_RULES_DIR adds extra search directories.""" - monkeypatch.setenv("WINMLCLI_RULES_DIR", f"/extra/path1{os.pathsep}/extra/path2") + def test_env_var_overrides_default_with_single_dir(self, monkeypatch): + """WINMLCLI_RULES_DIR overrides default and uses only that one directory.""" + monkeypatch.setenv("WINMLCLI_RULES_DIR", "/extra/path1") dirs = get_runtime_rules_search_dirs() - assert len(dirs) == 3 + assert len(dirs) == 1 assert dirs[0] == Path("/extra/path1").resolve() - assert dirs[1] == Path("/extra/path2").resolve() - assert dirs[2].name == "runtime_check_rules" + + def test_env_var_not_split_on_pathsep(self, monkeypatch): + """Only one directory is supported: the value is not split on os.pathsep. + + A value containing os.pathsep is treated as a single literal directory + path rather than multiple search dirs, so the embedded default is never + silently consulted as a fallback. + """ + monkeypatch.setenv("WINMLCLI_RULES_DIR", f"/extra/path1{os.pathsep}/extra/path2") + dirs = get_runtime_rules_search_dirs() + assert len(dirs) == 1 + assert _DEFAULT_RUNTIME_RULES_DIR not in dirs def test_env_var_relative_path_resolved_from_module_dir(self, monkeypatch): """Relative WINMLCLI_RULES_DIR entries are resolved from rule_loader.py dir.""" @@ -488,15 +498,15 @@ def test_env_var_relative_path_resolved_from_module_dir(self, monkeypatch): dirs = get_runtime_rules_search_dirs() - assert len(dirs) == 2 + assert len(dirs) == 1 assert dirs[0] == (_RULE_LOADER_DIR / relative_entry).resolve() - assert dirs[1] == _DEFAULT_RUNTIME_RULES_DIR def test_env_var_empty_ignored(self, monkeypatch): """Empty WINMLCLI_RULES_DIR is treated as unset.""" monkeypatch.setenv("WINMLCLI_RULES_DIR", " ") dirs = get_runtime_rules_search_dirs() assert len(dirs) == 1 + assert dirs[0] == _DEFAULT_RUNTIME_RULES_DIR class TestRuntimeRules4CharKeyRoundTrip: @@ -555,16 +565,21 @@ def test_round_trip_all_files_under_runtime_rules_search_dirs(self, monkeypatch, class TestRuntimeRulesDebugSearchDirs: """Test get_runtime_rules_debug_search_dirs behavior.""" - def test_debug_env_var_adds_dirs(self, monkeypatch): - """WINMLCLI_RULES_DIR_FOR_DEBUG adds extra debug search directories.""" + def test_debug_env_var_single_dir(self, monkeypatch): + """WINMLCLI_RULES_DIR_FOR_DEBUG uses a single debug search directory.""" + monkeypatch.setenv(WINMLCLI_RULES_DIR_FOR_DEBUG_ENV, "/debug/path1") + dirs = get_runtime_rules_debug_search_dirs() + assert len(dirs) == 1 + assert dirs[0] == Path("/debug/path1").resolve() + + def test_debug_env_var_not_split_on_pathsep(self, monkeypatch): + """Only one debug directory is supported: not split on os.pathsep.""" monkeypatch.setenv( WINMLCLI_RULES_DIR_FOR_DEBUG_ENV, f"/debug/path1{os.pathsep}/debug/path2", ) dirs = get_runtime_rules_debug_search_dirs() - assert len(dirs) == 2 - assert dirs[0] == Path("/debug/path1").resolve() - assert dirs[1] == Path("/debug/path2").resolve() + assert len(dirs) == 1 def test_debug_env_var_relative_path_resolved_from_module_dir(self, monkeypatch): """Relative WINMLCLI_RULES_DIR_FOR_DEBUG entries are module-dir relative.""" @@ -599,23 +614,23 @@ def test_resolve_parquet_finds_file_in_env_dir_provider_subdir(self, monkeypatch assert result == nested_dir.resolve() / parquet_name assert result.exists() - def test_resolve_parquet_returns_none_when_missing(self, monkeypatch): - """When parquet is missing everywhere, resolve returns None.""" + def test_resolve_parquet_returns_candidate_when_missing(self, monkeypatch): + """When parquet is missing, resolve still returns the preferred candidate path.""" monkeypatch.delenv("WINMLCLI_RULES_DIR", raising=False) parquet_name = "missing_rule.parquet" result = resolve_rule_parquet_path(parquet_name) - assert result is None + assert result == _DEFAULT_RUNTIME_RULES_DIR / parquet_name - def test_resolve_parquet_ignores_flat_layout(self, monkeypatch): - """Flat parquet under search dir is ignored; provider subdir is required.""" + def test_resolve_parquet_returns_provider_subdir_candidate_for_flat_layout(self, monkeypatch): + """Flat parquet does not affect the returned provider-subdir candidate path.""" with tempfile.TemporaryDirectory() as tmpdir: parquet_name = "Split_QNNExecutionProvider_NPU_ai.onnx_opset13.parquet" (Path(tmpdir) / parquet_name).write_bytes(b"PAR1") monkeypatch.setenv("WINMLCLI_RULES_DIR", tmpdir) result = resolve_rule_parquet_path(parquet_name) - assert result is None + assert result == Path(tmpdir).resolve() / "QNNExecutionProvider_NPU" / parquet_name def test_resolve_parquet_for_debug_prefers_debug_dir(self, monkeypatch): """for_debug=True should prioritize WINMLCLI_RULES_DIR_FOR_DEBUG entries first.""" @@ -634,8 +649,8 @@ def test_resolve_parquet_for_debug_prefers_debug_dir(self, monkeypatch): result = resolve_rule_parquet_path(parquet_name, for_debug=True) assert result == debug_file.resolve() - def test_resolve_parquet_for_debug_falls_back_to_rules_dir(self, monkeypatch): - """for_debug=True falls back to normal search dirs when debug file is missing.""" + def test_resolve_parquet_for_debug_returns_debug_candidate_even_if_missing(self, monkeypatch): + """for_debug=True returns debug candidate path without checking existence.""" with tempfile.TemporaryDirectory() as rules_tmp, tempfile.TemporaryDirectory() as debug_tmp: parquet_name = "Split_QNNExecutionProvider_NPU_ai.onnx_opset13.parquet" rules_file = Path(rules_tmp) / "QNNExecutionProvider_NPU" / parquet_name @@ -646,4 +661,4 @@ def test_resolve_parquet_for_debug_falls_back_to_rules_dir(self, monkeypatch): monkeypatch.setenv(WINMLCLI_RULES_DIR_FOR_DEBUG_ENV, debug_tmp) result = resolve_rule_parquet_path(parquet_name, for_debug=True) - assert result == rules_file.resolve() + assert result == Path(debug_tmp).resolve() / "QNNExecutionProvider_NPU" / parquet_name diff --git a/tests/unit/analyze/test_static_analyzer_cli.py b/tests/unit/analyze/test_static_analyzer_cli.py index 9649c2911..d1e412d15 100644 --- a/tests/unit/analyze/test_static_analyzer_cli.py +++ b/tests/unit/analyze/test_static_analyzer_cli.py @@ -1225,6 +1225,20 @@ class TestAnalyzeEPDeviceSelectionMatrix: None, ), ("openvino", "gpu", 0, [("OpenVINOExecutionProvider", "GPU")], None), + # ep=auto, device=all: best available EP *per device* rather than one + # ref-device EP fanned across all devices. GPU->NvTensorRTRTX, + # NPU->OpenVINO, CPU->OpenVINO from the simulated local matrix. + ( + None, + "all", + 0, + [ + ("NvTensorRTRTXExecutionProvider", "GPU"), + ("OpenVINOExecutionProvider", "NPU"), + ("OpenVINOExecutionProvider", "CPU"), + ], + None, + ), # ep=all, device=all: every (ep, device) combo allowed by EP_SUPPORTED_DEVICES. ( "all", @@ -1253,6 +1267,7 @@ class TestAnalyzeEPDeviceSelectionMatrix: "qnn-empty", "qnn-all", "openvino-gpu", + "auto-all", "all-all", ], ) From 32a8447579df34b61e0d865f33fb28bb5fd04ebe Mon Sep 17 00:00:00 2001 From: Zhipeng Wang Date: Tue, 23 Jun 2026 11:25:59 +0800 Subject: [PATCH 3/4] fix: declare psutil as a runtime dependency (#937) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary `winml perf` crashes on a clean install with `No module named 'psutil'` because `src/winml/modelkit/session/monitor/memory_tracker.py` imports `psutil` at module level, and the perf flow imports that module unconditionally. `psutil` was never declared in `[project].dependencies` — only the dev type stub `types-psutil` is present — so the published wheel's `Requires-Dist` omits it, breaking `winml perf` (and `--monitor` / `--memory`) out-of-the-box for every user. Regression from #861 (`feat: add --memory flag`); `memory_tracker.py` did not exist in v0.1.0, so `winml perf` was unaffected there. Installing `psutil` manually confirms perf/build/`--monitor` otherwise work correctly — the only defect is the missing dependency declaration. ## Change Add `psutil>=7` to `[project].dependencies` (aligns with the existing `types-psutil>=7.2.2` stub). Targeting `release/v0.2.0` directly as a release hotfix; `main` will pick it up via the post-release merge-back. Closes #936 --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 46fa96dc7..3465a921b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,6 +56,7 @@ dependencies = [ # non-functional but huge diffs across every generated rule artifact. If you # truly must bump this, regenerate ALL parquet artifacts in the same change. "pandas==2.3.3", + "psutil>=7", "pydantic>=2", "python-multipart>=0.0.22", "rapidfuzz>=3.9", From 1819375da59cb6e0412c26145bad61ebf11b61d1 Mon Sep 17 00:00:00 2001 From: Zhipeng Wang Date: Tue, 23 Jun 2026 21:18:37 +0800 Subject: [PATCH 4/4] Release v0.2.0: CHANGELOG + version bump (#938) T-2 release prep for **v0.2.0**, targeting `release/v0.2.0` so the release notes and version land on the branch before tagging. - **CHANGELOG**: add the v0.2.0 entry covering the 96 PRs merged since `v0.1.0`. - **Version**: bump `version` `0.1.0` -> `0.2.0` in `pyproject.toml`. Merge-back to `main` happens at T+1. --- CHANGELOG.md | 39 +++++++++++++++++++++++++++++++++++++++ pyproject.toml | 2 +- 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9d2f5bfb0..68fb2f1f5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,45 @@ All notable changes to this project are documented in this file. The format is loosely based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). +## WinML CLI v0.2.0 + +This cycle unifies **task detection** across the CLI (modality- and architecture-aware) and expands the eval and perf surfaces — new depth-estimation and tensor-similarity evaluators, a full SA eval pipeline with an HTML report, `winml perf --memory` / `--ep-options`, and `--format json` on `eval` / `analyze` / `perf`. `winml compile` gains a multi-model shared EP context, `winml build` gains `--precision`, and timm image-classification is supported. See the behavior changes below. + +### ⚠️ Behavior changes + +- `winml perf` no longer compiles by default — added `--compile/--no-compile`, defaulting to no-compile (#879). +- Boolean CLI options are now `--flag/--no-flag` pairs (#844). +- Telemetry is enabled in the shipped wheel; consent reworded as "unlinked pseudonymized" (#810). + +### ✨ Improvements + +- **Task detection** — modality- and architecture-aware `detect_task`, unified across commands via `resolve_task` / `TaskResolution` (#807, #841, #878). +- `winml perf` — `--memory` reports RAM/VRAM per phase (#861); `--ep-options` passes runtime EP options (#865, #889); output now shows the model path and precision (#875). +- `winml compile` — multi-model shared EP context with a selectable backend (#871). +- `winml build` — added `--precision` (#914). +- `winml inspect` — renders composite (pipeline-led) model structure (#903). +- `winml analyze` — `--ep` / `--device` auto resolves to a single best target (#919); faster re-runs plus a `--debug` rule locator (#906). +- `winml eval` — new SA eval pipeline with per-stage perf and an HTML report (#599); depth-estimation (#326, #437) and tensor-similarity (#805) evaluators; scripts track ONNX size and sanitize output (#755). +- Cross-command — `--format json` on `eval` / `analyze` / `perf` (#855); `--allow-unsupported-nodes` on `perf` / `build` / `eval` / `run` (#821). +- Quality of life — timm image-classification via library routing (#790); `~` expanded in paths (#815); progress bar during EP warmup (#788); refreshed `--list-device` coloring (#812). + +### 🐛 Fixes + +- **`winml perf`** — declared `psutil` as a runtime dependency, fixing a crash on clean install (#937); composite (dual-encoder) models supported (#866); HF and ONNX paths unified through `PerfBenchmark` (#659); `--monitor` live chart in `--module` mode (#654, #920); `rich` Live thread crashes (#832). +- **`winml analyze`** — coverage-counting bugs (#922); analyzer API EP list matches the CLI (#803); Pad / Gemm rule conflicts (#906). +- **Task / config validation** — fill-mask heads detected as `text2text-generation` (#851); vision feature-extraction model-task inconsistency (#786); model task validated in config (#723); full encoder-decoder composite built for no-task seq2seq (#850, #862); device/EP combination validated without a system check (#780). +- **`winml export`** — `.data` files written to the output dir, not the cwd (#853); timm `image_size` from `pretrained_cfg` (#806). +- **`winml inspect` / `winml catalog`** — `--task` validated at parse time (#546, #771); `catalog -t` short flag aligned (#541, #772); VitisAI EP ordered last, catalog table width fixed (#763). +- **Feature extraction** — `last_hidden_state` now populated in the output (#863). +- **`winml optimize`** — untie batched constant `MatMul` for OpenVINO GPU (#817). +- **`winml eval`** — fixed failures on AMD hosts (#783); cleanup runs on `SKIP_*` / exception paths (#890). +- **CLI output** — quieted `optimum` logger noise (#904); unified verbosity, logger routed to stderr (#566, #793). + +### 📦 Assets + +- `winml_cli-0.2.0-py3-none-any.whl` +- `rules-v0.2.0.zip` + ## WinML CLI v0.1.0 First **public preview** release. With the Windows ML 2.0 baseline now in place, this release shifts focus to polishing the CLI surface: faster `winml inspect` / `winml eval`, more accurate device & EP resolution, a real PyPI release pipeline, and a meaningful pass over sysinfo and quantization behavior. diff --git a/pyproject.toml b/pyproject.toml index 3465a921b..3e5d69d1f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ requires = [ "setuptools>=61", "wheel" ] [project] name = "winml-cli" -version = "0.1.0" +version = "0.2.0" description = "Accelerate Model Deployment on WinML" readme = "README.md" keywords = [ "onnx", "winml" ]