From b4767888ba2df26dc12c240de8d0cdcce7e9a5c3 Mon Sep 17 00:00:00 2001 From: xieofxie Date: Wed, 24 Jun 2026 16:04:20 +0800 Subject: [PATCH 1/3] add ov runtime --- src/winml/modelkit/commands/perf.py | 99 ++++++- src/winml/modelkit/session/__init__.py | 2 + .../session/openvino/openvino_session.py | 246 ++++++++++++++++++ tests/unit/commands/test_perf_cli.py | 41 +++ tests/unit/session/test_openvino_session.py | 96 +++++++ 5 files changed, 480 insertions(+), 4 deletions(-) create mode 100644 src/winml/modelkit/session/openvino/openvino_session.py create mode 100644 tests/unit/session/test_openvino_session.py diff --git a/src/winml/modelkit/commands/perf.py b/src/winml/modelkit/commands/perf.py index 412b1e675..70a6dfcd8 100644 --- a/src/winml/modelkit/commands/perf.py +++ b/src/winml/modelkit/commands/perf.py @@ -92,6 +92,9 @@ class BenchmarkConfig: ep: EPNameOrAlias | None = None ep_options: dict[str, str] | None = None shape_config: dict | None = None + # Inference runtime backend: "ort" (ONNX Runtime, default) or "openvino" + # (OpenVINO Runtime on the raw ONNX file, for ORT-vs-OV comparison). + runtime: str = "ort" @dataclass @@ -320,6 +323,54 @@ def effective_batch_size( return requested +# ============================================================================= +# OpenVINO runner adapter +# ============================================================================= + + +class _OpenVINOModel: + """Minimal single-session model backed by OpenVINO Runtime. + + Exposes the same surface ``PerfBenchmark._run_single`` reads from a + ``WinMLPreTrainedModel`` (``io_config`` / ``device`` / ``ep_name`` / + ``task`` / ``running_model_path`` and an ``_session`` with + ``compile`` / ``run`` / ``perf``), delegating to an ``OpenVINOSession``. + This lets the shared benchmark engine drive OpenVINO without a + WinMLAutoModel build — OpenVINO reads the raw ONNX directly. + """ + + def __init__( + self, + onnx_path: Path, + device: str, + provider_options: dict[str, str] | None = None, + ) -> None: + from ..session.openvino.openvino_session import OpenVINOSession + + self._session = OpenVINOSession(onnx_path, device=device, provider_options=provider_options) + + @property + def io_config(self) -> dict: + return self._session.io_config + + @property + def device(self) -> str: + return self._session.device + + @property + def ep_name(self) -> EPName | None: + return self._session.ep_name + + @property + def task(self) -> str | None: + # OpenVINO runs the raw ONNX with no task metadata. + return None + + @property + def running_model_path(self) -> Path: + return self._session.running_model_path + + # ============================================================================= # Benchmark Engine # ============================================================================= @@ -341,7 +392,7 @@ class PerfBenchmark: def __init__(self, config: BenchmarkConfig) -> None: """Initialize benchmark with configuration.""" self.config = config - self._model: WinMLPreTrainedModel | WinMLCompositeModel | None = None + self._model: WinMLPreTrainedModel | WinMLCompositeModel | _OpenVINOModel | None = None self._inputs: dict[str, np.ndarray] | None = None self._effective_batch: int = config.batch_size self._memory: dict[str, float] | None = None @@ -560,13 +611,31 @@ def _load_model(self) -> None: from ..config import WinMLBuildConfig from ..models import WinMLAutoModel + model_id = self.config.model_id + model_path = Path(model_id) + is_onnx = model_path.suffix.lower() == ".onnx" + + # OpenVINO runner: read the raw ONNX directly (no build, no ORT EP + # resolution — OpenVINO is independent of ORT's execution providers). + # ONNX input only; the CLI guards non-ONNX inputs earlier. + if self.config.runtime == "openvino": + if not is_onnx: + raise ValueError( + f"--runtime openvino requires an ONNX (.onnx) model file, got: {model_id}" + ) + if not model_path.exists(): + raise FileNotFoundError(f"ONNX file not found: {model_path}") + self._model = _OpenVINOModel( + model_path, + device=self.config.device, + provider_options=self.config.ep_options, + ) + return + # Resolve the concrete device + EP first so a bad combo fails fast, # before from_pretrained/from_onnx kick off the build pipeline. self._resolve_device_ep() - model_id = self.config.model_id - model_path = Path(model_id) - is_onnx = model_path.suffix.lower() == ".onnx" if is_onnx and not model_path.exists(): # Surface a clear error for programmatic callers. The CLI guards # this earlier, but without this check from_pretrained would fall @@ -1469,6 +1538,15 @@ def _run_simple_loop( @cli_utils.ep_options_option( optional_message="Applied to both HuggingFace model IDs and ONNX file inputs.", ) +@click.option( + "--runtime", + type=click.Choice(["ort", "openvino"]), + default="ort", + show_default=True, + help="Inference runtime backend. 'ort' = ONNX Runtime (InferenceSession). " + "'openvino' runs the raw ONNX file directly via OpenVINO Runtime for an " + "ORT-vs-OV comparison (ONNX input only; build/quant/--ep flags are ignored).", +) @cli_utils.output_option( "Output JSON file path. Defaults to " "'~/.cache/winml/perf/[/]/.json'." @@ -1554,6 +1632,7 @@ def perf( precision: str, ep: EPNameOrAlias | None, ep_options: tuple[str, ...], + runtime: str, output: Path | None, batch_size: int, shape_config_path: Path | None, @@ -1613,6 +1692,17 @@ def perf( hf_model = model + # OpenVINO runner: ONNX input only, and no per-module path (submodule + # discovery walks a live nn.Module graph, which OpenVINO never builds). + if runtime == "openvino": + if Path(hf_model).suffix.lower() != ".onnx": + raise click.UsageError( + "--runtime openvino requires an ONNX (.onnx) model file " + f"(not a HuggingFace model ID), got: {hf_model}" + ) + if module_class: + raise click.UsageError("--runtime openvino does not support --module benchmarking.") + # Apply build config defaults (CLI explicit options take precedence). # Read raw JSON so missing keys are distinguishable from dataclass defaults. if config_file is not None: @@ -1731,6 +1821,7 @@ def perf( ep=ep, ep_options=ep_provider_options, shape_config=shape_config, + runtime=runtime, ) try: diff --git a/src/winml/modelkit/session/__init__.py b/src/winml/modelkit/session/__init__.py index 5148da0b3..74bd60ee4 100644 --- a/src/winml/modelkit/session/__init__.py +++ b/src/winml/modelkit/session/__init__.py @@ -10,6 +10,7 @@ from .monitor.openvino_monitor import OpenVinoMonitor from .monitor.qnn_monitor import QNNMonitor from .monitor.vitisai_monitor import VitisAIMonitor +from .openvino.openvino_session import OpenVINOSession from .qairt.qairt_session import WinMLQairtSession from .session import InferenceError, SessionState, WinMLSession from .stats import PerfStats @@ -20,6 +21,7 @@ "HWMonitor", "InferenceError", "NullEPMonitor", + "OpenVINOSession", "OpenVinoMonitor", "PerfStats", "QNNMonitor", diff --git a/src/winml/modelkit/session/openvino/openvino_session.py b/src/winml/modelkit/session/openvino/openvino_session.py new file mode 100644 index 000000000..729c8bf22 --- /dev/null +++ b/src/winml/modelkit/session/openvino/openvino_session.py @@ -0,0 +1,246 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +"""OpenVINOSession - inference session backed by OpenVINO Runtime. + +Mirrors the subset of :class:`WinMLSession`'s surface that the perf +benchmark engine relies on (``compile`` / ``run`` / ``perf`` plus the +``io_config`` / ``device`` / ``ep_name`` / ``running_model_path`` +properties), so a ``winml perf`` run can swap ONNX Runtime for OpenVINO on +the same ONNX file for an apples-to-apples ORT-vs-OV comparison. + +ONNX input only: OpenVINO reads the provided ``.onnx`` directly (no +quantize / optimize / compile build). +""" + +from __future__ import annotations + +import logging +from contextlib import contextmanager +from pathlib import Path +from typing import TYPE_CHECKING, Any, cast + +import numpy as np + +from ...core.onnx_utils import get_io_config +from ..stats import PerfStats + + +if TYPE_CHECKING: + from collections.abc import Generator + + from ...utils.constants import EPName + + +logger = logging.getLogger(__name__) + + +# ModelKit device policy -> OpenVINO device name. "auto" maps to OpenVINO's +# AUTO plugin, which picks the best available device at compile time. +_OV_DEVICE_MAP = { + "cpu": "CPU", + "gpu": "GPU", + "npu": "NPU", + "auto": "AUTO", +} + +# Canonical EP name surfaced to perf reporting/JSON so OpenVINO runs are +# labeled consistently with the ORT path's provider names. +_OV_EP_NAME = "OpenVINOExecutionProvider" + + +class OpenVINOSession: + """ONNX inference session backed by OpenVINO Runtime. + + One session loads and runs a single ONNX file on one OpenVINO device. + The runtime is imported lazily in :meth:`compile` so importing this + module never pulls in the ``openvino`` package. + """ + + def __init__( + self, + onnx_path: str | Path, + device: str = "auto", + provider_options: dict[str, str] | None = None, + ) -> None: + """Initialize the session. + + Args: + onnx_path: Path to the ONNX model file. + device: Target device policy ("auto", "cpu", "gpu", "npu"), + mapped to an OpenVINO device name. + provider_options: OpenVINO config properties forwarded to + ``Core.compile_model`` (e.g. ``{"PERFORMANCE_HINT": + "LATENCY"}``). Empty/None means OpenVINO defaults. + """ + self._onnx_path = Path(onnx_path) + if not self._onnx_path.exists(): + raise FileNotFoundError(f"ONNX model not found: {onnx_path}") + + self._device = str(device).lower() + self._provider_options = dict(provider_options) if provider_options else {} + + # Populated by compile() + self._compiled: Any = None + self._ov_device: str | None = None + + # Cached I/O metadata (lazy-loaded) + self._io_config: dict | None = None + + # Performance tracking (enabled via perf() context manager) + self._perf_stats: PerfStats | None = None + + # OpenVINO reads the original ONNX directly, so the running model is + # always the input path (no EPContext / compiled artifact on disk). + self._running_model_path = self._onnx_path + + logger.info("OpenVINOSession initialized: %s", onnx_path) + + def compile(self) -> None: + """Read and compile the ONNX model with OpenVINO Runtime. + + Idempotent: compiles once per session. + """ + if self._compiled is not None: + logger.debug("Already compiled for %s", self._device) + return + + import openvino as ov + + requested = _OV_DEVICE_MAP.get(self._device, self._device.upper()) + core = ov.Core() + model = core.read_model(str(self._onnx_path)) + self._compiled = core.compile_model(model, requested, self._provider_options) + + # AUTO resolves to a concrete device at compile time; record what was + # actually selected for display, falling back to the requested name. + try: + devices = self._compiled.get_property("EXECUTION_DEVICES") + self._ov_device = devices[0] if devices else requested + except Exception: + self._ov_device = requested + + logger.info( + "OpenVINO compiled model on %s (requested %s), provider_options=%s", + self._ov_device, + requested, + self._provider_options, + ) + + def run(self, inputs: dict[str, Any]) -> dict[str, np.ndarray]: + """Run inference. + + Auto-compiles on first call. Validates and dtype-coerces inputs. + + Args: + inputs: Input tensors (numpy arrays or torch tensors) keyed by + input name. + + Returns: + Dict of output name -> numpy array. + """ + if not inputs: + raise ValueError("inputs cannot be empty") + + if self._compiled is None: + self.compile() + compiled = self._compiled + + ov_inputs = self._prepare_inputs(inputs) + + if self._perf_stats is not None: + result = self._perf_stats.record(lambda: compiled(ov_inputs)) + else: + result = compiled(ov_inputs) + + # Map outputs back to graph order. Index keying avoids OpenVINO + # output-name normalization mismatches (the order of model.outputs + # matches the ONNX graph output order get_io_config reads). + out_names = self.io_config["output_names"] + return {name: np.asarray(result[i]) for i, name in enumerate(out_names)} + + def _prepare_inputs(self, inputs: dict[str, Any]) -> dict[str, np.ndarray]: + """Convert inputs to numpy arrays and enforce model input dtypes.""" + io_cfg = self.io_config + name_to_type = dict(zip(io_cfg["input_names"], io_cfg["input_types"], strict=True)) + + ov_inputs: dict[str, np.ndarray] = {} + for name, value in inputs.items(): + if hasattr(value, "numpy"): # torch.Tensor + arr = value.cpu().numpy() + elif isinstance(value, np.ndarray): + arr = value + else: + arr = np.asarray(value) + + expected_type = name_to_type.get(name) + if expected_type is not None and arr.dtype != expected_type: + arr = arr.astype(expected_type) + + ov_inputs[name] = arr + + return ov_inputs + + @contextmanager + def perf(self, warmup: int = 0) -> Generator[PerfStats, None, None]: + """Context manager for scoped performance tracking. + + Mirrors :meth:`WinMLSession.perf` so the shared perf engine drives + either backend identically. + + Args: + warmup: Number of initial samples to exclude from statistics. + + Yields: + PerfStats collecting timing data within the context. + """ + self._perf_stats = PerfStats(warmup=warmup) + try: + yield self._perf_stats + finally: + self._perf_stats = None + + @property + def io_config(self) -> dict: + """ONNX I/O metadata (lazy-loaded, cached). + + Reuses the same extraction path as the ORT session so input/output + names, shapes and dtypes are identical across runtimes. + """ + if self._io_config is None: + from ...onnx import load_onnx + from ..session import WinMLSession + + model = load_onnx(self._onnx_path, load_weights=False, validate=False) + self._io_config = get_io_config(model) + # Reuse the operator-schema-based precision estimate (no + # architecture assumptions) so reports match the ORT path. + self._io_config["precision"] = WinMLSession._get_precision(model) + return self._io_config + + @property + def device(self) -> str: + """Target device label for this session.""" + return self._device + + @property + def ep_name(self) -> EPName | None: + """Canonical EP name, or None before compile. + + Returns ``"OpenVINOExecutionProvider"`` once compiled so perf + reporting labels OpenVINO runs consistently with ORT provider names. + """ + if self._compiled is None: + return None + return cast("EPName", _OV_EP_NAME) + + @property + def running_model_path(self) -> Path: + """Path to the ONNX model OpenVINO loads (always the input path).""" + return self._running_model_path + + @property + def is_compiled(self) -> bool: + """Whether the model has been compiled.""" + return self._compiled is not None diff --git a/tests/unit/commands/test_perf_cli.py b/tests/unit/commands/test_perf_cli.py index ba705d0a6..8b3fba082 100644 --- a/tests/unit/commands/test_perf_cli.py +++ b/tests/unit/commands/test_perf_cli.py @@ -385,6 +385,47 @@ def test_cli_onnx_not_found_error(self, runner: CliRunner, tmp_path: Path) -> No assert result.exit_code != 0 assert "not found" in result.output.lower() + def test_cli_openvino_requires_onnx(self, runner: CliRunner) -> None: + """--runtime openvino rejects a non-ONNX (HF) model id.""" + result = runner.invoke( + perf, + ["-m", "microsoft/resnet-50", "--runtime", "openvino"], + obj={}, + ) + assert result.exit_code != 0 + assert "requires an onnx" in result.output.lower() + + def test_cli_openvino_rejects_module(self, runner: CliRunner, tmp_path: Path) -> None: + """--runtime openvino does not support per-module benchmarking.""" + onnx_file = tmp_path / "model.onnx" + onnx_file.write_bytes(b"fake onnx") + result = runner.invoke( + perf, + ["-m", str(onnx_file), "--runtime", "openvino", "--module", "BertAttention"], + obj={}, + ) + assert result.exit_code != 0 + assert "module" in result.output.lower() + + def test_cli_openvino_routes_to_adapter(self, runner: CliRunner, tmp_path: Path) -> None: + """--runtime openvino builds an _OpenVINOModel, bypassing WinMLAutoModel.""" + onnx_file = tmp_path / "model.onnx" + onnx_file.write_bytes(b"fake onnx") + + config = BenchmarkConfig(model_id=str(onnx_file), device="cpu", runtime="openvino") + benchmark = PerfBenchmark(config) + + mock_session = MagicMock() + with patch( + "winml.modelkit.session.openvino.openvino_session.OpenVINOSession", + return_value=mock_session, + ) as mock_ov: + benchmark._load_model() + + mock_ov.assert_called_once() + assert benchmark._model is not None + assert benchmark._model.task is None + def test_onnx_load_model_passes_ep(self, tmp_path: Path) -> None: """EP argument should be forwarded to from_onnx.""" onnx_file = tmp_path / "model.onnx" diff --git a/tests/unit/session/test_openvino_session.py b/tests/unit/session/test_openvino_session.py new file mode 100644 index 000000000..a6f297bc0 --- /dev/null +++ b/tests/unit/session/test_openvino_session.py @@ -0,0 +1,96 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +"""Tests for OpenVINOSession (OpenVINO Runtime backend for perf). + +OpenVINO runs the raw ONNX directly on CPU, so these tests gate only on the +``openvino`` package being importable (CPU is always available when it is) -- +not on the ORT OpenVINO EP, which is a different component. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import numpy as np +import pytest + +from winml.modelkit.session import OpenVINOSession + + +if TYPE_CHECKING: + from pathlib import Path + + +pytest.importorskip("openvino") + +# OpenVINO CPU plugin is always present when the package is installed. +pytestmark = pytest.mark.openvino + + +class TestOpenVINOSession: + """OpenVINOSession compile/run/perf surface on CPU.""" + + def test_io_config_matches_onnx(self, simple_matmul_onnx: Path) -> None: + session = OpenVINOSession(simple_matmul_onnx, device="cpu") + io = session.io_config + assert io["input_names"] == ["A"] + assert io["output_names"] == ["C"] + # input_value_ranges is ORT-only enrichment; precision is shared. + assert "precision" in io + + def test_compile_is_idempotent(self, simple_matmul_onnx: Path) -> None: + session = OpenVINOSession(simple_matmul_onnx, device="cpu") + assert not session.is_compiled + assert session.ep_name is None + session.compile() + compiled = session._compiled + session.compile() # second call is a no-op + assert session._compiled is compiled + assert session.is_compiled + assert session.ep_name == "OpenVINOExecutionProvider" + + def test_run_produces_correct_output( + self, simple_matmul_onnx: Path, sample_input: dict[str, np.ndarray] + ) -> None: + session = OpenVINOSession(simple_matmul_onnx, device="cpu") + outputs = session.run(sample_input) + assert set(outputs) == {"C"} + assert outputs["C"].shape == (1, 4) + assert outputs["C"].dtype == np.float32 + + def test_run_auto_compiles(self, simple_matmul_onnx: Path) -> None: + session = OpenVINOSession(simple_matmul_onnx, device="cpu") + session.run({"A": np.zeros((1, 4), dtype=np.float32)}) + assert session.is_compiled + + def test_run_empty_inputs_raises(self, simple_matmul_onnx: Path) -> None: + session = OpenVINOSession(simple_matmul_onnx, device="cpu") + with pytest.raises(ValueError, match="inputs cannot be empty"): + session.run({}) + + def test_run_enforces_input_dtype(self, simple_matmul_onnx: Path) -> None: + """Float64 input is coerced to the model's float32 without error.""" + session = OpenVINOSession(simple_matmul_onnx, device="cpu") + outputs = session.run({"A": np.ones((1, 4), dtype=np.float64)}) + assert outputs["C"].dtype == np.float32 + + def test_perf_records_samples(self, simple_matmul_onnx: Path) -> None: + session = OpenVINOSession(simple_matmul_onnx, device="cpu") + inputs = {"A": np.zeros((1, 4), dtype=np.float32)} + with session.perf(warmup=2) as stats: + for _ in range(7): + session.run(inputs) + assert stats.total_count == 7 + assert stats.count == 5 # warmup excluded + assert stats.mean_ms > 0 + + def test_running_model_path_is_input(self, simple_matmul_onnx: Path) -> None: + session = OpenVINOSession(simple_matmul_onnx, device="cpu") + session.compile() + assert session.running_model_path == simple_matmul_onnx + + def test_missing_file_raises(self, tmp_path: Path) -> None: + with pytest.raises(FileNotFoundError): + OpenVINOSession(tmp_path / "does_not_exist.onnx") From 9c7d8771191b7800cb06c264c1f017cb13528c35 Mon Sep 17 00:00:00 2001 From: xieofxie Date: Wed, 24 Jun 2026 16:24:43 +0800 Subject: [PATCH 2/3] feat(perf): type --runtime as RuntimeName and pre-check OV device - Add RuntimeName Literal + RUNTIME_NAMES to constants (mirrors CompilerName), thread it through BenchmarkConfig and the perf CLI instead of bare str. - Fail fast in OpenVINOSession.compile() when the requested device is absent from Core().available_devices, with a readable message instead of a raw backend stack trace. AUTO is exempt; matches plain (GPU) and indexed (GPU.0) device names. - Add a hardware-independent unit test for the unavailable-device path. --- src/winml/modelkit/commands/perf.py | 8 ++++---- .../modelkit/session/openvino/openvino_session.py | 14 ++++++++++++++ src/winml/modelkit/utils/constants.py | 9 +++++++++ tests/unit/session/test_openvino_session.py | 8 ++++++++ 4 files changed, 35 insertions(+), 4 deletions(-) diff --git a/src/winml/modelkit/commands/perf.py b/src/winml/modelkit/commands/perf.py index 70a6dfcd8..fedbafc1e 100644 --- a/src/winml/modelkit/commands/perf.py +++ b/src/winml/modelkit/commands/perf.py @@ -28,7 +28,7 @@ from rich.table import Table from ..utils import cli as cli_utils -from ..utils.constants import EPName, EPNameOrAlias +from ..utils.constants import RUNTIME_NAMES, EPName, EPNameOrAlias, RuntimeName from ..utils.logging import configure_logging from ._live_chart import LiveMonitorDisplay @@ -94,7 +94,7 @@ class BenchmarkConfig: shape_config: dict | None = None # Inference runtime backend: "ort" (ONNX Runtime, default) or "openvino" # (OpenVINO Runtime on the raw ONNX file, for ORT-vs-OV comparison). - runtime: str = "ort" + runtime: RuntimeName = "ort" @dataclass @@ -1540,7 +1540,7 @@ def _run_simple_loop( ) @click.option( "--runtime", - type=click.Choice(["ort", "openvino"]), + type=click.Choice(list(RUNTIME_NAMES)), default="ort", show_default=True, help="Inference runtime backend. 'ort' = ONNX Runtime (InferenceSession). " @@ -1632,7 +1632,7 @@ def perf( precision: str, ep: EPNameOrAlias | None, ep_options: tuple[str, ...], - runtime: str, + runtime: RuntimeName, output: Path | None, batch_size: int, shape_config_path: Path | None, diff --git a/src/winml/modelkit/session/openvino/openvino_session.py b/src/winml/modelkit/session/openvino/openvino_session.py index 729c8bf22..cac20dd0a 100644 --- a/src/winml/modelkit/session/openvino/openvino_session.py +++ b/src/winml/modelkit/session/openvino/openvino_session.py @@ -110,6 +110,20 @@ def compile(self) -> None: requested = _OV_DEVICE_MAP.get(self._device, self._device.upper()) core = ov.Core() + + # Friendly fail-fast when the requested device isn't present, instead of + # a raw backend stack trace from compile_model. AUTO is a virtual plugin + # (always selectable); concrete devices may be listed plain ("GPU") or + # indexed ("GPU.0"), so match on the base name. + available = core.available_devices + if requested != "AUTO" and not any( + dev == requested or dev.startswith(f"{requested}.") for dev in available + ): + raise RuntimeError( + f"OpenVINO device '{requested}' (from --device {self._device}) is " + f"not available. OpenVINO sees: {available}" + ) + model = core.read_model(str(self._onnx_path)) self._compiled = core.compile_model(model, requested, self._provider_options) diff --git a/src/winml/modelkit/utils/constants.py b/src/winml/modelkit/utils/constants.py index 4e85aaca6..6356a1306 100644 --- a/src/winml/modelkit/utils/constants.py +++ b/src/winml/modelkit/utils/constants.py @@ -64,6 +64,15 @@ COMPILER_NAMES: tuple[CompilerName, ...] = get_args(CompilerName) +# Inference runtime backends selectable via ``--runtime`` (see commands/perf.py): +# "ort" -> ONNX Runtime (ort.InferenceSession via WinMLSession, default) +# "openvino" -> OpenVINO Runtime on the raw ONNX (ORT-vs-OV comparison) +RuntimeName = Literal["ort", "openvino"] + +# Runtime-iterable form of ``RuntimeName`` (e.g. for the CLI choice list). +RUNTIME_NAMES: tuple[RuntimeName, ...] = get_args(RuntimeName) + + # Supported execution providers — derived from the ``EPName`` Literal above so # that ``utils.constants`` stays leaf-level (no import dependency on sysinfo). # Membership parity with ``sysinfo.device._EP_DEVICE_MAP`` is enforced by diff --git a/tests/unit/session/test_openvino_session.py b/tests/unit/session/test_openvino_session.py index a6f297bc0..8d514ce0f 100644 --- a/tests/unit/session/test_openvino_session.py +++ b/tests/unit/session/test_openvino_session.py @@ -94,3 +94,11 @@ def test_running_model_path_is_input(self, simple_matmul_onnx: Path) -> None: def test_missing_file_raises(self, tmp_path: Path) -> None: with pytest.raises(FileNotFoundError): OpenVINOSession(tmp_path / "does_not_exist.onnx") + + def test_unavailable_device_raises_friendly_error(self, simple_matmul_onnx: Path) -> None: + """A device absent from Core().available_devices fails fast with a + readable message instead of a raw backend stack trace. Uses a bogus + device name so the test is hardware-independent.""" + session = OpenVINOSession(simple_matmul_onnx, device="bogus") + with pytest.raises(RuntimeError, match=r"not available\. OpenVINO sees"): + session.compile() From d09a9388b5fb259c25355156d188b26cec005523 Mon Sep 17 00:00:00 2001 From: xieofxie Date: Wed, 24 Jun 2026 16:35:35 +0800 Subject: [PATCH 3/3] feat(perf): prompt to install winml-cli[openvino] when OpenVINO is missing Wrap the openvino import in OpenVINOSession.compile() so an absent package raises a clear install hint (pip install winml-cli[openvino]) instead of a bare ModuleNotFoundError. Add a unit test that simulates the missing module. --- .../modelkit/session/openvino/openvino_session.py | 8 +++++++- tests/unit/session/test_openvino_session.py | 13 +++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/winml/modelkit/session/openvino/openvino_session.py b/src/winml/modelkit/session/openvino/openvino_session.py index cac20dd0a..f841fdb10 100644 --- a/src/winml/modelkit/session/openvino/openvino_session.py +++ b/src/winml/modelkit/session/openvino/openvino_session.py @@ -106,7 +106,13 @@ def compile(self) -> None: logger.debug("Already compiled for %s", self._device) return - import openvino as ov + try: + import openvino as ov + except ImportError as exc: + raise ImportError( + "OpenVINO is not installed but --runtime openvino was requested. " + "Install it with: pip install winml-cli[openvino]" + ) from exc requested = _OV_DEVICE_MAP.get(self._device, self._device.upper()) core = ov.Core() diff --git a/tests/unit/session/test_openvino_session.py b/tests/unit/session/test_openvino_session.py index 8d514ce0f..7dcf7b06f 100644 --- a/tests/unit/session/test_openvino_session.py +++ b/tests/unit/session/test_openvino_session.py @@ -95,6 +95,19 @@ def test_missing_file_raises(self, tmp_path: Path) -> None: with pytest.raises(FileNotFoundError): OpenVINOSession(tmp_path / "does_not_exist.onnx") + def test_missing_openvino_prompts_install( + self, simple_matmul_onnx: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """When the openvino package is absent, compile() raises a clear + install hint instead of a bare ModuleNotFoundError. Simulates absence + by shadowing the module so ``import openvino`` fails.""" + import sys + + monkeypatch.setitem(sys.modules, "openvino", None) + session = OpenVINOSession(simple_matmul_onnx, device="cpu") + with pytest.raises(ImportError, match=r"pip install winml-cli\[openvino\]"): + session.compile() + def test_unavailable_device_raises_friendly_error(self, simple_matmul_onnx: Path) -> None: """A device absent from Core().available_devices fails fast with a readable message instead of a raw backend stack trace. Uses a bogus