From b4767888ba2df26dc12c240de8d0cdcce7e9a5c3 Mon Sep 17 00:00:00 2001
From: xieofxie <xieofxie@126.com>
Date: Wed, 24 Jun 2026 16:04:20 +0800
Subject: [PATCH 1/3] feat(perf): add OpenVINO runtime backend (--runtime)

---
 src/winml/modelkit/commands/perf.py           |  99 ++++++-
 src/winml/modelkit/session/__init__.py        |   2 +
 .../session/openvino/openvino_session.py      | 246 ++++++++++++++++++
 tests/unit/commands/test_perf_cli.py          |  41 +++
 tests/unit/session/test_openvino_session.py   |  96 +++++++
 5 files changed, 480 insertions(+), 4 deletions(-)
 create mode 100644 src/winml/modelkit/session/openvino/openvino_session.py
 create mode 100644 tests/unit/session/test_openvino_session.py

diff --git a/src/winml/modelkit/commands/perf.py b/src/winml/modelkit/commands/perf.py
index 412b1e675..70a6dfcd8 100644
--- a/src/winml/modelkit/commands/perf.py
+++ b/src/winml/modelkit/commands/perf.py
@@ -92,6 +92,9 @@ class BenchmarkConfig:
     ep: EPNameOrAlias | None = None
     ep_options: dict[str, str] | None = None
     shape_config: dict | None = None
+    # Inference runtime backend: "ort" (ONNX Runtime, default) or "openvino"
+    # (OpenVINO Runtime on the raw ONNX file, for ORT-vs-OV comparison).
+    runtime: str = "ort"
 
 
 @dataclass
@@ -320,6 +323,54 @@ def effective_batch_size(
     return requested
 
 
+# =============================================================================
+# OpenVINO runner adapter
+# =============================================================================
+
+
+class _OpenVINOModel:
+    """Minimal single-session model backed by OpenVINO Runtime.
+
+    Exposes the same surface ``PerfBenchmark._run_single`` reads from a
+    ``WinMLPreTrainedModel`` (``io_config`` / ``device`` / ``ep_name`` /
+    ``task`` / ``running_model_path`` and an ``_session`` with
+    ``compile`` / ``run`` / ``perf``), delegating to an ``OpenVINOSession``.
+    This lets the shared benchmark engine drive OpenVINO without a
+    WinMLAutoModel build — OpenVINO reads the raw ONNX directly.
+    """
+
+    def __init__(
+        self,
+        onnx_path: Path,
+        device: str,
+        provider_options: dict[str, str] | None = None,
+    ) -> None:
+        from ..session.openvino.openvino_session import OpenVINOSession
+
+        self._session = OpenVINOSession(onnx_path, device=device, provider_options=provider_options)
+
+    @property
+    def io_config(self) -> dict:
+        return self._session.io_config
+
+    @property
+    def device(self) -> str:
+        return self._session.device
+
+    @property
+    def ep_name(self) -> EPName | None:
+        return self._session.ep_name
+
+    @property
+    def task(self) -> str | None:
+        # OpenVINO runs the raw ONNX with no task metadata.
+        return None
+
+    @property
+    def running_model_path(self) -> Path:
+        return self._session.running_model_path
+
+
 # =============================================================================
 # Benchmark Engine
 # =============================================================================
@@ -341,7 +392,7 @@ class PerfBenchmark:
     def __init__(self, config: BenchmarkConfig) -> None:
         """Initialize benchmark with configuration."""
         self.config = config
-        self._model: WinMLPreTrainedModel | WinMLCompositeModel | None = None
+        self._model: WinMLPreTrainedModel | WinMLCompositeModel | _OpenVINOModel | None = None
         self._inputs: dict[str, np.ndarray] | None = None
         self._effective_batch: int = config.batch_size
         self._memory: dict[str, float] | None = None
@@ -560,13 +611,31 @@ def _load_model(self) -> None:
         from ..config import WinMLBuildConfig
         from ..models import WinMLAutoModel
 
+        model_id = self.config.model_id
+        model_path = Path(model_id)
+        is_onnx = model_path.suffix.lower() == ".onnx"
+
+        # OpenVINO runner: read the raw ONNX directly (no build, no ORT EP
+        # resolution — OpenVINO is independent of ORT's execution providers).
+        # ONNX input only; the CLI guards this earlier, so re-check for programmatic callers.
+        if self.config.runtime == "openvino":
+            if not is_onnx:
+                raise ValueError(
+                    f"--runtime openvino requires an ONNX (.onnx) model file, got: {model_id}"
+                )
+            if not model_path.exists():
+                raise FileNotFoundError(f"ONNX file not found: {model_path}")
+            self._model = _OpenVINOModel(
+                model_path,
+                device=self.config.device,
+                provider_options=self.config.ep_options,
+            )
+            return
+
         # Resolve the concrete device + EP first so a bad combo fails fast,
         # before from_pretrained/from_onnx kick off the build pipeline.
         self._resolve_device_ep()
 
-        model_id = self.config.model_id
-        model_path = Path(model_id)
-        is_onnx = model_path.suffix.lower() == ".onnx"
         if is_onnx and not model_path.exists():
             # Surface a clear error for programmatic callers. The CLI guards
             # this earlier, but without this check from_pretrained would fall
@@ -1469,6 +1538,15 @@ def _run_simple_loop(
 @cli_utils.ep_options_option(
     optional_message="Applied to both HuggingFace model IDs and ONNX file inputs.",
 )
+@click.option(
+    "--runtime",
+    type=click.Choice(["ort", "openvino"]),
+    default="ort",
+    show_default=True,
+    help="Inference runtime backend. 'ort' = ONNX Runtime (InferenceSession). "
+    "'openvino' runs the raw ONNX file directly via OpenVINO Runtime for an "
+    "ORT-vs-OV comparison (ONNX input only; build/quant/--ep flags are ignored).",
+)
 @cli_utils.output_option(
     "Output JSON file path. Defaults to "
     "'~/.cache/winml/perf/<model_slug>[/<module_class>]/<timestamp>.json'."
@@ -1554,6 +1632,7 @@ def perf(
     precision: str,
     ep: EPNameOrAlias | None,
     ep_options: tuple[str, ...],
+    runtime: str,
     output: Path | None,
     batch_size: int,
     shape_config_path: Path | None,
@@ -1613,6 +1692,17 @@ def perf(
 
     hf_model = model
 
+    # OpenVINO runner: ONNX input only, and no per-module path (submodule
+    # discovery walks a live nn.Module graph, which OpenVINO never builds).
+    if runtime == "openvino":
+        if Path(hf_model).suffix.lower() != ".onnx":
+            raise click.UsageError(
+                "--runtime openvino requires an ONNX (.onnx) model file "
+                f"(not a HuggingFace model ID), got: {hf_model}"
+            )
+        if module_class:
+            raise click.UsageError("--runtime openvino does not support --module benchmarking.")
+
     # Apply build config defaults (CLI explicit options take precedence).
     # Read raw JSON so missing keys are distinguishable from dataclass defaults.
     if config_file is not None:
@@ -1731,6 +1821,7 @@ def perf(
         ep=ep,
         ep_options=ep_provider_options,
         shape_config=shape_config,
+        runtime=runtime,
     )
 
     try:
diff --git a/src/winml/modelkit/session/__init__.py b/src/winml/modelkit/session/__init__.py
index 5148da0b3..74bd60ee4 100644
--- a/src/winml/modelkit/session/__init__.py
+++ b/src/winml/modelkit/session/__init__.py
@@ -10,6 +10,7 @@
 from .monitor.openvino_monitor import OpenVinoMonitor
 from .monitor.qnn_monitor import QNNMonitor
 from .monitor.vitisai_monitor import VitisAIMonitor
+from .openvino.openvino_session import OpenVINOSession
 from .qairt.qairt_session import WinMLQairtSession
 from .session import InferenceError, SessionState, WinMLSession
 from .stats import PerfStats
@@ -20,6 +21,7 @@
     "HWMonitor",
     "InferenceError",
     "NullEPMonitor",
+    "OpenVINOSession",
     "OpenVinoMonitor",
     "PerfStats",
     "QNNMonitor",
diff --git a/src/winml/modelkit/session/openvino/openvino_session.py b/src/winml/modelkit/session/openvino/openvino_session.py
new file mode 100644
index 000000000..729c8bf22
--- /dev/null
+++ b/src/winml/modelkit/session/openvino/openvino_session.py
@@ -0,0 +1,246 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+"""OpenVINOSession - inference session backed by OpenVINO Runtime.
+
+Mirrors the subset of :class:`WinMLSession`'s surface that the perf
+benchmark engine relies on (``compile`` / ``run`` / ``perf`` plus the
+``io_config`` / ``device`` / ``ep_name`` / ``running_model_path``
+properties), so a ``winml perf`` run can swap ONNX Runtime for OpenVINO on
+the same ONNX file for an apples-to-apples ORT-vs-OV comparison.
+
+ONNX input only: OpenVINO reads the provided ``.onnx`` directly (no
+quantize / optimize / compile build).
+"""
+
+from __future__ import annotations
+
+import logging
+from contextlib import contextmanager
+from pathlib import Path
+from typing import TYPE_CHECKING, Any, cast
+
+import numpy as np
+
+from ...core.onnx_utils import get_io_config
+from ..stats import PerfStats
+
+
+if TYPE_CHECKING:
+    from collections.abc import Generator
+
+    from ...utils.constants import EPName
+
+
+logger = logging.getLogger(__name__)
+
+
+# ModelKit device policy -> OpenVINO device name. "auto" maps to OpenVINO's
+# AUTO plugin, which picks the best available device at compile time.
+_OV_DEVICE_MAP = {
+    "cpu": "CPU",
+    "gpu": "GPU",
+    "npu": "NPU",
+    "auto": "AUTO",
+}
+
+# Canonical EP name surfaced to perf reporting/JSON so OpenVINO runs are
+# labeled consistently with the ORT path's provider names.
+_OV_EP_NAME = "OpenVINOExecutionProvider"
+
+
+class OpenVINOSession:
+    """ONNX inference session backed by OpenVINO Runtime.
+
+    One session loads and runs a single ONNX file on one OpenVINO device.
+    The runtime is imported lazily in :meth:`compile` so importing this
+    module never pulls in the ``openvino`` package.
+    """
+
+    def __init__(
+        self,
+        onnx_path: str | Path,
+        device: str = "auto",
+        provider_options: dict[str, str] | None = None,
+    ) -> None:
+        """Initialize the session.
+
+        Args:
+            onnx_path: Path to the ONNX model file.
+            device: Target device policy ("auto", "cpu", "gpu", "npu"),
+                mapped to an OpenVINO device name.
+            provider_options: OpenVINO config properties forwarded to
+                ``Core.compile_model`` (e.g. ``{"PERFORMANCE_HINT":
+                "LATENCY"}``). Empty/None means OpenVINO defaults.
+        """
+        self._onnx_path = Path(onnx_path)
+        if not self._onnx_path.exists():
+            raise FileNotFoundError(f"ONNX model not found: {onnx_path}")
+
+        self._device = str(device).lower()
+        self._provider_options = dict(provider_options) if provider_options else {}
+
+        # Populated by compile()
+        self._compiled: Any = None
+        self._ov_device: str | None = None
+
+        # Cached I/O metadata (lazy-loaded)
+        self._io_config: dict | None = None
+
+        # Performance tracking (enabled via perf() context manager)
+        self._perf_stats: PerfStats | None = None
+
+        # OpenVINO reads the original ONNX directly, so the running model is
+        # always the input path (no EPContext / compiled artifact on disk).
+        self._running_model_path = self._onnx_path
+
+        logger.info("OpenVINOSession initialized: %s", onnx_path)
+
+    def compile(self) -> None:
+        """Read and compile the ONNX model with OpenVINO Runtime.
+
+        Idempotent: compiles once per session.
+        """
+        if self._compiled is not None:
+            logger.debug("Already compiled for %s", self._device)
+            return
+
+        import openvino as ov
+
+        requested = _OV_DEVICE_MAP.get(self._device, self._device.upper())
+        core = ov.Core()
+        model = core.read_model(str(self._onnx_path))
+        self._compiled = core.compile_model(model, requested, self._provider_options)
+
+        # AUTO resolves to a concrete device at compile time; record what was
+        # actually selected for display, falling back to the requested name.
+        try:
+            devices = self._compiled.get_property("EXECUTION_DEVICES")
+            self._ov_device = devices[0] if devices else requested
+        except Exception:
+            self._ov_device = requested
+
+        logger.info(
+            "OpenVINO compiled model on %s (requested %s), provider_options=%s",
+            self._ov_device,
+            requested,
+            self._provider_options,
+        )
+
+    def run(self, inputs: dict[str, Any]) -> dict[str, np.ndarray]:
+        """Run inference.
+
+        Auto-compiles on first call. Rejects empty inputs and coerces input dtypes.
+
+        Args:
+            inputs: Input tensors (numpy arrays or torch tensors) keyed by
+                input name.
+
+        Returns:
+            Dict of output name -> numpy array.
+        """
+        if not inputs:
+            raise ValueError("inputs cannot be empty")
+
+        if self._compiled is None:
+            self.compile()
+        compiled = self._compiled
+
+        ov_inputs = self._prepare_inputs(inputs)
+
+        if self._perf_stats is not None:
+            result = self._perf_stats.record(lambda: compiled(ov_inputs))
+        else:
+            result = compiled(ov_inputs)
+
+        # Map outputs back to graph order. Index keying avoids OpenVINO
+        # output-name normalization mismatches (the order of model.outputs
+        # matches the ONNX graph output order get_io_config reads).
+        out_names = self.io_config["output_names"]
+        return {name: np.asarray(result[i]) for i, name in enumerate(out_names)}
+
+    def _prepare_inputs(self, inputs: dict[str, Any]) -> dict[str, np.ndarray]:
+        """Convert inputs to numpy arrays and enforce model input dtypes."""
+        io_cfg = self.io_config
+        name_to_type = dict(zip(io_cfg["input_names"], io_cfg["input_types"], strict=True))
+
+        ov_inputs: dict[str, np.ndarray] = {}
+        for name, value in inputs.items():
+            if hasattr(value, "numpy"):  # torch.Tensor
+                arr = value.cpu().numpy()
+            elif isinstance(value, np.ndarray):
+                arr = value
+            else:
+                arr = np.asarray(value)
+
+            expected_type = name_to_type.get(name)
+            if expected_type is not None and arr.dtype != expected_type:
+                arr = arr.astype(expected_type)
+
+            ov_inputs[name] = arr
+
+        return ov_inputs
+
+    @contextmanager
+    def perf(self, warmup: int = 0) -> Generator[PerfStats, None, None]:
+        """Context manager for scoped performance tracking.
+
+        Mirrors :meth:`WinMLSession.perf` so the shared perf engine drives
+        either backend identically.
+
+        Args:
+            warmup: Number of initial samples to exclude from statistics.
+
+        Yields:
+            PerfStats collecting timing data within the context.
+        """
+        self._perf_stats = PerfStats(warmup=warmup)
+        try:
+            yield self._perf_stats
+        finally:
+            self._perf_stats = None
+
+    @property
+    def io_config(self) -> dict:
+        """ONNX I/O metadata (lazy-loaded, cached).
+
+        Reuses the same extraction path as the ORT session so input/output
+        names, shapes and dtypes are identical across runtimes.
+        """
+        if self._io_config is None:
+            from ...onnx import load_onnx
+            from ..session import WinMLSession
+
+            model = load_onnx(self._onnx_path, load_weights=False, validate=False)
+            self._io_config = get_io_config(model)
+            # Reuse the operator-schema-based precision estimate (no
+            # architecture assumptions) so reports match the ORT path.
+            self._io_config["precision"] = WinMLSession._get_precision(model)
+        return self._io_config
+
+    @property
+    def device(self) -> str:
+        """Requested device policy (lower-cased), not the device OpenVINO resolved."""
+        return self._device
+
+    @property
+    def ep_name(self) -> EPName | None:
+        """Canonical EP name, or None before compile.
+
+        Returns ``"OpenVINOExecutionProvider"`` once compiled so perf
+        reporting labels OpenVINO runs consistently with ORT provider names.
+        """
+        if self._compiled is None:
+            return None
+        return cast("EPName", _OV_EP_NAME)
+
+    @property
+    def running_model_path(self) -> Path:
+        """Path to the ONNX model OpenVINO loads (always the input path)."""
+        return self._running_model_path
+
+    @property
+    def is_compiled(self) -> bool:
+        """Whether the model has been compiled."""
+        return self._compiled is not None
diff --git a/tests/unit/commands/test_perf_cli.py b/tests/unit/commands/test_perf_cli.py
index ba705d0a6..8b3fba082 100644
--- a/tests/unit/commands/test_perf_cli.py
+++ b/tests/unit/commands/test_perf_cli.py
@@ -385,6 +385,47 @@ def test_cli_onnx_not_found_error(self, runner: CliRunner, tmp_path: Path) -> No
         assert result.exit_code != 0
         assert "not found" in result.output.lower()
 
+    def test_cli_openvino_requires_onnx(self, runner: CliRunner) -> None:
+        """--runtime openvino rejects a non-ONNX (HF) model id."""
+        result = runner.invoke(
+            perf,
+            ["-m", "microsoft/resnet-50", "--runtime", "openvino"],
+            obj={},
+        )
+        assert result.exit_code != 0
+        assert "requires an onnx" in result.output.lower()
+
+    def test_cli_openvino_rejects_module(self, runner: CliRunner, tmp_path: Path) -> None:
+        """--runtime openvino does not support per-module benchmarking."""
+        onnx_file = tmp_path / "model.onnx"
+        onnx_file.write_bytes(b"fake onnx")
+        result = runner.invoke(
+            perf,
+            ["-m", str(onnx_file), "--runtime", "openvino", "--module", "BertAttention"],
+            obj={},
+        )
+        assert result.exit_code != 0
+        assert "module" in result.output.lower()
+
+    def test_cli_openvino_routes_to_adapter(self, runner: CliRunner, tmp_path: Path) -> None:
+        """--runtime openvino builds an _OpenVINOModel, bypassing WinMLAutoModel."""
+        onnx_file = tmp_path / "model.onnx"
+        onnx_file.write_bytes(b"fake onnx")
+
+        config = BenchmarkConfig(model_id=str(onnx_file), device="cpu", runtime="openvino")
+        benchmark = PerfBenchmark(config)
+
+        mock_session = MagicMock()
+        with patch(
+            "winml.modelkit.session.openvino.openvino_session.OpenVINOSession",
+            return_value=mock_session,
+        ) as mock_ov:
+            benchmark._load_model()
+
+        mock_ov.assert_called_once()
+        assert benchmark._model is not None
+        assert benchmark._model.task is None
+
     def test_onnx_load_model_passes_ep(self, tmp_path: Path) -> None:
         """EP argument should be forwarded to from_onnx."""
         onnx_file = tmp_path / "model.onnx"
diff --git a/tests/unit/session/test_openvino_session.py b/tests/unit/session/test_openvino_session.py
new file mode 100644
index 000000000..a6f297bc0
--- /dev/null
+++ b/tests/unit/session/test_openvino_session.py
@@ -0,0 +1,96 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+"""Tests for OpenVINOSession (OpenVINO Runtime backend for perf).
+
+OpenVINO runs the raw ONNX directly on CPU, so these tests gate only on the
+``openvino`` package being importable (CPU is always available when it is) --
+not on the ORT OpenVINO EP, which is a different component.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import numpy as np
+import pytest
+
+from winml.modelkit.session import OpenVINOSession
+
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+
+pytest.importorskip("openvino")
+
+# OpenVINO CPU plugin is always present when the package is installed.
+pytestmark = pytest.mark.openvino
+
+
+class TestOpenVINOSession:
+    """OpenVINOSession compile/run/perf surface on CPU."""
+
+    def test_io_config_matches_onnx(self, simple_matmul_onnx: Path) -> None:
+        session = OpenVINOSession(simple_matmul_onnx, device="cpu")
+        io = session.io_config
+        assert io["input_names"] == ["A"]
+        assert io["output_names"] == ["C"]
+        # input_value_ranges is ORT-only enrichment; precision is shared.
+        assert "precision" in io
+
+    def test_compile_is_idempotent(self, simple_matmul_onnx: Path) -> None:
+        session = OpenVINOSession(simple_matmul_onnx, device="cpu")
+        assert not session.is_compiled
+        assert session.ep_name is None
+        session.compile()
+        compiled = session._compiled
+        session.compile()  # second call is a no-op
+        assert session._compiled is compiled
+        assert session.is_compiled
+        assert session.ep_name == "OpenVINOExecutionProvider"
+
+    def test_run_produces_correct_output(
+        self, simple_matmul_onnx: Path, sample_input: dict[str, np.ndarray]
+    ) -> None:
+        session = OpenVINOSession(simple_matmul_onnx, device="cpu")
+        outputs = session.run(sample_input)
+        assert set(outputs) == {"C"}
+        assert outputs["C"].shape == (1, 4)
+        assert outputs["C"].dtype == np.float32
+
+    def test_run_auto_compiles(self, simple_matmul_onnx: Path) -> None:
+        session = OpenVINOSession(simple_matmul_onnx, device="cpu")
+        session.run({"A": np.zeros((1, 4), dtype=np.float32)})
+        assert session.is_compiled
+
+    def test_run_empty_inputs_raises(self, simple_matmul_onnx: Path) -> None:
+        session = OpenVINOSession(simple_matmul_onnx, device="cpu")
+        with pytest.raises(ValueError, match="inputs cannot be empty"):
+            session.run({})
+
+    def test_run_enforces_input_dtype(self, simple_matmul_onnx: Path) -> None:
+        """Float64 input is coerced to the model's float32 without error."""
+        session = OpenVINOSession(simple_matmul_onnx, device="cpu")
+        outputs = session.run({"A": np.ones((1, 4), dtype=np.float64)})
+        assert outputs["C"].dtype == np.float32
+
+    def test_perf_records_samples(self, simple_matmul_onnx: Path) -> None:
+        session = OpenVINOSession(simple_matmul_onnx, device="cpu")
+        inputs = {"A": np.zeros((1, 4), dtype=np.float32)}
+        with session.perf(warmup=2) as stats:
+            for _ in range(7):
+                session.run(inputs)
+        assert stats.total_count == 7
+        assert stats.count == 5  # warmup excluded
+        assert stats.mean_ms > 0
+
+    def test_running_model_path_is_input(self, simple_matmul_onnx: Path) -> None:
+        session = OpenVINOSession(simple_matmul_onnx, device="cpu")
+        session.compile()
+        assert session.running_model_path == simple_matmul_onnx
+
+    def test_missing_file_raises(self, tmp_path: Path) -> None:
+        with pytest.raises(FileNotFoundError):
+            OpenVINOSession(tmp_path / "does_not_exist.onnx")

From 9c7d8771191b7800cb06c264c1f017cb13528c35 Mon Sep 17 00:00:00 2001
From: xieofxie <xieofxie@126.com>
Date: Wed, 24 Jun 2026 16:24:43 +0800
Subject: [PATCH 2/3] feat(perf): type --runtime as RuntimeName and pre-check
 OV device

- Add RuntimeName Literal + RUNTIME_NAMES to constants (mirrors CompilerName),
  thread it through BenchmarkConfig and the perf CLI instead of bare str.
- Fail fast in OpenVINOSession.compile() when the requested device is absent
  from Core().available_devices, with a readable message instead of a raw
  backend stack trace. AUTO is exempt; matches plain (GPU) and indexed (GPU.0)
  device names.
- Add a hardware-independent unit test for the unavailable-device path.
---
 src/winml/modelkit/commands/perf.py                |  8 ++++----
 .../modelkit/session/openvino/openvino_session.py  | 14 ++++++++++++++
 src/winml/modelkit/utils/constants.py              |  9 +++++++++
 tests/unit/session/test_openvino_session.py        |  8 ++++++++
 4 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/src/winml/modelkit/commands/perf.py b/src/winml/modelkit/commands/perf.py
index 70a6dfcd8..fedbafc1e 100644
--- a/src/winml/modelkit/commands/perf.py
+++ b/src/winml/modelkit/commands/perf.py
@@ -28,7 +28,7 @@
 from rich.table import Table
 
 from ..utils import cli as cli_utils
-from ..utils.constants import EPName, EPNameOrAlias
+from ..utils.constants import RUNTIME_NAMES, EPName, EPNameOrAlias, RuntimeName
 from ..utils.logging import configure_logging
 from ._live_chart import LiveMonitorDisplay
 
@@ -94,7 +94,7 @@ class BenchmarkConfig:
     shape_config: dict | None = None
     # Inference runtime backend: "ort" (ONNX Runtime, default) or "openvino"
     # (OpenVINO Runtime on the raw ONNX file, for ORT-vs-OV comparison).
-    runtime: str = "ort"
+    runtime: RuntimeName = "ort"
 
 
 @dataclass
@@ -1540,7 +1540,7 @@ def _run_simple_loop(
 )
 @click.option(
     "--runtime",
-    type=click.Choice(["ort", "openvino"]),
+    type=click.Choice(list(RUNTIME_NAMES)),
     default="ort",
     show_default=True,
     help="Inference runtime backend. 'ort' = ONNX Runtime (InferenceSession). "
@@ -1632,7 +1632,7 @@ def perf(
     precision: str,
     ep: EPNameOrAlias | None,
     ep_options: tuple[str, ...],
-    runtime: str,
+    runtime: RuntimeName,
     output: Path | None,
     batch_size: int,
     shape_config_path: Path | None,
diff --git a/src/winml/modelkit/session/openvino/openvino_session.py b/src/winml/modelkit/session/openvino/openvino_session.py
index 729c8bf22..cac20dd0a 100644
--- a/src/winml/modelkit/session/openvino/openvino_session.py
+++ b/src/winml/modelkit/session/openvino/openvino_session.py
@@ -110,6 +110,20 @@ def compile(self) -> None:
 
         requested = _OV_DEVICE_MAP.get(self._device, self._device.upper())
         core = ov.Core()
+
+        # Friendly fail-fast when the requested device isn't present, instead of
+        # a raw backend stack trace from compile_model. AUTO is a virtual plugin
+        # (always selectable); concrete devices may be listed plain ("GPU") or
+        # indexed ("GPU.0"), so match on the base name.
+        available = core.available_devices
+        if requested != "AUTO" and not any(
+            dev == requested or dev.startswith(f"{requested}.") for dev in available
+        ):
+            raise RuntimeError(
+                f"OpenVINO device '{requested}' (from --device {self._device}) is "
+                f"not available. OpenVINO sees: {available}"
+            )
+
         model = core.read_model(str(self._onnx_path))
         self._compiled = core.compile_model(model, requested, self._provider_options)
 
diff --git a/src/winml/modelkit/utils/constants.py b/src/winml/modelkit/utils/constants.py
index 4e85aaca6..6356a1306 100644
--- a/src/winml/modelkit/utils/constants.py
+++ b/src/winml/modelkit/utils/constants.py
@@ -64,6 +64,15 @@
 COMPILER_NAMES: tuple[CompilerName, ...] = get_args(CompilerName)
 
 
+# Inference runtime backends selectable via ``--runtime`` (see commands/perf.py):
+#   "ort"      -> ONNX Runtime (ort.InferenceSession via WinMLSession, default)
+#   "openvino" -> OpenVINO Runtime on the raw ONNX (ORT-vs-OV comparison)
+RuntimeName = Literal["ort", "openvino"]
+
+# Runtime-iterable form of ``RuntimeName`` (e.g. for the CLI choice list).
+RUNTIME_NAMES: tuple[RuntimeName, ...] = get_args(RuntimeName)
+
+
 # Supported execution providers — derived from the ``EPName`` Literal above so
 # that ``utils.constants`` stays leaf-level (no import dependency on sysinfo).
 # Membership parity with ``sysinfo.device._EP_DEVICE_MAP`` is enforced by
diff --git a/tests/unit/session/test_openvino_session.py b/tests/unit/session/test_openvino_session.py
index a6f297bc0..8d514ce0f 100644
--- a/tests/unit/session/test_openvino_session.py
+++ b/tests/unit/session/test_openvino_session.py
@@ -94,3 +94,11 @@ def test_running_model_path_is_input(self, simple_matmul_onnx: Path) -> None:
     def test_missing_file_raises(self, tmp_path: Path) -> None:
         with pytest.raises(FileNotFoundError):
             OpenVINOSession(tmp_path / "does_not_exist.onnx")
+
+    def test_unavailable_device_raises_friendly_error(self, simple_matmul_onnx: Path) -> None:
+        """A device absent from Core().available_devices fails fast with a
+        readable message instead of a raw backend stack trace. Uses a bogus
+        device name so the test is hardware-independent."""
+        session = OpenVINOSession(simple_matmul_onnx, device="bogus")
+        with pytest.raises(RuntimeError, match=r"not available\. OpenVINO sees"):
+            session.compile()

From d09a9388b5fb259c25355156d188b26cec005523 Mon Sep 17 00:00:00 2001
From: xieofxie <xieofxie@126.com>
Date: Wed, 24 Jun 2026 16:35:35 +0800
Subject: [PATCH 3/3] feat(perf): prompt to install winml-cli[openvino] when
 OpenVINO is missing

Wrap the openvino import in OpenVINOSession.compile() so an absent package
raises a clear install hint (pip install winml-cli[openvino]) instead of a
bare ModuleNotFoundError. Add a unit test that simulates the missing module.
---
 .../modelkit/session/openvino/openvino_session.py   |  8 +++++++-
 tests/unit/session/test_openvino_session.py         | 13 +++++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/src/winml/modelkit/session/openvino/openvino_session.py b/src/winml/modelkit/session/openvino/openvino_session.py
index cac20dd0a..f841fdb10 100644
--- a/src/winml/modelkit/session/openvino/openvino_session.py
+++ b/src/winml/modelkit/session/openvino/openvino_session.py
@@ -106,7 +106,13 @@ def compile(self) -> None:
             logger.debug("Already compiled for %s", self._device)
             return
 
-        import openvino as ov
+        try:
+            import openvino as ov
+        except ImportError as exc:
+            raise ImportError(
+                "OpenVINO is not installed but --runtime openvino was requested. "
+                "Install it with: pip install winml-cli[openvino]"
+            ) from exc
 
         requested = _OV_DEVICE_MAP.get(self._device, self._device.upper())
         core = ov.Core()
diff --git a/tests/unit/session/test_openvino_session.py b/tests/unit/session/test_openvino_session.py
index 8d514ce0f..7dcf7b06f 100644
--- a/tests/unit/session/test_openvino_session.py
+++ b/tests/unit/session/test_openvino_session.py
@@ -95,6 +95,19 @@ def test_missing_file_raises(self, tmp_path: Path) -> None:
         with pytest.raises(FileNotFoundError):
             OpenVINOSession(tmp_path / "does_not_exist.onnx")
 
+    def test_missing_openvino_prompts_install(
+        self, simple_matmul_onnx: Path, monkeypatch: pytest.MonkeyPatch
+    ) -> None:
+        """When the openvino package is absent, compile() raises a clear
+        install hint instead of a bare ModuleNotFoundError. Simulates absence
+        by shadowing the module so ``import openvino`` fails."""
+        import sys
+
+        monkeypatch.setitem(sys.modules, "openvino", None)
+        session = OpenVINOSession(simple_matmul_onnx, device="cpu")
+        with pytest.raises(ImportError, match=r"pip install winml-cli\[openvino\]"):
+            session.compile()
+
     def test_unavailable_device_raises_friendly_error(self, simple_matmul_onnx: Path) -> None:
         """A device absent from Core().available_devices fails fast with a
         readable message instead of a raw backend stack trace. Uses a bogus