Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 96 additions & 5 deletions src/winml/modelkit/commands/perf.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from rich.table import Table

from ..utils import cli as cli_utils
from ..utils.constants import EPName, EPNameOrAlias
from ..utils.constants import RUNTIME_NAMES, EPName, EPNameOrAlias, RuntimeName
from ..utils.logging import configure_logging
from ._live_chart import LiveMonitorDisplay

Expand Down Expand Up @@ -92,6 +92,9 @@ class BenchmarkConfig:
ep: EPNameOrAlias | None = None
ep_options: dict[str, str] | None = None
shape_config: dict | None = None
# Inference runtime backend: "ort" (ONNX Runtime, default) or "openvino"
# (OpenVINO Runtime on the raw ONNX file, for ORT-vs-OV comparison).
runtime: RuntimeName = "ort"


@dataclass
Expand Down Expand Up @@ -320,6 +323,54 @@ def effective_batch_size(
return requested


# =============================================================================
# OpenVINO runner adapter
# =============================================================================


class _OpenVINOModel:
    """Thin single-session model facade backed by OpenVINO Runtime.

    ``PerfBenchmark._run_single`` reads a fixed surface from a
    ``WinMLPreTrainedModel``: ``io_config``, ``device``, ``ep_name``,
    ``task``, ``running_model_path`` and an ``_session`` offering
    ``compile`` / ``run`` / ``perf``. This class presents that same surface
    and forwards to one ``OpenVINOSession``, so the shared benchmark engine
    can drive OpenVINO without a WinMLAutoModel build — OpenVINO reads the
    raw ONNX directly.
    """

    def __init__(
        self,
        onnx_path: Path,
        device: str,
        provider_options: dict[str, str] | None = None,
    ) -> None:
        """Create the backing ``OpenVINOSession``.

        Args:
            onnx_path: Path handed to ``OpenVINOSession`` as its first
                positional argument.
            device: Forwarded as the session's ``device`` keyword.
            provider_options: Forwarded unchanged as the session's
                ``provider_options`` keyword; may be ``None``.
        """
        # Function-scope import: the session class is only needed once a
        # model is actually constructed.
        from ..session.openvino.openvino_session import OpenVINOSession

        session = OpenVINOSession(
            onnx_path,
            device=device,
            provider_options=provider_options,
        )
        # Stored under the private name the benchmark engine reads directly.
        self._session = session

    @property
    def device(self) -> str:
        """Device reported by the underlying session."""
        return self._session.device

    @property
    def ep_name(self) -> EPName | None:
        """Execution-provider name reported by the underlying session."""
        return self._session.ep_name

    @property
    def io_config(self) -> dict:
        """I/O configuration reported by the underlying session."""
        return self._session.io_config

    @property
    def running_model_path(self) -> Path:
        """Model path reported by the underlying session."""
        return self._session.running_model_path

    @property
    def task(self) -> str | None:
        """Always ``None``: OpenVINO runs the raw ONNX with no task metadata."""
        return None


# =============================================================================
# Benchmark Engine
# =============================================================================
Expand All @@ -341,7 +392,7 @@ class PerfBenchmark:
def __init__(self, config: BenchmarkConfig) -> None:
"""Initialize benchmark with configuration."""
self.config = config
self._model: WinMLPreTrainedModel | WinMLCompositeModel | None = None
self._model: WinMLPreTrainedModel | WinMLCompositeModel | _OpenVINOModel | None = None
self._inputs: dict[str, np.ndarray] | None = None
self._effective_batch: int = config.batch_size
self._memory: dict[str, float] | None = None
Expand Down Expand Up @@ -560,13 +611,31 @@ def _load_model(self) -> None:
from ..config import WinMLBuildConfig
from ..models import WinMLAutoModel

model_id = self.config.model_id
model_path = Path(model_id)
is_onnx = model_path.suffix.lower() == ".onnx"

# OpenVINO runner: read the raw ONNX directly (no build, no ORT EP
# resolution — OpenVINO is independent of ORT's execution providers).
# ONNX input only; the CLI guards non-ONNX inputs earlier.
if self.config.runtime == "openvino":
if not is_onnx:
raise ValueError(
f"--runtime openvino requires an ONNX (.onnx) model file, got: {model_id}"
)
if not model_path.exists():
raise FileNotFoundError(f"ONNX file not found: {model_path}")
self._model = _OpenVINOModel(
model_path,
device=self.config.device,
provider_options=self.config.ep_options,
)
return

# Resolve the concrete device + EP first so a bad combo fails fast,
# before from_pretrained/from_onnx kick off the build pipeline.
self._resolve_device_ep()

model_id = self.config.model_id
model_path = Path(model_id)
is_onnx = model_path.suffix.lower() == ".onnx"
if is_onnx and not model_path.exists():
# Surface a clear error for programmatic callers. The CLI guards
# this earlier, but without this check from_pretrained would fall
Expand Down Expand Up @@ -1469,6 +1538,15 @@ def _run_simple_loop(
@cli_utils.ep_options_option(
optional_message="Applied to both HuggingFace model IDs and ONNX file inputs.",
)
@click.option(
"--runtime",
type=click.Choice(list(RUNTIME_NAMES)),
default="ort",
show_default=True,
help="Inference runtime backend. 'ort' = ONNX Runtime (InferenceSession). "
"'openvino' runs the raw ONNX file directly via OpenVINO Runtime for an "
"ORT-vs-OV comparison (ONNX input only; build/quant/--ep flags are ignored).",
)
@cli_utils.output_option(
"Output JSON file path. Defaults to "
"'~/.cache/winml/perf/<model_slug>[/<module_class>]/<timestamp>.json'."
Expand Down Expand Up @@ -1554,6 +1632,7 @@ def perf(
precision: str,
ep: EPNameOrAlias | None,
ep_options: tuple[str, ...],
runtime: RuntimeName,
output: Path | None,
batch_size: int,
shape_config_path: Path | None,
Expand Down Expand Up @@ -1613,6 +1692,17 @@ def perf(

hf_model = model

# OpenVINO runner: ONNX input only, and no per-module path (submodule
# discovery walks a live nn.Module graph, which OpenVINO never builds).
if runtime == "openvino":
if Path(hf_model).suffix.lower() != ".onnx":
raise click.UsageError(
"--runtime openvino requires an ONNX (.onnx) model file "
f"(not a HuggingFace model ID), got: {hf_model}"
)
if module_class:
raise click.UsageError("--runtime openvino does not support --module benchmarking.")

# Apply build config defaults (CLI explicit options take precedence).
# Read raw JSON so missing keys are distinguishable from dataclass defaults.
if config_file is not None:
Expand Down Expand Up @@ -1731,6 +1821,7 @@ def perf(
ep=ep,
ep_options=ep_provider_options,
shape_config=shape_config,
runtime=runtime,
)

try:
Expand Down
2 changes: 2 additions & 0 deletions src/winml/modelkit/session/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from .monitor.openvino_monitor import OpenVinoMonitor
from .monitor.qnn_monitor import QNNMonitor
from .monitor.vitisai_monitor import VitisAIMonitor
from .openvino.openvino_session import OpenVINOSession
from .qairt.qairt_session import WinMLQairtSession
from .session import InferenceError, SessionState, WinMLSession
from .stats import PerfStats
Expand All @@ -20,6 +21,7 @@
"HWMonitor",
"InferenceError",
"NullEPMonitor",
"OpenVINOSession",
"OpenVinoMonitor",
"PerfStats",
"QNNMonitor",
Expand Down
Loading
Loading