diff --git a/src/winml/modelkit/commands/eval.py b/src/winml/modelkit/commands/eval.py
index 02779f270..fb13096c2 100644
--- a/src/winml/modelkit/commands/eval.py
+++ b/src/winml/modelkit/commands/eval.py
@@ -257,6 +257,20 @@ def eval(
             cfg.precision,
         )
 
+    # The build-pipeline flags only take effect when eval rebuilds the model.
+    # With a pre-built ONNX path and skip_build (the default), they are no-ops
+    # forwarded to from_onnx, so warn the user that they were ignored — mirrors
+    # the --precision warning above. Shared with perf via utils/cli.py.
+    build_flags_warning = cli_utils.ignored_build_flags_warning(
+        skip_build_onnx=cfg.model_path is not None and cfg.skip_build,
+        quant=cfg.quant,
+        optimize=cfg.optimize,
+        analyze=cfg.analyze,
+        max_optim_iterations=cfg.max_optim_iterations,
+    )
+    if build_flags_warning:
+        logger.warning(build_flags_warning)
+
     logger.debug("Effective eval config: %s", cfg.to_dict())
 
     json_mode = output_format == "json"
diff --git a/src/winml/modelkit/commands/perf.py b/src/winml/modelkit/commands/perf.py
index 412b1e675..d64dda258 100644
--- a/src/winml/modelkit/commands/perf.py
+++ b/src/winml/modelkit/commands/perf.py
@@ -1749,6 +1749,18 @@ def perf(
                     "pre-exported ONNX files (shapes are baked into the model)."
                 )
                 config.shape_config = None
+            # Build-pipeline flags are forwarded to from_onnx but no-op when the
+            # build is skipped (the default). Warn so the silent no-op is visible
+            # — shared detection with eval via utils/cli.py.
+            build_flags_warning = cli_utils.ignored_build_flags_warning(
+                skip_build_onnx=skip_build,
+                quant=quant,
+                optimize=optimize,
+                analyze=analyze,
+                max_optim_iterations=max_optim_iterations,
+            )
+            if build_flags_warning:
+                console.print(f"[yellow]Warning:[/yellow] {build_flags_warning}")
             console.print(f"[dim]Benchmarking ONNX:[/dim] {model_path}")
         else:
             if precision != "auto":
diff --git a/src/winml/modelkit/optim/api.py b/src/winml/modelkit/optim/api.py
index f403d8c4c..8993d76f7 100644
--- a/src/winml/modelkit/optim/api.py
+++ b/src/winml/modelkit/optim/api.py
@@ -160,19 +160,6 @@ def _convert_to_kwargs(config: dict[str, Any], all_caps: dict[str, Any]) -> dict
     return result
 
 
-def _hack_inject_quant_preprocess_metadata(model: onnx.ModelProto) -> None:
-    """Inject metadata that signals pre-processing was done.
-
-    Suppresses the ORT quantization warning:
-    'Please consider to run pre-processing before quantization.'
-    """
-    metadata = {"onnx.quant.pre_process": "onnxruntime.quant"}
-    if model.metadata_props:
-        for prop in model.metadata_props:
-            metadata[prop.key] = prop.value
-    onnx.helper.set_model_props(model, metadata)
-
-
 def optimize_onnx(
     model: str | Path | onnx.ModelProto,
     output: str | Path | None = None,
@@ -272,9 +259,6 @@ def optimize_onnx(
     optimized_model = optimizer.optimize(loaded_model, **optimizer_kwargs)
     optimized_model = optimizer.optimize(optimized_model, **optimizer_kwargs)
 
-    # Step 9.5: Inject quant pre-processing metadata to suppress ORT warning
-    _hack_inject_quant_preprocess_metadata(optimized_model)
-
     # Step 10: Save if output path provided
     if output is not None:
         output_path = Path(output)
diff --git a/src/winml/modelkit/quant/quantizer.py b/src/winml/modelkit/quant/quantizer.py
index c562599de..fe7a32dea 100644
--- a/src/winml/modelkit/quant/quantizer.py
+++ b/src/winml/modelkit/quant/quantizer.py
@@ -152,13 +152,20 @@ def quantize_onnx(
             extra_options=extra_options,
         )
 
-        # Step 2: Capture metadata before ORT quantization (it rebuilds the graph)
+        # Step 2: Load the input model, capture its metadata snapshot (ORT
+        # rebuilds the graph during quantization, so we restore afterwards),
+        # and tag it as pre-processed so quantize_static() does not emit the
+        # "run pre-processing before quantization" warning.  We hand this
+        # in-memory ModelProto to ORT directly rather than mutating the user's
+        # input file on disk.
+        from onnxruntime.quantization.quant_utils import add_pre_process_metadata
+
         from ..onnx import capture_metadata, load_onnx, restore_metadata, save_onnx
         from .qdq_fix import fix_qdq_dtype_info
 
-        pre_quant_model = load_onnx(model_path, load_weights=False, validate=False)
-        metadata_snapshot = capture_metadata(pre_quant_model)
-        del pre_quant_model
+        input_model = load_onnx(model_path, validate=False)
+        metadata_snapshot = capture_metadata(input_model)
+        add_pre_process_metadata(input_model)
 
         # Step 3: Apply quantization
         if use_external_data:
@@ -171,9 +178,8 @@ def quantize_onnx(
         # output directory rather than the process CWD.  Without this, a stale
         # .onnx.data sidecar in the process CWD from a previous build triggers
         # a false-positive FileExistsError even when the output dir is clean.
-        # Use absolute paths so the chdir does not break relative input/output
+        # Use an absolute output path so the chdir does not break its
         # resolution.  output_path.parent is guaranteed to exist (caller mkdir).
-        abs_model_input = str(Path(model_path).resolve())
         abs_model_output = str(Path(output_path).resolve())
         # Remove stale output artifacts from a previous build.  ORT/onnx refuse
         # to overwrite an existing external-data sidecar (e.g. quantized.onnx.data),
@@ -187,7 +193,7 @@ def quantize_onnx(
         try:
             os.chdir(output_path.parent)
             quantize(
-                model_input=abs_model_input,
+                model_input=input_model,
                 model_output=abs_model_output,
                 quant_config=qdq_config,
             )
diff --git a/src/winml/modelkit/utils/cli.py b/src/winml/modelkit/utils/cli.py
index 185751fb3..e0ef63d87 100644
--- a/src/winml/modelkit/utils/cli.py
+++ b/src/winml/modelkit/utils/cli.py
@@ -631,9 +631,9 @@ def max_optim_iterations_option(optional_message: str | None = None) -> Callable
     Returns:
         Decorator function.
     """
-    base_help = "Maximum autoconf re-optimization rounds (default: 3). --no-analyze sets this to 0."
+    base_help = "Maximum autoconf re-optimization rounds (default: 3). --no-analyze sets this to 0"
     if optional_message:
-        base_help = f"{base_help} {optional_message}"
+        base_help = f"{base_help}. {optional_message}"
     return click.option(
         "--max-optim-iterations",
         "max_optim_iterations",
@@ -676,6 +676,54 @@ def build_pipeline_extra_kwargs(
     return extra
 
 
+def ignored_build_flags_warning(
+    *,
+    skip_build_onnx: bool,
+    quant: bool = True,
+    optimize: bool = True,
+    analyze: bool = True,
+    max_optim_iterations: int | None = None,
+) -> str | None:
+    """Build a warning for build-pipeline flags that are no-ops on a pre-built ONNX.
+
+    Commands that accept a pre-built ``.onnx`` input (``eval``, ``perf``) forward
+    ``--no-quant``/``--no-optimize``/``--no-analyze``/``--max-optim-iterations`` to
+    ``from_onnx``, but with ``skip_build`` (the default) no build runs, so those
+    toggles silently take no effect. This returns a message naming the flags the
+    user actually set (or ``None`` when nothing was set or a build will run), so
+    callers can surface it through their own logger/console — mirroring the
+    ``--precision``-ignored warning.
+
+    Args:
+        skip_build_onnx: True when the input is a pre-built ONNX *and* the build
+            is skipped (the precondition under which the flags are no-ops).
+        quant/optimize/analyze: Enabled-semantics toggles (False = user passed
+            the ``--no-*`` form).
+        max_optim_iterations: Explicit value, or ``None`` when left at default.
+
+    Returns:
+        Warning message, or ``None`` if no ignored flags apply.
+    """
+    if not skip_build_onnx:
+        return None
+    ignored = [
+        flag
+        for flag, was_set in (
+            ("--no-quant", not quant),
+            ("--no-optimize", not optimize),
+            ("--no-analyze", not analyze),
+            ("--max-optim-iterations", max_optim_iterations is not None),
+        )
+        if was_set
+    ]
+    if not ignored:
+        return None
+    return (
+        f"{', '.join(ignored)} ignored for pre-built ONNX inputs "
+        "(no build runs; pass --no-skip-build to rebuild)."
+    )
+
+
 def allow_unsupported_nodes_option(optional_message: str | None = None) -> Callable[[F], F]:
     """Add shared --allow-unsupported-nodes option to a Click command.
 
diff --git a/tests/unit/commands/test_eval.py b/tests/unit/commands/test_eval.py
index 7f70a9322..92271e06f 100644
--- a/tests/unit/commands/test_eval.py
+++ b/tests/unit/commands/test_eval.py
@@ -716,6 +716,90 @@ def test_default_dataset_logs_warning(
         ), f"expected warning not found in {msgs!r}"
 
 
+# ---------------------------------------------------------------------------
+# Build-pipeline flags ignored for pre-built ONNX inputs
+# ---------------------------------------------------------------------------
+
+
+class TestPrebuiltOnnxIgnoredBuildFlags:
+    """A pre-built ONNX path with skip_build (the default) makes the build
+    flags (--no-quant/--no-optimize/--no-analyze/--max-optim-iterations)
+    no-ops, so the command warns they were ignored."""
+
+    @staticmethod
+    def _run(runner: CliRunner, args: list[str]):
+        """Invoke eval with ``evaluate`` stubbed so only the CLI front-half
+        (config resolution + warnings) runs. Returns the CliRunner result.
+
+        ``commands.eval`` imports ``evaluate`` lazily via ``from ..eval import
+        evaluate``, so the stub is installed on the ``winml.modelkit.eval``
+        package where that import resolves it."""
+        from winml.modelkit.commands.eval import eval as eval_cmd
+
+        with (
+            patch("winml.modelkit.eval.evaluate", return_value=object()),
+            patch("winml.modelkit.commands.eval._write_and_display", return_value=None),
+        ):
+            return runner.invoke(eval_cmd, args, obj={"debug": False})
+
+    def test_no_quant_warns_for_prebuilt_onnx(
+        self,
+        runner: CliRunner,
+        onnx_file,
+        caplog,
+    ):
+        import logging as _logging
+
+        with caplog.at_level(_logging.WARNING, logger="winml.modelkit.commands.eval"):
+            result = self._run(
+                runner,
+                [
+                    "-m",
+                    str(onnx_file),
+                    "--model-id",
+                    "some/model",
+                    "--task",
+                    "image-classification",
+                    "--no-quant",
+                    "--max-optim-iterations",
+                    "5",
+                ],
+            )
+        assert result.exit_code == 0, result.output
+        msgs = [r.getMessage() for r in caplog.records]
+        assert any(
+            "--no-quant" in m and "--max-optim-iterations" in m and "pre-built ONNX" in m
+            for m in msgs
+        ), f"expected ignored-build-flags warning not found in {msgs!r}"
+
+    def test_no_warning_when_flags_left_default(
+        self,
+        runner: CliRunner,
+        onnx_file,
+        caplog,
+    ):
+        """Default build flags emit no ignored-flags warning."""
+        import logging as _logging
+
+        with caplog.at_level(_logging.WARNING, logger="winml.modelkit.commands.eval"):
+            result = self._run(
+                runner,
+                [
+                    "-m",
+                    str(onnx_file),
+                    "--model-id",
+                    "some/model",
+                    "--task",
+                    "image-classification",
+                ],
+            )
+        assert result.exit_code == 0, result.output
+        msgs = [r.getMessage() for r in caplog.records]
+        assert not any("ignored for pre-built ONNX inputs (no build runs" in m for m in msgs), (
+            f"unexpected ignored-build-flags warning in {msgs!r}"
+        )
+
+
 # ---------------------------------------------------------------------------
 # --format json
 # ---------------------------------------------------------------------------
diff --git a/tests/unit/commands/test_perf_cli.py b/tests/unit/commands/test_perf_cli.py
index ba705d0a6..eacff3dd9 100644
--- a/tests/unit/commands/test_perf_cli.py
+++ b/tests/unit/commands/test_perf_cli.py
@@ -374,6 +374,72 @@ def capture_config(config: BenchmarkConfig) -> MagicMock:
         assert "Benchmarking ONNX" in result.output
         assert captured["config"].shape_config is None
 
+    def test_cli_onnx_warns_ignored_build_flags(self, runner: CliRunner, tmp_path: Path) -> None:
+        """Build-pipeline flags are no-ops for a pre-built ONNX with skip_build,
+        so the CLI surfaces a warning naming the flags the user set."""
+        onnx_file = tmp_path / "model.onnx"
+        onnx_file.write_bytes(b"fake onnx")
+
+        def capture_config(_config: BenchmarkConfig) -> MagicMock:
+            mock = MagicMock()
+            mock.run.return_value = MagicMock()
+            return mock
+
+        with (
+            patch(
+                "winml.modelkit.commands.perf.PerfBenchmark",
+                side_effect=capture_config,
+            ),
+            patch("winml.modelkit.commands.perf.display_console_report"),
+            patch("winml.modelkit.commands.perf.write_json_report"),
+        ):
+            result = runner.invoke(
+                perf,
+                [
+                    "-m",
+                    str(onnx_file),
+                    "--no-quant",
+                    "--no-optimize",
+                    "-o",
+                    str(tmp_path / "out.json"),
+                ],
+                obj={},
+            )
+
+        assert result.exit_code == 0, result.output
+        assert "--no-quant" in result.output
+        assert "--no-optimize" in result.output
+        assert "pre-built ONNX" in result.output
+
+    def test_cli_onnx_no_build_flag_warning_at_defaults(
+        self, runner: CliRunner, tmp_path: Path
+    ) -> None:
+        """No ignored-build-flags warning when the flags are left at defaults."""
+        onnx_file = tmp_path / "model.onnx"
+        onnx_file.write_bytes(b"fake onnx")
+
+        def capture_config(_config: BenchmarkConfig) -> MagicMock:
+            mock = MagicMock()
+            mock.run.return_value = MagicMock()
+            return mock
+
+        with (
+            patch(
+                "winml.modelkit.commands.perf.PerfBenchmark",
+                side_effect=capture_config,
+            ),
+            patch("winml.modelkit.commands.perf.display_console_report"),
+            patch("winml.modelkit.commands.perf.write_json_report"),
+        ):
+            result = runner.invoke(
+                perf,
+                ["-m", str(onnx_file), "-o", str(tmp_path / "out.json")],
+                obj={},
+            )
+
+        assert result.exit_code == 0, result.output
+        assert "ignored for pre-built ONNX inputs (no build runs" not in result.output
+
     def test_cli_onnx_not_found_error(self, runner: CliRunner, tmp_path: Path) -> None:
         """CLI with non-existent .onnx file should raise FileNotFoundError."""
         missing = tmp_path / "missing.onnx"
diff --git a/tests/unit/test_quantizer.py b/tests/unit/test_quantizer.py
index 8d2118df1..927956c87 100644
--- a/tests/unit/test_quantizer.py
+++ b/tests/unit/test_quantizer.py
@@ -39,6 +39,11 @@ class _FakeOrtQuantizationModule(ModuleType):
     QuantType: Any
     get_qdq_config: Any
     quantize: Any
+    quant_utils: ModuleType
+
+
+class _FakeOrtQuantUtilsModule(ModuleType):
+    add_pre_process_metadata: Any
 
 
 class _FakeOnnxModule(ModuleType):
@@ -74,9 +79,13 @@ def _install_fake_ort_quantization(monkeypatch: pytest.MonkeyPatch, *, quantize_
     )
     quant_module.get_qdq_config = lambda **_: SimpleNamespace(use_external_data_format=False)
     quant_module.quantize = quantize_impl
+    quant_utils_module = _FakeOrtQuantUtilsModule("onnxruntime.quantization.quant_utils")
+    quant_utils_module.add_pre_process_metadata = lambda _model: None
+    quant_module.quant_utils = quant_utils_module
     ort_module.quantization = quant_module
     monkeypatch.setitem(sys.modules, "onnxruntime", ort_module)
     monkeypatch.setitem(sys.modules, "onnxruntime.quantization", quant_module)
+    monkeypatch.setitem(sys.modules, "onnxruntime.quantization.quant_utils", quant_utils_module)
 
 
 def test_quantize_onnx_removes_only_exact_external_data_sidecar(
@@ -92,8 +101,12 @@ def test_quantize_onnx_removes_only_exact_external_data_sidecar(
     exact_sidecar.write_text("stale")
     extra_suffix_sidecar.write_text("keep")
 
-    def fake_quantize(*, model_input: str, model_output: str, quant_config) -> None:
-        assert model_input == str(model_path.resolve())
+    # The quantizer hands the in-memory input model (not the path) to ORT so it
+    # can tag it as pre-processed without mutating the user's input file.
+    input_model = SimpleNamespace()
+
+    def fake_quantize(*, model_input, model_output: str, quant_config) -> None:
+        assert model_input is input_model
         assert model_output == str(output_path.resolve())
         assert quant_config.use_external_data_format is True
         assert not exact_sidecar.exists()
@@ -106,7 +119,7 @@ def fake_quantize(*, model_input: str, model_output: str, quant_config) -> None:
     quantized_model = SimpleNamespace(
         graph=SimpleNamespace(node=[SimpleNamespace(op_type="QuantizeLinear")])
     )
-    load_results = [SimpleNamespace(), quantized_model]
+    load_results = [input_model, quantized_model]
     fake_onnx_module.capture_metadata = lambda _model: SimpleNamespace(node_count=0)
     fake_onnx_module.load_onnx = lambda *_args, **_kwargs: load_results.pop(0)
     fake_onnx_module.restore_metadata = lambda *_args, **_kwargs: None
diff --git a/tests/unit/utils/test_cli.py b/tests/unit/utils/test_cli.py
index 607a64d38..28e12f459 100644
--- a/tests/unit/utils/test_cli.py
+++ b/tests/unit/utils/test_cli.py
@@ -13,6 +13,7 @@
 from winml.modelkit.utils.cli import (
     analyze_option,
     build_pipeline_extra_kwargs,
+    ignored_build_flags_warning,
     max_optim_iterations_option,
     optimize_option,
     parse_ep_options,
@@ -212,9 +213,9 @@ class TestMaxOptimIterationsOption:
     """Tests for the shared max_optim_iterations_option() decorator."""
 
     @staticmethod
-    def _make_cmd() -> click.Command:
+    def _make_cmd(optional_message: str | None = None) -> click.Command:
         @click.command()
-        @max_optim_iterations_option()
+        @max_optim_iterations_option(optional_message=optional_message)
         def cmd(max_optim_iterations: int | None) -> None:
             click.echo(repr(max_optim_iterations))
 
@@ -230,6 +231,13 @@ def test_int_value(self) -> None:
     def test_rejects_non_int(self) -> None:
         assert CliRunner().invoke(self._make_cmd(), ["--max-optim-iterations", "x"]).exit_code != 0
 
+    def test_optional_message_appended_with_period_separator(self) -> None:
+        """optional_message joins the base help with '. ', matching sibling helpers."""
+        result = CliRunner().invoke(self._make_cmd(optional_message="Extra note."), ["--help"])
+        # Click reflows --help text, so assert on the joined token rather than line layout.
+        joined = " ".join(result.output.split())
+        assert "to 0. Extra note." in joined
+
 
 class TestBuildPipelineExtraKwargs:
     """Tests for the shared build_pipeline_extra_kwargs() translator."""
@@ -256,3 +264,53 @@ def test_no_analyze_wins_over_explicit_iterations(self) -> None:
     def test_combined_optimize_and_analyze(self) -> None:
         result = build_pipeline_extra_kwargs(optimize=False, analyze=False)
         assert result == {"skip_optimize": True, "hack_max_optim_iterations": 0}
+
+
+class TestIgnoredBuildFlagsWarning:
+    """Tests for the shared ignored_build_flags_warning() helper."""
+
+    def test_returns_none_when_build_runs(self) -> None:
+        """No warning when a build will run, even with flags set."""
+        assert (
+            ignored_build_flags_warning(
+                skip_build_onnx=False,
+                quant=False,
+                optimize=False,
+                analyze=False,
+                max_optim_iterations=5,
+            )
+            is None
+        )
+
+    def test_returns_none_when_no_flags_set(self) -> None:
+        """No warning when all flags are at their defaults."""
+        assert ignored_build_flags_warning(skip_build_onnx=True) is None
+
+    def test_names_each_set_flag(self) -> None:
+        msg = ignored_build_flags_warning(
+            skip_build_onnx=True,
+            quant=False,
+            optimize=False,
+            analyze=False,
+            max_optim_iterations=5,
+        )
+        assert msg is not None
+        for flag in ("--no-quant", "--no-optimize", "--no-analyze", "--max-optim-iterations"):
+            assert flag in msg
+        assert "pre-built ONNX" in msg
+        assert "--no-skip-build" in msg
+
+    def test_only_includes_set_flags(self) -> None:
+        """Unset flags are not named."""
+        msg = ignored_build_flags_warning(skip_build_onnx=True, quant=False)
+        assert msg is not None
+        assert "--no-quant" in msg
+        assert "--no-optimize" not in msg
+        assert "--no-analyze" not in msg
+        assert "--max-optim-iterations" not in msg
+
+    def test_max_optim_zero_counts_as_set(self) -> None:
+        """An explicit 0 is a user-set value (only None is the default)."""
+        msg = ignored_build_flags_warning(skip_build_onnx=True, max_optim_iterations=0)
+        assert msg is not None
+        assert "--max-optim-iterations" in msg