From 17d38f285f4df4c3a52228f224aaf21286f91a3f Mon Sep 17 00:00:00 2001 From: Yue Sun Date: Thu, 25 Jun 2026 12:48:33 +0800 Subject: [PATCH 1/2] test(quantize): use fake model for image-segmentation e2e calibration The image-segmentation quantize e2e test exported and calibrated the real nvidia/segformer-b0-finetuned-ade-512-512 model, whose heavy backbone caused random hangs on QNN hosts. Replace it with a tiny FP32 conv model that has identical segmentation I/O (pixel_values [batch, 3, H, W] -> logits [batch, num_labels, H/4, W/4]) and random weights, built via the new _build_fake_segmentation_onnx() helper. The ImageSegmentationDataset path is still fully exercised: the test keeps --model-name so the real image processor and dataset samples feed real pixel_values into calibration; only the heavy model is swapped out. --- tests/e2e/test_quantize_e2e.py | 86 +++++++++++++++++++++++++++++++--- 1 file changed, 80 insertions(+), 6 deletions(-) diff --git a/tests/e2e/test_quantize_e2e.py b/tests/e2e/test_quantize_e2e.py index d95288714..3452f08ad 100644 --- a/tests/e2e/test_quantize_e2e.py +++ b/tests/e2e/test_quantize_e2e.py @@ -62,6 +62,72 @@ def _build_tiny_onnx(path: Path, *, with_metadata: bool = True) -> None: onnx.save(model, str(path)) +# Image-segmentation I/O contract shared by HF semantic-segmentation exports +# (e.g. nvidia/segformer-*-ade-*): pixel_values [batch, 3, height, width] -> +# logits [batch, num_labels, height/4, width/4]. 150 = ADE20K class count. +_SEG_NUM_CHANNELS = 3 +_SEG_NUM_LABELS = 150 + + +def _build_fake_segmentation_onnx(path: Path) -> None: + """Build a tiny FP32 ONNX with segmentation-identical I/O and random weights. + + Replaces a real HF segmentation export (heavy backbone that can randomly + hang on QNN hosts during calibration) with a small conv stack that keeps the + same input/output contract: ``pixel_values`` [batch, 3, height, width] -> + ``logits`` [batch, num_labels, height/4, width/4]. Spatial dims stay dynamic + so the model accepts both calibration inputs (e.g. 512x512) and the test's + degenerate 1x1 inference probe. Two stride-2 convs reproduce the /4 logits + resolution; a 1x1 conv acts as the classifier head. + """ + rng = np.random.default_rng(1234) + pixel_values = onnx.helper.make_tensor_value_info( + "pixel_values", + onnx.TensorProto.FLOAT, + ["batch_size", _SEG_NUM_CHANNELS, "height", "width"], + ) + logits = onnx.helper.make_tensor_value_info( + "logits", + onnx.TensorProto.FLOAT, + ["batch_size", _SEG_NUM_LABELS, "height_out", "width_out"], + ) + + def _w(shape: tuple[int, ...], name: str) -> onnx.TensorProto: + return onnx.numpy_helper.from_array( + (rng.standard_normal(shape) * 0.1).astype(np.float32), name + ) + + w1 = _w((8, _SEG_NUM_CHANNELS, 3, 3), "seg_W1") + b1 = _w((8,), "seg_B1") + w2 = _w((16, 8, 3, 3), "seg_W2") + b2 = _w((16,), "seg_B2") + w3 = _w((_SEG_NUM_LABELS, 16, 1, 1), "seg_W3") + b3 = _w((_SEG_NUM_LABELS,), "seg_B3") + nodes = [ + onnx.helper.make_node( + "Conv", ["pixel_values", "seg_W1", "seg_B1"], ["c1"], + name="Conv_1", kernel_shape=[3, 3], strides=[2, 2], pads=[1, 1, 1, 1], + ), + onnx.helper.make_node("Relu", ["c1"], ["r1"], name="Relu_1"), + onnx.helper.make_node( + "Conv", ["r1", "seg_W2", "seg_B2"], ["c2"], + name="Conv_2", kernel_shape=[3, 3], strides=[2, 2], pads=[1, 1, 1, 1], + ), + onnx.helper.make_node("Relu", ["c2"], ["r2"], name="Relu_2"), + onnx.helper.make_node( + "Conv", ["r2", "seg_W3", "seg_B3"], ["logits"], + name="Classifier", kernel_shape=[1, 1], strides=[1, 1], pads=[0, 0, 0, 0], + ), + ] + graph = onnx.helper.make_graph( + nodes, "fake_segmentation", [pixel_values], [logits], [w1, b1, w2, b2, w3, b3] + ) + model = onnx.helper.make_model(graph, opset_imports=[onnx.helper.make_opsetid("", 17)]) + model.ir_version = 8 + onnx.checker.check_model(model) + onnx.save(model, str(path)) + + @pytest.fixture(scope="session") def tiny_onnx(tmp_path_factory: pytest.TempPathFactory) -> Path: d = tmp_path_factory.mktemp("tiny_quant") @@ -157,12 +223,20 @@ def onnx_objdet() -> Path: @pytest.fixture(scope="session") -def onnx_imgseg() -> Path: - return _export_hf_to_onnx( - "nvidia/segformer-b0-finetuned-ade-512-512", - "image-segmentation", - "segformer_b0", - ) +def onnx_imgseg(tmp_path_factory: pytest.TempPathFactory) -> Path: + """Fake segmentation ONNX standing in for a real HF export. + + The real ``nvidia/segformer-b0-finetuned-ade-512-512`` export ran as the + calibration model here, but its heavy backbone caused random hangs on QNN + hosts. This builds a tiny model with identical segmentation I/O instead, so + calibration still exercises the ImageSegmentationDataset path without running + a large model. The dataset itself (image processor + samples) is still loaded + from the real ``--model-name`` in the test. + """ + d = tmp_path_factory.mktemp("fake_imgseg") + p = d / "model.onnx" + _build_fake_segmentation_onnx(p) + return p @pytest.fixture(scope="session") From e856655b4b068bc01ae0cc6513cd4abdce84039b Mon Sep 17 00:00:00 2001 From: Yue Sun Date: Thu, 25 Jun 2026 14:45:13 +0800 Subject: [PATCH 2/2] test(quantize): move fake segmentation model into create_test_models Address review feedback: relocate the fake image-segmentation model builder out of the e2e test and into the shared tests/fixtures/create_test_models.py as create_fake_segmentation_model(), matching that module's existing create_*_model() -> ModelProto convention and wiring it into main(). Add tests/fixtures/__init__.py so the helper is importable (consistent with tests/assets). The quantize e2e fixture now imports and saves the shared model. No behavior change: I/O contract (pixel_values [batch, 3, H, W] -> logits [batch, num_labels, H/4, W/4]) and seeded-random weights are unchanged. --- tests/e2e/test_quantize_e2e.py | 78 ++----------------- tests/fixtures/__init__.py | 4 + tests/fixtures/create_test_models.py | 110 +++++++++++++++++++++++++++ 3 files changed, 121 insertions(+), 71 deletions(-) create mode 100644 tests/fixtures/__init__.py diff --git a/tests/e2e/test_quantize_e2e.py b/tests/e2e/test_quantize_e2e.py index 3452f08ad..faa08ef98 100644 --- a/tests/e2e/test_quantize_e2e.py +++ b/tests/e2e/test_quantize_e2e.py @@ -21,6 +21,7 @@ import onnxruntime as ort import pytest +from tests.fixtures.create_test_models import create_fake_segmentation_model from winml.modelkit.commands.quantize import quantize as quantize_cmd @@ -62,72 +63,6 @@ def _build_tiny_onnx(path: Path, *, with_metadata: bool = True) -> None: onnx.save(model, str(path)) -# Image-segmentation I/O contract shared by HF semantic-segmentation exports -# (e.g. nvidia/segformer-*-ade-*): pixel_values [batch, 3, height, width] -> -# logits [batch, num_labels, height/4, width/4]. 150 = ADE20K class count. -_SEG_NUM_CHANNELS = 3 -_SEG_NUM_LABELS = 150 - - -def _build_fake_segmentation_onnx(path: Path) -> None: - """Build a tiny FP32 ONNX with segmentation-identical I/O and random weights. - - Replaces a real HF segmentation export (heavy backbone that can randomly - hang on QNN hosts during calibration) with a small conv stack that keeps the - same input/output contract: ``pixel_values`` [batch, 3, height, width] -> - ``logits`` [batch, num_labels, height/4, width/4]. Spatial dims stay dynamic - so the model accepts both calibration inputs (e.g. 512x512) and the test's - degenerate 1x1 inference probe. Two stride-2 convs reproduce the /4 logits - resolution; a 1x1 conv acts as the classifier head. - """ - rng = np.random.default_rng(1234) - pixel_values = onnx.helper.make_tensor_value_info( - "pixel_values", - onnx.TensorProto.FLOAT, - ["batch_size", _SEG_NUM_CHANNELS, "height", "width"], - ) - logits = onnx.helper.make_tensor_value_info( - "logits", - onnx.TensorProto.FLOAT, - ["batch_size", _SEG_NUM_LABELS, "height_out", "width_out"], - ) - - def _w(shape: tuple[int, ...], name: str) -> onnx.TensorProto: - return onnx.numpy_helper.from_array( - (rng.standard_normal(shape) * 0.1).astype(np.float32), name - ) - - w1 = _w((8, _SEG_NUM_CHANNELS, 3, 3), "seg_W1") - b1 = _w((8,), "seg_B1") - w2 = _w((16, 8, 3, 3), "seg_W2") - b2 = _w((16,), "seg_B2") - w3 = _w((_SEG_NUM_LABELS, 16, 1, 1), "seg_W3") - b3 = _w((_SEG_NUM_LABELS,), "seg_B3") - nodes = [ - onnx.helper.make_node( - "Conv", ["pixel_values", "seg_W1", "seg_B1"], ["c1"], - name="Conv_1", kernel_shape=[3, 3], strides=[2, 2], pads=[1, 1, 1, 1], - ), - onnx.helper.make_node("Relu", ["c1"], ["r1"], name="Relu_1"), - onnx.helper.make_node( - "Conv", ["r1", "seg_W2", "seg_B2"], ["c2"], - name="Conv_2", kernel_shape=[3, 3], strides=[2, 2], pads=[1, 1, 1, 1], - ), - onnx.helper.make_node("Relu", ["c2"], ["r2"], name="Relu_2"), - onnx.helper.make_node( - "Conv", ["r2", "seg_W3", "seg_B3"], ["logits"], - name="Classifier", kernel_shape=[1, 1], strides=[1, 1], pads=[0, 0, 0, 0], - ), - ] - graph = onnx.helper.make_graph( - nodes, "fake_segmentation", [pixel_values], [logits], [w1, b1, w2, b2, w3, b3] - ) - model = onnx.helper.make_model(graph, opset_imports=[onnx.helper.make_opsetid("", 17)]) - model.ir_version = 8 - onnx.checker.check_model(model) - onnx.save(model, str(path)) - - @pytest.fixture(scope="session") def tiny_onnx(tmp_path_factory: pytest.TempPathFactory) -> Path: d = tmp_path_factory.mktemp("tiny_quant") @@ -228,14 +163,15 @@ def onnx_imgseg(tmp_path_factory: pytest.TempPathFactory) -> Path: The real ``nvidia/segformer-b0-finetuned-ade-512-512`` export ran as the calibration model here, but its heavy backbone caused random hangs on QNN - hosts. This builds a tiny model with identical segmentation I/O instead, so - calibration still exercises the ImageSegmentationDataset path without running - a large model. The dataset itself (image processor + samples) is still loaded - from the real ``--model-name`` in the test. + hosts. ``create_fake_segmentation_model`` builds a tiny model with identical + segmentation I/O instead, so calibration still exercises the + ImageSegmentationDataset path without running a large model. The dataset + itself (image processor + samples) is still loaded from the real + ``--model-name`` in the test. """ d = tmp_path_factory.mktemp("fake_imgseg") p = d / "model.onnx" - _build_fake_segmentation_onnx(p) + onnx.save(create_fake_segmentation_model(), str(p)) return p diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py new file mode 100644 index 000000000..862c45ce3 --- /dev/null +++ b/tests/fixtures/__init__.py @@ -0,0 +1,4 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- diff --git a/tests/fixtures/create_test_models.py b/tests/fixtures/create_test_models.py index 343836dd8..087eebb66 100644 --- a/tests/fixtures/create_test_models.py +++ b/tests/fixtures/create_test_models.py @@ -15,6 +15,7 @@ from pathlib import Path +import numpy as np import onnx from onnx import TensorProto, helper @@ -201,6 +202,109 @@ def create_multi_op_model() -> onnx.ModelProto: return model +# Image-segmentation I/O contract shared by HF semantic-segmentation exports +# (e.g. nvidia/segformer-*-ade-*): pixel_values [batch, 3, height, width] -> +# logits [batch, num_labels, height/4, width/4]. 150 = ADE20K class count. +SEG_NUM_CHANNELS = 3 +SEG_NUM_LABELS = 150 + + +def create_fake_segmentation_model() -> onnx.ModelProto: + """Create a tiny FP32 semantic-segmentation model with random weights. + + Stands in for a real HuggingFace semantic-segmentation export (e.g. + ``nvidia/segformer-b0-finetuned-ade-512-512``) whose heavy backbone can + randomly hang on QNN hosts during quantization calibration. It keeps the + same I/O contract so calibration datasets and the quantizer treat it + identically to the real model: + + - Input: ``pixel_values`` [batch, 3, height, width] (FLOAT) + - Output: ``logits`` [batch, num_labels, height/4, width/4] (FLOAT) + + Two stride-2 convs reproduce the ``/4`` logits resolution; a 1x1 conv acts + as the classifier head. Spatial dims stay dynamic so the model accepts both + calibration inputs (e.g. 512x512) and a degenerate 1x1 inference probe. + Weights are seeded-random so regeneration stays deterministic. + """ + rng = np.random.default_rng(1234) + + pixel_values = helper.make_tensor_value_info( + "pixel_values", + TensorProto.FLOAT, + ["batch_size", SEG_NUM_CHANNELS, "height", "width"], + ) + logits = helper.make_tensor_value_info( + "logits", + TensorProto.FLOAT, + ["batch_size", SEG_NUM_LABELS, "height_out", "width_out"], + ) + + def _weight(shape: tuple[int, ...], name: str) -> onnx.TensorProto: + return onnx.numpy_helper.from_array( + (rng.standard_normal(shape) * 0.1).astype(np.float32), name + ) + + w1 = _weight((8, SEG_NUM_CHANNELS, 3, 3), "seg_W1") + b1 = _weight((8,), "seg_B1") + w2 = _weight((16, 8, 3, 3), "seg_W2") + b2 = _weight((16,), "seg_B2") + w3 = _weight((SEG_NUM_LABELS, 16, 1, 1), "seg_W3") + b3 = _weight((SEG_NUM_LABELS,), "seg_B3") + + nodes = [ + helper.make_node( + "Conv", + ["pixel_values", "seg_W1", "seg_B1"], + ["c1"], + name="Conv_1", + kernel_shape=[3, 3], + strides=[2, 2], + pads=[1, 1, 1, 1], + ), + helper.make_node("Relu", ["c1"], ["r1"], name="Relu_1"), + helper.make_node( + "Conv", + ["r1", "seg_W2", "seg_B2"], + ["c2"], + name="Conv_2", + kernel_shape=[3, 3], + strides=[2, 2], + pads=[1, 1, 1, 1], + ), + helper.make_node("Relu", ["c2"], ["r2"], name="Relu_2"), + helper.make_node( + "Conv", + ["r2", "seg_W3", "seg_B3"], + ["logits"], + name="Classifier", + kernel_shape=[1, 1], + strides=[1, 1], + pads=[0, 0, 0, 0], + ), + ] + + graph = helper.make_graph( + nodes=nodes, + name="FakeSegmentation", + inputs=[pixel_values], + outputs=[logits], + initializer=[w1, b1, w2, b2, w3, b3], + ) + + model = helper.make_model( + graph, + opset_imports=[helper.make_opsetid("", 17)], + producer_name="WinML CLI Test Fixture Generator", + ) + # Match the quantize e2e fixtures (ir_version 8) so onnxruntime's quantizer + # loads it identically to the other tiny models in that suite. + model.ir_version = 8 + + onnx.checker.check_model(model) + + return model + + def main() -> None: """Generate all test fixture models.""" fixtures_dir = Path(__file__).parent @@ -220,6 +324,12 @@ def main() -> None: onnx.save(multi_op, str(multi_op_path)) print(f"✓ Created {multi_op_path}") + # Generate fake_segmentation.onnx + fake_segmentation = create_fake_segmentation_model() + fake_segmentation_path = fixtures_dir / "fake_segmentation.onnx" + onnx.save(fake_segmentation, str(fake_segmentation_path)) + print(f"✓ Created {fake_segmentation_path}") + print("\nAll test fixtures generated successfully!")