diff --git a/tests/e2e/test_quantize_e2e.py b/tests/e2e/test_quantize_e2e.py index d95288714..faa08ef98 100644 --- a/tests/e2e/test_quantize_e2e.py +++ b/tests/e2e/test_quantize_e2e.py @@ -21,6 +21,7 @@ import onnxruntime as ort import pytest +from tests.fixtures.create_test_models import create_fake_segmentation_model from winml.modelkit.commands.quantize import quantize as quantize_cmd @@ -157,12 +158,21 @@ def onnx_objdet() -> Path: @pytest.fixture(scope="session") -def onnx_imgseg() -> Path: - return _export_hf_to_onnx( - "nvidia/segformer-b0-finetuned-ade-512-512", - "image-segmentation", - "segformer_b0", - ) +def onnx_imgseg(tmp_path_factory: pytest.TempPathFactory) -> Path: + """Fake segmentation ONNX standing in for a real HF export. + + The real ``nvidia/segformer-b0-finetuned-ade-512-512`` export ran as the + calibration model here, but its heavy backbone caused random hangs on QNN + hosts. ``create_fake_segmentation_model`` builds a tiny model with identical + segmentation I/O instead, so calibration still exercises the + ImageSegmentationDataset path without running a large model. The dataset + itself (image processor + samples) is still loaded from the real + ``--model-name`` in the test. + """ + d = tmp_path_factory.mktemp("fake_imgseg") + p = d / "model.onnx" + onnx.save(create_fake_segmentation_model(), str(p)) + return p @pytest.fixture(scope="session") diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py new file mode 100644 index 000000000..862c45ce3 --- /dev/null +++ b/tests/fixtures/__init__.py @@ -0,0 +1,4 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- diff --git a/tests/fixtures/create_test_models.py b/tests/fixtures/create_test_models.py index 343836dd8..087eebb66 100644 --- a/tests/fixtures/create_test_models.py +++ b/tests/fixtures/create_test_models.py @@ -15,6 +15,7 @@ from pathlib import Path +import numpy as np import onnx from onnx import TensorProto, helper @@ -201,6 +202,109 @@ def create_multi_op_model() -> onnx.ModelProto: return model +# Image-segmentation I/O contract shared by HF semantic-segmentation exports +# (e.g. nvidia/segformer-*-ade-*): pixel_values [batch, 3, height, width] -> +# logits [batch, num_labels, height/4, width/4]. 150 = ADE20K class count. +SEG_NUM_CHANNELS = 3 +SEG_NUM_LABELS = 150 + + +def create_fake_segmentation_model() -> onnx.ModelProto: + """Create a tiny FP32 semantic-segmentation model with random weights. + + Stands in for a real HuggingFace semantic-segmentation export (e.g. + ``nvidia/segformer-b0-finetuned-ade-512-512``) whose heavy backbone can + randomly hang on QNN hosts during quantization calibration. It keeps the + same I/O contract so calibration datasets and the quantizer treat it + identically to the real model: + + - Input: ``pixel_values`` [batch, 3, height, width] (FLOAT) + - Output: ``logits`` [batch, num_labels, height/4, width/4] (FLOAT) + + Two stride-2 convs reproduce the ``/4`` logits resolution; a 1x1 conv acts + as the classifier head. Spatial dims stay dynamic so the model accepts both + calibration inputs (e.g. 512x512) and a degenerate 1x1 inference probe. + Weights are seeded-random so regeneration stays deterministic. + """ + rng = np.random.default_rng(1234) + + pixel_values = helper.make_tensor_value_info( + "pixel_values", + TensorProto.FLOAT, + ["batch_size", SEG_NUM_CHANNELS, "height", "width"], + ) + logits = helper.make_tensor_value_info( + "logits", + TensorProto.FLOAT, + ["batch_size", SEG_NUM_LABELS, "height_out", "width_out"], + ) + + def _weight(shape: tuple[int, ...], name: str) -> onnx.TensorProto: + return onnx.numpy_helper.from_array( + (rng.standard_normal(shape) * 0.1).astype(np.float32), name + ) + + w1 = _weight((8, SEG_NUM_CHANNELS, 3, 3), "seg_W1") + b1 = _weight((8,), "seg_B1") + w2 = _weight((16, 8, 3, 3), "seg_W2") + b2 = _weight((16,), "seg_B2") + w3 = _weight((SEG_NUM_LABELS, 16, 1, 1), "seg_W3") + b3 = _weight((SEG_NUM_LABELS,), "seg_B3") + + nodes = [ + helper.make_node( + "Conv", + ["pixel_values", "seg_W1", "seg_B1"], + ["c1"], + name="Conv_1", + kernel_shape=[3, 3], + strides=[2, 2], + pads=[1, 1, 1, 1], + ), + helper.make_node("Relu", ["c1"], ["r1"], name="Relu_1"), + helper.make_node( + "Conv", + ["r1", "seg_W2", "seg_B2"], + ["c2"], + name="Conv_2", + kernel_shape=[3, 3], + strides=[2, 2], + pads=[1, 1, 1, 1], + ), + helper.make_node("Relu", ["c2"], ["r2"], name="Relu_2"), + helper.make_node( + "Conv", + ["r2", "seg_W3", "seg_B3"], + ["logits"], + name="Classifier", + kernel_shape=[1, 1], + strides=[1, 1], + pads=[0, 0, 0, 0], + ), + ] + + graph = helper.make_graph( + nodes=nodes, + name="FakeSegmentation", + inputs=[pixel_values], + outputs=[logits], + initializer=[w1, b1, w2, b2, w3, b3], + ) + + model = helper.make_model( + graph, + opset_imports=[helper.make_opsetid("", 17)], + producer_name="WinML CLI Test Fixture Generator", + ) + # Match the quantize e2e fixtures (ir_version 8) so onnxruntime's quantizer + # loads it identically to the other tiny models in that suite. + model.ir_version = 8 + + onnx.checker.check_model(model) + + return model + + def main() -> None: """Generate all test fixture models.""" fixtures_dir = Path(__file__).parent @@ -220,6 +324,12 @@ def main() -> None: onnx.save(multi_op, str(multi_op_path)) print(f"✓ Created {multi_op_path}") + # Generate fake_segmentation.onnx + fake_segmentation = create_fake_segmentation_model() + fake_segmentation_path = fixtures_dir / "fake_segmentation.onnx" + onnx.save(fake_segmentation, str(fake_segmentation_path)) + print(f"✓ Created {fake_segmentation_path}") + print("\nAll test fixtures generated successfully!")