
Commit 74ade81

fix(export): Fix export to INT8_PTQ and INT8_ACQ (#3155)
* fix(export): set default value of dynamo to False in ExportMixin; add onnxscript to the dependencies
* fix(export): fix INT8_ACQ
* feat(export): add max_drop parameter for INT8_ACQ quantization and default metric handling
* test(export): add unit tests for OpenVINO export with various compression types
* Update src/anomalib/models/components/base/export_mixin.py
* test(export): enhance OpenVINO export tests with model file name assertions and CI skip conditions

Signed-off-by: Rajesh Gangireddy <rajesh.gangireddy@intel.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent cf83ed2 commit 74ade81

File tree

4 files changed: +506 -14 lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion

@@ -58,7 +58,7 @@ dependencies = [

 [project.optional-dependencies]
 # Model-specific optional dependencies
-openvino = ["openvino>=2024.0", "nncf>=2.10.0", "onnx>=1.16.0"]
+openvino = ["openvino>=2024.0", "nncf>=2.10.0", "onnx>=1.16.0", "onnxscript"]
 clip = [
     # NOTE: open-clip-torch throws the following error on v2.26.1
     # torch.onnx.errors.UnsupportedOperatorError: Exporting the operator

src/anomalib/engine/engine.py

Lines changed: 28 additions & 2 deletions

@@ -742,6 +742,7 @@ def export(
         compression_type: CompressionType | None = None,
         datamodule: AnomalibDataModule | None = None,
         metric: Metric | str | None = None,
+        max_drop: float = 0.01,
         ov_args: dict[str, Any] | None = None,  # deprecated
         ov_kwargs: dict[str, Any] | None = None,
         onnx_kwargs: dict[str, Any] | None = None,
@@ -766,9 +767,14 @@ def export(
                 (OpenVINO export only).
                 Defaults to ``None``.
             metric (Metric | str | None, optional): Metric to measure quality loss when quantizing.
-                Must be provided if ``CompressionType.INT8_ACQ`` is selected and must return higher value for better
-                performance of the model (OpenVINO export only).
+                Only used for ``CompressionType.INT8_ACQ`` (OpenVINO export only).
+                If not provided for INT8_ACQ, defaults to F1Score at image level.
+                Must return higher value for better performance of the model.
                 Defaults to ``None``.
+            max_drop (float, optional): Maximum acceptable accuracy drop during quantization.
+                Only used for ``CompressionType.INT8_ACQ`` (OpenVINO export only).
+                Value should be between 0 and 1 (e.g., 0.01 means 1% drop is acceptable).
+                Defaults to ``0.01``.
             ov_args (dict[str, Any] | None, optional): Deprecated. Use ov_kwargs instead.
                 This is optional and used only for OpenVINO's model optimizer.
                 Defaults to None.
@@ -834,6 +840,25 @@ def export(
         if export_root is None:
             export_root = Path(self.trainer.default_root_dir)

+        # Warn if max_drop is provided but not used
+        if max_drop != 0.01 and compression_type != CompressionType.INT8_ACQ:
+            warnings.warn(
+                f"max_drop parameter is only used for CompressionType.INT8_ACQ but got {compression_type}. "
+                "The parameter will be ignored.",
+                UserWarning,
+                stacklevel=2,
+            )
+
+        # Set default metric for INT8_ACQ if not provided
+        if metric is None and compression_type == CompressionType.INT8_ACQ:
+            from anomalib.metrics import F1Score
+
+            metric = F1Score(fields=["pred_label", "gt_label"])
+            logger.info(
+                "No metric provided for INT8_ACQ quantization. "
+                "Using default: F1Score at image level (fields=['pred_label', 'gt_label']).",
+            )
+
         exported_model_path: Path | None = None
         if export_type == ExportType.TORCH:
             exported_model_path = model.to_torch(
@@ -855,6 +880,7 @@ def export(
                 compression_type=compression_type,
                 datamodule=datamodule,
                 metric=metric,
+                max_drop=max_drop,
                 ov_kwargs=ov_kwargs,
                 onnx_kwargs=onnx_kwargs,
             )
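With these engine changes, accuracy-control quantization no longer requires an explicit metric. A minimal usage sketch of the updated export() call; the Padim/MVTecAD pairing below is illustrative, not part of this commit:

from anomalib.data import MVTecAD
from anomalib.deploy import CompressionType, ExportType
from anomalib.engine import Engine
from anomalib.models import Padim

# Train a model first; any anomalib model/datamodule pair works here.
datamodule = MVTecAD(category="bottle")
model = Padim()
engine = Engine()
engine.fit(model=model, datamodule=datamodule)

# INT8 accuracy-control quantization: metric now defaults to image-level
# F1Score, and max_drop caps the tolerated accuracy loss (here 2%).
exported_path = engine.export(
    model=model,
    export_type=ExportType.OPENVINO,
    compression_type=CompressionType.INT8_ACQ,
    datamodule=datamodule,
    max_drop=0.02,
)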

src/anomalib/models/components/base/export_mixin.py

Lines changed: 58 additions & 11 deletions

@@ -50,7 +50,7 @@
 from torchmetrics import Metric

 from anomalib import TaskType
-from anomalib.data import AnomalibDataModule
+from anomalib.data import AnomalibDataModule, ImageBatch
 from anomalib.deploy.export import CompressionType, ExportType

 if TYPE_CHECKING:
@@ -179,6 +179,7 @@ def to_onnx(
             dynamic_axes=kwargs.pop("dynamic_axes", dynamic_axes),
             input_names=kwargs.pop("input_names", ["input"]),
             output_names=kwargs.pop("output_names", output_names),
+            dynamo=kwargs.pop("dynamo", False),  # Dynamo is changed to True by default in torch 2.9
             **kwargs,
         )

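The pinned dynamo flag is the core of the first fix: newer torch releases flip torch.onnx.export to the dynamo-based exporter by default, which pulls in onnxscript (hence the new pyproject dependency). A standalone sketch of the pinned behaviour; the toy module here is illustrative:

import torch

# Toy module standing in for an anomalib model's ONNX export path.
model = torch.nn.Sequential(torch.nn.Conv2d(3, 8, 3), torch.nn.ReLU())
dummy_input = torch.randn(1, 3, 64, 64)

torch.onnx.export(
    model,
    (dummy_input,),
    "model.onnx",
    input_names=["input"],
    output_names=["output"],
    dynamo=False,  # pin the legacy TorchScript exporter, matching the mixin's new default
)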
@@ -193,6 +194,7 @@ def to_openvino(
         datamodule: AnomalibDataModule | None = None,
         metric: Metric | None = None,
         task: TaskType | None = None,
+        max_drop: float = 0.01,
         ov_kwargs: dict[str, Any] | None = None,
         onnx_kwargs: dict[str, Any] | None = None,
     ) -> Path:
@@ -209,9 +211,14 @@ def to_openvino(
             datamodule (AnomalibDataModule | None): DataModule for quantization.
                 Required for ``INT8_PTQ`` and ``INT8_ACQ``. Defaults to ``None``
             metric (Metric | None): Metric for accuracy-aware quantization.
-                Required for ``INT8_ACQ``. Defaults to ``None``
+                Used for ``INT8_ACQ``. If not provided, a default F1Score at image level
+                will be used. Defaults to ``None``
             task (TaskType | None): Task type (classification/segmentation).
                 Defaults to ``None``
+            max_drop (float): Maximum acceptable accuracy drop during quantization.
+                Only used for ``INT8_ACQ`` compression. Value should be between 0 and 1
+                (e.g., 0.01 means 1% accuracy drop is acceptable).
+                Defaults to ``0.01``
             ov_kwargs (dict[str, Any] | None): OpenVINO model optimizer arguments.
                 Defaults to ``None``
             onnx_kwargs (dict[str, Any] | None): Additional arguments to pass to torch.onnx.export
@@ -257,7 +264,7 @@ def to_openvino(

         model = ov.convert_model(model_path, **(ov_kwargs or {}))
         if compression_type and compression_type != CompressionType.FP16:
-            model = self._compress_ov_model(model, compression_type, datamodule, metric, task)
+            model = self._compress_ov_model(model, compression_type, datamodule, metric, task, max_drop)

         # fp16 compression is enabled by default
         compress_to_fp16 = compression_type == CompressionType.FP16
@@ -272,6 +279,7 @@ def _compress_ov_model(
         datamodule: AnomalibDataModule | None = None,
         metric: Metric | None = None,
         task: TaskType | None = None,
+        max_drop: float = 0.01,
     ) -> "CompiledModel":
         """Compress OpenVINO model using NNCF.
@@ -285,6 +293,8 @@ def _compress_ov_model(
                 Required for ``INT8_ACQ``. Defaults to ``None``
             task (TaskType | None): Task type (classification/segmentation).
                 Defaults to ``None``
+            max_drop (float): Maximum acceptable accuracy drop during quantization.
+                Only used for ``INT8_ACQ``. Defaults to ``0.01``

         Returns:
             CompiledModel: Compressed OpenVINO model
@@ -304,7 +314,7 @@ def _compress_ov_model(
         elif compression_type == CompressionType.INT8_PTQ:
             model = self._post_training_quantization_ov(model, datamodule)
         elif compression_type == CompressionType.INT8_ACQ:
-            model = self._accuracy_control_quantization_ov(model, datamodule, metric, task)
+            model = self._accuracy_control_quantization_ov(model, datamodule, metric, task, max_drop)
         else:
             msg = f"Unrecognized compression type: {compression_type}"
             raise ValueError(msg)
@@ -356,6 +366,7 @@ def _accuracy_control_quantization_ov(
         datamodule: AnomalibDataModule | None = None,
         metric: Metric | None = None,
         task: TaskType | None = None,
+        max_drop: float = 0.01,
     ) -> "CompiledModel":
         """Apply accuracy-aware quantization to OpenVINO model.
@@ -366,15 +377,19 @@ def _accuracy_control_quantization_ov(
                 Defaults to ``None``
             metric (Metric | None): Metric to measure accuracy during quantization.
                 Higher values should indicate better performance.
+                If not provided, defaults to F1Score at image level.
                 Defaults to ``None``
             task (TaskType | None): Task type (classification/segmentation).
                 Defaults to ``None``
+            max_drop (float): Maximum acceptable accuracy drop during quantization.
+                Value should be between 0 and 1 (e.g., 0.01 means 1% drop is acceptable).
+                Defaults to ``0.01``

         Returns:
             CompiledModel: Quantized OpenVINO model

         Raises:
-            ValueError: If datamodule or metric is not provided
+            ValueError: If datamodule is not provided, or if max_drop is out of valid range
         """
         import nncf

@@ -386,9 +401,25 @@ def _accuracy_control_quantization_ov(
         # if task is not provided, use the task from the datamodule
         task = task or datamodule.task

-        if metric is None:
-            msg = "Metric must be provided for OpenVINO INT8_ACQ compression"
+        # Validate max_drop parameter
+        if not 0 <= max_drop <= 1:
+            msg = f"max_drop must be between 0 and 1, got {max_drop}"
             raise ValueError(msg)
+        if max_drop > 0.1:
+            logger.warning(
+                f"max_drop={max_drop} is a large value (>10% accuracy drop). "
+                "Typical values are in the 0.01-0.03 range (1-3%).",
+            )
+
+        # Set default metric if not provided
+        if metric is None:
+            from anomalib.metrics import F1Score
+
+            metric = F1Score(fields=["pred_label", "gt_label"])
+            logger.info(
+                "No metric provided for INT8_ACQ quantization. "
+                "Using default: F1Score at image level (fields=['pred_label', 'gt_label']).",
+            )

         model_input = model.input(0)

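To make the max_drop budget concrete: NNCF measures the validation metric on the FP32 model, re-measures it on the quantized model, and rolls back quantization of the most sensitive layers until the drop fits the budget. An arithmetic sketch with made-up numbers:

# Illustrative arithmetic only; NNCF performs this comparison internally.
baseline_f1 = 0.95  # hypothetical metric of the unquantized model
max_drop = 0.01     # budget passed to quantize_with_accuracy_control

threshold = baseline_f1 - max_drop
print(threshold)  # 0.94 -> the quantized model must keep F1 >= 0.94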
@@ -408,12 +439,28 @@ def _accuracy_control_quantization_ov(
         # validation function to evaluate the quality loss after quantization
         def val_fn(nncf_model: "CompiledModel", validation_data: Iterable) -> float:
             for batch in validation_data:
-                preds = torch.from_numpy(nncf_model(batch["image"])[0])
-                target = batch["label"] if task == TaskType.CLASSIFICATION else batch["mask"][:, None, :, :]
-                metric.update(preds, target)
+                ov_model_output = nncf_model(batch["image"])
+                result_batch = ImageBatch(
+                    image=batch["image"],
+                    # pred_score must be same size as gt_label for metrics like AUROC
+                    pred_score=torch.from_numpy(ov_model_output["pred_score"]).squeeze(),
+                    pred_label=torch.from_numpy(ov_model_output["pred_label"]).squeeze(),
+                    gt_label=batch["gt_label"],
+                    anomaly_map=torch.from_numpy(ov_model_output["anomaly_map"]),
+                    pred_mask=torch.from_numpy(ov_model_output["pred_mask"]),
+                    gt_mask=batch["gt_mask"][:, None, :, :],  # Make shape the same format as pred_mask
+                )
+                metric.update(result_batch)
+
             return metric.compute()

-        return nncf.quantize_with_accuracy_control(model, calibration_dataset, validation_dataset, val_fn)
+        return nncf.quantize_with_accuracy_control(
+            model,
+            calibration_dataset,
+            validation_dataset,
+            val_fn,
+            max_drop=max_drop,
+        )


 def _create_export_root(export_root: str | Path, export_type: ExportType) -> Path:
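Since val_fn now hands the metric a full ImageBatch instead of a (preds, target) pair, a caller-supplied metric must follow anomalib's field-based convention. A sketch of swapping in a different image-level metric, reusing the engine/model/datamodule from the earlier example (the AUROC choice is illustrative):

from anomalib.metrics import AUROC

# Field-based metric: it pulls pred_score and gt_label out of the ImageBatch
# that val_fn builds from the OpenVINO model's outputs.
image_auroc = AUROC(fields=["pred_score", "gt_label"])

exported_path = engine.export(
    model=model,
    export_type=ExportType.OPENVINO,
    compression_type=CompressionType.INT8_ACQ,
    datamodule=datamodule,
    metric=image_auroc,
)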
