From df0c240309c15e7cc02d203588c09da8e228235f Mon Sep 17 00:00:00 2001
From: Simon Strycek <simon.strycek@nxp.com>
Date: Mon, 27 Apr 2026 15:40:42 +0200
Subject: [PATCH 1/8] NXP backend: Adjust activation support check to follow
 new Neutron-C flow requirements

---
 .../ops_converters/clamp_converter.py         |  2 +-
 .../ops_converters/hardtanh_converter.py      |  2 +-
 .../ops_converters/relu_converter.py          |  2 +-
 .../nxp/backend/neutron_operator_support.py   | 30 ++++++++-----------
 4 files changed, 16 insertions(+), 20 deletions(-)

diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/clamp_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/clamp_converter.py
index 0477984a24c..b0dbc56d17d 100644
--- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/clamp_converter.py
+++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/clamp_converter.py
@@ -144,7 +144,7 @@ def supports_partitioning_result(
                 node, partition_list, filter_fn=is_not_qdq_node
             )
             if is_alone_in_partition:
-                return activation_supported_on_target(node, neutron_target_spec)
+                return activation_supported_on_target(node)
 
         return True
 
diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/hardtanh_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/hardtanh_converter.py
index b4aa67bcc35..f67851895c2 100644
--- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/hardtanh_converter.py
+++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/hardtanh_converter.py
@@ -94,7 +94,7 @@ def supports_partitioning_result(
                 node, partition_list, filter_fn=is_not_qdq_node
             )
             if is_alone_in_partition:
-                return activation_supported_on_target(node, neutron_target_spec)
+                return activation_supported_on_target(node)
 
         return True
 
diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/relu_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/relu_converter.py
index 5bdc7fc0996..f05fed47d5d 100644
--- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/relu_converter.py
+++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/relu_converter.py
@@ -43,7 +43,7 @@ def supports_partitioning_result(
             node, partition_list, filter_fn=is_not_qdq_node
         )
         if is_alone_in_partition:
-            return activation_supported_on_target(node, neutron_target_spec)
+            return activation_supported_on_target(node)
 
         return True
 
diff --git a/backends/nxp/backend/neutron_operator_support.py b/backends/nxp/backend/neutron_operator_support.py
index 3dafefef484..18d3fc4e9e5 100644
--- a/backends/nxp/backend/neutron_operator_support.py
+++ b/backends/nxp/backend/neutron_operator_support.py
@@ -3,11 +3,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-from executorch.backends.nxp.backend.data_format import NXP_NODE_FORMAT
-from executorch.backends.nxp.backend.edge_helper import input_tensor
-from executorch.backends.nxp.backend.ir.converter.conversion.translator import (
-    dims_to_channels_last,
-)
+import torch
 from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec
 from torch.fx import Node
 
@@ -86,20 +82,20 @@ def transposition_is_supported_on_neutron(
 
 
 def activation_supported_on_target(
-    node: Node, neutron_target_spec: NeutronTargetSpec
+    node: Node,
 ) -> bool:
     """This function determines if the current NeutronSoftware properly supports an activation operator represented by the given node.
 
     :param node: The node representing the activation operator.
-    :param neutron_target_spec: Object for querying the target platform to retrieve its properties.
     """
-    input_shape = list(input_tensor(node, 0).shape)
-    if node.args[0].meta[NXP_NODE_FORMAT].is_channels_first():
-        input_shape = dims_to_channels_last(input_shape)
-
-    c = input_shape[-1]
-    num_macs = neutron_target_spec.get_num_macs()
-
-    # activations in Neutron are delegable only
-    # if `num_channels` % `num_macs` == 0
-    return c % num_macs == 0
+    # Prevent circular import
+    from executorch.backends.nxp.backend.ir.converter.node_converter import (
+        NodeConverter,
+    )
+
+    return NodeConverter.uses_quantization_type_for_io(
+        node,
+        supported_types=[torch.int8, torch.uint8],
+        input_indices=[0],
+        output_indices=[0],
+    )

From f34c2b6af131bfdd6ba0ae4faaeba35868b7ee82 Mon Sep 17 00:00:00 2001
From: Simon Strycek <simon.strycek@nxp.com>
Date: Mon, 8 Jun 2026 10:24:02 +0200
Subject: [PATCH 2/8] NXP backend: Disable delegation for identity-convertible
 ReLU cases

---
 backends/nxp/backend/graph_utils.py           | 80 +++++++++++++++++++
 .../ops_converters/relu_converter.py          | 15 +++-
 .../ir/converter/quantization_utils.py        | 15 +++-
 3 files changed, 107 insertions(+), 3 deletions(-)

diff --git a/backends/nxp/backend/graph_utils.py b/backends/nxp/backend/graph_utils.py
index f93ba5ac5dd..88cd996d6fd 100644
--- a/backends/nxp/backend/graph_utils.py
+++ b/backends/nxp/backend/graph_utils.py
@@ -3,7 +3,13 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+import numpy as np
 import torch
+from executorch.backends.nxp.backend.ir.converter.conversion.translator import (
+    torch_type_to_numpy_type,
+)
+from executorch.backends.nxp.backend.ir.converter.node_converter import _is_dequant_node
+from executorch.backends.nxp.backend.ir.converter.quantization_utils import quantize
 from executorch.exir.dialects._ops import ops as exir_ops
 from torch.fx import Node
 
@@ -47,3 +53,77 @@ def get_output_shape(node: Node) -> tuple[torch.Size] | torch.Size | None:
         return tuple([v.shape for v in val])
 
     return None
+
+
+def is_clamp_preserved_under_quantization(
+    node: Node, min_val: int = 0, max_val: int | None = None
+) -> bool:
+    """
+    Check that a Clamp/ReLU/HardTanh is preserved under quantization, i.e. that
+    it did not collapse into either an identity or a constant.
+
+     Valid quant. bounds -                Quant. bounds -
+    one hinge is preserved             Collapse to identity
+            │   │                           │ │
+            │   ▼/¯¯¯¯¯ ReLU6(x)            │ ▼/¯¯¯¯¯ ReLU6(x)
+            │   /                           │ /
+            │  /                            ▼/
+            ▼ /                             /
+        ¯¯¯¯¯ Hinge                   ¯¯¯¯¯ Hinge
+
+    Args:
+        node: Activation node; False is returned unless its first input is a dequantize node.
+        min_val: Lower bound (hinge) of the operator (e.g. 0 for ReLU).
+        max_val: Upper bound of the operator (e.g. 6 for ReLU6, or None for ReLU).
+    """
+
+    q_node = node.args[0]
+
+    if not _is_dequant_node(q_node):
+        return False
+
+    if len(q_node.args) == 6:
+        # per-tensor: (input, scale, zero_point, quant_min, quant_max, dtype)
+        _, scale, zp, quant_min, quant_max, q_type = q_node.args
+    else:
+        # per-channel: (input, scales, zero_points, axis, quant_min, quant_max, dtype)
+        _, scale, zp, _, quant_min, quant_max, q_type = q_node.args
+
+    # Convert to a NumPy type first; `np.iinfo` cannot interpret torch dtypes.
+    q_type = torch_type_to_numpy_type(q_type).type
+    quant_min = np.iinfo(q_type).min if quant_min is None else quant_min
+    quant_max = np.iinfo(q_type).max if quant_max is None else quant_max
+    quantized_min_val = quantize(
+        value=min_val,
+        zero_point=zp,
+        scale=scale,
+        quant_min=quant_min,
+        quant_max=quant_max,
+        dtype=q_type,
+    )
+
+    if max_val is not None:
+        quantized_max_val = quantize(
+            value=max_val,
+            zero_point=zp,
+            scale=scale,
+            quant_min=quant_min,
+            quant_max=quant_max,
+            dtype=q_type,
+        )
+        return (
+            # If at least one bound is inside the quantization range
+            # the hinge of the ReLU/HardTanh is preserved and therefore does not
+            # collapse to identity or constant.
+            (
+                np.all(quant_min < quantized_min_val)
+                or np.all(quantized_max_val < quant_max)
+            )
+            # When both operator bounds are outside the quantization range
+            # the operator collapses into constant value (eg. 0 or 6 for ReLU6).
+            and not np.all(quant_max < quantized_min_val)
+            and not np.all(quant_min > quantized_max_val)
+        )
+
+    # Ensure ReLU/HardTanh hinge is preserved.
+    return quant_min < quantized_min_val < quant_max
diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/relu_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/relu_converter.py
index f05fed47d5d..c0f5bf944ef 100644
--- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/relu_converter.py
+++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/relu_converter.py
@@ -3,6 +3,10 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+
+from executorch.backends.nxp.backend.graph_utils import (
+    is_clamp_preserved_under_quantization,
+)
 from executorch.backends.nxp.backend.ir.converter.node_converter import (
     CustomDelegationOptions,
     is_not_qdq_node,
@@ -30,6 +34,15 @@ def _is_supported_in_IR(
     ) -> bool:
         return True
 
+    @staticmethod
+    def _is_supported_on_target(
+        node: Node,
+        neutron_target_spec: NeutronTargetSpec,
+        parameters_mapping: dict[str, Parameter],
+        custom_delegation_options: CustomDelegationOptions,
+    ) -> bool:
+        return activation_supported_on_target(node)
+
     @classmethod
     def supports_partitioning_result(
         cls,
@@ -43,7 +56,7 @@ def supports_partitioning_result(
             node, partition_list, filter_fn=is_not_qdq_node
         )
         if is_alone_in_partition:
-            return activation_supported_on_target(node)
+            return is_clamp_preserved_under_quantization(node)
 
         return True
 
diff --git a/backends/nxp/backend/ir/converter/quantization_utils.py b/backends/nxp/backend/ir/converter/quantization_utils.py
index 11de4eec13c..ba4ad14222b 100755
--- a/backends/nxp/backend/ir/converter/quantization_utils.py
+++ b/backends/nxp/backend/ir/converter/quantization_utils.py
@@ -135,8 +135,19 @@ def set_quantization_parameters_to_tensor(
 def quantize_int8(
     data: np.ndarray, scale: List[float], zero_point: List[int]
 ) -> np.ndarray:
-    new_data = np.add(np.round(np.divide(data, scale)), zero_point)
-    return np.clip(new_data, -128, 127).astype(np.int8)
+    return quantize(data, zero_point=zero_point, scale=scale)
+
+
+def quantize(
+    value: np.ndarray | int,
+    zero_point: List[int] | int,
+    scale: List[float] | float,
+    quant_min: int = -128,
+    quant_max: int = 127,
+    dtype: type = np.int8,
+) -> np.ndarray | np.integer:
+    rescaled_value = np.add(np.round(np.divide(value, scale)), zero_point)
+    return dtype(np.clip(rescaled_value, quant_min, quant_max))
 
 
 def dequantize(

From 602e8b8c952acd2ed12e140456ccc7adc37d5b7f Mon Sep 17 00:00:00 2001
From: Simon Strycek <simon.strycek@nxp.com>
Date: Tue, 9 Jun 2026 11:04:38 +0200
Subject: [PATCH 3/8] NXP backend: Update ReLU-related checks to reflect new
 flow support

---
 .../ops_converters/clamp_converter.py         | 48 +++++++++----------
 1 file changed, 22 insertions(+), 26 deletions(-)

diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/clamp_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/clamp_converter.py
index b0dbc56d17d..25cf6074701 100644
--- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/clamp_converter.py
+++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/clamp_converter.py
@@ -8,6 +8,9 @@
 import numpy as np
 import torch
 from executorch.backends.nxp.backend.edge_helper import try_get_arg
+from executorch.backends.nxp.backend.graph_utils import (
+    is_clamp_preserved_under_quantization,
+)
 from executorch.backends.nxp.backend.ir.converter.conversion.translator import (
     torch_type_to_numpy_type,
 )
@@ -20,6 +23,7 @@
 )
 from executorch.backends.nxp.backend.ir.converter.quantization_utils import (
     propagate_quantization,
+    quantize,
 )
 from executorch.backends.nxp.backend.ir.lib.tflite.BuiltinOperator import (
     BuiltinOperator,
@@ -117,17 +121,20 @@ def _is_supported_on_target(
             output_indices=[0],
         )
 
-        # We either convert to ReLU -> SingleInputQuantization pattern
-        # or we convert to Min/Max, which requires same quantization on
-        # both input and output.
-        return (relu_compatible | io_quant_consistent) and quant_supported
+        if relu_compatible and activation_supported_on_target(
+            node,
+        ):
+            return True
+
+        # We convert to Min/Max, which requires same quantization for both input and output.
+        return io_quant_consistent and quant_supported
 
     @classmethod
     def supports_partitioning_result(
         cls,
         node: Node,
         partition_list: list[Partition],
-        _: CustomDelegationOptions,
+        custom_delegation_options: CustomDelegationOptions,
         neutron_target_spec: NeutronTargetSpec,
         parameters_mapping: dict[str, Parameter],
     ) -> bool:
@@ -136,30 +143,19 @@ def supports_partitioning_result(
         # Neutron cannot delegate a partition where ReLU or ReLU6 is the only operator
         # and at the same time the node does not satisfy delegation requirements.
         # In contrast, ReLUN1To1 and ReLU0To1 are supported and delegated successfuly.
-        if bounds in [
-            cls.RELU_COMPATIBLE_BOUNDS["Relu"],
-            cls.RELU_COMPATIBLE_BOUNDS["Relu6"],
-        ]:
+        if bounds in cls.RELU_COMPATIBLE_BOUNDS.values():
             is_alone_in_partition = cls.is_node_alone_in_partition(
                 node, partition_list, filter_fn=is_not_qdq_node
             )
             if is_alone_in_partition:
-                return activation_supported_on_target(node)
+                return is_clamp_preserved_under_quantization(
+                    node,
+                    min_val=bounds[0],
+                    max_val=bounds[1],
+                )
 
         return True
 
-    @staticmethod
-    def _quantize_value(
-        value: int,
-        zp: int,
-        scale: float,
-        quant_min: int,
-        quant_max: int,
-        dtype: type = np.int8,
-    ) -> np.integer:
-        rescaled_value = round(value / scale) + zp
-        return dtype(np.clip(rescaled_value, quant_min, quant_max))
-
     def convert(self, node: Node):
         """Convert the `aten.clamp.default` operator to either
         Neutron IR `Relu*` operator or combination of `Min` and `Max`.
@@ -202,9 +198,9 @@ def convert(self, node: Node):
         min_value, max_value = bounds
 
         if min_value is not None:
-            min_value = self._quantize_value(
+            min_value = quantize(
                 value=min_value,
-                zp=zp,
+                zero_point=zp,
                 scale=scale,
                 quant_min=quant_min,
                 quant_max=quant_max,
@@ -216,9 +212,9 @@ def convert(self, node: Node):
             propagate_quantization(x, min_tensor)
 
         if max_value is not None:
-            max_value = self._quantize_value(
+            max_value = quantize(
                 value=max_value,
-                zp=zp,
+                zero_point=zp,
                 scale=scale,
                 quant_min=quant_min,
                 quant_max=quant_max,

From b1d9d0d2e92f3bbee83dc467531d42ca379b55ca Mon Sep 17 00:00:00 2001
From: Simon Strycek <simon.strycek@nxp.com>
Date: Mon, 25 May 2026 15:52:28 +0200
Subject: [PATCH 4/8] NXP backend: Add new Neutron C flow test cases for ReLU

---
 .../node_converter/test_relu_converter.py     | 248 ++++++++++--------
 backends/nxp/tests/models.py                  |   4 +-
 backends/nxp/tests/ops_aliases.py             |   2 +
 3 files changed, 147 insertions(+), 107 deletions(-)

diff --git a/backends/nxp/tests/ir/converter/node_converter/test_relu_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_relu_converter.py
index 2ec285d6363..ab42560f075 100644
--- a/backends/nxp/tests/ir/converter/node_converter/test_relu_converter.py
+++ b/backends/nxp/tests/ir/converter/node_converter/test_relu_converter.py
@@ -6,23 +6,23 @@
 import numpy as np
 import pytest
 import torch
-
-from executorch.backends.nxp.backend.edge_program_converter import (
-    EdgeProgramToIRConverter,
-    exir_ops,
-)
-from executorch.backends.nxp.tests.executorch_pipeline import (
-    to_edge_program,
-    to_quantized_edge_program,
-)
-from executorch.backends.nxp.tests.executors import (
-    convert_run_compare,
-    graph_contains_any_of_ops,
-    ToNCHWPreprocess,
-    ToNHWCPreprocess,
-)
+from executorch.backends.nxp.backend.edge_program_converter import exir_ops
+from executorch.backends.nxp.tests.dataset_creator import RandomDatasetCreator
+from executorch.backends.nxp.tests.executorch_pipeline import to_quantized_edge_program
+from executorch.backends.nxp.tests.executors import graph_contains_any_of_ops
+from executorch.backends.nxp.tests.graph_verifier import DetailedGraphVerifier
 from executorch.backends.nxp.tests.models import Conv2dModule, LinearModule, ReLUModule
-from torch.export import ExportedProgram
+from executorch.backends.nxp.tests.nsys_testing import lower_run_compare
+from executorch.backends.nxp.tests.ops_aliases import (
+    AddMm,
+    Convolution,
+    DequantizePerChannel,
+    DequantizePerTensor,
+    PermuteCopy,
+    QuantizePerTensor,
+    Relu,
+    ViewCopy,
+)
 from executorch.backends.nxp.tests.use_qat import *  # noqa F403
 
 
@@ -37,10 +37,10 @@ def reseed_model_per_test_run():
 
 
 class ConvReLUModule(torch.nn.Module):
-    def __init__(self):
+    def __init__(self, in_channels=4, out_channels=8):
         super().__init__()
 
-        self.conv = Conv2dModule()
+        self.conv = Conv2dModule(in_channels=in_channels, out_channels=out_channels)
         self.relu = torch.nn.ReLU()
 
     def forward(self, x):
@@ -49,10 +49,12 @@ def forward(self, x):
 
 
 class LinearReLUModule(torch.nn.Module):
-    def __init__(self):
+    def __init__(self, in_features: int = 32, out_features: int = 16):
         super().__init__()
 
-        self.linear = LinearModule(bias=True)
+        self.linear = LinearModule(
+            bias=True, in_features=in_features, out_features=out_features
+        )
         self.relu = torch.nn.ReLU()
 
     def forward(self, x):
@@ -60,89 +62,125 @@ def forward(self, x):
         return self.relu(x)
 
 
-def test_relu_conversion():
-    input_shape = (10, 4, 32, 32)
-    edge_program = to_edge_program(ReLUModule(), input_shape).exported_program()
-
-    input_data = 2 * np.random.random(input_shape).astype(np.float32) - 1
-
-    convert_run_compare(edge_program, input_data=input_data)
-
-
-def test_relu_with_conv_quant_conversion(mocker, use_qat):
-    input_shape = (1, 4, 32, 32)
-    converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program")
-
-    # Run conversion
-    delegated_ep = to_quantized_edge_program(
-        ConvReLUModule(),
-        input_shape,
-        use_qat=use_qat,
-        use_neutron_for_format_conversion=False,
-    ).exported_program()
-
-    # Capture generated model
-    tflite_flatbuffers_model, _ = converter_spy.spy_return
-
-    # Capture converted program
-    edge_program: ExportedProgram = converter_spy.call_args.args[1]
-
-    input_data = (
-        (2 * np.random.random(input_shape).astype(np.float32) - 1) * 50
-    ).astype(np.int8)
-
-    # Make sure the `relu` was delegated.
-    assert graph_contains_any_of_ops(delegated_ep.graph, [ExecutorchDelegateCall])
-    assert not graph_contains_any_of_ops(delegated_ep.graph, [ReLU])
-
-    convert_run_compare(
-        edge_program,
-        input_data,
-        tfl_model=tflite_flatbuffers_model,
-        tflite_input_preprocess=ToNHWCPreprocess(),
-        tflite_output_preprocess=ToNCHWPreprocess(),
+class TestReLUNewNeutronFlow:
+    @pytest.mark.parametrize(
+        ["model", "input_shape"],
+        [
+            pytest.param(
+                lambda: LinearReLUModule(in_features=9, out_features=17),
+                (9, 9),
+                id="Linear(1D-in): num_channels not divisible by NUM_MACS",
+            ),
+            pytest.param(
+                lambda: LinearReLUModule(in_features=9, out_features=15),
+                (1, 7, 9),
+                id="Linear(2D-in): num_channels not divisible by NUM_MACS",
+            ),
+            pytest.param(
+                lambda: LinearReLUModule(in_features=8, out_features=16),
+                (1, 8, 8),
+                id="Linear(2D-in): num_channels divisible by NUM_MACS",
+            ),
+            pytest.param(
+                lambda: LinearReLUModule(in_features=9, out_features=15),
+                (1, 9, 9, 9),
+                id="Linear(3D-in): num_channels not divisible by NUM_MACS",
+            ),
+            pytest.param(
+                lambda: ConvReLUModule(in_channels=17, out_channels=9),
+                (1, 17, 9, 9),
+                id="Conv: num_channels not divisible by NUM_MACS",
+            ),
+            pytest.param(
+                lambda: ConvReLUModule(in_channels=8, out_channels=16),
+                (1, 8, 8, 8),
+                id="Conv: num_channels divisible by NUM_MACS",
+            ),
+        ],
     )
-
-
-def test_relu_with_linear_quant_conversion(mocker, use_qat):
-    input_shape = (256, 32)
-    converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program")
-
-    # Run conversion
-    delegated_ep = to_quantized_edge_program(
-        LinearReLUModule(), input_shape, use_qat=use_qat
-    ).exported_program()
-
-    # Capture generated model
-    tflite_flatbuffers_model, _ = converter_spy.spy_return
-
-    # Capture converted program
-    edge_program: ExportedProgram = converter_spy.call_args.args[1]
-
-    input_data = (
-        (2 * np.random.random(input_shape).astype(np.float32) - 1) * 50
-    ).astype(np.int8)
-
-    # Make sure the `relu` was delegated.
-    assert graph_contains_any_of_ops(delegated_ep.graph, [ExecutorchDelegateCall])
-    assert not graph_contains_any_of_ops(delegated_ep.graph, [ReLU])
-
-    convert_run_compare(edge_program, input_data, tfl_model=tflite_flatbuffers_model)
-
-
-@pytest.mark.parametrize(
-    "input_shape",
-    [
-        pytest.param(
-            (3, 9, 7), id="num_channels not divisible by NUM_MACS, alone in partition"
-        ),
-    ],
-)
-def test_relu_conversion__unsupported(mocker, input_shape):
-    delegated_ep = to_quantized_edge_program(
-        ReLUModule(), input_shape
-    ).exported_program()
-
-    # Make sure the `relu` was NOT delegated.
-    assert not graph_contains_any_of_ops(delegated_ep.graph, [ExecutorchDelegateCall])
-    assert graph_contains_any_of_ops(delegated_ep.graph, [ReLU])
+    def test_relu_conversion__full_pipeline(self, mocker, model, input_shape):
+        model = model()  # Avoid model creation at import time
+        is_conv_module = not hasattr(model, "linear")
+
+        graph_verifier = DetailedGraphVerifier(
+            mocker=mocker,
+            expected_delegated_ops=(
+                {Convolution: 1, Relu: 1} if is_conv_module else {AddMm: 1, Relu: 1}
+            ),
+            expected_non_delegated_ops={},
+            ops_to_ignore=[
+                PermuteCopy,
+                ViewCopy,
+                QuantizePerTensor,
+                DequantizePerTensor,
+                DequantizePerChannel,
+            ],
+        )
+
+        lower_run_compare(
+            model,
+            input_shape,
+            graph_verifier,
+        )
+
+    @pytest.mark.parametrize(
+        "input_shape",
+        [
+            pytest.param(
+                (3, 9, 9),
+                id="num_channels not divisible by NUM_MACS, alone in partition",
+            ),
+            pytest.param(
+                (1, 17, 17),
+                id="num_channels not divisible by NUM_MACS, alone in partition",
+            ),
+        ],
+    )
+    def test_relu_conversion__non_delegated_with_old_flow(self, mocker, input_shape):
+        verifier = DetailedGraphVerifier(
+            mocker=mocker,
+            expected_delegated_ops={Relu: 1},
+            expected_non_delegated_ops={},
+        )
+
+        lower_run_compare(
+            ReLUModule(),
+            input_shape,
+            dlg_model_verifier=verifier,
+            dataset_creator=RandomDatasetCreator(low=-1, high=1),
+        )
+
+    @pytest.mark.parametrize(
+        "input_shape",
+        [
+            pytest.param(
+                (3, 9, 9),
+                id="num_channels not divisible by NUM_MACS, alone in partition",
+            ),
+            pytest.param(
+                (1, 17, 17),
+                id="num_channels not divisible by NUM_MACS, alone in partition",
+            ),
+        ],
+    )
+    def test_relu_conversion__no_delegated_node_when_noop(self, input_shape):
+        def generate_calibration_data(input_spec):
+            return [
+                # Generate inputs in range <0, 1> - ReLU degrades to identity
+                tuple([torch.rand(spec.shape, dtype=spec.dtype) for spec in input_spec])
+                for _ in range(4)
+            ]
+
+        # Run conversion
+        delegated_ep = to_quantized_edge_program(
+            ReLUModule(),
+            input_shape,
+            delegate_to_npu=True,
+            get_calibration_inputs_fn=generate_calibration_data,
+        ).exported_program()
+
+        # Ensure identity ReLU was not delegated
+        assert graph_contains_any_of_ops(delegated_ep.graph, [ReLU])
+        assert not graph_contains_any_of_ops(
+            delegated_ep.graph, [ExecutorchDelegateCall]
+        )
diff --git a/backends/nxp/tests/models.py b/backends/nxp/tests/models.py
index 0383734b4dd..7545dd940f2 100644
--- a/backends/nxp/tests/models.py
+++ b/backends/nxp/tests/models.py
@@ -194,9 +194,9 @@ def forward(self, x):
 
 
 class LinearModule(torch.nn.Module):
-    def __init__(self, bias: bool):
+    def __init__(self, bias: bool, in_features: int = 32, out_features: int = 16):
         super().__init__()
-        self.linear = torch.nn.Linear(32, 16, bias=bias)
+        self.linear = torch.nn.Linear(in_features, out_features, bias=bias)
 
     def forward(self, x):
         return self.linear(x)
diff --git a/backends/nxp/tests/ops_aliases.py b/backends/nxp/tests/ops_aliases.py
index 3106d32686b..0c5dc98cb6a 100644
--- a/backends/nxp/tests/ops_aliases.py
+++ b/backends/nxp/tests/ops_aliases.py
@@ -13,6 +13,7 @@
 
 Abs = exir_ops.edge.aten.abs.default
 AdaptiveAvgPool2D = exir_ops.edge.aten._adaptive_avg_pool2d.default
+AddMm = exir_ops.edge.aten.addmm.default
 AddTensor = exir_ops.edge.aten.add.Tensor
 AvgPool2D = exir_ops.edge.aten.avg_pool2d.default
 Bmm = exir_ops.edge.aten.bmm.default
@@ -29,6 +30,7 @@
 MaxPool2DWithIndices = exir_ops.edge.aten.max_pool2d_with_indices.default
 MeanDim = exir_ops.edge.aten.mean.dim
 MulTensor = exir_ops.edge.aten.mul.Tensor
+PermuteCopy = exir_ops.edge.aten.permute_copy.default
 QuantizePerChannel = exir_ops.edge.quantized_decomposed.quantize_per_channel.default
 QuantizePerTensor = exir_ops.edge.quantized_decomposed.quantize_per_tensor.default
 Relu = exir_ops.edge.aten.relu.default

From c2005f2b0aaa4e9b6aedb1877d7244e08fe21478 Mon Sep 17 00:00:00 2001
From: Simon Strycek <simon.strycek@nxp.com>
Date: Fri, 29 May 2026 09:32:35 +0200
Subject: [PATCH 5/8] NXP backend: Remove failing tests due to ReLU now being
 delegated properly with new Neutron C flow

---
 .../node_converter/test_abs_converter.py      | 23 +------------
 .../node_converter/test_hardtanh_converter.py | 33 +------------------
 2 files changed, 2 insertions(+), 54 deletions(-)

diff --git a/backends/nxp/tests/ir/converter/node_converter/test_abs_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_abs_converter.py
index cf1965b8b13..ebe782c5a98 100644
--- a/backends/nxp/tests/ir/converter/node_converter/test_abs_converter.py
+++ b/backends/nxp/tests/ir/converter/node_converter/test_abs_converter.py
@@ -8,13 +8,12 @@
 # noinspection PyUnusedImports
 import pytest
 import torch
-
 from executorch.backends.nxp.tests.graph_verifier import DetailedGraphVerifier
 from executorch.backends.nxp.tests.nsys_testing import (
     lower_run_compare,
     RandomDatasetCreator,
 )
-from executorch.backends.nxp.tests.ops_aliases import Abs, Convolution, Relu
+from executorch.backends.nxp.tests.ops_aliases import Abs
 from executorch.backends.nxp.tests.use_qat import *  # noqa F403
 
 
@@ -99,23 +98,3 @@ def test__basic_nsys_inference__big(self, mocker):
             graph_verifier,
             dataset_creator,
         )
-
-    def test_basic_nsys_inference__with_conv(self, mocker):
-        input_shape = (2, 3, 6, 7)
-        in_channels = input_shape[1]
-        model = ConvBlocksWithAbsModule(conv_in_channels=in_channels)
-
-        # one `relu` ends up in the same delegated partition as `abs`
-        graph_verifier = DetailedGraphVerifier(
-            mocker,
-            expected_delegated_ops={Abs: 1, Relu: 1},
-            expected_non_delegated_ops={Relu: 1, Convolution: 2},
-        )
-
-        dataset_creator = self._get_dataset_creator()
-        lower_run_compare(
-            model,
-            input_shape,
-            graph_verifier,
-            dataset_creator,
-        )
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_hardtanh_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_hardtanh_converter.py
index 3a3f5b957a8..67d3add978c 100644
--- a/backends/nxp/tests/ir/converter/node_converter/test_hardtanh_converter.py
+++ b/backends/nxp/tests/ir/converter/node_converter/test_hardtanh_converter.py
@@ -17,7 +17,7 @@
     ToChannelFirstPreprocess,
     ToChannelLastPreprocess,
 )
-from executorch.backends.nxp.tests.models import Conv2dWithActivation, HardTanhModule
+from executorch.backends.nxp.tests.models import Conv2dWithActivation
 from executorch.exir.dialects._ops import ops as exir_ops
 from torch.export import ExportedProgram
 from executorch.backends.nxp.tests.use_qat import *  # noqa F403
@@ -117,34 +117,3 @@ def test_custom_hardtanh_quant(
         input_data=input_data,
         atol=2.0,
     )
-
-
-@pytest.mark.parametrize(
-    "input_shape, activation_range",
-    [
-        pytest.param(
-            (3, 7, 15, 7),
-            (0, float("inf")),
-            id="activation range: Relu, num_channels not divisible by NUM_MACS, alone in partition",
-        ),
-        pytest.param(
-            (3, 7, 15, 7),
-            (0, 6),
-            id="activation range: Relu6, num_channels not divisible by NUM_MACS, alone in partition",
-        ),
-    ],
-)
-def test_hardtanh__unsupported(
-    input_shape: tuple[int],
-    activation_range: tuple[float, float],
-    use_qat: bool,
-):
-    min_val, max_val = activation_range
-    model = HardTanhModule(min_val, max_val)
-    delegated_ep = to_quantized_edge_program(
-        model, input_shape, use_qat=use_qat
-    ).exported_program()
-
-    # Make sure the `hardtanh` was NOT delegated.
-    assert not graph_contains_any_of_ops(delegated_ep.graph, [ExecutorchDelegateCall])
-    assert graph_contains_any_of_ops(delegated_ep.graph, [HardTanh, HardTanh_])

From ade8d31b9b6a84e5c54480b7cbd5208785ad7af6 Mon Sep 17 00:00:00 2001
From: Simon Strycek <simon.strycek@nxp.com>
Date: Wed, 10 Jun 2026 10:34:24 +0200
Subject: [PATCH 6/8] NXP backend: Prepare Clamp operator for re-use in
 HardTanhConverter

---
 .../ops_converters/clamp_converter.py         | 45 ++++++++++---------
 1 file changed, 24 insertions(+), 21 deletions(-)

diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/clamp_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/clamp_converter.py
index 25cf6074701..23bac96f51c 100644
--- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/clamp_converter.py
+++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/clamp_converter.py
@@ -42,17 +42,6 @@
 from torch.nn import Parameter
 
 
-def _is_convertible_to_relu(node):
-    bounds = ClampConverter._get_clamp_bounds(node)
-    bounds = tuple(v if v is not None and math.isfinite(v) else None for v in bounds)
-
-    # Some specific bounds can be replaced with single op ReLU.
-    if bounds not in ClampConverter.RELU_COMPATIBLE_BOUNDS.values():
-        return False
-
-    return True
-
-
 class ClampConverter(NodeConverter):
     RELU_COMPATIBLE_BOUNDS = {
         "ReluN1To1": (-1, 1),
@@ -70,12 +59,25 @@ class ClampConverter(NodeConverter):
 
     # noinspection PyShadowingBuiltins
     @staticmethod
-    def _get_clamp_bounds(clamp_node: Node) -> tuple[float | None, float | None]:
+    def _get_bounds(node: Node) -> tuple[float | None, float | None]:
         """Extract min and max bounds from `aten.clamp.default` node."""
-        min = try_get_arg(clamp_node, 1)
-        max = try_get_arg(clamp_node, 2)
+        min = try_get_arg(node, 1)
+        max = try_get_arg(node, 2)
         return min, max
 
+    @classmethod
+    def _is_convertible_to_relu(cls, node):
+        bounds = cls._get_bounds(node)
+        bounds = tuple(
+            v if v is not None and math.isfinite(v) else None for v in bounds
+        )
+
+        # Only certain bounds can be replaced with a single ReLU-type operator.
+        if bounds not in cls.RELU_COMPATIBLE_BOUNDS.values():
+            return False
+
+        return True
+
     @staticmethod
     def _is_supported_in_IR(
         node: Node,
@@ -100,20 +102,21 @@ def _io_quant_is_same(node: Node):
         dq_params = dequant.args[1:]
         return all(q == dq for q, dq in zip(q_params, dq_params))
 
-    @staticmethod
+    @classmethod
     def _is_supported_on_target(
+        cls,
         node: Node,
         neutron_target_spec: NeutronTargetSpec,
         parameters_mapping: dict[str, Parameter],
         custom_delegation_options: CustomDelegationOptions,
     ) -> bool:
-        relu_compatible = _is_convertible_to_relu(node)
-        bounds = ClampConverter._get_clamp_bounds(node)
+        relu_compatible = cls._is_convertible_to_relu(node)
+        bounds = cls._get_bounds(node)
 
         if all(b is None or math.isinf(b) for b in bounds):
             return False
 
-        io_quant_consistent = ClampConverter._io_quant_is_same(node)
+        io_quant_consistent = cls._io_quant_is_same(node)
         quant_supported = NodeConverter.uses_quantization_type_for_io(
             node,
             supported_types=[torch.int8, torch.uint8],
@@ -138,7 +141,7 @@ def supports_partitioning_result(
         neutron_target_spec: NeutronTargetSpec,
         parameters_mapping: dict[str, Parameter],
     ) -> bool:
-        bounds = cls._get_clamp_bounds(node)
+        bounds = cls._get_bounds(node)
 
         # Neutron cannot delegate a partition where ReLU or ReLU6 is the only operator
         # and at the same time the node does not satisfy delegation requirements.
@@ -167,9 +170,9 @@ def convert(self, node: Node):
             ) -> Tensor
         """
         self.assert_convertible(node)
-        to_relu = _is_convertible_to_relu(node)
+        to_relu = self._is_convertible_to_relu(node)
 
-        bounds = self._get_clamp_bounds(node)
+        bounds = self._get_bounds(node)
         bounds = tuple(
             v if v is not None and math.isfinite(v) else None for v in bounds
         )

From cb37aa01b485ea111000d6875abadfdb7db7849b Mon Sep 17 00:00:00 2001
From: Simon Strycek <simon.strycek@nxp.com>
Date: Wed, 10 Jun 2026 10:35:31 +0200
Subject: [PATCH 7/8] NXP backend: Extend HardTanh support to match new
 Neutron-C flow

---
 .../ops_converters/hardtanh_converter.py      | 85 ++-----------------
 backends/nxp/quantizer/patterns.py            | 34 ++++----
 2 files changed, 21 insertions(+), 98 deletions(-)

diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/hardtanh_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/hardtanh_converter.py
index f67851895c2..0159143c5f7 100644
--- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/hardtanh_converter.py
+++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/hardtanh_converter.py
@@ -3,43 +3,16 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-from executorch.backends.nxp.backend.ir.converter.node_converter import (
-    CustomDelegationOptions,
-    is_not_qdq_node,
-    NodeConverter,
-    Partition,
-)
-from executorch.backends.nxp.backend.ir.lib.tflite.BuiltinOperator import (
-    BuiltinOperator,
-)
-from executorch.backends.nxp.backend.neutron_operator_support import (
-    activation_supported_on_target,
+
+from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.clamp_converter import (
+    ClampConverter,
 )
-from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec
 from torch.fx import Node
-from torch.nn import Parameter
-
 
-class HardTanhConverter(NodeConverter):
-
-    # Maps possible input parameters of HardTanh to equivalent ReLU-based operators supported by TFLite.
-    SUPPORTED_MODES_MAP = {
-        (0.0, 6.0): BuiltinOperator.RELU6,
-        (-1.0, 1.0): BuiltinOperator.RELU_N1_TO_1,
-        (0.0, 1.0): BuiltinOperator.RELU_0_TO_1,
-        (0.0, float("inf")): BuiltinOperator.RELU,
-    }
-
-    # Maps possible modes of HardTanh to equivalent ReLU bounds.
-    SUPPORTED_BOUNDS_MAP = {
-        "ReluN1To1": (-1.0, 1.0),
-        "Relu0To1": (0.0, 1.0),
-        "Relu6": (0.0, 6.0),
-        "Relu": (0.0, float("inf")),
-    }
 
+class HardTanhConverter(ClampConverter):
     @staticmethod
-    def _get_hardtanh_bounds(node: Node) -> tuple[float, float]:
+    def _get_bounds(node: Node) -> tuple[float | None, float | None]:
         args = node.args
 
         match len(args):
@@ -62,51 +35,3 @@ def _get_hardtanh_bounds(node: Node) -> tuple[float, float]:
                 )
 
         return min_val, max_val
-
-    @staticmethod
-    def _is_supported_in_IR(
-        node: Node,
-        parameters_mapping: dict[str, Parameter],
-        custom_delegation_options: CustomDelegationOptions,
-    ) -> bool:
-        bounds = HardTanhConverter._get_hardtanh_bounds(node)
-        return bounds in HardTanhConverter.SUPPORTED_MODES_MAP
-
-    @classmethod
-    def supports_partitioning_result(
-        cls,
-        node: Node,
-        partition_list: list[Partition],
-        custom_delegation_options: CustomDelegationOptions,
-        neutron_target_spec: NeutronTargetSpec,
-        parameters_mapping: dict[str, Parameter],
-    ) -> bool:
-        bounds = HardTanhConverter._get_hardtanh_bounds(node)
-
-        # Neutron cannot delegate a partition where ReLU or ReLU6 is the only operator
-        # and at the same time the node does not satisfy delegation requirements.
-        # In contrast, ReLUN1To1 and ReLU0To1 are supported and delegated successfuly.
-        if bounds in [
-            cls.SUPPORTED_BOUNDS_MAP["Relu"],
-            cls.SUPPORTED_BOUNDS_MAP["Relu6"],
-        ]:
-            is_alone_in_partition = cls.is_node_alone_in_partition(
-                node, partition_list, filter_fn=is_not_qdq_node
-            )
-            if is_alone_in_partition:
-                return activation_supported_on_target(node)
-
-        return True
-
-    def convert(self, node: Node):
-        """Convert 'aten::hardtanh' to its supported ReLU equivalent."""
-        self.assert_convertible(node)
-
-        t_op = self._create_tflite_op_with_io_tensors(node)
-
-        bounds = HardTanhConverter._get_hardtanh_bounds(node)
-
-        op = self.SUPPORTED_MODES_MAP[bounds]
-        t_op.opcode_index = self.builder.op_code_index_for_op_type(op)
-
-        self.builder.append_operators([t_op])
diff --git a/backends/nxp/quantizer/patterns.py b/backends/nxp/quantizer/patterns.py
index 5d72a206fec..a72e56dd74f 100644
--- a/backends/nxp/quantizer/patterns.py
+++ b/backends/nxp/quantizer/patterns.py
@@ -11,7 +11,10 @@
 
 import torch
 from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.clamp_converter import (
-    _is_convertible_to_relu,
+    ClampConverter,
+)
+from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.hardtanh_converter import (
+    HardTanhConverter,
 )
 from executorch.backends.nxp.quantizer.utils import (
     get_bias_qparams,
@@ -438,7 +441,7 @@ def get_anchors(
     ) -> PartitionAnchors | None:
         node = fused_partition[0].nodes[-1]
 
-        if not _is_convertible_to_relu(node):
+        if not ClampConverter._is_convertible_to_relu(node):
             return SharedSpecPattern.get_shared_spec_anchors(gm, fused_partition)
         else:
             return SingleInputBasicPattern.get_single_input_anchors(gm, fused_partition)
@@ -726,11 +729,21 @@ class HardTanhPattern(SingleInputBasicPattern):
     def partition_types(self):
         return [torch.ops.aten.hardtanh.default]
 
+    def get_anchors(
+        self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule]
+    ) -> PartitionAnchors | None:
+        node = fused_partition[0].nodes[-1]
+
+        if not HardTanhConverter._is_convertible_to_relu(node):
+            return SharedSpecPattern.get_shared_spec_anchors(gm, fused_partition)
+        else:
+            return SingleInputBasicPattern.get_single_input_anchors(gm, fused_partition)
+
     def replacement_op(self):
         raise AssertionError()
 
 
-class HardTanhInPlacePattern(SingleInputBasicPattern):
+class HardTanhInPlacePattern(HardTanhPattern):
     """
     Quantizer for HardTanh operator with param inplace=True.
     """
@@ -738,21 +751,6 @@ class HardTanhInPlacePattern(SingleInputBasicPattern):
     def partition_types(self):
         return [torch.ops.aten.hardtanh_.default]
 
-    def get_anchors(
-        self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule]
-    ) -> PartitionAnchors | None:
-        node = fused_partition[0].nodes[-1]
-
-        return PartitionAnchors(
-            inputs=[(node, NodeArgsIdx(0))],
-            weights=[],
-            biases=[],
-            output=[(node,)],
-        )
-
-    def replacement_op(self):
-        raise AssertionError()
-
 
 class LeakyReluPattern(SingleInputBasicPattern):
     """Quantizer for the `aten.leaky_relu.default` operator."""

From f86214126dec76cb74c1819f758ccda2a8e3824c Mon Sep 17 00:00:00 2001
From: Simon Strycek <simon.strycek@nxp.com>
Date: Wed, 10 Jun 2026 10:36:55 +0200
Subject: [PATCH 8/8] NXP backend: Adjust HardTanh test to match new support

---
 .../node_converter/test_hardtanh_converter.py | 202 ++++++++++--------
 1 file changed, 108 insertions(+), 94 deletions(-)

diff --git a/backends/nxp/tests/ir/converter/node_converter/test_hardtanh_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_hardtanh_converter.py
index 67d3add978c..1283de2ec25 100644
--- a/backends/nxp/tests/ir/converter/node_converter/test_hardtanh_converter.py
+++ b/backends/nxp/tests/ir/converter/node_converter/test_hardtanh_converter.py
@@ -7,19 +7,12 @@
 import pytest
 import torch
 
-from executorch.backends.nxp.backend.edge_program_converter import (
-    EdgeProgramToIRConverter,
-)
-from executorch.backends.nxp.tests.executorch_pipeline import to_quantized_edge_program
-from executorch.backends.nxp.tests.executors import (
-    convert_run_compare,
-    graph_contains_any_of_ops,
-    ToChannelFirstPreprocess,
-    ToChannelLastPreprocess,
-)
-from executorch.backends.nxp.tests.models import Conv2dWithActivation
+from executorch.backends.nxp.tests.dataset_creator import RandomDatasetCreator
+from executorch.backends.nxp.tests.graph_verifier import DetailedGraphVerifier
+from executorch.backends.nxp.tests.models import Conv2dWithActivation, HardTanhModule
+from executorch.backends.nxp.tests.nsys_testing import lower_run_compare
+from executorch.backends.nxp.tests.ops_aliases import Convolution
 from executorch.exir.dialects._ops import ops as exir_ops
-from torch.export import ExportedProgram
 from executorch.backends.nxp.tests.use_qat import *  # noqa F403
 
 
@@ -31,89 +24,110 @@ def reseed_model_per_test_run():
 
 ExecutorchDelegateCall = torch.ops.higher_order.executorch_call_delegate
 HardTanh = exir_ops.edge.aten.hardtanh.default
-HardTanh_ = exir_ops.edge.aten.hardtanh_.default
 
 
-@pytest.mark.parametrize("input_shape", [(1, 3, 128, 128)])
-@pytest.mark.parametrize("inplace", [True, False])
-def test_relu6_quant(mocker, input_shape: tuple[int], inplace: bool, use_qat: bool):
-    # The torch.nn.Relu6 inherits from torch.nn.Hardtanh, and hence represented as HardTanh in ATen.
-    # Testing the hardtanh originated from torch.nn.Relu6 op.
-    model = Conv2dWithActivation(
-        activation=torch.nn.ReLU6(inplace=inplace), in_channels=input_shape[1]
+class TestHardTanhNewNeutronFlow:
+    @pytest.mark.parametrize("input_shape", [(1, 3, 128, 128)])
+    @pytest.mark.parametrize("inplace", [True, False])
+    def test_relu6_quant(
+        self, mocker, input_shape: tuple[int], inplace: bool, use_qat: bool
+    ):
+        # torch.nn.ReLU6 inherits from torch.nn.Hardtanh and is hence represented as HardTanh in ATen.
+        # This tests the hardtanh op that originates from torch.nn.ReLU6.
+        model = Conv2dWithActivation(
+            activation=torch.nn.ReLU6(inplace=inplace), in_channels=input_shape[1]
+        )
+
+        graph_verifier = DetailedGraphVerifier(
+            mocker,
+            expected_delegated_ops={HardTanh: 1, Convolution: 1},
+            expected_non_delegated_ops={},
+        )
+
+        lower_run_compare(
+            model=model,
+            input_spec=input_shape,
+            dlg_model_verifier=graph_verifier,
+            use_qat=use_qat,
+        )
+
+    @pytest.mark.parametrize("input_shape", [(1, 3, 16, 16), (1, 3, 32, 32)])
+    @pytest.mark.parametrize(
+        "activation_range",
+        [
+            (0.0, 6.0),
+            (-1.0, 1.0),
+            (0.0, 1.0),
+            (0.0, float("inf")),
+            (0, 6),
+            (-1, 1),
+            (0, 1),
+            (0, float("inf")),
+        ],
     )
-
-    converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program")
-
-    quantized_program = to_quantized_edge_program(
-        model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False
-    ).exported_program()
-
-    tflite_flatbuffers_model, io_formats = converter_spy.spy_return
-    exported_program: ExportedProgram = converter_spy.call_args.args[1]
-
-    assert not graph_contains_any_of_ops(quantized_program.graph, [HardTanh, HardTanh_])
-    assert graph_contains_any_of_ops(quantized_program.graph, [ExecutorchDelegateCall])
-
-    input_data = (np.random.random(input_shape) * 50).astype(np.int8)
-    convert_run_compare(
-        exported_program,
-        tfl_model=tflite_flatbuffers_model,
-        tflite_input_preprocess=ToChannelLastPreprocess(),
-        tflite_output_preprocess=ToChannelFirstPreprocess(),
-        input_data=input_data,
-        atol=2.0,
-    )
-
-
-@pytest.mark.parametrize("input_shape", [(1, 3, 16, 16), (1, 3, 32, 32)])
-@pytest.mark.parametrize(
-    "activation_range",
-    [
-        (0.0, 6.0),
-        (-1.0, 1.0),
-        (0.0, 1.0),
-        (0.0, float("inf")),
-        (0, 6),
-        (-1, 1),
-        (0, 1),
-        (0, float("inf")),
-    ],
-)
-@pytest.mark.parametrize("inplace", [True, False])
-def test_custom_hardtanh_quant(
-    mocker,
-    input_shape: tuple[int],
-    activation_range: tuple[float, float],
-    inplace: bool,
-    use_qat: bool,
-):
-    # TODO(13063): This test suffers from non-ideal testing random quantization, because we always use range <0,1>.
-    #  We should update (decrease atol) when the Conv/Linear + Activation fuse at quantization is in place.
-    min_val, max_val = activation_range
-    model = Conv2dWithActivation(
-        activation=torch.nn.Hardtanh(min_val=min_val, max_val=max_val, inplace=inplace),
-        in_channels=input_shape[1],
-    )
-
-    converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program")
-
-    quantized_program = to_quantized_edge_program(
-        model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False
-    ).exported_program()
-
-    tflite_flatbuffers_model, io_formats = converter_spy.spy_return
-    exported_program: ExportedProgram = converter_spy.call_args.args[1]
-
-    assert not graph_contains_any_of_ops(quantized_program.graph, [HardTanh, HardTanh_])
-    assert graph_contains_any_of_ops(quantized_program.graph, [ExecutorchDelegateCall])
-
-    input_data = (np.random.random(input_shape) * 50).astype(np.int8)
-    convert_run_compare(
-        exported_program,
-        tfl_model=tflite_flatbuffers_model,
-        tflite_input_preprocess=ToChannelLastPreprocess(),
-        tflite_output_preprocess=ToChannelFirstPreprocess(),
-        input_data=input_data,
-        atol=2.0,
+    @pytest.mark.parametrize("inplace", [True, False])
+    def test_custom_hardtanh_quant(
+        self,
+        mocker,
+        input_shape: tuple[int],
+        activation_range: tuple[float, float],
+        inplace: bool,
+        use_qat: bool,
+    ):
+        min_val, max_val = activation_range
+        model = Conv2dWithActivation(
+            activation=torch.nn.Hardtanh(
+                min_val=min_val, max_val=max_val, inplace=inplace
+            ),
+            in_channels=input_shape[1],
+        )
+
+        graph_verifier = DetailedGraphVerifier(
+            mocker,
+            expected_delegated_ops={HardTanh: 1, Convolution: 1},
+            expected_non_delegated_ops={},
+        )
+
+        lower_run_compare(
+            model=model,
+            input_spec=input_shape,
+            dlg_model_verifier=graph_verifier,
+            use_qat=use_qat,
+        )
+
+    @pytest.mark.parametrize(
+        "input_shape, activation_range",
+        [
+            pytest.param(
+                (3, 7, 15, 7),
+                (0, float("inf")),
+                id="activation range: Relu, num_channels not divisible by NUM_MACS, alone in partition",
+            ),
+            pytest.param(
+                (3, 7, 15, 7),
+                (0, 6),
+                id="activation range: Relu6, num_channels not divisible by NUM_MACS, alone in partition",
+            ),
+        ],
     )
+    def test_hardtanh__old_flow_unsupported(
+        self,
+        mocker,
+        input_shape: tuple[int],
+        activation_range: tuple[float, float],
+        use_qat: bool,
+    ):
+        min_val, max_val = activation_range
+        model = HardTanhModule(min_val, max_val)
+
+        graph_verifier = DetailedGraphVerifier(
+            mocker, expected_delegated_ops={HardTanh: 1}, expected_non_delegated_ops={}
+        )
+
+        lower_run_compare(
+            model=model,
+            input_spec=input_shape,
+            dlg_model_verifier=graph_verifier,
+            dataset_creator=RandomDatasetCreator(low=-1, high=1),
+            use_qat=use_qat,
+        )