Skip to content
Draft
80 changes: 80 additions & 0 deletions backends/nxp/backend/graph_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,13 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import numpy as np
import torch
from executorch.backends.nxp.backend.ir.converter.conversion.translator import (
torch_type_to_numpy_type,
)
from executorch.backends.nxp.backend.ir.converter.node_converter import _is_dequant_node
from executorch.backends.nxp.backend.ir.converter.quantization_utils import quantize
from executorch.exir.dialects._ops import ops as exir_ops
from torch.fx import Node

Expand Down Expand Up @@ -47,3 +53,77 @@ def get_output_shape(node: Node) -> tuple[torch.Size] | torch.Size | None:
return tuple([v.shape for v in val])

return None


def is_clamp_preserved_under_quantization(
    node: Node, min_val: float = 0, max_val: float | None = None
) -> bool:
    """Check that a Clamp/ReLU/HardTanh still acts as a clamp once quantized.

    The operator bounds are quantized with the parameters of the dequantize
    node that feeds `node`. The operator is preserved when at least one of its
    hinges lies strictly inside the quantized range. Otherwise it degenerates
    either into identity (the hinges sit on the limits of the range) or into
    a constant (the whole range lies on one side of a bound).

              hinge preserved                 collapses to identity

        quant_max ─┐       ____          quant_max ─┐     /
                   │      /                         │    /
                   │     /                          │   /
        quant_min ─┘ ___/                quant_min ─┘  /

    Args:
        node: Node to check. Its first argument is expected to be the
            dequantize node that carries the quantization parameters.
        min_val: Lower bound (hinge) of the operator (e.g. 0 for ReLU).
        max_val: Upper bound of the operator (e.g. 6 for ReLU6). `None` or
            positive infinity means the operator has no upper bound (ReLU).

    Returns:
        True if the operator keeps at least one hinge after quantization,
        False if it collapses or if `node` is not fed by a dequantize node.
    """

    q_node = node.args[0]

    if not _is_dequant_node(q_node):
        # No dequantize node means no quantization parameters to check against.
        return False

    if len(q_node.args) == 6:
        # per-tensor: (input, scale, zero_point, quant_min, quant_max, dtype)
        _, scale, zp, quant_min, quant_max, q_type = q_node.args
    else:
        # per-channel:
        # (input, scales, zero_points, axis, quant_min, quant_max, dtype)
        _, scale, zp, _, quant_min, quant_max, q_type = q_node.args

    # Resolve the NumPy type before asking for its limits: `np.iinfo` does not
    # accept torch dtypes.
    np_type = torch_type_to_numpy_type(q_type).type
    if quant_min is None:
        quant_min = np.iinfo(np_type).min
    if quant_max is None:
        quant_max = np.iinfo(np_type).max

    def _quantize_bound(bound):
        return quantize(
            value=bound,
            zero_point=zp,
            scale=scale,
            quant_min=quant_min,
            quant_max=quant_max,
            dtype=np_type,
        )

    quantized_min_val = _quantize_bound(min_val)

    # An infinite upper bound is the same as no upper bound at all.
    if max_val is None or max_val == float("inf"):
        # Ensure the single ReLU/HardTanh hinge is preserved.
        return bool(
            np.all(quant_min < quantized_min_val)
            and np.all(quantized_min_val < quant_max)
        )

    quantized_max_val = _quantize_bound(max_val)

    # If at least one bound is inside the quantization range, the hinge of the
    # ReLU/HardTanh is preserved and the operator is not an identity.
    hinge_inside_range = np.all(quant_min < quantized_min_val) or np.all(
        quantized_max_val < quant_max
    )

    # When the whole quantization range lies on one side of a bound, the
    # operator collapses into a constant (e.g. 0 or 6 for ReLU6). The
    # comparison is inclusive so that a bound sitting exactly on the range
    # limit, for example after being clipped to it, is also caught.
    collapses_to_constant = np.all(quant_max <= quantized_min_val) or np.all(
        quantized_max_val <= quant_min
    )

    return bool(hinge_inside_range and not collapses_to_constant)
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
import numpy as np
import torch
from executorch.backends.nxp.backend.edge_helper import try_get_arg
from executorch.backends.nxp.backend.graph_utils import (
is_clamp_preserved_under_quantization,
)
from executorch.backends.nxp.backend.ir.converter.conversion.translator import (
torch_type_to_numpy_type,
)
Expand All @@ -20,6 +23,7 @@
)
from executorch.backends.nxp.backend.ir.converter.quantization_utils import (
propagate_quantization,
quantize,
)
from executorch.backends.nxp.backend.ir.lib.tflite.BuiltinOperator import (
BuiltinOperator,
Expand All @@ -38,17 +42,6 @@
from torch.nn import Parameter


def _is_convertible_to_relu(node):
    """Tell whether the clamp `node` can be replaced by a single ReLU-like op.

    Bounds that are missing or not finite are treated as "no bound" (`None`)
    before they are looked up among the ReLU compatible bounds.
    """

    def _finite_or_none(bound):
        if bound is None or not math.isfinite(bound):
            return None
        return bound

    normalized = tuple(
        _finite_or_none(bound) for bound in ClampConverter._get_clamp_bounds(node)
    )

    # Only some specific bounds can be replaced with a single ReLU operator.
    return normalized in ClampConverter.RELU_COMPATIBLE_BOUNDS.values()


class ClampConverter(NodeConverter):
RELU_COMPATIBLE_BOUNDS = {
"ReluN1To1": (-1, 1),
Expand All @@ -66,12 +59,25 @@ class ClampConverter(NodeConverter):

# noinspection PyShadowingBuiltins
@staticmethod
def _get_clamp_bounds(clamp_node: Node) -> tuple[float | None, float | None]:
def _get_bounds(node: Node) -> tuple[float | None, float | None]:
"""Extract min and max bounds from `aten.clamp.default` node."""
min = try_get_arg(clamp_node, 1)
max = try_get_arg(clamp_node, 2)
min = try_get_arg(node, 1)
max = try_get_arg(node, 2)
return min, max

@classmethod
def _is_convertible_to_relu(cls, node):
    """Tell whether `node` can be converted to a single ReLU-like operator.

    The bounds come from `cls._get_bounds`. Missing or non-finite bounds are
    normalized to `None` before the lookup in `cls.RELU_COMPATIBLE_BOUNDS`.
    """
    normalized = tuple(
        None if (bound is None or not math.isfinite(bound)) else bound
        for bound in cls._get_bounds(node)
    )

    # Only some specific bounds can be replaced with a single ReLU operator.
    return normalized in cls.RELU_COMPATIBLE_BOUNDS.values()

@staticmethod
def _is_supported_in_IR(
node: Node,
Expand All @@ -96,70 +102,63 @@ def _io_quant_is_same(node: Node):
dq_params = dequant.args[1:]
return all(q == dq for q, dq in zip(q_params, dq_params))

@staticmethod
@classmethod
def _is_supported_on_target(
cls,
node: Node,
neutron_target_spec: NeutronTargetSpec,
parameters_mapping: dict[str, Parameter],
custom_delegation_options: CustomDelegationOptions,
) -> bool:
relu_compatible = _is_convertible_to_relu(node)
bounds = ClampConverter._get_clamp_bounds(node)
relu_compatible = cls._is_convertible_to_relu(node)
bounds = cls._get_bounds(node)

if all(b is None or math.isinf(b) for b in bounds):
return False

io_quant_consistent = ClampConverter._io_quant_is_same(node)
io_quant_consistent = cls._io_quant_is_same(node)
quant_supported = NodeConverter.uses_quantization_type_for_io(
node,
supported_types=[torch.int8, torch.uint8],
input_indices=[0],
output_indices=[0],
)

# We either convert to ReLU -> SingleInputQuantization pattern
# or we convert to Min/Max, which requires same quantization on
# both input and output.
return (relu_compatible | io_quant_consistent) and quant_supported
if relu_compatible and activation_supported_on_target(
node,
):
return True

# We convert to Min/Max, which requires same quantization for both input and output.
return io_quant_consistent and quant_supported

@classmethod
def supports_partitioning_result(
cls,
node: Node,
partition_list: list[Partition],
_: CustomDelegationOptions,
custom_delegation_options: CustomDelegationOptions,
neutron_target_spec: NeutronTargetSpec,
parameters_mapping: dict[str, Parameter],
) -> bool:
bounds = cls._get_clamp_bounds(node)
bounds = cls._get_bounds(node)

# Neutron cannot delegate a partition where ReLU or ReLU6 is the only operator
# and at the same time the node does not satisfy delegation requirements.
# In contrast, ReLUN1To1 and ReLU0To1 are supported and delegated successfully.
if bounds in [
cls.RELU_COMPATIBLE_BOUNDS["Relu"],
cls.RELU_COMPATIBLE_BOUNDS["Relu6"],
]:
if bounds in cls.RELU_COMPATIBLE_BOUNDS.values():
is_alone_in_partition = cls.is_node_alone_in_partition(
node, partition_list, filter_fn=is_not_qdq_node
)
if is_alone_in_partition:
return activation_supported_on_target(node, neutron_target_spec)
return is_clamp_preserved_under_quantization(
node,
min_val=bounds[0],
max_val=bounds[1],
)

return True

@staticmethod
def _quantize_value(
    value: int,
    zp: int,
    scale: float,
    quant_min: int,
    quant_max: int,
    dtype: type = np.int8,
) -> np.integer:
    """Quantize a scalar `value` with the given affine parameters.

    The value is divided by `scale`, rounded, shifted by the zero point `zp`,
    clipped to `[quant_min, quant_max]` and finally cast to `dtype`.
    """
    shifted = round(value / scale) + zp
    clipped = np.clip(shifted, quant_min, quant_max)
    return dtype(clipped)

def convert(self, node: Node):
"""Convert the `aten.clamp.default` operator to either
Neutron IR `Relu*` operator or combination of `Min` and `Max`.
Expand All @@ -171,9 +170,9 @@ def convert(self, node: Node):
) -> Tensor
"""
self.assert_convertible(node)
to_relu = _is_convertible_to_relu(node)
to_relu = self._is_convertible_to_relu(node)

bounds = self._get_clamp_bounds(node)
bounds = self._get_bounds(node)
bounds = tuple(
v if v is not None and math.isfinite(v) else None for v in bounds
)
Expand Down Expand Up @@ -202,9 +201,9 @@ def convert(self, node: Node):
min_value, max_value = bounds

if min_value is not None:
min_value = self._quantize_value(
min_value = quantize(
value=min_value,
zp=zp,
zero_point=zp,
scale=scale,
quant_min=quant_min,
quant_max=quant_max,
Expand All @@ -216,9 +215,9 @@ def convert(self, node: Node):
propagate_quantization(x, min_tensor)

if max_value is not None:
max_value = self._quantize_value(
max_value = quantize(
value=max_value,
zp=zp,
zero_point=zp,
scale=scale,
quant_min=quant_min,
quant_max=quant_max,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,43 +3,16 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from executorch.backends.nxp.backend.ir.converter.node_converter import (
CustomDelegationOptions,
is_not_qdq_node,
NodeConverter,
Partition,
)
from executorch.backends.nxp.backend.ir.lib.tflite.BuiltinOperator import (
BuiltinOperator,
)
from executorch.backends.nxp.backend.neutron_operator_support import (
activation_supported_on_target,

from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.clamp_converter import (
ClampConverter,
)
from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec
from torch.fx import Node
from torch.nn import Parameter


class HardTanhConverter(NodeConverter):

# Maps possible input parameters of HardTanh to equivalent ReLU-based operators supported by TFLite.
SUPPORTED_MODES_MAP = {
(0.0, 6.0): BuiltinOperator.RELU6,
(-1.0, 1.0): BuiltinOperator.RELU_N1_TO_1,
(0.0, 1.0): BuiltinOperator.RELU_0_TO_1,
(0.0, float("inf")): BuiltinOperator.RELU,
}

# Maps possible modes of HardTanh to equivalent ReLU bounds.
SUPPORTED_BOUNDS_MAP = {
"ReluN1To1": (-1.0, 1.0),
"Relu0To1": (0.0, 1.0),
"Relu6": (0.0, 6.0),
"Relu": (0.0, float("inf")),
}

class HardTanhConverter(ClampConverter):
@staticmethod
def _get_hardtanh_bounds(node: Node) -> tuple[float, float]:
def _get_bounds(node: Node) -> tuple[float | None, float | None]:
args = node.args

match len(args):
Expand All @@ -62,51 +35,3 @@ def _get_hardtanh_bounds(node: Node) -> tuple[float, float]:
)

return min_val, max_val

@staticmethod
def _is_supported_in_IR(
    node: Node,
    parameters_mapping: dict[str, Parameter],
    custom_delegation_options: CustomDelegationOptions,
) -> bool:
    """Return True when the HardTanh bounds of `node` are a supported mode.

    A mode is supported when its `(min, max)` pair is a key of
    `HardTanhConverter.SUPPORTED_MODES_MAP`. The remaining parameters are
    not used by this check.
    """
    return (
        HardTanhConverter._get_hardtanh_bounds(node)
        in HardTanhConverter.SUPPORTED_MODES_MAP
    )

@classmethod
def supports_partitioning_result(
    cls,
    node: Node,
    partition_list: list[Partition],
    custom_delegation_options: CustomDelegationOptions,
    neutron_target_spec: NeutronTargetSpec,
    parameters_mapping: dict[str, Parameter],
) -> bool:
    """Decide whether `node` may stay delegated after partitioning.

    Neutron cannot delegate a partition where ReLU or ReLU6 is the only
    operator and at the same time the node does not satisfy delegation
    requirements. In contrast, ReLUN1To1 and ReLU0To1 are supported and
    delegated successfully, so they are always accepted here.
    """
    bounds = HardTanhConverter._get_hardtanh_bounds(node)

    restricted_bounds = (
        cls.SUPPORTED_BOUNDS_MAP["Relu"],
        cls.SUPPORTED_BOUNDS_MAP["Relu6"],
    )
    if bounds not in restricted_bounds:
        return True

    # QDQ nodes are filtered out when deciding whether the node is alone.
    if not cls.is_node_alone_in_partition(
        node, partition_list, filter_fn=is_not_qdq_node
    ):
        return True

    return activation_supported_on_target(node, neutron_target_spec)

def convert(self, node: Node):
    """Convert 'aten::hardtanh' to its supported ReLU equivalent.

    The ReLU variant is looked up in `SUPPORTED_MODES_MAP` by the HardTanh
    bounds of `node`, and the resulting operator is appended to the builder.
    """
    self.assert_convertible(node)

    relu_op = self._create_tflite_op_with_io_tensors(node)

    hardtanh_bounds = HardTanhConverter._get_hardtanh_bounds(node)
    builtin_op = self.SUPPORTED_MODES_MAP[hardtanh_bounds]

    # The operator created from the node only needs its opcode set.
    relu_op.opcode_index = self.builder.op_code_index_for_op_type(builtin_op)

    self.builder.append_operators([relu_op])
Loading
Loading