Empty file added __init__.py
Empty file added backends/nxp/__init__.py
25 changes: 18 additions & 7 deletions backends/nxp/quantizer/utils.py
@@ -1,5 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# Copyright 2024-2025 NXP
# Copyright 2024-2026 NXP
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
@@ -10,7 +10,7 @@
import itertools
from collections import OrderedDict
from collections.abc import Iterable
from typing import Any, Dict, List, Tuple, Type
from typing import Any, Callable, Dict, List, Tuple, Type

import torch
from executorch.backends.nxp.aten_passes.fuse_batch_norm_with_linear_pass import (
@@ -30,8 +30,10 @@
check_subgraphs_connected,
SourcePartition,
)

from torchao.quantization.pt2e import (
move_exported_model_to_eval,
move_exported_model_to_train,
ObserverOrFakeQuantize,
)
from torchao.quantization.pt2e.quantize_pt2e import (
@@ -176,16 +178,17 @@ def calibrate_and_quantize(
calibration_inputs: Iterable[tuple[torch.Tensor, ...]],
quantizer: Quantizer,
is_qat: bool = False,
train_fn: Callable[[torch.fx.GraphModule], None] | None = None,
) -> fx.GraphModule:
"""Quantize the provided model.

    :param model: ATen model (or its GraphModule representation) to quantize.
:param calibration_inputs: Either a tuple of calibration input tensors where each element corresponds to a model
input. Or an iterator over such tuples.
:param calibration_inputs: An iterator over tuples of calibration input tensors where each tensor corresponds to a
model input.
:param quantizer: Quantizer to use.
    :param is_qat: Whether quantization is done using Quantization Aware Training (QAT) or not.
                   Note: In QAT mode without a `train_fn`, training is not performed. Only
                   calibration (in eval mode) is done.
    :param train_fn: Optional training function called on the prepared module during QAT.
                     When provided, it replaces the calibration loop.
:return: Quantized GraphModule.
"""

@@ -195,12 +198,20 @@
if is_qat:
m = prepare_qat_pt2e(model, quantizer)
m = AddSimulatedLinearBatchNormFusionQATPass()(m).graph_module

if train_fn:
m = move_exported_model_to_train(m)
train_fn(m)

m = move_exported_model_to_eval(m)
m = RemoveSimulatedLinearBatchNormFusionQATPass()(m).graph_module
m = FuseBatchNormWithLinearPass()(m).graph_module
else:
m = prepare_pt2e(model, quantizer)

for data in calibration_inputs:
m(*data)
    # Calibration is skipped only when QAT training is performed via `train_fn`.
    if not (is_qat and train_fn):
        for data in calibration_inputs:
            m(*data)

if is_qat:
m = RemoveSimulatedLinearBatchNormFusionQATPass()(m).graph_module
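For reviewers, a minimal sketch of how the new `train_fn` hook might be used; `exported_model`, `my_quantizer`, and `train_loader` are hypothetical stand-ins, not part of this PR:

```python
import torch

from executorch.backends.nxp.quantizer.utils import calibrate_and_quantize


def my_train_fn(m: torch.fx.GraphModule) -> None:
    # Stand-in fine-tuning loop; `train_loader` is a hypothetical DataLoader.
    optimizer = torch.optim.SGD(m.parameters(), lr=1e-3)
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        loss = torch.nn.functional.cross_entropy(m(inputs), labels)
        loss.backward()
        optimizer.step()


quantized = calibrate_and_quantize(
    exported_model,  # hypothetical exported ATen GraphModule
    calibration_inputs=[],  # unused here: QAT with a train_fn skips calibration
    quantizer=my_quantizer,  # hypothetical PT2E quantizer
    is_qat=True,
    train_fn=my_train_fn,
)
```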
Empty file.
@@ -21,7 +21,7 @@

NSYS_PATH = pathlib.Path(shutil.which("nsys"))
NSYS_CONFIG_PATH = os.path.join(
PROJECT_DIR, "backends", "nxp", "tests_models", "neutron-imxrt700.ini"
PROJECT_DIR, "backends", "nxp", "tests", "neutron-imxrt700.ini"
)
NSYS_FIRMWARE_PATH = os.path.join(
os.path.dirname(eiq_neutron_sdk.__file__),
@@ -12,6 +12,6 @@

logger.debug("Importing from executorch-integration")
except ImportError:
import executorch.backends.nxp.tests_models.config as test_config # noqa F401
import executorch.backends.nxp.tests.config as test_config # noqa F401

logger.debug("Importing from executorch")
@@ -8,7 +8,7 @@
import pathlib
import shutil

from executorch.backends.nxp.tests_models.outputs_dir_importer import outputs_dir
from executorch.backends.nxp.tests.outputs_dir_importer import outputs_dir


def pytest_addoption(parser):
@@ -14,8 +14,8 @@
import numpy as np
import torch
from executorch.backends.nxp.backend.ir.converter.conversion import translator
from executorch.backends.nxp.tests_models.calibration_dataset import CalibrationDataset
from executorch.backends.nxp.tests_models.model_input_spec import ModelInputSpec
from executorch.backends.nxp.tests.calibration_dataset import CalibrationDataset
from executorch.backends.nxp.tests.executorch_pipeline import ModelInputSpec
from torch import Tensor


161 changes: 121 additions & 40 deletions backends/nxp/tests/executorch_pipeline.py
@@ -8,9 +8,10 @@
import re
from dataclasses import dataclass
from functools import partial
from typing import Callable
from typing import Callable, Iterable

import eiq_neutron_sdk
import numpy as np
import torch

from executorch import exir
@@ -28,7 +29,6 @@
RemoveIOQuantOpsPass,
)
from executorch.backends.nxp.neutron_partitioner import NeutronPartitioner

from executorch.backends.nxp.nxp_backend import (
core_aten_ops_exception_list,
generate_neutron_compile_spec,
@@ -42,7 +42,7 @@
ExecutorchProgramManager,
to_edge_transform_and_lower,
)
from torch import nn
from torch import memory_format, nn
from torch.export import export
from torchao.quantization.pt2e.quantizer import Quantizer

@@ -52,7 +52,9 @@
@dataclass
class ModelInputSpec:
shape: tuple[int, ...]
type: np.dtype = np.float32
dtype: torch.dtype = torch.float32
dim_order: memory_format = torch.contiguous_format


def handle_kernel_selection(model_name: str = ""):
@@ -81,11 +83,11 @@ def handle_kernel_selection(model_name: str = ""):


def get_random_calibration_inputs(
input_spec: tuple[ModelInputSpec, ...]
input_spec: Iterable[ModelInputSpec], num_samples: int = 4
) -> list[tuple[torch.Tensor, ...]]:
return [
tuple([torch.randn(spec.shape, dtype=spec.dtype) for spec in input_spec])
for _ in range(4)
for _ in range(num_samples)
]
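As a quick illustration of the new `num_samples` parameter (shapes made up):

```python
# Two random calibration samples for a hypothetical two-input model.
calib = get_random_calibration_inputs(
    (ModelInputSpec(shape=(1, 8)), ModelInputSpec(shape=(1, 4))),
    num_samples=2,
)
assert len(calib) == 2 and calib[0][0].shape == (1, 8)
```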


@@ -94,35 +96,91 @@ def _get_default_quantizer(target_spec: NeutronTargetSpec, use_qat: bool) -> Qua


def to_model_input_spec(
input_spec: tuple[ModelInputSpec, ...] | tuple[int, ...] | list[tuple[int, ...]]
    input_spec: Iterable[ModelInputSpec] | tuple[int, ...] | list[tuple[int, ...]]
) -> tuple[ModelInputSpec, ...]:
if isinstance(input_spec, tuple) and all(
isinstance(spec, ModelInputSpec) for spec in input_spec
):
return input_spec

elif isinstance(input_spec, tuple) and all(
isinstance(spec, int) for spec in input_spec
):
return (ModelInputSpec(input_spec),)

elif isinstance(input_spec, list) and all(
isinstance(input_shape, tuple) for input_shape in input_spec
):
return tuple([ModelInputSpec(spec) for spec in input_spec])
else:
raise TypeError(f"Unsupported type {type(input_spec)}")
match input_spec:
case tuple() | list() if all(
isinstance(spec, ModelInputSpec) for spec in input_spec
):
return tuple(input_spec)
case tuple() if all(isinstance(spec, int) for spec in input_spec):
return (ModelInputSpec(input_spec),)
case list() if all(
isinstance(input_shape, tuple) for input_shape in input_spec
):
return tuple(ModelInputSpec(spec) for spec in input_spec)
case _:
raise TypeError(f"Unsupported type {type(input_spec)}")
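For readers skimming the refactor, the three accepted `input_spec` forms normalize like this (illustrative shapes):

```python
to_model_input_spec((1, 10))  # single input shape -> (ModelInputSpec((1, 10)),)
to_model_input_spec([(1, 10), (4,)])  # one shape tuple per model input
to_model_input_spec((ModelInputSpec((1, 10)),))  # already-normalized specs pass through
```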


GetCalibrationInputsFn = Callable[
[tuple[ModelInputSpec, ...]], Iterable[tuple[torch.Tensor, ...]]
]


def get_calibration_inputs_fn_from_dataset_dir(dataset_dir: str) -> GetCalibrationInputsFn:
def _nested(
input_spec: tuple[ModelInputSpec, ...]
) -> Iterable[tuple[torch.Tensor, ...]]:
data = sorted(os.listdir(dataset_dir))
inputs_needed = len(input_spec)

for path in data:
path = os.path.join(dataset_dir, path)
files = []

if os.path.isdir(path):
files = [os.path.join(path, x) for x in sorted(os.listdir(path))]
else:
files.append(path)

            input_data = []
            for idx, file in enumerate(files):
                if len(input_data) == inputs_needed:
                    break

                # `ModelInputSpec.dtype` is a torch.dtype; map it to the matching
                # numpy dtype before reading the raw file.
                np_dtype = torch.empty(0, dtype=input_spec[idx].dtype).numpy().dtype
                tensor = np.fromfile(file, dtype=np_dtype).reshape(
                    input_spec[idx].shape
                )
                input_data.append(torch.from_numpy(tensor))

if len(input_data) < inputs_needed:
continue

yield tuple(input_data)

return _nested
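As I read the loader, it expects one raw `ndarray.tofile` dump per model input: a single file per sample, or one subdirectory per sample for multi-input models. A hypothetical call (directory name and shape are stand-ins, and this assumes the dtype handling above):

```python
spec = (ModelInputSpec(shape=(1, 3, 32, 32)),)
load_inputs = get_calibration_inputs_fn_from_dataset_dir("./calib_data")
for sample in load_inputs(spec):
    assert sample[0].shape == (1, 3, 32, 32)
```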


def _get_example_input(
input_spec: tuple[ModelInputSpec, ...]
) -> tuple[torch.Tensor, ...]:
example_input = []
for spec in input_spec:
match spec.dim_order:
case torch.contiguous_format:
sample = torch.ones(spec.shape, dtype=spec.dtype)
case torch.channels_last:
sample = torch.ones(spec.shape, dtype=spec.dtype).to(
memory_format=torch.channels_last
)
case _:
raise ValueError(f"Unsupported dim_order: {spec.dim_order}")
# noinspection PyUnboundLocalVariable
example_input.append(sample)

return tuple(example_input)
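A channels-last spec flows through `_get_example_input` as follows (illustrative shape):

```python
spec = ModelInputSpec(
    shape=(1, 3, 224, 224),
    dtype=torch.float32,
    dim_order=torch.channels_last,
)
(example,) = _get_example_input((spec,))
assert example.is_contiguous(memory_format=torch.channels_last)
```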


def to_quantized_edge_program(
model: torch.nn.Module,
input_spec: tuple[ModelInputSpec, ...] | tuple[int, ...] | list[tuple[int, ...]],
input_spec: Iterable[ModelInputSpec] | tuple[int, ...] | list[tuple[int, ...]],
    operators_not_to_delegate: list[str] | None = None,
get_calibration_inputs_fn: Callable[
[tuple[ModelInputSpec, ...]], list[tuple[torch.Tensor, ...]]
] = get_random_calibration_inputs,
get_calibration_inputs_fn: GetCalibrationInputsFn = get_random_calibration_inputs,
target: str = "imxrt700",
use_qat: bool = False,
train_fn: Callable[[torch.fx.GraphModule], None] | None = None,
remove_quant_io_ops: bool = False,
custom_delegation_options: CustomDelegationOptions = CustomDelegationOptions(), # noqa B008
get_quantizer_fn: Callable[[], Quantizer] | None = None,
@@ -131,15 +189,16 @@ def to_quantized_edge_program(
fetch_constants_to_sram: bool = False,
dump_kernel_selection_code: bool = False,
use_new_flow_neutron_c: bool = False,
    delegate_to_npu: bool = True,
) -> EdgeProgramManager:
_neutron_target_spec = NeutronTargetSpec(target)
if get_quantizer_fn is None:
get_quantizer_fn = partial(
_get_default_quantizer, _neutron_target_spec, use_qat
)

calibration_inputs = get_calibration_inputs_fn(to_model_input_spec(input_spec))
example_input = calibration_inputs[0]
input_spec = to_model_input_spec(input_spec)
calibration_inputs = get_calibration_inputs_fn(input_spec)
example_input = _get_example_input(input_spec)

# Make sure the model is in the evaluation mode.
model.eval()
@@ -151,6 +210,7 @@
calibration_inputs=calibration_inputs,
quantizer=get_quantizer_fn(),
is_qat=use_qat,
train_fn=train_fn,
)

# List of operators to not decompose during the lowering.
@@ -166,15 +226,18 @@
post_quant_state_dict = (
exir_program_aten__module_quant.state_dict() if use_quant_state_dict else None
)
partitioners = [
NeutronPartitioner(
compile_spec,
_neutron_target_spec,
custom_delegation_options,
post_quant_state_dict,
preserve_ops=preserve_ops,
)
]
if delegate_to_npu:
partitioners = [
NeutronPartitioner(
compile_spec,
_neutron_target_spec,
custom_delegation_options,
post_quant_state_dict,
preserve_ops=preserve_ops,
)
]
else:
partitioners = []

edge_program_manager = to_edge_transform_and_lower(
export(exir_program_aten__module_quant, example_input, strict=True),
@@ -203,15 +266,33 @@

def to_quantized_executorch_program(
model: torch.nn.Module,
input_spec: tuple[ModelInputSpec, ...] | tuple[int, ...] | list[tuple[int, ...]],
input_spec: Iterable[ModelInputSpec] | tuple[int, ...] | list[tuple[int, ...]],
use_qat: bool = False,
train_fn: Callable[[torch.fx.GraphModule], None] | None = None,
use_neutron_for_format_conversion: bool = True,
dataset_dir: str | None = None,
    delegate_to_npu: bool = True,
use_new_flow_neutron_c: bool = False,
) -> ExecutorchProgramManager:
    if dataset_dir:
        # Extract calibration data from a directory instead of generating random data.
        calibration_kwargs = {
            "get_calibration_inputs_fn": get_calibration_inputs_fn_from_dataset_dir(
                dataset_dir
            )
        }
    else:
        calibration_kwargs = {}  # Fall back to the default parameter value.

edge_program_manager = to_quantized_edge_program(
model,
input_spec,
use_qat=use_qat,
train_fn=train_fn,
use_neutron_for_format_conversion=use_neutron_for_format_conversion,
delegate_to_npu=delegate_to_npu,
use_new_flow_neutron_c=use_new_flow_neutron_c,
        **calibration_kwargs,
)

return edge_program_manager.to_executorch(
@@ -221,7 +302,7 @@ def to_quantized_executorch_program(

def to_edge_program(
model: nn.Module,
input_spec: tuple[ModelInputSpec, ...] | tuple[int, ...] | list[tuple[int, ...]],
input_spec: Iterable[ModelInputSpec] | tuple[int, ...] | list[tuple[int, ...]],
) -> EdgeProgramManager:
calibration_inputs = get_random_calibration_inputs(to_model_input_spec(input_spec))

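Putting the new knobs together, a sketch of an end-to-end call; `model` and `my_train_fn` are stand-ins (see the QAT sketch above):

```python
program = to_quantized_executorch_program(
    model,  # hypothetical nn.Module
    (1, 3, 224, 224),  # single-input shape shorthand
    use_qat=True,
    train_fn=my_train_fn,  # QAT training replaces calibration
    dataset_dir=None,  # or a directory of raw calibration dumps
    delegate_to_npu=False,  # skip the NeutronPartitioner entirely
)
```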
Empty file.
@@ -16,7 +16,7 @@ def test_aot_example__mobilenet_v2():
"""Test that mobilenet can be lowered to Neutron backend via `aot_neutron_compile.py` and all ops are delegated."""

# Find the executorch root directory (4 levels up from this test file)
Contributor review comment: update to 5 levels up

executorch_root = Path(__file__).parent.parent.parent.parent
executorch_root = Path(__file__).parent.parent.parent.parent.parent
assert executorch_root.exists(), f"Executorch root not found at {executorch_root}"

# Run the compilation script as a module (like run_aot_example.sh does)