Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid
- Deeploy-GAP9 Platform [#143](https://github.com/pulp-platform/Deeploy/pull/143)
- Update CLI interface Across Project, Fix Tutorial, and Remove Legacy Test [#157](https://github.com/pulp-platform/Deeploy/pull/157)
- Fix for python error when using python 3.12.11 [#189](https://github.com/pulp-platform/Deeploy/pull/189)
- Add support for Operators for Generic target needed in MAGIA [#193](https://github.com/pulp-platform/Deeploy/pull/193)

### Added
- Add many missing docstrings
Expand All @@ -26,6 +27,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid
- Add integer MaxPool1D for Generic platform and RQSConv1D support for PULPOpen, with corresponding kernel tests.
- Added GAP9 Platform Support: Deployer, Bindings, Templates, Tiler, DMA (L3Dma/MchanDma), target library, CI workflows
- Per-layer microbenchmarking on PULPOpen via `--profileMicrobenchmark`: new `PULPMicrobenchmark` code-transformation pass + `perf_utils.h` helpers report cycles, instructions, stalls and cache misses per layer in `RunNetwork`
- Add Generic target support for the following operators: [Ceil](https://onnx.ai/onnx/operators/onnx__Ceil.html), [Floor](https://onnx.ai/onnx/operators/onnx__Floor.html), [Clip](https://onnx.ai/onnx/operators/onnx__Clip.html), [Sub](https://onnx.ai/onnx/operators/onnx__Sub.html), [Exp](https://onnx.ai/onnx/operators/onnx__Exp.html), [Sigmoid](https://onnx.ai/onnx/operators/onnx__Sigmoid.html), [Swish](https://onnx.ai/onnx/operators/onnx__Swish.html), [HardSigmoid](https://onnx.ai/onnx/operators/onnx__HardSigmoid.html), [HardSwish](https://onnx.ai/onnx/operators/onnx__HardSwish.html), [InstanceNormalization](https://onnx.ai/onnx/operators/onnx__InstanceNormalization.html), [GroupNormalization](https://onnx.ai/onnx/operators/onnx__GroupNormalization.html), [AveragePool](https://onnx.ai/onnx/operators/onnx__AveragePool.html), [GlobalAveragePool](https://onnx.ai/onnx/operators/onnx__GlobalAveragePool.html), [GlobalMaxPool](https://onnx.ai/onnx/operators/onnx__GlobalMaxPool.html).

### Changed
- Use by default `devel` container for GAP9 CI
Expand Down
107 changes: 100 additions & 7 deletions Deeploy/Targets/Generic/Bindings.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,16 @@
from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration
from Deeploy.Targets.Generic.Templates import AddTemplate, BatchNormalizationTemplate, ConcatTemplate, ConvTemplate, \
ConvTransposeTemplate, DebugPrintTemplate, DequantTemplate, DummyTemplate, DWConvTemplate, FloatAddTemplate, \
FloatConvTemplate, FloatDivTemplate, FloatDWConvTemplate, FloatGELUTemplate, FloatGemmTemplate, \
FloatLayernormTemplate, FloatMatMulTemplate, FloatMaxPoolTemplate, FloatMulTemplate, FloatPadTemplate, \
FloatPowTemplate, FloatReduceMeanTemplate, FloatReluTemplate, FloatSoftmaxTemplate, FloatSqrtTemplate, \
GatherTemplate, GemmTemplate, IntegerDivTemplate, ITAMaxTemplate, ITAPartialMaxTemplate, MatMulTemplate, \
MaxPoolTemplate, MulTemplate, PadTemplate, QuantTemplate, ReduceMeanTemplate, ReduceSumTemplate, \
RequantShiftTemplate, ReshapeTemplate, RQIntegerDivTemplate, RQSiGELUTemplate, SliceTemplate, TransposeTemplate, \
iGELUTemplate, iLayernormTemplate, iRMSNormTemplate, iSoftmaxTemplate
FloatAveragePoolTemplate, FloatCeilTemplate, FloatClipTemplate, FloatConvTemplate, FloatDivTemplate, \
FloatDWConvTemplate, FloatExpTemplate, FloatFloorTemplate, FloatGELUTemplate, FloatGemmTemplate, \
FloatGlobalAveragePoolTemplate, FloatGlobalMaxPoolTemplate, FloatGroupNormTemplate, FloatHardSigmoidTemplate, \
FloatHardSwishTemplate, FloatInstanceNormTemplate, FloatLayernormTemplate, FloatMatMulTemplate, \
FloatMaxPoolTemplate, FloatMulTemplate, FloatPadTemplate, FloatPowTemplate, FloatReduceMeanTemplate, \
FloatReluTemplate, FloatSigmoidTemplate, FloatSoftmaxTemplate, FloatSqrtTemplate, FloatSubTemplate, \
FloatSwishTemplate, GatherTemplate, GemmTemplate, IntegerDivTemplate, ITAMaxTemplate, ITAPartialMaxTemplate, \
MatMulTemplate, MaxPoolTemplate, MulTemplate, PadTemplate, QuantTemplate, ReduceMeanTemplate, ReduceSumTemplate, \
RequantShiftTemplate, ReshapeTemplate, RQIntegerDivTemplate, RQSiGELUTemplate, SliceTemplate, SubTemplate, \
TransposeTemplate, iGELUTemplate, iLayernormTemplate, iRMSNormTemplate, iSoftmaxTemplate
from Deeploy.Targets.Generic.TypeCheckers import AddChecker, BatchNormChecker, ConcatChecker, ConvChecker, \
DebugPrintChecker, DequantChecker, DivChecker, DummyChecker, GatherChecker, GELUChecker, GEMMChecker, \
LayerNormChecker, MatMulChecker, MaxPoolChecker, MulChecker, PadChecker, QuantChecker, ReduceMeanChecker, \
Expand Down Expand Up @@ -54,6 +57,17 @@
FloatAddTemplate.referenceTemplate, BasicTransformer)
]

# Sub bindings reuse AddChecker: per the original author, the checks for Sub and Add are exactly the same.
# Integer bindings cover every ordered pair of integer operand types and always produce an int32 output;
# a single float32 binding is appended after them.
BasicSubBindings = [
    NodeBinding(
        AddChecker([PointerClass(lhs_type), PointerClass(rhs_type)], [PointerClass(int32_t)]),
        SubTemplate.referenceTemplate,
        BasicTransformer,
    )
    for lhs_type in IntegerDataTypes
    for rhs_type in IntegerDataTypes
] + [
    NodeBinding(
        AddChecker([PointerClass(float32_t), PointerClass(float32_t)], [PointerClass(float32_t)]),
        FloatSubTemplate.referenceTemplate,
        BasicTransformer,
    )
]

BasicConv1DBindings = [
NodeBinding(ConvChecker(
[PointerClass(type), PointerClass(type), PointerClass(type)], [PointerClass(type)]),
Expand Down Expand Up @@ -327,3 +341,82 @@
ConvTransposeTemplate.referenceTemplate,
BasicTransformer) for type in FloatDataTypes
]

# Ceil: a single float32 binding (one float32 pointer in each checker list) using the float Ceil template.
BasicCeilBindings = [
    NodeBinding(
        DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]),
        FloatCeilTemplate.referenceTemplate,
        BasicTransformer,
    ),
]

# Floor: a single float32 binding (one float32 pointer in each checker list) using the float Floor template.
BasicFloorBindings = [
    NodeBinding(
        DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]),
        FloatFloorTemplate.referenceTemplate,
        BasicTransformer,
    ),
]

# Clip: a single float32 binding whose first checker list holds three float32 pointers
# (presumably data, min and max, following the ONNX Clip signature - confirm against the parser).
BasicClipBindings = [
    NodeBinding(
        DummyChecker(
            [PointerClass(float32_t), PointerClass(float32_t), PointerClass(float32_t)],
            [PointerClass(float32_t)],
        ),
        FloatClipTemplate.referenceTemplate,
        BasicTransformer,
    ),
]

# Exp: a single float32 binding (one float32 pointer in each checker list) using the float Exp template.
BasicExpBindings = [
    NodeBinding(
        DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]),
        FloatExpTemplate.referenceTemplate,
        BasicTransformer,
    ),
]

# Sigmoid: a single float32 binding (one float32 pointer in each checker list) using the float Sigmoid template.
BasicSigmoidBindings = [
    NodeBinding(
        DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]),
        FloatSigmoidTemplate.referenceTemplate,
        BasicTransformer,
    ),
]

# Swish: a single float32 binding (one float32 pointer in each checker list) using the float Swish template.
BasicSwishBindings = [
    NodeBinding(
        DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]),
        FloatSwishTemplate.referenceTemplate,
        BasicTransformer,
    ),
]

# HardSigmoid: a single float32 binding (one float32 pointer in each checker list)
# using the float HardSigmoid template.
BasicHardSigmoidBindings = [
    NodeBinding(
        DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]),
        FloatHardSigmoidTemplate.referenceTemplate,
        BasicTransformer,
    ),
]

# HardSwish: a single float32 binding (one float32 pointer in each checker list)
# using the float HardSwish template.
BasicHardSwishBindings = [
    NodeBinding(
        DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]),
        FloatHardSwishTemplate.referenceTemplate,
        BasicTransformer,
    ),
]

# InstanceNormalization: a single float32 binding whose first checker list holds three float32 pointers
# (presumably data, scale and bias, following the ONNX signature - confirm against the parser).
BasicInstanceNormBindings = [
    NodeBinding(
        DummyChecker(
            [PointerClass(float32_t), PointerClass(float32_t), PointerClass(float32_t)],
            [PointerClass(float32_t)],
        ),
        FloatInstanceNormTemplate.referenceTemplate,
        BasicTransformer,
    ),
]

# GroupNormalization: a single float32 binding whose first checker list holds three float32 pointers
# (presumably data, scale and bias, following the ONNX signature - confirm against the parser).
BasicGroupNormBindings = [
    NodeBinding(
        DummyChecker(
            [PointerClass(float32_t), PointerClass(float32_t), PointerClass(float32_t)],
            [PointerClass(float32_t)],
        ),
        FloatGroupNormTemplate.referenceTemplate,
        BasicTransformer,
    ),
]

# AveragePool (1D): a single float32 binding using the 1D variant of the float AveragePool template.
BasicAveragePool1DBindings = [
    NodeBinding(
        DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]),
        FloatAveragePoolTemplate.referenceTemplate1d,
        BasicTransformer,
    )
]

# AveragePool (2D): a single float32 binding using the 2D variant of the float AveragePool template.
BasicAveragePool2DBindings = [
    NodeBinding(
        DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]),
        FloatAveragePoolTemplate.referenceTemplate2d,
        BasicTransformer,
    )
]

# GlobalAveragePool: a single float32 binding (one float32 pointer in each checker list)
# using the float GlobalAveragePool template.
BasicGlobalAveragePoolBindings = [
    NodeBinding(
        DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]),
        FloatGlobalAveragePoolTemplate.referenceTemplate,
        BasicTransformer,
    )
]

# GlobalMaxPool: a single float32 binding (one float32 pointer in each checker list)
# using the float GlobalMaxPool template.
BasicGlobalMaxPoolBindings = [
    NodeBinding(
        DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]),
        FloatGlobalMaxPoolTemplate.referenceTemplate,
        BasicTransformer,
    )
]
123 changes: 103 additions & 20 deletions Deeploy/Targets/Generic/Layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@
from Deeploy.DeeployTypes import NodeMapper, ONNXLayer, OperatorRepresentation, Shape


class SingleOperationPerElementLayer(ONNXLayer):
    """Base class for layers whose operation count is one operation per element."""

    def computeOps(self):
        """Return the ``'size'`` entry of the mapped parser's operator representation."""
        opRep = self.mapper.parser.operatorRepresentation
        return opRep['size']


class ConcatLayer(ONNXLayer):

def __init__(self, maps: List[NodeMapper]):
Expand Down Expand Up @@ -168,10 +174,7 @@ def computeOps(self):
return self.mapper.parser.operatorRepresentation['size'] * 3 # One add, one mul, one div


class AddLayer(ONNXLayer):

def __init__(self, maps: List[NodeMapper]):
super().__init__(maps)
class AddLayer(SingleOperationPerElementLayer):

def computeShapes(self, inputShapes: Shape, outputShapes: Shape, operatorRepresentation,
channels_first) -> Tuple[Shape, Shape]:
Expand All @@ -184,8 +187,8 @@ def computeShapes(self, inputShapes: Shape, outputShapes: Shape, operatorReprese
outputShapes = [inputShapes[0]]
return (inputShapes, outputShapes)

def computeOps(self):
return self.mapper.parser.operatorRepresentation['size']

SubLayer = AddLayer


class MatMulLayer(ONNXLayer):
Expand Down Expand Up @@ -329,10 +332,7 @@ def computeOps(self):
return gemm + rqs


class MulLayer(ONNXLayer):

def __init__(self, maps: List[NodeMapper]):
super().__init__(maps)
class MulLayer(SingleOperationPerElementLayer):

def computeShapes(self, inputShapes: Shape, outputShapes: Shape, operatorRepresentation,
channels_first) -> Tuple[Shape, Shape]:
Expand All @@ -346,9 +346,6 @@ def computeShapes(self, inputShapes: Shape, outputShapes: Shape, operatorReprese
inputShapes[0] = inputShapes[1]
return (inputShapes, outputShapes)

def computeOps(self):
return self.mapper.parser.operatorRepresentation['size']


class ConvLayer(ONNXLayer):

Expand Down Expand Up @@ -438,13 +435,8 @@ def computeShapes(self, inputShapes: Shape, outputShapes: Shape, operatorReprese
return (inputShapes, outputShapes)


class ReluLayer(ONNXLayer):

def __init__(self, maps: List[NodeMapper]):
super().__init__(maps)

def computeOps(self):
return self.mapper.parser.operatorRepresentation['size']
class ReluLayer(SingleOperationPerElementLayer):
pass


class LayerNormLayer(ONNXLayer):
Expand Down Expand Up @@ -709,3 +701,94 @@ def computeOps(self):
numPx = opRep['dim_im_out_x']

return numPx * opsPerPx


class CeilLayer(SingleOperationPerElementLayer):
    """Layer for the Ceil operator; ``computeOps`` is inherited from the base class unchanged."""


class FloorLayer(SingleOperationPerElementLayer):
    """Layer for the Floor operator; ``computeOps`` is inherited from the base class unchanged."""


class ClipLayer(ONNXLayer):
    """Layer for the Clip operator."""

    def computeOps(self):
        """Return the operation count: two comparisons per element (against min and against max)."""
        comparisons_per_element = 2
        opRep = self.mapper.parser.operatorRepresentation
        return opRep['size'] * comparisons_per_element


class ExpLayer(SingleOperationPerElementLayer):
    """Layer for the Exp operator; ``computeOps`` is inherited from the base class unchanged."""


class SigmoidLayer(ONNXLayer):
    """Layer for the Sigmoid operator."""

    def computeOps(self):
        """Return the operation count: four operations per element.

        sigmoid(x) = 1 / (1 + exp(-x)) is counted as negate, exp, add and divide.
        """
        ops_per_element = 4
        opRep = self.mapper.parser.operatorRepresentation
        return opRep['size'] * ops_per_element


class SwishLayer(ONNXLayer):
    """Layer for the Swish operator."""

    def computeOps(self):
        """Return the operation count: five operations per element.

        x * sigmoid(x) is counted as the four sigmoid operations plus one multiplication.
        """
        ops_per_element = 5
        opRep = self.mapper.parser.operatorRepresentation
        return opRep['size'] * ops_per_element


class HardSigmoidLayer(ONNXLayer):
    """Layer for the HardSigmoid operator."""

    def computeOps(self):
        """Return the operation count: four operations per element.

        max(0, min(1, alpha*x + beta)) is counted as multiply, add, lower clamp and upper clamp.
        """
        ops_per_element = 4
        opRep = self.mapper.parser.operatorRepresentation
        return opRep['size'] * ops_per_element


class HardSwishLayer(ONNXLayer):
    """Layer for the HardSwish operator."""

    def computeOps(self):
        """Return the operation count: five operations per element.

        x * HardSigmoid(x) is counted as the four HardSigmoid operations plus one multiplication.
        """
        ops_per_element = 5
        opRep = self.mapper.parser.operatorRepresentation
        return opRep['size'] * ops_per_element


class InstanceNormLayer(ONNXLayer):
    """Layer for the InstanceNormalization operator."""

    def computeOps(self):
        """Return the operation count as ``B * C * (S * 8 + 3)``.

        Per element (8): mean accumulation (1), variance (sub, square, add = 3),
        normalization (sub, div = 2) and affine transform (mul, add = 2).
        Per (batch, channel) pair (3): mean division (1) and variance sqrt + division (2).
        """
        opRep = self.mapper.parser.operatorRepresentation
        batch = int(opRep['batch_size'])
        channels = int(opRep['num_channels'])
        spatial = int(opRep['spatial'])

        ops_per_element = 8
        ops_per_instance = 3
        return batch * channels * (spatial * ops_per_element + ops_per_instance)


class GroupNormLayer(ONNXLayer):
    """Layer for the GroupNormalization operator."""

    def computeOps(self):
        """Return the operation count as ``B * C * (S * 8 + 3)``.

        The count uses the same structure as InstanceNorm: 8 operations per element
        plus 3 operations per (batch, channel) pair.
        """
        # NOTE(review): ONNX GroupNormalization computes statistics per group rather than per channel,
        # so the "+ 3" term may overcount when there are fewer groups than channels - confirm intent.
        opRep = self.mapper.parser.operatorRepresentation
        batch = int(opRep['batch_size'])
        channels = int(opRep['num_channels'])
        spatial = int(opRep['spatial'])

        ops_per_element = 8
        ops_per_instance = 3
        return batch * channels * (spatial * ops_per_element + ops_per_instance)


class AveragePoolLayer(ONNXLayer):
    """Layer for the AveragePool operator."""

    def computeOps(self):
        """Return the operation count: output size times the number of kernel elements.

        Each output element is counted as (kernel elements - 1) additions plus one division,
        i.e. exactly one operation per kernel element.
        """
        opRep = self.mapper.parser.operatorRepresentation
        window_size = int(np.prod(opRep['kernel_shape']))
        return opRep['data_out_size'] * window_size


class GlobalAveragePoolLayer(ONNXLayer):
    """Layer for the GlobalAveragePool operator."""

    def computeOps(self):
        """Return the operation count as ``batch_size * num_channels * spatial_size``.

        Each output channel is counted as (spatial_size - 1) additions plus one division.
        """
        opRep = self.mapper.parser.operatorRepresentation
        batch = opRep['batch_size']
        channels = opRep['num_channels']
        ops_per_channel = opRep['spatial_size']
        return int(batch * channels * ops_per_channel)


class GlobalMaxPoolLayer(ONNXLayer):
    """Layer for the GlobalMaxPool operator."""

    def computeOps(self):
        """Return the operation count as ``batch_size * num_channels * (spatial_size - 1)``.

        Each output channel is counted as (spatial_size - 1) comparisons.
        """
        opRep = self.mapper.parser.operatorRepresentation
        batch = opRep['batch_size']
        channels = opRep['num_channels']
        comparisons_per_channel = opRep['spatial_size'] - 1
        return int(batch * channels * comparisons_per_channel)
Loading
Loading