diff --git a/backends/xnnpack/runtime/graph/graph.cpp b/backends/xnnpack/runtime/graph/graph.cpp new file mode 100644 index 00000000000..6bd93276deb --- /dev/null +++ b/backends/xnnpack/runtime/graph/graph.cpp @@ -0,0 +1,169 @@ +#include + +#include +#include +#include + +#include + +namespace executorch::backends::xnnpack::graph { + +namespace { +/* Raise max_id to one past the largest symint handle used by any dim term of spec. */ +void scan_spec(const TensorSpec& spec, uint32_t& max_id) { + for (auto& dim : spec.sizes) { + for (auto& term : dim.coeffs) { + if (term.sym >= max_id) { + max_id = term.sym + 1; + } + } + } +} +/* Run scan_spec on the single spec, or on each spec of the list, held by os. */ +void scan_output_spec(const OutputSpec& os, uint32_t& max_id) { + std::visit( + overloaded{ + [&](const TensorSpec& s) { scan_spec(s, max_id); }, + [&](const std::vector& v) { + for (auto& s : v) + scan_spec(s, max_id); + }, + }, + os); +} + +} // namespace +/* Returns one past the largest symint handle found in the input specs and in operator/subgraph output specs (0 if none). NOTE(review): the graph held by CallSubgraphNode::subgraph is not scanned - confirm that is intended. */ +uint32_t Graph::symint_count() const { + uint32_t count = 0; + for (auto& spec : input_specs) { + scan_spec(spec, count); + } + for (auto& node : nodes) { + std::visit( + overloaded{ + [](const InputNode&) {}, + [](const ConstantNode&) {}, + [&](const CallOperatorNode& n) { + scan_output_spec(n.output_specs, count); + }, + [&](const CallSubgraphNode& n) { + scan_output_spec(n.output_specs, count); + }, + }, + node.value); + } + return count; +} +/* Rebuild the users list of every node from the args of operator and subgraph nodes. NOTE(review): arg.node indexes nodes without a bounds check, and a consumer that uses the same producer in several args is recorded once per arg - confirm both are acceptable. */ +void Graph::update_users() { + for (auto& node : nodes) { + node.users.clear(); + } + + for (NodeHandle i = 0; i < nodes.size(); ++i) { + std::visit( + overloaded{ + [](const InputNode&) {}, + [](const ConstantNode&) {}, + [&](const CallOperatorNode& n) { + for (auto arg : n.args) { + if (!arg.is_null()) { + nodes[arg.node].users.push_back(i); + } + } + }, + [&](const CallSubgraphNode& n) { + for (auto arg : n.args) { + if (!arg.is_null()) { + nodes[arg.node].users.push_back(i); + } + } + }, + }, + nodes[i].value); + } +} +/* Remove nodes flagged Dead: compute an old-to-new index map, validate every live reference, rewrite arg and output handles, move the survivors into a dense vector, then rebuild users. */ +runtime::Error Graph::compact_nodes() { + std::vector remap(nodes.size(), UINT32_MAX); /* old index -> new index; UINT32_MAX marks a removed node */ + uint32_t new_idx = 0; + for (NodeHandle i = 0; i < nodes.size(); i++) { + if ((nodes[i].flags & NodeFlags::Dead) != 
NodeFlags::None) { + continue; + } + remap[i] = new_idx++; + } + + // Validate that no live node or output references a dead/invalid node before + // mutating any handles, so a failure leaves the graph untouched. + bool valid = true; + auto check_vh = [&](const ValueHandle& vh) { + if (vh.is_null()) { + return; + } + if (vh.node >= remap.size() || remap[vh.node] == UINT32_MAX) { + valid = false; + } + }; + for (NodeHandle i = 0; i < nodes.size(); i++) { + if ((nodes[i].flags & NodeFlags::Dead) != NodeFlags::None) { + continue; + } + for (const auto& a : nodes[i].get_args()) { + check_vh(a); + } + } + for (const auto& out : outputs) { + check_vh(out); + } + ET_CHECK_OR_RETURN_ERROR( + valid, + Internal, + "compact_nodes: a live node or output references a dead node"); +/* Validation passed; from here on handles are rewritten in place. */ + auto rewrite_vh = [&](ValueHandle& vh) { + if (!vh.is_null()) { + vh.node = remap[vh.node]; + } + }; + + for (NodeHandle i = 0; i < nodes.size(); i++) { + if ((nodes[i].flags & NodeFlags::Dead) != NodeFlags::None) { + continue; + } + std::visit( + overloaded{ + [](InputNode&) {}, + [](ConstantNode&) {}, + [&](CallOperatorNode& n) { + for (auto& a : n.args) + rewrite_vh(a); + }, + [&](CallSubgraphNode& n) { + for (auto& a : n.args) + rewrite_vh(a); + }, + }, + nodes[i].value); + } + + for (auto& out : outputs) { + rewrite_vh(out); + } +/* Move the surviving nodes into a dense vector, keeping their relative order. */ + std::vector compacted; + compacted.reserve(new_idx); + for (NodeHandle i = 0; i < nodes.size(); i++) { + if ((nodes[i].flags & NodeFlags::Dead) != NodeFlags::None) { + continue; + } + compacted.push_back(std::move(nodes[i])); + } + nodes = std::move(compacted); + + update_users(); + return runtime::Error::Ok; +} + +} // namespace executorch::backends::xnnpack::graph diff --git a/backends/xnnpack/runtime/graph/graph.h b/backends/xnnpack/runtime/graph/graph.h new file mode 100644 index 00000000000..b006d7221dc --- /dev/null +++ b/backends/xnnpack/runtime/graph/graph.h @@ -0,0 +1,71 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + 
+namespace executorch::backends::xnnpack::graph { + +/* + * Describes a computational graph: input tensor specs, a flat list of nodes, and the value handles that form the graph outputs. + */ +struct Graph { + std::vector input_specs; + std::vector nodes; + std::vector outputs; + + /* Remove nodes marked as dead and renumber the handles that refer to the survivors. If a live node or output still references a dead node, the graph is left unchanged and the error path is taken. */ + [[nodiscard]] runtime::Error compact_nodes(); + + /* Returns one past the largest symint handle referenced by the input specs and node output specs (0 if none). */ + uint32_t symint_count() const; + + /* Regenerate the users list on every node from the current node args. */ + void update_users(); + + /* Retrieve the output specifier for a given node handle. Constant nodes get a spec synthesized from their tensor's dtype, sizes and quant params. NOTE(review): nodes[node] is not bounds-checked, while the input lookup uses at() - confirm callers guarantee a valid handle. */ + inline OutputSpec get_output_spec_for_node(NodeHandle node) const { + return std::visit( + overloaded{ + [&](const InputNode& n) -> OutputSpec { + return input_specs.at(n.input); + }, + [](const ConstantNode& n) -> OutputSpec { + TensorSpec spec; + spec.dtype = n.tensor->dtype; + spec.sizes.reserve(n.tensor->sizes.size()); + for (auto s : n.tensor->sizes) { + spec.sizes.push_back( + DimSizeSpec::constant(static_cast(s))); + } + spec.quant_params = n.quant_params; + return spec; + }, + [](const CallOperatorNode& n) -> OutputSpec { + return n.output_specs; + }, + [](const CallSubgraphNode& n) -> OutputSpec { + return n.output_specs; + }, + }, + nodes[node].value); + } + + /* Retrieve the tensor spec for a given value handle. vh.output selects from a node with a list of output specs and is ignored when the node has a single spec. 
*/ + inline TensorSpec get_tensor_spec(ValueHandle vh) const { + auto spec = get_output_spec_for_node(vh.node); + return std::visit( + overloaded{ + [](const TensorSpec& s) -> TensorSpec { return s; }, + [&](const std::vector& v) -> TensorSpec { + return v.at(vh.output); + }, + }, + spec); + } +}; + +} // namespace executorch::backends::xnnpack::graph diff --git a/backends/xnnpack/runtime/graph/graph_builder.cpp b/backends/xnnpack/runtime/graph/graph_builder.cpp new file mode 100644 index 00000000000..548e8f674e6 --- /dev/null +++ b/backends/xnnpack/runtime/graph/graph_builder.cpp @@ -0,0 +1,102 @@ +#include + +#include + +namespace executorch::backends::xnnpack::graph { +/* Move the accumulated input specs, nodes and outputs into a new Graph. NOTE(review): next_input_ and next_sym_int_ are not reset here - confirm the builder is not meant to be reused after build(). */ +Graph GraphBuilder::build() { + Graph g; + g.input_specs = std::move(input_specs_); + g.nodes = std::move(nodes_); + g.outputs = std::move(outputs_); + return g; +} +/* Append spec to the input specs and add an InputNode that refers to it by input index. */ +ValueHandle GraphBuilder::createInput(TensorSpec spec) { + input_specs_.push_back(std::move(spec)); + + InputHandle input = next_input_; + next_input_++; + + ValueHandle handle{static_cast(nodes_.size())}; + Node node; + node.value = InputNode{input}; + nodes_.push_back(std::move(node)); + return handle; +} +/* Add a ConstantNode holding tensor and its optional quant params. */ +ValueHandle GraphBuilder::createConstant( + std::shared_ptr tensor, + std::optional quant_params) { + ValueHandle handle{static_cast(nodes_.size())}; + ConstantNode cn; + cn.tensor = std::move(tensor); + cn.quant_params = std::move(quant_params); + Node node; + node.value = std::move(cn); + nodes_.push_back(std::move(node)); + return handle; +} +/* Add an operator node with a single output spec; the returned handle has output index 0. */ +ValueHandle GraphBuilder::createOperator( + Operator op, + TensorSpec output_spec, + ValueHandles args) { + ValueHandle handle{static_cast(nodes_.size())}; + CallOperatorNode con; + con.args = std::move(args); + con.op = op; + con.output_specs = std::move(output_spec); + Node node; + node.value = std::move(con); + nodes_.push_back(std::move(node)); + return handle; +} +/* Add an operator node with a single output spec that also stores constant args and output clamp bounds. */ +ValueHandle GraphBuilder::createOperator( + Operator op, + TensorSpec output_spec, + ValueHandles args, + 
std::vector constant_args, + float output_min, + float output_max) { + ValueHandle handle{static_cast(nodes_.size())}; + CallOperatorNode con; + con.args = std::move(args); + con.op = op; + con.output_specs = std::move(output_spec); + con.constant_args = std::move(constant_args); + con.output_min = output_min; + con.output_max = output_max; + Node node; + node.value = std::move(con); + nodes_.push_back(std::move(node)); + return handle; +} +/* Add an operator node with a list of output specs; the returned handle refers to output 0 of that node. */ +ValueHandle GraphBuilder::createOperatorM( + Operator op, + std::vector output_specs, + ValueHandles args) { + ValueHandle handle{static_cast(nodes_.size())}; + CallOperatorNode con; + con.args = std::move(args); + con.op = op; + con.output_specs = std::move(output_specs); + Node node; + node.value = std::move(con); + nodes_.push_back(std::move(node)); + return handle; +} +/* Append handle to the graph outputs and return its position in that list. */ +OutputHandle GraphBuilder::createOutput(ValueHandle handle) { + OutputHandle output = static_cast(outputs_.size()); + outputs_.push_back(handle); + return output; +} +/* Hand out the next unused symint handle. */ +SymIntHandle GraphBuilder::createSymInt() { + return next_sym_int_++; +} + +} // namespace executorch::backends::xnnpack::graph diff --git a/backends/xnnpack/runtime/graph/graph_builder.h b/backends/xnnpack/runtime/graph/graph_builder.h new file mode 100644 index 00000000000..d54d195b99e --- /dev/null +++ b/backends/xnnpack/runtime/graph/graph_builder.h @@ -0,0 +1,77 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace executorch::backends::xnnpack::graph { + +/* + * A builder class for graph::Graph. + */ +class GraphBuilder { + public: + Graph build(); /* moves everything built so far into the returned Graph */ + + /* Add an input node to the graph. */ + ValueHandle createInput(TensorSpec spec); + + /* Add a constant node to the graph. */ + ValueHandle createConstant( + std::shared_ptr tensor, + std::optional quant_params = std::nullopt); + + /* Add an operator node to the graph. 
*/ + ValueHandle + createOperator(Operator op, TensorSpec output_spec, ValueHandles args); + + /* Add an operator node with constant args and optional fused output clamp bounds. */ + ValueHandle createOperator( + Operator op, + TensorSpec output_spec, + ValueHandles args, + std::vector constant_args, + float output_min = -INFINITY, + float output_max = INFINITY); + + /* Add a multi-output operator node to the graph. */ + ValueHandle createOperatorM( + Operator op, + std::vector output_specs, + ValueHandles args); + + /* Mark a value as a graph output and return its output index (no node is added). */ + OutputHandle createOutput(ValueHandle handle); + + /* Allocate a fresh symbolic int and return its handle. */ + SymIntHandle createSymInt(); +/* Variadic conveniences: pack the trailing arguments into a ValueHandles list and forward to the overloads declared earlier in this class. */ + template + ValueHandle createOperator(Operator op, TensorSpec output_spec, Ts... ts) { + return createOperator( + op, output_spec, ValueHandles{std::forward(ts)...}); + } + template + ValueHandle + createOperatorM(Operator op, std::vector output_specs, Ts... ts) { + return createOperatorM( + op, output_specs, ValueHandles{std::forward(ts)...}); + } + + private: + std::vector input_specs_; + std::vector nodes_; + std::vector outputs_; + uint32_t next_input_ = 0; + uint32_t next_sym_int_ = 0; +}; + +} // namespace executorch::backends::xnnpack::graph diff --git a/backends/xnnpack/runtime/graph/handles.h b/backends/xnnpack/runtime/graph/handles.h new file mode 100644 index 00000000000..d2e58dfd2df --- /dev/null +++ b/backends/xnnpack/runtime/graph/handles.h @@ -0,0 +1,31 @@ +#pragma once + +#include +#include + +namespace executorch::backends::xnnpack::graph { + +using InputHandle = uint32_t; +using NodeHandle = uint32_t; +using OutputHandle = uint32_t; +using SymIntHandle = uint32_t; +/* Identifies one output of one node. The null handle has both fields set to UINT32_MAX; is_null() requires both. */ +struct ValueHandle { + uint32_t node; + uint32_t output = 0; + + bool operator==(const ValueHandle& o) const { + return node == o.node && output == o.output; + } + + static constexpr ValueHandle null() { + return {UINT32_MAX, UINT32_MAX}; + } + bool is_null() const { + return node == UINT32_MAX && output == UINT32_MAX; + } +}; + 
+using ValueHandles = std::vector; + +} // namespace executorch::backends::xnnpack::graph diff --git a/backends/xnnpack/runtime/graph/node.h b/backends/xnnpack/runtime/graph/node.h new file mode 100644 index 00000000000..26da431843d --- /dev/null +++ b/backends/xnnpack/runtime/graph/node.h @@ -0,0 +1,182 @@ +#pragma once + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +namespace executorch::backends::xnnpack::graph { +/* Per-node bit flags. */ +enum class NodeFlags : uint8_t { + None = 0, + Dead = (1 << 0), // This node is marked for deletion. + UseXnnpack = (1 << 1), // Sub-delegate this node to XNNPACK. + PassInternal1 = (1 << 2), // Pass-internal usage. +}; +/* Bitwise operators so NodeFlags values can be combined and tested as a set of flags. */ +inline NodeFlags operator|(NodeFlags a, NodeFlags b) { + return static_cast( + static_cast(a) | static_cast(b)); +} + +inline NodeFlags operator&(NodeFlags a, NodeFlags b) { + return static_cast( + static_cast(a) & static_cast(b)); +} + +inline NodeFlags operator~(NodeFlags a) { + return static_cast(~static_cast(a)); +} + +inline NodeFlags& operator|=(NodeFlags& a, NodeFlags b) { + a = a | b; + return a; +} + +inline NodeFlags& operator&=(NodeFlags& a, NodeFlags b) { + a = a & b; + return a; +} + +using ConstantArg = std::variant>; +using OutputSpec = std::variant>; /* one TensorSpec, or a list of specs for multi-output nodes */ + +struct Graph; + +/* + * A node that represents a call into an inner graph. This is primarily + * used for delegation of subgraphs to run with XNNPACK. + */ +struct CallSubgraphNode { + std::vector args; + OutputSpec output_specs; + std::unique_ptr subgraph; +}; + +/* + * A node that represents a top-level graph input. + */ +struct InputNode { + InputHandle input; /* index into Graph::input_specs */ + + bool operator==(const InputNode& o) const { + return input == o.input; + } +}; + +/* + * A node that represents a constant value. 
+ */ +struct ConstantNode { + std::shared_ptr tensor; + std::optional quant_params; +/* NOTE(review): tensor == o.tensor compares the shared_ptr handles (same object), not tensor contents - confirm that is the intended equality. */ + bool operator==(const ConstantNode& o) const { + return tensor == o.tensor && quant_params == o.quant_params; + } +}; + +/* + * A node that represents the result of an operator invocation. + */ +struct CallOperatorNode { + std::vector args; + Operator op; + OutputSpec output_specs; + std::vector constant_args; + + // Fused output activation bounds (e.g. a ReLU/HardTanh folded into this op). + // Defaults are a no-op clamp. + float output_min = -INFINITY; + float output_max = INFINITY; + + bool operator==(const CallOperatorNode& o) const { + return args == o.args && op == o.op && output_specs == o.output_specs && + constant_args == o.constant_args && output_min == o.output_min && + output_max == o.output_max; + } +}; + +using NodeVariant = + std::variant; + +/* + * A single graph node: the node payload plus its users, a pass-scratch tag and flags. + */ +struct Node { + NodeVariant value; + + /* A list of nodes that directly consume the output of this node. */ + std::vector users; + + /* + * An opaque field used internally by various passes. This field is not + * guaranteed to be preserved across passes and should only be used as + * internal, temporary state for graph transformations. + */ + uint32_t tag = 0; + + NodeFlags flags = NodeFlags::None; + + /* Returns a list of value handles this node takes as inputs (an empty list for input and constant nodes). */ + const std::vector& get_args() const { + return std::visit( + overloaded{ + [](const InputNode&) -> const std::vector& { + static const std::vector empty; + return empty; + }, + [](const ConstantNode&) -> const std::vector& { + static const std::vector empty; + return empty; + }, + [](const CallOperatorNode& n) -> const std::vector& { + return n.args; + }, + [](const CallSubgraphNode& n) -> const std::vector& { + return n.args; + }, + }, + value); + } + + /* Returns the number of output values produced by this node. Input and constant nodes produce one; call nodes produce one for a single spec or one per entry of a spec list. 
*/ + uint32_t output_count() const { + return std::visit( + overloaded{ + [](const InputNode&) -> uint32_t { return 1; }, + [](const ConstantNode&) -> uint32_t { return 1; }, + [](const CallOperatorNode& n) -> uint32_t { + return std::visit( + overloaded{ + [](const TensorSpec&) -> uint32_t { return 1; }, + [](const std::vector& v) -> uint32_t { + return static_cast(v.size()); + }, + }, + n.output_specs); + }, + [](const CallSubgraphNode& n) -> uint32_t { + return std::visit( + overloaded{ + [](const TensorSpec&) -> uint32_t { return 1; }, + [](const std::vector& v) -> uint32_t { + return static_cast(v.size()); + }, + }, + n.output_specs); + }, + }, + value); + } +}; + +} // namespace executorch::backends::xnnpack::graph diff --git a/backends/xnnpack/runtime/graph/operator.h b/backends/xnnpack/runtime/graph/operator.h new file mode 100644 index 00000000000..75b7722f07b --- /dev/null +++ b/backends/xnnpack/runtime/graph/operator.h @@ -0,0 +1,88 @@ +#pragma once + +namespace executorch::backends::xnnpack::graph { + +/* + * Operators known to the CPU backend. Every enumerator has an explicit numeric value. 
+ */ +enum class Operator { + // Binary elementwise + Add = 0, + Subtract = 1, + Multiply = 2, + Divide = 3, + Maximum = 4, + Minimum = 5, + CopySign = 6, + SquaredDifference = 7, + PReLU = 8, + Modulus = 9, + Atan2 = 10, + Pow = 11, + + // Unary elementwise + Abs = 12, + Negate = 13, + Clamp = 14, + Ceiling = 15, + Floor = 16, + Round = 17, + Square = 18, + SquareRoot = 19, + ReciprocalSquareRoot = 20, + Exp = 21, + Log = 22, + Sigmoid = 23, + Tanh = 24, + ELU = 25, + GELU = 26, + HardSwish = 27, + LeakyReLU = 28, + Sine = 29, + Cosine = 30, + Sign = 31, + ReLU = 32, + + // Linear + Linear = 33, + BatchMatrixMultiply = 34, + + // Convolution + Conv2d = 35, + ConvTranspose2d = 36, + + // Pooling + AvgPool2d = 37, + AdaptiveAvgPool2d = 38, + MaxPool2d = 39, + + // Reduction + Softmax = 40, + Mean = 41, + Sum = 42, + + // Shape / memory + Reshape = 43, + View = 44, + Transpose = 45, + Permute = 46, + Slice = 47, + Cat = 48, + Chunk = 49, + Unsqueeze = 50, + Expand = 51, + Clone = 52, + Pad = 53, + + // Quantization + Quantize = 54, + Dequantize = 55, + + // Norms + LayerNorm = 56, + + DepthwiseConv2d = 57, + StaticResizeBilinear2D = 58, +}; + +} // namespace executorch::backends::xnnpack::graph diff --git a/backends/xnnpack/runtime/graph/tensor_spec.h b/backends/xnnpack/runtime/graph/tensor_spec.h new file mode 100644 index 00000000000..ab375fba9fd --- /dev/null +++ b/backends/xnnpack/runtime/graph/tensor_spec.h @@ -0,0 +1,71 @@ +#pragma once + +#include +#include +#include + +#include +#include +#include + +namespace executorch::backends::xnnpack::graph { + +/* + * Specifies the size of a single tensor dimension as a fixed constant + * plus a linear combination of zero or more symints. 
+ */ +struct DimSizeSpec { + struct Term { /* one symint handle paired with an integer coefficient */ + SymIntHandle sym; + int64_t coefficient; + + bool operator==(const Term& o) const { + return sym == o.sym && coefficient == o.coefficient; + } + }; +/* Symbolic terms of the size and its constant part. */ + std::vector coeffs; + int64_t offset = 0; + + /* + * Construct a DimSizeSpec for a constant size (no terms, offset = value). + */ + static DimSizeSpec constant(int64_t value) { + return {{}, value}; + } + + /* + * Construct a DimSizeSpec equal to a single symint (coefficient 1, offset 0). + */ + static DimSizeSpec sym(SymIntHandle s) { + return {{{s, 1}}, 0}; + } + + /* + * Returns true if the dim size does not depend on any + * symint values. + */ + bool is_constant() const { + return coeffs.empty(); + } + + bool operator==(const DimSizeSpec& o) const { + return coeffs == o.coeffs && offset == o.offset; + } +}; + +/* + * Describes the shape, dtype, and quantization of a tensor. + */ +struct TensorSpec { + core::DType dtype; + std::vector sizes; + std::optional quant_params; + + bool operator==(const TensorSpec& o) const { + return dtype == o.dtype && sizes == o.sizes && + quant_params == o.quant_params; + } +}; + +} // namespace executorch::backends::xnnpack::graph diff --git a/backends/xnnpack/test/CMakeLists.txt b/backends/xnnpack/test/CMakeLists.txt index 667cd2580b6..d8d57e81f9d 100644 --- a/backends/xnnpack/test/CMakeLists.txt +++ b/backends/xnnpack/test/CMakeLists.txt @@ -42,7 +42,9 @@ target_include_directories( ) # Graph runtime unit tests. 
-set(_graph_runtime_test_srcs runtime/test_quant_params.cpp) +set(_graph_runtime_test_srcs runtime/test_quant_params.cpp + runtime/test_graph_builder.cpp +) et_cxx_test( backends_xnnpack_graph_runtime_test diff --git a/backends/xnnpack/test/runtime/test_graph_builder.cpp b/backends/xnnpack/test/runtime/test_graph_builder.cpp new file mode 100644 index 00000000000..c1be8983b1f --- /dev/null +++ b/backends/xnnpack/test/runtime/test_graph_builder.cpp @@ -0,0 +1,42 @@ +#include + +#include + +using namespace executorch::backends::xnnpack::core; +using namespace executorch::backends::xnnpack::graph; + +TEST(TestGraphBuilder, add) { + // A simple test that constructs a graph with two input tensors, + // one add operator, and one output tensor. + auto builder = GraphBuilder(); +/* Every value in this graph is a Float32 tensor with constant shape 1x10. */ + auto tensor_spec = TensorSpec{ + .dtype = DType::Float32, + .sizes = {DimSizeSpec::constant(1), DimSizeSpec::constant(10)}}; + + auto input_a = builder.createInput(tensor_spec); + auto input_b = builder.createInput(tensor_spec); + auto add = + builder.createOperator(Operator::Add, tensor_spec, input_a, input_b); + auto output = builder.createOutput(add); +/* NOTE(review): `output` is never read afterwards - consider asserting on it or dropping the variable. */ + auto graph = builder.build(); + + // Verify the graph. 
+ EXPECT_EQ(graph.input_specs.size(), 2); + EXPECT_EQ(graph.input_specs[0], tensor_spec); + EXPECT_EQ(graph.input_specs[1], tensor_spec); +/* Nodes are stored in creation order: the two inputs, then the add. */ + EXPECT_EQ(graph.nodes.size(), 3); + EXPECT_EQ(std::get(graph.nodes[0].value), InputNode{0}); + EXPECT_EQ(std::get(graph.nodes[1].value), InputNode{1}); + EXPECT_EQ( + std::get(graph.nodes[2].value), + (CallOperatorNode{ + .args = {ValueHandle{0}, ValueHandle{1}}, + .op = Operator::Add, + .output_specs = tensor_spec})); +/* The single graph output refers to node 2 (the add), output index 0. */ + EXPECT_EQ(graph.outputs.size(), 1); + EXPECT_EQ(graph.outputs[0], (ValueHandle{2})); +} diff --git a/shim_et/xplat/executorch/build/build_variables.bzl b/shim_et/xplat/executorch/build/build_variables.bzl index 4a8cc479075..6ff7af998ba 100644 --- a/shim_et/xplat/executorch/build/build_variables.bzl +++ b/shim_et/xplat/executorch/build/build_variables.bzl @@ -482,6 +482,8 @@ XNNPACK_BACKEND_BUCK_SRCS = [ "runtime/profiling/XNNProfiler.cpp", "runtime/core/tensor.cpp", "runtime/core/quant_params.cpp", + "runtime/graph/graph.cpp", + "runtime/graph/graph_builder.cpp", ] XNNPACK_BACKEND_SRCS = ["backends/xnnpack/" + x for x in XNNPACK_BACKEND_BUCK_SRCS]