Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion backends/vulkan/op_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -1343,7 +1343,7 @@ def register_grid_priors():
@update_features(exir_ops.edge.aten.repeat.default)
def register_repeat():
return OpFeatures(
inputs_storage=utils.ANY_TEXTURE,
inputs_storage=utils.ANY_STORAGE,
inputs_dtypes=utils.FP_INT_BOOL_T,
)

Expand Down
51 changes: 51 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/repeat_buffer.glsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#version 450 core

${define_required_extensions("buffer", DTYPE)}

#define PRECISION ${PRECISION}

#define T ${buffer_scalar_type(DTYPE)}

${define_active_storage_type("buffer")}

layout(std430) buffer;

#include "indexing.glslh"

${layout_declare_tensor(B, "w", "t_out", DTYPE, "buffer")}
${layout_declare_tensor(B, "r", "t_in", DTYPE, "buffer")}

${layout_declare_ubo(B, "BufferMetadata", "out_meta")}
${layout_declare_ubo(B, "BufferMetadata", "in_meta")}

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

void main() {
  // One invocation per output element: the x dimension of the dispatch is
  // the linear (flat) index into the output buffer.
  const uint out_bufi = gl_GlobalInvocationID.x;
  if (out_of_bounds(out_bufi, out_meta)) {
    return;
  }

  // Convert the flat output index into a per-dimension tensor index.
  TensorIndex out_tidx = linear_idx_to_tensor_idx(out_meta, out_bufi);

  TensorIndex in_tidx;
  initialize(in_tidx);

  // repeat semantics: along each dimension the input tile is repeated, so
  // each output coordinate maps back to the input coordinate modulo the
  // input size in that dimension.
  const int n = int_ndim(out_meta);
  for (int d = 0; d < n; d++) {
    // TensorIndex appears to pack 4 index components per vector slot;
    // div_4/mod_4 select the slot and lane for dimension d — see
    // indexing.glslh for the exact layout.
    in_tidx.data[div_4(d)][mod_4(d)] =
        idx_at(out_tidx, d) % size_at(in_meta, d);
  }

  // Flatten the input tensor index back to a buffer offset and copy the
  // single element.
  const uint in_bufi = tensor_idx_to_linear_idx(in_meta, in_tidx);

  t_out[out_bufi] = t_in[in_bufi];
}
13 changes: 13 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/repeat_buffer.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Shader codegen config for the buffer-storage variant of the repeat op.
repeat_buffer:
  parameter_names_with_default_values:
    DTYPE: float
    STORAGE: buffer
  # Emit one compiled variant per supported element dtype.
  generate_variant_forall:
    DTYPE:
      - VALUE: half
      - VALUE: float
      - VALUE: int32
      - VALUE: int8
      - VALUE: uint8
  shader_variants:
    - NAME: repeat_buffer
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ layout(push_constant) uniform restrict Block {
ivec4 range;
// source tensor sizes in WHCB dims respectively
ivec4 src_dims;
// destination tensor repeats in WHCB dims respectively
ivec4 dst_repeats;
// output tensor sizes in WHCB dims respectively
ivec4 out_dims;
};

#include "indexing_utils.h"
Expand Down Expand Up @@ -58,7 +58,7 @@ void main() {
// if tensors are channel packed
if (packed_dim == C_DIM) {
// the output channels in a batch will be channel size * channel repetitions aligned by 4
const int out_channel_size = alignup4(src_dims.z * dst_repeats.z);
const int out_channel_size = alignup4(out_dims.z);

// batch index in the output
const int out_pos_batch_index = pos.z / out_channel_size;
Expand All @@ -76,7 +76,7 @@ void main() {
channel_index = (pos.z - (batch_index + batch_repetition_index * src_dims.w) * out_channel_size) % src_dims.z;
} else {
// the output channels in a batch will be channel size * channel repetitions
const int out_channel_size = src_dims.z * dst_repeats.z;
const int out_channel_size = out_dims.z;

// source batch index for based on current output pos
batch_index = (pos.z / out_channel_size) % src_dims.w;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
repeat:
repeat_texture:
parameter_names_with_default_values:
DTYPE: float
NDIM: 3
Expand All @@ -11,4 +11,4 @@ repeat:
- VALUE: int8
- VALUE: uint8
shader_variants:
- NAME: repeat
- NAME: repeat_texture3d
141 changes: 52 additions & 89 deletions backends/vulkan/runtime/graph/ops/impl/Repeat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,107 +16,70 @@

namespace vkcompute {

namespace {

void check_args(
ComputeGraph& graph,
const ValueRef in,
const std::vector<int64_t>& repeats,
const ValueRef out) {
VK_CHECK_COND(graph.packed_dim_of(in) == graph.packed_dim_of(out));

VK_CHECK_COND(graph.storage_type_of(in) == graph.storage_type_of(out));
if (graph.storage_type_of(in) == utils::kTexture2D) {
VK_CHECK_COND(graph.dim_of(in) <= 2);
void resize_repeat_node(
ComputeGraph* graph,
const std::vector<ArgGroup>& args,
const std::vector<ValueRef>& extra_args) {
const ValueRef in = args.at(1).refs.at(0);
const ValueRef out = args.at(0).refs.at(0);
const ValueRef repeats_ref = extra_args.at(0);

const std::vector<int64_t> in_sizes = graph->sizes_of(in);
const std::vector<int64_t> repeats =
graph->extract_int_or_symint_list(repeats_ref);

const size_t out_ndim = std::max(in_sizes.size(), repeats.size());
std::vector<int64_t> out_sizes(out_ndim);
for (size_t i = 0; i < out_ndim; i++) {
const size_t in_offset = i + in_sizes.size() - out_ndim;
const size_t rep_offset = i + repeats.size() - out_ndim;
// Prepend 1s to in_sizes if repeats is longer, and vice versa
const int64_t in_size =
(i >= out_ndim - in_sizes.size()) ? in_sizes[in_offset] : 1;
const int64_t r =
(i >= out_ndim - repeats.size()) ? repeats[rep_offset] : 1;
out_sizes[i] = in_size * r;
}

const int64_t in_dim = graph.dim_of(in);
VK_CHECK_COND(
in_dim <= repeats.size(),
"Input tensor dim size must be not greater than the repeat argument's size");

const std::vector<int64_t> in_sizes = graph.sizes_of(in);
const std::vector<int64_t> out_sizes = graph.sizes_of(out);

VK_CHECK_COND(
dim_at<kWidth4D>(in_sizes) * dim_at<kWidth4D>(repeats) ==
dim_at<kWidth4D>(out_sizes),
"Output's width doesn't match input's width * repeat count");

VK_CHECK_COND(
dim_at<kHeight4D>(in_sizes) * dim_at<kHeight4D>(repeats) ==
dim_at<kHeight4D>(out_sizes),
"Output's height doesn't match input's height * repeat count");

VK_CHECK_COND(
dim_at<kChannel4D>(in_sizes) * dim_at<kChannel4D>(repeats) ==
dim_at<kChannel4D>(out_sizes),
"Output's channel doesn't match input's channel * repeat count");

VK_CHECK_COND(
dim_at<kBatch4D>(in_sizes) * dim_at<kBatch4D>(repeats) ==
dim_at<kBatch4D>(out_sizes),
"Output's batch doesn't match input's batch * repeat count");
graph->virtual_resize(out, out_sizes);
}

} // namespace

void add_repeat_node(
ComputeGraph& graph,
ValueRef in,
ValueRef repeats_ref,
ValueRef out) {
const std::vector<int64_t> repeats = *(graph.get_int_list(repeats_ref));

check_args(graph, in, repeats, out);

const std::vector<int64_t> in_sizes = graph.sizes_of(in);
const utils::ivec4 src_dims{
dim_at<kWidth4D>(in_sizes),
dim_at<kHeight4D>(in_sizes),
dim_at<kChannel4D>(in_sizes),
dim_at<kBatch4D>(in_sizes)};
const utils::ivec4 dst_repeats{
dim_at<kWidth4D>(repeats),
dim_at<kHeight4D>(repeats),
dim_at<kChannel4D>(repeats),
dim_at<kBatch4D>(repeats)};

std::string kernel_name = "repeat";
kernel_name.reserve(kShaderNameReserve);
add_storage_type_suffix(kernel_name, graph.storage_type_of(out));
add_dtype_suffix(kernel_name, graph.dtype_of(out));

// A copy of range with the last element set to batch size of the input tensor
const utils::ivec3 wg_size = graph.logical_limits_of(out);

const auto shader = VK_KERNEL_FROM_STR(kernel_name);

graph.execute_nodes().emplace_back(new DynamicDispatchNode(
graph,
VK_KERNEL_FROM_STR(kernel_name),
default_pick_global_wg_size,
default_pick_local_wg_size,
// Inputs and Outputs
{
{out, vkapi::kWrite},
{in, vkapi::kRead},
},
// Parameter buffers
{},
// Push Constants
{
PushConstantDataInfo(&wg_size, sizeof(wg_size), sizeof(utils::ivec4)),
PushConstantDataInfo(
&src_dims, sizeof(src_dims), sizeof(utils::ivec4)),
PushConstantDataInfo(
&dst_repeats, sizeof(dst_repeats), sizeof(utils::ivec4)),
},
// Specialization Constants
{graph.hashed_layout_of(out), graph.hashed_layout_of(in)},
// Resize Args
{},
// Resizing Logic
nullptr));
if (graph.is_buffer_storage(out)) {
graph.execute_nodes().emplace_back(new DynamicDispatchNode(
graph,
VK_KERNEL_FROM_STR(kernel_name),
default_pick_global_wg_size,
default_pick_local_wg_size,
{{out, vkapi::kWrite}, {in, vkapi::kRead}},
{graph.meta_ubo(out), graph.meta_ubo(in)},
{},
{},
{repeats_ref},
resize_repeat_node));
} else {
graph.execute_nodes().emplace_back(new DynamicDispatchNode(
graph,
VK_KERNEL_FROM_STR(kernel_name),
default_pick_global_wg_size,
default_pick_local_wg_size,
{{out, vkapi::kWrite}, {in, vkapi::kRead}},
{},
{graph.logical_limits_pc_of(out),
graph.sizes_pc_of(in),
graph.sizes_pc_of(out)},
{graph.hashed_layout_of(out), graph.hashed_layout_of(in)},
{repeats_ref},
resize_repeat_node));
}
}

void repeat(ComputeGraph& graph, const std::vector<ValueRef>& args) {
Expand Down
4 changes: 2 additions & 2 deletions backends/vulkan/test/op_tests/cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -1308,7 +1308,7 @@ def get_repeat_inputs():
"utils::kHeightPacked",
"utils::kChannelsPacked",
]
test_suite_2d.storage_types = ["utils::kTexture3D"]
test_suite_2d.storage_types = ["utils::kTexture3D", "utils::kBuffer"]
test_suite_2d.data_gen = "make_seq_tensor"
test_suite_2d.dtypes = ["at::kFloat"]
test_suite_2d.test_name_suffix = "2d"
Expand Down Expand Up @@ -1353,7 +1353,7 @@ def get_repeat_inputs():
"utils::kHeightPacked",
"utils::kChannelsPacked",
]
test_suite_3d.storage_types = ["utils::kTexture3D"]
test_suite_3d.storage_types = ["utils::kTexture3D", "utils::kBuffer"]
test_suite_3d.data_gen = "make_seq_tensor"
test_suite_3d.dtypes = ["at::kFloat"]
test_suite_3d.test_name_suffix = "3d"
Expand Down
Loading