diff --git a/be/benchmark/benchmark_column_array_view.hpp b/be/benchmark/benchmark_column_array_view.hpp new file mode 100644 index 00000000000000..09baf2bd435030 --- /dev/null +++ b/be/benchmark/benchmark_column_array_view.hpp @@ -0,0 +1,418 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// ============================================================ +// Benchmark: ColumnArrayView vs hand-written array column access +// +// ColumnArrayView (see column_array_view.h) provides a unified interface +// to read array column elements regardless of whether the underlying +// column is Plain, ColumnConst, or ColumnNullable. +// +// This benchmark measures whether ColumnArrayView introduces measurable +// overhead compared to hand-written (direct) array column access code. +// +// Test scenarios: +// 1. Int64 array: sum all elements across all rows +// 2. String array: sum lengths of all elements across all rows +// 3. Const array: same as above but with ColumnConst wrapper +// 4. 
Nullable array: with outer nullable wrapper +// ============================================================ + +#include + +#include +#include + +#include "core/assert_cast.h" +#include "core/column/column_array.h" +#include "core/column/column_array_view.h" +#include "core/column/column_const.h" +#include "core/column/column_nullable.h" +#include "core/column/column_string.h" +#include "core/column/column_vector.h" +#include "core/data_type/primitive_type.h" + +namespace doris { + +static constexpr size_t ARR_NUM_ROWS = 4096; +static constexpr size_t ARR_ELEM_PER_ROW = 8; + +// ============================================================ +// Array column factory helpers +// ============================================================ + +// Build Array with ARR_NUM_ROWS rows, each having ARR_ELEM_PER_ROW elements. +static ColumnPtr make_int64_array_column() { + auto data_col = ColumnInt64::create(); + auto null_col = ColumnUInt8::create(); + auto offsets = ColumnArray::ColumnOffsets::create(); + + data_col->reserve(ARR_NUM_ROWS * ARR_ELEM_PER_ROW); + null_col->reserve(ARR_NUM_ROWS * ARR_ELEM_PER_ROW); + + size_t offset = 0; + for (size_t i = 0; i < ARR_NUM_ROWS; ++i) { + for (size_t j = 0; j < ARR_ELEM_PER_ROW; ++j) { + data_col->insert_value(static_cast(i * ARR_ELEM_PER_ROW + j + 1)); + null_col->insert_value(0); + } + offset += ARR_ELEM_PER_ROW; + offsets->insert_value(offset); + } + + auto nullable_data = ColumnNullable::create(std::move(data_col), std::move(null_col)); + return ColumnArray::create(std::move(nullable_data), std::move(offsets)); +} + +// Build Array with some null elements (every 5th element is null). 
+static ColumnPtr make_int64_array_column_with_nulls() { + auto data_col = ColumnInt64::create(); + auto null_col = ColumnUInt8::create(); + auto offsets = ColumnArray::ColumnOffsets::create(); + + data_col->reserve(ARR_NUM_ROWS * ARR_ELEM_PER_ROW); + null_col->reserve(ARR_NUM_ROWS * ARR_ELEM_PER_ROW); + + size_t offset = 0; + size_t flat_idx = 0; + for (size_t i = 0; i < ARR_NUM_ROWS; ++i) { + for (size_t j = 0; j < ARR_ELEM_PER_ROW; ++j) { + data_col->insert_value(static_cast(flat_idx + 1)); + null_col->insert_value(flat_idx % 5 == 0 ? 1 : 0); + flat_idx++; + } + offset += ARR_ELEM_PER_ROW; + offsets->insert_value(offset); + } + + auto nullable_data = ColumnNullable::create(std::move(data_col), std::move(null_col)); + return ColumnArray::create(std::move(nullable_data), std::move(offsets)); +} + +// Build Array with ARR_NUM_ROWS rows. +static ColumnPtr make_string_array_column() { + auto data_col = ColumnString::create(); + auto null_col = ColumnUInt8::create(); + auto offsets = ColumnArray::ColumnOffsets::create(); + + size_t offset = 0; + for (size_t i = 0; i < ARR_NUM_ROWS; ++i) { + for (size_t j = 0; j < ARR_ELEM_PER_ROW; ++j) { + std::string val = "str_" + std::to_string(i * ARR_ELEM_PER_ROW + j); + data_col->insert_data(val.data(), val.size()); + null_col->insert_value(0); + } + offset += ARR_ELEM_PER_ROW; + offsets->insert_value(offset); + } + + auto nullable_data = ColumnNullable::create(std::move(data_col), std::move(null_col)); + return ColumnArray::create(std::move(nullable_data), std::move(offsets)); +} + +// Wrap with outer Nullable (no rows are actually null, just the wrapper overhead). +static ColumnPtr wrap_nullable(const ColumnPtr& col) { + return ColumnNullable::create(col->assume_mutable(), + ColumnUInt8::create(col->size(), 0)); +} + +// Wrap as Const. +static ColumnPtr wrap_const(const ColumnPtr& col) { + // Take the first row of the array column, make a 1-row column, then const-expand. 
+ auto single = col->clone_empty(); + single->insert_from(*col, 0); + return ColumnConst::create(std::move(single), ARR_NUM_ROWS); +} + +// ============================================================ +// Hand-written accessor for Array +// ============================================================ + +struct HandwrittenArrayAccessor { + const ColumnArray::Offsets64& offsets; + const ColumnInt64::Container& data; + const NullMap& nested_null_map; + + explicit HandwrittenArrayAccessor(const ColumnPtr& col) + : offsets(assert_cast(*col).get_offsets()), + data(assert_cast( + assert_cast( + assert_cast(*col).get_data()) + .get_nested_column()) + .get_data()), + nested_null_map(assert_cast( + assert_cast(*col).get_data()) + .get_null_map_data()) {} + + size_t row_begin(size_t row) const { return offsets[row - 1]; } + size_t row_end(size_t row) const { return offsets[row]; } + int64_t value_at(size_t flat_idx) const { return data[flat_idx]; } + bool is_null_at(size_t flat_idx) const { return nested_null_map[flat_idx]; } +}; + +// ============================================================ +// 1. 
Int64 Plain Array: sum all elements +// ============================================================ + +static void Handwritten_ArrayInt64_Plain(benchmark::State& state) { + const auto col = make_int64_array_column(); + HandwrittenArrayAccessor acc(col); + for (auto _ : state) { + int64_t sum = 0; + for (size_t i = 0; i < ARR_NUM_ROWS; ++i) { + size_t begin = acc.row_begin(i); + size_t end = acc.row_end(i); + for (size_t j = begin; j < end; ++j) { + sum += acc.value_at(j); + } + } + benchmark::DoNotOptimize(sum); + } +} +BENCHMARK(Handwritten_ArrayInt64_Plain)->Unit(benchmark::kNanosecond); + +static void ArrayView_ArrayInt64_Plain(benchmark::State& state) { + const auto col = make_int64_array_column(); + const auto view = ColumnArrayView::create(col); + for (auto _ : state) { + int64_t sum = 0; + for (size_t i = 0; i < ARR_NUM_ROWS; ++i) { + auto arr = view[i]; + for (size_t j = 0; j < arr.size(); ++j) { + sum += arr.value_at(j); + } + } + benchmark::DoNotOptimize(sum); + } +} +BENCHMARK(ArrayView_ArrayInt64_Plain)->Unit(benchmark::kNanosecond); + +// ============================================================ +// 2. 
Int64 Array with null elements: sum non-null elements +// ============================================================ + +static void Handwritten_ArrayInt64_WithNulls(benchmark::State& state) { + const auto col = make_int64_array_column_with_nulls(); + HandwrittenArrayAccessor acc(col); + for (auto _ : state) { + int64_t sum = 0; + for (size_t i = 0; i < ARR_NUM_ROWS; ++i) { + size_t begin = acc.row_begin(i); + size_t end = acc.row_end(i); + for (size_t j = begin; j < end; ++j) { + if (!acc.is_null_at(j)) { + sum += acc.value_at(j); + } + } + } + benchmark::DoNotOptimize(sum); + } +} +BENCHMARK(Handwritten_ArrayInt64_WithNulls)->Unit(benchmark::kNanosecond); + +static void ArrayView_ArrayInt64_WithNulls(benchmark::State& state) { + const auto col = make_int64_array_column_with_nulls(); + const auto view = ColumnArrayView::create(col); + for (auto _ : state) { + int64_t sum = 0; + for (size_t i = 0; i < ARR_NUM_ROWS; ++i) { + auto arr = view[i]; + for (size_t j = 0; j < arr.size(); ++j) { + if (!arr.is_null_at(j)) { + sum += arr.value_at(j); + } + } + } + benchmark::DoNotOptimize(sum); + } +} +BENCHMARK(ArrayView_ArrayInt64_WithNulls)->Unit(benchmark::kNanosecond); + +static void ArrayView_ArrayInt64_WithNulls_Flat(benchmark::State& state) { + const auto col = make_int64_array_column_with_nulls(); + const auto view = ColumnArrayView::create(col); + const auto* data = view.get_data(); + const auto* null_map = view.get_null_map_data(); + for (auto _ : state) { + int64_t sum = 0; + for (size_t i = 0; i < ARR_NUM_ROWS; ++i) { + size_t begin = view.row_begin(i); + size_t end = view.row_end(i); + for (size_t j = begin; j < end; ++j) { + if (!null_map[j]) { + sum += data[j]; + } + } + } + benchmark::DoNotOptimize(sum); + } +} +BENCHMARK(ArrayView_ArrayInt64_WithNulls_Flat)->Unit(benchmark::kNanosecond); + +// ============================================================ +// 3. 
String Array: sum string lengths +// ============================================================ + +struct HandwrittenStringArrayAccessor { + const ColumnArray::Offsets64& offsets; + const ColumnString& str_col; + const NullMap& nested_null_map; + + explicit HandwrittenStringArrayAccessor(const ColumnPtr& col) + : offsets(assert_cast(*col).get_offsets()), + str_col(assert_cast( + assert_cast( + assert_cast(*col).get_data()) + .get_nested_column())), + nested_null_map(assert_cast( + assert_cast(*col).get_data()) + .get_null_map_data()) {} + + size_t row_begin(size_t row) const { return offsets[row - 1]; } + size_t row_end(size_t row) const { return offsets[row]; } + StringRef value_at(size_t flat_idx) const { return str_col.get_data_at(flat_idx); } + bool is_null_at(size_t flat_idx) const { return nested_null_map[flat_idx]; } +}; + +static void Handwritten_ArrayString_Plain(benchmark::State& state) { + const auto col = make_string_array_column(); + HandwrittenStringArrayAccessor acc(col); + for (auto _ : state) { + int64_t sum = 0; + for (size_t i = 0; i < ARR_NUM_ROWS; ++i) { + size_t begin = acc.row_begin(i); + size_t end = acc.row_end(i); + for (size_t j = begin; j < end; ++j) { + sum += acc.value_at(j).size; + } + } + benchmark::DoNotOptimize(sum); + } +} +BENCHMARK(Handwritten_ArrayString_Plain)->Unit(benchmark::kNanosecond); + +static void ArrayView_ArrayString_Plain(benchmark::State& state) { + const auto col = make_string_array_column(); + const auto view = ColumnArrayView::create(col); + for (auto _ : state) { + int64_t sum = 0; + for (size_t i = 0; i < ARR_NUM_ROWS; ++i) { + auto arr = view[i]; + for (size_t j = 0; j < arr.size(); ++j) { + sum += arr.value_at(j).size; + } + } + benchmark::DoNotOptimize(sum); + } +} +BENCHMARK(ArrayView_ArrayString_Plain)->Unit(benchmark::kNanosecond); + +// ============================================================ +// 4. 
Const Array: Const(Array) +// ============================================================ + +static void Handwritten_ArrayInt64_Const(benchmark::State& state) { + const auto base = make_int64_array_column(); + const auto const_col = wrap_const(base); + // Hand-written: unpack const, then access the single row repeatedly + const auto& inner = assert_cast(*const_col).get_data_column(); + const auto& array_col = assert_cast(inner); + const auto& arr_offsets = array_col.get_offsets(); + const auto& nested_nullable = assert_cast(array_col.get_data()); + const auto& int_data = assert_cast(nested_nullable.get_nested_column()).get_data(); + + size_t begin = arr_offsets[-1]; // sentinel = 0 + size_t end = arr_offsets[0]; + + for (auto _ : state) { + int64_t sum = 0; + for (size_t i = 0; i < ARR_NUM_ROWS; ++i) { + for (size_t j = begin; j < end; ++j) { + sum += int_data[j]; + } + } + benchmark::DoNotOptimize(sum); + } +} +BENCHMARK(Handwritten_ArrayInt64_Const)->Unit(benchmark::kNanosecond); + +static void ArrayView_ArrayInt64_Const(benchmark::State& state) { + const auto base = make_int64_array_column(); + const auto const_col = wrap_const(base); + const auto view = ColumnArrayView::create(const_col); + for (auto _ : state) { + int64_t sum = 0; + for (size_t i = 0; i < ARR_NUM_ROWS; ++i) { + auto arr = view[i]; + for (size_t j = 0; j < arr.size(); ++j) { + sum += arr.value_at(j); + } + } + benchmark::DoNotOptimize(sum); + } +} +BENCHMARK(ArrayView_ArrayInt64_Const)->Unit(benchmark::kNanosecond); + +// ============================================================ +// 5. 
Nullable Array: Nullable(Array) +// ============================================================ + +static void Handwritten_ArrayInt64_Nullable(benchmark::State& state) { + const auto base = make_int64_array_column(); + const auto nullable_col = wrap_nullable(base); + // Hand-written: unpack nullable + const auto& nullable = assert_cast(*nullable_col); + const auto& outer_null_map = nullable.get_null_map_data(); + const auto& array_col = assert_cast(nullable.get_nested_column()); + const auto& arr_offsets = array_col.get_offsets(); + const auto& nested_nullable = assert_cast(array_col.get_data()); + const auto& int_data = assert_cast(nested_nullable.get_nested_column()).get_data(); + + for (auto _ : state) { + int64_t sum = 0; + for (size_t i = 0; i < ARR_NUM_ROWS; ++i) { + if (outer_null_map[i]) continue; + size_t begin = arr_offsets[i - 1]; + size_t end = arr_offsets[i]; + for (size_t j = begin; j < end; ++j) { + sum += int_data[j]; + } + } + benchmark::DoNotOptimize(sum); + } +} +BENCHMARK(Handwritten_ArrayInt64_Nullable)->Unit(benchmark::kNanosecond); + +static void ArrayView_ArrayInt64_Nullable(benchmark::State& state) { + const auto base = make_int64_array_column(); + const auto nullable_col = wrap_nullable(base); + const auto view = ColumnArrayView::create(nullable_col); + for (auto _ : state) { + int64_t sum = 0; + for (size_t i = 0; i < ARR_NUM_ROWS; ++i) { + if (view.is_null_at(i)) continue; + auto arr = view[i]; + for (size_t j = 0; j < arr.size(); ++j) { + sum += arr.value_at(j); + } + } + benchmark::DoNotOptimize(sum); + } +} +BENCHMARK(ArrayView_ArrayInt64_Nullable)->Unit(benchmark::kNanosecond); + +} // namespace doris diff --git a/be/benchmark/benchmark_column_array_view_distance.hpp b/be/benchmark/benchmark_column_array_view_distance.hpp new file mode 100644 index 00000000000000..34fd287f2030ff --- /dev/null +++ b/be/benchmark/benchmark_column_array_view_distance.hpp @@ -0,0 +1,353 @@ +// Licensed to the Apache Software Foundation (ASF) under one 
+// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// ============================================================ +// Benchmark: ColumnArrayView vs hand-written for array distance +// +// Simulates the FunctionArrayDistance pattern: +// - Build Array columns +// - Extract raw float* pointers + dimensions per row +// - Call faiss L2 distance on each row pair +// +// Compares: +// 1. Hand-written: manual Const/Nullable unwrapping + offsets +// 2. ColumnArrayView: original row-view access via ArrayDataView::get_data() +// 3. ColumnArrayView flat access: prefetch flat data pointer + row offsets +// ============================================================ + +#include + +#include +#include +#include + +#include "core/assert_cast.h" +#include "core/column/column_array.h" +#include "core/column/column_array_view.h" +#include "core/column/column_const.h" +#include "core/column/column_nullable.h" +#include "core/column/column_vector.h" +#include "core/data_type/primitive_type.h" + +namespace doris { + +// Inline L2 distance to avoid faiss build dependency in benchmark. +// Both paths call the same function, so the measurement is purely +// about pointer-extraction overhead, not about the distance kernel. 
+static inline float inline_l2_distance(const float* x, const float* y, size_t d) { + float sum = 0.0f; + for (size_t i = 0; i < d; ++i) { + float diff = x[i] - y[i]; + sum += diff * diff; + } + return std::sqrt(sum); +} + +static constexpr size_t DIST_NUM_ROWS = 4096; +static constexpr size_t DIST_DIM = 128; // typical embedding dimension + +// ============================================================ +// Column factory: Array with fixed dimension +// ============================================================ + +static ColumnPtr make_float_array_column_for_dist(size_t num_rows, size_t dim) { + auto data_col = ColumnFloat32::create(); + auto null_col = ColumnUInt8::create(); + auto offsets = ColumnArray::ColumnOffsets::create(); + + data_col->reserve(num_rows * dim); + null_col->reserve(num_rows * dim); + + std::mt19937 rng(42); + std::uniform_real_distribution dist(-1.0f, 1.0f); + + size_t offset = 0; + for (size_t i = 0; i < num_rows; ++i) { + for (size_t j = 0; j < dim; ++j) { + data_col->insert_value(dist(rng)); + null_col->insert_value(0); + } + offset += dim; + offsets->insert_value(offset); + } + + auto nullable_data = ColumnNullable::create(std::move(data_col), std::move(null_col)); + return ColumnArray::create(std::move(nullable_data), std::move(offsets)); +} + +static ColumnPtr make_const_float_array_for_dist(size_t dim) { + auto single = make_float_array_column_for_dist(1, dim); + return ColumnConst::create(std::move(single), DIST_NUM_ROWS); +} + +// ============================================================ +// 1. 
Both columns non-const: L2 distance per row +// ============================================================ + +static void Handwritten_Distance_Plain_Plain(benchmark::State& state) { + const auto col1 = make_float_array_column_for_dist(DIST_NUM_ROWS, DIST_DIM); + const auto col2 = make_float_array_column_for_dist(DIST_NUM_ROWS, DIST_DIM); + + // Hand-written extraction (mirrors FunctionArrayDistance::execute_impl) + const auto& arr1 = assert_cast(*col1); + const auto& arr2 = assert_cast(*col2); + const auto& nested1 = assert_cast(arr1.get_data()); + const auto& nested2 = assert_cast(arr2.get_data()); + const auto& float1 = assert_cast(nested1.get_nested_column()); + const auto& float2 = assert_cast(nested2.get_nested_column()); + const auto* fdata1 = float1.get_data().data(); + const auto* fdata2 = float2.get_data().data(); + const auto& offsets1 = arr1.get_offsets(); + const auto& offsets2 = arr2.get_offsets(); + + auto dst = ColumnFloat32::create(DIST_NUM_ROWS); + auto& dst_data = dst->get_data(); + + for (auto _ : state) { + for (size_t row = 0; row < DIST_NUM_ROWS; ++row) { + auto prev1 = offsets1[row - 1]; + auto prev2 = offsets2[row - 1]; + auto size1 = offsets1[row] - prev1; + dst_data[row] = inline_l2_distance(fdata1 + prev1, fdata2 + prev2, size1); + } + benchmark::ClobberMemory(); + } +} +BENCHMARK(Handwritten_Distance_Plain_Plain)->Unit(benchmark::kNanosecond); + +static void ArrayView_Distance_Plain_Plain(benchmark::State& state) { + const auto col1 = make_float_array_column_for_dist(DIST_NUM_ROWS, DIST_DIM); + const auto col2 = make_float_array_column_for_dist(DIST_NUM_ROWS, DIST_DIM); + + const auto view1 = ColumnArrayView::create(col1); + const auto view2 = ColumnArrayView::create(col2); + + auto dst = ColumnFloat32::create(DIST_NUM_ROWS); + auto& dst_data = dst->get_data(); + + for (auto _ : state) { + for (size_t row = 0; row < DIST_NUM_ROWS; ++row) { + auto a1 = view1[row]; + auto a2 = view2[row]; + const float* p1 = a1.get_data(); + const float* 
p2 = a2.get_data(); + dst_data[row] = inline_l2_distance(p1, p2, a1.size()); + } + benchmark::ClobberMemory(); + } +} +BENCHMARK(ArrayView_Distance_Plain_Plain)->Unit(benchmark::kNanosecond); + +static void ArrayView_Distance_Plain_Plain_Flat(benchmark::State& state) { + const auto col1 = make_float_array_column_for_dist(DIST_NUM_ROWS, DIST_DIM); + const auto col2 = make_float_array_column_for_dist(DIST_NUM_ROWS, DIST_DIM); + + const auto view1 = ColumnArrayView::create(col1); + const auto view2 = ColumnArrayView::create(col2); + const auto* data1 = view1.get_data(); + const auto* data2 = view2.get_data(); + + auto dst = ColumnFloat32::create(DIST_NUM_ROWS); + auto& dst_data = dst->get_data(); + + for (auto _ : state) { + for (size_t row = 0; row < DIST_NUM_ROWS; ++row) { + size_t begin1 = view1.row_begin(row); + size_t begin2 = view2.row_begin(row); + size_t dim1 = view1.row_end(row) - begin1; + dst_data[row] = inline_l2_distance(data1 + begin1, data2 + begin2, dim1); + } + benchmark::ClobberMemory(); + } +} +BENCHMARK(ArrayView_Distance_Plain_Plain_Flat)->Unit(benchmark::kNanosecond); + +// ============================================================ +// 2. 
One column const (query vs many vectors) +// ============================================================ + +static void Handwritten_Distance_Const_Plain(benchmark::State& state) { + const auto const_col = make_const_float_array_for_dist(DIST_DIM); + const auto col2 = make_float_array_column_for_dist(DIST_NUM_ROWS, DIST_DIM); + + // Extract const array once + const auto& const_inner = assert_cast(*const_col).get_data_column(); + const auto& const_arr = assert_cast(const_inner); + const auto& const_nested = assert_cast(const_arr.get_data()); + const auto& const_float = assert_cast(const_nested.get_nested_column()); + const float* const_data = const_float.get_data().data(); + size_t const_dim = const_float.size(); + + // Extract non-const array + const auto& arr2 = assert_cast(*col2); + const auto& nested2 = assert_cast(arr2.get_data()); + const auto& float2 = assert_cast(nested2.get_nested_column()); + const auto* fdata2 = float2.get_data().data(); + const auto& offsets2 = arr2.get_offsets(); + + auto dst = ColumnFloat32::create(DIST_NUM_ROWS); + auto& dst_data = dst->get_data(); + + for (auto _ : state) { + for (size_t row = 0; row < DIST_NUM_ROWS; ++row) { + auto prev2 = offsets2[row - 1]; + dst_data[row] = inline_l2_distance(const_data, fdata2 + prev2, const_dim); + } + benchmark::ClobberMemory(); + } +} +BENCHMARK(Handwritten_Distance_Const_Plain)->Unit(benchmark::kNanosecond); + +static void ArrayView_Distance_Const_Plain(benchmark::State& state) { + const auto const_col = make_const_float_array_for_dist(DIST_DIM); + const auto col2 = make_float_array_column_for_dist(DIST_NUM_ROWS, DIST_DIM); + + const auto view1 = ColumnArrayView::create(const_col); + const auto view2 = ColumnArrayView::create(col2); + + auto dst = ColumnFloat32::create(DIST_NUM_ROWS); + auto& dst_data = dst->get_data(); + + for (auto _ : state) { + for (size_t row = 0; row < DIST_NUM_ROWS; ++row) { + auto a1 = view1[row]; + auto a2 = view2[row]; + const float* p1 = a1.get_data(); + const 
float* p2 = a2.get_data(); + dst_data[row] = inline_l2_distance(p1, p2, a1.size()); + } + benchmark::ClobberMemory(); + } +} +BENCHMARK(ArrayView_Distance_Const_Plain)->Unit(benchmark::kNanosecond); + +static void ArrayView_Distance_Const_Plain_Flat(benchmark::State& state) { + const auto const_col = make_const_float_array_for_dist(DIST_DIM); + const auto col2 = make_float_array_column_for_dist(DIST_NUM_ROWS, DIST_DIM); + + const auto view1 = ColumnArrayView::create(const_col); + const auto view2 = ColumnArrayView::create(col2); + const auto* data1 = view1.get_data(); + const auto* data2 = view2.get_data(); + + auto dst = ColumnFloat32::create(DIST_NUM_ROWS); + auto& dst_data = dst->get_data(); + + for (auto _ : state) { + for (size_t row = 0; row < DIST_NUM_ROWS; ++row) { + size_t begin1 = view1.row_begin(row); + size_t begin2 = view2.row_begin(row); + size_t dim1 = view1.row_end(row) - begin1; + dst_data[row] = inline_l2_distance(data1 + begin1, data2 + begin2, dim1); + } + benchmark::ClobberMemory(); + } +} +BENCHMARK(ArrayView_Distance_Const_Plain_Flat)->Unit(benchmark::kNanosecond); + +// ============================================================ +// 3. 
Nullable(Array) vs plain Array
+// ============================================================
+
+static ColumnPtr wrap_nullable_for_dist(const ColumnPtr& col) { // Benchmark helper: wraps `col` as the nested column of a newly created ColumnNullable.
+    return ColumnNullable::create(col->assume_mutable(), ColumnUInt8::create(col->size(), 0)); // null-map column built from (col->size(), 0); presumably size and fill value, i.e. no row marked null -- confirm
+}
+
+static void Handwritten_Distance_Nullable_Plain(benchmark::State& state) { // Baseline: unwraps both columns by hand and calls inline_l2_distance on raw pointers for every row.
+    const auto base1 = make_float_array_column_for_dist(DIST_NUM_ROWS, DIST_DIM);
+    const auto nullable_col1 = wrap_nullable_for_dist(base1); // first input gets the outer Nullable wrapper
+    const auto col2 = make_float_array_column_for_dist(DIST_NUM_ROWS, DIST_DIM); // second input stays a plain array column
+
+    // Unwrap nullable: outer Nullable -> array column -> nullable element column -> raw element buffer.
+    const auto& nullable1 = assert_cast(*nullable_col1);
+    const auto& arr1 = assert_cast(nullable1.get_nested_column());
+    const auto& nested1 = assert_cast(arr1.get_data());
+    const auto& float1 = assert_cast(nested1.get_nested_column());
+    const auto* fdata1 = float1.get_data().data();
+    const auto& offsets1 = arr1.get_offsets();
+
+    const auto& arr2 = assert_cast(*col2); // col2 has no outer Nullable, so unwrapping starts at the array column
+    const auto& nested2 = assert_cast(arr2.get_data());
+    const auto& float2 = assert_cast(nested2.get_nested_column());
+    const auto* fdata2 = float2.get_data().data();
+    const auto& offsets2 = arr2.get_offsets();
+
+    auto dst = ColumnFloat32::create(DIST_NUM_ROWS); // result column, created once outside the timed loop
+    auto& dst_data = dst->get_data();
+
+    for (auto _ : state) { // timed region: one pass over all DIST_NUM_ROWS rows per benchmark iteration
+        for (size_t row = 0; row < DIST_NUM_ROWS; ++row) {
+            auto prev1 = offsets1[row - 1]; // at row 0 this reads index row - 1; relies on the offsets[-1] sentinel convention described in column_array_view.h
+            auto prev2 = offsets2[row - 1];
+            auto size1 = offsets1[row] - prev1; // only the first column's row length is computed; the second column's length is not checked
+            dst_data[row] = inline_l2_distance(fdata1 + prev1, fdata2 + prev2, size1);
+        }
+        benchmark::ClobberMemory(); // called once after each full pass over the rows
+    }
+}
+BENCHMARK(Handwritten_Distance_Nullable_Plain)->Unit(benchmark::kNanosecond);
+
+static void ArrayView_Distance_Nullable_Plain(benchmark::State& state) { // Same inputs and per-row computation as the hand-written baseline, but rows are read through ColumnArrayView::operator[].
+    const auto base1 = make_float_array_column_for_dist(DIST_NUM_ROWS, DIST_DIM);
+    const auto nullable_col1 = wrap_nullable_for_dist(base1);
+    const auto col2 = make_float_array_column_for_dist(DIST_NUM_ROWS, DIST_DIM);
+
+    const auto view1 = ColumnArrayView::create(nullable_col1); // views are built once, outside the timed loop
+    const auto view2 = ColumnArrayView::create(col2);
+
+    auto dst = ColumnFloat32::create(DIST_NUM_ROWS);
+    auto& dst_data = dst->get_data();
+
+    for (auto _ : state) {
+        for (size_t row = 0; row < DIST_NUM_ROWS; ++row) {
+            auto a1 = view1[row]; // per-row view object for each input
+            auto a2 = view2[row];
+            const float* p1 = a1.get_data();
+            const float* p2 = a2.get_data();
+            dst_data[row] = inline_l2_distance(p1, p2, a1.size()); // length taken from the first input only
+        }
+        benchmark::ClobberMemory();
+    }
+}
+BENCHMARK(ArrayView_Distance_Nullable_Plain)->Unit(benchmark::kNanosecond);
+
+static void ArrayView_Distance_Nullable_Plain_Flat(benchmark::State& state) { // Variant that uses the views' flat data pointers plus row_begin()/row_end() instead of operator[].
+    const auto base1 = make_float_array_column_for_dist(DIST_NUM_ROWS, DIST_DIM);
+    const auto nullable_col1 = wrap_nullable_for_dist(base1);
+    const auto col2 = make_float_array_column_for_dist(DIST_NUM_ROWS, DIST_DIM);
+
+    const auto view1 = ColumnArrayView::create(nullable_col1);
+    const auto view2 = ColumnArrayView::create(col2);
+    const auto* data1 = view1.get_data(); // flat element pointers fetched once, before the timed loop
+    const auto* data2 = view2.get_data();
+
+    auto dst = ColumnFloat32::create(DIST_NUM_ROWS);
+    auto& dst_data = dst->get_data();
+
+    for (auto _ : state) {
+        for (size_t row = 0; row < DIST_NUM_ROWS; ++row) {
+            size_t begin1 = view1.row_begin(row);
+            size_t begin2 = view2.row_begin(row);
+            size_t dim1 = view1.row_end(row) - begin1; // row length of the first input; the second input's length is not checked
+            dst_data[row] = inline_l2_distance(data1 + begin1, data2 + begin2, dim1);
+        }
+        benchmark::ClobberMemory();
+    }
+}
+BENCHMARK(ArrayView_Distance_Nullable_Plain_Flat)->Unit(benchmark::kNanosecond);
+
+} // namespace doris
diff --git a/be/benchmark/benchmark_main.cpp b/be/benchmark/benchmark_main.cpp index 905331fa422659..caf5459c46af51 100644 --- a/be/benchmark/benchmark_main.cpp +++ b/be/benchmark/benchmark_main.cpp @@ -18,6 +18,8 @@ #include #include "benchmark_bit_pack.hpp" +#include "benchmark_column_array_view.hpp" +#include "benchmark_column_array_view_distance.hpp" #include "benchmark_fastunion.hpp" #include "benchmark_fmod.hpp" #include "benchmark_hll_merge.hpp"
diff --git 
a/be/benchmark/binary_cast_benchmark.hpp b/be/benchmark/binary_cast_benchmark.hpp index cc5874a82ca44c..9949a783b05a1d 100644 --- a/be/benchmark/binary_cast_benchmark.hpp +++ b/be/benchmark/binary_cast_benchmark.hpp @@ -53,51 +53,10 @@ To old_binary_cast(From from) { from_decv2_to_i128 || from_decv2_to_i256 || from_ui32_to_date_v2 || from_date_v2_to_ui32 || from_ui64_to_datetime_v2 || from_datetime_v2_to_ui64); - if constexpr (from_u64_to_db) { - TypeConverter conv; - conv.u64 = from; - return conv.dbl; - } else if constexpr (from_i64_to_db) { - TypeConverter conv; - conv.i64 = from; - return conv.dbl; - } else if constexpr (from_db_to_i64) { - TypeConverter conv; - conv.dbl = from; - return conv.i64; - } else if constexpr (from_db_to_u64) { - TypeConverter conv; - conv.dbl = from; - return conv.u64; - } else if constexpr (from_i64_to_vec_dt) { - VecDateTimeInt64Union conv = {.i64 = from}; - return conv.dt; - } else if constexpr (from_ui32_to_date_v2) { - DateV2UInt32Union conv = {.ui32 = from}; - return conv.dt; - } else if constexpr (from_date_v2_to_ui32) { - DateV2UInt32Union conv = {.dt = from}; - return conv.ui32; - } else if constexpr (from_ui64_to_datetime_v2) { - DateTimeV2UInt64Union conv = {.ui64 = from}; - return conv.dt; - } else if constexpr (from_datetime_v2_to_ui64) { - DateTimeV2UInt64Union conv = {.dt = from}; - return conv.ui64; - } else if constexpr (from_vec_dt_to_i64) { - VecDateTimeInt64Union conv = {.dt = from}; - return conv.i64; - } else if constexpr (from_i128_to_decv2) { - DecimalInt128Union conv; - conv.i128 = from; - return conv.decimal; - } else if constexpr (from_decv2_to_i128) { - DecimalInt128Union conv; - conv.decimal = from; - return conv.i128; - } else { - throw Exception(Status::FatalError("__builtin_unreachable")); - } + static_assert(sizeof(From) == sizeof(To)); + To to; + std::memcpy(&to, &from, sizeof(To)); + return to; } // Generate random datetime values in uint64_t format for testing diff --git 
a/be/src/core/binary_cast.hpp b/be/src/core/binary_cast.hpp index 7da0844a3cd312..3c8d9a50d9193b 100644 --- a/be/src/core/binary_cast.hpp +++ b/be/src/core/binary_cast.hpp @@ -35,7 +35,7 @@ static_assert(sizeof(DecimalV2Value) == sizeof(__int128_t)); // similar to reinterpret_cast but won't break strict-aliasing rules. you can treat it as std::bit_cast with type checking template -constexpr PURE To binary_cast(const From& from) { +constexpr To binary_cast(const From& from) { constexpr bool from_u64_to_db = match_v; constexpr bool from_i64_to_db = match_v; constexpr bool from_db_to_i64 = match_v; diff --git a/be/src/core/column/column_array_view.h b/be/src/core/column/column_array_view.h new file mode 100644 index 00000000000000..cc74d6e3c7088d --- /dev/null +++ b/be/src/core/column/column_array_view.h @@ -0,0 +1,135 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "core/column/column_array.h" +#include "core/column/column_execute_util.h" + +namespace doris { + +// ArrayDataView represents a read-only view of a single row's array data +// (a slice of ColumnArray's flat nested data). +// Used as the return type of ColumnArrayView::operator[]. 
+template
+struct ArrayDataView { // One row's slice of the flat element data; this is what ColumnArrayView::operator[] returns.
+    using ElementType = typename ColumnElementView::ElementType;
+
+    const ColumnElementView& data; // element view over the whole flat element data, not just this row
+    const NullMap& nested_null_map; // null map of the array elements, indexed the same way as `data`
+    const size_t offset; // index of this row's first element within the flat data
+    const size_t length; // number of elements in this row
+
+    size_t size() const { return length; }
+
+    const ElementType* get_data() const { // pointer to this row's first element
+        const ElementType* raw_data = data.get_data();
+        return raw_data + offset;
+    }
+
+    const UInt8* get_null_map_data() const { return nested_null_map.data() + offset; } // element null flags, starting at this row's first element
+
+    // ColumnArray's data column is always Nullable, no need to check nullptr. `idx` is relative to the start of this row.
+    bool is_null_at(size_t idx) const { return nested_null_map[offset + idx]; }
+
+    ElementType value_at(size_t idx) const { return data.get_element(offset + idx); } // `idx` is relative to the start of this row
+};
+
+// ColumnArrayView provides a read-only view over a column of Array,
+// handling Const / Nullable wrapping automatically.
+//
+// Supports index-based access: operator[](row) returns ArrayDataView, uses offsets[row-1] (sentinel). NOTE(review): the view holds references/pointers into the column passed to create(), so that column presumably has to outlive the view -- confirm.
+template
+struct ColumnArrayView {
+    const ColumnElementView element_data; // view over the flat element data, with the inner Nullable already unwrapped
+    const ColumnArray::Offsets64& offsets; // row i spans [offsets[i - 1], offsets[i]) of the flat data, as used by row_begin()/row_end()
+    const NullMap* outer_null_map; // null map of the outer Nullable wrapper; nullptr when the column is not Nullable
+    const NullMap& nested_null_map; // null map of the array elements
+    const bool is_const; // flag returned by unpack_if_const(); when true every row index is mapped to row 0
+    const size_t count; // column_ptr->size() captured in create()
+
+    static ColumnArrayView create(const ColumnPtr& column_ptr) { // Builds the view; throws doris::Exception(INTERNAL_ERROR) when the array's data column is not Nullable.
+        // Step 1: unpack const
+        const auto& [unpacked, is_const] = unpack_if_const(column_ptr);
+
+        // Step 2: unpack outer nullable
+        const NullMap* outer_null_map = nullptr;
+        const IColumn* array_raw = nullptr;
+        if (const auto* nullable = check_and_get_column(unpacked.get())) {
+            outer_null_map = &nullable->get_null_map_data();
+            array_raw = nullable->get_nested_column_ptr().get();
+        } else {
+            array_raw = unpacked.get();
+        }
+
+        // Step 3: get ColumnArray
+        const auto& array_column = assert_cast(*array_raw);
+
+        // Step 4: unpack inner nullable (data column is always Nullable)
+        if (!array_column.get_data().is_nullable()) {
+            throw doris::Exception(ErrorCode::INTERNAL_ERROR,
+                                   "ColumnArray's data column is expected to be Nullable");
+        }
+
+        const auto& nested_nullable = assert_cast(array_column.get_data());
+        const NullMap& nested_null_map = nested_nullable.get_null_map_data();
+        const IColumn* data_column = nested_nullable.get_nested_column_ptr().get();
+
+        return ColumnArrayView {.element_data = ColumnElementView(*data_column),
+                                .offsets = array_column.get_offsets(),
+                                .outer_null_map = outer_null_map,
+                                .nested_null_map = nested_null_map,
+                                .is_const = is_const,
+                                .count = column_ptr->size()}; // size of the column as passed in, before unpacking
+    }
+
+    size_t size() const { return count; }
+
+    auto get_data() const { return element_data.get_data(); } // pointer to the start of the flat element data (all rows)
+
+    const UInt8* get_null_map_data() const { return nested_null_map.data(); } // element null flags for the flat data (all rows)
+
+    size_t row_begin(size_t idx) const { // index in the flat data where row `idx` starts
+        size_t actual = is_const ? 0 : idx;
+        return offsets[actual - 1]; // for actual == 0 this reads offsets[-1]; see the sentinel note on operator[]
+    }
+
+    size_t row_end(size_t idx) const { // index in the flat data one past the last element of row `idx`
+        size_t actual = is_const ? 0 : idx;
+        return offsets[actual];
+    }
+
+    bool is_null_at(size_t idx) const { // outer (row-level) null check; false when there is no outer Nullable wrapper
+        if (outer_null_map) {
+            return (*outer_null_map)[is_const ? 0 : idx];
+        }
+        return false;
+    }
+
+    // Index-based access: uses offsets[actual - 1] (PaddedPODArray sentinel guarantees [-1] is valid)
+    ArrayDataView operator[](size_t idx) const {
+        size_t actual = is_const ? 0 : idx;
+        size_t off = offsets[actual - 1];
+        size_t len = offsets[actual] - off;
+        return ArrayDataView {.data = element_data, // the returned view refers to this object's element_data member
+                              .nested_null_map = nested_null_map,
+                              .offset = off,
+                              .length = len};
+    }
+};
+
+} // namespace doris
diff --git a/be/src/core/column/column_execute_util.h b/be/src/core/column/column_execute_util.h index d8f33782c48c1a..0b6513ba5b779b 100644 --- a/be/src/core/column/column_execute_util.h +++ b/be/src/core/column/column_execute_util.h @@ -39,6 +39,7 @@ struct ColumnElementView { using ElementType = typename ColumnType::value_type; const typename ColumnType::Container& data; ElementType get_element(size_t idx) const { return data[idx]; } + const ElementType* get_data() const { return data.data(); } ColumnElementView(const IColumn& column) : data(assert_cast(column).get_data()) {}
diff --git a/be/src/core/column/column_nullable.h b/be/src/core/column/column_nullable.h index bc0a02cd95f02d..34ee0dffebc9e2 100644 --- a/be/src/core/column/column_nullable.h +++ b/be/src/core/column/column_nullable.h @@ -20,7 +20,6 @@ #pragma once -#include "common/compiler_util.h" // IWYU pragma: keep #include "common/status.h" #include "core/assert_cast.h" #include "core/column/column.h" @@ -100,9 +99,7 @@ class ColumnNullable final : public COWHelper { std::string get_name() const override { return "Nullable(" + _nested_column->get_name() + ")"; } MutableColumnPtr clone_resized(size_t size) const override; size_t size() const override { return get_null_map_column().size(); } - PURE bool is_null_at(size_t n) const override { - return get_null_map_column().get_data()[n] != 0; - } + bool is_null_at(size_t n) const override { return get_null_map_column().get_data()[n] != 0; } Field operator[](size_t n) const override; void get(size_t n, Field& res) const override; bool get_bool(size_t n) const override {
diff --git a/be/src/core/data_type_serde/datelike_serde_common.hpp b/be/src/core/data_type_serde/datelike_serde_common.hpp index 
c58a90ed62b7b9..f9db889cc8ba3e 100644 --- a/be/src/core/data_type_serde/datelike_serde_common.hpp +++ b/be/src/core/data_type_serde/datelike_serde_common.hpp @@ -47,19 +47,19 @@ enum class DatelikeFastParseResult : uint8_t { DATE_TIME, }; -inline PURE bool is_fixed_two_digit_ascii(const char* ptr) { +inline bool is_fixed_two_digit_ascii(const char* ptr) { return static_cast(ptr[0] - '0') < 10 && static_cast(ptr[1] - '0') < 10; } -inline PURE bool is_fixed_four_digit_ascii(const char* ptr) { +inline bool is_fixed_four_digit_ascii(const char* ptr) { return is_fixed_two_digit_ascii(ptr) && is_fixed_two_digit_ascii(ptr + 2); } -inline PURE uint32_t parse_fixed_two_digit_ascii(const char* ptr) { +inline uint32_t parse_fixed_two_digit_ascii(const char* ptr) { return (ptr[0] - '0') * 10 + (ptr[1] - '0'); } -inline PURE uint32_t parse_fixed_four_digit_ascii(const char* ptr) { +inline uint32_t parse_fixed_four_digit_ascii(const char* ptr) { return parse_fixed_two_digit_ascii(ptr) * 100 + parse_fixed_two_digit_ascii(ptr + 2); } @@ -117,7 +117,7 @@ inline DatelikeFastParseResult try_parse_fixed_canonical_datelike_prefix(const c return DatelikeFastParseResult::DATE_TIME; } -inline PURE uint32_t complete_4digit_year(uint32_t year) { +inline uint32_t complete_4digit_year(uint32_t year) { if (year < 70) { return year + 2000; // 00-69 -> 2000-2069 } else { diff --git a/be/src/core/string_ref.h b/be/src/core/string_ref.h index fb775fbe9e3deb..ca67ba91c06912 100644 --- a/be/src/core/string_ref.h +++ b/be/src/core/string_ref.h @@ -147,8 +147,7 @@ inline bool memequalSSE2Wide(const char* p1, const char* p2, size_t size) { // - s1/n1: ptr/len for the first string // - s2/n2: ptr/len for the second string // - len: min(n1, n2) - this can be more cheaply passed in by the caller -PURE inline int64_t string_compare(const char* s1, int64_t n1, const char* s2, int64_t n2, - int64_t len) { +inline int64_t string_compare(const char* s1, int64_t n1, const char* s2, int64_t n2, int64_t len) { 
DCHECK_EQ(len, std::min(n1, n2)); #if defined(__SSE4_2__) || defined(__aarch64__) while (len >= sse_util::CHARS_PER_128_BIT_REGISTER) { diff --git a/be/src/exec/exchange/local_exchange_sink_operator.cpp b/be/src/exec/exchange/local_exchange_sink_operator.cpp index 0a11596cfeea9a..10f1d52831b5c5 100644 --- a/be/src/exec/exchange/local_exchange_sink_operator.cpp +++ b/be/src/exec/exchange/local_exchange_sink_operator.cpp @@ -141,7 +141,7 @@ std::string LocalExchangeSinkLocalState::debug_string(int indentation_level) con return fmt::to_string(debug_string_buffer); } -Status LocalExchangeSinkOperatorX::sink(RuntimeState* state, Block* in_block, bool eos) { +Status LocalExchangeSinkOperatorX::sink_impl(RuntimeState* state, Block* in_block, bool eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); COUNTER_UPDATE(local_state.rows_input_counter(), (int64_t)in_block->rows()); diff --git a/be/src/exec/exchange/local_exchange_sink_operator.h b/be/src/exec/exchange/local_exchange_sink_operator.h index 01b958645d32fa..0e6844cd1fba1a 100644 --- a/be/src/exec/exchange/local_exchange_sink_operator.h +++ b/be/src/exec/exchange/local_exchange_sink_operator.h @@ -103,7 +103,7 @@ class LocalExchangeSinkOperatorX final : public DataSinkOperatorXget_block( diff --git a/be/src/exec/exchange/local_exchange_source_operator.h b/be/src/exec/exchange/local_exchange_source_operator.h index 58252b24ec2c23..d99dd57a378dd0 100644 --- a/be/src/exec/exchange/local_exchange_source_operator.h +++ b/be/src/exec/exchange/local_exchange_source_operator.h @@ -78,7 +78,7 @@ class LocalExchangeSourceOperatorX final : public OperatorXrow_descriptor(); } const RowDescriptor& row_desc() const override { return _child->row_desc(); } - Status get_block(RuntimeState* state, Block* block, bool* eos) override; + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override; bool is_source() const override { return true; } diff --git 
a/be/src/exec/operator/aggregation_sink_operator.cpp b/be/src/exec/operator/aggregation_sink_operator.cpp index 268f32ced7b990..1422e024ecca5b 100644 --- a/be/src/exec/operator/aggregation_sink_operator.cpp +++ b/be/src/exec/operator/aggregation_sink_operator.cpp @@ -872,7 +872,7 @@ Status AggSinkOperatorX::_check_agg_fn_output() { return Status::OK(); } -Status AggSinkOperatorX::sink(doris::RuntimeState* state, Block* in_block, bool eos) { +Status AggSinkOperatorX::sink_impl(doris::RuntimeState* state, Block* in_block, bool eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); COUNTER_UPDATE(local_state.rows_input_counter(), (int64_t)in_block->rows()); diff --git a/be/src/exec/operator/aggregation_sink_operator.h b/be/src/exec/operator/aggregation_sink_operator.h index 9774d2b95e512a..fe0a4023cdaabe 100644 --- a/be/src/exec/operator/aggregation_sink_operator.h +++ b/be/src/exec/operator/aggregation_sink_operator.h @@ -151,7 +151,7 @@ class AggSinkOperatorX MOCK_REMOVE(final) : public DataSinkOperatorX { AggSourceOperatorX() = default; #endif - Status get_block(RuntimeState* state, Block* block, bool* eos) override; + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override; bool is_source() const override { return true; } diff --git a/be/src/exec/operator/analytic_sink_operator.cpp b/be/src/exec/operator/analytic_sink_operator.cpp index 863acc4a59b4a3..0360d6168c324e 100644 --- a/be/src/exec/operator/analytic_sink_operator.cpp +++ b/be/src/exec/operator/analytic_sink_operator.cpp @@ -746,7 +746,7 @@ Status AnalyticSinkOperatorX::prepare(RuntimeState* state) { return Status::OK(); } -Status AnalyticSinkOperatorX::sink(doris::RuntimeState* state, Block* input_block, bool eos) { +Status AnalyticSinkOperatorX::sink_impl(doris::RuntimeState* state, Block* input_block, bool eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); 
COUNTER_UPDATE(local_state.rows_input_counter(), (int64_t)input_block->rows()); diff --git a/be/src/exec/operator/analytic_sink_operator.h b/be/src/exec/operator/analytic_sink_operator.h index a54088025f12f2..a7466761e18ee2 100644 --- a/be/src/exec/operator/analytic_sink_operator.h +++ b/be/src/exec/operator/analytic_sink_operator.h @@ -210,7 +210,7 @@ class AnalyticSinkOperatorX final : public DataSinkOperatorX(pool, tnode, operator_id, descs) {} -Status AnalyticSourceOperatorX::get_block(RuntimeState* state, Block* output_block, bool* eos) { +Status AnalyticSourceOperatorX::get_block_impl(RuntimeState* state, Block* output_block, + bool* eos) { RETURN_IF_CANCELLED(state); auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); diff --git a/be/src/exec/operator/analytic_source_operator.h b/be/src/exec/operator/analytic_source_operator.h index e4d9cb2c2b69d0..c5591e81cfee67 100644 --- a/be/src/exec/operator/analytic_source_operator.h +++ b/be/src/exec/operator/analytic_source_operator.h @@ -48,7 +48,7 @@ class AnalyticSourceOperatorX final : public OperatorX { #ifdef BE_TEST AnalyticSourceOperatorX() = default; #endif - Status get_block(RuntimeState* state, Block* block, bool* eos) override; + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override; bool is_source() const override { return true; } diff --git a/be/src/exec/operator/blackhole_sink_operator.cpp b/be/src/exec/operator/blackhole_sink_operator.cpp index 0745c3285ccea0..e8daabec852261 100644 --- a/be/src/exec/operator/blackhole_sink_operator.cpp +++ b/be/src/exec/operator/blackhole_sink_operator.cpp @@ -44,7 +44,7 @@ Status BlackholeSinkOperatorX::init(const TDataSink& tsink) { return Status::OK(); } -Status BlackholeSinkOperatorX::sink(RuntimeState* state, Block* block, bool eos) { +Status BlackholeSinkOperatorX::sink_impl(RuntimeState* state, Block* block, bool eos) { auto& local_state = get_local_state(state); 
SCOPED_TIMER(local_state.exec_time_counter()); COUNTER_UPDATE(local_state.rows_input_counter(), (int64_t)block->rows()); diff --git a/be/src/exec/operator/blackhole_sink_operator.h b/be/src/exec/operator/blackhole_sink_operator.h index 23a2e9953063b0..8bb32e3d4f7d22 100644 --- a/be/src/exec/operator/blackhole_sink_operator.h +++ b/be/src/exec/operator/blackhole_sink_operator.h @@ -68,7 +68,7 @@ class BlackholeSinkOperatorX final : public DataSinkOperatorXrows()); diff --git a/be/src/exec/operator/cache_sink_operator.h b/be/src/exec/operator/cache_sink_operator.h index 084cc5249137c3..77f7a3f2ae35ce 100644 --- a/be/src/exec/operator/cache_sink_operator.h +++ b/be/src/exec/operator/cache_sink_operator.h @@ -58,7 +58,7 @@ class CacheSinkOperatorX final : public DataSinkOperatorX { DataSinkOperatorX::_name); } - Status sink(RuntimeState* state, Block* in_block, bool eos) override; + Status sink_impl(RuntimeState* state, Block* in_block, bool eos) override; std::shared_ptr create_shared_state() const override { std::shared_ptr ss = std::make_shared(); diff --git a/be/src/exec/operator/cache_source_operator.cpp b/be/src/exec/operator/cache_source_operator.cpp index 12e95baa11ba05..3bb12c47f9ea33 100644 --- a/be/src/exec/operator/cache_source_operator.cpp +++ b/be/src/exec/operator/cache_source_operator.cpp @@ -119,7 +119,7 @@ std::string CacheSourceLocalState::debug_string(int indentation_level) const { return fmt::to_string(debug_string_buffer); } -Status CacheSourceOperatorX::get_block(RuntimeState* state, Block* block, bool* eos) { +Status CacheSourceOperatorX::get_block_impl(RuntimeState* state, Block* block, bool* eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); diff --git a/be/src/exec/operator/cache_source_operator.h b/be/src/exec/operator/cache_source_operator.h index dca13774cf405b..dfbb9468882753 100644 --- a/be/src/exec/operator/cache_source_operator.h +++ b/be/src/exec/operator/cache_source_operator.h @@ -79,7 
+79,7 @@ class CacheSourceOperatorX final : public OperatorX { #endif ~CacheSourceOperatorX() override = default; - Status get_block(RuntimeState* state, Block* block, bool* eos) override; + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override; bool is_source() const override { return true; } diff --git a/be/src/exec/operator/datagen_operator.cpp b/be/src/exec/operator/datagen_operator.cpp index 457675666194ee..c4d4e969fe8aee 100644 --- a/be/src/exec/operator/datagen_operator.cpp +++ b/be/src/exec/operator/datagen_operator.cpp @@ -61,7 +61,7 @@ Status DataGenSourceOperatorX::prepare(RuntimeState* state) { return Status::OK(); } -Status DataGenSourceOperatorX::get_block(RuntimeState* state, Block* block, bool* eos) { +Status DataGenSourceOperatorX::get_block_impl(RuntimeState* state, Block* block, bool* eos) { if (state == nullptr || block == nullptr) { return Status::InternalError("input is NULL pointer"); } diff --git a/be/src/exec/operator/datagen_operator.h b/be/src/exec/operator/datagen_operator.h index 7950725fde09b1..d8cc0fc4508d2d 100644 --- a/be/src/exec/operator/datagen_operator.h +++ b/be/src/exec/operator/datagen_operator.h @@ -59,7 +59,7 @@ class DataGenSourceOperatorX final : public OperatorX { Status init(const TPlanNode& tnode, RuntimeState* state) override; Status prepare(RuntimeState* state) override; - Status get_block(RuntimeState* state, Block* block, bool* eos) override; + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override; [[nodiscard]] bool is_source() const override { return true; } diff --git a/be/src/exec/operator/dict_sink_operator.cpp b/be/src/exec/operator/dict_sink_operator.cpp index c529c4ced25598..6feac5784fb3c9 100644 --- a/be/src/exec/operator/dict_sink_operator.cpp +++ b/be/src/exec/operator/dict_sink_operator.cpp @@ -159,7 +159,7 @@ Status DictSinkOperatorX::prepare(RuntimeState* state) { return Status::OK(); } -Status DictSinkOperatorX::sink(RuntimeState* state, Block* in_block, 
bool eos) { +Status DictSinkOperatorX::sink_impl(RuntimeState* state, Block* in_block, bool eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); COUNTER_UPDATE(local_state.rows_input_counter(), (int64_t)in_block->rows()); diff --git a/be/src/exec/operator/dict_sink_operator.h b/be/src/exec/operator/dict_sink_operator.h index 252136b33085c1..ddb49e183c95c9 100644 --- a/be/src/exec/operator/dict_sink_operator.h +++ b/be/src/exec/operator/dict_sink_operator.h @@ -50,7 +50,7 @@ class DictSinkOperatorX final : public DataSinkOperatorX { const std::vector& dict_input_expr, const TDictionarySink& dict_sink); Status prepare(RuntimeState* state) override; - Status sink(RuntimeState* state, Block* in_block, bool eos) override; + Status sink_impl(RuntimeState* state, Block* in_block, bool eos) override; private: friend class DictSinkLocalState; diff --git a/be/src/exec/operator/empty_set_operator.cpp b/be/src/exec/operator/empty_set_operator.cpp index 0dd0b66bce340a..1b4a80afe0121a 100644 --- a/be/src/exec/operator/empty_set_operator.cpp +++ b/be/src/exec/operator/empty_set_operator.cpp @@ -24,7 +24,7 @@ namespace doris { #include "common/compile_check_begin.h" -Status EmptySetSourceOperatorX::get_block(RuntimeState* state, Block* block, bool* eos) { +Status EmptySetSourceOperatorX::get_block_impl(RuntimeState* state, Block* block, bool* eos) { *eos = true; return Status::OK(); } diff --git a/be/src/exec/operator/empty_set_operator.h b/be/src/exec/operator/empty_set_operator.h index 5c8f70071c0edd..edc827435d5f2c 100644 --- a/be/src/exec/operator/empty_set_operator.h +++ b/be/src/exec/operator/empty_set_operator.h @@ -43,7 +43,7 @@ class EmptySetSourceOperatorX final : public OperatorX { EmptySetSourceOperatorX() = default; #endif - Status get_block(RuntimeState* state, Block* block, bool* eos) override; + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override; [[nodiscard]] bool is_source() const override { 
return true; } }; diff --git a/be/src/exec/operator/exchange_sink_operator.cpp b/be/src/exec/operator/exchange_sink_operator.cpp index beec1ed0313f3d..6a64dc212a5d0b 100644 --- a/be/src/exec/operator/exchange_sink_operator.cpp +++ b/be/src/exec/operator/exchange_sink_operator.cpp @@ -386,7 +386,7 @@ Status ExchangeSinkOperatorX::_handle_eof_channel(RuntimeState* state, ChannelPt return channel->close(state); } -Status ExchangeSinkOperatorX::sink(RuntimeState* state, Block* block, bool eos) { +Status ExchangeSinkOperatorX::sink_impl(RuntimeState* state, Block* block, bool eos) { auto& local_state = get_local_state(state); COUNTER_UPDATE(local_state.rows_input_counter(), (int64_t)block->rows()); // for auto-partition, may decease when do_partitioning diff --git a/be/src/exec/operator/exchange_sink_operator.h b/be/src/exec/operator/exchange_sink_operator.h index 369f03ec6bfcc9..c850c13cac4b6c 100644 --- a/be/src/exec/operator/exchange_sink_operator.h +++ b/be/src/exec/operator/exchange_sink_operator.h @@ -198,7 +198,7 @@ class ExchangeSinkOperatorX MOCK_REMOVE(final) : public DataSinkOperatorX { Status reset(RuntimeState* state) override; - Status get_block(RuntimeState* state, Block* block, bool* eos) override; + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override; std::string debug_string(int indentation_level = 0) const override; diff --git a/be/src/exec/operator/group_commit_block_sink_operator.cpp b/be/src/exec/operator/group_commit_block_sink_operator.cpp index fd44a9e74a627d..ac0c7328772b3b 100644 --- a/be/src/exec/operator/group_commit_block_sink_operator.cpp +++ b/be/src/exec/operator/group_commit_block_sink_operator.cpp @@ -300,7 +300,7 @@ Status GroupCommitBlockSinkOperatorX::prepare(RuntimeState* state) { return VExpr::open(_output_vexpr_ctxs, state); } -Status GroupCommitBlockSinkOperatorX::sink(RuntimeState* state, Block* input_block, bool eos) { +Status GroupCommitBlockSinkOperatorX::sink_impl(RuntimeState* state, Block* 
input_block, bool eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); COUNTER_UPDATE(local_state.rows_input_counter(), (int64_t)input_block->rows()); diff --git a/be/src/exec/operator/group_commit_block_sink_operator.h b/be/src/exec/operator/group_commit_block_sink_operator.h index 406c83294c5c11..1e335bc5261358 100644 --- a/be/src/exec/operator/group_commit_block_sink_operator.h +++ b/be/src/exec/operator/group_commit_block_sink_operator.h @@ -107,7 +107,7 @@ class GroupCommitBlockSinkOperatorX final Status prepare(RuntimeState* state) override; - Status sink(RuntimeState* state, Block* block, bool eos) override; + Status sink_impl(RuntimeState* state, Block* block, bool eos) override; private: friend class GroupCommitBlockSinkLocalState; diff --git a/be/src/exec/operator/group_commit_scan_operator.cpp b/be/src/exec/operator/group_commit_scan_operator.cpp index 26c2cf4fe8202b..7d11e1c33d582c 100644 --- a/be/src/exec/operator/group_commit_scan_operator.cpp +++ b/be/src/exec/operator/group_commit_scan_operator.cpp @@ -29,7 +29,7 @@ GroupCommitOperatorX::GroupCommitOperatorX(ObjectPool* pool, const TPlanNode& tn _output_tuple_id = tnode.file_scan_node.tuple_id; } -Status GroupCommitOperatorX::get_block(RuntimeState* state, Block* block, bool* eos) { +Status GroupCommitOperatorX::get_block_impl(RuntimeState* state, Block* block, bool* eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); bool find_node = false; diff --git a/be/src/exec/operator/group_commit_scan_operator.h b/be/src/exec/operator/group_commit_scan_operator.h index ebaf26a2561b82..aab43aafe8bd29 100644 --- a/be/src/exec/operator/group_commit_scan_operator.h +++ b/be/src/exec/operator/group_commit_scan_operator.h @@ -55,7 +55,7 @@ class GroupCommitOperatorX final : public ScanOperatorX { GroupCommitOperatorX(ObjectPool* pool, const TPlanNode& tnode, int operator_id, const DescriptorTbl& descs, int parallel_tasks); - 
Status get_block(RuntimeState* state, Block* block, bool* eos) override; + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override; protected: friend class GroupCommitLocalState; diff --git a/be/src/exec/operator/hashjoin_build_sink.cpp b/be/src/exec/operator/hashjoin_build_sink.cpp index c29199db35d7c2..8c6f78a032821d 100644 --- a/be/src/exec/operator/hashjoin_build_sink.cpp +++ b/be/src/exec/operator/hashjoin_build_sink.cpp @@ -821,7 +821,7 @@ Status HashJoinBuildSinkOperatorX::prepare(RuntimeState* state) { return VExpr::open(_build_expr_ctxs, state); } -Status HashJoinBuildSinkOperatorX::sink(RuntimeState* state, Block* in_block, bool eos) { +Status HashJoinBuildSinkOperatorX::sink_impl(RuntimeState* state, Block* in_block, bool eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); COUNTER_UPDATE(local_state.rows_input_counter(), (int64_t)in_block->rows()); diff --git a/be/src/exec/operator/hashjoin_build_sink.h b/be/src/exec/operator/hashjoin_build_sink.h index 9ece55d5fcbfaf..af2155bab1c646 100644 --- a/be/src/exec/operator/hashjoin_build_sink.h +++ b/be/src/exec/operator/hashjoin_build_sink.h @@ -119,7 +119,7 @@ class HashJoinBuildSinkOperatorX MOCK_REMOVE(final) Status prepare(RuntimeState* state) override; - Status sink(RuntimeState* state, Block* in_block, bool eos) override; + Status sink_impl(RuntimeState* state, Block* in_block, bool eos) override; size_t get_reserve_mem_size(RuntimeState* state, bool eos) override; diff --git a/be/src/exec/operator/hive_table_sink_operator.h b/be/src/exec/operator/hive_table_sink_operator.h index ff4a681f425613..34a1e8094b1a86 100644 --- a/be/src/exec/operator/hive_table_sink_operator.h +++ b/be/src/exec/operator/hive_table_sink_operator.h @@ -66,7 +66,7 @@ class HiveTableSinkOperatorX final : public DataSinkOperatorXrows()); diff --git a/be/src/exec/operator/iceberg_delete_sink_operator.h b/be/src/exec/operator/iceberg_delete_sink_operator.h index 
f9cbbd60e311b2..651983b83c9c1b 100644 --- a/be/src/exec/operator/iceberg_delete_sink_operator.h +++ b/be/src/exec/operator/iceberg_delete_sink_operator.h @@ -65,7 +65,7 @@ class IcebergDeleteSinkOperatorX final : public DataSinkOperatorXrows()); diff --git a/be/src/exec/operator/iceberg_merge_sink_operator.h b/be/src/exec/operator/iceberg_merge_sink_operator.h index 362d7ad7c45993..4b1bc706ff0328 100644 --- a/be/src/exec/operator/iceberg_merge_sink_operator.h +++ b/be/src/exec/operator/iceberg_merge_sink_operator.h @@ -64,7 +64,7 @@ class IcebergMergeSinkOperatorX final : public DataSinkOperatorXrows()); diff --git a/be/src/exec/operator/iceberg_table_sink_operator.h b/be/src/exec/operator/iceberg_table_sink_operator.h index 5d3867323bd2c6..1d5cfc9c25fc76 100644 --- a/be/src/exec/operator/iceberg_table_sink_operator.h +++ b/be/src/exec/operator/iceberg_table_sink_operator.h @@ -65,7 +65,7 @@ class IcebergTableSinkOperatorX final : public DataSinkOperatorXrows()); diff --git a/be/src/exec/operator/jdbc_table_sink_operator.cpp b/be/src/exec/operator/jdbc_table_sink_operator.cpp index 85761effac1ac0..1b7ac4062c3319 100644 --- a/be/src/exec/operator/jdbc_table_sink_operator.cpp +++ b/be/src/exec/operator/jdbc_table_sink_operator.cpp @@ -47,7 +47,7 @@ Status JdbcTableSinkOperatorX::prepare(RuntimeState* state) { return Status::OK(); } -Status JdbcTableSinkOperatorX::sink(RuntimeState* state, Block* block, bool eos) { +Status JdbcTableSinkOperatorX::sink_impl(RuntimeState* state, Block* block, bool eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); COUNTER_UPDATE(local_state.rows_input_counter(), (int64_t)block->rows()); diff --git a/be/src/exec/operator/jdbc_table_sink_operator.h b/be/src/exec/operator/jdbc_table_sink_operator.h index 7481549ccda2f0..95d0f470c15169 100644 --- a/be/src/exec/operator/jdbc_table_sink_operator.h +++ b/be/src/exec/operator/jdbc_table_sink_operator.h @@ -46,7 +46,7 @@ class JdbcTableSinkOperatorX 
final : public DataSinkOperatorXrows()); diff --git a/be/src/exec/operator/memory_scratch_sink_operator.cpp b/be/src/exec/operator/memory_scratch_sink_operator.cpp index 917d54da763a12..3492b9774ee17d 100644 --- a/be/src/exec/operator/memory_scratch_sink_operator.cpp +++ b/be/src/exec/operator/memory_scratch_sink_operator.cpp @@ -87,7 +87,7 @@ Status MemoryScratchSinkOperatorX::prepare(RuntimeState* state) { return Status::OK(); } -Status MemoryScratchSinkOperatorX::sink(RuntimeState* state, Block* input_block, bool eos) { +Status MemoryScratchSinkOperatorX::sink_impl(RuntimeState* state, Block* input_block, bool eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); if (nullptr == input_block || 0 == input_block->rows()) { diff --git a/be/src/exec/operator/memory_scratch_sink_operator.h b/be/src/exec/operator/memory_scratch_sink_operator.h index 03ec59647a917d..f5ef12721ec9a2 100644 --- a/be/src/exec/operator/memory_scratch_sink_operator.h +++ b/be/src/exec/operator/memory_scratch_sink_operator.h @@ -58,7 +58,7 @@ class MemoryScratchSinkOperatorX final : public DataSinkOperatorX { ENABLE_FACTORY_CREATOR(MockOperatorX); MockOperatorX() = default; - Status get_block(RuntimeState* state, Block* block, bool* eos) override { + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override { if (_outout_blocks.empty()) { *eos = true; return Status::OK(); diff --git a/be/src/exec/operator/mock_scan_operator.h b/be/src/exec/operator/mock_scan_operator.h index e6a570a2f8548c..ad3722c1de6ae0 100644 --- a/be/src/exec/operator/mock_scan_operator.h +++ b/be/src/exec/operator/mock_scan_operator.h @@ -84,7 +84,7 @@ class MockScanOperatorX final : public ScanOperatorX { _output_blocks.push_back(std::move(block)); } - Status get_block(RuntimeState* state, Block* block, bool* eos) override { + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override { if (_output_blocks.empty()) { *eos = true; return 
Status::OK(); diff --git a/be/src/exec/operator/multi_cast_data_stream_sink.cpp b/be/src/exec/operator/multi_cast_data_stream_sink.cpp index 437416b1b23a67..9d34e0627da7e8 100644 --- a/be/src/exec/operator/multi_cast_data_stream_sink.cpp +++ b/be/src/exec/operator/multi_cast_data_stream_sink.cpp @@ -63,7 +63,7 @@ std::string MultiCastDataStreamSinkLocalState::debug_string(int indentation_leve return fmt::to_string(debug_string_buffer); } -Status MultiCastDataStreamSinkOperatorX::sink(RuntimeState* state, Block* in_block, bool eos) { +Status MultiCastDataStreamSinkOperatorX::sink_impl(RuntimeState* state, Block* in_block, bool eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); if (in_block->rows() > 0 || eos) { diff --git a/be/src/exec/operator/multi_cast_data_stream_sink.h b/be/src/exec/operator/multi_cast_data_stream_sink.h index 602371ce02c3ce..239003833a6196 100644 --- a/be/src/exec/operator/multi_cast_data_stream_sink.h +++ b/be/src/exec/operator/multi_cast_data_stream_sink.h @@ -57,7 +57,7 @@ class MultiCastDataStreamSinkOperatorX final _num_dests(sources.size()) {} ~MultiCastDataStreamSinkOperatorX() override = default; - Status sink(RuntimeState* state, Block* in_block, bool eos) override; + Status sink_impl(RuntimeState* state, Block* in_block, bool eos) override; std::shared_ptr create_shared_state() const override; diff --git a/be/src/exec/operator/multi_cast_data_stream_source.cpp b/be/src/exec/operator/multi_cast_data_stream_source.cpp index 7f19535859fab3..fea748baea138d 100644 --- a/be/src/exec/operator/multi_cast_data_stream_source.cpp +++ b/be/src/exec/operator/multi_cast_data_stream_source.cpp @@ -80,8 +80,8 @@ Status MultiCastDataStreamSourceLocalState::close(RuntimeState* state) { return Base::close(state); } -Status MultiCastDataStreamerSourceOperatorX::get_block(RuntimeState* state, Block* block, - bool* eos) { +Status MultiCastDataStreamerSourceOperatorX::get_block_impl(RuntimeState* state, Block* 
block, + bool* eos) { //auto& local_state = get_local_state(state); auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); diff --git a/be/src/exec/operator/multi_cast_data_stream_source.h b/be/src/exec/operator/multi_cast_data_stream_source.h index 3673269e92f4a0..5727419c7c589d 100644 --- a/be/src/exec/operator/multi_cast_data_stream_source.h +++ b/be/src/exec/operator/multi_cast_data_stream_source.h @@ -105,7 +105,7 @@ class MultiCastDataStreamerSourceOperatorX final return Status::OK(); } - Status get_block(RuntimeState* state, Block* block, bool* eos) override; + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override; bool is_source() const override { return true; } diff --git a/be/src/exec/operator/nested_loop_join_build_operator.cpp b/be/src/exec/operator/nested_loop_join_build_operator.cpp index 91a0debda69356..350e0664046930 100644 --- a/be/src/exec/operator/nested_loop_join_build_operator.cpp +++ b/be/src/exec/operator/nested_loop_join_build_operator.cpp @@ -95,7 +95,8 @@ Status NestedLoopJoinBuildSinkOperatorX::prepare(RuntimeState* state) { return VExpr::open(_filter_src_expr_ctxs, state); } -Status NestedLoopJoinBuildSinkOperatorX::sink(doris::RuntimeState* state, Block* block, bool eos) { +Status NestedLoopJoinBuildSinkOperatorX::sink_impl(doris::RuntimeState* state, Block* block, + bool eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); COUNTER_UPDATE(local_state.rows_input_counter(), (int64_t)block->rows()); diff --git a/be/src/exec/operator/nested_loop_join_build_operator.h b/be/src/exec/operator/nested_loop_join_build_operator.h index 04fbd894bb471d..a8fc817380274a 100644 --- a/be/src/exec/operator/nested_loop_join_build_operator.h +++ b/be/src/exec/operator/nested_loop_join_build_operator.h @@ -65,7 +65,7 @@ class NestedLoopJoinBuildSinkOperatorX final Status prepare(RuntimeState* state) override; - Status sink(RuntimeState* state, Block* 
in_block, bool eos) override; + Status sink_impl(RuntimeState* state, Block* in_block, bool eos) override; DataDistribution required_data_distribution(RuntimeState* /*state*/) const override { if (_join_op == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) { diff --git a/be/src/exec/operator/olap_table_sink_operator.h b/be/src/exec/operator/olap_table_sink_operator.h index fdb3756bc023c8..3ef0a51084b335 100644 --- a/be/src/exec/operator/olap_table_sink_operator.h +++ b/be/src/exec/operator/olap_table_sink_operator.h @@ -58,7 +58,7 @@ class OlapTableSinkOperatorX final : public DataSinkOperatorXrows()); diff --git a/be/src/exec/operator/olap_table_sink_v2_operator.h b/be/src/exec/operator/olap_table_sink_v2_operator.h index d97ea631a08429..8c72a4051429de 100644 --- a/be/src/exec/operator/olap_table_sink_v2_operator.h +++ b/be/src/exec/operator/olap_table_sink_v2_operator.h @@ -59,7 +59,7 @@ class OlapTableSinkV2OperatorX final : public DataSinkOperatorXrows()); diff --git a/be/src/exec/operator/operator.cpp b/be/src/exec/operator/operator.cpp index 1de468d82be571..34fa48dd06b71f 100644 --- a/be/src/exec/operator/operator.cpp +++ b/be/src/exec/operator/operator.cpp @@ -695,13 +695,15 @@ Status PipelineXSinkLocalState::close(RuntimeState* state, Status e } template -Status StreamingOperatorX::get_block(RuntimeState* state, Block* block, bool* eos) { +Status StreamingOperatorX::get_block_impl(RuntimeState* state, Block* block, + bool* eos) { RETURN_IF_ERROR(OperatorX::_child->get_block_after_projects(state, block, eos)); return pull(state, block, eos); } template -Status StatefulOperatorX::get_block(RuntimeState* state, Block* block, bool* eos) { +Status StatefulOperatorX::get_block_impl(RuntimeState* state, Block* block, + bool* eos) { auto& local_state = get_local_state(state); if (need_more_input_data(state)) { local_state._child_block->clear_column_data( diff --git a/be/src/exec/operator/operator.h b/be/src/exec/operator/operator.h index 25ae1477f8abff..e8ee719546717b 100644 
--- a/be/src/exec/operator/operator.h +++ b/be/src/exec/operator/operator.h @@ -633,7 +633,12 @@ class DataSinkOperatorXBase : public OperatorBase { return result.value()->is_finished(); } - [[nodiscard]] virtual Status sink(RuntimeState* state, Block* block, bool eos) = 0; + [[nodiscard]] Status sink(RuntimeState* state, Block* block, bool eos) { + RETURN_IF_ERROR(block->check_type_and_column()); + return sink_impl(state, block, eos); + } + + [[nodiscard]] virtual Status sink_impl(RuntimeState* state, Block* block, bool eos) = 0; [[nodiscard]] virtual Status setup_local_state(RuntimeState* state, LocalSinkStateInfo& info) = 0; @@ -874,7 +879,13 @@ class OperatorXBase : public OperatorBase { Status prepare(RuntimeState* state) override; Status terminate(RuntimeState* state) override; - [[nodiscard]] virtual Status get_block(RuntimeState* state, Block* block, bool* eos) = 0; + [[nodiscard]] Status get_block(RuntimeState* state, Block* block, bool* eos) { + RETURN_IF_ERROR(get_block_impl(state, block, eos)); + RETURN_IF_ERROR(block->check_type_and_column()); + return Status::OK(); + } + + [[nodiscard]] virtual Status get_block_impl(RuntimeState* state, Block* block, bool* eos) = 0; Status close(RuntimeState* state) override; @@ -1067,7 +1078,7 @@ class StreamingOperatorX : public OperatorX { virtual ~StreamingOperatorX() = default; - Status get_block(RuntimeState* state, Block* block, bool* eos) override; + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override; virtual Status pull(RuntimeState* state, Block* block, bool* eos) = 0; }; @@ -1093,7 +1104,7 @@ class StatefulOperatorX : public OperatorX { using OperatorX::get_local_state; - [[nodiscard]] Status get_block(RuntimeState* state, Block* block, bool* eos) override; + [[nodiscard]] Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override; [[nodiscard]] virtual Status pull(RuntimeState* state, Block* block, bool* eos) const = 0; [[nodiscard]] virtual Status 
push(RuntimeState* state, Block* input_block, bool eos) const = 0; @@ -1167,7 +1178,7 @@ class DummyOperator final : public OperatorX { [[nodiscard]] bool is_source() const override { return true; } - Status get_block(RuntimeState* state, Block* block, bool* eos) override { + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override { *eos = _eos; return Status::OK(); } @@ -1222,7 +1233,7 @@ class DummySinkOperatorX final : public DataSinkOperatorX { public: DummySinkOperatorX(int op_id, int node_id, int dest_id) : DataSinkOperatorX(op_id, node_id, dest_id) {} - Status sink(RuntimeState* state, Block* in_block, bool eos) override { + Status sink_impl(RuntimeState* state, Block* in_block, bool eos) override { return _return_eof ? Status::Error("source have closed") : Status::OK(); } diff --git a/be/src/exec/operator/partition_sort_sink_operator.cpp b/be/src/exec/operator/partition_sort_sink_operator.cpp index 66ed84d021e18b..7eb6be95aab4d3 100644 --- a/be/src/exec/operator/partition_sort_sink_operator.cpp +++ b/be/src/exec/operator/partition_sort_sink_operator.cpp @@ -112,7 +112,7 @@ Status PartitionSortSinkOperatorX::prepare(RuntimeState* state) { return Status::OK(); } -Status PartitionSortSinkOperatorX::sink(RuntimeState* state, Block* input_block, bool eos) { +Status PartitionSortSinkOperatorX::sink_impl(RuntimeState* state, Block* input_block, bool eos) { auto& local_state = get_local_state(state); auto current_rows = input_block->rows(); SCOPED_TIMER(local_state.exec_time_counter()); diff --git a/be/src/exec/operator/partition_sort_sink_operator.h b/be/src/exec/operator/partition_sort_sink_operator.h index 03a81941259432..5dce3e12653791 100644 --- a/be/src/exec/operator/partition_sort_sink_operator.h +++ b/be/src/exec/operator/partition_sort_sink_operator.h @@ -93,7 +93,7 @@ class PartitionSortSinkOperatorX final : public DataSinkOperatorXprepare(state); } -Status PartitionedAggSinkOperatorX::sink(doris::RuntimeState* state, Block* in_block, 
bool eos) { +Status PartitionedAggSinkOperatorX::sink_impl(doris::RuntimeState* state, Block* in_block, + bool eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); COUNTER_UPDATE(local_state.rows_input_counter(), (int64_t)in_block->rows()); diff --git a/be/src/exec/operator/partitioned_aggregation_sink_operator.h b/be/src/exec/operator/partitioned_aggregation_sink_operator.h index 0ca4817eb9f906..58b461a5b995c0 100644 --- a/be/src/exec/operator/partitioned_aggregation_sink_operator.h +++ b/be/src/exec/operator/partitioned_aggregation_sink_operator.h @@ -115,7 +115,7 @@ class PartitionedAggSinkOperatorX : public DataSinkOperatorX Status close(RuntimeState* state) override; - Status get_block(RuntimeState* state, Block* block, bool* eos) override; + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override; bool is_source() const override { return true; } diff --git a/be/src/exec/operator/partitioned_hash_join_probe_operator.cpp b/be/src/exec/operator/partitioned_hash_join_probe_operator.cpp index 6cb9ce5c1016a9..1a28eace53582e 100644 --- a/be/src/exec/operator/partitioned_hash_join_probe_operator.cpp +++ b/be/src/exec/operator/partitioned_hash_join_probe_operator.cpp @@ -1018,7 +1018,8 @@ Status PartitionedHashJoinProbeOperatorX::revoke_memory(RuntimeState* state) { return local_state.revoke_build_data(state); } -Status PartitionedHashJoinProbeOperatorX::get_block(RuntimeState* state, Block* block, bool* eos) { +Status PartitionedHashJoinProbeOperatorX::get_block_impl(RuntimeState* state, Block* block, + bool* eos) { *eos = false; auto& local_state = get_local_state(state); const bool is_spilled = local_state._shared_state->_is_spilled; diff --git a/be/src/exec/operator/partitioned_hash_join_probe_operator.h b/be/src/exec/operator/partitioned_hash_join_probe_operator.h index 2a53458e12983e..76721eb584ec3a 100644 --- a/be/src/exec/operator/partitioned_hash_join_probe_operator.h +++ 
b/be/src/exec/operator/partitioned_hash_join_probe_operator.h @@ -223,7 +223,7 @@ class PartitionedHashJoinProbeOperatorX final Status init(const TPlanNode& tnode, RuntimeState* state) override; Status prepare(RuntimeState* state) override; - [[nodiscard]] Status get_block(RuntimeState* state, Block* block, bool* eos) override; + [[nodiscard]] Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override; Status push(RuntimeState* state, Block* input_block, bool eos) const override; Status pull(doris::RuntimeState* state, Block* output_block, bool* eos) const override; diff --git a/be/src/exec/operator/partitioned_hash_join_sink_operator.cpp b/be/src/exec/operator/partitioned_hash_join_sink_operator.cpp index b481c5a6b5b5a9..4857015c37ee5c 100644 --- a/be/src/exec/operator/partitioned_hash_join_sink_operator.cpp +++ b/be/src/exec/operator/partitioned_hash_join_sink_operator.cpp @@ -515,7 +515,7 @@ void PartitionedHashJoinSinkLocalState::update_profile_from_inner() { #undef UPDATE_COUNTER_FROM_INNER -Status PartitionedHashJoinSinkOperatorX::sink(RuntimeState* state, Block* in_block, bool eos) { +Status PartitionedHashJoinSinkOperatorX::sink_impl(RuntimeState* state, Block* in_block, bool eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); const auto rows = in_block->rows(); diff --git a/be/src/exec/operator/partitioned_hash_join_sink_operator.h b/be/src/exec/operator/partitioned_hash_join_sink_operator.h index c4ffeb0ce44078..a9fb27f6b330a1 100644 --- a/be/src/exec/operator/partitioned_hash_join_sink_operator.h +++ b/be/src/exec/operator/partitioned_hash_join_sink_operator.h @@ -116,7 +116,7 @@ class PartitionedHashJoinSinkOperatorX Status prepare(RuntimeState* state) override; - Status sink(RuntimeState* state, Block* in_block, bool eos) override; + Status sink_impl(RuntimeState* state, Block* in_block, bool eos) override; bool should_dry_run(RuntimeState* state) override { return false; } diff --git 
a/be/src/exec/operator/rec_cte_anchor_sink_operator.h b/be/src/exec/operator/rec_cte_anchor_sink_operator.h index 19585ff02a8c07..e30d38d180c13a 100644 --- a/be/src/exec/operator/rec_cte_anchor_sink_operator.h +++ b/be/src/exec/operator/rec_cte_anchor_sink_operator.h @@ -81,7 +81,7 @@ class RecCTEAnchorSinkOperatorX MOCK_REMOVE(final) return Base::close(state); } - Status sink(RuntimeState* state, Block* input_block, bool eos) override { + Status sink_impl(RuntimeState* state, Block* input_block, bool eos) override { auto& local_state = get_local_state(state); RETURN_IF_ERROR(_notify_rec_side_ready_if_needed(state)); diff --git a/be/src/exec/operator/rec_cte_scan_operator.h b/be/src/exec/operator/rec_cte_scan_operator.h index 470f497ef9119c..954d7b0169b2c1 100644 --- a/be/src/exec/operator/rec_cte_scan_operator.h +++ b/be/src/exec/operator/rec_cte_scan_operator.h @@ -66,7 +66,7 @@ class RecCTEScanOperatorX final : public OperatorX { const DescriptorTbl& descs) : OperatorX(pool, tnode, operator_id, descs) {} - Status get_block(RuntimeState* state, Block* block, bool* eos) override { + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override { auto& local_state = get_local_state(state); if (local_state._blocks.empty()) { diff --git a/be/src/exec/operator/rec_cte_sink_operator.h b/be/src/exec/operator/rec_cte_sink_operator.h index a8a526854df4e4..34796b5658835c 100644 --- a/be/src/exec/operator/rec_cte_sink_operator.h +++ b/be/src/exec/operator/rec_cte_sink_operator.h @@ -82,7 +82,7 @@ class RecCTESinkOperatorX MOCK_REMOVE(final) : public DataSinkOperatorXrows()); diff --git a/be/src/exec/operator/rec_cte_source_operator.h b/be/src/exec/operator/rec_cte_source_operator.h index 92ffe43794cc17..ef31bfcb97614e 100644 --- a/be/src/exec/operator/rec_cte_source_operator.h +++ b/be/src/exec/operator/rec_cte_source_operator.h @@ -208,7 +208,7 @@ class RecCTESourceOperatorX : public OperatorX { return {ExchangeType::NOOP}; } - Status get_block(RuntimeState* 
state, Block* block, bool* eos) override { + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override { auto& local_state = get_local_state(state); auto& ctx = local_state._shared_state; ctx->update_ready_to_return(); diff --git a/be/src/exec/operator/result_file_sink_operator.cpp b/be/src/exec/operator/result_file_sink_operator.cpp index 97de630d3e74b3..3baafe30e98eaf 100644 --- a/be/src/exec/operator/result_file_sink_operator.cpp +++ b/be/src/exec/operator/result_file_sink_operator.cpp @@ -151,7 +151,7 @@ Status ResultFileSinkLocalState::close(RuntimeState* state, Status exec_status) return Base::close(state, exec_status); } -Status ResultFileSinkOperatorX::sink(RuntimeState* state, Block* in_block, bool eos) { +Status ResultFileSinkOperatorX::sink_impl(RuntimeState* state, Block* in_block, bool eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); COUNTER_UPDATE(local_state.rows_input_counter(), (int64_t)in_block->rows()); diff --git a/be/src/exec/operator/result_file_sink_operator.h b/be/src/exec/operator/result_file_sink_operator.h index 2f4bcb8e2c8321..c45ac00eb58c80 100644 --- a/be/src/exec/operator/result_file_sink_operator.h +++ b/be/src/exec/operator/result_file_sink_operator.h @@ -62,7 +62,7 @@ class ResultFileSinkOperatorX final : public DataSinkOperatorXrows()); diff --git a/be/src/exec/operator/result_sink_operator.h b/be/src/exec/operator/result_sink_operator.h index e8cc0fd25de90b..790aaa59cf65b1 100644 --- a/be/src/exec/operator/result_sink_operator.h +++ b/be/src/exec/operator/result_sink_operator.h @@ -160,7 +160,7 @@ class ResultSinkOperatorX final : public DataSinkOperatorX const std::vector& select_exprs, const TResultSink& sink); Status prepare(RuntimeState* state) override; - Status sink(RuntimeState* state, Block* in_block, bool eos) override; + Status sink_impl(RuntimeState* state, Block* in_block, bool eos) override; private: friend class ResultSinkLocalState; diff --git 
a/be/src/exec/operator/scan_operator.cpp b/be/src/exec/operator/scan_operator.cpp index 70ab35d143013f..ae2b84cc98e72e 100644 --- a/be/src/exec/operator/scan_operator.cpp +++ b/be/src/exec/operator/scan_operator.cpp @@ -1249,7 +1249,7 @@ Status ScanLocalState::close(RuntimeState* state) { } template -Status ScanOperatorX::get_block(RuntimeState* state, Block* block, bool* eos) { +Status ScanOperatorX::get_block_impl(RuntimeState* state, Block* block, bool* eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); diff --git a/be/src/exec/operator/scan_operator.h b/be/src/exec/operator/scan_operator.h index d52df35a5ca1d3..931917482736a6 100644 --- a/be/src/exec/operator/scan_operator.h +++ b/be/src/exec/operator/scan_operator.h @@ -328,9 +328,9 @@ class ScanOperatorX : public OperatorX { public: Status init(const TPlanNode& tnode, RuntimeState* state) override; Status prepare(RuntimeState* state) override; - Status get_block(RuntimeState* state, Block* block, bool* eos) override; + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override; Status get_block_after_projects(RuntimeState* state, Block* block, bool* eos) override { - Status status = get_block(state, block, eos); + Status status = OperatorX::get_block(state, block, eos); if (status.ok()) { state->get_local_state(operator_id())->update_output_block_counters(*block); } diff --git a/be/src/exec/operator/schema_scan_operator.cpp b/be/src/exec/operator/schema_scan_operator.cpp index abe9ee39797ace..0f4e5562962c1a 100644 --- a/be/src/exec/operator/schema_scan_operator.cpp +++ b/be/src/exec/operator/schema_scan_operator.cpp @@ -209,7 +209,7 @@ Status SchemaScanOperatorX::prepare(RuntimeState* state) { return Status::OK(); } -Status SchemaScanOperatorX::get_block(RuntimeState* state, Block* block, bool* eos) { +Status SchemaScanOperatorX::get_block_impl(RuntimeState* state, Block* block, bool* eos) { auto& local_state = get_local_state(state); 
SCOPED_TIMER(local_state.exec_time_counter()); RETURN_IF_CANCELLED(state); diff --git a/be/src/exec/operator/schema_scan_operator.h b/be/src/exec/operator/schema_scan_operator.h index 1d8cf22c4a0be0..b7540a393c699b 100644 --- a/be/src/exec/operator/schema_scan_operator.h +++ b/be/src/exec/operator/schema_scan_operator.h @@ -64,7 +64,7 @@ class SchemaScanOperatorX final : public OperatorX { Status init(const TPlanNode& tnode, RuntimeState* state) override; Status prepare(RuntimeState* state) override; - Status get_block(RuntimeState* state, Block* block, bool* eos) override; + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override; [[nodiscard]] bool is_source() const override { return true; } diff --git a/be/src/exec/operator/set_probe_sink_operator.cpp b/be/src/exec/operator/set_probe_sink_operator.cpp index 26913e97641e1a..c951f787bd019e 100644 --- a/be/src/exec/operator/set_probe_sink_operator.cpp +++ b/be/src/exec/operator/set_probe_sink_operator.cpp @@ -62,7 +62,8 @@ Status SetProbeSinkOperatorX::prepare(RuntimeState* state) { } template -Status SetProbeSinkOperatorX::sink(RuntimeState* state, Block* in_block, bool eos) { +Status SetProbeSinkOperatorX::sink_impl(RuntimeState* state, Block* in_block, + bool eos) { RETURN_IF_CANCELLED(state); auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); diff --git a/be/src/exec/operator/set_probe_sink_operator.h b/be/src/exec/operator/set_probe_sink_operator.h index bab8fbe536f628..cae10f7672667b 100644 --- a/be/src/exec/operator/set_probe_sink_operator.h +++ b/be/src/exec/operator/set_probe_sink_operator.h @@ -99,7 +99,7 @@ class SetProbeSinkOperatorX final : public DataSinkOperatorX::close(RuntimeState* state, Status exec_s } template -Status SetSinkOperatorX::sink(RuntimeState* state, Block* in_block, bool eos) { +Status SetSinkOperatorX::sink_impl(RuntimeState* state, Block* in_block, bool eos) { RETURN_IF_CANCELLED(state); auto& local_state = 
get_local_state(state); diff --git a/be/src/exec/operator/set_sink_operator.h b/be/src/exec/operator/set_sink_operator.h index 2ed03b4b0aea14..26b359101984a8 100644 --- a/be/src/exec/operator/set_sink_operator.h +++ b/be/src/exec/operator/set_sink_operator.h @@ -110,7 +110,7 @@ class SetSinkOperatorX final : public DataSinkOperatorX::open(RuntimeState* state) { } template -Status SetSourceOperatorX::get_block(RuntimeState* state, Block* block, bool* eos) { +Status SetSourceOperatorX::get_block_impl(RuntimeState* state, Block* block, + bool* eos) { RETURN_IF_CANCELLED(state); auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); diff --git a/be/src/exec/operator/set_source_operator.h b/be/src/exec/operator/set_source_operator.h index 31e5fc77542458..b0f3d974de4e60 100644 --- a/be/src/exec/operator/set_source_operator.h +++ b/be/src/exec/operator/set_source_operator.h @@ -85,7 +85,7 @@ class SetSourceOperatorX MOCK_REMOVE(final) : public OperatorXrows()); diff --git a/be/src/exec/operator/sort_sink_operator.h b/be/src/exec/operator/sort_sink_operator.h index f31d0debe07745..b0710cb15e2f69 100644 --- a/be/src/exec/operator/sort_sink_operator.h +++ b/be/src/exec/operator/sort_sink_operator.h @@ -77,7 +77,7 @@ class SortSinkOperatorX final : public DataSinkOperatorX { Status init(const TPlanNode& tnode, RuntimeState* state) override; Status prepare(RuntimeState* state) override; - Status sink(RuntimeState* state, Block* in_block, bool eos) override; + Status sink_impl(RuntimeState* state, Block* in_block, bool eos) override; DataDistribution required_data_distribution(RuntimeState* /*state*/) const override { if (_is_analytic_sort) { return _is_colocate && _require_bucket_distribution diff --git a/be/src/exec/operator/sort_source_operator.cpp b/be/src/exec/operator/sort_source_operator.cpp index 18fe81dc4c8b6b..a14fc054300368 100644 --- a/be/src/exec/operator/sort_source_operator.cpp +++ 
b/be/src/exec/operator/sort_source_operator.cpp @@ -31,7 +31,7 @@ SortSourceOperatorX::SortSourceOperatorX(ObjectPool* pool, const TPlanNode& tnod const DescriptorTbl& descs) : OperatorX(pool, tnode, operator_id, descs) {} -Status SortSourceOperatorX::get_block(RuntimeState* state, Block* block, bool* eos) { +Status SortSourceOperatorX::get_block_impl(RuntimeState* state, Block* block, bool* eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); SCOPED_PEAK_MEM(&local_state._estimate_memory_usage); diff --git a/be/src/exec/operator/sort_source_operator.h b/be/src/exec/operator/sort_source_operator.h index c2a63b82ccd607..79e59f635e0b72 100644 --- a/be/src/exec/operator/sort_source_operator.h +++ b/be/src/exec/operator/sort_source_operator.h @@ -43,7 +43,7 @@ class SortSourceOperatorX MOCK_REMOVE(final) : public OperatorX #ifdef BE_TEST SortSourceOperatorX() = default; #endif - Status get_block(RuntimeState* state, Block* block, bool* eos) override; + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override; bool is_source() const override { return true; } diff --git a/be/src/exec/operator/spill_iceberg_table_sink_operator.cpp b/be/src/exec/operator/spill_iceberg_table_sink_operator.cpp index 58a54868799f3b..20b4eea954a599 100644 --- a/be/src/exec/operator/spill_iceberg_table_sink_operator.cpp +++ b/be/src/exec/operator/spill_iceberg_table_sink_operator.cpp @@ -120,7 +120,7 @@ Status SpillIcebergTableSinkOperatorX::prepare(RuntimeState* state) { return VExpr::open(_output_vexpr_ctxs, state); } -Status SpillIcebergTableSinkOperatorX::sink(RuntimeState* state, Block* in_block, bool eos) { +Status SpillIcebergTableSinkOperatorX::sink_impl(RuntimeState* state, Block* in_block, bool eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); COUNTER_UPDATE(local_state.rows_input_counter(), (int64_t)in_block->rows()); diff --git 
a/be/src/exec/operator/spill_iceberg_table_sink_operator.h b/be/src/exec/operator/spill_iceberg_table_sink_operator.h index d81e515f05d59a..7e6a037d2f55ed 100644 --- a/be/src/exec/operator/spill_iceberg_table_sink_operator.h +++ b/be/src/exec/operator/spill_iceberg_table_sink_operator.h @@ -64,7 +64,7 @@ class SpillIcebergTableSinkOperatorX final Status prepare(RuntimeState* state) override; - Status sink(RuntimeState* state, Block* in_block, bool eos) override; + Status sink_impl(RuntimeState* state, Block* in_block, bool eos) override; size_t get_reserve_mem_size(RuntimeState* state, bool eos) override; diff --git a/be/src/exec/operator/spill_sort_sink_operator.cpp b/be/src/exec/operator/spill_sort_sink_operator.cpp index c0ccf4657d4df5..b1d83f3a63d8b8 100644 --- a/be/src/exec/operator/spill_sort_sink_operator.cpp +++ b/be/src/exec/operator/spill_sort_sink_operator.cpp @@ -144,7 +144,7 @@ size_t SpillSortSinkOperatorX::revocable_mem_size(RuntimeState* state) const { return mem_size > state->spill_min_revocable_mem() ? 
mem_size : 0; } -Status SpillSortSinkOperatorX::sink(doris::RuntimeState* state, Block* in_block, bool eos) { +Status SpillSortSinkOperatorX::sink_impl(doris::RuntimeState* state, Block* in_block, bool eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); COUNTER_UPDATE(local_state.rows_input_counter(), (int64_t)in_block->rows()); diff --git a/be/src/exec/operator/spill_sort_sink_operator.h b/be/src/exec/operator/spill_sort_sink_operator.h index 823bc5e7f04a90..692405bc6f5c91 100644 --- a/be/src/exec/operator/spill_sort_sink_operator.h +++ b/be/src/exec/operator/spill_sort_sink_operator.h @@ -80,7 +80,7 @@ class SpillSortSinkOperatorX final : public DataSinkOperatorXrequired_data_distribution(state); } diff --git a/be/src/exec/operator/spill_sort_source_operator.cpp b/be/src/exec/operator/spill_sort_source_operator.cpp index e516ead73c61fd..f2a58beab1ccc8 100644 --- a/be/src/exec/operator/spill_sort_source_operator.cpp +++ b/be/src/exec/operator/spill_sort_source_operator.cpp @@ -246,7 +246,7 @@ Status SpillSortSourceOperatorX::close(RuntimeState* state) { return _sort_source_operator->close(state); } -Status SpillSortSourceOperatorX::get_block(RuntimeState* state, Block* block, bool* eos) { +Status SpillSortSourceOperatorX::get_block_impl(RuntimeState* state, Block* block, bool* eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); diff --git a/be/src/exec/operator/spill_sort_source_operator.h b/be/src/exec/operator/spill_sort_source_operator.h index 969582243e0d19..4af4d4954f5444 100644 --- a/be/src/exec/operator/spill_sort_source_operator.h +++ b/be/src/exec/operator/spill_sort_source_operator.h @@ -82,7 +82,7 @@ class SpillSortSourceOperatorX : public OperatorX { Status close(RuntimeState* state) override; - Status get_block(RuntimeState* state, Block* block, bool* eos) override; + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override; bool is_source() 
const override { return true; } diff --git a/be/src/exec/operator/tvf_table_sink_operator.h b/be/src/exec/operator/tvf_table_sink_operator.h index 0a47d37ed609c9..8b1c4e98450303 100644 --- a/be/src/exec/operator/tvf_table_sink_operator.h +++ b/be/src/exec/operator/tvf_table_sink_operator.h @@ -65,7 +65,7 @@ class TVFTableSinkOperatorX final : public DataSinkOperatorXrows()); diff --git a/be/src/exec/operator/union_sink_operator.cpp b/be/src/exec/operator/union_sink_operator.cpp index 5484ea166aebb1..b0ed5e80ae1d68 100644 --- a/be/src/exec/operator/union_sink_operator.cpp +++ b/be/src/exec/operator/union_sink_operator.cpp @@ -94,7 +94,7 @@ Status UnionSinkOperatorX::prepare(RuntimeState* state) { return Status::OK(); } -Status UnionSinkOperatorX::sink(RuntimeState* state, Block* in_block, bool eos) { +Status UnionSinkOperatorX::sink_impl(RuntimeState* state, Block* in_block, bool eos) { auto& local_state = get_local_state(state); if (local_state.low_memory_mode()) { set_low_memory_mode(state); diff --git a/be/src/exec/operator/union_sink_operator.h b/be/src/exec/operator/union_sink_operator.h index 79dc6a7688c61e..43d7129d4b1556 100644 --- a/be/src/exec/operator/union_sink_operator.h +++ b/be/src/exec/operator/union_sink_operator.h @@ -102,7 +102,7 @@ class UnionSinkOperatorX MOCK_REMOVE(final) : public DataSinkOperatorX create_shared_state() const override { if (_cur_child_id > 0) { diff --git a/be/src/exec/operator/union_source_operator.cpp b/be/src/exec/operator/union_source_operator.cpp index 0b382a69c729a4..396a2d8595606a 100644 --- a/be/src/exec/operator/union_source_operator.cpp +++ b/be/src/exec/operator/union_source_operator.cpp @@ -101,7 +101,7 @@ std::string UnionSourceLocalState::debug_string(int indentation_level) const { return fmt::to_string(debug_string_buffer); } -Status UnionSourceOperatorX::get_block(RuntimeState* state, Block* block, bool* eos) { +Status UnionSourceOperatorX::get_block_impl(RuntimeState* state, Block* block, bool* eos) { auto& 
local_state = get_local_state(state); Defer set_eos {[&]() { // the eos check of union operator is complex, need check all logical if you want modify diff --git a/be/src/exec/operator/union_source_operator.h b/be/src/exec/operator/union_source_operator.h index e4858ea53d8fa3..f6f5d88d134319 100644 --- a/be/src/exec/operator/union_source_operator.h +++ b/be/src/exec/operator/union_source_operator.h @@ -69,7 +69,7 @@ class UnionSourceOperatorX MOCK_REMOVE(final) : public OperatorXget_block_after_projects(_state, block, &eos)); - RETURN_IF_ERROR(block->check_type_and_column()); _eos = eos; } @@ -717,7 +716,7 @@ Status PipelineTask::execute(bool* done) { } } }); - RETURN_IF_ERROR(block->check_type_and_column()); + status = _sink->sink(_state, block, _eos); if (_eos) { diff --git a/be/src/exprs/function/array/function_array_distance.h b/be/src/exprs/function/array/function_array_distance.h index e40618267b2590..12969c23e3481f 100644 --- a/be/src/exprs/function/array/function_array_distance.h +++ b/be/src/exprs/function/array/function_array_distance.h @@ -21,13 +21,12 @@ #include #include -#include - #include "common/exception.h" #include "common/status.h" #include "core/assert_cast.h" #include "core/column/column.h" #include "core/column/column_array.h" +#include "core/column/column_array_view.h" #include "core/column/column_const.h" #include "core/column/column_nullable.h" #include "core/data_type/data_type.h" @@ -37,7 +36,6 @@ #include "core/data_type/primitive_type.h" #include "core/types.h" #include "exec/common/util.hpp" -#include "exprs/function/array/function_array_utils.h" #include "exprs/function/function.h" namespace doris { @@ -118,133 +116,64 @@ class FunctionArrayDistance : public IFunction { // We want to make sure throw exception if input columns contain NULL. bool use_default_implementation_for_nulls() const override { return false; } - // Extract the ColumnArray from a column, unwrapping Nullable if present. - // Validates that no NULL values exist. 
- static const ColumnArray* _extract_array_column(const IColumn* col, const char* arg_name, - const String& func_name) { - if (col->is_nullable()) { - if (col->has_null()) { - throw doris::Exception(ErrorCode::INVALID_ARGUMENT, - "{} for function {} cannot be null", arg_name, func_name); - } - auto nullable = assert_cast(col); - return assert_cast(nullable->get_nested_column_ptr().get()); + // Validate that neither outer column nor inner array elements contain NULL. + // Distance functions always throw on NULL input. + static void _validate_no_nulls(const ColumnPtr& col, const char* arg_name, + const String& func_name) { + const IColumn* raw = col.get(); + + // Unwrap const + if (is_column_const(*raw)) { + raw = assert_cast(raw)->get_data_column_ptr().get(); } - return assert_cast(col); - } - // Extract the ColumnFloat32 data from an array column, unwrapping Nullable if present. - // Validates that no NULL elements exist within the array. - static const ColumnFloat32* _extract_float_data(const ColumnArray* arr, const char* arg_name, - const String& func_name) { - if (arr->get_data_ptr()->is_nullable()) { - if (arr->get_data_ptr()->has_null()) { + // Check outer nullable + if (raw->is_nullable()) { + if (raw->has_null()) { throw doris::Exception(ErrorCode::INVALID_ARGUMENT, - "{} for function {} cannot have null", arg_name, func_name); + "{} for function {} cannot be null", arg_name, func_name); } - auto nullable = assert_cast(arr->get_data_ptr().get()); - return assert_cast(nullable->get_nested_column_ptr().get()); + raw = assert_cast(raw)->get_nested_column_ptr().get(); } - return assert_cast(arr->get_data_ptr().get()); - } - // Holds the extracted float data pointer and dimension for a const array argument, - // avoiding repeated per-row extraction. - struct ConstArrayInfo { - const float* data = nullptr; - ssize_t dim = 0; - }; - - // Try to extract const array info from a column. 
If the column is ColumnConst, - // extract the float data pointer and dimension once; otherwise return nullopt. - std::optional _try_extract_const(const ColumnPtr& col, - const char* arg_name) const { - if (!is_column_const(*col)) { - return std::nullopt; + // Check inner nullable (array elements) + const auto& array_col = assert_cast(*raw); + if (array_col.get_data_ptr()->is_nullable() && array_col.get_data_ptr()->has_null()) { + throw doris::Exception(ErrorCode::INVALID_ARGUMENT, + "{} for function {} cannot have null", arg_name, func_name); } - auto const_col = assert_cast(col.get()); - const IColumn* inner = const_col->get_data_column_ptr().get(); - const ColumnArray* arr = _extract_array_column(inner, arg_name, get_name()); - const ColumnFloat32* float_col = _extract_float_data(arr, arg_name, get_name()); - ssize_t dim = static_cast(float_col->size()); - return ConstArrayInfo {float_col->get_data().data(), dim}; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) const override { - const auto& arg1 = block.get_by_position(arguments[0]); - const auto& arg2 = block.get_by_position(arguments[1]); - - // Try to handle const columns without expanding them. - auto const_info1 = _try_extract_const(arg1.column, "First argument"); - auto const_info2 = _try_extract_const(arg2.column, "Second argument"); - - // For non-const columns, expand and extract normally. 
- ColumnPtr materialized_col1, materialized_col2; - const ColumnArray* arr1 = nullptr; - const ColumnArray* arr2 = nullptr; - const ColumnFloat32* float1 = nullptr; - const ColumnFloat32* float2 = nullptr; - const ColumnOffset64* offset1 = nullptr; - const ColumnOffset64* offset2 = nullptr; - const IColumn::Offsets64* offsets_data1 = nullptr; - const IColumn::Offsets64* offsets_data2 = nullptr; - const float* float_data1 = nullptr; - const float* float_data2 = nullptr; - - if (!const_info1) { - materialized_col1 = arg1.column->convert_to_full_column_if_const(); - arr1 = _extract_array_column(materialized_col1.get(), "First argument", get_name()); - float1 = _extract_float_data(arr1, "First argument", get_name()); - offset1 = arr1->get_offsets_ptr().get(); - offsets_data1 = &offset1->get_data(); - float_data1 = float1->get_data().data(); - } + const auto& col1 = block.get_by_position(arguments[0]).column; + const auto& col2 = block.get_by_position(arguments[1]).column; - if (!const_info2) { - materialized_col2 = arg2.column->convert_to_full_column_if_const(); - arr2 = _extract_array_column(materialized_col2.get(), "Second argument", get_name()); - float2 = _extract_float_data(arr2, "Second argument", get_name()); - offset2 = arr2->get_offsets_ptr().get(); - offsets_data2 = &offset2->get_data(); - float_data2 = float2->get_data().data(); - } + // Validate no NULLs (distance functions always throw on NULL input) + _validate_no_nulls(col1, "First argument", get_name()); + _validate_no_nulls(col2, "Second argument", get_name()); + + // Create views — handles Const/Nullable unwrapping automatically + auto view1 = ColumnArrayView::create(col1); + auto view2 = ColumnArrayView::create(col2); - // prepare return data auto dst = ColumnType::create(input_rows_count); auto& dst_data = dst->get_data(); for (size_t row = 0; row < input_rows_count; ++row) { - const float* data_ptr1; - const float* data_ptr2; - ssize_t size1, size2; - const auto idx = static_cast(row); - - if 
(const_info1) { - data_ptr1 = const_info1->data; - size1 = const_info1->dim; - } else { - // -1 is valid for PaddedPODArray-backed offsets. - const auto prev_offset1 = (*offsets_data1)[idx - 1]; - size1 = (*offsets_data1)[idx] - prev_offset1; - data_ptr1 = float_data1 + prev_offset1; - } - - if (const_info2) { - data_ptr2 = const_info2->data; - size2 = const_info2->dim; - } else { - const auto prev_offset2 = (*offsets_data2)[idx - 1]; - size2 = (*offsets_data2)[idx] - prev_offset2; - data_ptr2 = float_data2 + prev_offset2; - } - - if (size1 != size2) [[unlikely]] { + auto a1 = view1[row]; + auto a2 = view2[row]; + const float* p1 = a1.get_data(); + const float* p2 = a2.get_data(); + auto dim1 = a1.size(); + auto dim2 = a2.size(); + + if (dim1 != dim2) [[unlikely]] { return Status::InvalidArgument( "function {} have different input element sizes of array: {} and {}", - get_name(), size1, size2); + get_name(), dim1, dim2); } - dst_data[row] = DistanceImpl::distance(data_ptr1, data_ptr2, size1); + dst_data[row] = DistanceImpl::distance(p1, p2, dim1); } block.replace_by_position(result, std::move(dst)); diff --git a/be/src/exprs/function/array/function_array_exists.cpp b/be/src/exprs/function/array/function_array_exists.cpp index 9009ba2f7550d8..ffa74d24e8d633 100644 --- a/be/src/exprs/function/array/function_array_exists.cpp +++ b/be/src/exprs/function/array/function_array_exists.cpp @@ -78,9 +78,8 @@ class FunctionArrayExists : public IFunction { nested_nullable_column.get_null_map_column_ptr()->clone_resized(nested_column_size); // 2. 
compute result - MutableColumnPtr result_column = ColumnUInt8::create(nested_column_size, 0); - auto* __restrict result_column_data = - assert_cast(*result_column).get_data().data(); + auto result_column = ColumnUInt8::create(nested_column_size, 0); + auto* __restrict result_column_data = result_column->get_data().data(); MutableColumnPtr result_offset_column = first_off_data.clone_resized(first_off_data.size()); const auto* __restrict nested_column_data = assert_cast(*nested_column).get_data().data(); diff --git a/be/src/exprs/function/array/function_array_join.h b/be/src/exprs/function/array/function_array_join.h index e140f20f63a4cf..48e98478fea1fd 100644 --- a/be/src/exprs/function/array/function_array_join.h +++ b/be/src/exprs/function/array/function_array_join.h @@ -18,12 +18,12 @@ #include "core/block/block.h" #include "core/column/column_array.h" +#include "core/column/column_array_view.h" #include "core/column/column_const.h" #include "core/column/column_execute_util.h" #include "core/data_type/data_type_array.h" #include "core/data_type/data_type_string.h" #include "core/string_ref.h" -#include "exprs/function/array/function_array_utils.h" namespace doris { @@ -59,22 +59,15 @@ struct ArrayJoinImpl { static Status execute(Block& block, const ColumnNumbers& arguments, uint32_t result, const DataTypeArray* data_type_array, const ColumnArray& array) { - ColumnPtr src_column = - block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); - ColumnArrayExecutionData src; - if (!extract_column_array_info(*src_column, src)) { - return Status::RuntimeError(fmt::format( - "execute failed, unsupported types for function {}({})", "array_join", - block.get_by_position(arguments[0]).type->get_name())); - } + ColumnPtr src_column = block.get_by_position(arguments[0]).column; + auto array_view = ColumnArrayView::create(src_column); - auto nested_type = data_type_array->get_nested_type(); auto dest_column_ptr = ColumnString::create(); auto& dest_chars = 
dest_column_ptr->get_chars(); auto& dest_offsets = dest_column_ptr->get_offsets(); - dest_offsets.resize_fill(src_column->size(), 0); + dest_offsets.resize_fill(array_view.size(), 0); auto sep_column = ColumnView::create(block.get_by_position(arguments[1]).column); @@ -83,8 +76,7 @@ struct ArrayJoinImpl { auto null_replace_column = ColumnView::create(block.get_by_position(arguments[2]).column); - _execute_string(*src.nested_col, *src.offsets_ptr, src.nested_nullmap_data, sep_column, - null_replace_column, dest_chars, dest_offsets); + _execute_string(array_view, sep_column, null_replace_column, dest_chars, dest_offsets); } else { auto tmp_column_string = ColumnString::create(); @@ -95,8 +87,7 @@ struct ArrayJoinImpl { auto null_replace_column = ColumnView::create(tmp_const_column); - _execute_string(*src.nested_col, *src.offsets_ptr, src.nested_nullmap_data, sep_column, - null_replace_column, dest_chars, dest_offsets); + _execute_string(array_view, sep_column, null_replace_column, dest_chars, dest_offsets); } block.replace_by_position(result, std::move(dest_column_ptr)); @@ -130,27 +121,23 @@ struct ArrayJoinImpl { } } - static void _execute_string(const IColumn& src_column, - const ColumnArray::Offsets64& src_offsets, - const UInt8* src_null_map, ColumnView& sep_column, + static void _execute_string(const ColumnArrayView& array_view, + ColumnView& sep_column, ColumnView& null_replace_column, ColumnString::Chars& dest_chars, ColumnString::Offsets& dest_offsets) { - const auto& src_data = assert_cast(src_column); - uint32_t total_size = 0; - for (int64_t i = 0; i < src_offsets.size(); ++i) { - auto begin = src_offsets[i - 1]; - auto end = src_offsets[i]; + for (int64_t i = 0; i < array_view.size(); ++i) { + auto arr = array_view[i]; auto sep_str = sep_column.value_at(i); auto null_replace_str = null_replace_column.value_at(i); bool is_first_elem = true; - for (size_t j = begin; j < end; ++j) { - if (src_null_map && src_null_map[j]) { + for (size_t j = 0; j < 
arr.size(); ++j) { + if (arr.is_null_at(j)) { if (null_replace_str.size != 0) { _fill_result_string(i, null_replace_str, sep_str, dest_chars, total_size, is_first_elem); @@ -158,7 +145,7 @@ struct ArrayJoinImpl { continue; } - StringRef src_str_ref = src_data.get_data_at(j); + StringRef src_str_ref = arr.value_at(j); _fill_result_string(i, src_str_ref, sep_str, dest_chars, total_size, is_first_elem); } diff --git a/be/src/exprs/function/cast/cast_to_map.h b/be/src/exprs/function/cast/cast_to_map.h index 80ad74b02d3063..ca6b8e5e90bd3a 100644 --- a/be/src/exprs/function/cast/cast_to_map.h +++ b/be/src/exprs/function/cast/cast_to_map.h @@ -22,15 +22,53 @@ namespace doris::CastWrapper { #include "common/compile_check_begin.h" + +inline Status deduplicate_map_keys_in_result(Block& block, uint32_t result) { + auto result_column_name = block.get_by_position(result).column->get_name(); + auto mutable_result_column = IColumn::mutate(std::move(block.get_by_position(result).column)); + + if (auto* nullable_column = check_and_get_column(*mutable_result_column)) { + auto nested_column = IColumn::mutate(nullable_column->get_nested_column_ptr()); + auto* map_column = check_and_get_column(*nested_column); + if (!map_column) { + return Status::RuntimeError("Illegal column {} for function CAST AS MAP", + result_column_name); + } + + RETURN_IF_ERROR(map_column->deduplicate_keys(true)); + ColumnPtr nested_column_ptr = std::move(nested_column); + nullable_column->change_nested_column(nested_column_ptr); + } else { + auto* map_column = check_and_get_column(*mutable_result_column); + if (!map_column) { + return Status::RuntimeError("Illegal column {} for function CAST AS MAP", + result_column_name); + } + + RETURN_IF_ERROR(map_column->deduplicate_keys(true)); + } + + block.get_by_position(result).column = std::move(mutable_result_column); + return Status::OK(); +} + +inline WrapperType wrap_string_to_map_wrapper(WrapperType wrapper) { + return [wrapper = 
std::move(wrapper)](FunctionContext* context, Block& block, + const ColumnNumbers& arguments, uint32_t result, + size_t input_rows_count, + const NullMap::value_type* null_map = nullptr) { + RETURN_IF_ERROR(wrapper(context, block, arguments, result, input_rows_count, null_map)); + return deduplicate_map_keys_in_result(block, result); + }; +} + //TODO(Amory) . Need support more cast for key , value for map WrapperType create_map_wrapper(FunctionContext* context, const DataTypePtr& from_type, const DataTypeMap& to_type) { if (is_string_type(from_type->get_primitive_type())) { - if (context->enable_strict_mode()) { - return cast_from_string_to_complex_type_strict_mode; - } else { - return cast_from_string_to_complex_type; - } + auto wrapper = context->enable_strict_mode() ? cast_from_string_to_complex_type_strict_mode + : cast_from_string_to_complex_type; + return wrap_string_to_map_wrapper(wrapper); } const auto* from = check_and_get_data_type(from_type.get()); if (!from) { @@ -83,4 +121,4 @@ WrapperType create_map_wrapper(FunctionContext* context, const DataTypePtr& from }; } #include "common/compile_check_end.h" -} // namespace doris::CastWrapper \ No newline at end of file +} // namespace doris::CastWrapper diff --git a/be/src/exprs/function/function_ip.h b/be/src/exprs/function/function_ip.h index 7f704b078de4c6..3278caa5e465ee 100644 --- a/be/src/exprs/function/function_ip.h +++ b/be/src/exprs/function/function_ip.h @@ -1346,10 +1346,10 @@ class FunctionIPv6FromUInt128StringOrNull : public IFunction { unpack_if_const(ipv6_column_with_type_and_name.column); const auto* ipv6_addr_column = assert_cast(ipv6_column.get()); // result is nullable column - auto col_res = ColumnNullable::create(ColumnIPv6::create(input_rows_count, 0), - ColumnUInt8::create(input_rows_count, 1)); - auto& col_res_data = assert_cast(&col_res->get_nested_column())->get_data(); - auto& res_null_map_data = col_res->get_null_map_data(); + auto col_res_nested = 
ColumnIPv6::create(input_rows_count, 0); + auto col_res_null_map = ColumnUInt8::create(input_rows_count, 1); + auto& col_res_data = col_res_nested->get_data(); + auto& res_null_map_data = col_res_null_map->get_data(); for (size_t i = 0; i < input_rows_count; ++i) { IPv6 ipv6 = 0; @@ -1365,7 +1365,8 @@ class FunctionIPv6FromUInt128StringOrNull : public IFunction { } } - block.replace_by_position(result, std::move(col_res)); + block.replace_by_position(result, ColumnNullable::create(std::move(col_res_nested), + std::move(col_res_null_map))); return Status::OK(); } }; diff --git a/be/src/exprs/function/function_jsonb.cpp b/be/src/exprs/function/function_jsonb.cpp index 28015309cdb6d7..e572573a3c825a 100644 --- a/be/src/exprs/function/function_jsonb.cpp +++ b/be/src/exprs/function/function_jsonb.cpp @@ -711,11 +711,11 @@ class FunctionJsonbExtractPath : public IFunction { return Status::OK(); }; - MutableColumnPtr result_null_map_column; + ColumnUInt8::MutablePtr result_null_map_column; NullMap* result_null_map = nullptr; if (data_null_map || path_null_map) { result_null_map_column = ColumnUInt8::create(input_rows_count, 0); - result_null_map = &static_cast(*result_null_map_column).get_data(); + result_null_map = &result_null_map_column->get_data(); if (data_null_map) { VectorizedUtils::update_null_map(*result_null_map, *data_null_map, diff --git a/be/src/exprs/function/function_other_types_to_date.cpp b/be/src/exprs/function/function_other_types_to_date.cpp index bc24d76df278ea..122fdb9338ad57 100644 --- a/be/src/exprs/function/function_other_types_to_date.cpp +++ b/be/src/exprs/function/function_other_types_to_date.cpp @@ -142,31 +142,29 @@ struct StrToDate { // Because of we cant distinguish by return_type when we find function. so the return_type may NOT be same with real return type // which decided by FE. we directly use block column's type which decided by FE. 
if (block.get_by_position(result).type->get_primitive_type() == TYPE_DATEV2) { - res = ColumnDateV2::create(input_rows_count); + auto res_column = ColumnDateV2::create(input_rows_count); if (col_const[1]) { - execute_impl_const_right( - context, ldata, loffsets, specific_char_column->get_data_at(0), - result_null_map, - static_cast(res->assert_mutable().get())->get_data()); + execute_impl_const_right(context, ldata, loffsets, + specific_char_column->get_data_at(0), + result_null_map, res_column->get_data()); } else { - execute_impl( - context, ldata, loffsets, rdata, roffsets, result_null_map, - static_cast(res->assert_mutable().get())->get_data()); + execute_impl(context, ldata, loffsets, rdata, roffsets, + result_null_map, res_column->get_data()); } + res = std::move(res_column); } else { DCHECK(block.get_by_position(result).type->get_primitive_type() == TYPE_DATETIMEV2); - res = ColumnDateTimeV2::create(input_rows_count); + auto res_column = ColumnDateTimeV2::create(input_rows_count); if (col_const[1]) { - execute_impl_const_right( - context, ldata, loffsets, specific_char_column->get_data_at(0), - result_null_map, - static_cast(res->assert_mutable().get())->get_data()); + execute_impl_const_right(context, ldata, loffsets, + specific_char_column->get_data_at(0), + result_null_map, res_column->get_data()); } else { - execute_impl( - context, ldata, loffsets, rdata, roffsets, result_null_map, - static_cast(res->assert_mutable().get())->get_data()); + execute_impl(context, ldata, loffsets, rdata, roffsets, + result_null_map, res_column->get_data()); } + res = std::move(res_column); } // Wrap result in nullable column only if input has nullable arguments @@ -292,17 +290,13 @@ struct MakeDateImpl { const auto* year_col = assert_cast(argument_columns[0].get()); const auto* dayofyear_col = assert_cast(argument_columns[1].get()); - ColumnPtr res_column; - - res_column = ColumnDateV2::create(input_rows_count); + auto res_column = ColumnDateV2::create(input_rows_count); 
if (col_const[1]) { - execute_impl_right_const( - year_col->get_data(), dayofyear_col->get_element(0), result_null_map, - static_cast(res_column->assert_mutable().get())->get_data()); + execute_impl_right_const(year_col->get_data(), dayofyear_col->get_element(0), + result_null_map, res_column->get_data()); } else { - execute_impl( - year_col->get_data(), dayofyear_col->get_data(), result_null_map, - static_cast(res_column->assert_mutable().get())->get_data()); + execute_impl(year_col->get_data(), dayofyear_col->get_data(), result_null_map, + res_column->get_data()); } // Wrap result in nullable column only if input has nullable arguments @@ -451,13 +445,6 @@ struct MakeTimeImpl { } }; -struct DateTruncState { - using Callback_function = - std::function; - Callback_function callback_function; - cctz::time_zone timezone; -}; - template struct DateTrunc { static constexpr auto name = "date_trunc"; @@ -465,6 +452,13 @@ struct DateTrunc { using ColumnType = typename PrimitiveTypeTraits::ColumnType; using DateValueType = typename PrimitiveTypeTraits::CppType; + struct State { + using CallbackFunction = + std::function; + CallbackFunction callback_function; + cctz::time_zone timezone; + }; + static bool is_variadic() { return true; } static size_t get_number_of_arguments() { return 2; } @@ -495,7 +489,7 @@ struct DateTrunc { std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) { return std::tolower(c); }); - std::shared_ptr state = std::make_shared(); + std::shared_ptr state = std::make_shared(); state->timezone = context->state()->timezone_obj(); if (std::strncmp("year", lower_str.data(), 4) == 0) { state->callback_function = &execute_impl_right_const; @@ -528,21 +522,22 @@ struct DateTrunc { const auto& datetime_column = block.get_by_position(arguments[DateArgIsFirst ? 
0 : 1]) .column->convert_to_full_column_if_const(); - ColumnPtr res = ColumnType::create(input_rows_count); - auto* state = reinterpret_cast( + auto res = ColumnType::create(input_rows_count); + auto* state = reinterpret_cast( context->get_function_state(FunctionContext::THREAD_LOCAL)); DCHECK(state != nullptr); - state->callback_function(datetime_column, res, input_rows_count, state->timezone); + state->callback_function(datetime_column, *res, input_rows_count, state->timezone); block.replace_by_position(result, std::move(res)); return Status::OK(); } private: template - static void execute_impl_right_const(const ColumnPtr& datetime_column, ColumnPtr& result_column, - size_t input_rows_count, const cctz::time_zone& timezone) { + static void execute_impl_right_const(const ColumnPtr& datetime_column, + ColumnType& result_column, size_t input_rows_count, + const cctz::time_zone& timezone) { auto& data = static_cast(datetime_column.get())->get_data(); - auto& res = static_cast(result_column->assert_mutable().get())->get_data(); + auto& res = result_column.get_data(); for (size_t i = 0; i < input_rows_count; ++i) { auto dt = data[i]; // datetime_trunc only raise only when dt invalid which is impossible. so we dont throw error better. 
@@ -609,15 +604,15 @@ class FromDays : public IFunction { ColumnPtr res_column; if (block.get_by_position(result).type->get_primitive_type() == PrimitiveType::TYPE_DATE) { - res_column = ColumnDate::create(input_rows_count); - _execute( - input_rows_count, data_col->get_data(), result_null_map, - static_cast(res_column->assert_mutable().get())->get_data()); + auto column_date = ColumnDate::create(input_rows_count); + _execute(input_rows_count, data_col->get_data(), result_null_map, + column_date->get_data()); + res_column = std::move(column_date); } else { - res_column = ColumnDateV2::create(input_rows_count); - _execute>( - input_rows_count, data_col->get_data(), result_null_map, - static_cast(res_column->assert_mutable().get())->get_data()); + auto column_datev2 = ColumnDateV2::create(input_rows_count); + _execute>(input_rows_count, data_col->get_data(), + result_null_map, column_datev2->get_data()); + res_column = std::move(column_datev2); } // Wrap result in nullable column only if input has nullable arguments @@ -1053,10 +1048,8 @@ struct LastDayImpl { const auto is_nullable = block.get_by_position(result).type->is_nullable(); auto data_col = assert_cast(argument_column.get()); auto res_column = ResultColumnType::create(input_rows_count); - execute_straight( - input_rows_count, data_col->get_data(), - static_cast(res_column->assert_mutable().get())->get_data(), - result_null_map); + execute_straight(input_rows_count, data_col->get_data(), res_column->get_data(), + result_null_map); if (is_nullable) { block.replace_by_position(result, @@ -1128,10 +1121,8 @@ struct ToMondayImpl { const auto is_nullable = block.get_by_position(result).type->is_nullable(); auto data_col = assert_cast(argument_column.get()); auto res_column = ResultColumnType::create(input_rows_count); - execute_straight( - input_rows_count, data_col->get_data(), - static_cast(res_column->assert_mutable().get())->get_data(), - result_null_map); + execute_straight(input_rows_count, 
data_col->get_data(), res_column->get_data(), + result_null_map); if (is_nullable) { block.replace_by_position(result, diff --git a/be/src/exprs/function/random.cpp b/be/src/exprs/function/random.cpp index b6fd52b225265d..797e477ae93e20 100644 --- a/be/src/exprs/function/random.cpp +++ b/be/src/exprs/function/random.cpp @@ -144,7 +144,7 @@ class Random : public IFunction { static const double min = 0.0; static const double max = 1.0; auto res_column = ColumnFloat64::create(input_rows_count); - auto& res_data = static_cast(*res_column).get_data(); + auto& res_data = res_column->get_data(); auto* generator = reinterpret_cast( context->get_function_state(FunctionContext::THREAD_LOCAL)); diff --git a/be/src/exprs/function/uniform.cpp b/be/src/exprs/function/uniform.cpp index e639df7a2958bb..d3e16fac0e2115 100644 --- a/be/src/exprs/function/uniform.cpp +++ b/be/src/exprs/function/uniform.cpp @@ -106,7 +106,7 @@ struct UniformDoubleImpl { const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) { auto res_column = ColumnFloat64::create(input_rows_count); - auto& res_data = static_cast(*res_column).get_data(); + auto& res_data = res_column->get_data(); // Get min and max values (constants) const auto& left = diff --git a/be/src/exprs/table_function/vexplode_numbers.cpp b/be/src/exprs/table_function/vexplode_numbers.cpp index 43a93ffe877279..344d0083c8c85c 100644 --- a/be/src/exprs/table_function/vexplode_numbers.cpp +++ b/be/src/exprs/table_function/vexplode_numbers.cpp @@ -65,14 +65,14 @@ Status VExplodeNumbersTableFunction::process_init(Block* block, RuntimeState* st _cur_size = assert_cast(column_nested.get())->get_element(0); } - ((ColumnInt32*)_elements_column.get())->clear(); + _elements_column->clear(); //_cur_size may be a negative number _cur_size = std::max(static_cast(0L), _cur_size); if (_cur_size && _cur_size <= state->batch_size()) { // avoid elements_column too big or empty _is_const = true; // use const optimize for (int i = 0; i < 
_cur_size; i++) { - ((ColumnInt32*)_elements_column.get())->insert_value(i); + _elements_column->insert_value(i); } } } diff --git a/be/src/exprs/table_function/vexplode_numbers.h b/be/src/exprs/table_function/vexplode_numbers.h index 4108416bb70c21..2c68395bed2934 100644 --- a/be/src/exprs/table_function/vexplode_numbers.h +++ b/be/src/exprs/table_function/vexplode_numbers.h @@ -85,7 +85,7 @@ class VExplodeNumbersTableFunction : public TableFunction { private: ColumnPtr _value_column; - ColumnPtr _elements_column = ColumnInt32::create(); + ColumnInt32::MutablePtr _elements_column = ColumnInt32::create(); }; #include "common/compile_check_end.h" diff --git a/be/src/storage/iterator/olap_data_convertor.h b/be/src/storage/iterator/olap_data_convertor.h index a23369f569e567..c0bda171d1ce2a 100644 --- a/be/src/storage/iterator/olap_data_convertor.h +++ b/be/src/storage/iterator/olap_data_convertor.h @@ -182,11 +182,10 @@ class OlapBlockDataConvertor { static ColumnPtr clone_and_padding(const ColumnString* input, size_t padding_length) { auto column = ColumnString::create(); - auto padded_column = assert_cast(column->assert_mutable().get()); column->offsets.resize(input->size()); column->chars.resize(input->size() * padding_length); - memset(padded_column->chars.data(), 0, input->size() * padding_length); + memset(column->chars.data(), 0, input->size() * padding_length); for (size_t i = 0; i < input->size(); i++) { column->offsets[i] = cast_set((i + 1) * padding_length); @@ -198,7 +197,7 @@ class OlapBlockDataConvertor { << ", real=" << str.size; if (str.size) { - memcpy(padded_column->chars.data() + i * padding_length, str.data, str.size); + memcpy(column->chars.data() + i * padding_length, str.data, str.size); } } diff --git a/be/src/storage/segment/variant/hierarchical_data_iterator.cpp b/be/src/storage/segment/variant/hierarchical_data_iterator.cpp index 28fa108b3b8588..7dad2b767ff3f4 100644 --- a/be/src/storage/segment/variant/hierarchical_data_iterator.cpp +++ 
b/be/src/storage/segment/variant/hierarchical_data_iterator.cpp @@ -224,11 +224,9 @@ Status HierarchicalDataIterator::_process_nested_columns( // will type the type of ColumnVariant::NESTED_TYPE, whih is Nullable>. for (const auto& entry : nested_subcolumns) { const auto* base_array = - check_and_get_column(*remove_nullable(entry.second[0].column)); - MutableColumnPtr nested_object = - ColumnVariant::create(0, false, base_array->get_data().size()); + assert_cast(remove_nullable(entry.second[0].column).get()); + auto nested_object_variant = ColumnVariant::create(0, false, base_array->get_data().size()); MutableColumnPtr offset = IColumn::mutate(base_array->get_offsets_ptr()); - auto* nested_object_ptr = assert_cast(nested_object.get()); // flatten nested arrays for (const auto& subcolumn : entry.second) { const auto& column = subcolumn.column; @@ -253,13 +251,13 @@ Status HierarchicalDataIterator::_process_nested_columns( check_and_get_data_type(remove_nullable(type).get()) ->get_nested_type(); // add sub path without parent prefix - nested_object_ptr->add_sub_column( + nested_object_variant->add_sub_column( subcolumn.path.copy_pop_nfront(entry.first.get_parts().size()), std::move(flattend_column), std::move(flattend_type)); } - const size_t nested_object_size = nested_object->size(); - nested_object = ColumnNullable::create(std::move(nested_object), - ColumnUInt8::create(nested_object_size, 0)); + const size_t nested_object_size = nested_object_variant->size(); + MutableColumnPtr nested_object = ColumnNullable::create( + std::move(nested_object_variant), ColumnUInt8::create(nested_object_size, 0)); auto array = ColumnArray::create(std::move(nested_object), std::move(offset)); const size_t array_size = array->size(); auto nullable_array = diff --git a/be/src/storage/segment/variant/variant_doc_snpashot_compact_iterator.h b/be/src/storage/segment/variant/variant_doc_snpashot_compact_iterator.h index ae524630a1a477..33430ba8bf9ac9 100644 --- 
a/be/src/storage/segment/variant/variant_doc_snpashot_compact_iterator.h +++ b/be/src/storage/segment/variant/variant_doc_snpashot_compact_iterator.h @@ -56,11 +56,10 @@ class VariantDocValueCompactIterator : public ColumnIterator { Status _set_doc_value_into_variant(MutableColumnPtr& dst, MutableColumnPtr&& doc_value_column, size_t count) const { auto& variant = assert_cast(*dst); - MutableColumnPtr container = ColumnVariant::create(variant.max_subcolumns_count(), - variant.enable_doc_mode(), count); - auto& container_variant = assert_cast(*container); - container_variant.set_doc_value_column(std::move(doc_value_column)); - variant.insert_range_from(container_variant, 0, count); + auto container = ColumnVariant::create(variant.max_subcolumns_count(), + variant.enable_doc_mode(), count); + container->set_doc_value_column(std::move(doc_value_column)); + variant.insert_range_from(*container, 0, count); return Status::OK(); } diff --git a/be/src/util/string_parser.hpp b/be/src/util/string_parser.hpp index c2be9a709cf0a2..d2a325cbb88123 100644 --- a/be/src/util/string_parser.hpp +++ b/be/src/util/string_parser.hpp @@ -36,7 +36,6 @@ #include #include -#include "common/compiler_util.h" // IWYU pragma: keep #include "common/status.h" #include "core/data_type/number_traits.h" #include "core/data_type/primitive_type.h" @@ -266,7 +265,7 @@ uint32_t count_valid_length(const char* s, const char* end) { inline auto count_digits = count_valid_length; -inline PURE std::string combine_tz_offset(char sign, uint32_t hour_offset, uint32_t minute_offset) { +inline std::string combine_tz_offset(char sign, uint32_t hour_offset, uint32_t minute_offset) { std::string result(6, '0'); result[0] = sign; result[1] = '0' + (hour_offset / 10); diff --git a/be/src/util/thrift_util.h b/be/src/util/thrift_util.h index a7d6620d5d31f3..f73d7ff6aa6335 100644 --- a/be/src/util/thrift_util.h +++ b/be/src/util/thrift_util.h @@ -175,8 +175,8 @@ void t_network_address_to_string(const TNetworkAddress& 
address, std::string* ou // string representation bool t_network_address_comparator(const TNetworkAddress& a, const TNetworkAddress& b); -PURE std::string to_string(const TUniqueId& id); +std::string to_string(const TUniqueId& id); -PURE bool _has_inverted_index_v1_or_partial_update(TOlapTableSink sink); +bool _has_inverted_index_v1_or_partial_update(TOlapTableSink sink); } // namespace doris diff --git a/be/test/core/column/column_array_view_test.cpp b/be/test/core/column/column_array_view_test.cpp new file mode 100644 index 00000000000000..39c9696a8f4fe6 --- /dev/null +++ b/be/test/core/column/column_array_view_test.cpp @@ -0,0 +1,292 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "core/column/column_array_view.h" + +#include + +#include "core/column/column_array.h" +#include "core/column/column_const.h" +#include "core/column/column_nullable.h" +#include "core/column/column_string.h" +#include "core/column/column_vector.h" +#include "core/data_type/data_type_number.h" +#include "core/data_type/data_type_string.h" +#include "testutil/column_helper.h" + +namespace doris { + +// Helper: build a ColumnArray with Nullable(ColumnInt32) nested data. 
+// arrays: each inner vector is one row's array elements +// element_nulls: parallel to the flattened data, 1 = null +// row_nulls: per-row outer null (empty means no outer nullable wrapper) +static ColumnPtr build_int32_array_column(const std::vector>& arrays, + const std::vector& element_nulls, + const std::vector& row_nulls = {}) { + // Build nested data column (Nullable(Int32)) + auto data_col = ColumnInt32::create(); + auto null_col = ColumnUInt8::create(); + size_t flat_idx = 0; + for (const auto& arr : arrays) { + for (auto val : arr) { + data_col->insert_value(val); + null_col->insert_value(flat_idx < element_nulls.size() ? element_nulls[flat_idx] : 0); + flat_idx++; + } + } + auto nullable_data = ColumnNullable::create(std::move(data_col), std::move(null_col)); + + // Build offsets + auto offsets = ColumnArray::ColumnOffsets::create(); + size_t offset = 0; + for (const auto& arr : arrays) { + offset += arr.size(); + offsets->insert_value(offset); + } + + ColumnPtr array_col = ColumnArray::create(std::move(nullable_data), std::move(offsets)); + + // Wrap in outer Nullable if row_nulls provided + if (!row_nulls.empty()) { + auto outer_null = ColumnUInt8::create(); + for (auto v : row_nulls) { + outer_null->insert_value(v); + } + array_col = ColumnNullable::create(IColumn::mutate(std::move(array_col)), + std::move(outer_null)); + } + return array_col; +} + +// Helper: build a ColumnArray with Nullable(ColumnString) nested data. +static ColumnPtr build_string_array_column(const std::vector>& arrays, + const std::vector& element_nulls, + const std::vector& row_nulls = {}) { + auto data_col = ColumnString::create(); + auto null_col = ColumnUInt8::create(); + size_t flat_idx = 0; + for (const auto& arr : arrays) { + for (const auto& val : arr) { + data_col->insert_data(val.data(), val.size()); + null_col->insert_value(flat_idx < element_nulls.size() ? 
element_nulls[flat_idx] : 0); + flat_idx++; + } + } + auto nullable_data = ColumnNullable::create(std::move(data_col), std::move(null_col)); + + auto offsets = ColumnArray::ColumnOffsets::create(); + size_t offset = 0; + for (const auto& arr : arrays) { + offset += arr.size(); + offsets->insert_value(offset); + } + + ColumnPtr array_col = ColumnArray::create(std::move(nullable_data), std::move(offsets)); + + if (!row_nulls.empty()) { + auto outer_null = ColumnUInt8::create(); + for (auto v : row_nulls) { + outer_null->insert_value(v); + } + array_col = ColumnNullable::create(IColumn::mutate(std::move(array_col)), + std::move(outer_null)); + } + return array_col; +} + +// ==================== ArrayDataView (index-based) Tests ==================== + +// Test basic non-nullable, non-const array column +// Row 0: [10, 20, 30], Row 1: [40], Row 2: [50, 60] +TEST(ColumnArrayViewTest, IndexAccess_basic) { + auto col = build_int32_array_column({{10, 20, 30}, {40}, {50, 60}}, {0, 0, 0, 0, 0, 0}); + auto view = ColumnArrayView::create(col); + + EXPECT_EQ(view.size(), 3); + EXPECT_FALSE(view.is_const); + + // Row 0 + EXPECT_FALSE(view.is_null_at(0)); + auto arr0 = view[0]; + EXPECT_EQ(arr0.size(), 3); + EXPECT_EQ(arr0.value_at(0), 10); + EXPECT_EQ(arr0.value_at(1), 20); + EXPECT_EQ(arr0.value_at(2), 30); + EXPECT_FALSE(arr0.is_null_at(0)); + EXPECT_FALSE(arr0.is_null_at(1)); + EXPECT_FALSE(arr0.is_null_at(2)); + + // Row 1 + auto arr1 = view[1]; + EXPECT_EQ(arr1.size(), 1); + EXPECT_EQ(arr1.value_at(0), 40); + + // Row 2 + auto arr2 = view[2]; + EXPECT_EQ(arr2.size(), 2); + EXPECT_EQ(arr2.value_at(0), 50); + EXPECT_EQ(arr2.value_at(1), 60); +} + +TEST(ColumnArrayViewTest, IndexAccess_get_data) { + auto col = build_int32_array_column({{10, 20, 30}, {40}, {50, 60}}, {0, 0, 0, 0, 0, 0}); + auto view = ColumnArrayView::create(col); + + auto arr0 = view[0]; + const auto* data0 = arr0.get_data(); + ASSERT_NE(data0, nullptr); + EXPECT_EQ(data0[0], 10); + EXPECT_EQ(data0[1], 20); + 
EXPECT_EQ(data0[2], 30); + + auto arr1 = view[1]; + const auto* data1 = arr1.get_data(); + ASSERT_NE(data1, nullptr); + EXPECT_EQ(data1[0], 40); + + auto arr2 = view[2]; + const auto* data2 = arr2.get_data(); + ASSERT_NE(data2, nullptr); + EXPECT_EQ(data2[0], 50); + EXPECT_EQ(data2[1], 60); +} + +// Test with null elements inside arrays +// Row 0: [1, NULL, 3], Row 1: [NULL] +TEST(ColumnArrayViewTest, IndexAccess_with_null_elements) { + auto col = build_int32_array_column({{1, 0, 3}, {0}}, {0, 1, 0, 1}); + auto view = ColumnArrayView::create(col); + + EXPECT_EQ(view.size(), 2); + + auto arr0 = view[0]; + EXPECT_EQ(arr0.size(), 3); + EXPECT_FALSE(arr0.is_null_at(0)); + EXPECT_TRUE(arr0.is_null_at(1)); + EXPECT_FALSE(arr0.is_null_at(2)); + EXPECT_EQ(arr0.value_at(0), 1); + EXPECT_EQ(arr0.value_at(2), 3); + + auto arr1 = view[1]; + EXPECT_EQ(arr1.size(), 1); + EXPECT_TRUE(arr1.is_null_at(0)); +} + +// Test with outer nullable (some rows are entirely null) +// Row 0: [1, 2], Row 1: NULL, Row 2: [5] +TEST(ColumnArrayViewTest, IndexAccess_outer_nullable) { + auto col = build_int32_array_column({{1, 2}, {0}, {5}}, {0, 0, 0, 0}, {0, 1, 0}); + auto view = ColumnArrayView::create(col); + + EXPECT_EQ(view.size(), 3); + EXPECT_FALSE(view.is_null_at(0)); + EXPECT_TRUE(view.is_null_at(1)); + EXPECT_FALSE(view.is_null_at(2)); + + auto arr0 = view[0]; + EXPECT_EQ(arr0.size(), 2); + EXPECT_EQ(arr0.value_at(0), 1); + EXPECT_EQ(arr0.value_at(1), 2); + + auto arr2 = view[2]; + EXPECT_EQ(arr2.size(), 1); + EXPECT_EQ(arr2.value_at(0), 5); +} + +// Test const column: Const(Array([10, 20])) with 4 rows +TEST(ColumnArrayViewTest, IndexAccess_const) { + auto inner = build_int32_array_column({{10, 20}}, {0, 0}); + ColumnPtr const_col = ColumnConst::create(inner, 4); + auto view = ColumnArrayView::create(const_col); + + EXPECT_EQ(view.size(), 4); + EXPECT_TRUE(view.is_const); + + for (size_t i = 0; i < 4; ++i) { + EXPECT_FALSE(view.is_null_at(i)); + auto arr = view[i]; + EXPECT_EQ(arr.size(), 
2); + EXPECT_EQ(arr.value_at(0), 10); + EXPECT_EQ(arr.value_at(1), 20); + } +} + +// Test Const(Nullable(Array([7, 8, 9]))) with 3 rows, non-null +TEST(ColumnArrayViewTest, IndexAccess_const_nullable) { + auto inner = build_int32_array_column({{7, 8, 9}}, {0, 0, 0}, {0}); + ColumnPtr const_col = ColumnConst::create(inner, 3); + auto view = ColumnArrayView::create(const_col); + + EXPECT_EQ(view.size(), 3); + EXPECT_TRUE(view.is_const); + + for (size_t i = 0; i < 3; ++i) { + EXPECT_FALSE(view.is_null_at(i)); + auto arr = view[i]; + EXPECT_EQ(arr.size(), 3); + EXPECT_EQ(arr.value_at(0), 7); + EXPECT_EQ(arr.value_at(1), 8); + EXPECT_EQ(arr.value_at(2), 9); + } +} + +// Test Const(Nullable(NULL)) with 3 rows, all null +TEST(ColumnArrayViewTest, IndexAccess_const_nullable_null) { + // Build one-row array, then wrap as nullable with null=1, then const + auto inner = build_int32_array_column({{0}}, {0}, {1}); + ColumnPtr const_col = ColumnConst::create(inner, 3); + auto view = ColumnArrayView::create(const_col); + + EXPECT_EQ(view.size(), 3); + EXPECT_TRUE(view.is_const); + + for (size_t i = 0; i < 3; ++i) { + EXPECT_TRUE(view.is_null_at(i)); + } +} + +// Test empty array rows +// Row 0: [], Row 1: [100], Row 2: [] +TEST(ColumnArrayViewTest, IndexAccess_empty_arrays) { + auto col = build_int32_array_column({{}, {100}, {}}, {0}); + auto view = ColumnArrayView::create(col); + + EXPECT_EQ(view.size(), 3); + EXPECT_EQ(view[0].size(), 0); + EXPECT_EQ(view[1].size(), 1); + EXPECT_EQ(view[1].value_at(0), 100); + EXPECT_EQ(view[2].size(), 0); +} + +// Test string array +// Row 0: ["hello", "world"], Row 1: ["test"] +TEST(ColumnArrayViewTest, IndexAccess_string) { + auto col = build_string_array_column({{"hello", "world"}, {"test"}}, {0, 0, 0}); + auto view = ColumnArrayView::create(col); + + EXPECT_EQ(view.size(), 2); + auto arr0 = view[0]; + EXPECT_EQ(arr0.size(), 2); + EXPECT_EQ(arr0.value_at(0).to_string(), "hello"); + EXPECT_EQ(arr0.value_at(1).to_string(), "world"); + + auto 
arr1 = view[1]; + EXPECT_EQ(arr1.size(), 1); + EXPECT_EQ(arr1.value_at(0).to_string(), "test"); +} + +} // namespace doris diff --git a/be/test/exec/operator/agg_operator_test.cpp b/be/test/exec/operator/agg_operator_test.cpp index ae750013c84423..b3178af7a8d8d5 100644 --- a/be/test/exec/operator/agg_operator_test.cpp +++ b/be/test/exec/operator/agg_operator_test.cpp @@ -92,6 +92,23 @@ struct MockAggSourceOperator : public AggSourceOperatorX { std::unique_ptr mock_row_descriptor; }; +class MockDistributionOperator final : public OperatorX { +public: + MockDistributionOperator(ExchangeType exchange_type) : _exchange_type(exchange_type) {} + + Status get_block_impl(RuntimeState* /*state*/, Block* /*block*/, bool* eos) override { + *eos = true; + return Status::OK(); + } + + DataDistribution required_data_distribution(RuntimeState* /*state*/) const override { + return {_exchange_type}; + } + +private: + ExchangeType _exchange_type; +}; + std::shared_ptr create_agg_sink_op(OperatorContext& ctx, bool is_merge, bool without_key) { auto op = std::make_shared(); diff --git a/be/test/exec/operator/analytic_sink_operator_test.cpp b/be/test/exec/operator/analytic_sink_operator_test.cpp index 517c73642ce393..b5e5787e8a6049 100644 --- a/be/test/exec/operator/analytic_sink_operator_test.cpp +++ b/be/test/exec/operator/analytic_sink_operator_test.cpp @@ -41,7 +41,9 @@ class MockAnalyticSinkOperator : public OperatorXBase { return Status::OK(); } - Status get_block(RuntimeState* state, Block* block, bool* eos) override { return Status::OK(); } + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override { + return Status::OK(); + } Status setup_local_state(RuntimeState* state, LocalStateInfo& info) override { return Status::OK(); } diff --git a/be/test/exec/operator/partition_sort_sink_operator_test.cpp b/be/test/exec/operator/partition_sort_sink_operator_test.cpp index 744ca8e84521f0..36a90bf5a381e1 100644 --- 
a/be/test/exec/operator/partition_sort_sink_operator_test.cpp +++ b/be/test/exec/operator/partition_sort_sink_operator_test.cpp @@ -37,7 +37,9 @@ class PartitionSortOperatorMockOperator : public OperatorXBase { return Status::OK(); } - Status get_block(RuntimeState* state, Block* block, bool* eos) override { return Status::OK(); } + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override { + return Status::OK(); + } Status setup_local_state(RuntimeState* state, LocalStateInfo& info) override { return Status::OK(); } diff --git a/be/test/exec/operator/partitioned_aggregation_test_helper.h b/be/test/exec/operator/partitioned_aggregation_test_helper.h index 5ecfe8dd29771f..da0881e84ead6b 100644 --- a/be/test/exec/operator/partitioned_aggregation_test_helper.h +++ b/be/test/exec/operator/partitioned_aggregation_test_helper.h @@ -83,7 +83,9 @@ class MockPartitionedAggSinkOperatorX : public PartitionedAggSinkOperatorX { return Status::OK(); } - Status sink(RuntimeState* state, Block* in_block, bool eos) override { return Status::OK(); } + Status sink_impl(RuntimeState* state, Block* in_block, bool eos) override { + return Status::OK(); + } }; class MockPartitionedAggLocalState : public PartitionedAggLocalState { diff --git a/be/test/exec/operator/partitioned_hash_join_test_helper.h b/be/test/exec/operator/partitioned_hash_join_test_helper.h index 9dcbb7335f560c..ba4ff6612886ed 100644 --- a/be/test/exec/operator/partitioned_hash_join_test_helper.h +++ b/be/test/exec/operator/partitioned_hash_join_test_helper.h @@ -115,7 +115,9 @@ class MockHashJoinBuildOperator : public HashJoinBuildSinkOperatorX { return Status::OK(); } - Status sink(RuntimeState* state, Block* in_block, bool eos) override { return Status::OK(); } + Status sink_impl(RuntimeState* state, Block* in_block, bool eos) override { + return Status::OK(); + } std::string get_memory_usage_debug_str(RuntimeState* state) const override { return "mock"; } }; diff --git 
a/be/test/exec/operator/query_cache_operator_test.cpp b/be/test/exec/operator/query_cache_operator_test.cpp index a99e9bcb9d9e1a..91c73b99077247 100644 --- a/be/test/exec/operator/query_cache_operator_test.cpp +++ b/be/test/exec/operator/query_cache_operator_test.cpp @@ -36,7 +36,9 @@ class QueryCacheMockChildOperator : public OperatorXBase { return Status::OK(); } - Status get_block(RuntimeState* state, Block* block, bool* eos) override { return Status::OK(); } + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override { + return Status::OK(); + } Status setup_local_state(RuntimeState* state, LocalStateInfo& info) override { return Status::OK(); } diff --git a/be/test/exec/operator/sort_operator_test.cpp b/be/test/exec/operator/sort_operator_test.cpp index 23fa37e57b01ef..62cb3d448f9092 100644 --- a/be/test/exec/operator/sort_operator_test.cpp +++ b/be/test/exec/operator/sort_operator_test.cpp @@ -36,7 +36,9 @@ class MockOperator : public OperatorXBase { return Status::OK(); } - Status get_block(RuntimeState* state, Block* block, bool* eos) override { return Status::OK(); } + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override { + return Status::OK(); + } Status setup_local_state(RuntimeState* state, LocalStateInfo& info) override { return Status::OK(); } diff --git a/be/test/exec/operator/spill_sort_test_helper.h b/be/test/exec/operator/spill_sort_test_helper.h index c887212b2fd1f8..81ca44ce2bd33b 100644 --- a/be/test/exec/operator/spill_sort_test_helper.h +++ b/be/test/exec/operator/spill_sort_test_helper.h @@ -53,7 +53,7 @@ class MockSortSourceOperatorX : public SortSourceOperatorX { const DescriptorTbl& descs) : SortSourceOperatorX(pool, tnode, operator_id, descs) {} - Status get_block(RuntimeState* state, Block* block, bool* eos) override { + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override { std::swap(*block, this->block); *eos = this->eos; return Status::OK(); diff --git 
a/be/test/exec/operator/streaming_agg_operator_test.cpp b/be/test/exec/operator/streaming_agg_operator_test.cpp index 0421d58bfd256b..d56e88ec5a04a9 100644 --- a/be/test/exec/operator/streaming_agg_operator_test.cpp +++ b/be/test/exec/operator/streaming_agg_operator_test.cpp @@ -65,7 +65,9 @@ class MockStreamingAggOperatorChildOperator : public OperatorXBase { return Status::OK(); } - Status get_block(RuntimeState* state, Block* block, bool* eos) override { return Status::OK(); } + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override { + return Status::OK(); + } Status setup_local_state(RuntimeState* state, LocalStateInfo& info) override { return Status::OK(); } diff --git a/be/test/exec/operator/table_function_operator_test.cpp b/be/test/exec/operator/table_function_operator_test.cpp index 24217b7eb8d824..1da139707cae47 100644 --- a/be/test/exec/operator/table_function_operator_test.cpp +++ b/be/test/exec/operator/table_function_operator_test.cpp @@ -53,7 +53,9 @@ class MockTableFunctionChildOperator : public OperatorXBase { return Status::OK(); } - Status get_block(RuntimeState* state, Block* block, bool* eos) override { return Status::OK(); } + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override { + return Status::OK(); + } Status setup_local_state(RuntimeState* state, LocalStateInfo& info) override { return Status::OK(); } diff --git a/be/test/exec/pipeline/pipeline_task_test.cpp b/be/test/exec/pipeline/pipeline_task_test.cpp index 0780498e29f1fd..33b94ebfd8362b 100644 --- a/be/test/exec/pipeline/pipeline_task_test.cpp +++ b/be/test/exec/pipeline/pipeline_task_test.cpp @@ -113,7 +113,9 @@ class CountingBlockableSinkOperator final : public DataSinkOperatorX(op_id, node_id, dest_id), _blockable_checks(blockable_checks) {} - Status sink(RuntimeState* state, Block* in_block, bool eos) override { return Status::OK(); } + Status sink_impl(RuntimeState* state, Block* in_block, bool eos) override { + return Status::OK(); 
+ } bool is_blockable(RuntimeState* state) const override { _blockable_checks->fetch_add(1, std::memory_order_relaxed); diff --git a/be/test/testutil/mock/mock_operators.h b/be/test/testutil/mock/mock_operators.h index bba11eb74737c8..1077a767018fd7 100644 --- a/be/test/testutil/mock/mock_operators.h +++ b/be/test/testutil/mock/mock_operators.h @@ -34,7 +34,7 @@ class MockChildOperator : public OperatorXBase { return Status::OK(); } - Status get_block(RuntimeState* state, Block* block, bool* eos) override { + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override { block->swap(_block); *eos = _eos; return Status::OK(); @@ -57,7 +57,7 @@ class MockSourceOperator : public MockChildOperator { class MockSinkOperator final : public DataSinkOperatorXBase { public: - Status sink(RuntimeState* state, Block* block, bool eos) override { return Status::OK(); } + Status sink_impl(RuntimeState* state, Block* block, bool eos) override { return Status::OK(); } Status setup_local_state(RuntimeState* state, LocalSinkStateInfo& info) override { return Status::OK(); diff --git a/be/test/util/profile_spec_test.cpp b/be/test/util/profile_spec_test.cpp index 9d2561416120b7..56f7a98f4a45df 100644 --- a/be/test/util/profile_spec_test.cpp +++ b/be/test/util/profile_spec_test.cpp @@ -96,7 +96,7 @@ class ProfileSpecTest : public testing::Test { Status prepare(RuntimeState* state) override { return Status::OK(); } Status open(RuntimeState* state) { return Status::OK(); } Status close(RuntimeState* state) override { return Status::OK(); } - Status get_block(RuntimeState* state, Block* block, bool* eos) override { + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override { return Status::OK(); } }; @@ -112,7 +112,7 @@ class ProfileSpecTest : public testing::Test { Status prepare(RuntimeState* state) override { return Status::OK(); } Status close(RuntimeState* state) override { return Status::OK(); } - Status get_block(RuntimeState* state, Block* block, 
bool* eos) override { + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override { *eos = true; block->swap(_block); return Status::OK(); diff --git a/regression-test/data/doc/sql-manual/basic-elements/data-types/map-md.out b/regression-test/data/doc/sql-manual/basic-elements/data-types/map-md.out index a9030f76745d74..db9b0c7442766d 100644 --- a/regression-test/data/doc/sql-manual/basic-elements/data-types/map-md.out +++ b/regression-test/data/doc/sql-manual/basic-elements/data-types/map-md.out @@ -77,6 +77,21 @@ -- !sql -- {"key1":1, "key2":2} +-- !sql -- +{"a":2} + +-- !sql -- +1 + +-- !sql -- +2 + +-- !sql -- +{"outer":{"a":2}} + +-- !sql -- +2 + -- !sql -- {"key1":1, "key2":2} 2 diff --git a/regression-test/suites/doc/sql-manual/basic-elements/data-types/map-md.groovy b/regression-test/suites/doc/sql-manual/basic-elements/data-types/map-md.groovy index 14e7c4596db8b4..7dc32500d3a46a 100644 --- a/regression-test/suites/doc/sql-manual/basic-elements/data-types/map-md.groovy +++ b/regression-test/suites/doc/sql-manual/basic-elements/data-types/map-md.groovy @@ -159,6 +159,16 @@ suite("map-md", "p0") { qt_sql """ SELECT CAST('{"key1":1,"key2":2}' AS MAP) """ + qt_sql """ SELECT CAST('{"a":1,"a":2}' AS MAP) """ + + qt_sql """ SELECT size(CAST('{"a":1,"a":2}' AS MAP)) """ + + qt_sql """ SELECT element_at(CAST('{"a":1,"a":2}' AS MAP), 'a') """ + + qt_sql """ SELECT CAST('{"outer":{"a":1,"a":2}}' AS MAP>) """ + + qt_sql """ SELECT element_at(element_at(CAST('{"outer":{"a":1,"a":2}}' AS MAP>), 'outer'), 'a') """ + sql """ DROP TABLE IF EXISTS ${tableName}; """ sql """ CREATE TABLE IF NOT EXISTS ${tableName} (