From 1a8376080ada5679208a10afeba38ea9a226ebd9 Mon Sep 17 00:00:00 2001 From: Mryange Date: Mon, 25 May 2026 11:48:20 +0800 Subject: [PATCH 1/6] [refine](array) introduce ColumnArrayView to unify array column access in array functions (#63386) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Issue Number: N/A Problem Summary: Array functions like `array_distance` and `array_join` previously required hand-written boilerplate to unwrap `Const`, `Nullable`, and plain `ColumnArray` variants before accessing element data. This led to duplicated code, manual offset arithmetic, and a proliferation of helper structs (`ConstArrayInfo`, `ColumnArrayExecutionData`, etc.). Root cause: there was no shared abstraction for "read a row of an array column regardless of its outer wrapper". Each function solved this independently, accumulating inconsistent patterns. This PR introduces `ColumnArrayView` (and its row-accessor `ArrayDataView`) in `be/src/core/column/column_array_view.h`. The view is created once via `ColumnArrayView::create(col)` and handles Const/Nullable unwrapping automatically. Per-row access via `operator[](row)` returns an `ArrayDataView` with `get_data()`, `size()`, and `is_null_at()` — a uniform interface regardless of the underlying column shape. For ultra-light nullable primitive loops, `ColumnArrayView` also exposes flat-access helpers (`get_data()`, `get_null_map_data()`, `row_begin()`, `row_end()`) so callers can keep wrapper unwrapping centralized while still iterating directly over the flattened buffers when benchmark data shows that per-element row-view access would regress. 
**Benchmark results** (4096 rows, RELEASE build, `--benchmark_repetitions=5` on a shared host with CPU scaling enabled; raw outputs saved in `benchmark_array_view_raw_results_20260519.txt` and `benchmark_array_view_distance_split_raw_results_20260519.txt`): **Row-view access (`operator[]` / `ArrayDataView`)** | Scenario | Handwritten CPU (ns) | ColumnArrayView CPU (ns) | Delta | |---|---|---|---| | Distance Plain/Plain | 322530 | 311276 | **-3.5%** | | Distance Const/Plain | 301473 | 289794 | **-3.9%** | | Distance Nullable/Plain | 305970 | 313687 | +2.5% | | Int64 Plain sum | 15971 | 16036 | +0.4% | | Int64 WithNulls sum | 26700 | 29497 | +10.5% | | String Plain len-sum | 16857 | 17120 | +1.6% | | Int64 Const sum | 16051 | 16148 | +0.6% | | Int64 Nullable sum | 16198 | 16174 | -0.1% | **Flat-access follow-up (`get_data()` / `get_null_map_data()` / `row_begin()` / `row_end()`)** | Scenario | Handwritten CPU (ns) | ColumnArrayView Flat CPU (ns) | Delta | |---|---|---|---| | Int64 WithNulls sum | 26700 | 26765 | +0.2% | | Distance Plain/Plain | 322530 | 301274 | **-6.6%** | | Distance Const/Plain | 301473 | 314259 | +4.2% | | Distance Nullable/Plain | 305970 | 314077 | +2.7% | Most production-shaped cases stay within a few percent on this shared host. The only stable double-digit regression is the synthetic `Int64 WithNulls` microbenchmark, where each element performs only `if (!null) sum += val`. The flat-access helper path removes that regression (+0.2% vs handwritten) while keeping `Const` / `Nullable` unwrapping centralized in `ColumnArrayView`. 
Because these numbers were collected on a shared machine with CPU scaling enabled, the distance cases show visible run-to-run noise. (cherry picked from commit 73b32d29744638d76c7379db9a0261a4f1988e6c) --- be/benchmark/benchmark_column_array_view.hpp | 418 ++++++++++++++++++ .../benchmark_column_array_view_distance.hpp | 353 +++++++++++++++ be/benchmark/benchmark_main.cpp | 2 + be/benchmark/binary_cast_benchmark.hpp | 49 +- be/src/core/column/column_array_view.h | 135 ++++++ be/src/core/column/column_execute_util.h | 1 + .../function/array/function_array_distance.h | 149 ++----- .../function/array/function_array_join.h | 39 +- .../core/column/column_array_view_test.cpp | 290 ++++++++++++ 9 files changed, 1255 insertions(+), 181 deletions(-) create mode 100644 be/benchmark/benchmark_column_array_view.hpp create mode 100644 be/benchmark/benchmark_column_array_view_distance.hpp create mode 100644 be/src/core/column/column_array_view.h create mode 100644 be/test/core/column/column_array_view_test.cpp diff --git a/be/benchmark/benchmark_column_array_view.hpp b/be/benchmark/benchmark_column_array_view.hpp new file mode 100644 index 00000000000000..09baf2bd435030 --- /dev/null +++ b/be/benchmark/benchmark_column_array_view.hpp @@ -0,0 +1,418 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +// ============================================================ +// Benchmark: ColumnArrayView vs hand-written array column access +// +// ColumnArrayView (see column_array_view.h) provides a unified interface +// to read array column elements regardless of whether the underlying +// column is Plain, ColumnConst, or ColumnNullable. +// +// This benchmark measures whether ColumnArrayView introduces measurable +// overhead compared to hand-written (direct) array column access code. +// +// Test scenarios: +// 1. Int64 array: sum all elements across all rows +// 2. String array: sum lengths of all elements across all rows +// 3. Const array: same as above but with ColumnConst wrapper +// 4. Nullable array: with outer nullable wrapper +// ============================================================ + +#include + +#include +#include + +#include "core/assert_cast.h" +#include "core/column/column_array.h" +#include "core/column/column_array_view.h" +#include "core/column/column_const.h" +#include "core/column/column_nullable.h" +#include "core/column/column_string.h" +#include "core/column/column_vector.h" +#include "core/data_type/primitive_type.h" + +namespace doris { + +static constexpr size_t ARR_NUM_ROWS = 4096; +static constexpr size_t ARR_ELEM_PER_ROW = 8; + +// ============================================================ +// Array column factory helpers +// ============================================================ + +// Build Array with ARR_NUM_ROWS rows, each having ARR_ELEM_PER_ROW elements. 
+static ColumnPtr make_int64_array_column() { + auto data_col = ColumnInt64::create(); + auto null_col = ColumnUInt8::create(); + auto offsets = ColumnArray::ColumnOffsets::create(); + + data_col->reserve(ARR_NUM_ROWS * ARR_ELEM_PER_ROW); + null_col->reserve(ARR_NUM_ROWS * ARR_ELEM_PER_ROW); + + size_t offset = 0; + for (size_t i = 0; i < ARR_NUM_ROWS; ++i) { + for (size_t j = 0; j < ARR_ELEM_PER_ROW; ++j) { + data_col->insert_value(static_cast(i * ARR_ELEM_PER_ROW + j + 1)); + null_col->insert_value(0); + } + offset += ARR_ELEM_PER_ROW; + offsets->insert_value(offset); + } + + auto nullable_data = ColumnNullable::create(std::move(data_col), std::move(null_col)); + return ColumnArray::create(std::move(nullable_data), std::move(offsets)); +} + +// Build Array with some null elements (every 5th element is null). +static ColumnPtr make_int64_array_column_with_nulls() { + auto data_col = ColumnInt64::create(); + auto null_col = ColumnUInt8::create(); + auto offsets = ColumnArray::ColumnOffsets::create(); + + data_col->reserve(ARR_NUM_ROWS * ARR_ELEM_PER_ROW); + null_col->reserve(ARR_NUM_ROWS * ARR_ELEM_PER_ROW); + + size_t offset = 0; + size_t flat_idx = 0; + for (size_t i = 0; i < ARR_NUM_ROWS; ++i) { + for (size_t j = 0; j < ARR_ELEM_PER_ROW; ++j) { + data_col->insert_value(static_cast(flat_idx + 1)); + null_col->insert_value(flat_idx % 5 == 0 ? 1 : 0); + flat_idx++; + } + offset += ARR_ELEM_PER_ROW; + offsets->insert_value(offset); + } + + auto nullable_data = ColumnNullable::create(std::move(data_col), std::move(null_col)); + return ColumnArray::create(std::move(nullable_data), std::move(offsets)); +} + +// Build Array with ARR_NUM_ROWS rows. 
+static ColumnPtr make_string_array_column() { + auto data_col = ColumnString::create(); + auto null_col = ColumnUInt8::create(); + auto offsets = ColumnArray::ColumnOffsets::create(); + + size_t offset = 0; + for (size_t i = 0; i < ARR_NUM_ROWS; ++i) { + for (size_t j = 0; j < ARR_ELEM_PER_ROW; ++j) { + std::string val = "str_" + std::to_string(i * ARR_ELEM_PER_ROW + j); + data_col->insert_data(val.data(), val.size()); + null_col->insert_value(0); + } + offset += ARR_ELEM_PER_ROW; + offsets->insert_value(offset); + } + + auto nullable_data = ColumnNullable::create(std::move(data_col), std::move(null_col)); + return ColumnArray::create(std::move(nullable_data), std::move(offsets)); +} + +// Wrap with outer Nullable (no rows are actually null, just the wrapper overhead). +static ColumnPtr wrap_nullable(const ColumnPtr& col) { + return ColumnNullable::create(col->assume_mutable(), + ColumnUInt8::create(col->size(), 0)); +} + +// Wrap as Const. +static ColumnPtr wrap_const(const ColumnPtr& col) { + // Take the first row of the array column, make a 1-row column, then const-expand. 
+ auto single = col->clone_empty(); + single->insert_from(*col, 0); + return ColumnConst::create(std::move(single), ARR_NUM_ROWS); +} + +// ============================================================ +// Hand-written accessor for Array +// ============================================================ + +struct HandwrittenArrayAccessor { + const ColumnArray::Offsets64& offsets; + const ColumnInt64::Container& data; + const NullMap& nested_null_map; + + explicit HandwrittenArrayAccessor(const ColumnPtr& col) + : offsets(assert_cast(*col).get_offsets()), + data(assert_cast( + assert_cast( + assert_cast(*col).get_data()) + .get_nested_column()) + .get_data()), + nested_null_map(assert_cast( + assert_cast(*col).get_data()) + .get_null_map_data()) {} + + size_t row_begin(size_t row) const { return offsets[row - 1]; } + size_t row_end(size_t row) const { return offsets[row]; } + int64_t value_at(size_t flat_idx) const { return data[flat_idx]; } + bool is_null_at(size_t flat_idx) const { return nested_null_map[flat_idx]; } +}; + +// ============================================================ +// 1. 
Int64 Plain Array: sum all elements +// ============================================================ + +static void Handwritten_ArrayInt64_Plain(benchmark::State& state) { + const auto col = make_int64_array_column(); + HandwrittenArrayAccessor acc(col); + for (auto _ : state) { + int64_t sum = 0; + for (size_t i = 0; i < ARR_NUM_ROWS; ++i) { + size_t begin = acc.row_begin(i); + size_t end = acc.row_end(i); + for (size_t j = begin; j < end; ++j) { + sum += acc.value_at(j); + } + } + benchmark::DoNotOptimize(sum); + } +} +BENCHMARK(Handwritten_ArrayInt64_Plain)->Unit(benchmark::kNanosecond); + +static void ArrayView_ArrayInt64_Plain(benchmark::State& state) { + const auto col = make_int64_array_column(); + const auto view = ColumnArrayView::create(col); + for (auto _ : state) { + int64_t sum = 0; + for (size_t i = 0; i < ARR_NUM_ROWS; ++i) { + auto arr = view[i]; + for (size_t j = 0; j < arr.size(); ++j) { + sum += arr.value_at(j); + } + } + benchmark::DoNotOptimize(sum); + } +} +BENCHMARK(ArrayView_ArrayInt64_Plain)->Unit(benchmark::kNanosecond); + +// ============================================================ +// 2. 
Int64 Array with null elements: sum non-null elements +// ============================================================ + +static void Handwritten_ArrayInt64_WithNulls(benchmark::State& state) { + const auto col = make_int64_array_column_with_nulls(); + HandwrittenArrayAccessor acc(col); + for (auto _ : state) { + int64_t sum = 0; + for (size_t i = 0; i < ARR_NUM_ROWS; ++i) { + size_t begin = acc.row_begin(i); + size_t end = acc.row_end(i); + for (size_t j = begin; j < end; ++j) { + if (!acc.is_null_at(j)) { + sum += acc.value_at(j); + } + } + } + benchmark::DoNotOptimize(sum); + } +} +BENCHMARK(Handwritten_ArrayInt64_WithNulls)->Unit(benchmark::kNanosecond); + +static void ArrayView_ArrayInt64_WithNulls(benchmark::State& state) { + const auto col = make_int64_array_column_with_nulls(); + const auto view = ColumnArrayView::create(col); + for (auto _ : state) { + int64_t sum = 0; + for (size_t i = 0; i < ARR_NUM_ROWS; ++i) { + auto arr = view[i]; + for (size_t j = 0; j < arr.size(); ++j) { + if (!arr.is_null_at(j)) { + sum += arr.value_at(j); + } + } + } + benchmark::DoNotOptimize(sum); + } +} +BENCHMARK(ArrayView_ArrayInt64_WithNulls)->Unit(benchmark::kNanosecond); + +static void ArrayView_ArrayInt64_WithNulls_Flat(benchmark::State& state) { + const auto col = make_int64_array_column_with_nulls(); + const auto view = ColumnArrayView::create(col); + const auto* data = view.get_data(); + const auto* null_map = view.get_null_map_data(); + for (auto _ : state) { + int64_t sum = 0; + for (size_t i = 0; i < ARR_NUM_ROWS; ++i) { + size_t begin = view.row_begin(i); + size_t end = view.row_end(i); + for (size_t j = begin; j < end; ++j) { + if (!null_map[j]) { + sum += data[j]; + } + } + } + benchmark::DoNotOptimize(sum); + } +} +BENCHMARK(ArrayView_ArrayInt64_WithNulls_Flat)->Unit(benchmark::kNanosecond); + +// ============================================================ +// 3. 
String Array: sum string lengths +// ============================================================ + +struct HandwrittenStringArrayAccessor { + const ColumnArray::Offsets64& offsets; + const ColumnString& str_col; + const NullMap& nested_null_map; + + explicit HandwrittenStringArrayAccessor(const ColumnPtr& col) + : offsets(assert_cast(*col).get_offsets()), + str_col(assert_cast( + assert_cast( + assert_cast(*col).get_data()) + .get_nested_column())), + nested_null_map(assert_cast( + assert_cast(*col).get_data()) + .get_null_map_data()) {} + + size_t row_begin(size_t row) const { return offsets[row - 1]; } + size_t row_end(size_t row) const { return offsets[row]; } + StringRef value_at(size_t flat_idx) const { return str_col.get_data_at(flat_idx); } + bool is_null_at(size_t flat_idx) const { return nested_null_map[flat_idx]; } +}; + +static void Handwritten_ArrayString_Plain(benchmark::State& state) { + const auto col = make_string_array_column(); + HandwrittenStringArrayAccessor acc(col); + for (auto _ : state) { + int64_t sum = 0; + for (size_t i = 0; i < ARR_NUM_ROWS; ++i) { + size_t begin = acc.row_begin(i); + size_t end = acc.row_end(i); + for (size_t j = begin; j < end; ++j) { + sum += acc.value_at(j).size; + } + } + benchmark::DoNotOptimize(sum); + } +} +BENCHMARK(Handwritten_ArrayString_Plain)->Unit(benchmark::kNanosecond); + +static void ArrayView_ArrayString_Plain(benchmark::State& state) { + const auto col = make_string_array_column(); + const auto view = ColumnArrayView::create(col); + for (auto _ : state) { + int64_t sum = 0; + for (size_t i = 0; i < ARR_NUM_ROWS; ++i) { + auto arr = view[i]; + for (size_t j = 0; j < arr.size(); ++j) { + sum += arr.value_at(j).size; + } + } + benchmark::DoNotOptimize(sum); + } +} +BENCHMARK(ArrayView_ArrayString_Plain)->Unit(benchmark::kNanosecond); + +// ============================================================ +// 4. 
Const Array: Const(Array) +// ============================================================ + +static void Handwritten_ArrayInt64_Const(benchmark::State& state) { + const auto base = make_int64_array_column(); + const auto const_col = wrap_const(base); + // Hand-written: unpack const, then access the single row repeatedly + const auto& inner = assert_cast(*const_col).get_data_column(); + const auto& array_col = assert_cast(inner); + const auto& arr_offsets = array_col.get_offsets(); + const auto& nested_nullable = assert_cast(array_col.get_data()); + const auto& int_data = assert_cast(nested_nullable.get_nested_column()).get_data(); + + size_t begin = arr_offsets[-1]; // sentinel = 0 + size_t end = arr_offsets[0]; + + for (auto _ : state) { + int64_t sum = 0; + for (size_t i = 0; i < ARR_NUM_ROWS; ++i) { + for (size_t j = begin; j < end; ++j) { + sum += int_data[j]; + } + } + benchmark::DoNotOptimize(sum); + } +} +BENCHMARK(Handwritten_ArrayInt64_Const)->Unit(benchmark::kNanosecond); + +static void ArrayView_ArrayInt64_Const(benchmark::State& state) { + const auto base = make_int64_array_column(); + const auto const_col = wrap_const(base); + const auto view = ColumnArrayView::create(const_col); + for (auto _ : state) { + int64_t sum = 0; + for (size_t i = 0; i < ARR_NUM_ROWS; ++i) { + auto arr = view[i]; + for (size_t j = 0; j < arr.size(); ++j) { + sum += arr.value_at(j); + } + } + benchmark::DoNotOptimize(sum); + } +} +BENCHMARK(ArrayView_ArrayInt64_Const)->Unit(benchmark::kNanosecond); + +// ============================================================ +// 5. 
Nullable Array: Nullable(Array) +// ============================================================ + +static void Handwritten_ArrayInt64_Nullable(benchmark::State& state) { + const auto base = make_int64_array_column(); + const auto nullable_col = wrap_nullable(base); + // Hand-written: unpack nullable + const auto& nullable = assert_cast(*nullable_col); + const auto& outer_null_map = nullable.get_null_map_data(); + const auto& array_col = assert_cast(nullable.get_nested_column()); + const auto& arr_offsets = array_col.get_offsets(); + const auto& nested_nullable = assert_cast(array_col.get_data()); + const auto& int_data = assert_cast(nested_nullable.get_nested_column()).get_data(); + + for (auto _ : state) { + int64_t sum = 0; + for (size_t i = 0; i < ARR_NUM_ROWS; ++i) { + if (outer_null_map[i]) continue; + size_t begin = arr_offsets[i - 1]; + size_t end = arr_offsets[i]; + for (size_t j = begin; j < end; ++j) { + sum += int_data[j]; + } + } + benchmark::DoNotOptimize(sum); + } +} +BENCHMARK(Handwritten_ArrayInt64_Nullable)->Unit(benchmark::kNanosecond); + +static void ArrayView_ArrayInt64_Nullable(benchmark::State& state) { + const auto base = make_int64_array_column(); + const auto nullable_col = wrap_nullable(base); + const auto view = ColumnArrayView::create(nullable_col); + for (auto _ : state) { + int64_t sum = 0; + for (size_t i = 0; i < ARR_NUM_ROWS; ++i) { + if (view.is_null_at(i)) continue; + auto arr = view[i]; + for (size_t j = 0; j < arr.size(); ++j) { + sum += arr.value_at(j); + } + } + benchmark::DoNotOptimize(sum); + } +} +BENCHMARK(ArrayView_ArrayInt64_Nullable)->Unit(benchmark::kNanosecond); + +} // namespace doris diff --git a/be/benchmark/benchmark_column_array_view_distance.hpp b/be/benchmark/benchmark_column_array_view_distance.hpp new file mode 100644 index 00000000000000..34fd287f2030ff --- /dev/null +++ b/be/benchmark/benchmark_column_array_view_distance.hpp @@ -0,0 +1,353 @@ +// Licensed to the Apache Software Foundation (ASF) under one 
+// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// ============================================================ +// Benchmark: ColumnArrayView vs hand-written for array distance +// +// Simulates the FunctionArrayDistance pattern: +// - Build Array columns +// - Extract raw float* pointers + dimensions per row +// - Call faiss L2 distance on each row pair +// +// Compares: +// 1. Hand-written: manual Const/Nullable unwrapping + offsets +// 2. ColumnArrayView: original row-view access via ArrayDataView::get_data() +// 3. ColumnArrayView flat access: prefetch flat data pointer + row offsets +// ============================================================ + +#include + +#include +#include +#include + +#include "core/assert_cast.h" +#include "core/column/column_array.h" +#include "core/column/column_array_view.h" +#include "core/column/column_const.h" +#include "core/column/column_nullable.h" +#include "core/column/column_vector.h" +#include "core/data_type/primitive_type.h" + +namespace doris { + +// Inline L2 distance to avoid faiss build dependency in benchmark. +// Both paths call the same function, so the measurement is purely +// about pointer-extraction overhead, not about the distance kernel. 
+static inline float inline_l2_distance(const float* x, const float* y, size_t d) { + float sum = 0.0f; + for (size_t i = 0; i < d; ++i) { + float diff = x[i] - y[i]; + sum += diff * diff; + } + return std::sqrt(sum); +} + +static constexpr size_t DIST_NUM_ROWS = 4096; +static constexpr size_t DIST_DIM = 128; // typical embedding dimension + +// ============================================================ +// Column factory: Array with fixed dimension +// ============================================================ + +static ColumnPtr make_float_array_column_for_dist(size_t num_rows, size_t dim) { + auto data_col = ColumnFloat32::create(); + auto null_col = ColumnUInt8::create(); + auto offsets = ColumnArray::ColumnOffsets::create(); + + data_col->reserve(num_rows * dim); + null_col->reserve(num_rows * dim); + + std::mt19937 rng(42); + std::uniform_real_distribution dist(-1.0f, 1.0f); + + size_t offset = 0; + for (size_t i = 0; i < num_rows; ++i) { + for (size_t j = 0; j < dim; ++j) { + data_col->insert_value(dist(rng)); + null_col->insert_value(0); + } + offset += dim; + offsets->insert_value(offset); + } + + auto nullable_data = ColumnNullable::create(std::move(data_col), std::move(null_col)); + return ColumnArray::create(std::move(nullable_data), std::move(offsets)); +} + +static ColumnPtr make_const_float_array_for_dist(size_t dim) { + auto single = make_float_array_column_for_dist(1, dim); + return ColumnConst::create(std::move(single), DIST_NUM_ROWS); +} + +// ============================================================ +// 1. 
Both columns non-const: L2 distance per row +// ============================================================ + +static void Handwritten_Distance_Plain_Plain(benchmark::State& state) { + const auto col1 = make_float_array_column_for_dist(DIST_NUM_ROWS, DIST_DIM); + const auto col2 = make_float_array_column_for_dist(DIST_NUM_ROWS, DIST_DIM); + + // Hand-written extraction (mirrors FunctionArrayDistance::execute_impl) + const auto& arr1 = assert_cast(*col1); + const auto& arr2 = assert_cast(*col2); + const auto& nested1 = assert_cast(arr1.get_data()); + const auto& nested2 = assert_cast(arr2.get_data()); + const auto& float1 = assert_cast(nested1.get_nested_column()); + const auto& float2 = assert_cast(nested2.get_nested_column()); + const auto* fdata1 = float1.get_data().data(); + const auto* fdata2 = float2.get_data().data(); + const auto& offsets1 = arr1.get_offsets(); + const auto& offsets2 = arr2.get_offsets(); + + auto dst = ColumnFloat32::create(DIST_NUM_ROWS); + auto& dst_data = dst->get_data(); + + for (auto _ : state) { + for (size_t row = 0; row < DIST_NUM_ROWS; ++row) { + auto prev1 = offsets1[row - 1]; + auto prev2 = offsets2[row - 1]; + auto size1 = offsets1[row] - prev1; + dst_data[row] = inline_l2_distance(fdata1 + prev1, fdata2 + prev2, size1); + } + benchmark::ClobberMemory(); + } +} +BENCHMARK(Handwritten_Distance_Plain_Plain)->Unit(benchmark::kNanosecond); + +static void ArrayView_Distance_Plain_Plain(benchmark::State& state) { + const auto col1 = make_float_array_column_for_dist(DIST_NUM_ROWS, DIST_DIM); + const auto col2 = make_float_array_column_for_dist(DIST_NUM_ROWS, DIST_DIM); + + const auto view1 = ColumnArrayView::create(col1); + const auto view2 = ColumnArrayView::create(col2); + + auto dst = ColumnFloat32::create(DIST_NUM_ROWS); + auto& dst_data = dst->get_data(); + + for (auto _ : state) { + for (size_t row = 0; row < DIST_NUM_ROWS; ++row) { + auto a1 = view1[row]; + auto a2 = view2[row]; + const float* p1 = a1.get_data(); + const float* 
p2 = a2.get_data(); + dst_data[row] = inline_l2_distance(p1, p2, a1.size()); + } + benchmark::ClobberMemory(); + } +} +BENCHMARK(ArrayView_Distance_Plain_Plain)->Unit(benchmark::kNanosecond); + +static void ArrayView_Distance_Plain_Plain_Flat(benchmark::State& state) { + const auto col1 = make_float_array_column_for_dist(DIST_NUM_ROWS, DIST_DIM); + const auto col2 = make_float_array_column_for_dist(DIST_NUM_ROWS, DIST_DIM); + + const auto view1 = ColumnArrayView::create(col1); + const auto view2 = ColumnArrayView::create(col2); + const auto* data1 = view1.get_data(); + const auto* data2 = view2.get_data(); + + auto dst = ColumnFloat32::create(DIST_NUM_ROWS); + auto& dst_data = dst->get_data(); + + for (auto _ : state) { + for (size_t row = 0; row < DIST_NUM_ROWS; ++row) { + size_t begin1 = view1.row_begin(row); + size_t begin2 = view2.row_begin(row); + size_t dim1 = view1.row_end(row) - begin1; + dst_data[row] = inline_l2_distance(data1 + begin1, data2 + begin2, dim1); + } + benchmark::ClobberMemory(); + } +} +BENCHMARK(ArrayView_Distance_Plain_Plain_Flat)->Unit(benchmark::kNanosecond); + +// ============================================================ +// 2. 
One column const (query vs many vectors) +// ============================================================ + +static void Handwritten_Distance_Const_Plain(benchmark::State& state) { + const auto const_col = make_const_float_array_for_dist(DIST_DIM); + const auto col2 = make_float_array_column_for_dist(DIST_NUM_ROWS, DIST_DIM); + + // Extract const array once + const auto& const_inner = assert_cast(*const_col).get_data_column(); + const auto& const_arr = assert_cast(const_inner); + const auto& const_nested = assert_cast(const_arr.get_data()); + const auto& const_float = assert_cast(const_nested.get_nested_column()); + const float* const_data = const_float.get_data().data(); + size_t const_dim = const_float.size(); + + // Extract non-const array + const auto& arr2 = assert_cast(*col2); + const auto& nested2 = assert_cast(arr2.get_data()); + const auto& float2 = assert_cast(nested2.get_nested_column()); + const auto* fdata2 = float2.get_data().data(); + const auto& offsets2 = arr2.get_offsets(); + + auto dst = ColumnFloat32::create(DIST_NUM_ROWS); + auto& dst_data = dst->get_data(); + + for (auto _ : state) { + for (size_t row = 0; row < DIST_NUM_ROWS; ++row) { + auto prev2 = offsets2[row - 1]; + dst_data[row] = inline_l2_distance(const_data, fdata2 + prev2, const_dim); + } + benchmark::ClobberMemory(); + } +} +BENCHMARK(Handwritten_Distance_Const_Plain)->Unit(benchmark::kNanosecond); + +static void ArrayView_Distance_Const_Plain(benchmark::State& state) { + const auto const_col = make_const_float_array_for_dist(DIST_DIM); + const auto col2 = make_float_array_column_for_dist(DIST_NUM_ROWS, DIST_DIM); + + const auto view1 = ColumnArrayView::create(const_col); + const auto view2 = ColumnArrayView::create(col2); + + auto dst = ColumnFloat32::create(DIST_NUM_ROWS); + auto& dst_data = dst->get_data(); + + for (auto _ : state) { + for (size_t row = 0; row < DIST_NUM_ROWS; ++row) { + auto a1 = view1[row]; + auto a2 = view2[row]; + const float* p1 = a1.get_data(); + const 
float* p2 = a2.get_data(); + dst_data[row] = inline_l2_distance(p1, p2, a1.size()); + } + benchmark::ClobberMemory(); + } +} +BENCHMARK(ArrayView_Distance_Const_Plain)->Unit(benchmark::kNanosecond); + +static void ArrayView_Distance_Const_Plain_Flat(benchmark::State& state) { + const auto const_col = make_const_float_array_for_dist(DIST_DIM); + const auto col2 = make_float_array_column_for_dist(DIST_NUM_ROWS, DIST_DIM); + + const auto view1 = ColumnArrayView::create(const_col); + const auto view2 = ColumnArrayView::create(col2); + const auto* data1 = view1.get_data(); + const auto* data2 = view2.get_data(); + + auto dst = ColumnFloat32::create(DIST_NUM_ROWS); + auto& dst_data = dst->get_data(); + + for (auto _ : state) { + for (size_t row = 0; row < DIST_NUM_ROWS; ++row) { + size_t begin1 = view1.row_begin(row); + size_t begin2 = view2.row_begin(row); + size_t dim1 = view1.row_end(row) - begin1; + dst_data[row] = inline_l2_distance(data1 + begin1, data2 + begin2, dim1); + } + benchmark::ClobberMemory(); + } +} +BENCHMARK(ArrayView_Distance_Const_Plain_Flat)->Unit(benchmark::kNanosecond); + +// ============================================================ +// 3. 
Nullable(Array) vs plain Array +// ============================================================ + +static ColumnPtr wrap_nullable_for_dist(const ColumnPtr& col) { + return ColumnNullable::create(col->assume_mutable(), ColumnUInt8::create(col->size(), 0)); +} + +static void Handwritten_Distance_Nullable_Plain(benchmark::State& state) { + const auto base1 = make_float_array_column_for_dist(DIST_NUM_ROWS, DIST_DIM); + const auto nullable_col1 = wrap_nullable_for_dist(base1); + const auto col2 = make_float_array_column_for_dist(DIST_NUM_ROWS, DIST_DIM); + + // Unwrap nullable + const auto& nullable1 = assert_cast(*nullable_col1); + const auto& arr1 = assert_cast(nullable1.get_nested_column()); + const auto& nested1 = assert_cast(arr1.get_data()); + const auto& float1 = assert_cast(nested1.get_nested_column()); + const auto* fdata1 = float1.get_data().data(); + const auto& offsets1 = arr1.get_offsets(); + + const auto& arr2 = assert_cast(*col2); + const auto& nested2 = assert_cast(arr2.get_data()); + const auto& float2 = assert_cast(nested2.get_nested_column()); + const auto* fdata2 = float2.get_data().data(); + const auto& offsets2 = arr2.get_offsets(); + + auto dst = ColumnFloat32::create(DIST_NUM_ROWS); + auto& dst_data = dst->get_data(); + + for (auto _ : state) { + for (size_t row = 0; row < DIST_NUM_ROWS; ++row) { + auto prev1 = offsets1[row - 1]; + auto prev2 = offsets2[row - 1]; + auto size1 = offsets1[row] - prev1; + dst_data[row] = inline_l2_distance(fdata1 + prev1, fdata2 + prev2, size1); + } + benchmark::ClobberMemory(); + } +} +BENCHMARK(Handwritten_Distance_Nullable_Plain)->Unit(benchmark::kNanosecond); + +static void ArrayView_Distance_Nullable_Plain(benchmark::State& state) { + const auto base1 = make_float_array_column_for_dist(DIST_NUM_ROWS, DIST_DIM); + const auto nullable_col1 = wrap_nullable_for_dist(base1); + const auto col2 = make_float_array_column_for_dist(DIST_NUM_ROWS, DIST_DIM); + + const auto view1 = ColumnArrayView::create(nullable_col1); + 
const auto view2 = ColumnArrayView::create(col2); + + auto dst = ColumnFloat32::create(DIST_NUM_ROWS); + auto& dst_data = dst->get_data(); + + for (auto _ : state) { + for (size_t row = 0; row < DIST_NUM_ROWS; ++row) { + auto a1 = view1[row]; + auto a2 = view2[row]; + const float* p1 = a1.get_data(); + const float* p2 = a2.get_data(); + dst_data[row] = inline_l2_distance(p1, p2, a1.size()); + } + benchmark::ClobberMemory(); + } +} +BENCHMARK(ArrayView_Distance_Nullable_Plain)->Unit(benchmark::kNanosecond); + +static void ArrayView_Distance_Nullable_Plain_Flat(benchmark::State& state) { + const auto base1 = make_float_array_column_for_dist(DIST_NUM_ROWS, DIST_DIM); + const auto nullable_col1 = wrap_nullable_for_dist(base1); + const auto col2 = make_float_array_column_for_dist(DIST_NUM_ROWS, DIST_DIM); + + const auto view1 = ColumnArrayView::create(nullable_col1); + const auto view2 = ColumnArrayView::create(col2); + const auto* data1 = view1.get_data(); + const auto* data2 = view2.get_data(); + + auto dst = ColumnFloat32::create(DIST_NUM_ROWS); + auto& dst_data = dst->get_data(); + + for (auto _ : state) { + for (size_t row = 0; row < DIST_NUM_ROWS; ++row) { + size_t begin1 = view1.row_begin(row); + size_t begin2 = view2.row_begin(row); + size_t dim1 = view1.row_end(row) - begin1; + dst_data[row] = inline_l2_distance(data1 + begin1, data2 + begin2, dim1); + } + benchmark::ClobberMemory(); + } +} +BENCHMARK(ArrayView_Distance_Nullable_Plain_Flat)->Unit(benchmark::kNanosecond); + +} // namespace doris diff --git a/be/benchmark/benchmark_main.cpp b/be/benchmark/benchmark_main.cpp index 905331fa422659..caf5459c46af51 100644 --- a/be/benchmark/benchmark_main.cpp +++ b/be/benchmark/benchmark_main.cpp @@ -18,6 +18,8 @@ #include #include "benchmark_bit_pack.hpp" +#include "benchmark_column_array_view.hpp" +#include "benchmark_column_array_view_distance.hpp" #include "benchmark_fastunion.hpp" #include "benchmark_fmod.hpp" #include "benchmark_hll_merge.hpp" diff --git 
a/be/benchmark/binary_cast_benchmark.hpp b/be/benchmark/binary_cast_benchmark.hpp index cc5874a82ca44c..9949a783b05a1d 100644 --- a/be/benchmark/binary_cast_benchmark.hpp +++ b/be/benchmark/binary_cast_benchmark.hpp @@ -53,51 +53,10 @@ To old_binary_cast(From from) { from_decv2_to_i128 || from_decv2_to_i256 || from_ui32_to_date_v2 || from_date_v2_to_ui32 || from_ui64_to_datetime_v2 || from_datetime_v2_to_ui64); - if constexpr (from_u64_to_db) { - TypeConverter conv; - conv.u64 = from; - return conv.dbl; - } else if constexpr (from_i64_to_db) { - TypeConverter conv; - conv.i64 = from; - return conv.dbl; - } else if constexpr (from_db_to_i64) { - TypeConverter conv; - conv.dbl = from; - return conv.i64; - } else if constexpr (from_db_to_u64) { - TypeConverter conv; - conv.dbl = from; - return conv.u64; - } else if constexpr (from_i64_to_vec_dt) { - VecDateTimeInt64Union conv = {.i64 = from}; - return conv.dt; - } else if constexpr (from_ui32_to_date_v2) { - DateV2UInt32Union conv = {.ui32 = from}; - return conv.dt; - } else if constexpr (from_date_v2_to_ui32) { - DateV2UInt32Union conv = {.dt = from}; - return conv.ui32; - } else if constexpr (from_ui64_to_datetime_v2) { - DateTimeV2UInt64Union conv = {.ui64 = from}; - return conv.dt; - } else if constexpr (from_datetime_v2_to_ui64) { - DateTimeV2UInt64Union conv = {.dt = from}; - return conv.ui64; - } else if constexpr (from_vec_dt_to_i64) { - VecDateTimeInt64Union conv = {.dt = from}; - return conv.i64; - } else if constexpr (from_i128_to_decv2) { - DecimalInt128Union conv; - conv.i128 = from; - return conv.decimal; - } else if constexpr (from_decv2_to_i128) { - DecimalInt128Union conv; - conv.decimal = from; - return conv.i128; - } else { - throw Exception(Status::FatalError("__builtin_unreachable")); - } + static_assert(sizeof(From) == sizeof(To)); + To to; + std::memcpy(&to, &from, sizeof(To)); + return to; } // Generate random datetime values in uint64_t format for testing diff --git 
a/be/src/core/column/column_array_view.h b/be/src/core/column/column_array_view.h new file mode 100644 index 00000000000000..cc74d6e3c7088d --- /dev/null +++ b/be/src/core/column/column_array_view.h @@ -0,0 +1,135 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "core/column/column_array.h" +#include "core/column/column_execute_util.h" + +namespace doris { + +// ArrayDataView represents a read-only view of a single row's array data +// (a slice of ColumnArray's flat nested data). +// Used as the return type of ColumnArrayView::operator[]. 
+template +struct ArrayDataView { + using ElementType = typename ColumnElementView::ElementType; + + const ColumnElementView& data; + const NullMap& nested_null_map; + const size_t offset; + const size_t length; + + size_t size() const { return length; } + + const ElementType* get_data() const { + const ElementType* raw_data = data.get_data(); + return raw_data + offset; + } + + const UInt8* get_null_map_data() const { return nested_null_map.data() + offset; } + + // ColumnArray's data column is always Nullable, no need to check nullptr + bool is_null_at(size_t idx) const { return nested_null_map[offset + idx]; } + + ElementType value_at(size_t idx) const { return data.get_element(offset + idx); } +}; + +// ColumnArrayView provides a read-only view over a column of Array, +// handling Const / Nullable wrapping automatically. +// +// Supports index-based access: operator[](row) returns ArrayDataView, uses offsets[row-1] (sentinel) +template +struct ColumnArrayView { + const ColumnElementView element_data; + const ColumnArray::Offsets64& offsets; + const NullMap* outer_null_map; + const NullMap& nested_null_map; + const bool is_const; + const size_t count; + + static ColumnArrayView create(const ColumnPtr& column_ptr) { + // Step 1: unpack const + const auto& [unpacked, is_const] = unpack_if_const(column_ptr); + + // Step 2: unpack outer nullable + const NullMap* outer_null_map = nullptr; + const IColumn* array_raw = nullptr; + if (const auto* nullable = check_and_get_column(unpacked.get())) { + outer_null_map = &nullable->get_null_map_data(); + array_raw = nullable->get_nested_column_ptr().get(); + } else { + array_raw = unpacked.get(); + } + + // Step 3: get ColumnArray + const auto& array_column = assert_cast(*array_raw); + + // Step 4: unpack inner nullable (data column is always Nullable) + if (!array_column.get_data().is_nullable()) { + throw doris::Exception(ErrorCode::INTERNAL_ERROR, + "ColumnArray's data column is expected to be Nullable"); + } + + const 
auto& nested_nullable = assert_cast(array_column.get_data()); + const NullMap& nested_null_map = nested_nullable.get_null_map_data(); + const IColumn* data_column = nested_nullable.get_nested_column_ptr().get(); + + return ColumnArrayView {.element_data = ColumnElementView(*data_column), + .offsets = array_column.get_offsets(), + .outer_null_map = outer_null_map, + .nested_null_map = nested_null_map, + .is_const = is_const, + .count = column_ptr->size()}; + } + + size_t size() const { return count; } + + auto get_data() const { return element_data.get_data(); } + + const UInt8* get_null_map_data() const { return nested_null_map.data(); } + + size_t row_begin(size_t idx) const { + size_t actual = is_const ? 0 : idx; + return offsets[actual - 1]; + } + + size_t row_end(size_t idx) const { + size_t actual = is_const ? 0 : idx; + return offsets[actual]; + } + + bool is_null_at(size_t idx) const { + if (outer_null_map) { + return (*outer_null_map)[is_const ? 0 : idx]; + } + return false; + } + + // Index-based access: uses offsets[actual - 1] (PaddedPODArray sentinel guarantees [-1] is valid) + ArrayDataView operator[](size_t idx) const { + size_t actual = is_const ? 
0 : idx; + size_t off = offsets[actual - 1]; + size_t len = offsets[actual] - off; + return ArrayDataView {.data = element_data, + .nested_null_map = nested_null_map, + .offset = off, + .length = len}; + } +}; + +} // namespace doris diff --git a/be/src/core/column/column_execute_util.h b/be/src/core/column/column_execute_util.h index d8f33782c48c1a..0b6513ba5b779b 100644 --- a/be/src/core/column/column_execute_util.h +++ b/be/src/core/column/column_execute_util.h @@ -39,6 +39,7 @@ struct ColumnElementView { using ElementType = typename ColumnType::value_type; const typename ColumnType::Container& data; ElementType get_element(size_t idx) const { return data[idx]; } + const ElementType* get_data() const { return data.data(); } ColumnElementView(const IColumn& column) : data(assert_cast(column).get_data()) {} diff --git a/be/src/exprs/function/array/function_array_distance.h b/be/src/exprs/function/array/function_array_distance.h index e40618267b2590..12969c23e3481f 100644 --- a/be/src/exprs/function/array/function_array_distance.h +++ b/be/src/exprs/function/array/function_array_distance.h @@ -21,13 +21,12 @@ #include #include -#include - #include "common/exception.h" #include "common/status.h" #include "core/assert_cast.h" #include "core/column/column.h" #include "core/column/column_array.h" +#include "core/column/column_array_view.h" #include "core/column/column_const.h" #include "core/column/column_nullable.h" #include "core/data_type/data_type.h" @@ -37,7 +36,6 @@ #include "core/data_type/primitive_type.h" #include "core/types.h" #include "exec/common/util.hpp" -#include "exprs/function/array/function_array_utils.h" #include "exprs/function/function.h" namespace doris { @@ -118,133 +116,64 @@ class FunctionArrayDistance : public IFunction { // We want to make sure throw exception if input columns contain NULL. bool use_default_implementation_for_nulls() const override { return false; } - // Extract the ColumnArray from a column, unwrapping Nullable if present. 
- // Validates that no NULL values exist. - static const ColumnArray* _extract_array_column(const IColumn* col, const char* arg_name, - const String& func_name) { - if (col->is_nullable()) { - if (col->has_null()) { - throw doris::Exception(ErrorCode::INVALID_ARGUMENT, - "{} for function {} cannot be null", arg_name, func_name); - } - auto nullable = assert_cast(col); - return assert_cast(nullable->get_nested_column_ptr().get()); + // Validate that neither outer column nor inner array elements contain NULL. + // Distance functions always throw on NULL input. + static void _validate_no_nulls(const ColumnPtr& col, const char* arg_name, + const String& func_name) { + const IColumn* raw = col.get(); + + // Unwrap const + if (is_column_const(*raw)) { + raw = assert_cast(raw)->get_data_column_ptr().get(); } - return assert_cast(col); - } - // Extract the ColumnFloat32 data from an array column, unwrapping Nullable if present. - // Validates that no NULL elements exist within the array. - static const ColumnFloat32* _extract_float_data(const ColumnArray* arr, const char* arg_name, - const String& func_name) { - if (arr->get_data_ptr()->is_nullable()) { - if (arr->get_data_ptr()->has_null()) { + // Check outer nullable + if (raw->is_nullable()) { + if (raw->has_null()) { throw doris::Exception(ErrorCode::INVALID_ARGUMENT, - "{} for function {} cannot have null", arg_name, func_name); + "{} for function {} cannot be null", arg_name, func_name); } - auto nullable = assert_cast(arr->get_data_ptr().get()); - return assert_cast(nullable->get_nested_column_ptr().get()); + raw = assert_cast(raw)->get_nested_column_ptr().get(); } - return assert_cast(arr->get_data_ptr().get()); - } - // Holds the extracted float data pointer and dimension for a const array argument, - // avoiding repeated per-row extraction. - struct ConstArrayInfo { - const float* data = nullptr; - ssize_t dim = 0; - }; - - // Try to extract const array info from a column. 
If the column is ColumnConst, - // extract the float data pointer and dimension once; otherwise return nullopt. - std::optional _try_extract_const(const ColumnPtr& col, - const char* arg_name) const { - if (!is_column_const(*col)) { - return std::nullopt; + // Check inner nullable (array elements) + const auto& array_col = assert_cast(*raw); + if (array_col.get_data_ptr()->is_nullable() && array_col.get_data_ptr()->has_null()) { + throw doris::Exception(ErrorCode::INVALID_ARGUMENT, + "{} for function {} cannot have null", arg_name, func_name); } - auto const_col = assert_cast(col.get()); - const IColumn* inner = const_col->get_data_column_ptr().get(); - const ColumnArray* arr = _extract_array_column(inner, arg_name, get_name()); - const ColumnFloat32* float_col = _extract_float_data(arr, arg_name, get_name()); - ssize_t dim = static_cast(float_col->size()); - return ConstArrayInfo {float_col->get_data().data(), dim}; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) const override { - const auto& arg1 = block.get_by_position(arguments[0]); - const auto& arg2 = block.get_by_position(arguments[1]); - - // Try to handle const columns without expanding them. - auto const_info1 = _try_extract_const(arg1.column, "First argument"); - auto const_info2 = _try_extract_const(arg2.column, "Second argument"); - - // For non-const columns, expand and extract normally. 
- ColumnPtr materialized_col1, materialized_col2; - const ColumnArray* arr1 = nullptr; - const ColumnArray* arr2 = nullptr; - const ColumnFloat32* float1 = nullptr; - const ColumnFloat32* float2 = nullptr; - const ColumnOffset64* offset1 = nullptr; - const ColumnOffset64* offset2 = nullptr; - const IColumn::Offsets64* offsets_data1 = nullptr; - const IColumn::Offsets64* offsets_data2 = nullptr; - const float* float_data1 = nullptr; - const float* float_data2 = nullptr; - - if (!const_info1) { - materialized_col1 = arg1.column->convert_to_full_column_if_const(); - arr1 = _extract_array_column(materialized_col1.get(), "First argument", get_name()); - float1 = _extract_float_data(arr1, "First argument", get_name()); - offset1 = arr1->get_offsets_ptr().get(); - offsets_data1 = &offset1->get_data(); - float_data1 = float1->get_data().data(); - } + const auto& col1 = block.get_by_position(arguments[0]).column; + const auto& col2 = block.get_by_position(arguments[1]).column; - if (!const_info2) { - materialized_col2 = arg2.column->convert_to_full_column_if_const(); - arr2 = _extract_array_column(materialized_col2.get(), "Second argument", get_name()); - float2 = _extract_float_data(arr2, "Second argument", get_name()); - offset2 = arr2->get_offsets_ptr().get(); - offsets_data2 = &offset2->get_data(); - float_data2 = float2->get_data().data(); - } + // Validate no NULLs (distance functions always throw on NULL input) + _validate_no_nulls(col1, "First argument", get_name()); + _validate_no_nulls(col2, "Second argument", get_name()); + + // Create views — handles Const/Nullable unwrapping automatically + auto view1 = ColumnArrayView::create(col1); + auto view2 = ColumnArrayView::create(col2); - // prepare return data auto dst = ColumnType::create(input_rows_count); auto& dst_data = dst->get_data(); for (size_t row = 0; row < input_rows_count; ++row) { - const float* data_ptr1; - const float* data_ptr2; - ssize_t size1, size2; - const auto idx = static_cast(row); - - if 
(const_info1) { - data_ptr1 = const_info1->data; - size1 = const_info1->dim; - } else { - // -1 is valid for PaddedPODArray-backed offsets. - const auto prev_offset1 = (*offsets_data1)[idx - 1]; - size1 = (*offsets_data1)[idx] - prev_offset1; - data_ptr1 = float_data1 + prev_offset1; - } - - if (const_info2) { - data_ptr2 = const_info2->data; - size2 = const_info2->dim; - } else { - const auto prev_offset2 = (*offsets_data2)[idx - 1]; - size2 = (*offsets_data2)[idx] - prev_offset2; - data_ptr2 = float_data2 + prev_offset2; - } - - if (size1 != size2) [[unlikely]] { + auto a1 = view1[row]; + auto a2 = view2[row]; + const float* p1 = a1.get_data(); + const float* p2 = a2.get_data(); + auto dim1 = a1.size(); + auto dim2 = a2.size(); + + if (dim1 != dim2) [[unlikely]] { return Status::InvalidArgument( "function {} have different input element sizes of array: {} and {}", - get_name(), size1, size2); + get_name(), dim1, dim2); } - dst_data[row] = DistanceImpl::distance(data_ptr1, data_ptr2, size1); + dst_data[row] = DistanceImpl::distance(p1, p2, dim1); } block.replace_by_position(result, std::move(dst)); diff --git a/be/src/exprs/function/array/function_array_join.h b/be/src/exprs/function/array/function_array_join.h index e140f20f63a4cf..48e98478fea1fd 100644 --- a/be/src/exprs/function/array/function_array_join.h +++ b/be/src/exprs/function/array/function_array_join.h @@ -18,12 +18,12 @@ #include "core/block/block.h" #include "core/column/column_array.h" +#include "core/column/column_array_view.h" #include "core/column/column_const.h" #include "core/column/column_execute_util.h" #include "core/data_type/data_type_array.h" #include "core/data_type/data_type_string.h" #include "core/string_ref.h" -#include "exprs/function/array/function_array_utils.h" namespace doris { @@ -59,22 +59,15 @@ struct ArrayJoinImpl { static Status execute(Block& block, const ColumnNumbers& arguments, uint32_t result, const DataTypeArray* data_type_array, const ColumnArray& array) { - 
ColumnPtr src_column = - block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); - ColumnArrayExecutionData src; - if (!extract_column_array_info(*src_column, src)) { - return Status::RuntimeError(fmt::format( - "execute failed, unsupported types for function {}({})", "array_join", - block.get_by_position(arguments[0]).type->get_name())); - } + ColumnPtr src_column = block.get_by_position(arguments[0]).column; + auto array_view = ColumnArrayView::create(src_column); - auto nested_type = data_type_array->get_nested_type(); auto dest_column_ptr = ColumnString::create(); auto& dest_chars = dest_column_ptr->get_chars(); auto& dest_offsets = dest_column_ptr->get_offsets(); - dest_offsets.resize_fill(src_column->size(), 0); + dest_offsets.resize_fill(array_view.size(), 0); auto sep_column = ColumnView::create(block.get_by_position(arguments[1]).column); @@ -83,8 +76,7 @@ struct ArrayJoinImpl { auto null_replace_column = ColumnView::create(block.get_by_position(arguments[2]).column); - _execute_string(*src.nested_col, *src.offsets_ptr, src.nested_nullmap_data, sep_column, - null_replace_column, dest_chars, dest_offsets); + _execute_string(array_view, sep_column, null_replace_column, dest_chars, dest_offsets); } else { auto tmp_column_string = ColumnString::create(); @@ -95,8 +87,7 @@ struct ArrayJoinImpl { auto null_replace_column = ColumnView::create(tmp_const_column); - _execute_string(*src.nested_col, *src.offsets_ptr, src.nested_nullmap_data, sep_column, - null_replace_column, dest_chars, dest_offsets); + _execute_string(array_view, sep_column, null_replace_column, dest_chars, dest_offsets); } block.replace_by_position(result, std::move(dest_column_ptr)); @@ -130,27 +121,23 @@ struct ArrayJoinImpl { } } - static void _execute_string(const IColumn& src_column, - const ColumnArray::Offsets64& src_offsets, - const UInt8* src_null_map, ColumnView& sep_column, + static void _execute_string(const ColumnArrayView& array_view, + ColumnView& sep_column, 
ColumnView& null_replace_column, ColumnString::Chars& dest_chars, ColumnString::Offsets& dest_offsets) { - const auto& src_data = assert_cast(src_column); - uint32_t total_size = 0; - for (int64_t i = 0; i < src_offsets.size(); ++i) { - auto begin = src_offsets[i - 1]; - auto end = src_offsets[i]; + for (int64_t i = 0; i < array_view.size(); ++i) { + auto arr = array_view[i]; auto sep_str = sep_column.value_at(i); auto null_replace_str = null_replace_column.value_at(i); bool is_first_elem = true; - for (size_t j = begin; j < end; ++j) { - if (src_null_map && src_null_map[j]) { + for (size_t j = 0; j < arr.size(); ++j) { + if (arr.is_null_at(j)) { if (null_replace_str.size != 0) { _fill_result_string(i, null_replace_str, sep_str, dest_chars, total_size, is_first_elem); @@ -158,7 +145,7 @@ struct ArrayJoinImpl { continue; } - StringRef src_str_ref = src_data.get_data_at(j); + StringRef src_str_ref = arr.value_at(j); _fill_result_string(i, src_str_ref, sep_str, dest_chars, total_size, is_first_elem); } diff --git a/be/test/core/column/column_array_view_test.cpp b/be/test/core/column/column_array_view_test.cpp new file mode 100644 index 00000000000000..57492c958171be --- /dev/null +++ b/be/test/core/column/column_array_view_test.cpp @@ -0,0 +1,290 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "core/column/column_array_view.h" + +#include + +#include "core/column/column_array.h" +#include "core/column/column_const.h" +#include "core/column/column_nullable.h" +#include "core/column/column_string.h" +#include "core/column/column_vector.h" +#include "core/data_type/data_type_number.h" +#include "core/data_type/data_type_string.h" +#include "testutil/column_helper.h" + +namespace doris { + +// Helper: build a ColumnArray with Nullable(ColumnInt32) nested data. +// arrays: each inner vector is one row's array elements +// element_nulls: parallel to the flattened data, 1 = null +// row_nulls: per-row outer null (empty means no outer nullable wrapper) +static ColumnPtr build_int32_array_column(const std::vector>& arrays, + const std::vector& element_nulls, + const std::vector& row_nulls = {}) { + // Build nested data column (Nullable(Int32)) + auto data_col = ColumnInt32::create(); + auto null_col = ColumnUInt8::create(); + size_t flat_idx = 0; + for (const auto& arr : arrays) { + for (auto val : arr) { + data_col->insert_value(val); + null_col->insert_value(flat_idx < element_nulls.size() ? 
element_nulls[flat_idx] : 0); + flat_idx++; + } + } + auto nullable_data = ColumnNullable::create(std::move(data_col), std::move(null_col)); + + // Build offsets + auto offsets = ColumnArray::ColumnOffsets::create(); + size_t offset = 0; + for (const auto& arr : arrays) { + offset += arr.size(); + offsets->insert_value(offset); + } + + ColumnPtr array_col = ColumnArray::create(std::move(nullable_data), std::move(offsets)); + + // Wrap in outer Nullable if row_nulls provided + if (!row_nulls.empty()) { + auto outer_null = ColumnUInt8::create(); + for (auto v : row_nulls) { + outer_null->insert_value(v); + } + array_col = ColumnNullable::create(array_col->assume_mutable(), std::move(outer_null)); + } + return array_col; +} + +// Helper: build a ColumnArray with Nullable(ColumnString) nested data. +static ColumnPtr build_string_array_column(const std::vector>& arrays, + const std::vector& element_nulls, + const std::vector& row_nulls = {}) { + auto data_col = ColumnString::create(); + auto null_col = ColumnUInt8::create(); + size_t flat_idx = 0; + for (const auto& arr : arrays) { + for (const auto& val : arr) { + data_col->insert_data(val.data(), val.size()); + null_col->insert_value(flat_idx < element_nulls.size() ? 
element_nulls[flat_idx] : 0); + flat_idx++; + } + } + auto nullable_data = ColumnNullable::create(std::move(data_col), std::move(null_col)); + + auto offsets = ColumnArray::ColumnOffsets::create(); + size_t offset = 0; + for (const auto& arr : arrays) { + offset += arr.size(); + offsets->insert_value(offset); + } + + ColumnPtr array_col = ColumnArray::create(std::move(nullable_data), std::move(offsets)); + + if (!row_nulls.empty()) { + auto outer_null = ColumnUInt8::create(); + for (auto v : row_nulls) { + outer_null->insert_value(v); + } + array_col = ColumnNullable::create(array_col->assume_mutable(), std::move(outer_null)); + } + return array_col; +} + +// ==================== ArrayDataView (index-based) Tests ==================== + +// Test basic non-nullable, non-const array column +// Row 0: [10, 20, 30], Row 1: [40], Row 2: [50, 60] +TEST(ColumnArrayViewTest, IndexAccess_basic) { + auto col = build_int32_array_column({{10, 20, 30}, {40}, {50, 60}}, {0, 0, 0, 0, 0, 0}); + auto view = ColumnArrayView::create(col); + + EXPECT_EQ(view.size(), 3); + EXPECT_FALSE(view.is_const); + + // Row 0 + EXPECT_FALSE(view.is_null_at(0)); + auto arr0 = view[0]; + EXPECT_EQ(arr0.size(), 3); + EXPECT_EQ(arr0.value_at(0), 10); + EXPECT_EQ(arr0.value_at(1), 20); + EXPECT_EQ(arr0.value_at(2), 30); + EXPECT_FALSE(arr0.is_null_at(0)); + EXPECT_FALSE(arr0.is_null_at(1)); + EXPECT_FALSE(arr0.is_null_at(2)); + + // Row 1 + auto arr1 = view[1]; + EXPECT_EQ(arr1.size(), 1); + EXPECT_EQ(arr1.value_at(0), 40); + + // Row 2 + auto arr2 = view[2]; + EXPECT_EQ(arr2.size(), 2); + EXPECT_EQ(arr2.value_at(0), 50); + EXPECT_EQ(arr2.value_at(1), 60); +} + +TEST(ColumnArrayViewTest, IndexAccess_get_data) { + auto col = build_int32_array_column({{10, 20, 30}, {40}, {50, 60}}, {0, 0, 0, 0, 0, 0}); + auto view = ColumnArrayView::create(col); + + auto arr0 = view[0]; + const auto* data0 = arr0.get_data(); + ASSERT_NE(data0, nullptr); + EXPECT_EQ(data0[0], 10); + EXPECT_EQ(data0[1], 20); + 
EXPECT_EQ(data0[2], 30); + + auto arr1 = view[1]; + const auto* data1 = arr1.get_data(); + ASSERT_NE(data1, nullptr); + EXPECT_EQ(data1[0], 40); + + auto arr2 = view[2]; + const auto* data2 = arr2.get_data(); + ASSERT_NE(data2, nullptr); + EXPECT_EQ(data2[0], 50); + EXPECT_EQ(data2[1], 60); +} + +// Test with null elements inside arrays +// Row 0: [1, NULL, 3], Row 1: [NULL] +TEST(ColumnArrayViewTest, IndexAccess_with_null_elements) { + auto col = build_int32_array_column({{1, 0, 3}, {0}}, {0, 1, 0, 1}); + auto view = ColumnArrayView::create(col); + + EXPECT_EQ(view.size(), 2); + + auto arr0 = view[0]; + EXPECT_EQ(arr0.size(), 3); + EXPECT_FALSE(arr0.is_null_at(0)); + EXPECT_TRUE(arr0.is_null_at(1)); + EXPECT_FALSE(arr0.is_null_at(2)); + EXPECT_EQ(arr0.value_at(0), 1); + EXPECT_EQ(arr0.value_at(2), 3); + + auto arr1 = view[1]; + EXPECT_EQ(arr1.size(), 1); + EXPECT_TRUE(arr1.is_null_at(0)); +} + +// Test with outer nullable (some rows are entirely null) +// Row 0: [1, 2], Row 1: NULL, Row 2: [5] +TEST(ColumnArrayViewTest, IndexAccess_outer_nullable) { + auto col = build_int32_array_column({{1, 2}, {0}, {5}}, {0, 0, 0, 0}, {0, 1, 0}); + auto view = ColumnArrayView::create(col); + + EXPECT_EQ(view.size(), 3); + EXPECT_FALSE(view.is_null_at(0)); + EXPECT_TRUE(view.is_null_at(1)); + EXPECT_FALSE(view.is_null_at(2)); + + auto arr0 = view[0]; + EXPECT_EQ(arr0.size(), 2); + EXPECT_EQ(arr0.value_at(0), 1); + EXPECT_EQ(arr0.value_at(1), 2); + + auto arr2 = view[2]; + EXPECT_EQ(arr2.size(), 1); + EXPECT_EQ(arr2.value_at(0), 5); +} + +// Test const column: Const(Array([10, 20])) with 4 rows +TEST(ColumnArrayViewTest, IndexAccess_const) { + auto inner = build_int32_array_column({{10, 20}}, {0, 0}); + ColumnPtr const_col = ColumnConst::create(inner, 4); + auto view = ColumnArrayView::create(const_col); + + EXPECT_EQ(view.size(), 4); + EXPECT_TRUE(view.is_const); + + for (size_t i = 0; i < 4; ++i) { + EXPECT_FALSE(view.is_null_at(i)); + auto arr = view[i]; + EXPECT_EQ(arr.size(), 
2); + EXPECT_EQ(arr.value_at(0), 10); + EXPECT_EQ(arr.value_at(1), 20); + } +} + +// Test Const(Nullable(Array([7, 8, 9]))) with 3 rows, non-null +TEST(ColumnArrayViewTest, IndexAccess_const_nullable) { + auto inner = build_int32_array_column({{7, 8, 9}}, {0, 0, 0}, {0}); + ColumnPtr const_col = ColumnConst::create(inner, 3); + auto view = ColumnArrayView::create(const_col); + + EXPECT_EQ(view.size(), 3); + EXPECT_TRUE(view.is_const); + + for (size_t i = 0; i < 3; ++i) { + EXPECT_FALSE(view.is_null_at(i)); + auto arr = view[i]; + EXPECT_EQ(arr.size(), 3); + EXPECT_EQ(arr.value_at(0), 7); + EXPECT_EQ(arr.value_at(1), 8); + EXPECT_EQ(arr.value_at(2), 9); + } +} + +// Test Const(Nullable(NULL)) with 3 rows, all null +TEST(ColumnArrayViewTest, IndexAccess_const_nullable_null) { + // Build one-row array, then wrap as nullable with null=1, then const + auto inner = build_int32_array_column({{0}}, {0}, {1}); + ColumnPtr const_col = ColumnConst::create(inner, 3); + auto view = ColumnArrayView::create(const_col); + + EXPECT_EQ(view.size(), 3); + EXPECT_TRUE(view.is_const); + + for (size_t i = 0; i < 3; ++i) { + EXPECT_TRUE(view.is_null_at(i)); + } +} + +// Test empty array rows +// Row 0: [], Row 1: [100], Row 2: [] +TEST(ColumnArrayViewTest, IndexAccess_empty_arrays) { + auto col = build_int32_array_column({{}, {100}, {}}, {0}); + auto view = ColumnArrayView::create(col); + + EXPECT_EQ(view.size(), 3); + EXPECT_EQ(view[0].size(), 0); + EXPECT_EQ(view[1].size(), 1); + EXPECT_EQ(view[1].value_at(0), 100); + EXPECT_EQ(view[2].size(), 0); +} + +// Test string array +// Row 0: ["hello", "world"], Row 1: ["test"] +TEST(ColumnArrayViewTest, IndexAccess_string) { + auto col = build_string_array_column({{"hello", "world"}, {"test"}}, {0, 0, 0}); + auto view = ColumnArrayView::create(col); + + EXPECT_EQ(view.size(), 2); + auto arr0 = view[0]; + EXPECT_EQ(arr0.size(), 2); + EXPECT_EQ(arr0.value_at(0).to_string(), "hello"); + EXPECT_EQ(arr0.value_at(1).to_string(), "world"); + + auto 
arr1 = view[1]; + EXPECT_EQ(arr1.size(), 1); + EXPECT_EQ(arr1.value_at(0).to_string(), "test"); +} + +} // namespace doris \ No newline at end of file From d1e5dcdff08ff3ab8c6905165fd15f5f102a58d3 Mon Sep 17 00:00:00 2001 From: Mryange Date: Tue, 2 Jun 2026 13:50:32 +0800 Subject: [PATCH 2/6] [refine](function) use concrete column pointers for local result columns (#63938) Some BE expression and storage code creates a concrete column type and then immediately casts the generic `ColumnPtr` or `MutableColumnPtr` back to the same concrete type before writing data. This adds unnecessary casts and makes the ownership intent less direct. Root cause: several local result columns were declared as generic column pointers even though the concrete column type was already known at creation time. This PR refines those local variables to keep concrete column pointers where the type is explicit, and directly accesses the concrete column data. It also updates the explode-numbers table function member to use a concrete column pointer. The change is limited to local refactoring and does not change runtime behavior. None - Test - [ ] Regression test - [ ] Unit Test - [ ] Manual test (add detailed scripts or steps below) - [ ] No need to test or manual test. Explain why: - [ ] This is a refactor/code format and no logic has been changed. - [ ] Previous test can cover this change. - [ ] No code files have been changed. - [ ] Other reason - Behavior changed: - [ ] No. - [ ] Yes. - Does this need documentation? - [ ] No. - [ ] Yes. 
- [ ] Confirm the release note - [ ] Confirm test cases - [ ] Confirm document - [ ] Add branch pick label (cherry picked from commit a0a09b0eac406d143a57752ff06430e2dd14d4e0) --- .../function/array/function_array_exists.cpp | 5 +- be/src/exprs/function/function_ip.h | 11 +- be/src/exprs/function/function_jsonb.cpp | 4 +- .../function/function_other_types_to_date.cpp | 101 ++++++++---------- be/src/exprs/function/random.cpp | 2 +- be/src/exprs/function/uniform.cpp | 2 +- .../exprs/table_function/vexplode_numbers.cpp | 4 +- .../exprs/table_function/vexplode_numbers.h | 2 +- be/src/storage/iterator/olap_data_convertor.h | 5 +- .../variant/hierarchical_data_iterator.cpp | 14 ++- .../variant_doc_snpashot_compact_iterator.h | 9 +- 11 files changed, 73 insertions(+), 86 deletions(-) diff --git a/be/src/exprs/function/array/function_array_exists.cpp b/be/src/exprs/function/array/function_array_exists.cpp index 9009ba2f7550d8..ffa74d24e8d633 100644 --- a/be/src/exprs/function/array/function_array_exists.cpp +++ b/be/src/exprs/function/array/function_array_exists.cpp @@ -78,9 +78,8 @@ class FunctionArrayExists : public IFunction { nested_nullable_column.get_null_map_column_ptr()->clone_resized(nested_column_size); // 2. 
compute result - MutableColumnPtr result_column = ColumnUInt8::create(nested_column_size, 0); - auto* __restrict result_column_data = - assert_cast(*result_column).get_data().data(); + auto result_column = ColumnUInt8::create(nested_column_size, 0); + auto* __restrict result_column_data = result_column->get_data().data(); MutableColumnPtr result_offset_column = first_off_data.clone_resized(first_off_data.size()); const auto* __restrict nested_column_data = assert_cast(*nested_column).get_data().data(); diff --git a/be/src/exprs/function/function_ip.h b/be/src/exprs/function/function_ip.h index 7f704b078de4c6..3278caa5e465ee 100644 --- a/be/src/exprs/function/function_ip.h +++ b/be/src/exprs/function/function_ip.h @@ -1346,10 +1346,10 @@ class FunctionIPv6FromUInt128StringOrNull : public IFunction { unpack_if_const(ipv6_column_with_type_and_name.column); const auto* ipv6_addr_column = assert_cast(ipv6_column.get()); // result is nullable column - auto col_res = ColumnNullable::create(ColumnIPv6::create(input_rows_count, 0), - ColumnUInt8::create(input_rows_count, 1)); - auto& col_res_data = assert_cast(&col_res->get_nested_column())->get_data(); - auto& res_null_map_data = col_res->get_null_map_data(); + auto col_res_nested = ColumnIPv6::create(input_rows_count, 0); + auto col_res_null_map = ColumnUInt8::create(input_rows_count, 1); + auto& col_res_data = col_res_nested->get_data(); + auto& res_null_map_data = col_res_null_map->get_data(); for (size_t i = 0; i < input_rows_count; ++i) { IPv6 ipv6 = 0; @@ -1365,7 +1365,8 @@ class FunctionIPv6FromUInt128StringOrNull : public IFunction { } } - block.replace_by_position(result, std::move(col_res)); + block.replace_by_position(result, ColumnNullable::create(std::move(col_res_nested), + std::move(col_res_null_map))); return Status::OK(); } }; diff --git a/be/src/exprs/function/function_jsonb.cpp b/be/src/exprs/function/function_jsonb.cpp index 28015309cdb6d7..e572573a3c825a 100644 --- 
a/be/src/exprs/function/function_jsonb.cpp +++ b/be/src/exprs/function/function_jsonb.cpp @@ -711,11 +711,11 @@ class FunctionJsonbExtractPath : public IFunction { return Status::OK(); }; - MutableColumnPtr result_null_map_column; + ColumnUInt8::MutablePtr result_null_map_column; NullMap* result_null_map = nullptr; if (data_null_map || path_null_map) { result_null_map_column = ColumnUInt8::create(input_rows_count, 0); - result_null_map = &static_cast(*result_null_map_column).get_data(); + result_null_map = &result_null_map_column->get_data(); if (data_null_map) { VectorizedUtils::update_null_map(*result_null_map, *data_null_map, diff --git a/be/src/exprs/function/function_other_types_to_date.cpp b/be/src/exprs/function/function_other_types_to_date.cpp index bc24d76df278ea..122fdb9338ad57 100644 --- a/be/src/exprs/function/function_other_types_to_date.cpp +++ b/be/src/exprs/function/function_other_types_to_date.cpp @@ -142,31 +142,29 @@ struct StrToDate { // Because of we cant distinguish by return_type when we find function. so the return_type may NOT be same with real return type // which decided by FE. we directly use block column's type which decided by FE. 
if (block.get_by_position(result).type->get_primitive_type() == TYPE_DATEV2) { - res = ColumnDateV2::create(input_rows_count); + auto res_column = ColumnDateV2::create(input_rows_count); if (col_const[1]) { - execute_impl_const_right( - context, ldata, loffsets, specific_char_column->get_data_at(0), - result_null_map, - static_cast(res->assert_mutable().get())->get_data()); + execute_impl_const_right(context, ldata, loffsets, + specific_char_column->get_data_at(0), + result_null_map, res_column->get_data()); } else { - execute_impl( - context, ldata, loffsets, rdata, roffsets, result_null_map, - static_cast(res->assert_mutable().get())->get_data()); + execute_impl(context, ldata, loffsets, rdata, roffsets, + result_null_map, res_column->get_data()); } + res = std::move(res_column); } else { DCHECK(block.get_by_position(result).type->get_primitive_type() == TYPE_DATETIMEV2); - res = ColumnDateTimeV2::create(input_rows_count); + auto res_column = ColumnDateTimeV2::create(input_rows_count); if (col_const[1]) { - execute_impl_const_right( - context, ldata, loffsets, specific_char_column->get_data_at(0), - result_null_map, - static_cast(res->assert_mutable().get())->get_data()); + execute_impl_const_right(context, ldata, loffsets, + specific_char_column->get_data_at(0), + result_null_map, res_column->get_data()); } else { - execute_impl( - context, ldata, loffsets, rdata, roffsets, result_null_map, - static_cast(res->assert_mutable().get())->get_data()); + execute_impl(context, ldata, loffsets, rdata, roffsets, + result_null_map, res_column->get_data()); } + res = std::move(res_column); } // Wrap result in nullable column only if input has nullable arguments @@ -292,17 +290,13 @@ struct MakeDateImpl { const auto* year_col = assert_cast(argument_columns[0].get()); const auto* dayofyear_col = assert_cast(argument_columns[1].get()); - ColumnPtr res_column; - - res_column = ColumnDateV2::create(input_rows_count); + auto res_column = ColumnDateV2::create(input_rows_count); 
if (col_const[1]) { - execute_impl_right_const( - year_col->get_data(), dayofyear_col->get_element(0), result_null_map, - static_cast(res_column->assert_mutable().get())->get_data()); + execute_impl_right_const(year_col->get_data(), dayofyear_col->get_element(0), + result_null_map, res_column->get_data()); } else { - execute_impl( - year_col->get_data(), dayofyear_col->get_data(), result_null_map, - static_cast(res_column->assert_mutable().get())->get_data()); + execute_impl(year_col->get_data(), dayofyear_col->get_data(), result_null_map, + res_column->get_data()); } // Wrap result in nullable column only if input has nullable arguments @@ -451,13 +445,6 @@ struct MakeTimeImpl { } }; -struct DateTruncState { - using Callback_function = - std::function; - Callback_function callback_function; - cctz::time_zone timezone; -}; - template struct DateTrunc { static constexpr auto name = "date_trunc"; @@ -465,6 +452,13 @@ struct DateTrunc { using ColumnType = typename PrimitiveTypeTraits::ColumnType; using DateValueType = typename PrimitiveTypeTraits::CppType; + struct State { + using CallbackFunction = + std::function; + CallbackFunction callback_function; + cctz::time_zone timezone; + }; + static bool is_variadic() { return true; } static size_t get_number_of_arguments() { return 2; } @@ -495,7 +489,7 @@ struct DateTrunc { std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) { return std::tolower(c); }); - std::shared_ptr state = std::make_shared(); + std::shared_ptr state = std::make_shared(); state->timezone = context->state()->timezone_obj(); if (std::strncmp("year", lower_str.data(), 4) == 0) { state->callback_function = &execute_impl_right_const; @@ -528,21 +522,22 @@ struct DateTrunc { const auto& datetime_column = block.get_by_position(arguments[DateArgIsFirst ? 
0 : 1]) .column->convert_to_full_column_if_const(); - ColumnPtr res = ColumnType::create(input_rows_count); - auto* state = reinterpret_cast( + auto res = ColumnType::create(input_rows_count); + auto* state = reinterpret_cast( context->get_function_state(FunctionContext::THREAD_LOCAL)); DCHECK(state != nullptr); - state->callback_function(datetime_column, res, input_rows_count, state->timezone); + state->callback_function(datetime_column, *res, input_rows_count, state->timezone); block.replace_by_position(result, std::move(res)); return Status::OK(); } private: template - static void execute_impl_right_const(const ColumnPtr& datetime_column, ColumnPtr& result_column, - size_t input_rows_count, const cctz::time_zone& timezone) { + static void execute_impl_right_const(const ColumnPtr& datetime_column, + ColumnType& result_column, size_t input_rows_count, + const cctz::time_zone& timezone) { auto& data = static_cast(datetime_column.get())->get_data(); - auto& res = static_cast(result_column->assert_mutable().get())->get_data(); + auto& res = result_column.get_data(); for (size_t i = 0; i < input_rows_count; ++i) { auto dt = data[i]; // datetime_trunc only raise only when dt invalid which is impossible. so we dont throw error better. 
@@ -609,15 +604,15 @@ class FromDays : public IFunction { ColumnPtr res_column; if (block.get_by_position(result).type->get_primitive_type() == PrimitiveType::TYPE_DATE) { - res_column = ColumnDate::create(input_rows_count); - _execute( - input_rows_count, data_col->get_data(), result_null_map, - static_cast(res_column->assert_mutable().get())->get_data()); + auto column_date = ColumnDate::create(input_rows_count); + _execute(input_rows_count, data_col->get_data(), result_null_map, + column_date->get_data()); + res_column = std::move(column_date); } else { - res_column = ColumnDateV2::create(input_rows_count); - _execute>( - input_rows_count, data_col->get_data(), result_null_map, - static_cast(res_column->assert_mutable().get())->get_data()); + auto column_datev2 = ColumnDateV2::create(input_rows_count); + _execute>(input_rows_count, data_col->get_data(), + result_null_map, column_datev2->get_data()); + res_column = std::move(column_datev2); } // Wrap result in nullable column only if input has nullable arguments @@ -1053,10 +1048,8 @@ struct LastDayImpl { const auto is_nullable = block.get_by_position(result).type->is_nullable(); auto data_col = assert_cast(argument_column.get()); auto res_column = ResultColumnType::create(input_rows_count); - execute_straight( - input_rows_count, data_col->get_data(), - static_cast(res_column->assert_mutable().get())->get_data(), - result_null_map); + execute_straight(input_rows_count, data_col->get_data(), res_column->get_data(), + result_null_map); if (is_nullable) { block.replace_by_position(result, @@ -1128,10 +1121,8 @@ struct ToMondayImpl { const auto is_nullable = block.get_by_position(result).type->is_nullable(); auto data_col = assert_cast(argument_column.get()); auto res_column = ResultColumnType::create(input_rows_count); - execute_straight( - input_rows_count, data_col->get_data(), - static_cast(res_column->assert_mutable().get())->get_data(), - result_null_map); + execute_straight(input_rows_count, 
data_col->get_data(), res_column->get_data(), + result_null_map); if (is_nullable) { block.replace_by_position(result, diff --git a/be/src/exprs/function/random.cpp b/be/src/exprs/function/random.cpp index b6fd52b225265d..797e477ae93e20 100644 --- a/be/src/exprs/function/random.cpp +++ b/be/src/exprs/function/random.cpp @@ -144,7 +144,7 @@ class Random : public IFunction { static const double min = 0.0; static const double max = 1.0; auto res_column = ColumnFloat64::create(input_rows_count); - auto& res_data = static_cast(*res_column).get_data(); + auto& res_data = res_column->get_data(); auto* generator = reinterpret_cast( context->get_function_state(FunctionContext::THREAD_LOCAL)); diff --git a/be/src/exprs/function/uniform.cpp b/be/src/exprs/function/uniform.cpp index e639df7a2958bb..d3e16fac0e2115 100644 --- a/be/src/exprs/function/uniform.cpp +++ b/be/src/exprs/function/uniform.cpp @@ -106,7 +106,7 @@ struct UniformDoubleImpl { const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) { auto res_column = ColumnFloat64::create(input_rows_count); - auto& res_data = static_cast(*res_column).get_data(); + auto& res_data = res_column->get_data(); // Get min and max values (constants) const auto& left = diff --git a/be/src/exprs/table_function/vexplode_numbers.cpp b/be/src/exprs/table_function/vexplode_numbers.cpp index 43a93ffe877279..344d0083c8c85c 100644 --- a/be/src/exprs/table_function/vexplode_numbers.cpp +++ b/be/src/exprs/table_function/vexplode_numbers.cpp @@ -65,14 +65,14 @@ Status VExplodeNumbersTableFunction::process_init(Block* block, RuntimeState* st _cur_size = assert_cast(column_nested.get())->get_element(0); } - ((ColumnInt32*)_elements_column.get())->clear(); + _elements_column->clear(); //_cur_size may be a negative number _cur_size = std::max(static_cast(0L), _cur_size); if (_cur_size && _cur_size <= state->batch_size()) { // avoid elements_column too big or empty _is_const = true; // use const optimize for (int i = 0; i < 
_cur_size; i++) { - ((ColumnInt32*)_elements_column.get())->insert_value(i); + _elements_column->insert_value(i); } } } diff --git a/be/src/exprs/table_function/vexplode_numbers.h b/be/src/exprs/table_function/vexplode_numbers.h index 4108416bb70c21..2c68395bed2934 100644 --- a/be/src/exprs/table_function/vexplode_numbers.h +++ b/be/src/exprs/table_function/vexplode_numbers.h @@ -85,7 +85,7 @@ class VExplodeNumbersTableFunction : public TableFunction { private: ColumnPtr _value_column; - ColumnPtr _elements_column = ColumnInt32::create(); + ColumnInt32::MutablePtr _elements_column = ColumnInt32::create(); }; #include "common/compile_check_end.h" diff --git a/be/src/storage/iterator/olap_data_convertor.h b/be/src/storage/iterator/olap_data_convertor.h index a23369f569e567..c0bda171d1ce2a 100644 --- a/be/src/storage/iterator/olap_data_convertor.h +++ b/be/src/storage/iterator/olap_data_convertor.h @@ -182,11 +182,10 @@ class OlapBlockDataConvertor { static ColumnPtr clone_and_padding(const ColumnString* input, size_t padding_length) { auto column = ColumnString::create(); - auto padded_column = assert_cast(column->assert_mutable().get()); column->offsets.resize(input->size()); column->chars.resize(input->size() * padding_length); - memset(padded_column->chars.data(), 0, input->size() * padding_length); + memset(column->chars.data(), 0, input->size() * padding_length); for (size_t i = 0; i < input->size(); i++) { column->offsets[i] = cast_set((i + 1) * padding_length); @@ -198,7 +197,7 @@ class OlapBlockDataConvertor { << ", real=" << str.size; if (str.size) { - memcpy(padded_column->chars.data() + i * padding_length, str.data, str.size); + memcpy(column->chars.data() + i * padding_length, str.data, str.size); } } diff --git a/be/src/storage/segment/variant/hierarchical_data_iterator.cpp b/be/src/storage/segment/variant/hierarchical_data_iterator.cpp index 28fa108b3b8588..7dad2b767ff3f4 100644 --- a/be/src/storage/segment/variant/hierarchical_data_iterator.cpp +++ 
b/be/src/storage/segment/variant/hierarchical_data_iterator.cpp @@ -224,11 +224,9 @@ Status HierarchicalDataIterator::_process_nested_columns( // will type the type of ColumnVariant::NESTED_TYPE, whih is Nullable>. for (const auto& entry : nested_subcolumns) { const auto* base_array = - check_and_get_column(*remove_nullable(entry.second[0].column)); - MutableColumnPtr nested_object = - ColumnVariant::create(0, false, base_array->get_data().size()); + assert_cast(remove_nullable(entry.second[0].column).get()); + auto nested_object_variant = ColumnVariant::create(0, false, base_array->get_data().size()); MutableColumnPtr offset = IColumn::mutate(base_array->get_offsets_ptr()); - auto* nested_object_ptr = assert_cast(nested_object.get()); // flatten nested arrays for (const auto& subcolumn : entry.second) { const auto& column = subcolumn.column; @@ -253,13 +251,13 @@ Status HierarchicalDataIterator::_process_nested_columns( check_and_get_data_type(remove_nullable(type).get()) ->get_nested_type(); // add sub path without parent prefix - nested_object_ptr->add_sub_column( + nested_object_variant->add_sub_column( subcolumn.path.copy_pop_nfront(entry.first.get_parts().size()), std::move(flattend_column), std::move(flattend_type)); } - const size_t nested_object_size = nested_object->size(); - nested_object = ColumnNullable::create(std::move(nested_object), - ColumnUInt8::create(nested_object_size, 0)); + const size_t nested_object_size = nested_object_variant->size(); + MutableColumnPtr nested_object = ColumnNullable::create( + std::move(nested_object_variant), ColumnUInt8::create(nested_object_size, 0)); auto array = ColumnArray::create(std::move(nested_object), std::move(offset)); const size_t array_size = array->size(); auto nullable_array = diff --git a/be/src/storage/segment/variant/variant_doc_snpashot_compact_iterator.h b/be/src/storage/segment/variant/variant_doc_snpashot_compact_iterator.h index ae524630a1a477..33430ba8bf9ac9 100644 --- 
a/be/src/storage/segment/variant/variant_doc_snpashot_compact_iterator.h +++ b/be/src/storage/segment/variant/variant_doc_snpashot_compact_iterator.h @@ -56,11 +56,10 @@ class VariantDocValueCompactIterator : public ColumnIterator { Status _set_doc_value_into_variant(MutableColumnPtr& dst, MutableColumnPtr&& doc_value_column, size_t count) const { auto& variant = assert_cast(*dst); - MutableColumnPtr container = ColumnVariant::create(variant.max_subcolumns_count(), - variant.enable_doc_mode(), count); - auto& container_variant = assert_cast(*container); - container_variant.set_doc_value_column(std::move(doc_value_column)); - variant.insert_range_from(container_variant, 0, count); + auto container = ColumnVariant::create(variant.max_subcolumns_count(), + variant.enable_doc_mode(), count); + container->set_doc_value_column(std::move(doc_value_column)); + variant.insert_range_from(*container, 0, count); return Status::OK(); } From 1e98435116710e84ab63810f0e0d49466622ecab Mon Sep 17 00:00:00 2001 From: Mryange Date: Fri, 29 May 2026 10:42:08 +0800 Subject: [PATCH 3/6] [fix](function) deduplicate map keys after string-to-map cast (#63713) Problem Summary: Casting a JSON string with duplicated object keys to MAP kept all duplicated entries because the string-to-complex cast path returned the generic wrapper directly and skipped ColumnMap::deduplicate_keys(). This made string-to-map casts inconsistent with MAP constructor semantics where the last value wins. 
Reproduction SQL: ```sql SELECT CAST('{"a":1,"a":2}' AS MAP<STRING, INT>); SELECT size(CAST('{"a":1,"a":2}' AS MAP<STRING, INT>)); SELECT element_at(CAST('{"a":1,"a":2}' AS MAP<STRING, INT>), 'a'); SELECT CAST('{"outer":{"a":1,"a":2}}' AS MAP<STRING, MAP<STRING, INT>>); SELECT element_at(element_at(CAST('{"outer":{"a":1,"a":2}}' AS MAP<STRING, MAP<STRING, INT>>), 'outer'), 'a'); SELECT map('a',1,'a',2); SELECT size(map('a',1,'a',2)); SELECT element_at(map('a',1,'a',2), 'a'); ``` Before this fix: ```text {"a":1, "a":2} 2 1 {"outer":{"a":1, "a":2}} 1 {"a":2} 1 2 ``` After this fix: ```text {"a":2} 1 2 {"outer":{"a":2}} 2 {"a":2} 1 2 ``` (cherry picked from commit b653831c9fc7ad6a182b4bdfdc028c0134448c59) --- be/src/exprs/function/cast/cast_to_map.h | 50 ++++++++++++++++--- .../basic-elements/data-types/map-md.out | 15 ++++++ .../basic-elements/data-types/map-md.groovy | 10 ++++ 3 files changed, 69 insertions(+), 6 deletions(-) diff --git a/be/src/exprs/function/cast/cast_to_map.h b/be/src/exprs/function/cast/cast_to_map.h index 80ad74b02d3063..ca6b8e5e90bd3a 100644 --- a/be/src/exprs/function/cast/cast_to_map.h +++ b/be/src/exprs/function/cast/cast_to_map.h @@ -22,15 +22,53 @@ namespace doris::CastWrapper { #include "common/compile_check_begin.h" + +inline Status deduplicate_map_keys_in_result(Block& block, uint32_t result) { + auto result_column_name = block.get_by_position(result).column->get_name(); + auto mutable_result_column = IColumn::mutate(std::move(block.get_by_position(result).column)); + + if (auto* nullable_column = check_and_get_column(*mutable_result_column)) { + auto nested_column = IColumn::mutate(nullable_column->get_nested_column_ptr()); + auto* map_column = check_and_get_column(*nested_column); + if (!map_column) { + return Status::RuntimeError("Illegal column {} for function CAST AS MAP", + result_column_name); + } + + RETURN_IF_ERROR(map_column->deduplicate_keys(true)); + ColumnPtr nested_column_ptr = std::move(nested_column); + nullable_column->change_nested_column(nested_column_ptr); + } else { + auto* map_column = 
check_and_get_column(*mutable_result_column); + if (!map_column) { + return Status::RuntimeError("Illegal column {} for function CAST AS MAP", + result_column_name); + } + + RETURN_IF_ERROR(map_column->deduplicate_keys(true)); + } + + block.get_by_position(result).column = std::move(mutable_result_column); + return Status::OK(); +} + +inline WrapperType wrap_string_to_map_wrapper(WrapperType wrapper) { + return [wrapper = std::move(wrapper)](FunctionContext* context, Block& block, + const ColumnNumbers& arguments, uint32_t result, + size_t input_rows_count, + const NullMap::value_type* null_map = nullptr) { + RETURN_IF_ERROR(wrapper(context, block, arguments, result, input_rows_count, null_map)); + return deduplicate_map_keys_in_result(block, result); + }; +} + //TODO(Amory) . Need support more cast for key , value for map WrapperType create_map_wrapper(FunctionContext* context, const DataTypePtr& from_type, const DataTypeMap& to_type) { if (is_string_type(from_type->get_primitive_type())) { - if (context->enable_strict_mode()) { - return cast_from_string_to_complex_type_strict_mode; - } else { - return cast_from_string_to_complex_type; - } + auto wrapper = context->enable_strict_mode() ? 
cast_from_string_to_complex_type_strict_mode + : cast_from_string_to_complex_type; + return wrap_string_to_map_wrapper(wrapper); } const auto* from = check_and_get_data_type(from_type.get()); if (!from) { @@ -83,4 +121,4 @@ WrapperType create_map_wrapper(FunctionContext* context, const DataTypePtr& from }; } #include "common/compile_check_end.h" -} // namespace doris::CastWrapper \ No newline at end of file +} // namespace doris::CastWrapper diff --git a/regression-test/data/doc/sql-manual/basic-elements/data-types/map-md.out b/regression-test/data/doc/sql-manual/basic-elements/data-types/map-md.out index a9030f76745d74..db9b0c7442766d 100644 --- a/regression-test/data/doc/sql-manual/basic-elements/data-types/map-md.out +++ b/regression-test/data/doc/sql-manual/basic-elements/data-types/map-md.out @@ -77,6 +77,21 @@ -- !sql -- {"key1":1, "key2":2} +-- !sql -- +{"a":2} + +-- !sql -- +1 + +-- !sql -- +2 + +-- !sql -- +{"outer":{"a":2}} + +-- !sql -- +2 + -- !sql -- {"key1":1, "key2":2} 2 diff --git a/regression-test/suites/doc/sql-manual/basic-elements/data-types/map-md.groovy b/regression-test/suites/doc/sql-manual/basic-elements/data-types/map-md.groovy index 14e7c4596db8b4..7dc32500d3a46a 100644 --- a/regression-test/suites/doc/sql-manual/basic-elements/data-types/map-md.groovy +++ b/regression-test/suites/doc/sql-manual/basic-elements/data-types/map-md.groovy @@ -159,6 +159,16 @@ suite("map-md", "p0") { qt_sql """ SELECT CAST('{"key1":1,"key2":2}' AS MAP) """ + qt_sql """ SELECT CAST('{"a":1,"a":2}' AS MAP) """ + + qt_sql """ SELECT size(CAST('{"a":1,"a":2}' AS MAP)) """ + + qt_sql """ SELECT element_at(CAST('{"a":1,"a":2}' AS MAP), 'a') """ + + qt_sql """ SELECT CAST('{"outer":{"a":1,"a":2}}' AS MAP>) """ + + qt_sql """ SELECT element_at(element_at(CAST('{"outer":{"a":1,"a":2}}' AS MAP>), 'outer'), 'a') """ + sql """ DROP TABLE IF EXISTS ${tableName}; """ sql """ CREATE TABLE IF NOT EXISTS ${tableName} ( From d42ecb95b6d398ba36acdd4ec9284e92da8b35bd Mon Sep 17 
00:00:00 2001 From: Mryange Date: Thu, 21 May 2026 12:00:11 +0800 Subject: [PATCH 4/6] [refine](be) remove unsafe PURE annotations (#63440) Problem Summary: Some BE functions were marked with `PURE` even though their definitions are already visible in headers or they can allocate and throw exceptions. This change removes those annotations, because throwing from a `pure` function can make surrounding `catch` blocks unreliable: https://godbolt.org/z/Y7f73bKoY (cherry picked from commit b307a23967f2cc244eb49fa90005c9ed0fffa071) --- be/src/core/binary_cast.hpp | 2 +- be/src/core/column/column_nullable.h | 5 +---- be/src/core/data_type_serde/datelike_serde_common.hpp | 10 +++++----- be/src/core/string_ref.h | 3 +-- be/src/util/string_parser.hpp | 3 +-- be/src/util/thrift_util.h | 4 ++-- 6 files changed, 11 insertions(+), 16 deletions(-) diff --git a/be/src/core/binary_cast.hpp b/be/src/core/binary_cast.hpp index 7da0844a3cd312..3c8d9a50d9193b 100644 --- a/be/src/core/binary_cast.hpp +++ b/be/src/core/binary_cast.hpp @@ -35,7 +35,7 @@ static_assert(sizeof(DecimalV2Value) == sizeof(__int128_t)); // similar to reinterpret_cast but won't break strict-aliasing rules. 
you can treat it as std::bit_cast with type checking template -constexpr PURE To binary_cast(const From& from) { +constexpr To binary_cast(const From& from) { constexpr bool from_u64_to_db = match_v; constexpr bool from_i64_to_db = match_v; constexpr bool from_db_to_i64 = match_v; diff --git a/be/src/core/column/column_nullable.h b/be/src/core/column/column_nullable.h index bc0a02cd95f02d..34ee0dffebc9e2 100644 --- a/be/src/core/column/column_nullable.h +++ b/be/src/core/column/column_nullable.h @@ -20,7 +20,6 @@ #pragma once -#include "common/compiler_util.h" // IWYU pragma: keep #include "common/status.h" #include "core/assert_cast.h" #include "core/column/column.h" @@ -100,9 +99,7 @@ class ColumnNullable final : public COWHelper { std::string get_name() const override { return "Nullable(" + _nested_column->get_name() + ")"; } MutableColumnPtr clone_resized(size_t size) const override; size_t size() const override { return get_null_map_column().size(); } - PURE bool is_null_at(size_t n) const override { - return get_null_map_column().get_data()[n] != 0; - } + bool is_null_at(size_t n) const override { return get_null_map_column().get_data()[n] != 0; } Field operator[](size_t n) const override; void get(size_t n, Field& res) const override; bool get_bool(size_t n) const override { diff --git a/be/src/core/data_type_serde/datelike_serde_common.hpp b/be/src/core/data_type_serde/datelike_serde_common.hpp index c58a90ed62b7b9..f9db889cc8ba3e 100644 --- a/be/src/core/data_type_serde/datelike_serde_common.hpp +++ b/be/src/core/data_type_serde/datelike_serde_common.hpp @@ -47,19 +47,19 @@ enum class DatelikeFastParseResult : uint8_t { DATE_TIME, }; -inline PURE bool is_fixed_two_digit_ascii(const char* ptr) { +inline bool is_fixed_two_digit_ascii(const char* ptr) { return static_cast(ptr[0] - '0') < 10 && static_cast(ptr[1] - '0') < 10; } -inline PURE bool is_fixed_four_digit_ascii(const char* ptr) { +inline bool is_fixed_four_digit_ascii(const char* ptr) { return 
is_fixed_two_digit_ascii(ptr) && is_fixed_two_digit_ascii(ptr + 2); } -inline PURE uint32_t parse_fixed_two_digit_ascii(const char* ptr) { +inline uint32_t parse_fixed_two_digit_ascii(const char* ptr) { return (ptr[0] - '0') * 10 + (ptr[1] - '0'); } -inline PURE uint32_t parse_fixed_four_digit_ascii(const char* ptr) { +inline uint32_t parse_fixed_four_digit_ascii(const char* ptr) { return parse_fixed_two_digit_ascii(ptr) * 100 + parse_fixed_two_digit_ascii(ptr + 2); } @@ -117,7 +117,7 @@ inline DatelikeFastParseResult try_parse_fixed_canonical_datelike_prefix(const c return DatelikeFastParseResult::DATE_TIME; } -inline PURE uint32_t complete_4digit_year(uint32_t year) { +inline uint32_t complete_4digit_year(uint32_t year) { if (year < 70) { return year + 2000; // 00-69 -> 2000-2069 } else { diff --git a/be/src/core/string_ref.h b/be/src/core/string_ref.h index fb775fbe9e3deb..ca67ba91c06912 100644 --- a/be/src/core/string_ref.h +++ b/be/src/core/string_ref.h @@ -147,8 +147,7 @@ inline bool memequalSSE2Wide(const char* p1, const char* p2, size_t size) { // - s1/n1: ptr/len for the first string // - s2/n2: ptr/len for the second string // - len: min(n1, n2) - this can be more cheaply passed in by the caller -PURE inline int64_t string_compare(const char* s1, int64_t n1, const char* s2, int64_t n2, - int64_t len) { +inline int64_t string_compare(const char* s1, int64_t n1, const char* s2, int64_t n2, int64_t len) { DCHECK_EQ(len, std::min(n1, n2)); #if defined(__SSE4_2__) || defined(__aarch64__) while (len >= sse_util::CHARS_PER_128_BIT_REGISTER) { diff --git a/be/src/util/string_parser.hpp b/be/src/util/string_parser.hpp index c2be9a709cf0a2..d2a325cbb88123 100644 --- a/be/src/util/string_parser.hpp +++ b/be/src/util/string_parser.hpp @@ -36,7 +36,6 @@ #include #include -#include "common/compiler_util.h" // IWYU pragma: keep #include "common/status.h" #include "core/data_type/number_traits.h" #include "core/data_type/primitive_type.h" @@ -266,7 +265,7 @@ uint32_t 
count_valid_length(const char* s, const char* end) { inline auto count_digits = count_valid_length; -inline PURE std::string combine_tz_offset(char sign, uint32_t hour_offset, uint32_t minute_offset) { +inline std::string combine_tz_offset(char sign, uint32_t hour_offset, uint32_t minute_offset) { std::string result(6, '0'); result[0] = sign; result[1] = '0' + (hour_offset / 10); diff --git a/be/src/util/thrift_util.h b/be/src/util/thrift_util.h index a7d6620d5d31f3..f73d7ff6aa6335 100644 --- a/be/src/util/thrift_util.h +++ b/be/src/util/thrift_util.h @@ -175,8 +175,8 @@ void t_network_address_to_string(const TNetworkAddress& address, std::string* ou // string representation bool t_network_address_comparator(const TNetworkAddress& a, const TNetworkAddress& b); -PURE std::string to_string(const TUniqueId& id); +std::string to_string(const TUniqueId& id); -PURE bool _has_inverted_index_v1_or_partial_update(TOlapTableSink sink); +bool _has_inverted_index_v1_or_partial_update(TOlapTableSink sink); } // namespace doris From 2276bb851c7c6ccc61df6bbc7dccc05f11628402 Mon Sep 17 00:00:00 2001 From: Mryange Date: Sat, 6 Jun 2026 17:46:46 +0800 Subject: [PATCH 5/6] [refactor](be) Add operator IO wrappers (#64139) Issue Number: N/A Problem Summary: Pipeline operator source and sink paths need a common place to validate output and input blocks. Before this change, `sink` and `get_block` were the virtual override points, so common validation either had to stay in call sites or be duplicated across operator implementations. Root cause: the public operator data-flow entry points were also the polymorphic implementation hooks, which left no wrapper layer for shared checks. This change makes `DataSinkOperatorXBase::sink` and `OperatorXBase::get_block` non-virtual wrappers. The wrappers run `Block::check_type_and_column()` at the source/sink boundary and then dispatch to the new virtual `sink_impl` and `get_block_impl` methods. 
All pipeline operator implementations, exchange operators, scan operators, and related BE test mocks are migrated to the new impl methods. The scan projection path is updated to call the base `get_block` wrapper so the shared checks still apply. (cherry picked from commit c27fd0ba968daffe8329186205315f8b1cd147b7) --- .../exchange/local_exchange_sink_operator.cpp | 2 +- .../exchange/local_exchange_sink_operator.h | 2 +- .../local_exchange_source_operator.cpp | 2 +- .../exchange/local_exchange_source_operator.h | 2 +- .../operator/aggregation_sink_operator.cpp | 2 +- .../exec/operator/aggregation_sink_operator.h | 2 +- .../operator/aggregation_source_operator.cpp | 2 +- .../operator/aggregation_source_operator.h | 2 +- .../exec/operator/analytic_sink_operator.cpp | 2 +- be/src/exec/operator/analytic_sink_operator.h | 2 +- .../operator/analytic_source_operator.cpp | 3 ++- .../exec/operator/analytic_source_operator.h | 2 +- .../exec/operator/blackhole_sink_operator.cpp | 2 +- .../exec/operator/blackhole_sink_operator.h | 2 +- be/src/exec/operator/cache_sink_operator.cpp | 2 +- be/src/exec/operator/cache_sink_operator.h | 2 +- .../exec/operator/cache_source_operator.cpp | 2 +- be/src/exec/operator/cache_source_operator.h | 2 +- be/src/exec/operator/datagen_operator.cpp | 2 +- be/src/exec/operator/datagen_operator.h | 2 +- be/src/exec/operator/dict_sink_operator.cpp | 2 +- be/src/exec/operator/dict_sink_operator.h | 2 +- be/src/exec/operator/empty_set_operator.cpp | 2 +- be/src/exec/operator/empty_set_operator.h | 2 +- .../exec/operator/exchange_sink_operator.cpp | 2 +- be/src/exec/operator/exchange_sink_operator.h | 2 +- .../operator/exchange_source_operator.cpp | 2 +- .../exec/operator/exchange_source_operator.h | 2 +- .../group_commit_block_sink_operator.cpp | 2 +- .../group_commit_block_sink_operator.h | 2 +- .../operator/group_commit_scan_operator.cpp | 2 +- .../operator/group_commit_scan_operator.h | 2 +- be/src/exec/operator/hashjoin_build_sink.cpp | 2 +- 
be/src/exec/operator/hashjoin_build_sink.h | 2 +- .../exec/operator/hive_table_sink_operator.h | 2 +- .../operator/iceberg_delete_sink_operator.h | 2 +- .../operator/iceberg_merge_sink_operator.h | 2 +- .../operator/iceberg_table_sink_operator.h | 2 +- .../operator/jdbc_table_sink_operator.cpp | 2 +- .../exec/operator/jdbc_table_sink_operator.h | 2 +- .../local_merge_sort_source_operator.cpp | 2 +- .../local_merge_sort_source_operator.h | 2 +- .../operator/maxcompute_table_sink_operator.h | 2 +- .../operator/memory_scratch_sink_operator.cpp | 2 +- .../operator/memory_scratch_sink_operator.h | 2 +- be/src/exec/operator/mock_operator.h | 2 +- be/src/exec/operator/mock_scan_operator.h | 2 +- .../operator/multi_cast_data_stream_sink.cpp | 2 +- .../operator/multi_cast_data_stream_sink.h | 2 +- .../multi_cast_data_stream_source.cpp | 4 ++-- .../operator/multi_cast_data_stream_source.h | 2 +- .../nested_loop_join_build_operator.cpp | 3 ++- .../nested_loop_join_build_operator.h | 2 +- .../exec/operator/olap_table_sink_operator.h | 2 +- .../operator/olap_table_sink_v2_operator.h | 2 +- be/src/exec/operator/operator.cpp | 6 +++-- be/src/exec/operator/operator.h | 23 ++++++++++++++----- .../operator/partition_sort_sink_operator.cpp | 2 +- .../operator/partition_sort_sink_operator.h | 2 +- .../partition_sort_source_operator.cpp | 4 ++-- .../operator/partition_sort_source_operator.h | 2 +- .../partitioned_aggregation_sink_operator.cpp | 3 ++- .../partitioned_aggregation_sink_operator.h | 2 +- ...artitioned_aggregation_source_operator.cpp | 2 +- .../partitioned_aggregation_source_operator.h | 2 +- .../partitioned_hash_join_probe_operator.cpp | 3 ++- .../partitioned_hash_join_probe_operator.h | 2 +- .../partitioned_hash_join_sink_operator.cpp | 2 +- .../partitioned_hash_join_sink_operator.h | 2 +- .../operator/rec_cte_anchor_sink_operator.h | 2 +- be/src/exec/operator/rec_cte_scan_operator.h | 2 +- be/src/exec/operator/rec_cte_sink_operator.h | 2 +- 
.../exec/operator/rec_cte_source_operator.h | 2 +- .../operator/result_file_sink_operator.cpp | 2 +- .../exec/operator/result_file_sink_operator.h | 2 +- be/src/exec/operator/result_sink_operator.cpp | 2 +- be/src/exec/operator/result_sink_operator.h | 2 +- be/src/exec/operator/scan_operator.cpp | 2 +- be/src/exec/operator/scan_operator.h | 4 ++-- be/src/exec/operator/schema_scan_operator.cpp | 2 +- be/src/exec/operator/schema_scan_operator.h | 2 +- .../exec/operator/set_probe_sink_operator.cpp | 3 ++- .../exec/operator/set_probe_sink_operator.h | 2 +- be/src/exec/operator/set_sink_operator.cpp | 2 +- be/src/exec/operator/set_sink_operator.h | 2 +- be/src/exec/operator/set_source_operator.cpp | 3 ++- be/src/exec/operator/set_source_operator.h | 2 +- be/src/exec/operator/sort_sink_operator.cpp | 2 +- be/src/exec/operator/sort_sink_operator.h | 2 +- be/src/exec/operator/sort_source_operator.cpp | 2 +- be/src/exec/operator/sort_source_operator.h | 2 +- .../spill_iceberg_table_sink_operator.cpp | 2 +- .../spill_iceberg_table_sink_operator.h | 2 +- .../operator/spill_sort_sink_operator.cpp | 2 +- .../exec/operator/spill_sort_sink_operator.h | 2 +- .../operator/spill_sort_source_operator.cpp | 2 +- .../operator/spill_sort_source_operator.h | 2 +- .../exec/operator/tvf_table_sink_operator.h | 2 +- be/src/exec/operator/union_sink_operator.cpp | 2 +- be/src/exec/operator/union_sink_operator.h | 2 +- .../exec/operator/union_source_operator.cpp | 2 +- be/src/exec/operator/union_source_operator.h | 2 +- be/src/exec/pipeline/pipeline_task.cpp | 3 +-- be/test/exec/operator/agg_operator_test.cpp | 17 ++++++++++++++ .../operator/analytic_sink_operator_test.cpp | 4 +++- .../partition_sort_sink_operator_test.cpp | 4 +++- .../partitioned_aggregation_test_helper.h | 4 +++- .../partitioned_hash_join_test_helper.h | 4 +++- .../operator/query_cache_operator_test.cpp | 4 +++- be/test/exec/operator/sort_operator_test.cpp | 4 +++- .../exec/operator/spill_sort_test_helper.h | 2 +- 
.../operator/streaming_agg_operator_test.cpp | 4 +++- .../operator/table_function_operator_test.cpp | 4 +++- be/test/testutil/mock/mock_operators.h | 4 ++-- be/test/util/profile_spec_test.cpp | 4 ++-- 115 files changed, 177 insertions(+), 126 deletions(-) diff --git a/be/src/exec/exchange/local_exchange_sink_operator.cpp b/be/src/exec/exchange/local_exchange_sink_operator.cpp index 0a11596cfeea9a..10f1d52831b5c5 100644 --- a/be/src/exec/exchange/local_exchange_sink_operator.cpp +++ b/be/src/exec/exchange/local_exchange_sink_operator.cpp @@ -141,7 +141,7 @@ std::string LocalExchangeSinkLocalState::debug_string(int indentation_level) con return fmt::to_string(debug_string_buffer); } -Status LocalExchangeSinkOperatorX::sink(RuntimeState* state, Block* in_block, bool eos) { +Status LocalExchangeSinkOperatorX::sink_impl(RuntimeState* state, Block* in_block, bool eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); COUNTER_UPDATE(local_state.rows_input_counter(), (int64_t)in_block->rows()); diff --git a/be/src/exec/exchange/local_exchange_sink_operator.h b/be/src/exec/exchange/local_exchange_sink_operator.h index 01b958645d32fa..0e6844cd1fba1a 100644 --- a/be/src/exec/exchange/local_exchange_sink_operator.h +++ b/be/src/exec/exchange/local_exchange_sink_operator.h @@ -103,7 +103,7 @@ class LocalExchangeSinkOperatorX final : public DataSinkOperatorXget_block( diff --git a/be/src/exec/exchange/local_exchange_source_operator.h b/be/src/exec/exchange/local_exchange_source_operator.h index 58252b24ec2c23..d99dd57a378dd0 100644 --- a/be/src/exec/exchange/local_exchange_source_operator.h +++ b/be/src/exec/exchange/local_exchange_source_operator.h @@ -78,7 +78,7 @@ class LocalExchangeSourceOperatorX final : public OperatorXrow_descriptor(); } const RowDescriptor& row_desc() const override { return _child->row_desc(); } - Status get_block(RuntimeState* state, Block* block, bool* eos) override; + Status get_block_impl(RuntimeState* 
state, Block* block, bool* eos) override; bool is_source() const override { return true; } diff --git a/be/src/exec/operator/aggregation_sink_operator.cpp b/be/src/exec/operator/aggregation_sink_operator.cpp index 268f32ced7b990..1422e024ecca5b 100644 --- a/be/src/exec/operator/aggregation_sink_operator.cpp +++ b/be/src/exec/operator/aggregation_sink_operator.cpp @@ -872,7 +872,7 @@ Status AggSinkOperatorX::_check_agg_fn_output() { return Status::OK(); } -Status AggSinkOperatorX::sink(doris::RuntimeState* state, Block* in_block, bool eos) { +Status AggSinkOperatorX::sink_impl(doris::RuntimeState* state, Block* in_block, bool eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); COUNTER_UPDATE(local_state.rows_input_counter(), (int64_t)in_block->rows()); diff --git a/be/src/exec/operator/aggregation_sink_operator.h b/be/src/exec/operator/aggregation_sink_operator.h index 9774d2b95e512a..fe0a4023cdaabe 100644 --- a/be/src/exec/operator/aggregation_sink_operator.h +++ b/be/src/exec/operator/aggregation_sink_operator.h @@ -151,7 +151,7 @@ class AggSinkOperatorX MOCK_REMOVE(final) : public DataSinkOperatorX { AggSourceOperatorX() = default; #endif - Status get_block(RuntimeState* state, Block* block, bool* eos) override; + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override; bool is_source() const override { return true; } diff --git a/be/src/exec/operator/analytic_sink_operator.cpp b/be/src/exec/operator/analytic_sink_operator.cpp index 863acc4a59b4a3..0360d6168c324e 100644 --- a/be/src/exec/operator/analytic_sink_operator.cpp +++ b/be/src/exec/operator/analytic_sink_operator.cpp @@ -746,7 +746,7 @@ Status AnalyticSinkOperatorX::prepare(RuntimeState* state) { return Status::OK(); } -Status AnalyticSinkOperatorX::sink(doris::RuntimeState* state, Block* input_block, bool eos) { +Status AnalyticSinkOperatorX::sink_impl(doris::RuntimeState* state, Block* input_block, bool eos) { auto& local_state = 
get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); COUNTER_UPDATE(local_state.rows_input_counter(), (int64_t)input_block->rows()); diff --git a/be/src/exec/operator/analytic_sink_operator.h b/be/src/exec/operator/analytic_sink_operator.h index a54088025f12f2..a7466761e18ee2 100644 --- a/be/src/exec/operator/analytic_sink_operator.h +++ b/be/src/exec/operator/analytic_sink_operator.h @@ -210,7 +210,7 @@ class AnalyticSinkOperatorX final : public DataSinkOperatorX(pool, tnode, operator_id, descs) {} -Status AnalyticSourceOperatorX::get_block(RuntimeState* state, Block* output_block, bool* eos) { +Status AnalyticSourceOperatorX::get_block_impl(RuntimeState* state, Block* output_block, + bool* eos) { RETURN_IF_CANCELLED(state); auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); diff --git a/be/src/exec/operator/analytic_source_operator.h b/be/src/exec/operator/analytic_source_operator.h index e4d9cb2c2b69d0..c5591e81cfee67 100644 --- a/be/src/exec/operator/analytic_source_operator.h +++ b/be/src/exec/operator/analytic_source_operator.h @@ -48,7 +48,7 @@ class AnalyticSourceOperatorX final : public OperatorX { #ifdef BE_TEST AnalyticSourceOperatorX() = default; #endif - Status get_block(RuntimeState* state, Block* block, bool* eos) override; + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override; bool is_source() const override { return true; } diff --git a/be/src/exec/operator/blackhole_sink_operator.cpp b/be/src/exec/operator/blackhole_sink_operator.cpp index 0745c3285ccea0..e8daabec852261 100644 --- a/be/src/exec/operator/blackhole_sink_operator.cpp +++ b/be/src/exec/operator/blackhole_sink_operator.cpp @@ -44,7 +44,7 @@ Status BlackholeSinkOperatorX::init(const TDataSink& tsink) { return Status::OK(); } -Status BlackholeSinkOperatorX::sink(RuntimeState* state, Block* block, bool eos) { +Status BlackholeSinkOperatorX::sink_impl(RuntimeState* state, Block* block, bool eos) { auto& 
local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); COUNTER_UPDATE(local_state.rows_input_counter(), (int64_t)block->rows()); diff --git a/be/src/exec/operator/blackhole_sink_operator.h b/be/src/exec/operator/blackhole_sink_operator.h index 23a2e9953063b0..8bb32e3d4f7d22 100644 --- a/be/src/exec/operator/blackhole_sink_operator.h +++ b/be/src/exec/operator/blackhole_sink_operator.h @@ -68,7 +68,7 @@ class BlackholeSinkOperatorX final : public DataSinkOperatorXrows()); diff --git a/be/src/exec/operator/cache_sink_operator.h b/be/src/exec/operator/cache_sink_operator.h index 084cc5249137c3..77f7a3f2ae35ce 100644 --- a/be/src/exec/operator/cache_sink_operator.h +++ b/be/src/exec/operator/cache_sink_operator.h @@ -58,7 +58,7 @@ class CacheSinkOperatorX final : public DataSinkOperatorX { DataSinkOperatorX::_name); } - Status sink(RuntimeState* state, Block* in_block, bool eos) override; + Status sink_impl(RuntimeState* state, Block* in_block, bool eos) override; std::shared_ptr create_shared_state() const override { std::shared_ptr ss = std::make_shared(); diff --git a/be/src/exec/operator/cache_source_operator.cpp b/be/src/exec/operator/cache_source_operator.cpp index 12e95baa11ba05..3bb12c47f9ea33 100644 --- a/be/src/exec/operator/cache_source_operator.cpp +++ b/be/src/exec/operator/cache_source_operator.cpp @@ -119,7 +119,7 @@ std::string CacheSourceLocalState::debug_string(int indentation_level) const { return fmt::to_string(debug_string_buffer); } -Status CacheSourceOperatorX::get_block(RuntimeState* state, Block* block, bool* eos) { +Status CacheSourceOperatorX::get_block_impl(RuntimeState* state, Block* block, bool* eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); diff --git a/be/src/exec/operator/cache_source_operator.h b/be/src/exec/operator/cache_source_operator.h index dca13774cf405b..dfbb9468882753 100644 --- a/be/src/exec/operator/cache_source_operator.h +++ 
b/be/src/exec/operator/cache_source_operator.h @@ -79,7 +79,7 @@ class CacheSourceOperatorX final : public OperatorX { #endif ~CacheSourceOperatorX() override = default; - Status get_block(RuntimeState* state, Block* block, bool* eos) override; + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override; bool is_source() const override { return true; } diff --git a/be/src/exec/operator/datagen_operator.cpp b/be/src/exec/operator/datagen_operator.cpp index 457675666194ee..c4d4e969fe8aee 100644 --- a/be/src/exec/operator/datagen_operator.cpp +++ b/be/src/exec/operator/datagen_operator.cpp @@ -61,7 +61,7 @@ Status DataGenSourceOperatorX::prepare(RuntimeState* state) { return Status::OK(); } -Status DataGenSourceOperatorX::get_block(RuntimeState* state, Block* block, bool* eos) { +Status DataGenSourceOperatorX::get_block_impl(RuntimeState* state, Block* block, bool* eos) { if (state == nullptr || block == nullptr) { return Status::InternalError("input is NULL pointer"); } diff --git a/be/src/exec/operator/datagen_operator.h b/be/src/exec/operator/datagen_operator.h index 7950725fde09b1..d8cc0fc4508d2d 100644 --- a/be/src/exec/operator/datagen_operator.h +++ b/be/src/exec/operator/datagen_operator.h @@ -59,7 +59,7 @@ class DataGenSourceOperatorX final : public OperatorX { Status init(const TPlanNode& tnode, RuntimeState* state) override; Status prepare(RuntimeState* state) override; - Status get_block(RuntimeState* state, Block* block, bool* eos) override; + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override; [[nodiscard]] bool is_source() const override { return true; } diff --git a/be/src/exec/operator/dict_sink_operator.cpp b/be/src/exec/operator/dict_sink_operator.cpp index c529c4ced25598..6feac5784fb3c9 100644 --- a/be/src/exec/operator/dict_sink_operator.cpp +++ b/be/src/exec/operator/dict_sink_operator.cpp @@ -159,7 +159,7 @@ Status DictSinkOperatorX::prepare(RuntimeState* state) { return Status::OK(); } -Status 
DictSinkOperatorX::sink(RuntimeState* state, Block* in_block, bool eos) { +Status DictSinkOperatorX::sink_impl(RuntimeState* state, Block* in_block, bool eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); COUNTER_UPDATE(local_state.rows_input_counter(), (int64_t)in_block->rows()); diff --git a/be/src/exec/operator/dict_sink_operator.h b/be/src/exec/operator/dict_sink_operator.h index 252136b33085c1..ddb49e183c95c9 100644 --- a/be/src/exec/operator/dict_sink_operator.h +++ b/be/src/exec/operator/dict_sink_operator.h @@ -50,7 +50,7 @@ class DictSinkOperatorX final : public DataSinkOperatorX { const std::vector& dict_input_expr, const TDictionarySink& dict_sink); Status prepare(RuntimeState* state) override; - Status sink(RuntimeState* state, Block* in_block, bool eos) override; + Status sink_impl(RuntimeState* state, Block* in_block, bool eos) override; private: friend class DictSinkLocalState; diff --git a/be/src/exec/operator/empty_set_operator.cpp b/be/src/exec/operator/empty_set_operator.cpp index 0dd0b66bce340a..1b4a80afe0121a 100644 --- a/be/src/exec/operator/empty_set_operator.cpp +++ b/be/src/exec/operator/empty_set_operator.cpp @@ -24,7 +24,7 @@ namespace doris { #include "common/compile_check_begin.h" -Status EmptySetSourceOperatorX::get_block(RuntimeState* state, Block* block, bool* eos) { +Status EmptySetSourceOperatorX::get_block_impl(RuntimeState* state, Block* block, bool* eos) { *eos = true; return Status::OK(); } diff --git a/be/src/exec/operator/empty_set_operator.h b/be/src/exec/operator/empty_set_operator.h index 5c8f70071c0edd..edc827435d5f2c 100644 --- a/be/src/exec/operator/empty_set_operator.h +++ b/be/src/exec/operator/empty_set_operator.h @@ -43,7 +43,7 @@ class EmptySetSourceOperatorX final : public OperatorX { EmptySetSourceOperatorX() = default; #endif - Status get_block(RuntimeState* state, Block* block, bool* eos) override; + Status get_block_impl(RuntimeState* state, Block* block, bool* 
eos) override; [[nodiscard]] bool is_source() const override { return true; } }; diff --git a/be/src/exec/operator/exchange_sink_operator.cpp b/be/src/exec/operator/exchange_sink_operator.cpp index beec1ed0313f3d..6a64dc212a5d0b 100644 --- a/be/src/exec/operator/exchange_sink_operator.cpp +++ b/be/src/exec/operator/exchange_sink_operator.cpp @@ -386,7 +386,7 @@ Status ExchangeSinkOperatorX::_handle_eof_channel(RuntimeState* state, ChannelPt return channel->close(state); } -Status ExchangeSinkOperatorX::sink(RuntimeState* state, Block* block, bool eos) { +Status ExchangeSinkOperatorX::sink_impl(RuntimeState* state, Block* block, bool eos) { auto& local_state = get_local_state(state); COUNTER_UPDATE(local_state.rows_input_counter(), (int64_t)block->rows()); // for auto-partition, may decease when do_partitioning diff --git a/be/src/exec/operator/exchange_sink_operator.h b/be/src/exec/operator/exchange_sink_operator.h index 369f03ec6bfcc9..c850c13cac4b6c 100644 --- a/be/src/exec/operator/exchange_sink_operator.h +++ b/be/src/exec/operator/exchange_sink_operator.h @@ -198,7 +198,7 @@ class ExchangeSinkOperatorX MOCK_REMOVE(final) : public DataSinkOperatorX { Status reset(RuntimeState* state) override; - Status get_block(RuntimeState* state, Block* block, bool* eos) override; + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override; std::string debug_string(int indentation_level = 0) const override; diff --git a/be/src/exec/operator/group_commit_block_sink_operator.cpp b/be/src/exec/operator/group_commit_block_sink_operator.cpp index fd44a9e74a627d..ac0c7328772b3b 100644 --- a/be/src/exec/operator/group_commit_block_sink_operator.cpp +++ b/be/src/exec/operator/group_commit_block_sink_operator.cpp @@ -300,7 +300,7 @@ Status GroupCommitBlockSinkOperatorX::prepare(RuntimeState* state) { return VExpr::open(_output_vexpr_ctxs, state); } -Status GroupCommitBlockSinkOperatorX::sink(RuntimeState* state, Block* input_block, bool eos) { +Status 
GroupCommitBlockSinkOperatorX::sink_impl(RuntimeState* state, Block* input_block, bool eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); COUNTER_UPDATE(local_state.rows_input_counter(), (int64_t)input_block->rows()); diff --git a/be/src/exec/operator/group_commit_block_sink_operator.h b/be/src/exec/operator/group_commit_block_sink_operator.h index 406c83294c5c11..1e335bc5261358 100644 --- a/be/src/exec/operator/group_commit_block_sink_operator.h +++ b/be/src/exec/operator/group_commit_block_sink_operator.h @@ -107,7 +107,7 @@ class GroupCommitBlockSinkOperatorX final Status prepare(RuntimeState* state) override; - Status sink(RuntimeState* state, Block* block, bool eos) override; + Status sink_impl(RuntimeState* state, Block* block, bool eos) override; private: friend class GroupCommitBlockSinkLocalState; diff --git a/be/src/exec/operator/group_commit_scan_operator.cpp b/be/src/exec/operator/group_commit_scan_operator.cpp index 26c2cf4fe8202b..7d11e1c33d582c 100644 --- a/be/src/exec/operator/group_commit_scan_operator.cpp +++ b/be/src/exec/operator/group_commit_scan_operator.cpp @@ -29,7 +29,7 @@ GroupCommitOperatorX::GroupCommitOperatorX(ObjectPool* pool, const TPlanNode& tn _output_tuple_id = tnode.file_scan_node.tuple_id; } -Status GroupCommitOperatorX::get_block(RuntimeState* state, Block* block, bool* eos) { +Status GroupCommitOperatorX::get_block_impl(RuntimeState* state, Block* block, bool* eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); bool find_node = false; diff --git a/be/src/exec/operator/group_commit_scan_operator.h b/be/src/exec/operator/group_commit_scan_operator.h index ebaf26a2561b82..aab43aafe8bd29 100644 --- a/be/src/exec/operator/group_commit_scan_operator.h +++ b/be/src/exec/operator/group_commit_scan_operator.h @@ -55,7 +55,7 @@ class GroupCommitOperatorX final : public ScanOperatorX { GroupCommitOperatorX(ObjectPool* pool, const TPlanNode& 
tnode, int operator_id, const DescriptorTbl& descs, int parallel_tasks); - Status get_block(RuntimeState* state, Block* block, bool* eos) override; + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override; protected: friend class GroupCommitLocalState; diff --git a/be/src/exec/operator/hashjoin_build_sink.cpp b/be/src/exec/operator/hashjoin_build_sink.cpp index c29199db35d7c2..8c6f78a032821d 100644 --- a/be/src/exec/operator/hashjoin_build_sink.cpp +++ b/be/src/exec/operator/hashjoin_build_sink.cpp @@ -821,7 +821,7 @@ Status HashJoinBuildSinkOperatorX::prepare(RuntimeState* state) { return VExpr::open(_build_expr_ctxs, state); } -Status HashJoinBuildSinkOperatorX::sink(RuntimeState* state, Block* in_block, bool eos) { +Status HashJoinBuildSinkOperatorX::sink_impl(RuntimeState* state, Block* in_block, bool eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); COUNTER_UPDATE(local_state.rows_input_counter(), (int64_t)in_block->rows()); diff --git a/be/src/exec/operator/hashjoin_build_sink.h b/be/src/exec/operator/hashjoin_build_sink.h index 9ece55d5fcbfaf..af2155bab1c646 100644 --- a/be/src/exec/operator/hashjoin_build_sink.h +++ b/be/src/exec/operator/hashjoin_build_sink.h @@ -119,7 +119,7 @@ class HashJoinBuildSinkOperatorX MOCK_REMOVE(final) Status prepare(RuntimeState* state) override; - Status sink(RuntimeState* state, Block* in_block, bool eos) override; + Status sink_impl(RuntimeState* state, Block* in_block, bool eos) override; size_t get_reserve_mem_size(RuntimeState* state, bool eos) override; diff --git a/be/src/exec/operator/hive_table_sink_operator.h b/be/src/exec/operator/hive_table_sink_operator.h index ff4a681f425613..34a1e8094b1a86 100644 --- a/be/src/exec/operator/hive_table_sink_operator.h +++ b/be/src/exec/operator/hive_table_sink_operator.h @@ -66,7 +66,7 @@ class HiveTableSinkOperatorX final : public DataSinkOperatorXrows()); diff --git 
a/be/src/exec/operator/iceberg_delete_sink_operator.h b/be/src/exec/operator/iceberg_delete_sink_operator.h index f9cbbd60e311b2..651983b83c9c1b 100644 --- a/be/src/exec/operator/iceberg_delete_sink_operator.h +++ b/be/src/exec/operator/iceberg_delete_sink_operator.h @@ -65,7 +65,7 @@ class IcebergDeleteSinkOperatorX final : public DataSinkOperatorXrows()); diff --git a/be/src/exec/operator/iceberg_merge_sink_operator.h b/be/src/exec/operator/iceberg_merge_sink_operator.h index 362d7ad7c45993..4b1bc706ff0328 100644 --- a/be/src/exec/operator/iceberg_merge_sink_operator.h +++ b/be/src/exec/operator/iceberg_merge_sink_operator.h @@ -64,7 +64,7 @@ class IcebergMergeSinkOperatorX final : public DataSinkOperatorXrows()); diff --git a/be/src/exec/operator/iceberg_table_sink_operator.h b/be/src/exec/operator/iceberg_table_sink_operator.h index 5d3867323bd2c6..1d5cfc9c25fc76 100644 --- a/be/src/exec/operator/iceberg_table_sink_operator.h +++ b/be/src/exec/operator/iceberg_table_sink_operator.h @@ -65,7 +65,7 @@ class IcebergTableSinkOperatorX final : public DataSinkOperatorXrows()); diff --git a/be/src/exec/operator/jdbc_table_sink_operator.cpp b/be/src/exec/operator/jdbc_table_sink_operator.cpp index 85761effac1ac0..1b7ac4062c3319 100644 --- a/be/src/exec/operator/jdbc_table_sink_operator.cpp +++ b/be/src/exec/operator/jdbc_table_sink_operator.cpp @@ -47,7 +47,7 @@ Status JdbcTableSinkOperatorX::prepare(RuntimeState* state) { return Status::OK(); } -Status JdbcTableSinkOperatorX::sink(RuntimeState* state, Block* block, bool eos) { +Status JdbcTableSinkOperatorX::sink_impl(RuntimeState* state, Block* block, bool eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); COUNTER_UPDATE(local_state.rows_input_counter(), (int64_t)block->rows()); diff --git a/be/src/exec/operator/jdbc_table_sink_operator.h b/be/src/exec/operator/jdbc_table_sink_operator.h index 7481549ccda2f0..95d0f470c15169 100644 --- 
a/be/src/exec/operator/jdbc_table_sink_operator.h +++ b/be/src/exec/operator/jdbc_table_sink_operator.h @@ -46,7 +46,7 @@ class JdbcTableSinkOperatorX final : public DataSinkOperatorXrows()); diff --git a/be/src/exec/operator/memory_scratch_sink_operator.cpp b/be/src/exec/operator/memory_scratch_sink_operator.cpp index 917d54da763a12..3492b9774ee17d 100644 --- a/be/src/exec/operator/memory_scratch_sink_operator.cpp +++ b/be/src/exec/operator/memory_scratch_sink_operator.cpp @@ -87,7 +87,7 @@ Status MemoryScratchSinkOperatorX::prepare(RuntimeState* state) { return Status::OK(); } -Status MemoryScratchSinkOperatorX::sink(RuntimeState* state, Block* input_block, bool eos) { +Status MemoryScratchSinkOperatorX::sink_impl(RuntimeState* state, Block* input_block, bool eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); if (nullptr == input_block || 0 == input_block->rows()) { diff --git a/be/src/exec/operator/memory_scratch_sink_operator.h b/be/src/exec/operator/memory_scratch_sink_operator.h index 03ec59647a917d..f5ef12721ec9a2 100644 --- a/be/src/exec/operator/memory_scratch_sink_operator.h +++ b/be/src/exec/operator/memory_scratch_sink_operator.h @@ -58,7 +58,7 @@ class MemoryScratchSinkOperatorX final : public DataSinkOperatorX { ENABLE_FACTORY_CREATOR(MockOperatorX); MockOperatorX() = default; - Status get_block(RuntimeState* state, Block* block, bool* eos) override { + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override { if (_outout_blocks.empty()) { *eos = true; return Status::OK(); diff --git a/be/src/exec/operator/mock_scan_operator.h b/be/src/exec/operator/mock_scan_operator.h index e6a570a2f8548c..ad3722c1de6ae0 100644 --- a/be/src/exec/operator/mock_scan_operator.h +++ b/be/src/exec/operator/mock_scan_operator.h @@ -84,7 +84,7 @@ class MockScanOperatorX final : public ScanOperatorX { _output_blocks.push_back(std::move(block)); } - Status get_block(RuntimeState* state, Block* block, bool* 
eos) override { + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override { if (_output_blocks.empty()) { *eos = true; return Status::OK(); diff --git a/be/src/exec/operator/multi_cast_data_stream_sink.cpp b/be/src/exec/operator/multi_cast_data_stream_sink.cpp index 437416b1b23a67..9d34e0627da7e8 100644 --- a/be/src/exec/operator/multi_cast_data_stream_sink.cpp +++ b/be/src/exec/operator/multi_cast_data_stream_sink.cpp @@ -63,7 +63,7 @@ std::string MultiCastDataStreamSinkLocalState::debug_string(int indentation_leve return fmt::to_string(debug_string_buffer); } -Status MultiCastDataStreamSinkOperatorX::sink(RuntimeState* state, Block* in_block, bool eos) { +Status MultiCastDataStreamSinkOperatorX::sink_impl(RuntimeState* state, Block* in_block, bool eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); if (in_block->rows() > 0 || eos) { diff --git a/be/src/exec/operator/multi_cast_data_stream_sink.h b/be/src/exec/operator/multi_cast_data_stream_sink.h index 602371ce02c3ce..239003833a6196 100644 --- a/be/src/exec/operator/multi_cast_data_stream_sink.h +++ b/be/src/exec/operator/multi_cast_data_stream_sink.h @@ -57,7 +57,7 @@ class MultiCastDataStreamSinkOperatorX final _num_dests(sources.size()) {} ~MultiCastDataStreamSinkOperatorX() override = default; - Status sink(RuntimeState* state, Block* in_block, bool eos) override; + Status sink_impl(RuntimeState* state, Block* in_block, bool eos) override; std::shared_ptr create_shared_state() const override; diff --git a/be/src/exec/operator/multi_cast_data_stream_source.cpp b/be/src/exec/operator/multi_cast_data_stream_source.cpp index 7f19535859fab3..fea748baea138d 100644 --- a/be/src/exec/operator/multi_cast_data_stream_source.cpp +++ b/be/src/exec/operator/multi_cast_data_stream_source.cpp @@ -80,8 +80,8 @@ Status MultiCastDataStreamSourceLocalState::close(RuntimeState* state) { return Base::close(state); } -Status 
MultiCastDataStreamerSourceOperatorX::get_block(RuntimeState* state, Block* block, - bool* eos) { +Status MultiCastDataStreamerSourceOperatorX::get_block_impl(RuntimeState* state, Block* block, + bool* eos) { //auto& local_state = get_local_state(state); auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); diff --git a/be/src/exec/operator/multi_cast_data_stream_source.h b/be/src/exec/operator/multi_cast_data_stream_source.h index 3673269e92f4a0..5727419c7c589d 100644 --- a/be/src/exec/operator/multi_cast_data_stream_source.h +++ b/be/src/exec/operator/multi_cast_data_stream_source.h @@ -105,7 +105,7 @@ class MultiCastDataStreamerSourceOperatorX final return Status::OK(); } - Status get_block(RuntimeState* state, Block* block, bool* eos) override; + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override; bool is_source() const override { return true; } diff --git a/be/src/exec/operator/nested_loop_join_build_operator.cpp b/be/src/exec/operator/nested_loop_join_build_operator.cpp index 91a0debda69356..350e0664046930 100644 --- a/be/src/exec/operator/nested_loop_join_build_operator.cpp +++ b/be/src/exec/operator/nested_loop_join_build_operator.cpp @@ -95,7 +95,8 @@ Status NestedLoopJoinBuildSinkOperatorX::prepare(RuntimeState* state) { return VExpr::open(_filter_src_expr_ctxs, state); } -Status NestedLoopJoinBuildSinkOperatorX::sink(doris::RuntimeState* state, Block* block, bool eos) { +Status NestedLoopJoinBuildSinkOperatorX::sink_impl(doris::RuntimeState* state, Block* block, + bool eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); COUNTER_UPDATE(local_state.rows_input_counter(), (int64_t)block->rows()); diff --git a/be/src/exec/operator/nested_loop_join_build_operator.h b/be/src/exec/operator/nested_loop_join_build_operator.h index 04fbd894bb471d..a8fc817380274a 100644 --- a/be/src/exec/operator/nested_loop_join_build_operator.h +++ 
b/be/src/exec/operator/nested_loop_join_build_operator.h @@ -65,7 +65,7 @@ class NestedLoopJoinBuildSinkOperatorX final Status prepare(RuntimeState* state) override; - Status sink(RuntimeState* state, Block* in_block, bool eos) override; + Status sink_impl(RuntimeState* state, Block* in_block, bool eos) override; DataDistribution required_data_distribution(RuntimeState* /*state*/) const override { if (_join_op == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) { diff --git a/be/src/exec/operator/olap_table_sink_operator.h b/be/src/exec/operator/olap_table_sink_operator.h index fdb3756bc023c8..3ef0a51084b335 100644 --- a/be/src/exec/operator/olap_table_sink_operator.h +++ b/be/src/exec/operator/olap_table_sink_operator.h @@ -58,7 +58,7 @@ class OlapTableSinkOperatorX final : public DataSinkOperatorXrows()); diff --git a/be/src/exec/operator/olap_table_sink_v2_operator.h b/be/src/exec/operator/olap_table_sink_v2_operator.h index d97ea631a08429..8c72a4051429de 100644 --- a/be/src/exec/operator/olap_table_sink_v2_operator.h +++ b/be/src/exec/operator/olap_table_sink_v2_operator.h @@ -59,7 +59,7 @@ class OlapTableSinkV2OperatorX final : public DataSinkOperatorXrows()); diff --git a/be/src/exec/operator/operator.cpp b/be/src/exec/operator/operator.cpp index 1de468d82be571..34fa48dd06b71f 100644 --- a/be/src/exec/operator/operator.cpp +++ b/be/src/exec/operator/operator.cpp @@ -695,13 +695,15 @@ Status PipelineXSinkLocalState::close(RuntimeState* state, Status e } template -Status StreamingOperatorX::get_block(RuntimeState* state, Block* block, bool* eos) { +Status StreamingOperatorX::get_block_impl(RuntimeState* state, Block* block, + bool* eos) { RETURN_IF_ERROR(OperatorX::_child->get_block_after_projects(state, block, eos)); return pull(state, block, eos); } template -Status StatefulOperatorX::get_block(RuntimeState* state, Block* block, bool* eos) { +Status StatefulOperatorX::get_block_impl(RuntimeState* state, Block* block, + bool* eos) { auto& local_state = 
get_local_state(state); if (need_more_input_data(state)) { local_state._child_block->clear_column_data( diff --git a/be/src/exec/operator/operator.h b/be/src/exec/operator/operator.h index 25ae1477f8abff..e8ee719546717b 100644 --- a/be/src/exec/operator/operator.h +++ b/be/src/exec/operator/operator.h @@ -633,7 +633,12 @@ class DataSinkOperatorXBase : public OperatorBase { return result.value()->is_finished(); } - [[nodiscard]] virtual Status sink(RuntimeState* state, Block* block, bool eos) = 0; + [[nodiscard]] Status sink(RuntimeState* state, Block* block, bool eos) { + RETURN_IF_ERROR(block->check_type_and_column()); + return sink_impl(state, block, eos); + } + + [[nodiscard]] virtual Status sink_impl(RuntimeState* state, Block* block, bool eos) = 0; [[nodiscard]] virtual Status setup_local_state(RuntimeState* state, LocalSinkStateInfo& info) = 0; @@ -874,7 +879,13 @@ class OperatorXBase : public OperatorBase { Status prepare(RuntimeState* state) override; Status terminate(RuntimeState* state) override; - [[nodiscard]] virtual Status get_block(RuntimeState* state, Block* block, bool* eos) = 0; + [[nodiscard]] Status get_block(RuntimeState* state, Block* block, bool* eos) { + RETURN_IF_ERROR(get_block_impl(state, block, eos)); + RETURN_IF_ERROR(block->check_type_and_column()); + return Status::OK(); + } + + [[nodiscard]] virtual Status get_block_impl(RuntimeState* state, Block* block, bool* eos) = 0; Status close(RuntimeState* state) override; @@ -1067,7 +1078,7 @@ class StreamingOperatorX : public OperatorX { virtual ~StreamingOperatorX() = default; - Status get_block(RuntimeState* state, Block* block, bool* eos) override; + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override; virtual Status pull(RuntimeState* state, Block* block, bool* eos) = 0; }; @@ -1093,7 +1104,7 @@ class StatefulOperatorX : public OperatorX { using OperatorX::get_local_state; - [[nodiscard]] Status get_block(RuntimeState* state, Block* block, bool* eos) override; + 
[[nodiscard]] Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override; [[nodiscard]] virtual Status pull(RuntimeState* state, Block* block, bool* eos) const = 0; [[nodiscard]] virtual Status push(RuntimeState* state, Block* input_block, bool eos) const = 0; @@ -1167,7 +1178,7 @@ class DummyOperator final : public OperatorX { [[nodiscard]] bool is_source() const override { return true; } - Status get_block(RuntimeState* state, Block* block, bool* eos) override { + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override { *eos = _eos; return Status::OK(); } @@ -1222,7 +1233,7 @@ class DummySinkOperatorX final : public DataSinkOperatorX { public: DummySinkOperatorX(int op_id, int node_id, int dest_id) : DataSinkOperatorX(op_id, node_id, dest_id) {} - Status sink(RuntimeState* state, Block* in_block, bool eos) override { + Status sink_impl(RuntimeState* state, Block* in_block, bool eos) override { return _return_eof ? Status::Error("source have closed") : Status::OK(); } diff --git a/be/src/exec/operator/partition_sort_sink_operator.cpp b/be/src/exec/operator/partition_sort_sink_operator.cpp index 66ed84d021e18b..7eb6be95aab4d3 100644 --- a/be/src/exec/operator/partition_sort_sink_operator.cpp +++ b/be/src/exec/operator/partition_sort_sink_operator.cpp @@ -112,7 +112,7 @@ Status PartitionSortSinkOperatorX::prepare(RuntimeState* state) { return Status::OK(); } -Status PartitionSortSinkOperatorX::sink(RuntimeState* state, Block* input_block, bool eos) { +Status PartitionSortSinkOperatorX::sink_impl(RuntimeState* state, Block* input_block, bool eos) { auto& local_state = get_local_state(state); auto current_rows = input_block->rows(); SCOPED_TIMER(local_state.exec_time_counter()); diff --git a/be/src/exec/operator/partition_sort_sink_operator.h b/be/src/exec/operator/partition_sort_sink_operator.h index 03a81941259432..5dce3e12653791 100644 --- a/be/src/exec/operator/partition_sort_sink_operator.h +++ 
b/be/src/exec/operator/partition_sort_sink_operator.h @@ -93,7 +93,7 @@ class PartitionSortSinkOperatorX final : public DataSinkOperatorXprepare(state); } -Status PartitionedAggSinkOperatorX::sink(doris::RuntimeState* state, Block* in_block, bool eos) { +Status PartitionedAggSinkOperatorX::sink_impl(doris::RuntimeState* state, Block* in_block, + bool eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); COUNTER_UPDATE(local_state.rows_input_counter(), (int64_t)in_block->rows()); diff --git a/be/src/exec/operator/partitioned_aggregation_sink_operator.h b/be/src/exec/operator/partitioned_aggregation_sink_operator.h index 0ca4817eb9f906..58b461a5b995c0 100644 --- a/be/src/exec/operator/partitioned_aggregation_sink_operator.h +++ b/be/src/exec/operator/partitioned_aggregation_sink_operator.h @@ -115,7 +115,7 @@ class PartitionedAggSinkOperatorX : public DataSinkOperatorX Status close(RuntimeState* state) override; - Status get_block(RuntimeState* state, Block* block, bool* eos) override; + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override; bool is_source() const override { return true; } diff --git a/be/src/exec/operator/partitioned_hash_join_probe_operator.cpp b/be/src/exec/operator/partitioned_hash_join_probe_operator.cpp index 6cb9ce5c1016a9..1a28eace53582e 100644 --- a/be/src/exec/operator/partitioned_hash_join_probe_operator.cpp +++ b/be/src/exec/operator/partitioned_hash_join_probe_operator.cpp @@ -1018,7 +1018,8 @@ Status PartitionedHashJoinProbeOperatorX::revoke_memory(RuntimeState* state) { return local_state.revoke_build_data(state); } -Status PartitionedHashJoinProbeOperatorX::get_block(RuntimeState* state, Block* block, bool* eos) { +Status PartitionedHashJoinProbeOperatorX::get_block_impl(RuntimeState* state, Block* block, + bool* eos) { *eos = false; auto& local_state = get_local_state(state); const bool is_spilled = local_state._shared_state->_is_spilled; diff --git 
a/be/src/exec/operator/partitioned_hash_join_probe_operator.h b/be/src/exec/operator/partitioned_hash_join_probe_operator.h index 2a53458e12983e..76721eb584ec3a 100644 --- a/be/src/exec/operator/partitioned_hash_join_probe_operator.h +++ b/be/src/exec/operator/partitioned_hash_join_probe_operator.h @@ -223,7 +223,7 @@ class PartitionedHashJoinProbeOperatorX final Status init(const TPlanNode& tnode, RuntimeState* state) override; Status prepare(RuntimeState* state) override; - [[nodiscard]] Status get_block(RuntimeState* state, Block* block, bool* eos) override; + [[nodiscard]] Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override; Status push(RuntimeState* state, Block* input_block, bool eos) const override; Status pull(doris::RuntimeState* state, Block* output_block, bool* eos) const override; diff --git a/be/src/exec/operator/partitioned_hash_join_sink_operator.cpp b/be/src/exec/operator/partitioned_hash_join_sink_operator.cpp index b481c5a6b5b5a9..4857015c37ee5c 100644 --- a/be/src/exec/operator/partitioned_hash_join_sink_operator.cpp +++ b/be/src/exec/operator/partitioned_hash_join_sink_operator.cpp @@ -515,7 +515,7 @@ void PartitionedHashJoinSinkLocalState::update_profile_from_inner() { #undef UPDATE_COUNTER_FROM_INNER -Status PartitionedHashJoinSinkOperatorX::sink(RuntimeState* state, Block* in_block, bool eos) { +Status PartitionedHashJoinSinkOperatorX::sink_impl(RuntimeState* state, Block* in_block, bool eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); const auto rows = in_block->rows(); diff --git a/be/src/exec/operator/partitioned_hash_join_sink_operator.h b/be/src/exec/operator/partitioned_hash_join_sink_operator.h index c4ffeb0ce44078..a9fb27f6b330a1 100644 --- a/be/src/exec/operator/partitioned_hash_join_sink_operator.h +++ b/be/src/exec/operator/partitioned_hash_join_sink_operator.h @@ -116,7 +116,7 @@ class PartitionedHashJoinSinkOperatorX Status prepare(RuntimeState* state) 
override; - Status sink(RuntimeState* state, Block* in_block, bool eos) override; + Status sink_impl(RuntimeState* state, Block* in_block, bool eos) override; bool should_dry_run(RuntimeState* state) override { return false; } diff --git a/be/src/exec/operator/rec_cte_anchor_sink_operator.h b/be/src/exec/operator/rec_cte_anchor_sink_operator.h index 19585ff02a8c07..e30d38d180c13a 100644 --- a/be/src/exec/operator/rec_cte_anchor_sink_operator.h +++ b/be/src/exec/operator/rec_cte_anchor_sink_operator.h @@ -81,7 +81,7 @@ class RecCTEAnchorSinkOperatorX MOCK_REMOVE(final) return Base::close(state); } - Status sink(RuntimeState* state, Block* input_block, bool eos) override { + Status sink_impl(RuntimeState* state, Block* input_block, bool eos) override { auto& local_state = get_local_state(state); RETURN_IF_ERROR(_notify_rec_side_ready_if_needed(state)); diff --git a/be/src/exec/operator/rec_cte_scan_operator.h b/be/src/exec/operator/rec_cte_scan_operator.h index 470f497ef9119c..954d7b0169b2c1 100644 --- a/be/src/exec/operator/rec_cte_scan_operator.h +++ b/be/src/exec/operator/rec_cte_scan_operator.h @@ -66,7 +66,7 @@ class RecCTEScanOperatorX final : public OperatorX { const DescriptorTbl& descs) : OperatorX(pool, tnode, operator_id, descs) {} - Status get_block(RuntimeState* state, Block* block, bool* eos) override { + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override { auto& local_state = get_local_state(state); if (local_state._blocks.empty()) { diff --git a/be/src/exec/operator/rec_cte_sink_operator.h b/be/src/exec/operator/rec_cte_sink_operator.h index a8a526854df4e4..34796b5658835c 100644 --- a/be/src/exec/operator/rec_cte_sink_operator.h +++ b/be/src/exec/operator/rec_cte_sink_operator.h @@ -82,7 +82,7 @@ class RecCTESinkOperatorX MOCK_REMOVE(final) : public DataSinkOperatorXrows()); diff --git a/be/src/exec/operator/rec_cte_source_operator.h b/be/src/exec/operator/rec_cte_source_operator.h index 92ffe43794cc17..ef31bfcb97614e 100644 
--- a/be/src/exec/operator/rec_cte_source_operator.h +++ b/be/src/exec/operator/rec_cte_source_operator.h @@ -208,7 +208,7 @@ class RecCTESourceOperatorX : public OperatorX { return {ExchangeType::NOOP}; } - Status get_block(RuntimeState* state, Block* block, bool* eos) override { + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override { auto& local_state = get_local_state(state); auto& ctx = local_state._shared_state; ctx->update_ready_to_return(); diff --git a/be/src/exec/operator/result_file_sink_operator.cpp b/be/src/exec/operator/result_file_sink_operator.cpp index 97de630d3e74b3..3baafe30e98eaf 100644 --- a/be/src/exec/operator/result_file_sink_operator.cpp +++ b/be/src/exec/operator/result_file_sink_operator.cpp @@ -151,7 +151,7 @@ Status ResultFileSinkLocalState::close(RuntimeState* state, Status exec_status) return Base::close(state, exec_status); } -Status ResultFileSinkOperatorX::sink(RuntimeState* state, Block* in_block, bool eos) { +Status ResultFileSinkOperatorX::sink_impl(RuntimeState* state, Block* in_block, bool eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); COUNTER_UPDATE(local_state.rows_input_counter(), (int64_t)in_block->rows()); diff --git a/be/src/exec/operator/result_file_sink_operator.h b/be/src/exec/operator/result_file_sink_operator.h index 2f4bcb8e2c8321..c45ac00eb58c80 100644 --- a/be/src/exec/operator/result_file_sink_operator.h +++ b/be/src/exec/operator/result_file_sink_operator.h @@ -62,7 +62,7 @@ class ResultFileSinkOperatorX final : public DataSinkOperatorXrows()); diff --git a/be/src/exec/operator/result_sink_operator.h b/be/src/exec/operator/result_sink_operator.h index e8cc0fd25de90b..790aaa59cf65b1 100644 --- a/be/src/exec/operator/result_sink_operator.h +++ b/be/src/exec/operator/result_sink_operator.h @@ -160,7 +160,7 @@ class ResultSinkOperatorX final : public DataSinkOperatorX const std::vector& select_exprs, const TResultSink& sink); Status 
prepare(RuntimeState* state) override; - Status sink(RuntimeState* state, Block* in_block, bool eos) override; + Status sink_impl(RuntimeState* state, Block* in_block, bool eos) override; private: friend class ResultSinkLocalState; diff --git a/be/src/exec/operator/scan_operator.cpp b/be/src/exec/operator/scan_operator.cpp index 70ab35d143013f..ae2b84cc98e72e 100644 --- a/be/src/exec/operator/scan_operator.cpp +++ b/be/src/exec/operator/scan_operator.cpp @@ -1249,7 +1249,7 @@ Status ScanLocalState::close(RuntimeState* state) { } template -Status ScanOperatorX::get_block(RuntimeState* state, Block* block, bool* eos) { +Status ScanOperatorX::get_block_impl(RuntimeState* state, Block* block, bool* eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); diff --git a/be/src/exec/operator/scan_operator.h b/be/src/exec/operator/scan_operator.h index d52df35a5ca1d3..931917482736a6 100644 --- a/be/src/exec/operator/scan_operator.h +++ b/be/src/exec/operator/scan_operator.h @@ -328,9 +328,9 @@ class ScanOperatorX : public OperatorX { public: Status init(const TPlanNode& tnode, RuntimeState* state) override; Status prepare(RuntimeState* state) override; - Status get_block(RuntimeState* state, Block* block, bool* eos) override; + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override; Status get_block_after_projects(RuntimeState* state, Block* block, bool* eos) override { - Status status = get_block(state, block, eos); + Status status = OperatorX::get_block(state, block, eos); if (status.ok()) { state->get_local_state(operator_id())->update_output_block_counters(*block); } diff --git a/be/src/exec/operator/schema_scan_operator.cpp b/be/src/exec/operator/schema_scan_operator.cpp index abe9ee39797ace..0f4e5562962c1a 100644 --- a/be/src/exec/operator/schema_scan_operator.cpp +++ b/be/src/exec/operator/schema_scan_operator.cpp @@ -209,7 +209,7 @@ Status SchemaScanOperatorX::prepare(RuntimeState* state) { return 
Status::OK(); } -Status SchemaScanOperatorX::get_block(RuntimeState* state, Block* block, bool* eos) { +Status SchemaScanOperatorX::get_block_impl(RuntimeState* state, Block* block, bool* eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); RETURN_IF_CANCELLED(state); diff --git a/be/src/exec/operator/schema_scan_operator.h b/be/src/exec/operator/schema_scan_operator.h index 1d8cf22c4a0be0..b7540a393c699b 100644 --- a/be/src/exec/operator/schema_scan_operator.h +++ b/be/src/exec/operator/schema_scan_operator.h @@ -64,7 +64,7 @@ class SchemaScanOperatorX final : public OperatorX { Status init(const TPlanNode& tnode, RuntimeState* state) override; Status prepare(RuntimeState* state) override; - Status get_block(RuntimeState* state, Block* block, bool* eos) override; + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override; [[nodiscard]] bool is_source() const override { return true; } diff --git a/be/src/exec/operator/set_probe_sink_operator.cpp b/be/src/exec/operator/set_probe_sink_operator.cpp index 26913e97641e1a..c951f787bd019e 100644 --- a/be/src/exec/operator/set_probe_sink_operator.cpp +++ b/be/src/exec/operator/set_probe_sink_operator.cpp @@ -62,7 +62,8 @@ Status SetProbeSinkOperatorX::prepare(RuntimeState* state) { } template -Status SetProbeSinkOperatorX::sink(RuntimeState* state, Block* in_block, bool eos) { +Status SetProbeSinkOperatorX::sink_impl(RuntimeState* state, Block* in_block, + bool eos) { RETURN_IF_CANCELLED(state); auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); diff --git a/be/src/exec/operator/set_probe_sink_operator.h b/be/src/exec/operator/set_probe_sink_operator.h index bab8fbe536f628..cae10f7672667b 100644 --- a/be/src/exec/operator/set_probe_sink_operator.h +++ b/be/src/exec/operator/set_probe_sink_operator.h @@ -99,7 +99,7 @@ class SetProbeSinkOperatorX final : public DataSinkOperatorX::close(RuntimeState* state, Status exec_s } 
template -Status SetSinkOperatorX::sink(RuntimeState* state, Block* in_block, bool eos) { +Status SetSinkOperatorX::sink_impl(RuntimeState* state, Block* in_block, bool eos) { RETURN_IF_CANCELLED(state); auto& local_state = get_local_state(state); diff --git a/be/src/exec/operator/set_sink_operator.h b/be/src/exec/operator/set_sink_operator.h index 2ed03b4b0aea14..26b359101984a8 100644 --- a/be/src/exec/operator/set_sink_operator.h +++ b/be/src/exec/operator/set_sink_operator.h @@ -110,7 +110,7 @@ class SetSinkOperatorX final : public DataSinkOperatorX::open(RuntimeState* state) { } template -Status SetSourceOperatorX::get_block(RuntimeState* state, Block* block, bool* eos) { +Status SetSourceOperatorX::get_block_impl(RuntimeState* state, Block* block, + bool* eos) { RETURN_IF_CANCELLED(state); auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); diff --git a/be/src/exec/operator/set_source_operator.h b/be/src/exec/operator/set_source_operator.h index 31e5fc77542458..b0f3d974de4e60 100644 --- a/be/src/exec/operator/set_source_operator.h +++ b/be/src/exec/operator/set_source_operator.h @@ -85,7 +85,7 @@ class SetSourceOperatorX MOCK_REMOVE(final) : public OperatorXrows()); diff --git a/be/src/exec/operator/sort_sink_operator.h b/be/src/exec/operator/sort_sink_operator.h index f31d0debe07745..b0710cb15e2f69 100644 --- a/be/src/exec/operator/sort_sink_operator.h +++ b/be/src/exec/operator/sort_sink_operator.h @@ -77,7 +77,7 @@ class SortSinkOperatorX final : public DataSinkOperatorX { Status init(const TPlanNode& tnode, RuntimeState* state) override; Status prepare(RuntimeState* state) override; - Status sink(RuntimeState* state, Block* in_block, bool eos) override; + Status sink_impl(RuntimeState* state, Block* in_block, bool eos) override; DataDistribution required_data_distribution(RuntimeState* /*state*/) const override { if (_is_analytic_sort) { return _is_colocate && _require_bucket_distribution diff --git 
a/be/src/exec/operator/sort_source_operator.cpp b/be/src/exec/operator/sort_source_operator.cpp index 18fe81dc4c8b6b..a14fc054300368 100644 --- a/be/src/exec/operator/sort_source_operator.cpp +++ b/be/src/exec/operator/sort_source_operator.cpp @@ -31,7 +31,7 @@ SortSourceOperatorX::SortSourceOperatorX(ObjectPool* pool, const TPlanNode& tnod const DescriptorTbl& descs) : OperatorX(pool, tnode, operator_id, descs) {} -Status SortSourceOperatorX::get_block(RuntimeState* state, Block* block, bool* eos) { +Status SortSourceOperatorX::get_block_impl(RuntimeState* state, Block* block, bool* eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); SCOPED_PEAK_MEM(&local_state._estimate_memory_usage); diff --git a/be/src/exec/operator/sort_source_operator.h b/be/src/exec/operator/sort_source_operator.h index c2a63b82ccd607..79e59f635e0b72 100644 --- a/be/src/exec/operator/sort_source_operator.h +++ b/be/src/exec/operator/sort_source_operator.h @@ -43,7 +43,7 @@ class SortSourceOperatorX MOCK_REMOVE(final) : public OperatorX #ifdef BE_TEST SortSourceOperatorX() = default; #endif - Status get_block(RuntimeState* state, Block* block, bool* eos) override; + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override; bool is_source() const override { return true; } diff --git a/be/src/exec/operator/spill_iceberg_table_sink_operator.cpp b/be/src/exec/operator/spill_iceberg_table_sink_operator.cpp index 58a54868799f3b..20b4eea954a599 100644 --- a/be/src/exec/operator/spill_iceberg_table_sink_operator.cpp +++ b/be/src/exec/operator/spill_iceberg_table_sink_operator.cpp @@ -120,7 +120,7 @@ Status SpillIcebergTableSinkOperatorX::prepare(RuntimeState* state) { return VExpr::open(_output_vexpr_ctxs, state); } -Status SpillIcebergTableSinkOperatorX::sink(RuntimeState* state, Block* in_block, bool eos) { +Status SpillIcebergTableSinkOperatorX::sink_impl(RuntimeState* state, Block* in_block, bool eos) { auto& local_state = 
get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); COUNTER_UPDATE(local_state.rows_input_counter(), (int64_t)in_block->rows()); diff --git a/be/src/exec/operator/spill_iceberg_table_sink_operator.h b/be/src/exec/operator/spill_iceberg_table_sink_operator.h index d81e515f05d59a..7e6a037d2f55ed 100644 --- a/be/src/exec/operator/spill_iceberg_table_sink_operator.h +++ b/be/src/exec/operator/spill_iceberg_table_sink_operator.h @@ -64,7 +64,7 @@ class SpillIcebergTableSinkOperatorX final Status prepare(RuntimeState* state) override; - Status sink(RuntimeState* state, Block* in_block, bool eos) override; + Status sink_impl(RuntimeState* state, Block* in_block, bool eos) override; size_t get_reserve_mem_size(RuntimeState* state, bool eos) override; diff --git a/be/src/exec/operator/spill_sort_sink_operator.cpp b/be/src/exec/operator/spill_sort_sink_operator.cpp index c0ccf4657d4df5..b1d83f3a63d8b8 100644 --- a/be/src/exec/operator/spill_sort_sink_operator.cpp +++ b/be/src/exec/operator/spill_sort_sink_operator.cpp @@ -144,7 +144,7 @@ size_t SpillSortSinkOperatorX::revocable_mem_size(RuntimeState* state) const { return mem_size > state->spill_min_revocable_mem() ? 
mem_size : 0; } -Status SpillSortSinkOperatorX::sink(doris::RuntimeState* state, Block* in_block, bool eos) { +Status SpillSortSinkOperatorX::sink_impl(doris::RuntimeState* state, Block* in_block, bool eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); COUNTER_UPDATE(local_state.rows_input_counter(), (int64_t)in_block->rows()); diff --git a/be/src/exec/operator/spill_sort_sink_operator.h b/be/src/exec/operator/spill_sort_sink_operator.h index 823bc5e7f04a90..692405bc6f5c91 100644 --- a/be/src/exec/operator/spill_sort_sink_operator.h +++ b/be/src/exec/operator/spill_sort_sink_operator.h @@ -80,7 +80,7 @@ class SpillSortSinkOperatorX final : public DataSinkOperatorXrequired_data_distribution(state); } diff --git a/be/src/exec/operator/spill_sort_source_operator.cpp b/be/src/exec/operator/spill_sort_source_operator.cpp index e516ead73c61fd..f2a58beab1ccc8 100644 --- a/be/src/exec/operator/spill_sort_source_operator.cpp +++ b/be/src/exec/operator/spill_sort_source_operator.cpp @@ -246,7 +246,7 @@ Status SpillSortSourceOperatorX::close(RuntimeState* state) { return _sort_source_operator->close(state); } -Status SpillSortSourceOperatorX::get_block(RuntimeState* state, Block* block, bool* eos) { +Status SpillSortSourceOperatorX::get_block_impl(RuntimeState* state, Block* block, bool* eos) { auto& local_state = get_local_state(state); SCOPED_TIMER(local_state.exec_time_counter()); diff --git a/be/src/exec/operator/spill_sort_source_operator.h b/be/src/exec/operator/spill_sort_source_operator.h index 969582243e0d19..4af4d4954f5444 100644 --- a/be/src/exec/operator/spill_sort_source_operator.h +++ b/be/src/exec/operator/spill_sort_source_operator.h @@ -82,7 +82,7 @@ class SpillSortSourceOperatorX : public OperatorX { Status close(RuntimeState* state) override; - Status get_block(RuntimeState* state, Block* block, bool* eos) override; + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override; bool is_source() 
const override { return true; } diff --git a/be/src/exec/operator/tvf_table_sink_operator.h b/be/src/exec/operator/tvf_table_sink_operator.h index 0a47d37ed609c9..8b1c4e98450303 100644 --- a/be/src/exec/operator/tvf_table_sink_operator.h +++ b/be/src/exec/operator/tvf_table_sink_operator.h @@ -65,7 +65,7 @@ class TVFTableSinkOperatorX final : public DataSinkOperatorXrows()); diff --git a/be/src/exec/operator/union_sink_operator.cpp b/be/src/exec/operator/union_sink_operator.cpp index 5484ea166aebb1..b0ed5e80ae1d68 100644 --- a/be/src/exec/operator/union_sink_operator.cpp +++ b/be/src/exec/operator/union_sink_operator.cpp @@ -94,7 +94,7 @@ Status UnionSinkOperatorX::prepare(RuntimeState* state) { return Status::OK(); } -Status UnionSinkOperatorX::sink(RuntimeState* state, Block* in_block, bool eos) { +Status UnionSinkOperatorX::sink_impl(RuntimeState* state, Block* in_block, bool eos) { auto& local_state = get_local_state(state); if (local_state.low_memory_mode()) { set_low_memory_mode(state); diff --git a/be/src/exec/operator/union_sink_operator.h b/be/src/exec/operator/union_sink_operator.h index 79dc6a7688c61e..43d7129d4b1556 100644 --- a/be/src/exec/operator/union_sink_operator.h +++ b/be/src/exec/operator/union_sink_operator.h @@ -102,7 +102,7 @@ class UnionSinkOperatorX MOCK_REMOVE(final) : public DataSinkOperatorX create_shared_state() const override { if (_cur_child_id > 0) { diff --git a/be/src/exec/operator/union_source_operator.cpp b/be/src/exec/operator/union_source_operator.cpp index 0b382a69c729a4..396a2d8595606a 100644 --- a/be/src/exec/operator/union_source_operator.cpp +++ b/be/src/exec/operator/union_source_operator.cpp @@ -101,7 +101,7 @@ std::string UnionSourceLocalState::debug_string(int indentation_level) const { return fmt::to_string(debug_string_buffer); } -Status UnionSourceOperatorX::get_block(RuntimeState* state, Block* block, bool* eos) { +Status UnionSourceOperatorX::get_block_impl(RuntimeState* state, Block* block, bool* eos) { auto& 
local_state = get_local_state(state); Defer set_eos {[&]() { // the eos check of union operator is complex, need check all logical if you want modify diff --git a/be/src/exec/operator/union_source_operator.h b/be/src/exec/operator/union_source_operator.h index e4858ea53d8fa3..f6f5d88d134319 100644 --- a/be/src/exec/operator/union_source_operator.h +++ b/be/src/exec/operator/union_source_operator.h @@ -69,7 +69,7 @@ class UnionSourceOperatorX MOCK_REMOVE(final) : public OperatorXget_block_after_projects(_state, block, &eos)); - RETURN_IF_ERROR(block->check_type_and_column()); _eos = eos; } @@ -717,7 +716,7 @@ Status PipelineTask::execute(bool* done) { } } }); - RETURN_IF_ERROR(block->check_type_and_column()); + status = _sink->sink(_state, block, _eos); if (_eos) { diff --git a/be/test/exec/operator/agg_operator_test.cpp b/be/test/exec/operator/agg_operator_test.cpp index ae750013c84423..b3178af7a8d8d5 100644 --- a/be/test/exec/operator/agg_operator_test.cpp +++ b/be/test/exec/operator/agg_operator_test.cpp @@ -92,6 +92,23 @@ struct MockAggSourceOperator : public AggSourceOperatorX { std::unique_ptr mock_row_descriptor; }; +class MockDistributionOperator final : public OperatorX { +public: + MockDistributionOperator(ExchangeType exchange_type) : _exchange_type(exchange_type) {} + + Status get_block_impl(RuntimeState* /*state*/, Block* /*block*/, bool* eos) override { + *eos = true; + return Status::OK(); + } + + DataDistribution required_data_distribution(RuntimeState* /*state*/) const override { + return {_exchange_type}; + } + +private: + ExchangeType _exchange_type; +}; + std::shared_ptr create_agg_sink_op(OperatorContext& ctx, bool is_merge, bool without_key) { auto op = std::make_shared(); diff --git a/be/test/exec/operator/analytic_sink_operator_test.cpp b/be/test/exec/operator/analytic_sink_operator_test.cpp index 517c73642ce393..b5e5787e8a6049 100644 --- a/be/test/exec/operator/analytic_sink_operator_test.cpp +++ 
b/be/test/exec/operator/analytic_sink_operator_test.cpp @@ -41,7 +41,9 @@ class MockAnalyticSinkOperator : public OperatorXBase { return Status::OK(); } - Status get_block(RuntimeState* state, Block* block, bool* eos) override { return Status::OK(); } + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override { + return Status::OK(); + } Status setup_local_state(RuntimeState* state, LocalStateInfo& info) override { return Status::OK(); } diff --git a/be/test/exec/operator/partition_sort_sink_operator_test.cpp b/be/test/exec/operator/partition_sort_sink_operator_test.cpp index 744ca8e84521f0..36a90bf5a381e1 100644 --- a/be/test/exec/operator/partition_sort_sink_operator_test.cpp +++ b/be/test/exec/operator/partition_sort_sink_operator_test.cpp @@ -37,7 +37,9 @@ class PartitionSortOperatorMockOperator : public OperatorXBase { return Status::OK(); } - Status get_block(RuntimeState* state, Block* block, bool* eos) override { return Status::OK(); } + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override { + return Status::OK(); + } Status setup_local_state(RuntimeState* state, LocalStateInfo& info) override { return Status::OK(); } diff --git a/be/test/exec/operator/partitioned_aggregation_test_helper.h b/be/test/exec/operator/partitioned_aggregation_test_helper.h index 5ecfe8dd29771f..da0881e84ead6b 100644 --- a/be/test/exec/operator/partitioned_aggregation_test_helper.h +++ b/be/test/exec/operator/partitioned_aggregation_test_helper.h @@ -83,7 +83,9 @@ class MockPartitionedAggSinkOperatorX : public PartitionedAggSinkOperatorX { return Status::OK(); } - Status sink(RuntimeState* state, Block* in_block, bool eos) override { return Status::OK(); } + Status sink_impl(RuntimeState* state, Block* in_block, bool eos) override { + return Status::OK(); + } }; class MockPartitionedAggLocalState : public PartitionedAggLocalState { diff --git a/be/test/exec/operator/partitioned_hash_join_test_helper.h 
b/be/test/exec/operator/partitioned_hash_join_test_helper.h index 9dcbb7335f560c..ba4ff6612886ed 100644 --- a/be/test/exec/operator/partitioned_hash_join_test_helper.h +++ b/be/test/exec/operator/partitioned_hash_join_test_helper.h @@ -115,7 +115,9 @@ class MockHashJoinBuildOperator : public HashJoinBuildSinkOperatorX { return Status::OK(); } - Status sink(RuntimeState* state, Block* in_block, bool eos) override { return Status::OK(); } + Status sink_impl(RuntimeState* state, Block* in_block, bool eos) override { + return Status::OK(); + } std::string get_memory_usage_debug_str(RuntimeState* state) const override { return "mock"; } }; diff --git a/be/test/exec/operator/query_cache_operator_test.cpp b/be/test/exec/operator/query_cache_operator_test.cpp index a99e9bcb9d9e1a..91c73b99077247 100644 --- a/be/test/exec/operator/query_cache_operator_test.cpp +++ b/be/test/exec/operator/query_cache_operator_test.cpp @@ -36,7 +36,9 @@ class QueryCacheMockChildOperator : public OperatorXBase { return Status::OK(); } - Status get_block(RuntimeState* state, Block* block, bool* eos) override { return Status::OK(); } + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override { + return Status::OK(); + } Status setup_local_state(RuntimeState* state, LocalStateInfo& info) override { return Status::OK(); } diff --git a/be/test/exec/operator/sort_operator_test.cpp b/be/test/exec/operator/sort_operator_test.cpp index 23fa37e57b01ef..62cb3d448f9092 100644 --- a/be/test/exec/operator/sort_operator_test.cpp +++ b/be/test/exec/operator/sort_operator_test.cpp @@ -36,7 +36,9 @@ class MockOperator : public OperatorXBase { return Status::OK(); } - Status get_block(RuntimeState* state, Block* block, bool* eos) override { return Status::OK(); } + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override { + return Status::OK(); + } Status setup_local_state(RuntimeState* state, LocalStateInfo& info) override { return Status::OK(); } diff --git 
a/be/test/exec/operator/spill_sort_test_helper.h b/be/test/exec/operator/spill_sort_test_helper.h index c887212b2fd1f8..81ca44ce2bd33b 100644 --- a/be/test/exec/operator/spill_sort_test_helper.h +++ b/be/test/exec/operator/spill_sort_test_helper.h @@ -53,7 +53,7 @@ class MockSortSourceOperatorX : public SortSourceOperatorX { const DescriptorTbl& descs) : SortSourceOperatorX(pool, tnode, operator_id, descs) {} - Status get_block(RuntimeState* state, Block* block, bool* eos) override { + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override { std::swap(*block, this->block); *eos = this->eos; return Status::OK(); diff --git a/be/test/exec/operator/streaming_agg_operator_test.cpp b/be/test/exec/operator/streaming_agg_operator_test.cpp index 0421d58bfd256b..d56e88ec5a04a9 100644 --- a/be/test/exec/operator/streaming_agg_operator_test.cpp +++ b/be/test/exec/operator/streaming_agg_operator_test.cpp @@ -65,7 +65,9 @@ class MockStreamingAggOperatorChildOperator : public OperatorXBase { return Status::OK(); } - Status get_block(RuntimeState* state, Block* block, bool* eos) override { return Status::OK(); } + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override { + return Status::OK(); + } Status setup_local_state(RuntimeState* state, LocalStateInfo& info) override { return Status::OK(); } diff --git a/be/test/exec/operator/table_function_operator_test.cpp b/be/test/exec/operator/table_function_operator_test.cpp index 24217b7eb8d824..1da139707cae47 100644 --- a/be/test/exec/operator/table_function_operator_test.cpp +++ b/be/test/exec/operator/table_function_operator_test.cpp @@ -53,7 +53,9 @@ class MockTableFunctionChildOperator : public OperatorXBase { return Status::OK(); } - Status get_block(RuntimeState* state, Block* block, bool* eos) override { return Status::OK(); } + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override { + return Status::OK(); + } Status setup_local_state(RuntimeState* state, 
LocalStateInfo& info) override { return Status::OK(); } diff --git a/be/test/testutil/mock/mock_operators.h b/be/test/testutil/mock/mock_operators.h index bba11eb74737c8..1077a767018fd7 100644 --- a/be/test/testutil/mock/mock_operators.h +++ b/be/test/testutil/mock/mock_operators.h @@ -34,7 +34,7 @@ class MockChildOperator : public OperatorXBase { return Status::OK(); } - Status get_block(RuntimeState* state, Block* block, bool* eos) override { + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override { block->swap(_block); *eos = _eos; return Status::OK(); @@ -57,7 +57,7 @@ class MockSourceOperator : public MockChildOperator { class MockSinkOperator final : public DataSinkOperatorXBase { public: - Status sink(RuntimeState* state, Block* block, bool eos) override { return Status::OK(); } + Status sink_impl(RuntimeState* state, Block* block, bool eos) override { return Status::OK(); } Status setup_local_state(RuntimeState* state, LocalSinkStateInfo& info) override { return Status::OK(); diff --git a/be/test/util/profile_spec_test.cpp b/be/test/util/profile_spec_test.cpp index 9d2561416120b7..56f7a98f4a45df 100644 --- a/be/test/util/profile_spec_test.cpp +++ b/be/test/util/profile_spec_test.cpp @@ -96,7 +96,7 @@ class ProfileSpecTest : public testing::Test { Status prepare(RuntimeState* state) override { return Status::OK(); } Status open(RuntimeState* state) { return Status::OK(); } Status close(RuntimeState* state) override { return Status::OK(); } - Status get_block(RuntimeState* state, Block* block, bool* eos) override { + Status get_block_impl(RuntimeState* state, Block* block, bool* eos) override { return Status::OK(); } }; @@ -112,7 +112,7 @@ class ProfileSpecTest : public testing::Test { Status prepare(RuntimeState* state) override { return Status::OK(); } Status close(RuntimeState* state) override { return Status::OK(); } - Status get_block(RuntimeState* state, Block* block, bool* eos) override { + Status get_block_impl(RuntimeState* 
state, Block* block, bool* eos) override { *eos = true; block->swap(_block); return Status::OK(); From ad2a79428f0d718014867602e0d151c719a562b5 Mon Sep 17 00:00:00 2001 From: Mryange Date: Wed, 1 Jul 2026 15:02:59 +0800 Subject: [PATCH 6/6] fix beut --- be/test/core/column/column_array_view_test.cpp | 8 +++++--- be/test/exec/pipeline/pipeline_task_test.cpp | 4 +++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/be/test/core/column/column_array_view_test.cpp b/be/test/core/column/column_array_view_test.cpp index 57492c958171be..39c9696a8f4fe6 100644 --- a/be/test/core/column/column_array_view_test.cpp +++ b/be/test/core/column/column_array_view_test.cpp @@ -66,7 +66,8 @@ static ColumnPtr build_int32_array_column(const std::vector for (auto v : row_nulls) { outer_null->insert_value(v); } - array_col = ColumnNullable::create(array_col->assume_mutable(), std::move(outer_null)); + array_col = ColumnNullable::create(IColumn::mutate(std::move(array_col)), + std::move(outer_null)); } return array_col; } @@ -101,7 +102,8 @@ static ColumnPtr build_string_array_column(const std::vectorinsert_value(v); } - array_col = ColumnNullable::create(array_col->assume_mutable(), std::move(outer_null)); + array_col = ColumnNullable::create(IColumn::mutate(std::move(array_col)), + std::move(outer_null)); } return array_col; } @@ -287,4 +289,4 @@ TEST(ColumnArrayViewTest, IndexAccess_string) { EXPECT_EQ(arr1.value_at(0).to_string(), "test"); } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/test/exec/pipeline/pipeline_task_test.cpp b/be/test/exec/pipeline/pipeline_task_test.cpp index 0780498e29f1fd..33b94ebfd8362b 100644 --- a/be/test/exec/pipeline/pipeline_task_test.cpp +++ b/be/test/exec/pipeline/pipeline_task_test.cpp @@ -113,7 +113,9 @@ class CountingBlockableSinkOperator final : public DataSinkOperatorX(op_id, node_id, dest_id), _blockable_checks(blockable_checks) {} - Status sink(RuntimeState* state, Block* in_block, bool eos) 
override { return Status::OK(); } + Status sink_impl(RuntimeState* state, Block* in_block, bool eos) override { + return Status::OK(); + } bool is_blockable(RuntimeState* state) const override { _blockable_checks->fetch_add(1, std::memory_order_relaxed);