From 701ea48747134b78a00f29c023c01d1d7f79572a Mon Sep 17 00:00:00 2001 From: Mryange Date: Wed, 1 Jul 2026 15:17:19 +0800 Subject: [PATCH] Make IColumn::for_each_subcolumn const and add mutate_subcolumns for COW mutate --- be/src/core/block/block.cpp | 10 +- be/src/core/column/column.cpp | 37 ++--- be/src/core/column/column.h | 33 +++-- be/src/core/column/column_array.h | 17 +-- be/src/core/column/column_const.h | 6 +- be/src/core/column/column_map.h | 20 +-- be/src/core/column/column_nullable.h | 15 +- be/src/core/column/column_struct.cpp | 10 +- be/src/core/column/column_struct.h | 3 +- be/src/core/column/column_variant.cpp | 31 +++- be/src/core/column/column_variant.h | 3 +- .../column/column_mutate_subcolumns_test.cpp | 139 ++++++++++++++++++ 12 files changed, 245 insertions(+), 79 deletions(-) create mode 100644 be/test/core/column/column_mutate_subcolumns_test.cpp diff --git a/be/src/core/block/block.cpp b/be/src/core/block/block.cpp index 4dd6530f55de97..88006f7b2fd52f 100644 --- a/be/src/core/block/block.cpp +++ b/be/src/core/block/block.cpp @@ -89,17 +89,13 @@ bool is_recursively_exclusive(const IColumn& column) { } bool exclusive = true; - IColumn::ColumnCallback callback = [&](IColumn::WrappedPtr& subcolumn) { + IColumn::ColumnCallback callback = [&](const IColumn& subcolumn) { if (!exclusive) { return; } - const ColumnPtr& subcolumn_ptr = const_cast(subcolumn); - DCHECK(subcolumn_ptr); - exclusive = is_recursively_exclusive(*subcolumn_ptr); + exclusive = is_recursively_exclusive(subcolumn); }; - // `for_each_subcolumn` only exposes a mutable callback type. This callback - // only reads the wrapped pointers and never calls the non-const accessors. 
- const_cast(column).for_each_subcolumn(callback); + column.for_each_subcolumn(callback); return exclusive; } diff --git a/be/src/core/column/column.cpp b/be/src/core/column/column.cpp index 3fea47f93887ec..68048b10053b2b 100644 --- a/be/src/core/column/column.cpp +++ b/be/src/core/column/column.cpp @@ -32,12 +32,11 @@ std::string IColumn::dump_structure() const { std::stringstream res; res << get_name() << "(size = " << size(); - ColumnCallback callback = [&](ColumnPtr& subcolumn) { - res << ", " << subcolumn->dump_structure(); + ColumnCallback callback = [&](const IColumn& subcolumn) { + res << ", " << subcolumn.dump_structure(); }; - // simply read using for_each_subcolumn without modification; const_cast can be used. - const_cast(this)->for_each_subcolumn(callback); + for_each_subcolumn(callback); res << ")"; return res.str(); @@ -45,11 +44,10 @@ std::string IColumn::dump_structure() const { int IColumn::count_const_column() const { int count = is_column_const(*this) ? 1 : 0; - ColumnCallback callback = [&](ColumnPtr& subcolumn) { - count += subcolumn->count_const_column(); + ColumnCallback callback = [&](const IColumn& subcolumn) { + count += subcolumn.count_const_column(); }; - // simply read using for_each_subcolumn without modification; const_cast can be used. - const_cast(this)->for_each_subcolumn(callback); + for_each_subcolumn(callback); return count; } @@ -95,13 +93,12 @@ bool IColumn::column_boolean_check() const { }; bool is_valid = check_boolean_is_zero_or_one(*this); - ColumnCallback callback = [&](ColumnPtr& subcolumn) { - if (!subcolumn->column_boolean_check()) { + ColumnCallback callback = [&](const IColumn& subcolumn) { + if (!subcolumn.column_boolean_check()) { is_valid = false; } }; - // simply read using for_each_subcolumn without modification; const_cast can be used. 
- const_cast(this)->for_each_subcolumn(callback); + for_each_subcolumn(callback); return is_valid; } @@ -122,13 +119,12 @@ bool IColumn::null_map_check() const { }; bool is_valid = check_null_map_is_zero_or_one(*this); - ColumnCallback callback = [&](ColumnPtr& subcolumn) { - if (!subcolumn->null_map_check()) { + ColumnCallback callback = [&](const IColumn& subcolumn) { + if (!subcolumn.null_map_check()) { is_valid = false; } }; - // simply read using for_each_subcolumn without modification; const_cast can be used. - const_cast(this)->for_each_subcolumn(callback); + for_each_subcolumn(callback); return is_valid; } @@ -231,15 +227,14 @@ bool is_column_const(const IColumn& column) { } void IColumn::check_const_only_in_top_level() const { - ColumnCallback throw_if_const = [&](WrappedPtr& column) { - const ColumnPtr& col = const_cast(column); - if (is_column_const(*col)) { + ColumnCallback throw_if_const = [&](const IColumn& column) { + if (is_column_const(column)) { throw doris::Exception(ErrorCode::INTERNAL_ERROR, "const column is not allowed to be nested, but got {}", - col->get_name()); + column.get_name()); } }; - const_cast(this)->for_each_subcolumn(throw_if_const); + for_each_subcolumn(throw_if_const); } } // namespace doris diff --git a/be/src/core/column/column.h b/be/src/core/column/column.h index ecd0245d718c18..38112a1f9fe8e1 100644 --- a/be/src/core/column/column.h +++ b/be/src/core/column/column.h @@ -563,10 +563,27 @@ class IColumn : public COW { /// If the column contains subcolumns (such as Array, Nullable, etc), do callback on them. /// Shallow: doesn't do recursive calls; don't do call for itself. 
- using ColumnCallback = std::function; - using ImutableColumnCallback = std::function; - virtual void for_each_subcolumn(ColumnCallback) {} + using ColumnCallback = std::function; + virtual void for_each_subcolumn(ColumnCallback) const {} +protected: + virtual void mutate_subcolumns() {} + + static void mutate_subcolumn(WrappedPtr& subcolumn) { + static_cast(subcolumn) = + std::move(*static_cast(subcolumn)).mutate(); + } + + template + static void mutate_subcolumn(typename ColumnType::WrappedPtr& subcolumn) { + auto mutated = std::move(*static_cast(subcolumn)).mutate(); + auto typed_mutated = ColumnType::cast_to_column_mutptr( + assert_cast(mutated.get())); + mutated = nullptr; + static_cast(subcolumn) = std::move(typed_mutated); + } + +public: /// Columns have equal structure. /// If true - you can use "compare_at", "insert_from", etc. methods. virtual bool structure_equals(const IColumn&) const { @@ -580,10 +597,7 @@ class IColumn : public COW { // exclusive nodes are reused through the COW fast path. MutablePtr mutate() const&& { MutablePtr res = shallow_mutate(); - res->for_each_subcolumn([](WrappedPtr& subcolumn) { - static_cast(subcolumn) = - std::move(*static_cast(subcolumn)).mutate(); - }); + res->mutate_subcolumns(); return res; } @@ -594,10 +608,7 @@ class IColumn : public COW { static MutablePtr mutate(Ptr ptr) { MutablePtr res = ptr->shallow_mutate(); /// Now use_count is 2. ptr.reset(); /// Reset use_count to 1. 
- res->for_each_subcolumn([](WrappedPtr& subcolumn) { - static_cast(subcolumn) = - std::move(*static_cast(subcolumn)).mutate(); - }); + res->mutate_subcolumns(); return res; } diff --git a/be/src/core/column/column_array.h b/be/src/core/column/column_array.h index 9c4e37ed79415a..645b1662d7b238 100644 --- a/be/src/core/column/column_array.h +++ b/be/src/core/column/column_array.h @@ -37,7 +37,6 @@ #include "core/field.h" #include "core/string_ref.h" #include "core/types.h" -#include "util/defer_op.h" class SipHash; @@ -216,14 +215,14 @@ class ColumnArray final : public COWHelper { return get_offsets()[i] - get_offsets()[i - 1]; } - void for_each_subcolumn(ColumnCallback callback) override { - IColumn::WrappedPtr offsets_column(std::move(static_cast(offsets))); - Defer defer([&] { - static_cast(offsets) = - cast_to_column(static_cast(offsets_column)); - }); - callback(offsets_column); - callback(data); + void mutate_subcolumns() override { + mutate_subcolumn(offsets); + mutate_subcolumn(data); + } + + void for_each_subcolumn(ColumnCallback callback) const override { + callback(*static_cast(offsets)); + callback(*static_cast(data)); } ColumnPtr convert_column_if_overflow() override { diff --git a/be/src/core/column/column_const.h b/be/src/core/column/column_const.h index 238d0f971b5a03..834bfc2a08c899 100644 --- a/be/src/core/column/column_const.h +++ b/be/src/core/column/column_const.h @@ -261,7 +261,11 @@ class ColumnConst final : public COWHelper { } } - void for_each_subcolumn(ColumnCallback callback) override { callback(data); } + void mutate_subcolumns() override { mutate_subcolumn(data); } + + void for_each_subcolumn(ColumnCallback callback) const override { + callback(*static_cast(data)); + } bool structure_equals(const IColumn& rhs) const override { if (const auto* rhs_concrete = check_and_get_column(&rhs)) { diff --git a/be/src/core/column/column_map.h b/be/src/core/column/column_map.h index f5bb29d8be5e7e..0781a2cb503569 100644 --- 
a/be/src/core/column/column_map.h +++ b/be/src/core/column/column_map.h @@ -43,7 +43,6 @@ #include "core/string_ref.h" #include "core/types.h" #include "exec/common/sip_hash.h" -#include "util/defer_op.h" class SipHash; @@ -74,15 +73,16 @@ class ColumnMap final : public COWHelper { std::string get_name() const override; - void for_each_subcolumn(ColumnCallback callback) override { - IColumn::WrappedPtr offsets(std::move(static_cast(offsets_column))); - Defer defer([&] { - static_cast(offsets_column) = - cast_to_column(static_cast(offsets)); - }); - callback(keys_column); - callback(values_column); - callback(offsets); + void mutate_subcolumns() override { + mutate_subcolumn(keys_column); + mutate_subcolumn(values_column); + mutate_subcolumn(offsets_column); + } + + void for_each_subcolumn(ColumnCallback callback) const override { + callback(*static_cast(keys_column)); + callback(*static_cast(values_column)); + callback(*static_cast(offsets_column)); } void sanity_check() const override { diff --git a/be/src/core/column/column_nullable.h b/be/src/core/column/column_nullable.h index 563d5e7011ddbf..27679d7bed38c7 100644 --- a/be/src/core/column/column_nullable.h +++ b/be/src/core/column/column_nullable.h @@ -32,7 +32,6 @@ #include "core/typeid_cast.h" #include "core/types.h" #include "storage/olap_common.h" -#include "util/defer_op.h" class SipHash; @@ -250,14 +249,14 @@ class ColumnNullable final : public COWHelper { return get_ptr(); } - void for_each_subcolumn(ColumnCallback callback) override { - callback(_nested_column); + void mutate_subcolumns() override { + mutate_subcolumn(_nested_column); + mutate_subcolumn(_null_map); + } - IColumn::WrappedPtr null_map(std::move(static_cast(_null_map))); - Defer defer([&] { - _null_map = cast_to_column(static_cast(null_map)); - }); - callback(null_map); + void for_each_subcolumn(ColumnCallback callback) const override { + callback(*static_cast(_nested_column)); + callback(*static_cast(_null_map)); } bool 
structure_equals(const IColumn& rhs) const override { diff --git a/be/src/core/column/column_struct.cpp b/be/src/core/column/column_struct.cpp index e2a90432c56da4..fb15785df8bb2d 100644 --- a/be/src/core/column/column_struct.cpp +++ b/be/src/core/column/column_struct.cpp @@ -379,9 +379,15 @@ bool ColumnStruct::has_enough_capacity(const IColumn& src) const { return true; } -void ColumnStruct::for_each_subcolumn(ColumnCallback callback) { +void ColumnStruct::mutate_subcolumns() { for (auto& column : columns) { - callback(column); + mutate_subcolumn(column); + } +} + +void ColumnStruct::for_each_subcolumn(ColumnCallback callback) const { + for (const auto& column : columns) { + callback(*static_cast(column)); } } diff --git a/be/src/core/column/column_struct.h b/be/src/core/column/column_struct.h index e1f81950ddc0d3..83affe7296558f 100644 --- a/be/src/core/column/column_struct.h +++ b/be/src/core/column/column_struct.h @@ -155,7 +155,8 @@ class ColumnStruct final : public COWHelper { size_t byte_size() const override; size_t allocated_bytes() const override; bool has_enough_capacity(const IColumn& src) const override; - void for_each_subcolumn(ColumnCallback callback) override; + void mutate_subcolumns() override; + void for_each_subcolumn(ColumnCallback callback) const override; bool structure_equals(const IColumn& rhs) const override; size_t tuple_size() const { return columns.size(); } diff --git a/be/src/core/column/column_variant.cpp b/be/src/core/column/column_variant.cpp index 723d52d46b9c09..ec34c619cdc14b 100644 --- a/be/src/core/column/column_variant.cpp +++ b/be/src/core/column/column_variant.cpp @@ -68,7 +68,6 @@ #include "exprs/aggregate/aggregate_function.h" #include "exprs/json_functions.h" #include "storage/olap_common.h" -#include "util/defer_op.h" #include "util/json/path_in_data.h" #include "util/jsonb_document.h" #include "util/jsonb_document_cast.h" @@ -826,19 +825,28 @@ size_t ColumnVariant::allocated_bytes() const { return res; } -void 
ColumnVariant::for_each_subcolumn(ColumnCallback callback) { +void ColumnVariant::mutate_subcolumns() { for (auto& entry : subcolumns) { for (auto& part : entry->data.data) { - callback(part); + mutate_subcolumn(part); } } - callback(serialized_sparse_column); - callback(serialized_doc_value_column); - // callback may be filter, so the row count may be changed + mutate_subcolumn(serialized_sparse_column); + mutate_subcolumn(serialized_doc_value_column); num_rows = serialized_sparse_column->size(); ENABLE_CHECK_CONSISTENCY(this); } +void ColumnVariant::for_each_subcolumn(ColumnCallback callback) const { + for (const auto& entry : subcolumns) { + for (const auto& part : entry->data.data) { + callback(*static_cast(part)); + } + } + callback(*static_cast(serialized_sparse_column)); + callback(*static_cast(serialized_doc_value_column)); +} + void ColumnVariant::insert_from(const IColumn& src, size_t n) { const auto* src_v = assert_cast(&src); ENABLE_CHECK_CONSISTENCY(src_v); @@ -2375,7 +2383,7 @@ size_t ColumnVariant::filter(const Filter& filter) { for (auto& subcolumn : subcolumns) { subcolumn->data.num_rows = count; } - for_each_subcolumn([&](auto& part) { + auto filter_part = [&](IColumn::WrappedPtr& part) { if (part->size() != count) { if (part->is_exclusive()) { const auto result_size = part->filter(filter); @@ -2390,7 +2398,14 @@ size_t ColumnVariant::filter(const Filter& filter) { part = part->filter(filter, count); } } - }); + }; + for (auto& entry : subcolumns) { + for (auto& part : entry->data.data) { + filter_part(part); + } + } + filter_part(serialized_sparse_column); + filter_part(serialized_doc_value_column); } num_rows = count; ENABLE_CHECK_CONSISTENCY(this); diff --git a/be/src/core/column/column_variant.h b/be/src/core/column/column_variant.h index 1d5c4eed1378a2..a650555f49ec26 100644 --- a/be/src/core/column/column_variant.h +++ b/be/src/core/column/column_variant.h @@ -469,7 +469,8 @@ class ColumnVariant final : public COWHelper { bool 
has_enough_capacity(const IColumn& src) const override { return false; } - void for_each_subcolumn(ColumnCallback callback) override; + void mutate_subcolumns() override; + void for_each_subcolumn(ColumnCallback callback) const override; // Do nothing, call try_insert instead void insert(const Field& field) override { try_insert(field); } diff --git a/be/test/core/column/column_mutate_subcolumns_test.cpp b/be/test/core/column/column_mutate_subcolumns_test.cpp new file mode 100644 index 00000000000000..576684267c5b3f --- /dev/null +++ b/be/test/core/column/column_mutate_subcolumns_test.cpp @@ -0,0 +1,139 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include + +#include "core/column/column_array.h" +#include "core/column/column_const.h" +#include "core/column/column_map.h" +#include "core/column/column_nullable.h" +#include "core/column/column_struct.h" +#include "core/column/column_vector.h" + +namespace doris { +namespace { + +ColumnInt64::MutablePtr create_int64_column(int64_t value) { + auto column = ColumnInt64::create(); + column->insert_value(value); + return column; +} + +ColumnUInt8::MutablePtr create_uint8_column(uint8_t value) { + auto column = ColumnUInt8::create(); + column->insert_value(value); + return column; +} + +ColumnArray::ColumnOffsets::MutablePtr create_single_element_offsets() { + auto offsets = ColumnArray::ColumnOffsets::create(); + offsets->insert_value(1); + return offsets; +} + +} // namespace + +TEST(ColumnMutateSubcolumnsTest, NullableKeepsExclusiveSubcolumns) { + ColumnPtr nullable = ColumnNullable::create(create_int64_column(10), create_uint8_column(0)); + const auto& nullable_ref = assert_cast(*nullable); + const auto* nested_raw = nullable_ref.get_nested_column_ptr().get(); + const auto* null_map_raw = nullable_ref.get_null_map_column_ptr().get(); + + auto mutated = IColumn::mutate(std::move(nullable)); + const auto& mutated_nullable = assert_cast(*mutated); + + EXPECT_EQ(mutated_nullable.get_nested_column_ptr().get(), nested_raw); + EXPECT_EQ(mutated_nullable.get_null_map_column_ptr().get(), null_map_raw); +} + +TEST(ColumnMutateSubcolumnsTest, NullableDetachesSharedSubcolumns) { + ColumnPtr nested = create_int64_column(10); + ColumnPtr nested_alias = nested; + ColumnPtr null_map = create_uint8_column(0); + ColumnPtr null_map_alias = null_map; + + ColumnPtr nullable = ColumnNullable::create(nested, null_map); + auto mutated = IColumn::mutate(std::move(nullable)); + auto& mutated_nullable = assert_cast(*mutated); + + EXPECT_NE(mutated_nullable.get_nested_column_ptr().get(), nested_alias.get()); + EXPECT_NE(mutated_nullable.get_null_map_column_ptr().get(), 
null_map_alias.get()); + EXPECT_EQ(mutated_nullable.get_null_map_data()[0], 0); + + mutated_nullable.get_null_map_data()[0] = 1; + const auto& original_null_map = assert_cast(*null_map_alias); + EXPECT_EQ(original_null_map.get_data()[0], 0); +} + +TEST(ColumnMutateSubcolumnsTest, ArrayKeepsExclusiveSubcolumns) { + ColumnPtr array = ColumnArray::create(create_int64_column(10), create_single_element_offsets()); + const auto& array_ref = assert_cast(*array); + const auto* data_raw = array_ref.get_data_ptr().get(); + const auto* offsets_raw = array_ref.get_offsets_ptr().get(); + + auto mutated = IColumn::mutate(std::move(array)); + const auto& mutated_array = assert_cast(*mutated); + + EXPECT_EQ(mutated_array.get_data_ptr().get(), data_raw); + EXPECT_EQ(mutated_array.get_offsets_ptr().get(), offsets_raw); +} + +TEST(ColumnMutateSubcolumnsTest, MapKeepsExclusiveSubcolumns) { + ColumnPtr map = ColumnMap::create(create_int64_column(1), create_int64_column(10), + create_single_element_offsets()); + const auto& map_ref = assert_cast(*map); + const auto* keys_raw = map_ref.get_keys_ptr().get(); + const auto* values_raw = map_ref.get_values_ptr().get(); + const auto* offsets_raw = map_ref.get_offsets_ptr().get(); + + auto mutated = IColumn::mutate(std::move(map)); + const auto& mutated_map = assert_cast(*mutated); + + EXPECT_EQ(mutated_map.get_keys_ptr().get(), keys_raw); + EXPECT_EQ(mutated_map.get_values_ptr().get(), values_raw); + EXPECT_EQ(mutated_map.get_offsets_ptr().get(), offsets_raw); +} + +TEST(ColumnMutateSubcolumnsTest, ConstKeepsExclusiveSubcolumn) { + ColumnPtr column_const = ColumnConst::create(create_int64_column(10), 3); + const auto& const_ref = assert_cast(*column_const); + const auto* data_raw = const_ref.get_data_column_ptr().get(); + + auto mutated = IColumn::mutate(std::move(column_const)); + const auto& mutated_const = assert_cast(*mutated); + + EXPECT_EQ(mutated_const.get_data_column_ptr().get(), data_raw); +} + +TEST(ColumnMutateSubcolumnsTest, 
StructKeepsExclusiveSubcolumns) { + MutableColumns columns; + columns.push_back(create_int64_column(10)); + columns.push_back(create_uint8_column(1)); + + ColumnPtr column_struct = ColumnStruct::create(std::move(columns)); + const auto& struct_ref = assert_cast(*column_struct); + const auto* first_raw = struct_ref.get_column_ptr(0).get(); + const auto* second_raw = struct_ref.get_column_ptr(1).get(); + + auto mutated = IColumn::mutate(std::move(column_struct)); + const auto& mutated_struct = assert_cast(*mutated); + + EXPECT_EQ(mutated_struct.get_column_ptr(0).get(), first_raw); + EXPECT_EQ(mutated_struct.get_column_ptr(1).get(), second_raw); +} + +} // namespace doris