From 29c1d97efd414ca8ed2352e38d0203c9263ccb88 Mon Sep 17 00:00:00 2001 From: Luwei <814383175@qq.com> Date: Sat, 31 Jan 2026 00:04:51 +0800 Subject: [PATCH] [fix](compaction) Fix crash caused by concurrent compaction accessing shared sample_infos (#60376) Previously, all compaction types (base, cumulative, full) shared a single sample_infos vector per tablet. When different compaction types ran concurrently on the same tablet, one compaction could resize sample_infos while another was accessing it, causing out-of-bounds access and crash. Crash stack: ```gdb *** Aborted at 1769502009 (unix time) try "date -d @1769502009" if you are using GNU date *** *** Current BE git commitID: 0c75960cd13 *** *** SIGABRT unknown detail explain (@0x4c61) received by PID 19553 (TID 20096 OR 0x7b7f13caa640) from PID 19553; stack trace: *** 0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*) at /mnt/disk3/pipeline/repo/selectdb-core_branch-selectdb-doris-4.0/selectdb-core/be/src/common/signal_handler.h:420 1# 0x00007F82B398B520 in /lib/x86_64-linux-gnu/libc.so.6 2# pthread_kill at ./nptl/pthread_kill.c:89 3# raise at ../sysdeps/posix/raise.c:27 4# abort at ./stdlib/abort.c:81 5# 0x000055BA75135461 in /mnt/hdd01/ci/doris-deploy-branch-selectdb-doris-4.0-cloud/be/lib/doris_be 6# std::vector >::operator[](unsigned long) at /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/stl_vector.h:1263 7# doris::estimate_batch_size(int, std::shared_ptr, long) at /mnt/disk3/pipeline/repo/selectdb-core_branch-selectdb-doris-4.0/selectdb-core/be/src/olap/merger.cpp:416 8# doris::Merger::vertical_merge_rowsets(std::shared_ptr, doris::ReaderType, doris::TabletSchema const&, std::vector, std::allocator > > const&, doris::RowsetWriter*, unsigned int, long, doris::Merger::Statistics*) at /mnt/disk3/pipeline/repo/selectdb-core_branch-selectdb-doris-4.0/selectdb-core/be/src/olap/merger.cpp:496 9# doris::Compaction::merge_input_rowsets() 
at /mnt/disk3/pipeline/repo/selectdb-core_branch-selectdb-doris-4.0/selectdb-core/be/src/olap/compaction.cpp:210 10# doris::CloudCompactionMixin::execute_compact_impl(long) at /mnt/disk3/pipeline/repo/selectdb-core_branch-selectdb-doris-4.0/selectdb-core/be/src/olap/compaction.cpp:1490 11# doris::CloudCompactionMixin::execute_compact() at /mnt/disk3/pipeline/repo/selectdb-core_branch-selectdb-doris-4.0/selectdb-core/be/src/olap/compaction.cpp:1528 12# doris::CloudBaseCompaction::execute_compact() at /mnt/disk3/pipeline/repo/selectdb-core_branch-selectdb-doris-4.0/selectdb-core/be/src/cloud/cloud_base_compaction.cpp:296 13# doris::CloudStorageEngine::_submit_base_compaction_task(std::shared_ptr const&)::$_0::operator()() const at /mnt/disk3/pipeline/repo/selectdb-core_branch-selectdb-doris-4.0/selectdb-core/be/src/cloud/cloud_storage_engine.cpp:806 14# void std::__invoke_impl const&)::$_0&>(std::__invoke_other, doris::CloudStorageEngine::_submit_base_compaction_task(std::shared_ptr const&)::$_0&) at /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/invoke.h:63 15# std::enable_if const&)::$_0&>, void>::type std::__invoke_r const&)::$_0&>(doris::CloudStorageEngine::_submit_base_compaction_task(std::shared_ptr const&)::$_0&) at /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/invoke.h:119 16# std::_Function_handler const&)::$_0>::_M_invoke(std::_Any_data const&) at /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/std_function.h:292 17# std::function::operator()() const at /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/std_function.h:593 18# doris::FunctionRunnable::run() at /mnt/disk3/pipeline/repo/selectdb-core_branch-selectdb-doris-4.0/selectdb-core/be/src/util/threadpool.cpp:60 19# doris::ThreadPool::dispatch_thread() at 
/mnt/disk3/pipeline/repo/selectdb-core_branch-selectdb-doris-4.0/selectdb-core/be/src/util/threadpool.cpp:616 20# void std::__invoke_impl(std::__invoke_memfun_deref, void (doris::ThreadPool::*&)(), doris::ThreadPool*&) at /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/invoke.h:76 21# std::__invoke_result::type std::__invoke(void (doris::ThreadPool::*&)(), doris::ThreadPool*&) at /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/invoke.h:98 22# void std::_Bind::__call(std::tuple<>&&, std::_Index_tuple<0ul>) at /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/functional:515 23# void std::_Bind::operator()<, void>() at /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/functional:600 24# void std::__invoke_impl&>(std::__invoke_other, std::_Bind&) at /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/invoke.h:63 25# std::enable_if&>, void>::type std::__invoke_r&>(std::_Bind&) at /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/invoke.h:119 26# std::_Function_handler >::_M_invoke(std::_Any_data const&) at /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/std_function.h:292 27# std::function::operator()() const at /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/std_function.h:593 28# doris::Thread::supervise_thread(void*) at /mnt/disk3/pipeline/repo/selectdb-core_branch-selectdb-doris-4.0/selectdb-core/be/src/util/thread.cpp:460 29# asan_thread_start(void*) in /mnt/hdd01/ci/doris-deploy-branch-selectdb-doris-4.0-cloud/be/lib/doris_be 30# start_thread at ./nptl/pthread_create.c:442 31# 0x00007F82B3A6F850 at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:83 ``` Root cause: Base/Full/Cumulative compactions can run concurrently on the same tablet They share a single 
sample_infos vector. resize() and operator[] are not in the same critical section. Fix: Separate sample_infos for each compaction type (cumu/base/full). Each type has its own mutex and vector. Add getter methods to select the correct sample_infos by ReaderType. --- be/src/olap/base_tablet.h | 39 ++- be/src/olap/merger.cpp | 33 ++- be/test/olap/compaction_sample_info_test.cpp | 297 +++++++++++++++++++ 3 files changed, 353 insertions(+), 16 deletions(-) create mode 100644 be/test/olap/compaction_sample_info_test.cpp diff --git a/be/src/olap/base_tablet.h b/be/src/olap/base_tablet.h index 48b937903f62c8..b5c9e787d97982 100644 --- a/be/src/olap/base_tablet.h +++ b/be/src/olap/base_tablet.h @@ -25,6 +25,7 @@ #include #include "common/status.h" +#include "io/io_common.h" #include "olap/iterators.h" #include "olap/olap_common.h" #include "olap/partial_update_info.h" @@ -397,10 +398,44 @@ class BaseTablet : public std::enable_shared_from_this { std::atomic compaction_count = 0; CompactionStage compaction_stage = CompactionStage::NOT_SCHEDULED; - std::mutex sample_info_lock; - std::vector sample_infos; + // Separate sample_infos for each compaction type to avoid race condition + // when different types of compaction run concurrently on the same tablet + std::mutex cumu_sample_info_lock; + std::mutex base_sample_info_lock; + std::mutex full_sample_info_lock; + std::vector cumu_sample_infos; + std::vector base_sample_infos; + std::vector full_sample_infos; Status last_compaction_status = Status::OK(); + std::mutex& get_sample_info_lock(ReaderType reader_type) { + switch (reader_type) { + case ReaderType::READER_CUMULATIVE_COMPACTION: + return cumu_sample_info_lock; + case ReaderType::READER_BASE_COMPACTION: + return base_sample_info_lock; + case ReaderType::READER_FULL_COMPACTION: + return full_sample_info_lock; + default: + // For other compaction types, use base_sample_info_lock as default + return base_sample_info_lock; + } + } + + std::vector& get_sample_infos(ReaderType 
reader_type) { + switch (reader_type) { + case ReaderType::READER_CUMULATIVE_COMPACTION: + return cumu_sample_infos; + case ReaderType::READER_BASE_COMPACTION: + return base_sample_infos; + case ReaderType::READER_FULL_COMPACTION: + return full_sample_infos; + default: + // For other compaction types, use base_sample_infos as default + return base_sample_infos; + } + } + // Density ratio for sparse optimization (non_null_cells / total_cells) // Value range: [0.0, 1.0], smaller value means more sparse // Default 1.0 means no history data, will not enable sparse optimization initially diff --git a/be/src/olap/merger.cpp b/be/src/olap/merger.cpp index 4f0b1ce3b71e42..b9c15f01d0481f 100644 --- a/be/src/olap/merger.cpp +++ b/be/src/olap/merger.cpp @@ -412,9 +412,12 @@ Status Merger::vertical_compact_one_group( return Status::OK(); } -int64_t estimate_batch_size(int group_index, BaseTabletSPtr tablet, int64_t way_cnt) { - std::unique_lock lock(tablet->sample_info_lock); - CompactionSampleInfo info = tablet->sample_infos[group_index]; +int64_t estimate_batch_size(int group_index, BaseTabletSPtr tablet, int64_t way_cnt, + ReaderType reader_type) { + auto& sample_info_lock = tablet->get_sample_info_lock(reader_type); + auto& sample_infos = tablet->get_sample_infos(reader_type); + std::unique_lock lock(sample_info_lock); + CompactionSampleInfo info = sample_infos[group_index]; if (way_cnt <= 0) { LOG(INFO) << "estimate batch size for vertical compaction, tablet id: " << tablet->tablet_id() << " way cnt: " << way_cnt; @@ -431,12 +434,12 @@ int64_t estimate_batch_size(int group_index, BaseTabletSPtr tablet, int64_t way_ group_data_size = int64_t((cast_set(info.group_data_size) * (1 - smoothing_factor)) + (cast_set(info.bytes / info.rows) * smoothing_factor)); - tablet->sample_infos[group_index].group_data_size = group_data_size; + sample_infos[group_index].group_data_size = group_data_size; } else if (info.group_data_size > 0 && (info.bytes <= 0 || info.rows <= 0)) { 
group_data_size = info.group_data_size; } else if (info.group_data_size <= 0 && info.bytes > 0 && info.rows > 0) { group_data_size = info.bytes / info.rows; - tablet->sample_infos[group_index].group_data_size = group_data_size; + sample_infos[group_index].group_data_size = group_data_size; } else { LOG(INFO) << "estimate batch size for vertical compaction, tablet id: " << tablet->tablet_id() << " group data size: " << info.group_data_size @@ -450,8 +453,8 @@ int64_t estimate_batch_size(int group_index, BaseTabletSPtr tablet, int64_t way_ return 4096 - 32; } - tablet->sample_infos[group_index].bytes = 0; - tablet->sample_infos[group_index].rows = 0; + sample_infos[group_index].bytes = 0; + sample_infos[group_index].rows = 0; int64_t batch_size = block_mem_limit / group_data_size; int64_t res = std::max(std::min(batch_size, int64_t(4096 - 32)), int64_t(32L)); @@ -509,9 +512,11 @@ Status Merger::vertical_merge_rowsets(BaseTabletSPtr tablet, ReaderType reader_t if (stats_output != nullptr) { total_stats.rowid_conversion = stats_output->rowid_conversion; } + auto& sample_info_lock = tablet->get_sample_info_lock(reader_type); + auto& sample_infos = tablet->get_sample_infos(reader_type); { - std::unique_lock lock(tablet->sample_info_lock); - tablet->sample_infos.resize(column_groups.size()); + std::unique_lock lock(sample_info_lock); + sample_infos.resize(column_groups.size()); } // compact group one by one for (auto i = 0; i < column_groups.size(); ++i) { @@ -519,7 +524,7 @@ Status Merger::vertical_merge_rowsets(BaseTabletSPtr tablet, ReaderType reader_t bool is_key = (i == 0); int64_t batch_size = config::compaction_batch_size != -1 ? 
config::compaction_batch_size - : estimate_batch_size(i, tablet, merge_way_num); + : estimate_batch_size(i, tablet, merge_way_num, reader_type); CompactionSampleInfo sample_info; Merger::Statistics group_stats; group_stats.rowid_conversion = total_stats.rowid_conversion; @@ -529,8 +534,8 @@ Status Merger::vertical_merge_rowsets(BaseTabletSPtr tablet, ReaderType reader_t src_rowset_readers, dst_rowset_writer, max_rows_per_segment, group_stats_ptr, key_group_cluster_key_idxes, batch_size, &sample_info, enable_sparse_optimization); { - std::unique_lock lock(tablet->sample_info_lock); - tablet->sample_infos[i] = sample_info; + std::unique_lock lock(sample_info_lock); + sample_infos[i] = sample_info; } RETURN_IF_ERROR(st); if (stats_output != nullptr) { @@ -556,9 +561,9 @@ Status Merger::vertical_merge_rowsets(BaseTabletSPtr tablet, ReaderType reader_t // density = (total_cells - total_null_count) / total_cells // Smaller density means more sparse { - std::unique_lock lock(tablet->sample_info_lock); + std::unique_lock lock(sample_info_lock); int64_t total_null_count = 0; - for (const auto& info : tablet->sample_infos) { + for (const auto& info : sample_infos) { total_null_count += info.null_count; } int64_t total_cells = total_rows * tablet_schema.num_columns(); diff --git a/be/test/olap/compaction_sample_info_test.cpp b/be/test/olap/compaction_sample_info_test.cpp new file mode 100644 index 00000000000000..47b11d3bb9bb75 --- /dev/null +++ b/be/test/olap/compaction_sample_info_test.cpp @@ -0,0 +1,297 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include +#include +#include + +#include "io/io_common.h" +#include "olap/cumulative_compaction_policy.h" +#include "olap/storage_engine.h" +#include "olap/tablet.h" +#include "olap/tablet_meta.h" +#include "util/uid_util.h" + +namespace doris { + +class CompactionSampleInfoTest : public testing::Test { +protected: + void SetUp() override { + _engine = std::make_unique(EngineOptions {}); + TabletMetaSharedPtr tablet_meta; + tablet_meta.reset(new TabletMeta(1, 2, 15673, 15674, 4, 5, TTabletSchema(), 6, {{7, 8}}, + UniqueId(9, 10), TTabletType::TABLET_TYPE_DISK, + TCompressionType::LZ4F)); + _tablet = std::make_shared(*_engine, tablet_meta, nullptr, + CUMULATIVE_SIZE_BASED_POLICY); + } + + std::unique_ptr _engine; + std::shared_ptr _tablet; +}; + +// Test that get_sample_infos returns the correct reference for each compaction type +TEST_F(CompactionSampleInfoTest, GetSampleInfosReturnsCorrectReference) { + auto& cumu_infos = _tablet->get_sample_infos(ReaderType::READER_CUMULATIVE_COMPACTION); + auto& base_infos = _tablet->get_sample_infos(ReaderType::READER_BASE_COMPACTION); + auto& full_infos = _tablet->get_sample_infos(ReaderType::READER_FULL_COMPACTION); + + // Verify they are different references + EXPECT_NE(&cumu_infos, &base_infos); + EXPECT_NE(&base_infos, &full_infos); + EXPECT_NE(&cumu_infos, &full_infos); + + // Verify they match the expected member variables + EXPECT_EQ(&cumu_infos, &_tablet->cumu_sample_infos); + EXPECT_EQ(&base_infos, &_tablet->base_sample_infos); + EXPECT_EQ(&full_infos, &_tablet->full_sample_infos); +} + 
+// Test that get_sample_info_lock returns the correct reference for each compaction type +TEST_F(CompactionSampleInfoTest, GetSampleInfoLockReturnsCorrectReference) { + auto& cumu_lock = _tablet->get_sample_info_lock(ReaderType::READER_CUMULATIVE_COMPACTION); + auto& base_lock = _tablet->get_sample_info_lock(ReaderType::READER_BASE_COMPACTION); + auto& full_lock = _tablet->get_sample_info_lock(ReaderType::READER_FULL_COMPACTION); + + // Verify they are different references + EXPECT_NE(&cumu_lock, &base_lock); + EXPECT_NE(&base_lock, &full_lock); + EXPECT_NE(&cumu_lock, &full_lock); + + // Verify they match the expected member variables + EXPECT_EQ(&cumu_lock, &_tablet->cumu_sample_info_lock); + EXPECT_EQ(&base_lock, &_tablet->base_sample_info_lock); + EXPECT_EQ(&full_lock, &_tablet->full_sample_info_lock); +} + +// Test that default reader types fall back to base_sample_infos +TEST_F(CompactionSampleInfoTest, DefaultReaderTypeFallsBackToBase) { + auto& query_infos = _tablet->get_sample_infos(ReaderType::READER_QUERY); + auto& alter_infos = _tablet->get_sample_infos(ReaderType::READER_ALTER_TABLE); + auto& cold_infos = _tablet->get_sample_infos(ReaderType::READER_COLD_DATA_COMPACTION); + + // All should fall back to base_sample_infos + EXPECT_EQ(&query_infos, &_tablet->base_sample_infos); + EXPECT_EQ(&alter_infos, &_tablet->base_sample_infos); + EXPECT_EQ(&cold_infos, &_tablet->base_sample_infos); +} + +// Test that different compaction types can have different sample_infos sizes +TEST_F(CompactionSampleInfoTest, IndependentSampleInfosSizes) { + auto& cumu_infos = _tablet->get_sample_infos(ReaderType::READER_CUMULATIVE_COMPACTION); + auto& base_infos = _tablet->get_sample_infos(ReaderType::READER_BASE_COMPACTION); + auto& full_infos = _tablet->get_sample_infos(ReaderType::READER_FULL_COMPACTION); + + // Resize each to different sizes + cumu_infos.resize(3); + base_infos.resize(5); + full_infos.resize(7); + + // Verify sizes are independent + 
EXPECT_EQ(cumu_infos.size(), 3); + EXPECT_EQ(base_infos.size(), 5); + EXPECT_EQ(full_infos.size(), 7); + + // Resize one doesn't affect others + cumu_infos.resize(10); + EXPECT_EQ(cumu_infos.size(), 10); + EXPECT_EQ(base_infos.size(), 5); + EXPECT_EQ(full_infos.size(), 7); +} + +// Test concurrent access to different sample_infos doesn't cause issues +TEST_F(CompactionSampleInfoTest, ConcurrentAccessToDifferentTypes) { + std::atomic has_error {false}; + std::atomic completed_threads {0}; + constexpr int kIterations = 1000; + + // Thread simulating cumulative compaction + auto cumu_thread = [this, &has_error, &completed_threads]() { + try { + for (int i = 0; i < kIterations; ++i) { + auto& lock = + _tablet->get_sample_info_lock(ReaderType::READER_CUMULATIVE_COMPACTION); + auto& infos = _tablet->get_sample_infos(ReaderType::READER_CUMULATIVE_COMPACTION); + std::unique_lock guard(lock); + infos.resize((i % 5) + 1); + for (size_t j = 0; j < infos.size(); ++j) { + infos[j].group_data_size = i; + infos[j].bytes = i * 100; + infos[j].rows = i * 10; + } + } + } catch (...) { + has_error = true; + } + completed_threads++; + }; + + // Thread simulating base compaction + auto base_thread = [this, &has_error, &completed_threads]() { + try { + for (int i = 0; i < kIterations; ++i) { + auto& lock = _tablet->get_sample_info_lock(ReaderType::READER_BASE_COMPACTION); + auto& infos = _tablet->get_sample_infos(ReaderType::READER_BASE_COMPACTION); + std::unique_lock guard(lock); + infos.resize((i % 7) + 1); + for (size_t j = 0; j < infos.size(); ++j) { + infos[j].group_data_size = i * 2; + infos[j].bytes = i * 200; + infos[j].rows = i * 20; + } + } + } catch (...) 
{ + has_error = true; + } + completed_threads++; + }; + + // Thread simulating full compaction + auto full_thread = [this, &has_error, &completed_threads]() { + try { + for (int i = 0; i < kIterations; ++i) { + auto& lock = _tablet->get_sample_info_lock(ReaderType::READER_FULL_COMPACTION); + auto& infos = _tablet->get_sample_infos(ReaderType::READER_FULL_COMPACTION); + std::unique_lock guard(lock); + infos.resize((i % 3) + 1); + for (size_t j = 0; j < infos.size(); ++j) { + infos[j].group_data_size = i * 3; + infos[j].bytes = i * 300; + infos[j].rows = i * 30; + } + } + } catch (...) { + has_error = true; + } + completed_threads++; + }; + + // Run all threads concurrently + std::thread t1(cumu_thread); + std::thread t2(base_thread); + std::thread t3(full_thread); + + t1.join(); + t2.join(); + t3.join(); + + EXPECT_FALSE(has_error); + EXPECT_EQ(completed_threads, 3); +} + +// Test that simulates the race condition scenario that caused the original crash +// This test verifies that concurrent resize and access on different compaction types +// don't interfere with each other +TEST_F(CompactionSampleInfoTest, SimulateOriginalCrashScenario) { + std::atomic has_crash {false}; + std::atomic completed_iterations {0}; + constexpr int kIterations = 500; + + // This simulates the scenario where: + // - Thread A (base compaction) resizes to 5 groups and starts processing + // - Thread B (cumu compaction) resizes to 3 groups + // - Thread A tries to access group index 4 - with the fix, this should be safe + // because they use different sample_infos + + auto base_compaction_thread = [this, &has_crash, &completed_iterations]() { + for (int iter = 0; iter < kIterations && !has_crash; ++iter) { + auto& lock = _tablet->get_sample_info_lock(ReaderType::READER_BASE_COMPACTION); + auto& infos = _tablet->get_sample_infos(ReaderType::READER_BASE_COMPACTION); + + // Resize to 5 groups + { + std::unique_lock guard(lock); + infos.resize(5); + } + + // Access all 5 groups (simulating 
estimate_batch_size calls) + for (int group_idx = 0; group_idx < 5; ++group_idx) { + std::unique_lock guard(lock); + // This access should be safe even if cumu_compaction resizes its own infos + if (static_cast(group_idx) >= infos.size()) { + has_crash = true; + return; + } + infos[group_idx].group_data_size = iter; + } + completed_iterations++; + } + }; + + auto cumu_compaction_thread = [this, &has_crash, &completed_iterations]() { + for (int iter = 0; iter < kIterations && !has_crash; ++iter) { + auto& lock = _tablet->get_sample_info_lock(ReaderType::READER_CUMULATIVE_COMPACTION); + auto& infos = _tablet->get_sample_infos(ReaderType::READER_CUMULATIVE_COMPACTION); + + // Resize to 3 groups (smaller than base compaction's 5) + { + std::unique_lock guard(lock); + infos.resize(3); + } + + // Access all 3 groups + for (int group_idx = 0; group_idx < 3; ++group_idx) { + std::unique_lock guard(lock); + if (static_cast(group_idx) >= infos.size()) { + has_crash = true; + return; + } + infos[group_idx].group_data_size = iter; + } + completed_iterations++; + } + }; + + auto full_compaction_thread = [this, &has_crash, &completed_iterations]() { + for (int iter = 0; iter < kIterations && !has_crash; ++iter) { + auto& lock = _tablet->get_sample_info_lock(ReaderType::READER_FULL_COMPACTION); + auto& infos = _tablet->get_sample_infos(ReaderType::READER_FULL_COMPACTION); + + // Resize to 2 groups (even smaller) + { + std::unique_lock guard(lock); + infos.resize(2); + } + + // Access all 2 groups + for (int group_idx = 0; group_idx < 2; ++group_idx) { + std::unique_lock guard(lock); + if (static_cast(group_idx) >= infos.size()) { + has_crash = true; + return; + } + infos[group_idx].group_data_size = iter; + } + completed_iterations++; + } + }; + + std::thread t1(base_compaction_thread); + std::thread t2(cumu_compaction_thread); + std::thread t3(full_compaction_thread); + + t1.join(); + t2.join(); + t3.join(); + + EXPECT_FALSE(has_crash) << "Detected out-of-bounds access during 
concurrent compaction"; + EXPECT_EQ(completed_iterations, kIterations * 3); +} + +} // namespace doris