From 29c1d97efd414ca8ed2352e38d0203c9263ccb88 Mon Sep 17 00:00:00 2001
From: Luwei <814383175@qq.com>
Date: Sat, 31 Jan 2026 00:04:51 +0800
Subject: [PATCH] [fix](compaction) Fix crash caused by concurrent compaction
 accessing shared sample_infos (#60376)

Previously, all compaction types (base, cumulative, full) shared a
single sample_infos vector per tablet. When different compaction types
ran concurrently on the same tablet, one compaction could resize
sample_infos while another was accessing it, causing out-of-bounds
access and crash.

Crash stack:

```gdb
*** Aborted at 1769502009 (unix time) try "date -d @1769502009" if you are using GNU date ***
*** Current BE git commitID: 0c75960cd13 ***
*** SIGABRT unknown detail explain (@0x4c61) received by PID 19553 (TID 20096 OR 0x7b7f13caa640) from PID 19553; stack trace: ***
 0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*) at /mnt/disk3/pipeline/repo/selectdb-core_branch-selectdb-doris-4.0/selectdb-core/be/src/common/signal_handler.h:420
 1# 0x00007F82B398B520 in /lib/x86_64-linux-gnu/libc.so.6
 2# pthread_kill at ./nptl/pthread_kill.c:89
 3# raise at ../sysdeps/posix/raise.c:27
 4# abort at ./stdlib/abort.c:81
 5# 0x000055BA75135461 in /mnt/hdd01/ci/doris-deploy-branch-selectdb-doris-4.0-cloud/be/lib/doris_be
 6# std::vector >::operator[](unsigned long) at /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/stl_vector.h:1263
 7# doris::estimate_batch_size(int, std::shared_ptr, long) at /mnt/disk3/pipeline/repo/selectdb-core_branch-selectdb-doris-4.0/selectdb-core/be/src/olap/merger.cpp:416
 8# doris::Merger::vertical_merge_rowsets(std::shared_ptr, doris::ReaderType, doris::TabletSchema const&, std::vector, std::allocator > > const&, doris::RowsetWriter*, unsigned int, long, doris::Merger::Statistics*) at /mnt/disk3/pipeline/repo/selectdb-core_branch-selectdb-doris-4.0/selectdb-core/be/src/olap/merger.cpp:496
 9# doris::Compaction::merge_input_rowsets() at /mnt/disk3/pipeline/repo/selectdb-core_branch-selectdb-doris-4.0/selectdb-core/be/src/olap/compaction.cpp:210
10# doris::CloudCompactionMixin::execute_compact_impl(long) at /mnt/disk3/pipeline/repo/selectdb-core_branch-selectdb-doris-4.0/selectdb-core/be/src/olap/compaction.cpp:1490
11# doris::CloudCompactionMixin::execute_compact() at /mnt/disk3/pipeline/repo/selectdb-core_branch-selectdb-doris-4.0/selectdb-core/be/src/olap/compaction.cpp:1528
12# doris::CloudBaseCompaction::execute_compact() at /mnt/disk3/pipeline/repo/selectdb-core_branch-selectdb-doris-4.0/selectdb-core/be/src/cloud/cloud_base_compaction.cpp:296
13# doris::CloudStorageEngine::_submit_base_compaction_task(std::shared_ptr const&)::$_0::operator()() const at /mnt/disk3/pipeline/repo/selectdb-core_branch-selectdb-doris-4.0/selectdb-core/be/src/cloud/cloud_storage_engine.cpp:806
14# void std::__invoke_impl const&)::$_0&>(std::__invoke_other, doris::CloudStorageEngine::_submit_base_compaction_task(std::shared_ptr const&)::$_0&) at /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/invoke.h:63
15# std::enable_if const&)::$_0&>, void>::type std::__invoke_r const&)::$_0&>(doris::CloudStorageEngine::_submit_base_compaction_task(std::shared_ptr const&)::$_0&) at /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/invoke.h:119
16# std::_Function_handler const&)::$_0>::_M_invoke(std::_Any_data const&) at /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/std_function.h:292
17# std::function::operator()() const at /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/std_function.h:593
18# doris::FunctionRunnable::run() at /mnt/disk3/pipeline/repo/selectdb-core_branch-selectdb-doris-4.0/selectdb-core/be/src/util/threadpool.cpp:60
19# doris::ThreadPool::dispatch_thread() at /mnt/disk3/pipeline/repo/selectdb-core_branch-selectdb-doris-4.0/selectdb-core/be/src/util/threadpool.cpp:616
20# void std::__invoke_impl(std::__invoke_memfun_deref, void (doris::ThreadPool::*&)(), doris::ThreadPool*&) at /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/invoke.h:76
21# std::__invoke_result::type std::__invoke(void (doris::ThreadPool::*&)(), doris::ThreadPool*&) at /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/invoke.h:98
22# void std::_Bind::__call(std::tuple<>&&, std::_Index_tuple<0ul>) at /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/functional:515
23# void std::_Bind::operator()<, void>() at /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/functional:600
24# void std::__invoke_impl&>(std::__invoke_other, std::_Bind&) at /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/invoke.h:63
25# std::enable_if&>, void>::type std::__invoke_r&>(std::_Bind&) at /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/invoke.h:119
26# std::_Function_handler >::_M_invoke(std::_Any_data const&) at /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/std_function.h:292
27# std::function::operator()() const at /usr/local/ldb-toolchain-v0.26/bin/../lib/gcc/x86_64-pc-linux-gnu/15/include/g++-v15/bits/std_function.h:593
28# doris::Thread::supervise_thread(void*) at /mnt/disk3/pipeline/repo/selectdb-core_branch-selectdb-doris-4.0/selectdb-core/be/src/util/thread.cpp:460
29# asan_thread_start(void*) in /mnt/hdd01/ci/doris-deploy-branch-selectdb-doris-4.0-cloud/be/lib/doris_be
30# start_thread at ./nptl/pthread_create.c:442
31# 0x00007F82B3A6F850 at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:83
```

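Root cause:
  Base/Full/Cumulative compactions can run concurrently on the same tablet.
  They share a single sample_infos vector.
  resize() and operator[] are not in the same critical section: the lock is
  released after resize(), so another compaction type can shrink the vector
  before estimate_batch_size() indexes it.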

Fix:
  Separate sample_infos for each compaction type (cumu/base/full)
  Each type has its own mutex and vector
  Add getter methods to select the correct sample_infos by ReaderType
---
 be/src/olap/base_tablet.h                    |  39 ++-
 be/src/olap/merger.cpp                       |  33 ++-
 be/test/olap/compaction_sample_info_test.cpp | 297 +++++++++++++++++++
 3 files changed, 353 insertions(+), 16 deletions(-)
 create mode 100644 be/test/olap/compaction_sample_info_test.cpp

diff --git a/be/src/olap/base_tablet.h b/be/src/olap/base_tablet.h
index 48b937903f62c8..b5c9e787d97982 100644
--- a/be/src/olap/base_tablet.h
+++ b/be/src/olap/base_tablet.h
@@ -25,6 +25,7 @@
 #include <string>
 
 #include "common/status.h"
+#include "io/io_common.h"
 #include "olap/iterators.h"
 #include "olap/olap_common.h"
 #include "olap/partial_update_info.h"
@@ -397,10 +398,44 @@ class BaseTablet : public std::enable_shared_from_this<BaseTablet> {
     std::atomic<int64_t> compaction_count = 0;
 
     CompactionStage compaction_stage = CompactionStage::NOT_SCHEDULED;
-    std::mutex sample_info_lock;
-    std::vector<CompactionSampleInfo> sample_infos;
+    // One sample_infos vector and one mutex per compaction type, so that cumulative, base and full
+    // compactions running concurrently on the same tablet never resize each other's vector.
+    std::mutex cumu_sample_info_lock;
+    std::mutex base_sample_info_lock;
+    std::mutex full_sample_info_lock;
+    std::vector<CompactionSampleInfo> cumu_sample_infos;
+    std::vector<CompactionSampleInfo> base_sample_infos;
+    std::vector<CompactionSampleInfo> full_sample_infos;
     Status last_compaction_status = Status::OK();
 
+    std::mutex& get_sample_info_lock(ReaderType reader_type) {
+        switch (reader_type) {
+        case ReaderType::READER_CUMULATIVE_COMPACTION:
+            return cumu_sample_info_lock;
+        case ReaderType::READER_BASE_COMPACTION:
+            return base_sample_info_lock;
+        case ReaderType::READER_FULL_COMPACTION:
+            return full_sample_info_lock;
+        default:
+            // NOTE(review): other reader types also use the base lock; confirm that is safe
+            return base_sample_info_lock;
+        }
+    }
+
+    std::vector<CompactionSampleInfo>& get_sample_infos(ReaderType reader_type) {
+        switch (reader_type) {
+        case ReaderType::READER_CUMULATIVE_COMPACTION:
+            return cumu_sample_infos;
+        case ReaderType::READER_BASE_COMPACTION:
+            return base_sample_infos;
+        case ReaderType::READER_FULL_COMPACTION:
+            return full_sample_infos;
+        default:
+            // NOTE(review): other reader types also use base_sample_infos; confirm that is safe
+            return base_sample_infos;
+        }
+    }
+
     // Density ratio for sparse optimization (non_null_cells / total_cells)
     // Value range: [0.0, 1.0], smaller value means more sparse
     // Default 1.0 means no history data, will not enable sparse optimization initially
diff --git a/be/src/olap/merger.cpp b/be/src/olap/merger.cpp
index 4f0b1ce3b71e42..b9c15f01d0481f 100644
--- a/be/src/olap/merger.cpp
+++ b/be/src/olap/merger.cpp
@@ -412,9 +412,12 @@ Status Merger::vertical_compact_one_group(
     return Status::OK();
 }
 
-int64_t estimate_batch_size(int group_index, BaseTabletSPtr tablet, int64_t way_cnt) {
-    std::unique_lock<std::mutex> lock(tablet->sample_info_lock);
-    CompactionSampleInfo info = tablet->sample_infos[group_index];
+int64_t estimate_batch_size(int group_index, BaseTabletSPtr tablet, int64_t way_cnt,
+                            ReaderType reader_type) {
+    auto& sample_info_lock = tablet->get_sample_info_lock(reader_type);
+    auto& sample_infos = tablet->get_sample_infos(reader_type);
+    std::unique_lock<std::mutex> lock(sample_info_lock);
+    CompactionSampleInfo info = sample_infos[group_index];
     if (way_cnt <= 0) {
         LOG(INFO) << "estimate batch size for vertical compaction, tablet id: "
                   << tablet->tablet_id() << " way cnt: " << way_cnt;
@@ -431,12 +434,12 @@ int64_t estimate_batch_size(int group_index, BaseTabletSPtr tablet, int64_t way_
         group_data_size =
                 int64_t((cast_set<double>(info.group_data_size) * (1 - smoothing_factor)) +
                         (cast_set<double>(info.bytes / info.rows) * smoothing_factor));
-        tablet->sample_infos[group_index].group_data_size = group_data_size;
+        sample_infos[group_index].group_data_size = group_data_size;
     } else if (info.group_data_size > 0 && (info.bytes <= 0 || info.rows <= 0)) {
         group_data_size = info.group_data_size;
     } else if (info.group_data_size <= 0 && info.bytes > 0 && info.rows > 0) {
         group_data_size = info.bytes / info.rows;
-        tablet->sample_infos[group_index].group_data_size = group_data_size;
+        sample_infos[group_index].group_data_size = group_data_size;
     } else {
         LOG(INFO) << "estimate batch size for vertical compaction, tablet id: "
                   << tablet->tablet_id() << " group data size: " << info.group_data_size
@@ -450,8 +453,8 @@ int64_t estimate_batch_size(int group_index, BaseTabletSPtr tablet, int64_t way_
         return 4096 - 32;
     }
 
-    tablet->sample_infos[group_index].bytes = 0;
-    tablet->sample_infos[group_index].rows = 0;
+    sample_infos[group_index].bytes = 0;
+    sample_infos[group_index].rows = 0;
 
     int64_t batch_size = block_mem_limit / group_data_size;
     int64_t res = std::max(std::min(batch_size, int64_t(4096 - 32)), int64_t(32L));
@@ -509,9 +512,11 @@ Status Merger::vertical_merge_rowsets(BaseTabletSPtr tablet, ReaderType reader_t
     if (stats_output != nullptr) {
         total_stats.rowid_conversion = stats_output->rowid_conversion;
     }
+    auto& sample_info_lock = tablet->get_sample_info_lock(reader_type);
+    auto& sample_infos = tablet->get_sample_infos(reader_type);
     {
-        std::unique_lock<std::mutex> lock(tablet->sample_info_lock);
-        tablet->sample_infos.resize(column_groups.size());
+        std::unique_lock<std::mutex> lock(sample_info_lock);
+        sample_infos.resize(column_groups.size());
     }
     // compact group one by one
     for (auto i = 0; i < column_groups.size(); ++i) {
@@ -519,7 +524,7 @@ Status Merger::vertical_merge_rowsets(BaseTabletSPtr tablet, ReaderType reader_t
         bool is_key = (i == 0);
         int64_t batch_size = config::compaction_batch_size != -1
                                      ? config::compaction_batch_size
-                                     : estimate_batch_size(i, tablet, merge_way_num);
+                                     : estimate_batch_size(i, tablet, merge_way_num, reader_type);
         CompactionSampleInfo sample_info;
         Merger::Statistics group_stats;
         group_stats.rowid_conversion = total_stats.rowid_conversion;
@@ -529,8 +534,8 @@ Status Merger::vertical_merge_rowsets(BaseTabletSPtr tablet, ReaderType reader_t
                 src_rowset_readers, dst_rowset_writer, max_rows_per_segment, group_stats_ptr,
                 key_group_cluster_key_idxes, batch_size, &sample_info, enable_sparse_optimization);
         {
-            std::unique_lock<std::mutex> lock(tablet->sample_info_lock);
-            tablet->sample_infos[i] = sample_info;
+            std::unique_lock<std::mutex> lock(sample_info_lock);
+            sample_infos[i] = sample_info;
         }
         RETURN_IF_ERROR(st);
         if (stats_output != nullptr) {
@@ -556,9 +561,9 @@ Status Merger::vertical_merge_rowsets(BaseTabletSPtr tablet, ReaderType reader_t
     // density = (total_cells - total_null_count) / total_cells
     // Smaller density means more sparse
     {
-        std::unique_lock<std::mutex> lock(tablet->sample_info_lock);
+        std::unique_lock<std::mutex> lock(sample_info_lock);
         int64_t total_null_count = 0;
-        for (const auto& info : tablet->sample_infos) {
+        for (const auto& info : sample_infos) {
             total_null_count += info.null_count;
         }
         int64_t total_cells = total_rows * tablet_schema.num_columns();
diff --git a/be/test/olap/compaction_sample_info_test.cpp b/be/test/olap/compaction_sample_info_test.cpp
new file mode 100644
index 00000000000000..47b11d3bb9bb75
--- /dev/null
+++ b/be/test/olap/compaction_sample_info_test.cpp
@@ -0,0 +1,297 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include <atomic>
+#include <thread>
+#include <vector>
+
+#include "io/io_common.h"
+#include "olap/cumulative_compaction_policy.h"
+#include "olap/storage_engine.h"
+#include "olap/tablet.h"
+#include "olap/tablet_meta.h"
+#include "util/uid_util.h"
+
+namespace doris {
+
+class CompactionSampleInfoTest : public testing::Test {
+protected:
+    void SetUp() override { // builds a default-options engine and one Tablet before each test
+        _engine = std::make_unique<StorageEngine>(EngineOptions {});
+        TabletMetaSharedPtr tablet_meta;
+        tablet_meta.reset(new TabletMeta(1, 2, 15673, 15674, 4, 5, TTabletSchema(), 6, {{7, 8}},
+                                         UniqueId(9, 10), TTabletType::TABLET_TYPE_DISK,
+                                         TCompressionType::LZ4F));
+        _tablet = std::make_shared<Tablet>(*_engine, tablet_meta, nullptr,
+                                           CUMULATIVE_SIZE_BASED_POLICY);
+    }
+
+    std::unique_ptr<StorageEngine> _engine; // declared first, so it is destroyed after _tablet
+    std::shared_ptr<Tablet> _tablet;
+};
+
+// Each of the three compaction reader types must map to its own dedicated sample_infos vector
+TEST_F(CompactionSampleInfoTest, GetSampleInfosReturnsCorrectReference) {
+    auto& cumu_infos = _tablet->get_sample_infos(ReaderType::READER_CUMULATIVE_COMPACTION);
+    auto& base_infos = _tablet->get_sample_infos(ReaderType::READER_BASE_COMPACTION);
+    auto& full_infos = _tablet->get_sample_infos(ReaderType::READER_FULL_COMPACTION);
+
+    // No two reader types may alias the same vector, otherwise they would still share state
+    EXPECT_NE(&cumu_infos, &base_infos);
+    EXPECT_NE(&base_infos, &full_infos);
+    EXPECT_NE(&cumu_infos, &full_infos);
+
+    // Each getter result must be exactly the corresponding public member of the tablet
+    EXPECT_EQ(&cumu_infos, &_tablet->cumu_sample_infos);
+    EXPECT_EQ(&base_infos, &_tablet->base_sample_infos);
+    EXPECT_EQ(&full_infos, &_tablet->full_sample_infos);
+}
+
+// Each of the three compaction reader types must map to its own dedicated mutex
+TEST_F(CompactionSampleInfoTest, GetSampleInfoLockReturnsCorrectReference) {
+    auto& cumu_lock = _tablet->get_sample_info_lock(ReaderType::READER_CUMULATIVE_COMPACTION);
+    auto& base_lock = _tablet->get_sample_info_lock(ReaderType::READER_BASE_COMPACTION);
+    auto& full_lock = _tablet->get_sample_info_lock(ReaderType::READER_FULL_COMPACTION);
+
+    // No two reader types may alias the same mutex
+    EXPECT_NE(&cumu_lock, &base_lock);
+    EXPECT_NE(&base_lock, &full_lock);
+    EXPECT_NE(&cumu_lock, &full_lock);
+
+    // Each getter result must be exactly the corresponding public member of the tablet
+    EXPECT_EQ(&cumu_lock, &_tablet->cumu_sample_info_lock);
+    EXPECT_EQ(&base_lock, &_tablet->base_sample_info_lock);
+    EXPECT_EQ(&full_lock, &_tablet->full_sample_info_lock);
+}
+
+// Reader types without a dedicated vector must resolve to base_sample_infos (default branch)
+TEST_F(CompactionSampleInfoTest, DefaultReaderTypeFallsBackToBase) {
+    auto& query_infos = _tablet->get_sample_infos(ReaderType::READER_QUERY);
+    auto& alter_infos = _tablet->get_sample_infos(ReaderType::READER_ALTER_TABLE);
+    auto& cold_infos = _tablet->get_sample_infos(ReaderType::READER_COLD_DATA_COMPACTION);
+
+    // Query, alter-table and cold-data reader types all share the base vector
+    EXPECT_EQ(&query_infos, &_tablet->base_sample_infos);
+    EXPECT_EQ(&alter_infos, &_tablet->base_sample_infos);
+    EXPECT_EQ(&cold_infos, &_tablet->base_sample_infos);
+}
+
+// Resizing one compaction type's sample_infos must leave the other two types' sizes untouched
+TEST_F(CompactionSampleInfoTest, IndependentSampleInfosSizes) {
+    auto& cumu_infos = _tablet->get_sample_infos(ReaderType::READER_CUMULATIVE_COMPACTION);
+    auto& base_infos = _tablet->get_sample_infos(ReaderType::READER_BASE_COMPACTION);
+    auto& full_infos = _tablet->get_sample_infos(ReaderType::READER_FULL_COMPACTION);
+
+    // Give every vector a distinct size so that any aliasing would be visible
+    cumu_infos.resize(3);
+    base_infos.resize(5);
+    full_infos.resize(7);
+
+    // Each vector must report exactly the size it was given
+    EXPECT_EQ(cumu_infos.size(), 3);
+    EXPECT_EQ(base_infos.size(), 5);
+    EXPECT_EQ(full_infos.size(), 7);
+
+    // Growing the cumulative vector must not change the base or full sizes
+    cumu_infos.resize(10);
+    EXPECT_EQ(cumu_infos.size(), 10);
+    EXPECT_EQ(base_infos.size(), 5);
+    EXPECT_EQ(full_infos.size(), 7);
+}
+
+// Three threads, one per compaction type, mutate only their own sample_infos under its own lock
+TEST_F(CompactionSampleInfoTest, ConcurrentAccessToDifferentTypes) {
+    std::atomic<bool> has_error {false};
+    std::atomic<int> completed_threads {0};
+    constexpr int kIterations = 1000;
+
+    // Cumulative worker: under the cumu lock, resize to 1..5 entries and overwrite each entry
+    auto cumu_thread = [this, &has_error, &completed_threads]() {
+        try {
+            for (int i = 0; i < kIterations; ++i) {
+                auto& lock =
+                        _tablet->get_sample_info_lock(ReaderType::READER_CUMULATIVE_COMPACTION);
+                auto& infos = _tablet->get_sample_infos(ReaderType::READER_CUMULATIVE_COMPACTION);
+                std::unique_lock<std::mutex> guard(lock);
+                infos.resize((i % 5) + 1);
+                for (size_t j = 0; j < infos.size(); ++j) {
+                    infos[j].group_data_size = i;
+                    infos[j].bytes = i * 100;
+                    infos[j].rows = i * 10;
+                }
+            }
+        } catch (...) {
+            has_error = true;
+        }
+        completed_threads++;
+    };
+
+    // Base worker: under the base lock, resize to 1..7 entries and overwrite each entry
+    auto base_thread = [this, &has_error, &completed_threads]() {
+        try {
+            for (int i = 0; i < kIterations; ++i) {
+                auto& lock = _tablet->get_sample_info_lock(ReaderType::READER_BASE_COMPACTION);
+                auto& infos = _tablet->get_sample_infos(ReaderType::READER_BASE_COMPACTION);
+                std::unique_lock<std::mutex> guard(lock);
+                infos.resize((i % 7) + 1);
+                for (size_t j = 0; j < infos.size(); ++j) {
+                    infos[j].group_data_size = i * 2;
+                    infos[j].bytes = i * 200;
+                    infos[j].rows = i * 20;
+                }
+            }
+        } catch (...) {
+            has_error = true;
+        }
+        completed_threads++;
+    };
+
+    // Full worker: under the full lock, resize to 1..3 entries and overwrite each entry
+    auto full_thread = [this, &has_error, &completed_threads]() {
+        try {
+            for (int i = 0; i < kIterations; ++i) {
+                auto& lock = _tablet->get_sample_info_lock(ReaderType::READER_FULL_COMPACTION);
+                auto& infos = _tablet->get_sample_infos(ReaderType::READER_FULL_COMPACTION);
+                std::unique_lock<std::mutex> guard(lock);
+                infos.resize((i % 3) + 1);
+                for (size_t j = 0; j < infos.size(); ++j) {
+                    infos[j].group_data_size = i * 3;
+                    infos[j].bytes = i * 300;
+                    infos[j].rows = i * 30;
+                }
+            }
+        } catch (...) {
+            has_error = true;
+        }
+        completed_threads++;
+    };
+
+    // Start the three workers together and wait for all of them to finish
+    std::thread t1(cumu_thread);
+    std::thread t2(base_thread);
+    std::thread t3(full_thread);
+
+    t1.join();
+    t2.join();
+    t3.join();
+
+    EXPECT_FALSE(has_error); // set only when a worker caught an exception
+    EXPECT_EQ(completed_threads, 3);
+}
+
+// Replays the shape of the original crash: each thread resizes its vector, releases the lock, then
+// re-locks per group to index it. With per-type vectors, another type's resize must never shrink
+// the vector a thread is indexing; has_crash records any index that is out of range.
+TEST_F(CompactionSampleInfoTest, SimulateOriginalCrashScenario) {
+    std::atomic<bool> has_crash {false};
+    std::atomic<int> completed_iterations {0};
+    constexpr int kIterations = 500;
+
+    // This simulates the scenario where:
+    // - Thread A (base compaction) resizes to 5 groups and starts processing
+    // - Thread B (cumu compaction) resizes to 3 groups
+    // - Thread A tries to access group index 4 - with the fix, this should be safe
+    //   because they use different sample_infos
+
+    auto base_compaction_thread = [this, &has_crash, &completed_iterations]() {
+        for (int iter = 0; iter < kIterations && !has_crash; ++iter) {
+            auto& lock = _tablet->get_sample_info_lock(ReaderType::READER_BASE_COMPACTION);
+            auto& infos = _tablet->get_sample_infos(ReaderType::READER_BASE_COMPACTION);
+
+            // Resize to 5 groups, then drop the lock before any group is indexed
+            {
+                std::unique_lock<std::mutex> guard(lock);
+                infos.resize(5);
+            }
+
+            // Index every group, re-taking the lock per group as estimate_batch_size does
+            for (int group_idx = 0; group_idx < 5; ++group_idx) {
+                std::unique_lock<std::mutex> guard(lock);
+                // This access should be safe even if cumu_compaction resizes its own infos
+                if (static_cast<size_t>(group_idx) >= infos.size()) {
+                    has_crash = true;
+                    return;
+                }
+                infos[group_idx].group_data_size = iter;
+            }
+            completed_iterations++;
+        }
+    };
+
+    auto cumu_compaction_thread = [this, &has_crash, &completed_iterations]() {
+        for (int iter = 0; iter < kIterations && !has_crash; ++iter) {
+            auto& lock = _tablet->get_sample_info_lock(ReaderType::READER_CUMULATIVE_COMPACTION);
+            auto& infos = _tablet->get_sample_infos(ReaderType::READER_CUMULATIVE_COMPACTION);
+
+            // Resize to 3 groups (smaller than base compaction's 5)
+            {
+                std::unique_lock<std::mutex> guard(lock);
+                infos.resize(3);
+            }
+
+            // Index all 3 groups, re-taking the lock for each one
+            for (int group_idx = 0; group_idx < 3; ++group_idx) {
+                std::unique_lock<std::mutex> guard(lock);
+                if (static_cast<size_t>(group_idx) >= infos.size()) {
+                    has_crash = true;
+                    return;
+                }
+                infos[group_idx].group_data_size = iter;
+            }
+            completed_iterations++;
+        }
+    };
+
+    auto full_compaction_thread = [this, &has_crash, &completed_iterations]() {
+        for (int iter = 0; iter < kIterations && !has_crash; ++iter) {
+            auto& lock = _tablet->get_sample_info_lock(ReaderType::READER_FULL_COMPACTION);
+            auto& infos = _tablet->get_sample_infos(ReaderType::READER_FULL_COMPACTION);
+
+            // Resize to 2 groups (even smaller)
+            {
+                std::unique_lock<std::mutex> guard(lock);
+                infos.resize(2);
+            }
+
+            // Index both groups, re-taking the lock for each one
+            for (int group_idx = 0; group_idx < 2; ++group_idx) {
+                std::unique_lock<std::mutex> guard(lock);
+                if (static_cast<size_t>(group_idx) >= infos.size()) {
+                    has_crash = true;
+                    return;
+                }
+                infos[group_idx].group_data_size = iter;
+            }
+            completed_iterations++;
+        }
+    };
+
+    std::thread t1(base_compaction_thread);
+    std::thread t2(cumu_compaction_thread);
+    std::thread t3(full_compaction_thread);
+
+    t1.join();
+    t2.join();
+    t3.join();
+
+    EXPECT_FALSE(has_crash) << "Detected out-of-bounds access during concurrent compaction";
+    EXPECT_EQ(completed_iterations, kIterations * 3);
+}
+
+} // namespace doris