From 8f859c6dd1eb0607eb535d4a9b4cf5c1d0e5a4ea Mon Sep 17 00:00:00 2001 From: Luwei <814383175@qq.com> Date: Tue, 17 Mar 2026 20:05:41 +0800 Subject: [PATCH] [fix](backup) reject upload snapshots on broken storage path (#61251) Backup upload reuses snapshot paths returned by MAKE_SNAPSHOT. When a data dir is later marked as broken, the stale snapshot directory can still remain on that disk and be picked up by upload. In that case the upload task may continue into file checksum and remote upload logic with a snapshot source that is no longer safe to read. This change adds a broken-storage-path validation step to SnapshotLoader local source path checking for upload. The check canonicalizes the snapshot path, matches it to its DataDir, and rejects the source early when the owning DataDir is offline or the path is listed in broken_storage_path. That turns the broken-disk case into a normal task error instead of letting upload continue on an invalid local snapshot source. The unit tests cover both the direct broken-path case and a canonicalized symlink path to ensure the validation cannot be bypassed by path indirection. --- be/src/io/fs/local_file_system.cpp | 14 +++++++ be/src/io/fs/local_file_system.h | 2 + be/src/runtime/snapshot_loader.cpp | 28 ++++++++++++++ be/src/runtime/snapshot_loader.h | 2 + be/test/io/fs/local_file_system_test.cpp | 9 +++++ be/test/runtime/snapshot_loader_test.cpp | 47 ++++++++++++++++++++++++ 6 files changed, 102 insertions(+) diff --git a/be/src/io/fs/local_file_system.cpp b/be/src/io/fs/local_file_system.cpp index 358ba7d5ff1ee4..6dc366a0bfd0ed 100644 --- a/be/src/io/fs/local_file_system.cpp +++ b/be/src/io/fs/local_file_system.cpp @@ -412,6 +412,20 @@ bool LocalFileSystem::contain_path(const Path& parent_, const Path& sub_) { return true; } +bool LocalFileSystem::equal_or_sub_path(const Path& parent, const Path& child) { + auto parent_path = parent.lexically_normal(); + auto child_path = child.lexically_normal(); + auto parent_it = parent_path.begin(); + auto child_it = child_path.begin(); + for (; parent_it != parent_path.end() && child_it != child_path.end(); + ++parent_it, ++child_it) { + if (*parent_it != *child_it) { + return false; + } + } + return parent_it == parent_path.end(); +} + const std::shared_ptr& global_local_filesystem() { static std::shared_ptr local_fs(new LocalFileSystem()); return local_fs; diff --git a/be/src/io/fs/local_file_system.h b/be/src/io/fs/local_file_system.h index 4540df47c16d81..c1ffbd22949252 100644 --- a/be/src/io/fs/local_file_system.h +++ b/be/src/io/fs/local_file_system.h @@ -59,6 +59,8 @@ class LocalFileSystem final : public FileSystem { Status copy_path(const Path& src, const Path& dest); // return true if parent path contain sub path static bool contain_path(const Path& parent, const Path& sub); + // return true if `parent` equals `child` or contains it as a descendant path. + static bool equal_or_sub_path(const Path& parent, const Path& child); // delete dir or file Status delete_directory_or_file(const Path& path); // change the file permission of the given path diff --git a/be/src/runtime/snapshot_loader.cpp b/be/src/runtime/snapshot_loader.cpp index d6fb88a6552488..16a5d8bf6ce6c1 100644 --- a/be/src/runtime/snapshot_loader.cpp +++ b/be/src/runtime/snapshot_loader.cpp @@ -1332,11 +1332,39 @@ Status SnapshotLoader::_check_local_snapshot_paths( LOG(WARNING) << ss.str(); return Status::RuntimeError(ss.str()); } + if (check_src) { + RETURN_IF_ERROR(_check_snapshot_path_on_broken_storage(path)); + } } LOG(INFO) << "all local snapshot paths are existing. num: " << src_to_dest_path.size(); return Status::OK(); } +Status SnapshotLoader::_check_snapshot_path_on_broken_storage(const std::string& path) { + std::string canonical_path; + RETURN_IF_ERROR(io::global_local_filesystem()->canonicalize(path, &canonical_path)); + + auto broken_paths = _engine.get_broken_paths(); + for (auto* store : _engine.get_stores(true)) { + std::string canonical_store_path; + RETURN_IF_ERROR( + io::global_local_filesystem()->canonicalize(store->path(), &canonical_store_path)); + if (!io::LocalFileSystem::equal_or_sub_path(canonical_store_path, canonical_path)) { + continue; + } + if (!store->is_used() || broken_paths.contains(store->path()) || + broken_paths.contains(canonical_store_path)) { + return Status::IOError( + "snapshot path is on broken storage path, snapshot_path={}, " + "storage_path={}", + canonical_path, canonical_store_path); + } + break; + } + + return Status::OK(); +} + Status SnapshotLoader::_get_existing_files_from_local(const std::string& local_path, std::vector* local_files) { bool exists = true; diff --git a/be/src/runtime/snapshot_loader.h b/be/src/runtime/snapshot_loader.h index b023c0ef24596c..222a4b6ba8766c 100644 --- a/be/src/runtime/snapshot_loader.h +++ b/be/src/runtime/snapshot_loader.h @@ -127,6 +127,8 @@ class SnapshotLoader : public BaseSnapshotLoader { Status _replace_tablet_id(const std::string& file_name, int64_t tablet_id, std::string* new_file_name); + Status _check_snapshot_path_on_broken_storage(const std::string& path); + Status _check_local_snapshot_paths(const std::map& src_to_dest_path, bool check_src); diff --git a/be/test/io/fs/local_file_system_test.cpp b/be/test/io/fs/local_file_system_test.cpp index c930ba72eabf86..450c3daea8f2f6 100644 --- a/be/test/io/fs/local_file_system_test.cpp +++ b/be/test/io/fs/local_file_system_test.cpp @@ -467,4 +467,13 @@ TEST_F(LocalFileSystemTest, TestConvertToAbsPath) { st = doris::io::LocalFileSystem::convert_to_abs_path("hdfs:/abc", abs_path); ASSERT_TRUE(!st.ok()); } + +TEST_F(LocalFileSystemTest, TestEqualOrSubPath) { + EXPECT_TRUE(io::LocalFileSystem::equal_or_sub_path("/data/store", "/data/store")); + EXPECT_TRUE(io::LocalFileSystem::equal_or_sub_path("/data/store", "/data/store/snapshot")); + EXPECT_TRUE(io::LocalFileSystem::equal_or_sub_path("/data/store/./snapshot", + "/data/store/snapshot/dir/../dir")); + EXPECT_FALSE(io::LocalFileSystem::equal_or_sub_path("/data/store1", "/data/store11/snapshot")); + EXPECT_FALSE(io::LocalFileSystem::equal_or_sub_path("/data/store/snapshot", "/data/store")); +} } // namespace doris diff --git a/be/test/runtime/snapshot_loader_test.cpp b/be/test/runtime/snapshot_loader_test.cpp index 9b6e64a7d31612..8995e6af0e7243 100644 --- a/be/test/runtime/snapshot_loader_test.cpp +++ b/be/test/runtime/snapshot_loader_test.cpp @@ -326,6 +326,53 @@ TEST_F(SnapshotLoaderTest, NormalCase) { EXPECT_EQ(10005, tablet_id); } +TEST_F(SnapshotLoaderTest, RejectBrokenSnapshotPath) { + SnapshotLoader loader(*engine_ref, ExecEnv::GetInstance(), 1L, 2L); + auto snapshot_path = + fmt::format("{}/snapshot/20260311120000.0.86400/10001/12345", storage_root_path); + std::filesystem::remove_all(snapshot_path); + std::filesystem::create_directories(snapshot_path); + + std::map src_to_dest; + src_to_dest[snapshot_path] = "unused"; + + auto st = loader._check_local_snapshot_paths(src_to_dest, true); + ASSERT_TRUE(st.ok()) << st; + + engine_ref->add_broken_path(storage_root_path); + st = loader._check_local_snapshot_paths(src_to_dest, true); + EXPECT_FALSE(st.ok()); + EXPECT_TRUE(st.is()) << st; + EXPECT_NE(st.to_string().find("broken storage path"), std::string::npos) << st; + + EXPECT_TRUE(engine_ref->remove_broken_path(storage_root_path)); + std::filesystem::remove_all(snapshot_path); +} + +TEST_F(SnapshotLoaderTest, RejectBrokenSnapshotPathAfterCanonicalize) { + SnapshotLoader loader(*engine_ref, ExecEnv::GetInstance(), 1L, 2L); + auto snapshot_path = + fmt::format("{}/snapshot/20260311120001.0.86400/10001/12345", storage_root_path); + auto symlink_path = fmt::format("{}/snapshot-link", storage_root_path); + std::filesystem::remove_all(snapshot_path); + std::filesystem::remove(symlink_path); + std::filesystem::create_directories(snapshot_path); + std::filesystem::create_directory_symlink(snapshot_path, symlink_path); + + std::map src_to_dest; + src_to_dest[symlink_path] = "unused"; + + engine_ref->add_broken_path(storage_root_path); + auto st = loader._check_local_snapshot_paths(src_to_dest, true); + EXPECT_FALSE(st.ok()); + EXPECT_TRUE(st.is()) << st; + EXPECT_NE(st.to_string().find("broken storage path"), std::string::npos) << st; + + EXPECT_TRUE(engine_ref->remove_broken_path(storage_root_path)); + std::filesystem::remove(symlink_path); + std::filesystem::remove_all(snapshot_path); +} + TEST_F(SnapshotLoaderTest, DirMoveTaskIsIdempotent) { // 1. create a tablet int64_t tablet_id = 111;