diff --git a/be/src/io/fs/local_file_system.cpp b/be/src/io/fs/local_file_system.cpp index 358ba7d5ff1ee4..6dc366a0bfd0ed 100644 --- a/be/src/io/fs/local_file_system.cpp +++ b/be/src/io/fs/local_file_system.cpp @@ -412,6 +412,20 @@ bool LocalFileSystem::contain_path(const Path& parent_, const Path& sub_) { return true; } +bool LocalFileSystem::equal_or_sub_path(const Path& parent, const Path& child) { + auto parent_path = parent.lexically_normal(); + auto child_path = child.lexically_normal(); + auto parent_it = parent_path.begin(); + auto child_it = child_path.begin(); + for (; parent_it != parent_path.end() && child_it != child_path.end(); + ++parent_it, ++child_it) { + if (*parent_it != *child_it) { + return false; + } + } + return parent_it == parent_path.end(); +} + const std::shared_ptr& global_local_filesystem() { static std::shared_ptr local_fs(new LocalFileSystem()); return local_fs; diff --git a/be/src/io/fs/local_file_system.h b/be/src/io/fs/local_file_system.h index 4540df47c16d81..c1ffbd22949252 100644 --- a/be/src/io/fs/local_file_system.h +++ b/be/src/io/fs/local_file_system.h @@ -59,6 +59,8 @@ class LocalFileSystem final : public FileSystem { Status copy_path(const Path& src, const Path& dest); // return true if parent path contain sub path static bool contain_path(const Path& parent, const Path& sub); + // return true if `parent` equals `child` or contains it as a descendant path. + static bool equal_or_sub_path(const Path& parent, const Path& child); // delete dir or file Status delete_directory_or_file(const Path& path); // change the file permission of the given path diff --git a/be/src/runtime/snapshot_loader.cpp b/be/src/runtime/snapshot_loader.cpp index d6fb88a6552488..16a5d8bf6ce6c1 100644 --- a/be/src/runtime/snapshot_loader.cpp +++ b/be/src/runtime/snapshot_loader.cpp @@ -1332,11 +1332,39 @@ Status SnapshotLoader::_check_local_snapshot_paths( LOG(WARNING) << ss.str(); return Status::RuntimeError(ss.str()); } + if (check_src) { + RETURN_IF_ERROR(_check_snapshot_path_on_broken_storage(path)); + } } LOG(INFO) << "all local snapshot paths are existing. num: " << src_to_dest_path.size(); return Status::OK(); } +Status SnapshotLoader::_check_snapshot_path_on_broken_storage(const std::string& path) { + std::string canonical_path; + RETURN_IF_ERROR(io::global_local_filesystem()->canonicalize(path, &canonical_path)); + + auto broken_paths = _engine.get_broken_paths(); + for (auto* store : _engine.get_stores(true)) { + std::string canonical_store_path; + RETURN_IF_ERROR( + io::global_local_filesystem()->canonicalize(store->path(), &canonical_store_path)); + if (!io::LocalFileSystem::equal_or_sub_path(canonical_store_path, canonical_path)) { + continue; + } + if (!store->is_used() || broken_paths.contains(store->path()) || + broken_paths.contains(canonical_store_path)) { + return Status::IOError( + "snapshot path is on broken storage path, snapshot_path={}, " + "storage_path={}", + canonical_path, canonical_store_path); + } + break; + } + + return Status::OK(); +} + Status SnapshotLoader::_get_existing_files_from_local(const std::string& local_path, std::vector* local_files) { bool exists = true; diff --git a/be/src/runtime/snapshot_loader.h b/be/src/runtime/snapshot_loader.h index b023c0ef24596c..222a4b6ba8766c 100644 --- a/be/src/runtime/snapshot_loader.h +++ b/be/src/runtime/snapshot_loader.h @@ -127,6 +127,8 @@ class SnapshotLoader : public BaseSnapshotLoader { Status _replace_tablet_id(const std::string& file_name, int64_t tablet_id, std::string* new_file_name); + Status _check_snapshot_path_on_broken_storage(const std::string& path); + Status _check_local_snapshot_paths(const std::map& src_to_dest_path, bool check_src); diff --git a/be/test/io/fs/local_file_system_test.cpp b/be/test/io/fs/local_file_system_test.cpp index c930ba72eabf86..450c3daea8f2f6 100644 --- a/be/test/io/fs/local_file_system_test.cpp +++ b/be/test/io/fs/local_file_system_test.cpp @@ -467,4 +467,13 @@ TEST_F(LocalFileSystemTest, TestConvertToAbsPath) { st = doris::io::LocalFileSystem::convert_to_abs_path("hdfs:/abc", abs_path); ASSERT_TRUE(!st.ok()); } + +TEST_F(LocalFileSystemTest, TestEqualOrSubPath) { + EXPECT_TRUE(io::LocalFileSystem::equal_or_sub_path("/data/store", "/data/store")); + EXPECT_TRUE(io::LocalFileSystem::equal_or_sub_path("/data/store", "/data/store/snapshot")); + EXPECT_TRUE(io::LocalFileSystem::equal_or_sub_path("/data/store/./snapshot", + "/data/store/snapshot/dir/../dir")); + EXPECT_FALSE(io::LocalFileSystem::equal_or_sub_path("/data/store1", "/data/store11/snapshot")); + EXPECT_FALSE(io::LocalFileSystem::equal_or_sub_path("/data/store/snapshot", "/data/store")); +} } // namespace doris diff --git a/be/test/runtime/snapshot_loader_test.cpp b/be/test/runtime/snapshot_loader_test.cpp index 9b6e64a7d31612..8995e6af0e7243 100644 --- a/be/test/runtime/snapshot_loader_test.cpp +++ b/be/test/runtime/snapshot_loader_test.cpp @@ -326,6 +326,53 @@ TEST_F(SnapshotLoaderTest, NormalCase) { EXPECT_EQ(10005, tablet_id); } +TEST_F(SnapshotLoaderTest, RejectBrokenSnapshotPath) { + SnapshotLoader loader(*engine_ref, ExecEnv::GetInstance(), 1L, 2L); + auto snapshot_path = + fmt::format("{}/snapshot/20260311120000.0.86400/10001/12345", storage_root_path); + std::filesystem::remove_all(snapshot_path); + std::filesystem::create_directories(snapshot_path); + + std::map src_to_dest; + src_to_dest[snapshot_path] = "unused"; + + auto st = loader._check_local_snapshot_paths(src_to_dest, true); + ASSERT_TRUE(st.ok()) << st; + + engine_ref->add_broken_path(storage_root_path); + st = loader._check_local_snapshot_paths(src_to_dest, true); + EXPECT_FALSE(st.ok()); + EXPECT_TRUE(st.is()) << st; + EXPECT_NE(st.to_string().find("broken storage path"), std::string::npos) << st; + + EXPECT_TRUE(engine_ref->remove_broken_path(storage_root_path)); + std::filesystem::remove_all(snapshot_path); +} + +TEST_F(SnapshotLoaderTest, RejectBrokenSnapshotPathAfterCanonicalize) { + SnapshotLoader loader(*engine_ref, ExecEnv::GetInstance(), 1L, 2L); + auto snapshot_path = + fmt::format("{}/snapshot/20260311120001.0.86400/10001/12345", storage_root_path); + auto symlink_path = fmt::format("{}/snapshot-link", storage_root_path); + std::filesystem::remove_all(snapshot_path); + std::filesystem::remove(symlink_path); + std::filesystem::create_directories(snapshot_path); + std::filesystem::create_directory_symlink(snapshot_path, symlink_path); + + std::map src_to_dest; + src_to_dest[symlink_path] = "unused"; + + engine_ref->add_broken_path(storage_root_path); + auto st = loader._check_local_snapshot_paths(src_to_dest, true); + EXPECT_FALSE(st.ok()); + EXPECT_TRUE(st.is()) << st; + EXPECT_NE(st.to_string().find("broken storage path"), std::string::npos) << st; + + EXPECT_TRUE(engine_ref->remove_broken_path(storage_root_path)); + std::filesystem::remove(symlink_path); + std::filesystem::remove_all(snapshot_path); +} + TEST_F(SnapshotLoaderTest, DirMoveTaskIsIdempotent) { // 1. create a tablet int64_t tablet_id = 111;