From 86a1f03a179bc30dff71b7e79cf297260c3e5106 Mon Sep 17 00:00:00 2001 From: slfan1989 Date: Mon, 18 May 2026 14:31:12 +0800 Subject: [PATCH 1/3] feat(io): add bulk delete API to FileIO. --- src/iceberg/file_io.h | 20 ++++++++- src/iceberg/test/CMakeLists.txt | 1 + src/iceberg/test/file_io_test.cc | 72 ++++++++++++++++++++++++++++++++ src/iceberg/test/meson.build | 1 + 4 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 src/iceberg/test/file_io_test.cc diff --git a/src/iceberg/file_io.h b/src/iceberg/file_io.h index e772b5336..443a657e5 100644 --- a/src/iceberg/file_io.h +++ b/src/iceberg/file_io.h @@ -151,9 +151,27 @@ class ICEBERG_EXPORT FileIO { /// /// \param file_location The location of the file to delete. /// \return void if the delete succeeded, an error code if the delete failed. - virtual Status DeleteFile(const std::string& file_location) { + virtual Status DeleteFile(const std::string&) { return NotImplemented("DeleteFile not implemented"); } + + /// \brief Delete files at the given locations. + /// + /// Implementations that can delete multiple files efficiently should override this + /// method. The default implementation deletes files sequentially using DeleteFile + /// and returns the first error encountered. + /// + /// \param file_locations The locations of the files to delete. + /// \return void if all deletes succeeded, an error code if any delete failed. + virtual Status DeleteFiles(std::span file_locations) { + for (const auto& file_location : file_locations) { + auto status = DeleteFile(file_location); + if (!status.has_value()) { + return status; + } + } + return {}; + } }; } // namespace iceberg diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt index afafc4c14..8e2484fbf 100644 --- a/src/iceberg/test/CMakeLists.txt +++ b/src/iceberg/test/CMakeLists.txt @@ -124,6 +124,7 @@ add_iceberg_test(util_test data_file_set_test.cc decimal_test.cc endian_test.cc + file_io_test.cc formatter_test.cc lazy_test.cc location_util_test.cc diff --git a/src/iceberg/test/file_io_test.cc b/src/iceberg/test/file_io_test.cc new file mode 100644 index 000000000..7c6df27f9 --- /dev/null +++ b/src/iceberg/test/file_io_test.cc @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/file_io.h" + +#include +#include +#include + +#include + +#include "iceberg/test/matchers.h" + +namespace iceberg { +namespace { + +class RecordingFileIO : public FileIO { + public: + explicit RecordingFileIO(std::string failure_path = "") + : failure_path_(std::move(failure_path)) {} + + Status DeleteFile(const std::string& file_location) override { + deleted_paths.push_back(file_location); + if (file_location == failure_path_) { + return IOError("failed to delete {}", file_location); + } + return {}; + } + + std::vector deleted_paths; + + private: + std::string failure_path_; +}; + +TEST(FileIOTest, DeleteFilesFallsBackToDeleteFileForEachPath) { + RecordingFileIO file_io; + std::vector paths = {"file-a.avro", "file-b.avro"}; + + EXPECT_THAT(file_io.DeleteFiles(paths), IsOk()); + EXPECT_THAT(file_io.deleted_paths, ::testing::ElementsAre("file-a.avro", "file-b.avro")); +} + +TEST(FileIOTest, DeleteFilesReturnsFirstDeleteFileError) { + RecordingFileIO file_io("file-b.avro"); + std::vector paths = {"file-a.avro", "file-b.avro", "file-c.avro"}; + + auto status = file_io.DeleteFiles(paths); + + EXPECT_THAT(status, IsError(ErrorKind::kIOError)); + EXPECT_THAT(status, HasErrorMessage("failed to delete file-b.avro")); + EXPECT_THAT(file_io.deleted_paths, ::testing::ElementsAre("file-a.avro", "file-b.avro")); +} + +} // namespace +} // namespace iceberg diff --git a/src/iceberg/test/meson.build b/src/iceberg/test/meson.build index e168d08bf..1acb46e9b 100644 --- a/src/iceberg/test/meson.build +++ b/src/iceberg/test/meson.build @@ -88,6 +88,7 @@ iceberg_tests = { 'data_file_set_test.cc', 'decimal_test.cc', 'endian_test.cc', + 'file_io_test.cc', 'formatter_test.cc', 'lazy_test.cc', 'location_util_test.cc', From 2b4e2d41abb1af797eab5d1c05f965c2378e7cb8 Mon Sep 17 00:00:00 2001 From: slfan1989 Date: Mon, 18 May 2026 14:44:22 +0800 Subject: [PATCH 2/3] feat(io): add bulk delete API to FileIO. --- src/iceberg/test/file_io_test.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/iceberg/test/file_io_test.cc b/src/iceberg/test/file_io_test.cc index 7c6df27f9..bf5625abf 100644 --- a/src/iceberg/test/file_io_test.cc +++ b/src/iceberg/test/file_io_test.cc @@ -54,7 +54,8 @@ TEST(FileIOTest, DeleteFilesFallsBackToDeleteFileForEachPath) { std::vector paths = {"file-a.avro", "file-b.avro"}; EXPECT_THAT(file_io.DeleteFiles(paths), IsOk()); - EXPECT_THAT(file_io.deleted_paths, ::testing::ElementsAre("file-a.avro", "file-b.avro")); + EXPECT_THAT(file_io.deleted_paths, + ::testing::ElementsAre("file-a.avro", "file-b.avro")); } TEST(FileIOTest, DeleteFilesReturnsFirstDeleteFileError) { @@ -65,7 +66,8 @@ TEST(FileIOTest, DeleteFilesReturnsFirstDeleteFileError) { EXPECT_THAT(status, IsError(ErrorKind::kIOError)); EXPECT_THAT(status, HasErrorMessage("failed to delete file-b.avro")); - EXPECT_THAT(file_io.deleted_paths, ::testing::ElementsAre("file-a.avro", "file-b.avro")); + EXPECT_THAT(file_io.deleted_paths, + ::testing::ElementsAre("file-a.avro", "file-b.avro")); } } // namespace From 04cef13bed43101d3329dc94f3b6063489fadc6a Mon Sep 17 00:00:00 2001 From: slfan1989 Date: Tue, 19 May 2026 13:40:17 +0800 Subject: [PATCH 3/3] feat(io): add bulk delete API to FileIO. --- src/iceberg/file_io.h | 2 +- src/iceberg/test/file_io_test.cc | 18 ++++++++---------- src/iceberg/update/expire_snapshots.cc | 18 +++++++----------- 3 files changed, 16 insertions(+), 22 deletions(-) diff --git a/src/iceberg/file_io.h b/src/iceberg/file_io.h index 443a657e5..6718a36a5 100644 --- a/src/iceberg/file_io.h +++ b/src/iceberg/file_io.h @@ -151,7 +151,7 @@ class ICEBERG_EXPORT FileIO { /// /// \param file_location The location of the file to delete. /// \return void if the delete succeeded, an error code if the delete failed. - virtual Status DeleteFile(const std::string&) { + virtual Status DeleteFile(const std::string& file_location) { return NotImplemented("DeleteFile not implemented"); } diff --git a/src/iceberg/test/file_io_test.cc b/src/iceberg/test/file_io_test.cc index bf5625abf..4c054f59a 100644 --- a/src/iceberg/test/file_io_test.cc +++ b/src/iceberg/test/file_io_test.cc @@ -27,18 +27,17 @@ #include "iceberg/test/matchers.h" -namespace iceberg { namespace { -class RecordingFileIO : public FileIO { +class RecordingFileIO : public iceberg::FileIO { public: explicit RecordingFileIO(std::string failure_path = "") : failure_path_(std::move(failure_path)) {} - Status DeleteFile(const std::string& file_location) override { + iceberg::Status DeleteFile(const std::string& file_location) override { deleted_paths.push_back(file_location); if (file_location == failure_path_) { - return IOError("failed to delete {}", file_location); + return iceberg::IOError("failed to delete {}", file_location); } return {}; } @@ -49,11 +48,13 @@ class RecordingFileIO : public FileIO { std::string failure_path_; }; +} // namespace + TEST(FileIOTest, DeleteFilesFallsBackToDeleteFileForEachPath) { RecordingFileIO file_io; std::vector paths = {"file-a.avro", "file-b.avro"}; - EXPECT_THAT(file_io.DeleteFiles(paths), IsOk()); + EXPECT_THAT(file_io.DeleteFiles(paths), iceberg::IsOk()); EXPECT_THAT(file_io.deleted_paths, ::testing::ElementsAre("file-a.avro", "file-b.avro")); } @@ -64,11 +65,8 @@ TEST(FileIOTest, DeleteFilesReturnsFirstDeleteFileError) { auto status = file_io.DeleteFiles(paths); - EXPECT_THAT(status, IsError(ErrorKind::kIOError)); - EXPECT_THAT(status, HasErrorMessage("failed to delete file-b.avro")); + EXPECT_THAT(status, iceberg::IsError(iceberg::ErrorKind::kIOError)); + EXPECT_THAT(status, iceberg::HasErrorMessage("failed to delete file-b.avro")); EXPECT_THAT(file_io.deleted_paths, ::testing::ElementsAre("file-a.avro", "file-b.avro")); } - -} // namespace -} // namespace iceberg diff --git a/src/iceberg/update/expire_snapshots.cc b/src/iceberg/update/expire_snapshots.cc index ce65882c9..fb508cc09 100644 --- a/src/iceberg/update/expire_snapshots.cc +++ b/src/iceberg/update/expire_snapshots.cc @@ -75,26 +75,22 @@ class FileCleanupStrategy { CleanupLevel level) = 0; protected: - /// \brief Delete a single file - void DeleteFile(const std::string& path) { + /// \brief Delete files at the given locations. + void DeleteFiles(const std::unordered_set& paths) { try { if (delete_func_) { - delete_func_(path); + for (const auto& path : paths) { + delete_func_(path); + } } else { - std::ignore = file_io_->DeleteFile(path); + std::vector path_list(paths.begin(), paths.end()); + std::ignore = file_io_->DeleteFiles(path_list); } } catch (...) { /// TODO(shangxinli): add retry } } - /// TODO(shangxinli): Add bulk deletion - void DeleteFiles(const std::unordered_set& paths) { - for (const auto& path : paths) { - DeleteFile(path); - } - } - bool HasAnyStatisticsFiles(const TableMetadata& metadata) const { return !metadata.statistics.empty() || !metadata.partition_statistics.empty(); }