diff --git a/src/iceberg/file_io.h b/src/iceberg/file_io.h
index e772b5336..6718a36a5 100644
--- a/src/iceberg/file_io.h
+++ b/src/iceberg/file_io.h
@@ -154,6 +154,24 @@ class ICEBERG_EXPORT FileIO {
   virtual Status DeleteFile(const std::string& file_location) {
     return NotImplemented("DeleteFile not implemented");
   }
+
+  /// \brief Delete files at the given locations.
+  ///
+  /// Implementations that can delete multiple files efficiently should override this
+  /// method. The default implementation deletes files sequentially using DeleteFile
+  /// and stops at the first failure, leaving any later locations untouched.
+  ///
+  /// \param file_locations The locations of the files to delete.
+  /// \return void if all deletes succeeded, otherwise the first error encountered.
+  virtual Status DeleteFiles(std::span<const std::string> file_locations) {
+    for (const auto& file_location : file_locations) {
+      auto status = DeleteFile(file_location);
+      if (!status.has_value()) {
+        return status;
+      }
+    }
+    return {};
+  }
 };
 
 }  // namespace iceberg
diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt
index afafc4c14..8e2484fbf 100644
--- a/src/iceberg/test/CMakeLists.txt
+++ b/src/iceberg/test/CMakeLists.txt
@@ -124,6 +124,7 @@ add_iceberg_test(util_test
                  data_file_set_test.cc
                  decimal_test.cc
                  endian_test.cc
+                 file_io_test.cc
                  formatter_test.cc
                  lazy_test.cc
                  location_util_test.cc
diff --git a/src/iceberg/test/file_io_test.cc b/src/iceberg/test/file_io_test.cc
new file mode 100644
index 000000000..4c054f59a
--- /dev/null
+++ b/src/iceberg/test/file_io_test.cc
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/file_io.h"
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <gmock/gmock.h>
+
+#include "iceberg/test/matchers.h"
+
+namespace {
+
+/// \brief FileIO stub that records DeleteFile calls and fails for one chosen path.
+///
+/// Only DeleteFile is overridden, so DeleteFiles exercises the base-class default.
+class RecordingFileIO : public iceberg::FileIO {
+ public:
+  explicit RecordingFileIO(std::string failure_path = "")
+      : failure_path_(std::move(failure_path)) {}
+
+  iceberg::Status DeleteFile(const std::string& file_location) override {
+    // Record before failing so tests can see which paths were attempted.
+    deleted_paths.push_back(file_location);
+    if (file_location == failure_path_) {
+      return iceberg::IOError("failed to delete {}", file_location);
+    }
+    return {};
+  }
+
+  /// Every location passed to DeleteFile, in call order (including the failing one).
+  std::vector<std::string> deleted_paths;
+
+ private:
+  /// Location for which DeleteFile returns an IOError.
+  std::string failure_path_;
+};
+
+}  // namespace
+
+TEST(FileIOTest, DeleteFilesFallsBackToDeleteFileForEachPath) {
+  RecordingFileIO file_io;
+  std::vector<std::string> paths = {"file-a.avro", "file-b.avro"};
+
+  EXPECT_THAT(file_io.DeleteFiles(paths), iceberg::IsOk());
+  EXPECT_THAT(file_io.deleted_paths,
+              ::testing::ElementsAre("file-a.avro", "file-b.avro"));
+}
+
+TEST(FileIOTest, DeleteFilesReturnsFirstDeleteFileError) {
+  RecordingFileIO file_io("file-b.avro");
+  std::vector<std::string> paths = {"file-a.avro", "file-b.avro", "file-c.avro"};
+
+  auto status = file_io.DeleteFiles(paths);
+
+  EXPECT_THAT(status, iceberg::IsError(iceberg::ErrorKind::kIOError));
+  EXPECT_THAT(status, iceberg::HasErrorMessage("failed to delete file-b.avro"));
+  // file-c.avro is never attempted: the default stops at the first failure.
+  EXPECT_THAT(file_io.deleted_paths,
+              ::testing::ElementsAre("file-a.avro", "file-b.avro"));
+}
diff --git a/src/iceberg/test/meson.build b/src/iceberg/test/meson.build
index e168d08bf..1acb46e9b 100644
--- a/src/iceberg/test/meson.build
+++ b/src/iceberg/test/meson.build
@@ -88,6 +88,7 @@ iceberg_tests = {
             'data_file_set_test.cc',
             'decimal_test.cc',
             'endian_test.cc',
+            'file_io_test.cc',
             'formatter_test.cc',
             'lazy_test.cc',
             'location_util_test.cc',
diff --git a/src/iceberg/update/expire_snapshots.cc b/src/iceberg/update/expire_snapshots.cc
index ce65882c9..fb508cc09 100644
--- a/src/iceberg/update/expire_snapshots.cc
+++ b/src/iceberg/update/expire_snapshots.cc
@@ -88,13 +88,35 @@ class FileCleanupStrategy {
     }
   }
 
-  /// TODO(shangxinli): Add bulk deletion
+  /// \brief Delete files at the given locations on a best-effort basis.
+  ///
+  /// Without a custom delete function, the whole set is first handed to
+  /// FileIO::DeleteFiles so implementations with bulk support can use it. If that
+  /// call fails or throws, or when a custom delete function is set, each path is
+  /// deleted individually through DeleteFile so that one failure cannot keep the
+  /// remaining paths from being attempted.
   void DeleteFiles(const std::unordered_set<std::string>& paths) {
+    if (paths.empty()) {
+      return;
+    }
+    if (!delete_func_) {
+      // Try the bulk API first. On failure fall through to the per-file loop:
+      // the default FileIO::DeleteFiles stops at its first error, which would
+      // otherwise leave the remaining paths undeleted.
+      try {
+        std::vector<std::string> path_list(paths.begin(), paths.end());
+        if (file_io_->DeleteFiles(path_list).has_value()) {
+          return;
+        }
+      } catch (...) {
+        /// TODO(shangxinli): add retry
+      }
+    }
     for (const auto& path : paths) {
       DeleteFile(path);
     }
   }
 
   bool HasAnyStatisticsFiles(const TableMetadata& metadata) const {
     return !metadata.statistics.empty() || !metadata.partition_statistics.empty();
   }