From e34669317289e77e473922e7aa1b6bf2049ef2ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 26 May 2026 13:59:21 +0200 Subject: [PATCH] GH-46369: [C++] Add key-value pairs to the FileSystemFactory interface --- cpp/src/arrow/filesystem/filesystem.cc | 28 +++++++--- cpp/src/arrow/filesystem/filesystem.h | 68 +++++++++++++++++++++++- cpp/src/arrow/filesystem/localfs_test.cc | 19 +++++-- cpp/src/arrow/filesystem/s3fs.cc | 8 +-- cpp/src/arrow/testing/examplefs.cc | 5 +- 5 files changed, 111 insertions(+), 17 deletions(-) diff --git a/cpp/src/arrow/filesystem/filesystem.cc b/cpp/src/arrow/filesystem/filesystem.cc index 8281bed7ce16..75b9616ff00f 100644 --- a/cpp/src/arrow/filesystem/filesystem.cc +++ b/cpp/src/arrow/filesystem/filesystem.cc @@ -893,10 +893,10 @@ Status LoadFileSystemFactories(const char* libpath) { namespace { -Result> FileSystemFromUriReal(const Uri& uri, - const std::string& uri_string, - const io::IOContext& io_context, - std::string* out_path) { +Result> FileSystemFromUriReal( + const Uri& uri, const std::string& uri_string, + const std::vector>& options, + const io::IOContext& io_context, std::string* out_path) { const auto scheme = uri.scheme(); { @@ -904,7 +904,7 @@ Result> FileSystemFromUriReal(const Uri& uri, auto* factory, FileSystemFactoryRegistry::GetInstance()->FactoryForScheme(scheme)); if (factory != nullptr) { - return factory->function(uri, io_context, out_path); + return factory->function(uri, options, io_context, out_path); } } @@ -962,14 +962,28 @@ Result> FileSystemFromUriReal(const Uri& uri, Result> FileSystemFromUri(const std::string& uri_string, std::string* out_path) { - return FileSystemFromUri(uri_string, io::default_io_context(), out_path); + return FileSystemFromUri(uri_string, /*options=*/{}, io::default_io_context(), + out_path); +} + +Result> FileSystemFromUri( + const std::string& uri_string, + const std::vector>& options, std::string* out_path) { + return FileSystemFromUri(uri_string, 
options, io::default_io_context(), out_path); } Result> FileSystemFromUri(const std::string& uri_string, const io::IOContext& io_context, std::string* out_path) { + return FileSystemFromUri(uri_string, /*options=*/{}, io_context, out_path); +} + +Result> FileSystemFromUri( + const std::string& uri_string, + const std::vector>& options, + const io::IOContext& io_context, std::string* out_path) { ARROW_ASSIGN_OR_RAISE(auto fsuri, ParseFileSystemUri(uri_string)); - return FileSystemFromUriReal(fsuri, uri_string, io_context, out_path); + return FileSystemFromUriReal(fsuri, uri_string, options, io_context, out_path); } Result> FileSystemFromUriOrPath(const std::string& uri_string, diff --git a/cpp/src/arrow/filesystem/filesystem.h b/cpp/src/arrow/filesystem/filesystem.h index 3a47eb62f524..5c4fc3c42c9e 100644 --- a/cpp/src/arrow/filesystem/filesystem.h +++ b/cpp/src/arrow/filesystem/filesystem.h @@ -17,6 +17,7 @@ #pragma once +#include #include #include #include @@ -28,6 +29,7 @@ #include "arrow/filesystem/type_fwd.h" #include "arrow/io/interfaces.h" +#include "arrow/result.h" #include "arrow/type_fwd.h" #include "arrow/util/compare.h" #include "arrow/util/macros.h" @@ -359,11 +361,34 @@ class ARROW_EXPORT FileSystem struct FileSystemFactory { std::function>( - const Uri& uri, const io::IOContext& io_context, std::string* out_path)> + const Uri& uri, const std::vector>& options, + const io::IOContext& io_context, std::string* out_path)> function; std::string_view file; int line; + /// Construct from an options-aware factory function. + FileSystemFactory(std::function>( + const Uri&, const std::vector>&, + const io::IOContext&, std::string*)> + fn, + std::string_view file, int line) + : function(std::move(fn)), file(file), line(line) {} + + /// Construct from a non-options-aware factory function maintaining source compatibility + /// with existing factories. 
+ FileSystemFactory(std::function>( + const Uri&, const io::IOContext&, std::string*)> + fn, + std::string_view file, int line) + : function([fn = std::move(fn)]( + const Uri& uri, + const std::vector>& /*ignored*/, + const io::IOContext& ctx, + std::string* out_path) { return fn(uri, ctx, out_path); }), + file(file), + line(line) {} + bool operator==(const FileSystemFactory& other) const { // In the case where libarrow is linked statically both to the executable and to a // dynamically loaded filesystem implementation library, the library contains a @@ -547,6 +572,26 @@ ARROW_EXPORT Result> FileSystemFromUri(const std::string& uri, std::string* out_path = NULLPTR); +/// \brief Create a new FileSystem by URI with extended backend-specific filesystem +/// options +/// +/// Recognized schemes are "file", "mock", "hdfs", "viewfs", "s3", +/// "gs" and "gcs". +/// +/// Support for other schemes can be added using RegisterFileSystemFactory. +/// +/// \param[in] uri the URI to give access to +/// \param[in] options a list of backend-specific filesystem options +/// Each option is a (name, value) pair. +/// The expected type is specific to the backend and +/// option name. +/// \param[out] out_path (optional) Path inside the filesystem. +/// \return out_fs FileSystem instance. +ARROW_EXPORT +Result> FileSystemFromUri( + const std::string& uri, const std::vector>& options, + std::string* out_path = NULLPTR); + /// \brief Create a new FileSystem by URI with a custom IO context /// /// Recognized schemes are "file", "mock", "hdfs", "viewfs", "s3", @@ -563,6 +608,27 @@ Result> FileSystemFromUri(const std::string& uri, const io::IOContext& io_context, std::string* out_path = NULLPTR); +/// \brief Create a new FileSystem by URI with a custom IO context with backend-specific +/// filesystem options +/// +/// Recognized schemes are "file", "mock", "hdfs", "viewfs", "s3", +/// "gs" and "gcs". +/// +/// Support for other schemes can be added using RegisterFileSystemFactory. 
+/// +/// \param[in] uri a URI-based path, ex: file:///some/local/path +/// \param[in] options a list of backend-specific filesystem options +/// Each option is a (name, value) pair. +/// The expected type is specific to the backend and +/// option name. +/// \param[in] io_context an IOContext which will be associated with the filesystem +/// \param[out] out_path (optional) Path inside the filesystem. +/// \return out_fs FileSystem instance. +ARROW_EXPORT +Result> FileSystemFromUri( + const std::string& uri, const std::vector>& options, + const io::IOContext& io_context, std::string* out_path = NULLPTR); + /// \brief Create a new FileSystem by URI /// /// Support for other schemes can be added using RegisterFileSystemFactory. diff --git a/cpp/src/arrow/filesystem/localfs_test.cc b/cpp/src/arrow/filesystem/localfs_test.cc index 2e91783c92dc..c63f4bfbf1cd 100644 --- a/cpp/src/arrow/filesystem/localfs_test.cc +++ b/cpp/src/arrow/filesystem/localfs_test.cc @@ -171,10 +171,21 @@ TEST(FileSystemFromUri, RuntimeRegisteredFactory) { } FileSystemRegistrar kSegfaultFileSystemModule[]{ - ARROW_REGISTER_FILESYSTEM("segfault", nullptr, {}), - ARROW_REGISTER_FILESYSTEM("segfault", nullptr, {}), - ARROW_REGISTER_FILESYSTEM("segfault", nullptr, {}), -}; + ARROW_REGISTER_FILESYSTEM( + "segfault", + std::function>( + const Uri&, const io::IOContext&, std::string*)>(nullptr), + {}), + ARROW_REGISTER_FILESYSTEM( + "segfault", + std::function>( + const Uri&, const io::IOContext&, std::string*)>(nullptr), + {}), + ARROW_REGISTER_FILESYSTEM( + "segfault", + std::function>( + const Uri&, const io::IOContext&, std::string*)>(nullptr), + {})}; TEST(FileSystemFromUri, LinkedRegisteredFactoryNameCollision) { // Since multiple registrars are defined in this translation unit which all // register factories for the 'segfault' scheme, using that scheme in FileSystemFromUri diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc index 0c15f6f18444..d5eb31a16458 100644 --- 
a/cpp/src/arrow/filesystem/s3fs.cc +++ b/cpp/src/arrow/filesystem/s3fs.cc @@ -3603,11 +3603,13 @@ Result ResolveS3BucketRegion(const std::string& bucket) { auto kS3FileSystemModule = ARROW_REGISTER_FILESYSTEM( "s3", - [](const arrow::util::Uri& uri, const io::IOContext& io_context, + [](const arrow::util::Uri& uri, + const std::vector>& options, + const io::IOContext& io_context, std::string* out_path) -> Result> { RETURN_NOT_OK(EnsureS3Initialized()); - ARROW_ASSIGN_OR_RAISE(auto options, S3Options::FromUri(uri, out_path)); - return S3FileSystem::Make(options, io_context); + ARROW_ASSIGN_OR_RAISE(auto s3_options, S3Options::FromUri(uri, out_path)); + return S3FileSystem::Make(s3_options, io_context); }, [] { DCHECK_OK(EnsureS3Finalized()); }); diff --git a/cpp/src/arrow/testing/examplefs.cc b/cpp/src/arrow/testing/examplefs.cc index 5c9d5f9d9071..eab26349e2ef 100644 --- a/cpp/src/arrow/testing/examplefs.cc +++ b/cpp/src/arrow/testing/examplefs.cc @@ -26,12 +26,13 @@ namespace arrow::fs { auto kExampleFileSystemModule = ARROW_REGISTER_FILESYSTEM( "example", - [](const Uri& uri, const io::IOContext& io_context, + [](const Uri& uri, const std::vector>& options, + const io::IOContext& io_context, std::string* out_path) -> Result> { constexpr std::string_view kScheme = "example"; EXPECT_EQ(uri.scheme(), kScheme); auto local_uri = "file" + uri.ToString().substr(kScheme.size()); - return FileSystemFromUri(local_uri, io_context, out_path); + return FileSystemFromUri(local_uri, options, io_context, out_path); }, {});