Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 21 additions & 7 deletions cpp/src/arrow/filesystem/filesystem.cc
Original file line number Diff line number Diff line change
Expand Up @@ -893,18 +893,18 @@ Status LoadFileSystemFactories(const char* libpath) {

namespace {

Result<std::shared_ptr<FileSystem>> FileSystemFromUriReal(const Uri& uri,
const std::string& uri_string,
const io::IOContext& io_context,
std::string* out_path) {
Result<std::shared_ptr<FileSystem>> FileSystemFromUriReal(
const Uri& uri, const std::string& uri_string,
const std::vector<std::pair<std::string, std::any>>& options,
const io::IOContext& io_context, std::string* out_path) {
const auto scheme = uri.scheme();

{
ARROW_ASSIGN_OR_RAISE(
auto* factory,
FileSystemFactoryRegistry::GetInstance()->FactoryForScheme(scheme));
if (factory != nullptr) {
return factory->function(uri, io_context, out_path);
return factory->function(uri, options, io_context, out_path);
}
}

Expand Down Expand Up @@ -962,14 +962,28 @@ Result<std::shared_ptr<FileSystem>> FileSystemFromUriReal(const Uri& uri,

// Convenience overload taking only the URI string and the optional out_path.
// It supplies io::default_io_context() and forwards to another FileSystemFromUri
// overload, which performs the actual lookup.
Result<std::shared_ptr<FileSystem>> FileSystemFromUri(const std::string& uri_string,
std::string* out_path) {
return FileSystemFromUri(uri_string, io::default_io_context(), out_path);
return FileSystemFromUri(uri_string, /*options=*/{}, io::default_io_context(),
out_path);
}

// Options-aware convenience overload: pairs the caller's option list with the default
// IO context and delegates to the overload that accepts both explicitly.
Result<std::shared_ptr<FileSystem>> FileSystemFromUri(
    const std::string& uri_string,
    const std::vector<std::pair<std::string, std::any>>& options, std::string* out_path) {
  const auto& default_context = io::default_io_context();
  return FileSystemFromUri(uri_string, options, default_context, out_path);
}

// IOContext-only overload: the caller supplies no backend-specific options, so an
// empty option list is forwarded together with the given IOContext.
Result<std::shared_ptr<FileSystem>> FileSystemFromUri(const std::string& uri_string,
                                                      const io::IOContext& io_context,
                                                      std::string* out_path) {
  const std::vector<std::pair<std::string, std::any>> no_options;
  return FileSystemFromUri(uri_string, no_options, io_context, out_path);
}

// Overload taking the URI string, the backend-specific options, an explicit IOContext
// and the optional out_path.
// Parses uri_string with ParseFileSystemUri, then hands the parsed URI, the original
// string and the caller's arguments to FileSystemFromUriReal.
// NOTE(review): ARROW_ASSIGN_OR_RAISE presumably returns the parse error to the caller
// when ParseFileSystemUri fails — confirm against the macro definition.
Result<std::shared_ptr<FileSystem>> FileSystemFromUri(
const std::string& uri_string,
const std::vector<std::pair<std::string, std::any>>& options,
const io::IOContext& io_context, std::string* out_path) {
ARROW_ASSIGN_OR_RAISE(auto fsuri, ParseFileSystemUri(uri_string));
return FileSystemFromUriReal(fsuri, uri_string, io_context, out_path);
return FileSystemFromUriReal(fsuri, uri_string, options, io_context, out_path);
}
Comment on lines 963 to 987

Result<std::shared_ptr<FileSystem>> FileSystemFromUriOrPath(const std::string& uri_string,
Expand Down
68 changes: 67 additions & 1 deletion cpp/src/arrow/filesystem/filesystem.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#pragma once

#include <any>
#include <chrono>
#include <cstdint>
#include <functional>
Expand All @@ -28,6 +29,7 @@

#include "arrow/filesystem/type_fwd.h"
#include "arrow/io/interfaces.h"
#include "arrow/result.h"
#include "arrow/type_fwd.h"
#include "arrow/util/compare.h"
#include "arrow/util/macros.h"
Expand Down Expand Up @@ -359,11 +361,34 @@ class ARROW_EXPORT FileSystem

struct FileSystemFactory {
std::function<Result<std::shared_ptr<FileSystem>>(
const Uri& uri, const io::IOContext& io_context, std::string* out_path)>
const Uri& uri, const std::vector<std::pair<std::string, std::any>>& options,
const io::IOContext& io_context, std::string* out_path)>
function;
std::string_view file;
int line;

/// Construct from an options-aware factory function.
FileSystemFactory(std::function<Result<std::shared_ptr<FileSystem>>(
const Uri&, const std::vector<std::pair<std::string, std::any>>&,
const io::IOContext&, std::string*)>
fn,
std::string_view file, int line)
: function(std::move(fn)), file(file), line(line) {}

/// Construct from a non-options aware factory function maintaing source compatibility
/// with existing factories.
Comment on lines +378 to +379
FileSystemFactory(std::function<Result<std::shared_ptr<FileSystem>>(
const Uri&, const io::IOContext&, std::string*)>
fn,
std::string_view file, int line)
: function([fn = std::move(fn)](
const Uri& uri,
const std::vector<std::pair<std::string, std::any>>& /*ignored*/,
const io::IOContext& ctx,
std::string* out_path) { return fn(uri, ctx, out_path); }),
file(file),
line(line) {}

bool operator==(const FileSystemFactory& other) const {
// In the case where libarrow is linked statically both to the executable and to a
// dynamically loaded filesystem implementation library, the library contains a
Expand Down Expand Up @@ -547,6 +572,26 @@ ARROW_EXPORT
Result<std::shared_ptr<FileSystem>> FileSystemFromUri(const std::string& uri,
std::string* out_path = NULLPTR);

/// \brief Create a new FileSystem by URI with extended backend-specific filesystem
/// options
///
/// This overload does not take an IOContext; the filesystem is created with the
/// default IO context.
///
/// Recognized schemes are "file", "mock", "hdfs", "viewfs", "s3",
/// "gs" and "gcs".
///
/// Support for other schemes can be added using RegisterFileSystemFactory.
///
/// \param[in] uri the URI to give access to
/// \param[in] options a list of backend-specific filesystem options.
///                    Each option is a (name, value) pair.
///                    The expected type of the value is specific to the backend
///                    and the option name.
/// \param[out] out_path (optional) Path inside the filesystem.
/// \return the FileSystem instance.
///
/// NOTE(review): a call that passes a bare `{}` as the second argument may no longer
/// resolve to a single overload now that both this overload and the IOContext overload
/// exist — confirm before release.
ARROW_EXPORT
Result<std::shared_ptr<FileSystem>> FileSystemFromUri(
const std::string& uri, const std::vector<std::pair<std::string, std::any>>& options,
std::string* out_path = NULLPTR);
Comment on lines +575 to +593
Comment on lines +591 to +593
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am unsure whether we want to stay purely additive here and drop this overload, so that existing external calls of the form FileSystemFromUri(const std::string&, {}, std::string*) keep working, or keep it and add a documentation note.


/// \brief Create a new FileSystem by URI with a custom IO context
///
/// Recognized schemes are "file", "mock", "hdfs", "viewfs", "s3",
Expand All @@ -563,6 +608,27 @@ Result<std::shared_ptr<FileSystem>> FileSystemFromUri(const std::string& uri,
const io::IOContext& io_context,
std::string* out_path = NULLPTR);

/// \brief Create a new FileSystem by URI with a custom IO context and
/// backend-specific filesystem options
///
/// Recognized schemes are "file", "mock", "hdfs", "viewfs", "s3",
/// "gs" and "gcs".
///
/// Support for other schemes can be added using RegisterFileSystemFactory.
/// When a factory is registered for the URI scheme, the options list is handed to
/// that factory.
///
/// \param[in] uri a URI-based path, ex: file:///some/local/path
/// \param[in] options a list of backend-specific filesystem options.
///                    Each option is a (name, value) pair.
///                    The expected type of the value is specific to the backend
///                    and the option name.
/// \param[in] io_context an IOContext which will be associated with the filesystem
/// \param[out] out_path (optional) Path inside the filesystem.
/// \return the FileSystem instance.
ARROW_EXPORT
Result<std::shared_ptr<FileSystem>> FileSystemFromUri(
const std::string& uri, const std::vector<std::pair<std::string, std::any>>& options,
const io::IOContext& io_context, std::string* out_path = NULLPTR);

/// \brief Create a new FileSystem by URI
///
/// Support for other schemes can be added using RegisterFileSystemFactory.
Expand Down
19 changes: 15 additions & 4 deletions cpp/src/arrow/filesystem/localfs_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -171,10 +171,21 @@ TEST(FileSystemFromUri, RuntimeRegisteredFactory) {
}

// Several registrars that all register a factory for the same "segfault" scheme;
// the collision is what the LinkedRegisteredFactoryNameCollision test that follows
// exercises. Every factory callable here is null.
// NOTE(review): the explicit std::function type around nullptr is presumably needed
// so that the argument selects one specific FileSystemFactory constructor — confirm.
FileSystemRegistrar kSegfaultFileSystemModule[]{
ARROW_REGISTER_FILESYSTEM("segfault", nullptr, {}),
ARROW_REGISTER_FILESYSTEM("segfault", nullptr, {}),
ARROW_REGISTER_FILESYSTEM("segfault", nullptr, {}),
};
ARROW_REGISTER_FILESYSTEM(
"segfault",
std::function<Result<std::shared_ptr<FileSystem>>(
const Uri&, const io::IOContext&, std::string*)>(nullptr),
{}),
ARROW_REGISTER_FILESYSTEM(
"segfault",
std::function<Result<std::shared_ptr<FileSystem>>(
const Uri&, const io::IOContext&, std::string*)>(nullptr),
{}),
ARROW_REGISTER_FILESYSTEM(
"segfault",
std::function<Result<std::shared_ptr<FileSystem>>(
const Uri&, const io::IOContext&, std::string*)>(nullptr),
{})};
TEST(FileSystemFromUri, LinkedRegisteredFactoryNameCollision) {
// Since multiple registrars are defined in this translation unit which all
// register factories for the 'segfault' scheme, using that scheme in FileSystemFromUri
Expand Down
8 changes: 5 additions & 3 deletions cpp/src/arrow/filesystem/s3fs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3603,11 +3603,13 @@ Result<std::string> ResolveS3BucketRegion(const std::string& bucket) {

// Factory registered for the "s3" scheme.
// The factory callable calls EnsureS3Initialized(), derives the S3 options from the
// URI via S3Options::FromUri (passing out_path along), and builds the filesystem with
// S3FileSystem::Make and the given IOContext. The trailing callable invokes
// EnsureS3Finalized() under DCHECK_OK.
// NOTE(review): the S3 options appear to come from the URI alone; the generic
// (name, value) option list does not seem to be consulted here — confirm whether it
// should be honored.
auto kS3FileSystemModule = ARROW_REGISTER_FILESYSTEM(
"s3",
[](const arrow::util::Uri& uri, const io::IOContext& io_context,
[](const arrow::util::Uri& uri,
const std::vector<std::pair<std::string, std::any>>& options,
const io::IOContext& io_context,
std::string* out_path) -> Result<std::shared_ptr<fs::FileSystem>> {
RETURN_NOT_OK(EnsureS3Initialized());
ARROW_ASSIGN_OR_RAISE(auto options, S3Options::FromUri(uri, out_path));
return S3FileSystem::Make(options, io_context);
ARROW_ASSIGN_OR_RAISE(auto s3_options, S3Options::FromUri(uri, out_path));
return S3FileSystem::Make(s3_options, io_context);
},
[] { DCHECK_OK(EnsureS3Finalized()); });

Expand Down
5 changes: 3 additions & 2 deletions cpp/src/arrow/testing/examplefs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,13 @@ namespace arrow::fs {

// Factory registered for the "example" scheme.
// The callable checks that the URI scheme is "example", builds a new URI string by
// replacing that leading scheme with "file", and delegates to FileSystemFromUri with
// the caller's IOContext and out_path.
auto kExampleFileSystemModule = ARROW_REGISTER_FILESYSTEM(
"example",
[](const Uri& uri, const io::IOContext& io_context,
[](const Uri& uri, const std::vector<std::pair<std::string, std::any>>& options,
const io::IOContext& io_context,
std::string* out_path) -> Result<std::shared_ptr<FileSystem>> {
constexpr std::string_view kScheme = "example";
EXPECT_EQ(uri.scheme(), kScheme);
// Drop the "example" prefix and keep the rest of the URI text unchanged.
auto local_uri = "file" + uri.ToString().substr(kScheme.size());
return FileSystemFromUri(local_uri, io_context, out_path);
return FileSystemFromUri(local_uri, options, io_context, out_path);
},
{});

Expand Down
Loading