diff --git a/src/core/uri/escaping.h b/src/core/uri/escaping.h index 6e1c7bb36..0f60013f9 100644 --- a/src/core/uri/escaping.h +++ b/src/core/uri/escaping.h @@ -32,7 +32,10 @@ enum class URIEscapeMode : std::uint8_t { Fragment, // Like SkipSubDelims but also preserves ":" for Windows filesystem paths // (drive letters like C:) - Filesystem + Filesystem, + // userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) + // See https://www.rfc-editor.org/rfc/rfc3986#appendix-A + UserInfo }; inline auto uri_escape(std::istream &input, std::ostream &output, @@ -68,7 +71,8 @@ inline auto uri_escape(std::istream &input, std::ostream &output, } if (mode == URIEscapeMode::SkipSubDelims || mode == URIEscapeMode::Path || - mode == URIEscapeMode::Fragment || mode == URIEscapeMode::Filesystem) { + mode == URIEscapeMode::Fragment || mode == URIEscapeMode::Filesystem || + mode == URIEscapeMode::UserInfo) { if (uri_is_sub_delim(character)) { output << character; continue; @@ -91,7 +95,7 @@ inline auto uri_escape(std::istream &input, std::ostream &output, } } - if (mode == URIEscapeMode::Filesystem) { + if (mode == URIEscapeMode::Filesystem || mode == URIEscapeMode::UserInfo) { if (character == URI_COLON) { output << character; continue; diff --git a/src/core/uri/include/sourcemeta/core/uri.h b/src/core/uri/include/sourcemeta/core/uri.h index 59e8727e3..8b853b837 100644 --- a/src/core/uri/include/sourcemeta/core/uri.h +++ b/src/core/uri/include/sourcemeta/core/uri.h @@ -324,6 +324,20 @@ class SOURCEMETA_CORE_URI_EXPORT URI { /// ``` [[nodiscard]] auto query() const -> std::optional; + /// Set the query part of the URI. A leading `?` in the input is stripped. + /// Passing an empty string clears the query. 
For example: + /// + /// ```cpp + /// #include <sourcemeta/core/uri.h> + /// #include <cassert> + /// + /// sourcemeta::core::URI uri{"https://www.sourcemeta.com"}; + /// uri.query("foo=bar"); + /// assert(uri.query().has_value()); + /// assert(uri.query().value() == "foo=bar"); + /// ``` + auto query(const std::string_view query) -> URI &; + /// Recompose a URI as established by RFC 3986. For example: /// /// ```cpp @@ -436,6 +450,20 @@ class SOURCEMETA_CORE_URI_EXPORT URI { /// colon. See https://tools.ietf.org/html/rfc3986#section-3.2.1 [[nodiscard]] auto userinfo() const -> std::optional; + /// Set the user information part of the URI. Passing an empty string clears + /// the user information. For example: + /// + /// ```cpp + /// #include <sourcemeta/core/uri.h> + /// #include <cassert> + /// + /// sourcemeta::core::URI uri{"http://host/path"}; + /// uri.userinfo("user"); + /// assert(uri.userinfo().has_value()); + /// assert(uri.userinfo().value() == "user"); + /// ``` + auto userinfo(const std::string_view userinfo) -> URI &; + /// To support equality of URIs auto operator==(const URI &other) const noexcept -> bool = default; diff --git a/src/core/uri/recompose.cc b/src/core/uri/recompose.cc index 2f8ac5154..ffcd955e5 100644 --- a/src/core/uri/recompose.cc +++ b/src/core/uri/recompose.cc @@ -39,7 +39,8 @@ auto escape_component_to_string(std::string &output, std::string_view input, } if (mode == URIEscapeMode::SkipSubDelims || mode == URIEscapeMode::Path || - mode == URIEscapeMode::Fragment || mode == URIEscapeMode::Filesystem) { + mode == URIEscapeMode::Fragment || mode == URIEscapeMode::Filesystem || + mode == URIEscapeMode::UserInfo) { if (uri_is_sub_delim(character)) { output += character; continue; @@ -62,7 +63,7 @@ auto escape_component_to_string(std::string &output, std::string_view input, } } - if (mode == URIEscapeMode::Filesystem) { + if (mode == URIEscapeMode::Filesystem || mode == URIEscapeMode::UserInfo) { if (character == URI_COLON) { output += character; continue; @@ -125,7 +126,7 @@ auto 
URI::recompose_without_fragment() const -> std::optional { if (user_info.has_value()) { escape_component_to_string(result, user_info.value(), - URIEscapeMode::Fragment); + URIEscapeMode::UserInfo); result += '@'; } diff --git a/src/core/uri/setters.cc b/src/core/uri/setters.cc index 6ba2984ac..1eec4e379 100644 --- a/src/core/uri/setters.cc +++ b/src/core/uri/setters.cc @@ -1,5 +1,7 @@ #include +#include "escaping.h" + #include // std::optional #include // std::string #include // std::string_view @@ -154,4 +156,28 @@ auto URI::fragment(const std::string_view fragment) -> URI & { return *this; } +auto URI::query(const std::string_view query) -> URI & { + if (query.empty()) { + this->query_ = std::nullopt; + return *this; + } + + std::string value{query.starts_with('?') ? query.substr(1) : query}; + uri_unescape_unreserved_inplace(value); + this->query_ = std::move(value); + return *this; +} + +auto URI::userinfo(const std::string_view userinfo) -> URI & { + if (userinfo.empty()) { + this->userinfo_ = std::nullopt; + return *this; + } + + std::string value{userinfo}; + uri_unescape_unreserved_inplace(value); + this->userinfo_ = std::move(value); + return *this; +} + } // namespace sourcemeta::core diff --git a/test/uri/uri_query_test.cc b/test/uri/uri_query_test.cc index 4397decbc..396efbbf8 100644 --- a/test/uri/uri_query_test.cc +++ b/test/uri/uri_query_test.cc @@ -116,3 +116,86 @@ TEST(URI_query, encoded_equals_preserved) { EXPECT_TRUE(uri.query().has_value()); EXPECT_EQ(uri.query().value(), "key%3Dvalue"); } + +TEST(URI_query_setter, set_on_uri_without_query) { + sourcemeta::core::URI uri{"https://example.com/path"}; + uri.query("foo=bar"); + EXPECT_TRUE(uri.query().has_value()); + EXPECT_EQ(uri.query().value(), "foo=bar"); + EXPECT_EQ(uri.recompose(), "https://example.com/path?foo=bar"); +} + +TEST(URI_query_setter, replace_existing_query) { + sourcemeta::core::URI uri{"https://example.com/path?old=value"}; + uri.query("foo=bar"); + 
EXPECT_TRUE(uri.query().has_value()); + EXPECT_EQ(uri.query().value(), "foo=bar"); + EXPECT_EQ(uri.recompose(), "https://example.com/path?foo=bar"); +} + +TEST(URI_query_setter, clear_with_empty_string) { + sourcemeta::core::URI uri{"https://example.com/path?foo=bar"}; + uri.query(""); + EXPECT_FALSE(uri.query().has_value()); + EXPECT_EQ(uri.recompose(), "https://example.com/path"); +} + +TEST(URI_query_setter, clear_when_already_absent) { + sourcemeta::core::URI uri{"https://example.com/path"}; + uri.query(""); + EXPECT_FALSE(uri.query().has_value()); + EXPECT_EQ(uri.recompose(), "https://example.com/path"); +} + +TEST(URI_query_setter, leading_question_mark_stripped) { + sourcemeta::core::URI uri{"https://example.com/path"}; + uri.query("?foo=bar"); + EXPECT_TRUE(uri.query().has_value()); + EXPECT_EQ(uri.query().value(), "foo=bar"); + EXPECT_EQ(uri.recompose(), "https://example.com/path?foo=bar"); +} + +TEST(URI_query_setter, clear_preserves_fragment) { + sourcemeta::core::URI uri{"https://example.com/path?foo=bar#section"}; + uri.query(""); + EXPECT_FALSE(uri.query().has_value()); + EXPECT_EQ(uri.recompose(), "https://example.com/path#section"); +} + +TEST(URI_query_setter, percent_encoding_applied_on_recompose) { + sourcemeta::core::URI uri{"https://example.com/path"}; + uri.query("foo=hello world"); + EXPECT_TRUE(uri.query().has_value()); + EXPECT_EQ(uri.query().value(), "foo=hello world"); + EXPECT_EQ(uri.recompose(), "https://example.com/path?foo=hello%20world"); +} + +TEST(URI_query_setter, returns_reference_for_chaining) { + sourcemeta::core::URI uri{"https://example.com"}; + uri.query("a=1").fragment("section"); + EXPECT_EQ(uri.recompose(), "https://example.com?a=1#section"); +} + +TEST(URI_query_setter, normalizes_unreserved_percent_encoding) { + sourcemeta::core::URI uri{"https://example.com"}; + uri.query("foo=%7Ebar"); + EXPECT_TRUE(uri.query().has_value()); + // %7E encodes ~, an unreserved character, so it must be decoded + 
EXPECT_EQ(uri.query().value(), "foo=~bar"); +} + +TEST(URI_query_setter, preserves_reserved_percent_encoding) { + sourcemeta::core::URI uri{"https://example.com"}; + uri.query("foo=%23bar"); + EXPECT_TRUE(uri.query().has_value()); + // %23 encodes #, a reserved character, so it must not be decoded + EXPECT_EQ(uri.query().value(), "foo=%23bar"); +} + +TEST(URI_query_setter, matches_parsed_uri_for_unreserved_percent) { + sourcemeta::core::URI built{"https://example.com"}; + built.query("foo=%7Ebar"); + const sourcemeta::core::URI parsed{"https://example.com?foo=%7Ebar"}; + EXPECT_EQ(built.query().value(), parsed.query().value()); + EXPECT_EQ(built.recompose(), parsed.recompose()); +} diff --git a/test/uri/uri_user_info_test.cc b/test/uri/uri_user_info_test.cc index 7dbc66942..1d008cbe1 100644 --- a/test/uri/uri_user_info_test.cc +++ b/test/uri/uri_user_info_test.cc @@ -69,3 +69,99 @@ TEST(URI_user_info, rfc3986_userinfo_with_at_sign) { // and must not be decoded EXPECT_EQ(uri.userinfo().value(), "user%40domain:pass"); } + +TEST(URI_user_info_setter, set_on_uri_without_userinfo) { + sourcemeta::core::URI uri{"http://host/path"}; + uri.userinfo("user"); + EXPECT_TRUE(uri.userinfo().has_value()); + EXPECT_EQ(uri.userinfo().value(), "user"); + EXPECT_EQ(uri.recompose(), "http://user@host/path"); +} + +TEST(URI_user_info_setter, replace_existing_userinfo) { + sourcemeta::core::URI uri{"http://user:pass@host/path"}; + uri.userinfo("alice"); + EXPECT_TRUE(uri.userinfo().has_value()); + EXPECT_EQ(uri.userinfo().value(), "alice"); + EXPECT_EQ(uri.recompose(), "http://alice@host/path"); +} + +TEST(URI_user_info_setter, clear_with_empty_string) { + sourcemeta::core::URI uri{"http://user:pass@host/path"}; + uri.userinfo(""); + EXPECT_FALSE(uri.userinfo().has_value()); + EXPECT_EQ(uri.recompose(), "http://host/path"); +} + +TEST(URI_user_info_setter, clear_when_already_absent) { + sourcemeta::core::URI uri{"http://host/path"}; + uri.userinfo(""); + 
EXPECT_FALSE(uri.userinfo().has_value()); + EXPECT_EQ(uri.recompose(), "http://host/path"); +} + +TEST(URI_user_info_setter, set_with_colon) { + sourcemeta::core::URI uri{"http://host/path"}; + uri.userinfo("user:secret"); + EXPECT_TRUE(uri.userinfo().has_value()); + EXPECT_EQ(uri.userinfo().value(), "user:secret"); + EXPECT_EQ(uri.recompose(), "http://user:secret@host/path"); +} + +TEST(URI_user_info_setter, percent_encoding_applied_on_recompose) { + sourcemeta::core::URI uri{"http://host/path"}; + uri.userinfo("user name"); + EXPECT_TRUE(uri.userinfo().has_value()); + EXPECT_EQ(uri.userinfo().value(), "user name"); + EXPECT_EQ(uri.recompose(), "http://user%20name@host/path"); +} + +TEST(URI_user_info_setter, returns_reference_for_chaining) { + sourcemeta::core::URI uri{"http://host"}; + uri.userinfo("user").fragment("section"); + EXPECT_EQ(uri.recompose(), "http://user@host#section"); +} + +TEST(URI_user_info_setter, normalizes_unreserved_percent_encoding) { + sourcemeta::core::URI uri{"http://host/path"}; + uri.userinfo("user%7Ename"); + EXPECT_TRUE(uri.userinfo().has_value()); + // %7E encodes ~, an unreserved character, so it must be decoded + EXPECT_EQ(uri.userinfo().value(), "user~name"); +} + +TEST(URI_user_info_setter, preserves_reserved_percent_encoding) { + sourcemeta::core::URI uri{"http://host/path"}; + uri.userinfo("user%40name"); + EXPECT_TRUE(uri.userinfo().has_value()); + // %40 encodes @, a reserved gen-delim, so it must not be decoded + EXPECT_EQ(uri.userinfo().value(), "user%40name"); +} + +TEST(URI_user_info_setter, matches_parsed_uri_for_unreserved_percent) { + sourcemeta::core::URI built{"http://host/path"}; + built.userinfo("user%7Ename"); + const sourcemeta::core::URI parsed{"http://user%7Ename@host/path"}; + EXPECT_EQ(built.userinfo().value(), parsed.userinfo().value()); + EXPECT_EQ(built.recompose(), parsed.recompose()); +} + +TEST(URI_user_info_setter, percent_encodes_at_sign_on_recompose) { + sourcemeta::core::URI 
uri{"http://host/path"}; + uri.userinfo("user@domain"); + EXPECT_TRUE(uri.userinfo().has_value()); + EXPECT_EQ(uri.userinfo().value(), "user@domain"); + // @ is a gen-delim that terminates userinfo, so it must be encoded + // on recompose to avoid producing an ambiguous URI + EXPECT_EQ(uri.recompose(), "http://user%40domain@host/path"); +} + +TEST(URI_user_info_setter, recompose_roundtrip_after_at_sign_encoded) { + sourcemeta::core::URI uri{"http://host/path"}; + uri.userinfo("user@domain"); + const auto recomposed{uri.recompose()}; + const sourcemeta::core::URI parsed{recomposed}; + EXPECT_EQ(parsed.recompose(), recomposed); + EXPECT_TRUE(parsed.userinfo().has_value()); + EXPECT_EQ(parsed.userinfo().value(), "user%40domain"); +}