Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions src/core/uri/escaping.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@ enum class URIEscapeMode : std::uint8_t {
Fragment,
// Like SkipSubDelims but also preserves ":" for Windows filesystem paths
// (drive letters like C:)
Filesystem
Filesystem,
// userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
// See https://www.rfc-editor.org/rfc/rfc3986#appendix-A
UserInfo
};

inline auto uri_escape(std::istream &input, std::ostream &output,
Expand Down Expand Up @@ -68,7 +71,8 @@ inline auto uri_escape(std::istream &input, std::ostream &output,
}

if (mode == URIEscapeMode::SkipSubDelims || mode == URIEscapeMode::Path ||
mode == URIEscapeMode::Fragment || mode == URIEscapeMode::Filesystem) {
mode == URIEscapeMode::Fragment || mode == URIEscapeMode::Filesystem ||
mode == URIEscapeMode::UserInfo) {
if (uri_is_sub_delim(character)) {
output << character;
continue;
Expand All @@ -91,7 +95,7 @@ inline auto uri_escape(std::istream &input, std::ostream &output,
}
}

if (mode == URIEscapeMode::Filesystem) {
if (mode == URIEscapeMode::Filesystem || mode == URIEscapeMode::UserInfo) {
if (character == URI_COLON) {
output << character;
continue;
Expand Down
28 changes: 28 additions & 0 deletions src/core/uri/include/sourcemeta/core/uri.h
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,20 @@ class SOURCEMETA_CORE_URI_EXPORT URI {
/// ```
[[nodiscard]] auto query() const -> std::optional<std::string_view>;

/// Set the query part of the URI. A leading `?` in the input is stripped.
/// Passing an empty string clears the query. Percent-encoded unreserved
/// characters (such as `%7E` for `~`) are decoded when the value is stored,
/// while percent-encoded reserved characters (such as `%23` for `#`) are
/// kept as given. Returns a reference to this URI so that setters can be
/// chained. For example:
///
/// ```cpp
/// #include <sourcemeta/core/uri.h>
/// #include <cassert>
///
/// sourcemeta::core::URI uri{"https://www.sourcemeta.com"};
/// uri.query("foo=bar");
/// assert(uri.query().has_value());
/// assert(uri.query().value() == "foo=bar");
/// ```
auto query(const std::string_view query) -> URI &;

/// Recompose a URI as established by RFC 3986. For example:
///
/// ```cpp
Expand Down Expand Up @@ -436,6 +450,20 @@ class SOURCEMETA_CORE_URI_EXPORT URI {
/// colon. See https://tools.ietf.org/html/rfc3986#section-3.2.1
[[nodiscard]] auto userinfo() const -> std::optional<std::string_view>;

/// Set the user information part of the URI. Passing an empty string clears
/// the user information. Percent-encoded unreserved characters (such as
/// `%7E` for `~`) are decoded when the value is stored, while percent-encoded
/// reserved characters (such as `%40` for `@`) are kept as given. A literal
/// `@` in the input is stored verbatim and percent-encoded on recomposition.
/// Returns a reference to this URI so that setters can be chained. For
/// example:
///
/// ```cpp
/// #include <sourcemeta/core/uri.h>
/// #include <cassert>
///
/// sourcemeta::core::URI uri{"http://host/path"};
/// uri.userinfo("user");
/// assert(uri.userinfo().has_value());
/// assert(uri.userinfo().value() == "user");
/// ```
auto userinfo(const std::string_view userinfo) -> URI &;

/// To support equality of URIs
auto operator==(const URI &other) const noexcept -> bool = default;

Expand Down
7 changes: 4 additions & 3 deletions src/core/uri/recompose.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ auto escape_component_to_string(std::string &output, std::string_view input,
}

if (mode == URIEscapeMode::SkipSubDelims || mode == URIEscapeMode::Path ||
mode == URIEscapeMode::Fragment || mode == URIEscapeMode::Filesystem) {
mode == URIEscapeMode::Fragment || mode == URIEscapeMode::Filesystem ||
mode == URIEscapeMode::UserInfo) {
if (uri_is_sub_delim(character)) {
output += character;
continue;
Expand All @@ -62,7 +63,7 @@ auto escape_component_to_string(std::string &output, std::string_view input,
}
}

if (mode == URIEscapeMode::Filesystem) {
if (mode == URIEscapeMode::Filesystem || mode == URIEscapeMode::UserInfo) {
if (character == URI_COLON) {
output += character;
continue;
Expand Down Expand Up @@ -125,7 +126,7 @@ auto URI::recompose_without_fragment() const -> std::optional<std::string> {

if (user_info.has_value()) {
escape_component_to_string(result, user_info.value(),
URIEscapeMode::Fragment);
URIEscapeMode::UserInfo);
result += '@';
}

Expand Down
26 changes: 26 additions & 0 deletions src/core/uri/setters.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#include <sourcemeta/core/uri.h>

#include "escaping.h"

#include <optional> // std::optional
#include <string> // std::string
#include <string_view> // std::string_view
Expand Down Expand Up @@ -154,4 +156,28 @@ auto URI::fragment(const std::string_view fragment) -> URI & {
return *this;
}

// Replace the query component. An empty input removes the query entirely;
// otherwise a single leading `?` is dropped and the remainder is passed
// through `uri_unescape_unreserved_inplace` before being stored
auto URI::query(const std::string_view query) -> URI & {
  if (!query.empty()) {
    // The emptiness guard above makes `front()` safe to call here
    const std::string_view payload{query.front() == '?' ? query.substr(1)
                                                        : query};
    std::string normalized{payload};
    uri_unescape_unreserved_inplace(normalized);
    this->query_ = std::move(normalized);
  } else {
    this->query_ = std::nullopt;
  }

  return *this;
}

auto URI::userinfo(const std::string_view userinfo) -> URI & {
if (userinfo.empty()) {
Copy link
Copy Markdown

@augmentcode augmentcode Bot Apr 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The parser treats http://@host/... as an explicit empty userinfo marker (userinfo().has_value()==true and value is ""), but URI::userinfo("") clears userinfo_. If callers need to construct/roundtrip an empty userinfo marker via the setter API, this behavior is currently lossy vs the parse model.

Severity: medium

Fix This in Augment

🤖 Was this useful? React with 👍 or 👎, or 🚀 if it prevented an incident/outage.

this->userinfo_ = std::nullopt;
return *this;
}

std::string value{userinfo};
uri_unescape_unreserved_inplace(value);
this->userinfo_ = std::move(value);
return *this;
}

} // namespace sourcemeta::core
83 changes: 83 additions & 0 deletions test/uri/uri_query_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,86 @@ TEST(URI_query, encoded_equals_preserved) {
EXPECT_TRUE(uri.query().has_value());
EXPECT_EQ(uri.query().value(), "key%3Dvalue");
}

TEST(URI_query_setter, set_on_uri_without_query) {
  // The base URI carries no query, so the setter has to introduce one
  sourcemeta::core::URI uri{"https://example.com/path"};
  uri.query("foo=bar");

  const auto current{uri.query()};
  EXPECT_TRUE(current.has_value());
  EXPECT_EQ(current.value(), "foo=bar");
  EXPECT_EQ(uri.recompose(), "https://example.com/path?foo=bar");
}

TEST(URI_query_setter, replace_existing_query) {
  // The previous query must be fully overwritten, not appended to
  sourcemeta::core::URI uri{"https://example.com/path?old=value"};
  uri.query("foo=bar");

  const auto current{uri.query()};
  EXPECT_TRUE(current.has_value());
  EXPECT_EQ(current.value(), "foo=bar");
  EXPECT_EQ(uri.recompose(), "https://example.com/path?foo=bar");
}

TEST(URI_query_setter, clear_with_empty_string) {
  // An empty argument removes the query that was parsed from the input
  sourcemeta::core::URI uri{"https://example.com/path?foo=bar"};
  uri.query("");

  const auto current{uri.query()};
  EXPECT_FALSE(current.has_value());
  EXPECT_EQ(uri.recompose(), "https://example.com/path");
}

TEST(URI_query_setter, clear_when_already_absent) {
  // Clearing a URI that never had a query leaves it untouched
  sourcemeta::core::URI uri{"https://example.com/path"};
  uri.query("");

  const auto current{uri.query()};
  EXPECT_FALSE(current.has_value());
  EXPECT_EQ(uri.recompose(), "https://example.com/path");
}

TEST(URI_query_setter, leading_question_mark_stripped) {
  // The `?` delimiter is not part of the query value itself
  sourcemeta::core::URI uri{"https://example.com/path"};
  uri.query("?foo=bar");

  const auto current{uri.query()};
  EXPECT_TRUE(current.has_value());
  EXPECT_EQ(current.value(), "foo=bar");
  EXPECT_EQ(uri.recompose(), "https://example.com/path?foo=bar");
}

TEST(URI_query_setter, clear_preserves_fragment) {
  // Dropping the query must leave the fragment in place
  sourcemeta::core::URI uri{"https://example.com/path?foo=bar#section"};
  uri.query("");

  const auto current{uri.query()};
  EXPECT_FALSE(current.has_value());
  EXPECT_EQ(uri.recompose(), "https://example.com/path#section");
}

TEST(URI_query_setter, percent_encoding_applied_on_recompose) {
  sourcemeta::core::URI uri{"https://example.com/path"};
  uri.query("foo=hello world");

  // The stored value keeps the raw space; only recomposition escapes it
  const auto current{uri.query()};
  EXPECT_TRUE(current.has_value());
  EXPECT_EQ(current.value(), "foo=hello world");
  EXPECT_EQ(uri.recompose(), "https://example.com/path?foo=hello%20world");
}

TEST(URI_query_setter, returns_reference_for_chaining) {
  sourcemeta::core::URI uri{"https://example.com"};

  // Apply the next setter through whatever the query setter handed back
  auto &chained{uri.query("a=1")};
  chained.fragment("section");

  EXPECT_EQ(uri.recompose(), "https://example.com?a=1#section");
}

TEST(URI_query_setter, normalizes_unreserved_percent_encoding) {
  sourcemeta::core::URI uri{"https://example.com"};
  uri.query("foo=%7Ebar");

  const auto current{uri.query()};
  EXPECT_TRUE(current.has_value());
  // `~` is an unreserved character, so its `%7E` form is expected to be
  // decoded by the setter
  EXPECT_EQ(current.value(), "foo=~bar");
}

TEST(URI_query_setter, preserves_reserved_percent_encoding) {
  sourcemeta::core::URI uri{"https://example.com"};
  uri.query("foo=%23bar");

  const auto current{uri.query()};
  EXPECT_TRUE(current.has_value());
  // `#` is a reserved character, so its `%23` form has to survive as-is
  EXPECT_EQ(current.value(), "foo=%23bar");
}

TEST(URI_query_setter, matches_parsed_uri_for_unreserved_percent) {
  // Reference instance obtained purely through parsing
  const sourcemeta::core::URI parsed{"https://example.com?foo=%7Ebar"};

  // Equivalent instance assembled through the setter
  sourcemeta::core::URI built{"https://example.com"};
  built.query("foo=%7Ebar");

  const auto built_query{built.query()};
  const auto parsed_query{parsed.query()};
  EXPECT_EQ(built_query.value(), parsed_query.value());
  EXPECT_EQ(built.recompose(), parsed.recompose());
}
96 changes: 96 additions & 0 deletions test/uri/uri_user_info_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,99 @@ TEST(URI_user_info, rfc3986_userinfo_with_at_sign) {
// and must not be decoded
EXPECT_EQ(uri.userinfo().value(), "user%40domain:pass");
}

TEST(URI_user_info_setter, set_on_uri_without_userinfo) {
  // The base URI carries no user information, so the setter introduces it
  sourcemeta::core::URI uri{"http://host/path"};
  uri.userinfo("user");

  const auto current{uri.userinfo()};
  EXPECT_TRUE(current.has_value());
  EXPECT_EQ(current.value(), "user");
  EXPECT_EQ(uri.recompose(), "http://user@host/path");
}

TEST(URI_user_info_setter, replace_existing_userinfo) {
  // The parsed `user:pass` pair must be fully replaced by the new value
  sourcemeta::core::URI uri{"http://user:pass@host/path"};
  uri.userinfo("alice");

  const auto current{uri.userinfo()};
  EXPECT_TRUE(current.has_value());
  EXPECT_EQ(current.value(), "alice");
  EXPECT_EQ(uri.recompose(), "http://alice@host/path");
}

TEST(URI_user_info_setter, clear_with_empty_string) {
  // An empty argument removes the user information parsed from the input
  sourcemeta::core::URI uri{"http://user:pass@host/path"};
  uri.userinfo("");

  const auto current{uri.userinfo()};
  EXPECT_FALSE(current.has_value());
  EXPECT_EQ(uri.recompose(), "http://host/path");
}

TEST(URI_user_info_setter, clear_when_already_absent) {
  // Clearing a URI that never had user information leaves it untouched
  sourcemeta::core::URI uri{"http://host/path"};
  uri.userinfo("");

  const auto current{uri.userinfo()};
  EXPECT_FALSE(current.has_value());
  EXPECT_EQ(uri.recompose(), "http://host/path");
}

TEST(URI_user_info_setter, set_with_colon) {
  sourcemeta::core::URI uri{"http://host/path"};
  uri.userinfo("user:secret");

  // The colon separator is expected to pass through unescaped
  const auto current{uri.userinfo()};
  EXPECT_TRUE(current.has_value());
  EXPECT_EQ(current.value(), "user:secret");
  EXPECT_EQ(uri.recompose(), "http://user:secret@host/path");
}

TEST(URI_user_info_setter, percent_encoding_applied_on_recompose) {
  sourcemeta::core::URI uri{"http://host/path"};
  uri.userinfo("user name");

  // The stored value keeps the raw space; only recomposition escapes it
  const auto current{uri.userinfo()};
  EXPECT_TRUE(current.has_value());
  EXPECT_EQ(current.value(), "user name");
  EXPECT_EQ(uri.recompose(), "http://user%20name@host/path");
}

TEST(URI_user_info_setter, returns_reference_for_chaining) {
  sourcemeta::core::URI uri{"http://host"};

  // Apply the next setter through whatever the userinfo setter handed back
  auto &chained{uri.userinfo("user")};
  chained.fragment("section");

  EXPECT_EQ(uri.recompose(), "http://user@host#section");
}

TEST(URI_user_info_setter, normalizes_unreserved_percent_encoding) {
  sourcemeta::core::URI uri{"http://host/path"};
  uri.userinfo("user%7Ename");

  const auto current{uri.userinfo()};
  EXPECT_TRUE(current.has_value());
  // `~` is an unreserved character, so its `%7E` form is expected to be
  // decoded by the setter
  EXPECT_EQ(current.value(), "user~name");
}

TEST(URI_user_info_setter, preserves_reserved_percent_encoding) {
  sourcemeta::core::URI uri{"http://host/path"};
  uri.userinfo("user%40name");

  const auto current{uri.userinfo()};
  EXPECT_TRUE(current.has_value());
  // `@` is a reserved gen-delim, so its `%40` form has to survive as-is
  EXPECT_EQ(current.value(), "user%40name");
}

TEST(URI_user_info_setter, matches_parsed_uri_for_unreserved_percent) {
  // Reference instance obtained purely through parsing
  const sourcemeta::core::URI parsed{"http://user%7Ename@host/path"};

  // Equivalent instance assembled through the setter
  sourcemeta::core::URI built{"http://host/path"};
  built.userinfo("user%7Ename");

  const auto built_userinfo{built.userinfo()};
  const auto parsed_userinfo{parsed.userinfo()};
  EXPECT_EQ(built_userinfo.value(), parsed_userinfo.value());
  EXPECT_EQ(built.recompose(), parsed.recompose());
}

TEST(URI_user_info_setter, percent_encodes_at_sign_on_recompose) {
  sourcemeta::core::URI uri{"http://host/path"};
  uri.userinfo("user@domain");

  // The stored value keeps the literal `@` exactly as it was supplied
  const auto current{uri.userinfo()};
  EXPECT_TRUE(current.has_value());
  EXPECT_EQ(current.value(), "user@domain");

  // Since `@` is the gen-delim that ends the userinfo component, leaving it
  // unescaped in the recomposed string would make the URI ambiguous
  EXPECT_EQ(uri.recompose(), "http://user%40domain@host/path");
}

TEST(URI_user_info_setter, recompose_roundtrip_after_at_sign_encoded) {
  sourcemeta::core::URI original{"http://host/path"};
  original.userinfo("user@domain");

  // Serialise, parse the result again, and check that the output is stable
  const auto serialized{original.recompose()};
  const sourcemeta::core::URI reparsed{serialized};
  EXPECT_EQ(reparsed.recompose(), serialized);

  // After the round trip the `@` is only visible in its escaped form
  const auto reparsed_userinfo{reparsed.userinfo()};
  EXPECT_TRUE(reparsed_userinfo.has_value());
  EXPECT_EQ(reparsed_userinfo.value(), "user%40domain");
}
Loading