|
| 1 | +from __future__ import annotations |
| 2 | + |
| 3 | +import re |
| 4 | +from typing import ( |
| 5 | + Any, |
| 6 | + Mapping, |
| 7 | + Callable, |
| 8 | +) |
| 9 | +from urllib.parse import quote |
| 10 | + |
# Matches a whole path segment that is '.' or '..', where each dot is either
# literal or percent-encoded (%2e / %2E); mixed forms such as '.%2E' match too.
_DOT_SEGMENT_RE = re.compile(r"^(?:\.|%2[eE]){1,2}$")

# Matches a {name} placeholder. The single capturing group is the placeholder
# name (one or more word characters), without the surrounding braces.
_PLACEHOLDER_RE = re.compile(r"\{(\w+)\}")
| 15 | + |
| 16 | + |
| 17 | +def _quote_path_segment_part(value: str) -> str: |
| 18 | + """Percent-encode `value` for use in a URI path segment. |
| 19 | +
|
| 20 | + Considers characters not in `pchar` set from RFC 3986 §3.3 to be unsafe. |
| 21 | + https://datatracker.ietf.org/doc/html/rfc3986#section-3.3 |
| 22 | + """ |
| 23 | + # quote() already treats unreserved characters (letters, digits, and -._~) |
| 24 | + # as safe, so we only need to add sub-delims, ':', and '@'. |
| 25 | + # Notably, unlike the default `safe` for quote(), / is unsafe and must be quoted. |
| 26 | + return quote(value, safe="!$&'()*+,;=:@") |
| 27 | + |
| 28 | + |
| 29 | +def _quote_query_part(value: str) -> str: |
| 30 | + """Percent-encode `value` for use in a URI query string. |
| 31 | +
|
| 32 | + Considers &, = and characters not in `query` set from RFC 3986 §3.4 to be unsafe. |
| 33 | + https://datatracker.ietf.org/doc/html/rfc3986#section-3.4 |
| 34 | + """ |
| 35 | + return quote(value, safe="!$'()*+,;:@/?") |
| 36 | + |
| 37 | + |
| 38 | +def _quote_fragment_part(value: str) -> str: |
| 39 | + """Percent-encode `value` for use in a URI fragment. |
| 40 | +
|
| 41 | + Considers characters not in `fragment` set from RFC 3986 §3.5 to be unsafe. |
| 42 | + https://datatracker.ietf.org/doc/html/rfc3986#section-3.5 |
| 43 | + """ |
| 44 | + return quote(value, safe="!$&'()*+,;=:@/?") |
| 45 | + |
| 46 | + |
def _interpolate(
    template: str,
    values: Mapping[str, Any],
    quoter: Callable[[str], str],
) -> str:
    """Replace {name} placeholders in `template`, quoting each value with `quoter`.

    Placeholder names are looked up in `values`.

    Args:
        template: Text containing zero or more {name} placeholders.
        values: Mapping from placeholder name to the value to substitute.
        quoter: Percent-encoding function applied to the string form of every
            value that is neither None nor a bool.

    Returns:
        `template` with every placeholder substituted. None is rendered as
        "null" and bools as "true" / "false", both inserted without calling
        `quoter`; any other value is converted with str() and then quoted.

    Raises:
        KeyError: If a placeholder is not found in `values`.
    """
    # re.split with a capturing group returns alternating
    # [text, name, text, name, ..., text] elements, so the placeholder names
    # sit at the odd indices.
    parts = _PLACEHOLDER_RE.split(template)

    for i in range(1, len(parts), 2):
        name = parts[i]
        if name not in values:
            raise KeyError(f"a value for placeholder {{{name}}} was not provided")
        val = values[name]
        if val is None:
            rendered = "null"
        elif isinstance(val, bool):
            # Handled explicitly: str(True) would give "True", not "true".
            rendered = "true" if val else "false"
        else:
            # Reuse `val` instead of looking the name up again, so the value
            # that was type-checked above is exactly the value rendered.
            rendered = quoter(str(val))
        parts[i] = rendered

    return "".join(parts)
| 76 | + |
| 77 | + |
def path_template(template: str, /, **kwargs: Any) -> str:
    """Interpolate {name} placeholders in `template` from keyword arguments.

    Args:
        template: The template string containing {name} placeholders.
            Positional-only, so a placeholder may itself be named `template`.
        **kwargs: Keyword arguments to interpolate into the template.

    Returns:
        The template with placeholders interpolated and percent-encoded.

    Safe characters for percent-encoding are dependent on the URI component.
    Placeholders in the path portion are percent-encoded where the `segment`
    set from RFC 3986 §3.3 is considered safe, and placeholders in the fragment
    portion where the `fragment` set from RFC 3986 §3.5 is considered safe.
    Placeholders in the query portion are percent-encoded where the `query` set
    from RFC 3986 §3.4 is considered safe except for = and & characters.

    None is rendered as "null" and bools as "true" / "false".

    Raises:
        KeyError: If a placeholder is not found in `kwargs`.
        ValueError: If resulting path contains /./ or /../ segments
            (including percent-encoded dot-segments).
    """
    # Split the template into path, query, and fragment portions. The fragment
    # is split off first: everything after the first '#' belongs to it, so a
    # '?' that appears there must not start a query.
    rest, hash_sep, fragment_text = template.partition("#")
    fragment_template: str | None = fragment_text if hash_sep else None
    path_part, query_sep, query_text = rest.partition("?")
    query_template: str | None = query_text if query_sep else None

    # Interpolate each portion with the appropriate quoting rules.
    path_result = _interpolate(path_part, kwargs, _quote_path_segment_part)

    # Reject dot-segments (. and ..) in the final assembled path. The check
    # runs after interpolation so that adjacent placeholders or a mix of static
    # text and placeholders that together form a dot-segment are caught.
    # Also reject percent-encoded dot-segments to protect against incorrectly
    # implemented normalization in servers/proxies.
    for segment in path_result.split("/"):
        if _DOT_SEGMENT_RE.match(segment):
            raise ValueError(f"Constructed path {path_result!r} contains dot-segment {segment!r} which is not allowed")

    result = path_result
    if query_template is not None:
        result += "?" + _interpolate(query_template, kwargs, _quote_query_part)
    if fragment_template is not None:
        result += "#" + _interpolate(fragment_template, kwargs, _quote_fragment_part)

    return result
0 commit comments