1- def resolve (relative_iri : str , base_iri : str | None = None ) -> str :
2- # TODO: implement
3- return ''
"""
The functions 'remove_dot_segments()', 'remove_dot_segments_of_path()', 'resolve()' and 'is_character_allowed_after_relative_path_segment()' are direct ports of their counterparts in [relative-to-absolute-iri.js](https://github.com/rubensworks/relative-to-absolute-iri.js).
"""
4+
def is_character_allowed_after_relative_path_segment(ch: str) -> bool:
    """
    Return True if a character is valid after '.' or '..' in a path segment.

    A '.' or '..' only counts as a dot segment when it is followed by the end
    of the input, a '/', a query marker ('?') or a fragment marker ('#').

    :param ch: the character following the dots, or '' when there is none.

    :return: True if `ch` is empty or one of '#', '?', '/'; False otherwise.
    """
    return not ch or ch in ('#', '?', '/')
8+
49
def remove_dot_segments(path: str) -> str:
    """
    Removes dot segments ('.' and '..') from a URL path,
    as described in https://www.ietf.org/rfc/rfc3986.txt (page 32).

    Scanning stops at the first '?' or '#'; everything from that character
    onwards is appended to the current segment unchanged.

    :param path: the IRI path to remove dot segments from.

    :return: a path with normalized dot segments, will always start with a '/'
        (an empty input yields '/').
    """
    # One buffer of string pieces per path segment (the text between two '/').
    segment_buffers: list[list[str]] = []
    i = 0
    length = len(path)

    while i < length:
        ch = path[i]

        if ch == '/':
            # Handle '/.' or '/..'
            if i + 1 < length and path[i + 1] == '.':
                # Handle '/..'
                if i + 2 < length and path[i + 2] == '.':
                    next_ch = path[i + 3] if i + 3 < length else ''
                    if not is_character_allowed_after_relative_path_segment(next_ch):
                        # Something like '/..a': the dots are ordinary segment
                        # characters, so only consume the '/' here.
                        segment_buffers.append([])
                        i += 1
                        continue

                    # Go to parent directory (a no-op when already at the root)
                    if segment_buffers:
                        segment_buffers.pop()

                    # Add trailing slash segment if ends with '/..'
                    if i + 3 >= length:
                        segment_buffers.append([])

                    i += 3
                    continue

                # Handle '/.'
                next_ch = path[i + 2] if i + 2 < length else ''
                if not is_character_allowed_after_relative_path_segment(next_ch):
                    # Something like '/.a': the dot is an ordinary segment
                    # character, so only consume the '/' here.
                    segment_buffers.append([])
                    i += 1
                    continue

                # Add trailing slash if ends with '/.'
                if i + 2 >= length:
                    segment_buffers.append([])

                # Stay in current directory: skip the '/.'
                i += 2
                continue

            # Regular '/' starts a new segment
            segment_buffers.append([])
            i += 1
            continue

        if ch in ('#', '?'):
            # Query or fragment: append the remainder unchanged and stop
            if not segment_buffers:
                segment_buffers.append([])
            segment_buffers[-1].append(path[i:])
            break

        # Regular character: append to the current segment
        if not segment_buffers:
            segment_buffers.append([])
        segment_buffers[-1].append(ch)
        i += 1

    return '/' + '/'.join(''.join(buffer) for buffer in segment_buffers)
85+
86+
def remove_dot_segments_of_path(iri: str, colon_position: int) -> str:
    """
    Remove dot segments from the path portion of an IRI (RFC 3986 §5.2.4).

    The path is taken to start at the first '/' found after the scheme
    separator and, when present, after the '//' that follows it.

    :param iri: an IRI (or part of IRI).
    :param colon_position: the position of the first ':' in the IRI,
        or a negative value if the IRI contains no ':'.

    :return: the IRI where dot segments were removed; the IRI is returned
        unchanged when no path separator is found.
    """
    # Where the part after the scheme begins (the very start when there is no ':').
    after_scheme = colon_position + 1 if colon_position >= 0 else 0

    # Skip a leading '//' so that its slashes are not mistaken for the path start.
    if iri.startswith('//', after_scheme):
        search_offset = after_scheme + 2
    else:
        search_offset = after_scheme

    # Find the start of the path
    path_separator = iri.find('/', search_offset)
    if path_separator < 0:
        return iri

    base = iri[:path_separator]
    path = iri[path_separator:]

    # Remove dot segments from the path
    return base + remove_dot_segments(path)
118+
119+ def resolve (relative_iri : str , base_iri : str = "" ) -> str :
120+ # """
121+ # Resolves a given relative IRI to an absolute IRI.
122+
123+ # :param base_iri: the base IRI.
124+ # :param relative_iri: the relative IRI.
125+
126+ # :return: the absolute IRI.
127+ # """
128+
129+ base_fragment_pos = base_iri .find ("#" )
130+
131+ # Ignore any fragments in the base IRI
132+ if base_fragment_pos > 0 :
133+ base_iri = base_iri [:base_fragment_pos ]
134+
135+ # Convert empty value directly to base IRI
136+ if not relative_iri :
137+ if ":" not in base_iri :
138+ raise ValueError (f"Found invalid baseIRI '{ base_iri } ' for value '{ relative_iri } '" )
139+ return base_iri
140+
141+ # If the value starts with a query character, concat directly (strip existing query)
142+ if relative_iri .startswith ("?" ):
143+ base_query_pos = base_iri .find ("?" )
144+ if base_query_pos > 0 :
145+ base_iri = base_iri [:base_query_pos ]
146+ return base_iri + relative_iri
147+
148+ # If the value starts with a fragment character, concat directly
149+ if relative_iri .startswith ("#" ):
150+ return base_iri + relative_iri
151+
152+ # Ignore baseIRI if it is empty
153+ if not base_iri :
154+ relative_colon_pos = relative_iri .find (":" )
155+ if relative_colon_pos < 0 :
156+ raise ValueError (f"Found invalid relative IRI '{ relative_iri } ' for a missing baseIRI" )
157+ return remove_dot_segments_of_path (relative_iri , relative_colon_pos )
158+
159+ # Ignore baseIRI if the value is absolute
160+ value_colon_pos = relative_iri .find (":" )
161+ if value_colon_pos >= 0 :
162+ return remove_dot_segments_of_path (relative_iri , value_colon_pos )
163+
164+ # baseIRI must be absolute
165+ base_colon_pos = base_iri .find (":" )
166+ if base_colon_pos < 0 :
167+ raise ValueError (f"Found invalid baseIRI '{ base_iri } ' for value '{ relative_iri } '" )
168+
169+ base_scheme = base_iri [:base_colon_pos + 1 ]
170+
171+ # Inherit base scheme if relative starts with '//'
172+ if relative_iri .startswith ("//" ):
173+ return base_scheme + remove_dot_segments_of_path (relative_iri , value_colon_pos )
174+
175+ # Determine where the path of base starts
176+ if base_iri .find ("//" , base_colon_pos ) == base_colon_pos + 1 :
177+ base_slash_after_colon_pos = base_iri .find ("/" , base_colon_pos + 3 )
178+ if base_slash_after_colon_pos < 0 :
179+ if len (base_iri ) > base_colon_pos + 3 :
180+ return base_iri + "/" + remove_dot_segments_of_path (relative_iri , value_colon_pos )
181+ else :
182+ return base_scheme + remove_dot_segments_of_path (relative_iri , value_colon_pos )
183+ else :
184+ base_slash_after_colon_pos = base_iri .find ("/" , base_colon_pos + 1 )
185+ if base_slash_after_colon_pos < 0 :
186+ return base_scheme + remove_dot_segments_of_path (relative_iri , value_colon_pos )
187+
188+ # If relative starts with '/', append after base authority
189+ if relative_iri .startswith ("/" ):
190+ return base_iri [:base_slash_after_colon_pos ] + remove_dot_segments (relative_iri )
191+
192+ base_path = base_iri [base_slash_after_colon_pos :]
193+ last_slash = base_path .rfind ("/" )
194+
195+ # Ignore everything after last '/' in base path
196+ if last_slash >= 0 and last_slash < len (base_path ) - 1 :
197+ base_path = base_path [:last_slash + 1 ]
198+ if (relative_iri .startswith ("." ) and
199+ not relative_iri .startswith (".." ) and
200+ not relative_iri .startswith ("./" ) and
201+ len (relative_iri ) > 2 ):
202+ relative_iri = relative_iri [1 :]
42203
43- # ensure output has leading /
44- # merge path segments from section 5.2.3
45- # note that if the path includes no segments, the entire path is removed
46- if len (output ) > 0 and path .startswith ('/' ) and output [0 ] != '' :
47- output .insert (0 , '' )
48- if len (output ) == 1 and output [0 ] == '' :
49- return '/'
204+ relative_iri = base_path + relative_iri
205+ relative_iri = remove_dot_segments (relative_iri )
50206
51- return '/' . join ( output )
207+ return base_iri [: base_slash_after_colon_pos ] + relative_iri
0 commit comments