Skip to content

Commit cb880d5

Browse files
committed
Finish JS port and fix tests
1 parent f88815e commit cb880d5

File tree

2 files changed

+194
-38
lines changed

2 files changed

+194
-38
lines changed

lib/pyld/iri_resolver.py

Lines changed: 191 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,207 @@
1-
def resolve(relative_iri: str, base_iri: str | None = None) -> str:
2-
# TODO: implement
3-
return ''
1+
"""
2+
The functions 'remove_dot_segments()', 'resolve()' and 'is_character_allowed_after_relative_path_segment()' are direct ports from [relative-to-absolute-iri.js](https://github.com/rubensworks/relative-to-absolute-iri.js)
3+
"""
4+
5+
def is_character_allowed_after_relative_path_segment(ch: str) -> bool:
    """Return True if ``ch`` may directly follow a '.' or '..' path segment.

    A dot segment only counts as one when it is terminated by the end of the
    input (signalled here by an empty string), by a path separator ('/'), or
    by the start of a query ('?') or fragment ('#').

    :param ch: the single character following the dot segment, or '' when
        nothing follows it.

    :return: True if the dot segment is properly terminated, False otherwise.
    """
    return not ch or ch in ('#', '?', '/')
8+
49

510
def remove_dot_segments(path: str) -> str:
    """
    Removes dot segments ('.' and '..') from a URL path,
    as described in https://www.ietf.org/rfc/rfc3986.txt (page 32).

    Scanning stops at the first '?' or '#': the query/fragment is appended
    to the output unchanged, so dot segments inside it are left alone.

    :param path: the IRI path to remove dot segments from.

    :return: a path with normalized dot segments, will always start with a '/'.
    """
    # One buffer per output segment; each buffer holds the string pieces that
    # make up the text between two '/' separators.
    segment_buffers: list[list[str]] = []
    i = 0
    length = len(path)

    while i < length:
        ch = path[i]

        if ch == '/':
            # Handle '/.' or '/..'
            if i + 1 < length and path[i + 1] == '.':
                # Handle '/..'
                if i + 2 < length and path[i + 2] == '.':
                    next_ch = path[i + 3] if i + 3 < length else ''
                    if not is_character_allowed_after_relative_path_segment(next_ch):
                        # The dots are only the start of a regular name (e.g. '/..a'):
                        # open a new segment and copy them as ordinary characters.
                        segment_buffers.append([])
                        i += 1
                        continue

                    # Go to parent directory; with no segment left there is nothing to drop
                    if segment_buffers:
                        segment_buffers.pop()

                    # Add trailing slash segment if ends with '/..'
                    if i + 3 >= length:
                        segment_buffers.append([])

                    i += 3
                    continue

                # Handle '/.'
                next_ch = path[i + 2] if i + 2 < length else ''
                if not is_character_allowed_after_relative_path_segment(next_ch):
                    # Regular name that merely starts with a dot (e.g. '/.a')
                    segment_buffers.append([])
                    i += 1
                    continue

                # Add trailing slash if ends with '/.'
                if i + 2 >= length:
                    segment_buffers.append([])

                # Stay in current directory: skip the '/.'
                i += 2
                continue

            # Regular '/' starts a new segment
            segment_buffers.append([])
            i += 1
            continue

        if ch in ('#', '?'):
            # Query or fragment: append the remainder unchanged and stop scanning
            if not segment_buffers:
                segment_buffers.append([])
            segment_buffers[-1].append(path[i:])
            break

        # Regular character: append to current segment
        if not segment_buffers:
            segment_buffers.append([])
        segment_buffers[-1].append(ch)
        i += 1

    return '/' + '/'.join(''.join(buffer) for buffer in segment_buffers)
85+
86+
87+
def remove_dot_segments_of_path(iri: str, colon_position: int) -> str:
    """
    Remove dot segments from the path portion of an IRI (RFC 3986 §5.2.4).

    Everything before the first '/' of the path (scheme and authority, if
    present) is kept as-is; only the remainder is normalized.

    :param iri: an IRI (or part of IRI).
    :param colon_position: the position of the first ':' in the IRI,
        or a negative number if the IRI contains no ':'.

    :return: the IRI where dot segments were removed.
    """
    # Determine where to start looking for the first '/' that indicates the start of the path:
    # right after the scheme's ':' when there is one, otherwise at the very beginning.
    search_offset = colon_position + 1 if colon_position >= 0 else 0

    # Skip the '//' that introduces an authority so it is not mistaken for the path
    if iri.startswith('//', search_offset):
        search_offset += 2

    # Find the start of the path
    path_separator = iri.find('/', search_offset)
    if path_separator < 0:
        # No path at all: nothing to normalize
        return iri

    base = iri[:path_separator]
    path = iri[path_separator:]

    # Remove dot segments from the path
    return base + remove_dot_segments(path)
118+
119+
def resolve(relative_iri: str, base_iri: str = "") -> str:
120+
# """
121+
# Resolves a given relative IRI to an absolute IRI.
122+
123+
# :param base_iri: the base IRI.
124+
# :param relative_iri: the relative IRI.
125+
126+
# :return: the absolute IRI.
127+
# """
128+
129+
base_fragment_pos = base_iri.find("#")
130+
131+
# Ignore any fragments in the base IRI
132+
if base_fragment_pos > 0:
133+
base_iri = base_iri[:base_fragment_pos]
134+
135+
# Convert empty value directly to base IRI
136+
if not relative_iri:
137+
if ":" not in base_iri:
138+
raise ValueError(f"Found invalid baseIRI '{base_iri}' for value '{relative_iri}'")
139+
return base_iri
140+
141+
# If the value starts with a query character, concat directly (strip existing query)
142+
if relative_iri.startswith("?"):
143+
base_query_pos = base_iri.find("?")
144+
if base_query_pos > 0:
145+
base_iri = base_iri[:base_query_pos]
146+
return base_iri + relative_iri
147+
148+
# If the value starts with a fragment character, concat directly
149+
if relative_iri.startswith("#"):
150+
return base_iri + relative_iri
151+
152+
# Ignore baseIRI if it is empty
153+
if not base_iri:
154+
relative_colon_pos = relative_iri.find(":")
155+
if relative_colon_pos < 0:
156+
raise ValueError(f"Found invalid relative IRI '{relative_iri}' for a missing baseIRI")
157+
return remove_dot_segments_of_path(relative_iri, relative_colon_pos)
158+
159+
# Ignore baseIRI if the value is absolute
160+
value_colon_pos = relative_iri.find(":")
161+
if value_colon_pos >= 0:
162+
return remove_dot_segments_of_path(relative_iri, value_colon_pos)
163+
164+
# baseIRI must be absolute
165+
base_colon_pos = base_iri.find(":")
166+
if base_colon_pos < 0:
167+
raise ValueError(f"Found invalid baseIRI '{base_iri}' for value '{relative_iri}'")
168+
169+
base_scheme = base_iri[:base_colon_pos + 1]
170+
171+
# Inherit base scheme if relative starts with '//'
172+
if relative_iri.startswith("//"):
173+
return base_scheme + remove_dot_segments_of_path(relative_iri, value_colon_pos)
174+
175+
# Determine where the path of base starts
176+
if base_iri.find("//", base_colon_pos) == base_colon_pos + 1:
177+
base_slash_after_colon_pos = base_iri.find("/", base_colon_pos + 3)
178+
if base_slash_after_colon_pos < 0:
179+
if len(base_iri) > base_colon_pos + 3:
180+
return base_iri + "/" + remove_dot_segments_of_path(relative_iri, value_colon_pos)
181+
else:
182+
return base_scheme + remove_dot_segments_of_path(relative_iri, value_colon_pos)
183+
else:
184+
base_slash_after_colon_pos = base_iri.find("/", base_colon_pos + 1)
185+
if base_slash_after_colon_pos < 0:
186+
return base_scheme + remove_dot_segments_of_path(relative_iri, value_colon_pos)
187+
188+
# If relative starts with '/', append after base authority
189+
if relative_iri.startswith("/"):
190+
return base_iri[:base_slash_after_colon_pos] + remove_dot_segments(relative_iri)
191+
192+
base_path = base_iri[base_slash_after_colon_pos:]
193+
last_slash = base_path.rfind("/")
194+
195+
# Ignore everything after last '/' in base path
196+
if last_slash >= 0 and last_slash < len(base_path) - 1:
197+
base_path = base_path[:last_slash + 1]
198+
if (relative_iri.startswith(".") and
199+
not relative_iri.startswith("..") and
200+
not relative_iri.startswith("./") and
201+
len(relative_iri) > 2):
202+
relative_iri = relative_iri[1:]
42203

43-
# ensure output has leading /
44-
# merge path segments from section 5.2.3
45-
# note that if the path includes no segments, the entire path is removed
46-
if len(output) > 0 and path.startswith('/') and output[0] != '':
47-
output.insert(0, '')
48-
if len(output) == 1 and output[0] == '':
49-
return '/'
204+
relative_iri = base_path + relative_iri
205+
relative_iri = remove_dot_segments(relative_iri)
50206

51-
return '/'.join(output)
207+
return base_iri[:base_slash_after_colon_pos] + relative_iri

tests/test_iri_resolver.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ def test_empty_relative_with_complex_base(self):
168168
assert resolve('', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/d;p?q'
169169

170170
def test_dot_relative_with_complex_base(self):
    # '.' refers to the directory of the base: last segment and query are dropped
    assert resolve('.', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/'
172172

173173
def test_dot_slash_relative_with_complex_base(self):
    # './' resolves to the directory of the base, with a trailing slash
    assert resolve('./', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/'
@@ -210,7 +210,7 @@ def test_slash_double_dot_slash_g_relative_with_complex_base(self):
210210
assert resolve('/../g', 'file:///a/bb/ccc/d;p?q') == 'file:///g'
211211

212212
def test_dot_suffix_relative_with_complex_base(self):
    # A trailing dot is part of the segment name and must be preserved
    assert resolve('g.', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g.'
214214

215215
def test_dot_prefix_relative_with_complex_base(self):
    # A leading dot followed by a single character is kept as part of the name
    assert resolve('.g', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/.g'
@@ -326,7 +326,7 @@ def test_not_modify_fragments(self):
326326
assert remove_dot_segments('/abc#abcdef') == '/abc#abcdef'
327327

328328
def test_not_modify_paths_in_fragments(self):
    # Slashes inside a fragment are not path separators and stay untouched
    assert remove_dot_segments('/abc#a/bc/def') == '/abc#a/bc/def'
330330

331331
def test_not_modify_current_paths_in_fragments(self):
    # A '/./' inside a fragment is not a dot segment and stays untouched
    assert remove_dot_segments('/abc#a/./bc/def') == '/abc#a/./bc/def'

0 commit comments

Comments
 (0)