From 81907af59d56c88e58b76142295cd0f728baf6fb Mon Sep 17 00:00:00 2001 From: Miel Vander Sande Date: Tue, 4 Nov 2025 13:23:16 +0100 Subject: [PATCH 1/5] Add stub files and start porting tests --- lib/pyld/iri_resolver.py | 51 +++ tests/test_iri_resolver.py | 718 +++++++++++++++++++++++++++++++++++++ 2 files changed, 769 insertions(+) create mode 100644 lib/pyld/iri_resolver.py create mode 100644 tests/test_iri_resolver.py diff --git a/lib/pyld/iri_resolver.py b/lib/pyld/iri_resolver.py new file mode 100644 index 0000000..a903026 --- /dev/null +++ b/lib/pyld/iri_resolver.py @@ -0,0 +1,51 @@ +def resolve(relative_iri: str, base_iri: str | None = None) -> str: + # TODO: implement + return '' + +def remove_dot_segments(path: str) -> str: + """ + Removes dot segments from a URL path. + + :param path: the path to remove dot segments from. + + :return: a path with normalized dot segments. + """ + + # RFC 3986 5.2.4 (reworked) + + # empty path shortcut + if len(path) == 0: + return '' + + input = path.split('/') + output = [] + + while len(input) > 0: + next = input.pop(0) + done = len(input) == 0 + + if next == '.': + if done: + # ensure output has trailing / + output.append('') + continue + + if next == '..': + if len(output) > 0: + output.pop() + if done: + # ensure output has trailing / + output.append('') + continue + + output.append(next) + + # ensure output has leading / + # merge path segments from section 5.2.3 + # note that if the path includes no segments, the entire path is removed + if len(output) > 0 and path.startswith('/') and output[0] != '': + output.insert(0, '') + if len(output) == 1 and output[0] == '': + return '/' + + return '/'.join(output) diff --git a/tests/test_iri_resolver.py b/tests/test_iri_resolver.py new file mode 100644 index 0000000..64b79d9 --- /dev/null +++ b/tests/test_iri_resolver.py @@ -0,0 +1,718 @@ +import pytest +from pyld.iri_resolver import resolve, remove_dot_segments + +# Tests ported from relative-to-absolute-iri.js: 
import pytest

from pyld.iri_resolver import remove_dot_segments, resolve

# Tests ported from relative-to-absolute-iri.js:
# https://github.com/rubensworks/relative-to-absolute-iri.js/blob/master/test/Resolve-test.ts

# Base IRI shared by the RFC 3986 section 5.4 style cases below.
COMPLEX_BASE = 'file:///a/bb/ccc/d;p?q'


# ---------- Tests for resolve() ----------
class TestResolve:
    """
    Expected behaviour of resolve(relative_iri, base_iri).

    Each test mirrors one case of the reference JavaScript test suite: the
    first argument is the (possibly relative) IRI, the optional second one
    is the base IRI, and the assertion pins the resolved IRI or the
    ValueError message.
    """

    def test_absolute_iri_no_base(self):
        assert resolve('http://example.org/') == 'http://example.org/'

    def test_absolute_iri_empty_base(self):
        assert resolve('http://example.org/', '') == 'http://example.org/'

    def test_absolute_iri_with_base(self):
        assert resolve('http://example.org/', 'http://base.org/') == 'http://example.org/'

    def test_empty_value_uses_base(self):
        assert resolve('', 'http://base.org/') == 'http://base.org/'

    def test_relative_with_scheme_no_base(self):
        assert resolve('ex:abc') == 'ex:abc'

    def test_relative_without_scheme_no_base_error(self):
        with pytest.raises(ValueError, match=r"Found invalid relative IRI 'abc' for a missing baseIRI"):
            resolve('abc')

    def test_relative_without_dot_segments_no_base(self):
        assert resolve('http://abc/../../') == 'http://abc/'

    def test_relative_with_base(self):
        assert resolve('abc', 'http://base.org/') == 'http://base.org/abc'

    def test_relative_with_fragment_base(self):
        assert resolve('abc', 'http://base.org/#frag') == 'http://base.org/abc'

    def test_hash_relative(self):
        assert resolve('#abc', 'http://base.org/') == 'http://base.org/#abc'

    def test_colon_in_value_ignores_base(self):
        assert resolve('http:abc', 'http://base.org/') == 'http:abc'

    def test_colon_in_value_removes_dots(self):
        assert resolve('http://abc/../../', 'http://base.org/') == 'http://abc/'

    def test_non_absolute_base_error(self):
        with pytest.raises(ValueError, match=r"Found invalid baseIRI 'def' for value 'abc'"):
            resolve('abc', 'def')

    def test_non_absolute_base_empty_value_error(self):
        with pytest.raises(ValueError, match=r"Found invalid baseIRI 'def' for value ''"):
            resolve('', 'def')

    def test_scheme_from_base_if_value_starts_with_slash_slash(self):
        assert resolve('//abc', 'http://base.org/') == 'http://abc'

    def test_base_without_path_slash(self):
        assert resolve('abc', 'http://base.org') == 'http://base.org/abc'

    def test_base_without_path_dot_segments(self):
        assert resolve('abc/./', 'http://base.org') == 'http://base.org/abc/'

    def test_base_only_scheme_slash_slash(self):
        assert resolve('abc', 'http://') == 'http:abc'

    def test_base_only_scheme_slash_slash_dot_segments(self):
        assert resolve('abc/./', 'http://') == 'http:abc/'

    def test_base_with_char_after_colon(self):
        assert resolve('abc', 'http:a') == 'http:abc'

    def test_base_with_char_after_colon_dot_segments(self):
        assert resolve('abc/./', 'http:a') == 'http:abc/'

    def test_base_only_scheme(self):
        assert resolve('abc', 'http:') == 'http:abc'

    def test_base_only_scheme_dot_segments(self):
        assert resolve('abc/./', 'http:') == 'http:abc/'

    def test_absolute_path_ignores_base_path(self):
        assert resolve('/abc/def/', 'http://base.org/123/456/') == 'http://base.org/abc/def/'

    def test_base_with_last_slash_replacement(self):
        assert resolve('xyz', 'http://aa/a') == 'http://aa/xyz'

    def test_base_collapse_parent_paths(self):
        assert resolve('xyz', 'http://aa/parent/parent/../../a') == 'http://aa/xyz'

    def test_base_remove_current_dir(self):
        assert resolve('xyz', 'http://aa/././a') == 'http://aa/xyz'

    def test_base_dot(self):
        assert resolve('.', 'http://aa/') == 'http://aa/'

    def test_base_double_dot(self):
        assert resolve('..', 'http://aa/b/') == 'http://aa/'

    def test_base_double_dot_slash(self):
        assert resolve('../', 'http://aa/b/') == 'http://aa/'

    def test_base_without_ending_slash_double_dot(self):
        assert resolve('..', 'http://aa/b') == 'http://aa/'

    def test_base_without_ending_slash_double_dot_slash(self):
        assert resolve('../', 'http://aa/b') == 'http://aa/'

    def test_base_without_ending_slash_query(self):
        assert resolve('?a=b', 'http://abc/def/ghi') == 'http://abc/def/ghi?a=b'

    def test_base_without_ending_slash_dot_query(self):
        assert resolve('.?a=b', 'http://abc/def/ghi') == 'http://abc/def/?a=b'

    def test_base_without_ending_slash_double_dot_query(self):
        assert resolve('..?a=b', 'http://abc/def/ghi') == 'http://abc/?a=b'

    def test_base_without_ending_slash_xyz(self):
        assert resolve('xyz', 'http://abc/d:f/ghi') == 'http://abc/d:f/xyz'

    def test_base_without_ending_slash_dot_xyz(self):
        assert resolve('./xyz', 'http://abc/d:f/ghi') == 'http://abc/d:f/xyz'

    def test_base_without_ending_slash_double_dot_xyz(self):
        assert resolve('../xyz', 'http://abc/d:f/ghi') == 'http://abc/xyz'

    # ----- relative IRIs against the complex base file:///a/bb/ccc/d;p?q -----

    def test_relative_with_colon_ignores_base(self):
        assert resolve('g:h', COMPLEX_BASE) == 'g:h'

    def test_simple_relative_with_complex_base(self):
        assert resolve('g', COMPLEX_BASE) == 'file:///a/bb/ccc/g'

    # Renamed: this used to share its name with the '.' test further down,
    # so the later definition replaced it and the './g' case never ran.
    def test_dot_slash_g_relative_with_complex_base(self):
        assert resolve('./g', COMPLEX_BASE) == 'file:///a/bb/ccc/g'

    def test_slash_suffix_relative_with_complex_base(self):
        assert resolve('g/', COMPLEX_BASE) == 'file:///a/bb/ccc/g/'

    def test_slash_prefix_relative_with_complex_base(self):
        assert resolve('/g', COMPLEX_BASE) == 'file:///g'

    def test_double_slash_prefix_relative_with_complex_base(self):
        assert resolve('//g', COMPLEX_BASE) == 'file://g'

    def test_questionmark_prefix_relative_with_complex_base(self):
        assert resolve('?y', COMPLEX_BASE) == 'file:///a/bb/ccc/d;p?y'

    def test_questionmark_middle_relative_with_complex_base(self):
        assert resolve('g?y', COMPLEX_BASE) == 'file:///a/bb/ccc/g?y'

    def test_hashtag_prefix_relative_with_complex_base(self):
        assert resolve('#s', COMPLEX_BASE) == 'file:///a/bb/ccc/d;p?q#s'

    def test_middle_hashtag_relative_with_complex_base(self):
        assert resolve('g#s', COMPLEX_BASE) == 'file:///a/bb/ccc/g#s'

    def test_middle_questionmark_and_hashtag_relative_with_complex_base(self):
        assert resolve('g?y#s', COMPLEX_BASE) == 'file:///a/bb/ccc/g?y#s'

    def test_semicolon_prefix_relative_with_complex_base(self):
        assert resolve(';x', COMPLEX_BASE) == 'file:///a/bb/ccc/;x'

    def test_middle_semicolon_relative_with_complex_base(self):
        assert resolve('g;x', COMPLEX_BASE) == 'file:///a/bb/ccc/g;x'

    def test_semicolon_questionmark_and_hashtag_relative_with_complex_base(self):
        assert resolve('g;x?y#s', COMPLEX_BASE) == 'file:///a/bb/ccc/g;x?y#s'

    def test_empty_relative_with_complex_base(self):
        assert resolve('', COMPLEX_BASE) == 'file:///a/bb/ccc/d;p?q'

    # Expected value corrected: '.' resolves to the base directory (the
    # reference suite expects 'file:///a/bb/ccc/'), not to the base itself.
    def test_dot_relative_with_complex_base(self):
        assert resolve('.', COMPLEX_BASE) == 'file:///a/bb/ccc/'

    def test_dot_slash_relative_with_complex_base(self):
        assert resolve('./', COMPLEX_BASE) == 'file:///a/bb/ccc/'

    def test_double_dot_relative_with_complex_base(self):
        assert resolve('..', COMPLEX_BASE) == 'file:///a/bb/'

    def test_double_dot_slash_relative_with_complex_base(self):
        assert resolve('../', COMPLEX_BASE) == 'file:///a/bb/'

    def test_double_dot_slash_g_relative_with_complex_base(self):
        assert resolve('../g', COMPLEX_BASE) == 'file:///a/bb/g'

    def test_double_dot_slash_double_dot_relative_with_complex_base(self):
        assert resolve('../..', COMPLEX_BASE) == 'file:///a/'

    def test_2x_dot_slash_double_dot_slash_relative_with_complex_base(self):
        assert resolve('../../', COMPLEX_BASE) == 'file:///a/'

    def test_2x_double_dot_slash_with_g_relative_with_complex_base(self):
        assert resolve('../../g', COMPLEX_BASE) == 'file:///a/g'

    def test_2x_double_dot_slash_with_double_dot_relative_with_complex_base(self):
        assert resolve('../../..', COMPLEX_BASE) == 'file:///'

    def test_3x_double_dot_slash_relative_with_complex_base(self):
        assert resolve('../../../', COMPLEX_BASE) == 'file:///'

    def test_3x_double_dot_slash_with_g_relative_with_complex_base(self):
        assert resolve('../../../g', COMPLEX_BASE) == 'file:///g'

    def test_4x_double_dot_slash_with_g_relative_with_complex_base(self):
        assert resolve('../../../../g', COMPLEX_BASE) == 'file:///g'

    def test_slash_dot_slash_g_relative_with_complex_base(self):
        assert resolve('/./g', COMPLEX_BASE) == 'file:///g'

    def test_slash_double_dot_slash_g_relative_with_complex_base(self):
        assert resolve('/../g', COMPLEX_BASE) == 'file:///g'

    # Expected value corrected: 'g.' is an ordinary segment, so the
    # trailing dot must be kept (the reference suite expects '.../g.').
    def test_dot_suffix_relative_with_complex_base(self):
        assert resolve('g.', COMPLEX_BASE) == 'file:///a/bb/ccc/g.'

    def test_dot_prefix_relative_with_complex_base(self):
        assert resolve('.g', COMPLEX_BASE) == 'file:///a/bb/ccc/.g'

    def test_double_dot_suffix_relative_with_complex_base(self):
        assert resolve('g..', COMPLEX_BASE) == 'file:///a/bb/ccc/g..'

    def test_double_dot_prefix_relative_with_complex_base(self):
        assert resolve('..g', COMPLEX_BASE) == 'file:///a/bb/ccc/..g'

    def test_dot_slash_double_dot_slash_g_relative_with_complex_base(self):
        assert resolve('./../g', COMPLEX_BASE) == 'file:///a/bb/g'

    def test_dot_slash_g_slash_dot_relative_with_complex_base(self):
        assert resolve('./g/.', COMPLEX_BASE) == 'file:///a/bb/ccc/g/'

    def test_g_slash_dot_slash_h_relative_with_complex_base(self):
        assert resolve('g/./h', COMPLEX_BASE) == 'file:///a/bb/ccc/g/h'

    def test_g_slash_double_dot_slash_h_relative_with_complex_base(self):
        assert resolve('g/../h', COMPLEX_BASE) == 'file:///a/bb/ccc/h'

    def test_g_semicolon_x_equals_1_slash_dot_slash_y_relative_with_complex_base(self):
        assert resolve('g;x=1/./y', COMPLEX_BASE) == 'file:///a/bb/ccc/g;x=1/y'

    def test_g_semicolon_x_equals_1_slash_double_dot_slash_y_relative_with_complex_base(self):
        assert resolve('g;x=1/../y', COMPLEX_BASE) == 'file:///a/bb/ccc/y'

    def test_g_questionmark_y_slash_dot_slash_x_relative_with_complex_base(self):
        assert resolve('g?y/./x', COMPLEX_BASE) == 'file:///a/bb/ccc/g?y/./x'

    def test_g_questionmark_y_slash_double_dot_slash_x_relative_with_complex_base(self):
        assert resolve('g?y/../x', COMPLEX_BASE) == 'file:///a/bb/ccc/g?y/../x'

    def test_g_hashtag_s_slash_dot_slash_x_relative_with_complex_base(self):
        assert resolve('g#s/./x', COMPLEX_BASE) == 'file:///a/bb/ccc/g#s/./x'

    def test_g_hashtag_s_slash_double_dot_slash_x_relative_with_complex_base(self):
        assert resolve('g#s/../x', COMPLEX_BASE) == 'file:///a/bb/ccc/g#s/../x'

    def test_http_colon_g_relative_with_complex_base(self):
        assert resolve('http:g', COMPLEX_BASE) == 'http:g'

    def test_scheme_relative_with_dot_segments_and_fragment_base(self):
        assert resolve(
            '//example.org/.././useless/../../scheme-relative',
            'http://example.com/some/deep/directory/and/file#with-a-fragment',
        ) == 'http://example.org/scheme-relative'

    # ----- base IRIs without // after the scheme -----

    def test_relative_a_with_base_without_double_slash_after_scheme(self):
        assert resolve('a', 'tag:example') == 'tag:a'

    def test_relative_a_with_base_without_double_slash_after_scheme_with_one_slash(self):
        assert resolve('a', 'tag:example/foo') == 'tag:example/a'

    def test_relative_a_with_base_without_double_slash_after_scheme_with_two_slash(self):
        assert resolve('a', 'tag:example/foo/') == 'tag:example/foo/a'

    # ----- '...' is an ordinary segment, not a dot segment -----

    def test_relative_with_triple_dot_segment_and_double_dot_and_base(self):
        assert resolve('../.../../', 'http://example.org/a/b/c/') == 'http://example.org/a/b/'

    def test_relative_with_triple_dot_segment_and_2x_double_dot_and_base(self):
        assert resolve('../.../../../', 'http://example.org/a/b/c/') == 'http://example.org/a/'


# ---------- Tests for remove_dot_segments() ----------
class TestRemoveDotSegments:
    """
    Expected behaviour of remove_dot_segments(path).

    The expectations follow the reference JavaScript test suite: results
    always start with '/', and anything from the first '?' or '#' onwards is
    expected to be returned unchanged.
    """

    def test_no_slash(self):
        assert remove_dot_segments('abc') == '/abc'

    def test_single_slash_end(self):
        assert remove_dot_segments('abc/') == '/abc/'

    def test_leading_slash(self):
        assert remove_dot_segments('/abc') == '/abc'

    def test_leading_and_trailing_slash(self):
        assert remove_dot_segments('/abc/') == '/abc/'

    def test_dot(self):
        assert remove_dot_segments('/.') == '/'

    def test_dotdot(self):
        assert remove_dot_segments('/..') == '/'

    def test_parent_directory(self):
        assert remove_dot_segments('/abc/..') == '/'

    def test_too_many_parents(self):
        assert remove_dot_segments('/abc/../../..') == '/'

    def test_current_directory(self):
        assert remove_dot_segments('/abc/.') == '/abc/'

    def test_inbetween_parent_directory(self):
        assert remove_dot_segments('/abc/../def/') == '/def/'

    def test_inbetween_parent_directory_2(self):
        assert remove_dot_segments('mid/content=5/../6') == '/mid/6'

    def test_inbetween_current_directory(self):
        assert remove_dot_segments('/abc/./def/') == '/abc/def/'

    def test_multiple_parents(self):
        assert remove_dot_segments('/abc/def/ghi/../..') == '/abc/'

    def test_multiple_currents(self):
        assert remove_dot_segments('/abc/././.') == '/abc/'

    def test_mixed_current_and_parent(self):
        assert remove_dot_segments('/abc/def/./ghi/../..') == '/abc/'

    def test_another_mixed_current_and_parent(self):
        assert remove_dot_segments('/a/b/c/./../../g') == '/a/g'

    # ----- fragments are left untouched -----

    def test_not_modify_fragments(self):
        assert remove_dot_segments('/abc#abcdef') == '/abc#abcdef'

    # Input corrected: it was '/abc#abcdef', which can never equal the
    # expected '/abc#a/bc/def'; the reference suite passes '/abc#a/bc/def'.
    def test_not_modify_paths_in_fragments(self):
        assert remove_dot_segments('/abc#a/bc/def') == '/abc#a/bc/def'

    def test_not_modify_current_paths_in_fragments(self):
        assert remove_dot_segments('/abc#a/./bc/def') == '/abc#a/./bc/def'

    def test_not_modify_parent_paths_in_fragments(self):
        assert remove_dot_segments('/abc#a/../bc/def') == '/abc#a/../bc/def'

    # ----- queries are left untouched -----

    def test_not_modify_queries(self):
        assert remove_dot_segments('/abc?abcdef') == '/abc?abcdef'

    def test_not_modify_paths_in_queries(self):
        assert remove_dot_segments('/abc?a/bc/def') == '/abc?a/bc/def'

    def test_not_modify_current_paths_in_queries(self):
        assert remove_dot_segments('/abc?a/./bc/def') == '/abc?a/./bc/def'

    def test_not_modify_parent_paths_in_queries(self):
        assert remove_dot_segments('/abc?a/../bc/def') == '/abc?a/../bc/def'

    def test_mixed_current_and_parent_with_fragment(self):
        assert remove_dot_segments('/abc/def/./ghi/../..#abc') == '/abc#abc'

    def test_fragment_without_another_path(self):
        assert remove_dot_segments('#abc') == '/#abc'

    # ----- zero-length segments -----

    def test_not_remove_zerolength_segments(self):
        assert remove_dot_segments('/abc//def/') == '/abc//def/'

    def test_parent_into_zerolength_segments(self):
        assert remove_dot_segments('/abc//def//../') == '/abc//def/'

    def test_current_over_zerolength_segments(self):
        assert remove_dot_segments('/abc//def//./') == '/abc//def//'

    # ----- paths followed by a query -----

    def test_resolve_query_against_non_slash(self):
        assert remove_dot_segments('/def/ghi?a=b') == '/def/ghi?a=b'

    def test_resolve_query_against_slash(self):
        assert remove_dot_segments('/def/?a=b') == '/def/?a=b'

    def test_resolve_double_dot_and_query(self):
        assert remove_dot_segments('/def/..?a=b') == '/?a=b'

    # ----- segments that merely contain dots are ordinary segments -----

    def test_append_dot_g_after_slash(self):
        assert remove_dot_segments('/a/bb/ccc/.g') == '/a/bb/ccc/.g'

    def test_append_g_dot_after_slash(self):
        assert remove_dot_segments('/a/bb/ccc/g.') == '/a/bb/ccc/g.'

    def test_append_double_dot_g_after_slash(self):
        assert remove_dot_segments('/a/bb/ccc/..g') == '/a/bb/ccc/..g'

    def test_append_g_double_dot_after_slash(self):
        assert remove_dot_segments('/a/bb/ccc/g..') == '/a/bb/ccc/g..'

    def test_trailing_slash_dot_ends_with_slash(self):
        assert remove_dot_segments('/a/bb/ccc/./g/.') == '/a/bb/ccc/g/'

    def test_triple_dots_as_normal_segment(self):
        assert remove_dot_segments('/invalid/...') == '/invalid/...'

    def test_triple_dots_followed_by_double_dot(self):
        assert remove_dot_segments('/invalid/.../..') == '/invalid/'

    def test_four_dots_as_normal_segment(self):
        assert remove_dot_segments('/invalid/../..../../../.../.htaccess') == '/.../.htaccess'

    def test_dot_and_invalid_char_as_normal_segment(self):
        assert remove_dot_segments('/invalid/../.a/../../.../.htaccess') == '/.../.htaccess'


if __name__ == "__main__":
    pytest.main(["-v", __file__])
5 Nov 2025 12:59:18 +0100 Subject: [PATCH 2/5] Complete porting tests --- tests/test_iri_resolver.py | 425 +++++-------------------------------- 1 file changed, 54 insertions(+), 371 deletions(-) diff --git a/tests/test_iri_resolver.py b/tests/test_iri_resolver.py index 64b79d9..f810c4d 100644 --- a/tests/test_iri_resolver.py +++ b/tests/test_iri_resolver.py @@ -128,362 +128,147 @@ def test_relative_with_colon_ignores_base(self): def test_simple_relative_with_complex_base(self): assert resolve('g', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g' - def test_dot_relative_with_complex_base(self): + def test_dot_slash_g_relative_with_complex_base(self): assert resolve('./g', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g' -# it('create an IRI from a g/ relative IRI and complex baseIRI', () => { -# expect(resolve('g/', 'file:///a/bb/ccc/d;p?q')) -# .toEqual(''); -# }); - def test_slash_suffix_relative_with_complex_base(self): assert resolve('g/', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g/' -# it('create an IRI from a /g relative IRI and complex baseIRI', () => { -# expect(resolve('/g', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///g'); -# }); - def test_slash_prefix_relative_with_complex_base(self): assert resolve('/g', 'file:///a/bb/ccc/d;p?q') == 'file:///g' -# it('create an IRI from a //g relative IRI and complex baseIRI', () => { -# expect(resolve('//g', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file://g'); -# }); - def test_double_slash_prefix_relative_with_complex_base(self): assert resolve('//g', 'file:///a/bb/ccc/d;p?q') == 'file://g' -# it('create an IRI from a ?y relative IRI and complex baseIRI', () => { -# expect(resolve('?y', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/d;p?y'); -# }); - def test_questionmark_prefix_relative_with_complex_base(self): assert resolve('?y', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/d;p?y' -# it('create an IRI from a g?y relative IRI and complex baseIRI', () => { -# expect(resolve('g?y', 
'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/g?y'); -# }); - def test_questionmark_middle_relative_with_complex_base(self): assert resolve('g?y', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g?y' -# it('create an IRI from a #s relative IRI and complex baseIRI', () => { -# expect(resolve('#s', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/d;p?q#s'); -# }); - def test_hashtag_prefix_relative_with_complex_base(self): assert resolve('#s', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/d;p?q#s' -# it('create an IRI from a g#s relative IRI and complex baseIRI', () => { -# expect(resolve('g#s', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/g#s'); -# }); - def test_middle_hashtag_relative_with_complex_base(self): assert resolve('g#s', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g#s' -# it('create an IRI from a g?y#s relative IRI and complex baseIRI', () => { -# expect(resolve('g?y#s', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/g?y#s'); -# }); - def test_middle_questionmark_and_hashtag_relative_with_complex_base(self): assert resolve('g?y#s', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g?y#s' -# it('create an IRI from a ;x relative IRI and complex baseIRI', () => { -# expect(resolve(';x', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/;x'); -# }); - def test_semicolon_prefix_relative_with_complex_base(self): assert resolve(';x', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/;x' -# it('create an IRI from a g;x relative IRI and complex baseIRI', () => { -# expect(resolve('g;x', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/g;x'); -# }); - def test_middle_semicolon_relative_with_complex_base(self): assert resolve('g;x', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g;x' -# it('create an IRI from a g;x?y#s relative IRI and complex baseIRI', () => { -# expect(resolve('g;x?y#s', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/g;x?y#s'); -# }); - def 
test_semicolon_questionmark_and_hashtag_relative_with_complex_base(self): assert resolve('g;x?y#s', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g;x?y#s' -# it('create an IRI from an empty relative IRI and complex baseIRI', () => { -# expect(resolve('', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/d;p?q'); -# }); - def test_empty_relative_with_complex_base(self): assert resolve('', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/d;p?q' -# it('create an IRI from a . relative IRI and complex baseIRI', () => { -# expect(resolve('.', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/'); -# }); - def test_dot_relative_with_complex_base(self): assert resolve('.', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/d;p?q' -# it('create an IRI from a ./ relative IRI and complex baseIRI', () => { -# expect(resolve('./', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/'); -# }); - def test_dot_slash_relative_with_complex_base(self): assert resolve('./', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/' -# it('create an IRI from a .. relative IRI and complex baseIRI', () => { -# expect(resolve('..', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/'); -# }); - def test_double_dot_relative_with_complex_base(self): assert resolve('..', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/' - -# it('create an IRI from a ../ relative IRI and complex baseIRI', () => { -# expect(resolve('../', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/'); -# }); - + def test_double_dot_slash_relative_with_complex_base(self): assert resolve('../', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/' -# it('create an IRI from a ../g relative IRI and complex baseIRI', () => { -# expect(resolve('../g', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/g'); -# }); - def test_double_dot_slash_g_relative_with_complex_base(self): assert resolve('../g', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/g' -# it('create an IRI from a ../.. 
relative IRI and complex baseIRI', () => { -# expect(resolve('../..', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/'); -# }); - def test_double_dot_slash_double_dot_relative_with_complex_base(self): assert resolve('../..', 'file:///a/bb/ccc/d;p?q') == 'file:///a/' -# it('create an IRI from a ../../ relative IRI and complex baseIRI', () => { -# expect(resolve('../../', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/'); -# }); - def test_2x_dot_slash_double_dot_slash_relative_with_complex_base(self): assert resolve('../../', 'file:///a/bb/ccc/d;p?q') == 'file:///a/' -# it('create an IRI from a ../../g relative IRI and complex baseIRI', () => { -# expect(resolve('../../g', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/g'); -# }); - def test_2x_double_dot_slash_with_g_relative_with_complex_base(self): assert resolve('../../g', 'file:///a/bb/ccc/d;p?q') == 'file:///a/g' -# it('create an IRI from a ../../.. relative IRI and complex baseIRI', () => { -# expect(resolve('../../..', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///'); -# }); - def test_2x_double_dot_slash_with_double_dot_relative_with_complex_base(self): assert resolve('../../..', 'file:///a/bb/ccc/d;p?q') == 'file:///' -# it('create an IRI from a ../../../ relative IRI and complex baseIRI', () => { -# expect(resolve('../../../', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///'); -# }); - def test_3x_double_dot_slash_relative_with_complex_base(self): assert resolve('../../../', 'file:///a/bb/ccc/d;p?q') == 'file:///' -# it('create an IRI from a ../../../g relative IRI and complex baseIRI', () => { -# expect(resolve('../../../g', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///g'); -# }); - def test_3x_double_dot_slash_with_g_relative_with_complex_base(self): assert resolve('../../../g', 'file:///a/bb/ccc/d;p?q') == 'file:///g' -# it('create an IRI from a ../../../../g relative IRI and complex baseIRI', () => { -# expect(resolve('../../../../g', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///g'); 
-# }); - def test_4x_double_dot_slash_with_g_relative_with_complex_base(self): assert resolve('../../../../g', 'file:///a/bb/ccc/d;p?q') == 'file:///g' -# it('create an IRI from a /./g relative IRI and complex baseIRI', () => { -# expect(resolve('/./g', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///g'); -# }); - def test_slash_dot_slash_g_relative_with_complex_base(self): assert resolve('/./g', 'file:///a/bb/ccc/d;p?q') == 'file:///g' -# it('create an IRI from a /../g relative IRI and complex baseIRI', () => { -# expect(resolve('/../g', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///g'); -# }); - def test_slash_double_dot_slash_g_relative_with_complex_base(self): assert resolve('/../g', 'file:///a/bb/ccc/d;p?q') == 'file:///g' -# it('create an IRI from a g. relative IRI and complex baseIRI', () => { -# expect(resolve('g.', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/g.'); -# }); - def test_dot_suffix_relative_with_complex_base(self): assert resolve('g.', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g' -# it('create an IRI from a .g relative IRI and complex baseIRI', () => { -# expect(resolve('.g', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/.g'); -# }); - def test_dot_prefix_relative_with_complex_base(self): assert resolve('.g', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/.g' -# it('create an IRI from a g.. relative IRI and complex baseIRI', () => { -# expect(resolve('g..', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/g..'); -# }); - def test_double_dot_suffix_relative_with_complex_base(self): assert resolve('g..', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g..' 
-# it('create an IRI from a ..g relative IRI and complex baseIRI', () => { -# expect(resolve('..g', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/..g'); -# }); - def test_double_dot_prefix_relative_with_complex_base(self): assert resolve('..g', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/..g' -# it('create an IRI from a ./../g relative IRI and complex baseIRI', () => { -# expect(resolve('./../g', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/g'); -# }); - def test_dot_slash_double_dot_slash_g_relative_with_complex_base(self): assert resolve('./../g', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/g' -# it('create an IRI from a ./g/. relative IRI and complex baseIRI', () => { -# expect(resolve('./g/.', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/g/'); -# }); - def test_dot_slash_g_slash_dot_relative_with_complex_base(self): assert resolve('./g/.', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g/' -# it('create an IRI from a g/./h relative IRI and complex baseIRI', () => { -# expect(resolve('g/./h', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/g/h'); -# }); - def test_g_slash_dot_slash_h_relative_with_complex_base(self): assert resolve('g/./h', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g/h' -# it('create an IRI from a g/../h relative IRI and complex baseIRI', () => { -# expect(resolve('g/../h', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/h'); -# }); - def test_g_slash_double_dot_slash_h_relative_with_complex_base(self): assert resolve('g/../h', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/h' -# it('create an IRI from a g;x=1/./y relative IRI and complex baseIRI', () => { -# expect(resolve('g;x=1/./y', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/g;x=1/y'); -# }); - def test_g_semicolon_x_equals_1_slash_dot_slash_y_relative_with_complex_base(self): assert resolve('g;x=1/./y', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g;x=1/y' -# it('create an IRI from a g;x=1/../y relative IRI and complex 
baseIRI', () => { -# expect(resolve('g;x=1/../y', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/y'); -# }); - def test_g_semicolon_x_equals_1_slash_double_dot_slash_y_relative_with_complex_base(self): assert resolve('g;x=1/../y', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/y' -# it('create an IRI from a g?y/./x relative IRI and complex baseIRI', () => { -# expect(resolve('g?y/./x', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/g?y/./x'); -# }); - def test_g_questionmark_y_slash_dot_slash_x_relative_with_complex_base(self): assert resolve('g?y/./x', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g?y/./x' -# it('create an IRI from a g?y/../x relative IRI and complex baseIRI', () => { -# expect(resolve('g?y/../x', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/g?y/../x'); -# }); - -# it('create an IRI from a g#s/./x relative IRI and complex baseIRI', () => { -# expect(resolve('g#s/./x', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/g#s/./x'); -# }); - -# it('create an IRI from a g#s/../x relative IRI and complex baseIRI', () => { -# expect(resolve('g#s/../x', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('file:///a/bb/ccc/g#s/../x'); -# }); - -# it('create an IRI from a http:g relative IRI and complex baseIRI', () => { -# expect(resolve('http:g', 'file:///a/bb/ccc/d;p?q')) -# .toEqual('http:g'); -# }); - -# it('create an IRI from a //example.org/.././useless/../../scheme-relative relative IRI and complex baseIRI', () => { -# expect(resolve('//example.org/.././useless/../../scheme-relative', -# 'http://example.com/some/deep/directory/and/file#with-a-fragment')) -# .toEqual('http://example.org/scheme-relative'); -# }); - -# it('create an IRI from a relative IRI a and baseIRI without // after scheme', () => { -# expect(resolve('a', 'tag:example')) -# .toEqual('tag:a'); -# }); - -# it('create an IRI from a relative IRI a and baseIRI without // after scheme, with one /', () => { -# expect(resolve('a', 'tag:example/foo')) -# 
.toEqual('tag:example/a'); -# }); - -# it('create an IRI from a relative IRI a and baseIRI without // after scheme, with two /', () => { -# expect(resolve('a', 'tag:example/foo/')) -# .toEqual('tag:example/foo/a'); -# }); + def test_g_questionmark_y_slash_double_dot_slash_x_relative_with_complex_base(self): + assert resolve('g?y/../x', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g?y/../x' + + def test_g_hash_s_slash_dot_slash_x_relative_with_complex_base(self): + assert resolve('g#s/./x', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g#s/./x' + + def test_g_hash_s_slash_double_dot_slash_x_relative_with_complex_base(self): + assert resolve('g#s/../x', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g#s/../x' + + def test_http_colon_g_relative_with_complex_base(self): + assert resolve('http:g', 'file:///a/bb/ccc/d;p?q') == 'http:g' + + def test_complex_relative_with_complex_base(self): + assert resolve('//example.org/.././useless/../../scheme-relative', 'http://example.com/some/deep/directory/and/file#with-a-fragment') == 'http://example.org/scheme-relative' + + def test_relative_with_complex_base_without_double_slash_after_scheme(self): + assert resolve('a', 'tag:example') == 'tag:a' + + def test_relative_with_complex_base_without_double_slash_after_scheme_with_one_slash(self): + assert resolve('a', 'tag:example/foo') == 'tag:example/a' def test_relative_a_with_base_without_double_slash_after_scheme_with_two_slash(self): assert resolve('a', 'tag:example/foo/') == 'tag:example/foo/a' -# it('create an IRI from a relative IRI with a ... segment and one .. and baseIRI', () => { -# expect(resolve('../.../../', 'http://example.org/a/b/c/')) -# .toEqual('http://example.org/a/b/'); -# }); - def test_relative_with_triple_dot_segment_and_double_dot_and_base(self): assert resolve('../.../../', 'http://example.org/a/b/c/') == 'http://example.org/a/b/' -# it('create an IRI from a relative IRI with a ... segment and two .. 
and baseIRI', () => { -# expect(resolve('../.../../../', 'http://example.org/a/b/c/')) -# .toEqual('http://example.org/a/'); -# }); - def test_relative_with_triple_dot_segment_and_2x_double_dot_and_base(self): assert resolve('../.../../../', 'http://example.org/a/b/c/') == 'http://example.org/a/' @@ -531,188 +316,86 @@ def test_multiple_parents(self): def test_multiple_currents(self): assert remove_dot_segments('/abc/././.') == '/abc/' -# it('should handle mixed current and parent directories', () => { -# expect(removeDotSegments('/abc/def/./ghi/../..')) -# .toEqual('/abc/'); -# }); - def test_mixed_current_and_parent(self): assert remove_dot_segments('/abc/def/./ghi/../..') == '/abc/' -# it('should handle another mixed current and parent directories', () => { -# expect(removeDotSegments('/a/b/c/./../../g')) -# .toEqual('/a/g'); -# }); - def test_another_mixed_current_and_parent(self): assert remove_dot_segments('/a/b/c/./../../g') == '/a/g' -# it('should not modify fragments', () => { -# expect(removeDotSegments('/abc#abcdef')) -# .toEqual('/abc#abcdef'); -# }); - def test_not_modify_fragments(self): assert remove_dot_segments('/abc#abcdef') == '/abc#abcdef' -# it('should not modify paths in fragments', () => { -# expect(removeDotSegments('/abc#a/bc/def')) -# .toEqual('/abc#a/bc/def'); -# }); - def test_not_modify_paths_in_fragments(self): assert remove_dot_segments('/abc#abcdef') == '/abc#a/bc/def' -# it('should not modify current paths in fragments', () => { -# expect(removeDotSegments('/abc#a/./bc/def')) -# .toEqual('/abc#a/./bc/def'); -# }); - def test_not_modify_current_paths_in_fragments(self): assert remove_dot_segments('/abc#a/./bc/def') == '/abc#a/./bc/def' -# it('should not modify parent paths in fragments', () => { -# expect(removeDotSegments('/abc#a/../bc/def')) -# .toEqual('/abc#a/../bc/def'); -# }); - def test_not_modify_parent_paths_in_fragments(self): assert remove_dot_segments('/abc#a/../bc/def') == '/abc#a/../bc/def' -# it('should not modify 
queries', () => { -# expect(removeDotSegments('/abc?abcdef')) -# .toEqual('/abc?abcdef'); -# }); - def test_not_modify_queries(self): assert remove_dot_segments('/abc?abcdef') == '/abc?abcdef' -# it('should not modify paths in queries', () => { -# expect(removeDotSegments('/abc?a/bc/def')) -# .toEqual('/abc?a/bc/def'); -# }); - def test_not_modify_paths_in_queries(self): assert remove_dot_segments('/abc?a/bc/def') == '/abc?a/bc/def' -# it('should not modify current paths in queries', () => { -# expect(removeDotSegments('/abc?a/./bc/def')) -# .toEqual('/abc?a/./bc/def'); -# }); - def test_not_modify_current_paths_in_queries(self): assert remove_dot_segments('/abc?a/./bc/def') == '/abc?a/./bc/def' -# it('should not modify parent paths in queries', () => { -# expect(removeDotSegments('/abc?a/../bc/def')) -# .toEqual('/abc?a/../bc/def'); -# }); - def test_not_modify_parent_paths_in_queries(self): assert remove_dot_segments('/abc?a/../bc/def') == '/abc?a/../bc/def' -# it('should handle mixed current and parent directories with a fragment', () => { -# expect(removeDotSegments('/abc/def/./ghi/../..#abc')) -# .toEqual('/abc#abc'); -# }); - def test_mixed_current_and_parent_with_fragment(self): assert remove_dot_segments('/abc/def/./ghi/../..#abc') == '/abc#abc' -# it('should handle a fragment without another path', () => { -# expect(removeDotSegments('#abc')) -# .toEqual('/#abc'); -# }); - def test_fragment_without_another_path(self): assert remove_dot_segments('#abc') == '/#abc' -# it('should not remove zero-length segments', () => { -# expect(removeDotSegments('/abc//def/')) -# .toEqual('/abc//def/'); -# }); - def test_not_remove_zerolength_segments(self): assert remove_dot_segments('/abc//def/') == '/abc//def/' -# it('should be able to parent into zero-length segments', () => { -# expect(removeDotSegments('/abc//def//../')) -# .toEqual('/abc//def/'); -# }); - def test_parent_into_zerolength_segments(self): assert remove_dot_segments('/abc//def//../') == '/abc//def/' -# 
it('should be able to current over zero-length segments', () => { -# expect(removeDotSegments('/abc//def//./')) -# .toEqual('/abc//def//'); -# }); - def test_current_over_zerolength_segments(self): assert remove_dot_segments('/abc//def//./') == '/abc//def//' -# it('should resolve a query against non-/', () => { -# expect(removeDotSegments('/def/ghi?a=b')) -# .toEqual('/def/ghi?a=b'); -# }); - def test_resolve_query_against_non_slash(self): assert remove_dot_segments('/def/ghi?a=b') == '/def/ghi?a=b' -# it('should resolve a query against /', () => { -# expect(removeDotSegments('/def/?a=b')) -# .toEqual('/def/?a=b'); -# }); - -# it('should resolve a .. and query', () => { -# expect(removeDotSegments('/def/..?a=b')) -# .toEqual('/?a=b'); -# }); - -# it('should just append a .g after a slash', () => { -# expect(removeDotSegments('/a/bb/ccc/.g')) -# .toEqual('/a/bb/ccc/.g'); -# }); - -# it('should just append a g. after a slash', () => { -# expect(removeDotSegments('/a/bb/ccc/g.')) -# .toEqual('/a/bb/ccc/g.'); -# }); - -# it('should just append a ..g after a slash', () => { -# expect(removeDotSegments('/a/bb/ccc/..g')) -# .toEqual('/a/bb/ccc/..g'); -# }); - -# it('should just append a g.. 
after a slash', () => { -# expect(removeDotSegments('/a/bb/ccc/g..')) -# .toEqual('/a/bb/ccc/g..'); -# }); - -# it('should end with a slash if there is a trailing /.', () => { -# expect(removeDotSegments('/a/bb/ccc/./g/.')) -# .toEqual('/a/bb/ccc/g/'); -# }); - -# it('should handle triple dots as a normal segment.', () => { -# expect(removeDotSegments('/invalid/...')) -# .toEqual('/invalid/...'); -# }); - -# it('should handle triple dots as a normal segment, followed by ...', () => { -# expect(removeDotSegments('/invalid/.../..')) -# .toEqual('/invalid/'); -# }); - -# it('should handle four dots as a normal segment.', () => { -# expect(removeDotSegments('/invalid/../..../../../.../.htaccess')) -# .toEqual('/.../.htaccess'); -# }); - -# it('should handle a segment with dot and an invalid char as a normal segment.', () => { -# expect(removeDotSegments('/invalid/../.a/../../.../.htaccess')) -# .toEqual('/.../.htaccess'); -# }); + def test_resolve_query_against_slash(self): + assert remove_dot_segments('/def/?a=b') == '/def/?a=b' + + def test_resolve_double_dot_and_query(self): + assert remove_dot_segments('/def/..?a=b') == '/?a=b' + + def test_append_dot_g_after_slash(self): + assert remove_dot_segments('/a/bb/ccc/.g') == '/a/bb/ccc/.g' + + def test_append_g_dot_after_slash(self): + assert remove_dot_segments('/a/bb/ccc/g.') == '/a/bb/ccc/g.' + + def test_append_double_dot_g_after_slash(self): + assert remove_dot_segments('/a/bb/ccc/..g') == '/a/bb/ccc/..g' + + def test_append_g_double_dot_after_slash(self): + assert remove_dot_segments('/a/bb/ccc/g..') == '/a/bb/ccc/g..' + + def test_end_with_slash_if_trailing_slash_dot(self): + assert remove_dot_segments('/a/bb/ccc/./g/.') == '/a/bb/ccc/g/' + + def test_triple_dots_as_normal_segment(self): + assert remove_dot_segments('/invalid/...') == '/invalid/...' 
"""
IRI resolution helpers.

The functions 'remove_dot_segments()', 'resolve()' and
'is_character_allowed_after_relative_path_segment()' are ports from
[relative-to-absolute-iri.js](https://github.com/rubensworks/relative-to-absolute-iri.js)
"""

from typing import Optional


def is_character_allowed_after_relative_path_segment(ch: str) -> bool:
    """
    Return True if a character is valid after '.' or '..' in a path segment.

    :param ch: a single character, or '' when the end of the path was reached.

    :return: True for the end of the path, '#', '?' or '/'.
    """
    return not ch or ch in ('#', '?', '/')


def remove_dot_segments(path: str) -> str:
    """
    Removes dot segments ('.' and '..') from a URL path,
    as described in https://www.ietf.org/rfc/rfc3986.txt (page 32).

    Everything from the first '#' or '?' onwards is copied unchanged.

    :param path: the IRI path to remove dot segments from.

    :return: a path with normalized dot segments, will always start with a '/'.
    """
    # Each buffer collects the pieces of one path segment.
    segment_buffers = []
    i = 0
    length = len(path)

    while i < length:
        ch = path[i]

        if ch == '/':
            # Handle '/.' or '/..'
            if i + 1 < length and path[i + 1] == '.':
                # Handle '/..'
                if i + 2 < length and path[i + 2] == '.':
                    next_ch = path[i + 3] if i + 3 < length else ''
                    if not is_character_allowed_after_relative_path_segment(next_ch):
                        # Something like '/..g': an ordinary segment
                        segment_buffers.append([])
                        i += 1
                        continue

                    # Go to parent directory
                    if segment_buffers:
                        segment_buffers.pop()

                    # Add trailing slash segment if the path ends with '/..'
                    if i + 3 >= length:
                        segment_buffers.append([])

                    i += 3
                    continue

                # Handle '/.'
                next_ch = path[i + 2] if i + 2 < length else ''
                if not is_character_allowed_after_relative_path_segment(next_ch):
                    # Something like '/.g': an ordinary segment
                    segment_buffers.append([])
                    i += 1
                    continue

                # Add trailing slash segment if the path ends with '/.'
                if i + 2 >= length:
                    segment_buffers.append([])

                # Stay in current directory - skip
                i += 2
                continue

            # Regular '/' starts a new segment
            segment_buffers.append([])
            i += 1
            continue

        if ch in ('#', '?'):
            # Query or fragment -> append unchanged and stop
            if not segment_buffers:
                segment_buffers.append([])
            segment_buffers[-1].append(path[i:])
            break

        # Regular character -> append to current segment
        if not segment_buffers:
            segment_buffers.append([])
        segment_buffers[-1].append(ch)
        i += 1

    return '/' + '/'.join(''.join(buffer) for buffer in segment_buffers)


def remove_dot_segments_of_path(iri: str, colon_position: int) -> str:
    """
    Remove dot segments from the path portion of an IRI (RFC 3986 section 5.2.4).

    :param iri: an IRI (or part of IRI).
    :param colon_position: the position of the first ':' in the IRI,
      or a negative number if the IRI contains no ':'.

    :return: the IRI where dot segments were removed; the IRI is returned
      unchanged when no path could be located.
    """
    # Determine where to start looking for the first '/' that indicates
    # the start of the path (skipping over '//' in front of an authority).
    if colon_position >= 0:
        if iri[colon_position + 1:colon_position + 3] == '//':
            search_offset = colon_position + 3
        else:
            search_offset = colon_position + 1
    elif iri.startswith('//') and len(iri) > 1:
        search_offset = 2
    else:
        search_offset = 0

    # Find the start of the path
    path_separator = iri.find('/', search_offset)
    if path_separator < 0:
        return iri

    # Remove dot segments from the path only
    return iri[:path_separator] + remove_dot_segments(iri[path_separator:])


def resolve(relative_iri: str, base_iri: Optional[str] = None) -> str:
    """
    Resolves a given relative IRI to an absolute IRI.

    :param relative_iri: the relative IRI.
    :param base_iri: the base IRI; None is treated like an empty base IRI.

    :return: the absolute IRI.

    :raises ValueError: if the relative IRI has no scheme and the base IRI
      is missing, or if a base IRI without a scheme is given.
    """
    base_iri = base_iri or ''

    # Ignore any fragments in the base IRI
    base_fragment_pos = base_iri.find('#')
    if base_fragment_pos > 0:
        base_iri = base_iri[:base_fragment_pos]

    # Convert empty value directly to base IRI
    if not relative_iri:
        if ':' not in base_iri:
            raise ValueError(f"Found invalid baseIRI '{base_iri}' for value '{relative_iri}'")
        return base_iri

    # If the value starts with a query character, concat directly
    # (replacing an existing query of the base IRI)
    if relative_iri.startswith('?'):
        base_query_pos = base_iri.find('?')
        if base_query_pos > 0:
            base_iri = base_iri[:base_query_pos]
        return base_iri + relative_iri

    # If the value starts with a fragment character, concat directly
    if relative_iri.startswith('#'):
        return base_iri + relative_iri

    # Ignore base IRI if it is empty
    value_colon_pos = relative_iri.find(':')
    if not base_iri:
        if value_colon_pos < 0:
            raise ValueError(f"Found invalid relative IRI '{relative_iri}' for a missing baseIRI")
        return remove_dot_segments_of_path(relative_iri, value_colon_pos)

    # Ignore base IRI if the value is absolute
    if value_colon_pos >= 0:
        return remove_dot_segments_of_path(relative_iri, value_colon_pos)

    # From here on the reference has a non-empty path, so the query of the
    # base IRI is never part of the result (RFC 3986 section 5.2.2). Drop it
    # so that a '/' or ':' inside the query cannot be mistaken for a part of
    # the base path or scheme.
    base_query_pos = base_iri.find('?')
    base_without_query = base_iri[:base_query_pos] if base_query_pos > 0 else base_iri

    # The base IRI must be absolute
    base_colon_pos = base_without_query.find(':')
    if base_colon_pos < 0:
        raise ValueError(f"Found invalid baseIRI '{base_iri}' for value '{relative_iri}'")
    base_iri = base_without_query

    base_scheme = base_iri[:base_colon_pos + 1]

    # Inherit base scheme if relative starts with '//'
    if relative_iri.startswith('//'):
        return base_scheme + remove_dot_segments_of_path(relative_iri, value_colon_pos)

    # Determine where the path of base starts
    if base_iri.find('//', base_colon_pos) == base_colon_pos + 1:
        # Base has an authority ('scheme://...')
        base_slash_after_colon_pos = base_iri.find('/', base_colon_pos + 3)
        if base_slash_after_colon_pos < 0:
            # Base has no path
            if len(base_iri) > base_colon_pos + 3:
                return base_iri + '/' + remove_dot_segments_of_path(relative_iri, value_colon_pos)
            return base_scheme + remove_dot_segments_of_path(relative_iri, value_colon_pos)
    else:
        # Base has no authority ('scheme:...')
        base_slash_after_colon_pos = base_iri.find('/', base_colon_pos + 1)
        if base_slash_after_colon_pos < 0:
            return base_scheme + remove_dot_segments_of_path(relative_iri, value_colon_pos)

    # If relative starts with '/', append after base authority
    if relative_iri.startswith('/'):
        return base_iri[:base_slash_after_colon_pos] + remove_dot_segments(relative_iri)

    base_path = base_iri[base_slash_after_colon_pos:]
    last_slash = base_path.rfind('/')

    # Ignore everything after last '/' in base path
    if 0 <= last_slash < len(base_path) - 1:
        base_path = base_path[:last_slash + 1]

    # A lone '.' segment directly followed by a query or fragment refers to
    # the current directory: drop the dot so that the trailing '/' of the
    # base path is kept. Names that merely start with a dot (such as
    # '.htaccess') are ordinary segments and must stay untouched.
    if relative_iri[0] == '.' and relative_iri[1:2] in ('?', '#'):
        relative_iri = relative_iri[1:]

    # Merge with the base path and normalize the result
    merged_path = remove_dot_segments(base_path + relative_iri)

    return base_iri[:base_slash_after_colon_pos] + merged_path
'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/' @@ -210,7 +210,7 @@ def test_slash_double_dot_slash_g_relative_with_complex_base(self): assert resolve('/../g', 'file:///a/bb/ccc/d;p?q') == 'file:///g' def test_dot_suffix_relative_with_complex_base(self): - assert resolve('g.', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g' + assert resolve('g.', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/g.' def test_dot_prefix_relative_with_complex_base(self): assert resolve('.g', 'file:///a/bb/ccc/d;p?q') == 'file:///a/bb/ccc/.g' @@ -326,7 +326,7 @@ def test_not_modify_fragments(self): assert remove_dot_segments('/abc#abcdef') == '/abc#abcdef' def test_not_modify_paths_in_fragments(self): - assert remove_dot_segments('/abc#abcdef') == '/abc#a/bc/def' + assert remove_dot_segments('/abc#a/bc/def') == '/abc#a/bc/def' def test_not_modify_current_paths_in_fragments(self): assert remove_dot_segments('/abc#a/./bc/def') == '/abc#a/./bc/def' From d80fd1635ea82b38ba76662f767cc3d15173e405 Mon Sep 17 00:00:00 2001 From: Miel Vander Sande Date: Tue, 25 Nov 2025 13:27:24 +0100 Subject: [PATCH 4/5] Protect resolve function against None values --- lib/pyld/iri_resolver.py | 4 ++-- tests/test_iri_resolver.py | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/pyld/iri_resolver.py b/lib/pyld/iri_resolver.py index fa5722d..a20d0f3 100644 --- a/lib/pyld/iri_resolver.py +++ b/lib/pyld/iri_resolver.py @@ -116,7 +116,7 @@ def remove_dot_segments_of_path(iri: str, colon_position: int) -> str: # Remove dot segments from the path return base + remove_dot_segments(path) -def resolve(relative_iri: str, base_iri: str = "") -> str: +def resolve(relative_iri: str, base_iri: str = None) -> str: # """ # Resolves a given relative IRI to an absolute IRI. @@ -125,7 +125,7 @@ def resolve(relative_iri: str, base_iri: str = "") -> str: # :return: the absolute IRI. 
# """ - + base_iri = base_iri or '' base_fragment_pos = base_iri.find("#") # Ignore any fragments in the base IRI diff --git a/tests/test_iri_resolver.py b/tests/test_iri_resolver.py index 2a95ccf..2d3e28f 100644 --- a/tests/test_iri_resolver.py +++ b/tests/test_iri_resolver.py @@ -272,6 +272,9 @@ def test_relative_with_triple_dot_segment_and_double_dot_and_base(self): def test_relative_with_triple_dot_segment_and_2x_double_dot_and_base(self): assert resolve('../.../../../', 'http://example.org/a/b/c/') == 'http://example.org/a/' + def test_questionmark_prefix_relative_with_complex_base_with_dot(self): + assert resolve('?y','http://a/bb/ccc/./d;p?q') == 'http://a/bb/ccc/./d;p?y' + # ---------- Tests for remove_dot_segments() ---------- class TestRemoveDotSegments: def test_no_slash(self): From 65201a2af710eb6c4ed4895b8497811adcf1236b Mon Sep 17 00:00:00 2001 From: Miel Vander Sande Date: Tue, 25 Nov 2025 13:42:08 +0100 Subject: [PATCH 5/5] Adjust lib to resolve function --- lib/pyld/context_resolver.py | 8 +-- lib/pyld/documentloader/aiohttp.py | 3 +- lib/pyld/documentloader/requests.py | 4 +- lib/pyld/jsonld.py | 92 ++++------------------------- tests/runtests.py | 10 ++-- 5 files changed, 24 insertions(+), 93 deletions(-) diff --git a/lib/pyld/context_resolver.py b/lib/pyld/context_resolver.py index 6714cbb..784821a 100644 --- a/lib/pyld/context_resolver.py +++ b/lib/pyld/context_resolver.py @@ -10,7 +10,7 @@ from frozendict import frozendict from c14n.Canonicalize import canonicalize -from pyld import jsonld +from pyld import jsonld, iri_resolver from .resolved_context import ResolvedContext MAX_CONTEXT_URLS = 10 @@ -104,7 +104,7 @@ def _cache_resolved_context(self, key, resolved, tag): def _resolve_remote_context(self, active_ctx, url, base, cycles): # resolve relative URL and fetch context - url = jsonld.prepend_base(base, url) + url = iri_resolver.resolve(url, base) context, remote_doc = self._fetch_context(active_ctx, url, cycles) # update base according to 
remote document and resolve any relative URLs @@ -194,13 +194,13 @@ def _resolve_context_urls(self, context, base): ctx = context.get('@context') if isinstance(ctx, str): - context['@context'] = jsonld.prepend_base(base, ctx) + context['@context'] = iri_resolver.resolve(ctx, base) return if isinstance(ctx, list): for num, element in enumerate(ctx): if isinstance(element, str): - ctx[num] = jsonld.prepend_base(base, element) + ctx[num] = iri_resolver.resolve(element, base) elif isinstance(element, dict) or isinstance(element, frozendict): self. _resolve_context_urls({'@context': element}, base) return diff --git a/lib/pyld/documentloader/aiohttp.py b/lib/pyld/documentloader/aiohttp.py index 560ddc7..96f786e 100644 --- a/lib/pyld/documentloader/aiohttp.py +++ b/lib/pyld/documentloader/aiohttp.py @@ -13,6 +13,7 @@ import threading import urllib.parse as urllib_parse +from pyld import iri_resolver from pyld.jsonld import (JsonLdError, parse_link_header, LINK_HEADER_REL) @@ -114,7 +115,7 @@ async def async_loader(url, headers): linked_alternate.get('type') == 'application/ld+json' and not re.match(r'^application\/(\w*\+)?json$', content_type)): doc['contentType'] = 'application/ld+json' - doc['documentUrl'] = jsonld.prepend_base(url, linked_alternate['target']) + doc['documentUrl'] = iri_resolver.resolve(linked_alternate['target'], url) return doc except JsonLdError as e: diff --git a/lib/pyld/documentloader/requests.py b/lib/pyld/documentloader/requests.py index 77f42e1..570f6fa 100644 --- a/lib/pyld/documentloader/requests.py +++ b/lib/pyld/documentloader/requests.py @@ -9,9 +9,11 @@ .. moduleauthor:: Tim McNamara .. 
moduleauthor:: Olaf Conradi """ +import re import string import urllib.parse as urllib_parse +from pyld import iri_resolver from pyld.jsonld import (JsonLdError, parse_link_header, LINK_HEADER_REL) @@ -92,7 +94,7 @@ def loader(url, options={}): linked_alternate.get('type') == 'application/ld+json' and not re.match(r'^application\/(\w*\+)?json$', content_type)): doc['contentType'] = 'application/ld+json' - doc['documentUrl'] = jsonld.prepend_base(url, linked_alternate['target']) + doc['documentUrl'] = iri_resolver.resolve(linked_alternate['target'], url) return doc except JsonLdError as e: raise e diff --git a/lib/pyld/jsonld.py b/lib/pyld/jsonld.py index 5266a89..5abeceb 100644 --- a/lib/pyld/jsonld.py +++ b/lib/pyld/jsonld.py @@ -31,6 +31,7 @@ from numbers import Integral, Real from frozendict import frozendict from pyld.__about__ import (__copyright__, __license__, __version__) +from .iri_resolver import resolve def cmp(a, b): return (a > b) - (a < b) @@ -443,80 +444,6 @@ def unregister_rdf_parser(content_type): del _rdf_parsers[content_type] -def prepend_base(base, iri): - """ - Prepends a base IRI to the given relative IRI. - - :param base: the base IRI. - :param iri: the relative IRI. - - :return: the absolute IRI. 
- """ - # skip IRI processing - if base is None: - return iri - - # already an absolute iri - if _is_absolute_iri(iri): - return iri - - # parse IRIs - base = parse_url(base) - rel = parse_url(iri) - - # per RFC3986 5.2.2 - transform = { - 'scheme': base.scheme - } - - if rel.authority is not None: - transform['authority'] = rel.authority - transform['path'] = rel.path - transform['query'] = rel.query - else: - transform['authority'] = base.authority - - if rel.path == '': - transform['path'] = base.path - if rel.query is not None: - transform['query'] = rel.query - else: - transform['query'] = base.query - else: - if rel.path.startswith('/'): - # IRI represents an absolute path - transform['path'] = rel.path - else: - # merge paths - path = base.path - - # append relative path to the end of the last directory from - # base - path = path[0:path.rfind('/') + 1] - if (len(path) > 0 or base.authority) and not path.endswith('/'): - path += '/' - path += rel.path - - transform['path'] = path - - transform['query'] = rel.query - - if rel.path != '': - # normalize path - transform['path'] = remove_dot_segments(transform['path']) - - transform['fragment'] = rel.fragment - - # construct URL - rval = unparse_url(transform) - - # handle empty base case - if rval == '': - rval = './' - - return rval - - def remove_base(base, iri): """ Removes a base IRI from the given absolute IRI. 
@@ -3188,10 +3115,10 @@ def _process_context(self, active_ctx, local_ctx, options, base = ctx['@base'] if base is None: base = None - elif _is_absolute_iri(base): + elif _is_absolute_iri(base) or (_is_relative_iri(base) and active_ctx.get('@base') is None): base = base - elif _is_relative_iri(base): - base = prepend_base(active_ctx.get('@base'), base) + elif _is_relative_iri(base) and active_ctx.get('@base') is not None: + base = resolve(base, active_ctx.get('@base')) else: raise JsonLdError( 'Invalid JSON-LD syntax; the value of "@base" in a ' @@ -3295,7 +3222,7 @@ def _process_context(self, active_ctx, local_ctx, options, process = True if _is_string(key_ctx): - url = prepend_base(options['base'], key_ctx) + url = resolve(key_ctx, options['base']) if url in cycles: process = False else: @@ -4887,7 +4814,7 @@ def _compact_iri( if active_ctx['@base'] is None: return iri else: - return remove_base(prepend_base(base, active_ctx['@base']), iri) + return remove_base(resolve(active_ctx['@base'], base), iri) else: return remove_base(base, iri) @@ -5510,9 +5437,10 @@ def _expand_iri( if base and '@base' in active_ctx: # The None case preserves rval as potentially relative if active_ctx['@base'] is not None: - rval = prepend_base(prepend_base(base, active_ctx['@base']), rval) + resolved_base = active_ctx['@base'] if _is_absolute_iri(active_ctx['@base']) else resolve(active_ctx['@base'], base) + rval = resolve(rval, resolved_base) elif base: - rval = prepend_base(base, rval) + rval = resolve(rval, base) return rval @@ -6642,7 +6570,7 @@ def load_html(input, url, profile, options): # use either specified base, or document location effective_base = options.get('base', url) if effective_base: - html_base = prepend_base(effective_base, html_base[0]) + html_base = resolve(html_base[0], effective_base) options['base'] = html_base url_elements = parse_url(url) diff --git a/tests/runtests.py b/tests/runtests.py index 9d2567e..b6b9c65 100644 --- a/tests/runtests.py +++ 
b/tests/runtests.py @@ -20,7 +20,7 @@ from unittest import TextTestResult sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'lib')) -from pyld import jsonld +from pyld import jsonld, iri_resolver __copyright__ = 'Copyright (c) 2011-2013 Digital Bazaar, Inc.' __license__ = 'New BSD license' @@ -97,9 +97,9 @@ def main(self): # default to find known sibling test dirs test_targets = [] sibling_dirs = [ - '../json-ld-api/tests/', - '../json-ld-framing/tests/', - '../normalization/tests/', + './specifications/json-ld-api/tests/', + './specifications/json-ld-framing/tests/', + './specifications/normalization/tests/', ] for dir in sibling_dirs: if os.path.exists(dir): @@ -528,7 +528,7 @@ def load_locally(url): linked_alternate.get('type') == 'application/ld+json' and not re.match(r'^application\/(\w*\+)?json$', content_type)): doc['contentType'] = 'application/ld+json' - doc['documentUrl'] = jsonld.prepend_base(url, linked_alternate['target']) + doc['documentUrl'] = iri_resolver.resolve(linked_alternate['target'], url) global ROOT_MANIFEST_DIR if doc['documentUrl'].find(':') == -1: filename = os.path.join(ROOT_MANIFEST_DIR, doc['documentUrl'])