From edc30008689f96e095abafbe189106e722e866cb Mon Sep 17 00:00:00 2001 From: Andrew Davison Date: Thu, 13 Nov 2025 15:52:46 +0100 Subject: [PATCH 1/4] Fix #73 - Infinite recursion in validate() --- pipeline/src/base.py | 14 ++++++++++++-- pipeline/src/properties.py | 14 +++++++------- pipeline/tests/test_regressions.py | 16 ++++++++++++++++ 3 files changed, 35 insertions(+), 9 deletions(-) diff --git a/pipeline/src/base.py b/pipeline/src/base.py index 5937ea25..80e5be39 100644 --- a/pipeline/src/base.py +++ b/pipeline/src/base.py @@ -161,11 +161,21 @@ def validate(self, ignore=None): Returns a dict containing information about any validation failures. """ + return self._validate(ignore=ignore) + + def _validate(self, ignore=None, seen=None): + # this is implemented as an internal method so that the + # "seen" set, needed to avoid possible infinite recursion, + # can be hidden from the public interface. + if seen is None: + seen = set() failures = defaultdict(list) for property in self.properties: value = getattr(self, property.name, None) - for key, values in property.validate(value, ignore=ignore).items(): - failures[key] += values + if id(value) not in seen: + seen.add(id(value)) + for key, values in property.validate(value, ignore=ignore, seen=seen).items(): + failures[key] += values return failures @property diff --git a/pipeline/src/properties.py b/pipeline/src/properties.py index eca95c2a..7cb7dcd5 100644 --- a/pipeline/src/properties.py +++ b/pipeline/src/properties.py @@ -100,7 +100,7 @@ def types(self): def is_link(self) -> bool: return issubclass(self.types[0], Node) - def validate(self, value, ignore=None): + def validate(self, value, ignore=None, seen=None): """ Check whether `value` satisfies all constraints. @@ -108,6 +108,8 @@ def validate(self, value, ignore=None): value: the value to be checked ignore: an optional list of check types that should be ignored ("required", "type", "multiplicity") + seen: for internal use: contains a set with Python object ids that have + already been encountered in the validation tree. Returns a dict containing information about any validation failures. """ @@ -131,11 +133,10 @@ def validate(self, value, ignore=None): else: item_type = f"value contains {type(item)}" failures["type"].append( - f"{self.name}: Expected {', '.join(t.__name__ for t in self.types)}, " + - item_type + f"{self.name}: Expected {', '.join(t.__name__ for t in self.types)}, " + item_type ) elif isinstance(item, (Node, IRI)): - failures.update(item.validate(ignore=ignore)) + failures.update(item._validate(ignore=ignore, seen=seen)) if self.min_items: if len(value) < self.min_items and "multiplicity" not in ignore: failures["multiplicity"].append( @@ -167,11 +168,10 @@ def validate(self, value, ignore=None): else: value_type = f"value contains {type(value)}" failures["type"].append( - f"{self.name}: Expected {', '.join(t.__name__ for t in self.types)}, " + - value_type + f"{self.name}: Expected {', '.join(t.__name__ for t in self.types)}, " + value_type ) elif isinstance(value, (Node, IRI)): - failures.update(value.validate(ignore=ignore)) + failures.update(value._validate(ignore=ignore, seen=seen)) # todo: check formatting, multiline return failures diff --git a/pipeline/tests/test_regressions.py b/pipeline/tests/test_regressions.py index 595b70b2..ef208ea4 100644 --- a/pipeline/tests/test_regressions.py +++ b/pipeline/tests/test_regressions.py @@ -254,3 +254,19 @@ def test_issue0056(): assert failures["multiplicity"] == ['digital_identifier does not accept multiple values, but contains 2'] data = dataset.to_jsonld() json.dumps(data) # this should not raise an Exception + + +def test_issue0073(): + # https://github.com/openMetadataInitiative/openMINDS_Python/issues/73 + # Infinite recursion in validate() + ds1 = omcore.DatasetVersion( + short_name="ds1", + is_alternative_version_of=None + ) + ds2 = omcore.DatasetVersion( + short_name="ds2", + is_alternative_version_of=ds1 + ) + ds1.is_alternative_version_of = ds2 + + failures = ds1.validate() From e5a7298ea7762d1c79853a678be3494f568d3296 Mon Sep 17 00:00:00 2001 From: Andrew Davison Date: Fri, 14 Nov 2025 16:59:48 +0100 Subject: [PATCH 2/4] update property name to the one used in "latest" --- pipeline/tests/test_regressions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pipeline/tests/test_regressions.py b/pipeline/tests/test_regressions.py index ef208ea4..a7e43694 100644 --- a/pipeline/tests/test_regressions.py +++ b/pipeline/tests/test_regressions.py @@ -261,12 +261,12 @@ def test_issue0073(): # Infinite recursion in validate() ds1 = omcore.DatasetVersion( short_name="ds1", - is_alternative_version_of=None + is_variant_of=None ) ds2 = omcore.DatasetVersion( short_name="ds2", - is_alternative_version_of=ds1 + is_variant_of=ds1 ) - ds1.is_alternative_version_of = ds2 + ds1.is_variant_of = ds2 failures = ds1.validate() From 97cd134a6dd730c1a64df15bc0c53a8796135a5b Mon Sep 17 00:00:00 2001 From: Andrew Davison Date: Sun, 16 Nov 2025 19:07:35 +0100 Subject: [PATCH 3/4] bug-fix: in avoiding infinite recursion, it is whether we've seen "self" and the "property" before that matters, not whether we've seen the value before. --- pipeline/src/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipeline/src/base.py b/pipeline/src/base.py index 80e5be39..f9ea99e7 100644 --- a/pipeline/src/base.py +++ b/pipeline/src/base.py @@ -172,8 +172,8 @@ def _validate(self, ignore=None, seen=None): failures = defaultdict(list) for property in self.properties: value = getattr(self, property.name, None) - if id(value) not in seen: - seen.add(id(value)) + if (id(self), property.name) not in seen: + seen.add((id(self), property.name)) for key, values in property.validate(value, ignore=ignore, seen=seen).items(): failures[key] += values return failures From ac2b1e794a2fd132ad2f650787f010d6d589d69d Mon Sep 17 00:00:00 2001 From: Andrew Davison Date: Sun, 16 Nov 2025 23:03:30 +0100 Subject: [PATCH 4/4] missed a couple of needed changes --- pipeline/src/base.py | 2 +- pipeline/tests/test_instantiation.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pipeline/src/base.py b/pipeline/src/base.py index f9ea99e7..d952b2ea 100644 --- a/pipeline/src/base.py +++ b/pipeline/src/base.py @@ -308,7 +308,7 @@ def __str__(self): def to_jsonld(self): return self.value - def validate(self, ignore=None): + def _validate(self, ignore=None, seen=None): if ignore is None: ignore = [] failures = defaultdict(list) diff --git a/pipeline/tests/test_instantiation.py b/pipeline/tests/test_instantiation.py index 1b007e5d..bee2fef1 100644 --- a/pipeline/tests/test_instantiation.py +++ b/pipeline/tests/test_instantiation.py @@ -69,7 +69,7 @@ def test_IRI(): for value in valid_iris: iri = IRI(value) assert iri.value == value - failures = iri.validate() + failures = iri._validate() if value.startswith("http"): assert not failures else: