From 72cf4af4fcac58dad39e53f360ae4e167abbb12f Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sat, 31 Jan 2026 15:36:54 +0100 Subject: [PATCH 1/8] add failing test for direct subclass init --- upath/tests/test_core.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/upath/tests/test_core.py b/upath/tests/test_core.py index 9d6d9317..194bd771 100644 --- a/upath/tests/test_core.py +++ b/upath/tests/test_core.py @@ -11,6 +11,7 @@ from upath import UPath from upath.implementations.cloud import GCSPath from upath.implementations.cloud import S3Path +from upath.registry import get_upath_class from upath.types import ReadablePath from upath.types import WritablePath @@ -483,3 +484,18 @@ def test_constructor_compatible_protocol_uri(uri, protocol): def test_constructor_incompatible_protocol_uri(uri, protocol): with pytest.raises(ValueError, match=r".*incompatible with"): UPath(uri, protocol=protocol) + + +@pytest.mark.parametrize( + "uri,protocol", + [ + ("s3://bucket/folder", "gs"), + ("gs://bucket/folder", "s3"), + ("memory://folder", "s3"), + ("file:/tmp/folder", "s3"), + ], +) +def test_subclass_constructor_incompatible_protocol_uri(uri, protocol): + cls = get_upath_class(protocol) + with pytest.raises(ValueError, match=r".*incompatible with"): + cls(uri) From c7a00dae75436bffad34b0b73c08484f839a3be4 Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sat, 31 Jan 2026 15:52:46 +0100 Subject: [PATCH 2/8] adjust subclass tests: require registering --- upath/tests/test_core.py | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/upath/tests/test_core.py b/upath/tests/test_core.py index 194bd771..dd15c569 100644 --- a/upath/tests/test_core.py +++ b/upath/tests/test_core.py @@ -12,6 +12,7 @@ from upath.implementations.cloud import GCSPath from upath.implementations.cloud import S3Path from upath.registry import get_upath_class +from upath.registry import register_implementation from upath.types import ReadablePath from upath.types import WritablePath @@ -113,12 +114,35 @@ def test_subclass(local_testdir): class MyPath(UPath): pass - with pytest.warns( - DeprecationWarning, match=r"MyPath\(...\) detected protocol '' .*" - ): - path = MyPath(local_testdir) - assert str(path) == pathlib.Path(local_testdir).as_posix() + with pytest.raises(ValueError, match=r".*incompatible with"): + MyPath(local_testdir) + + +@pytest.fixture(scope="function") +def upath_registry_snapshot(): + """Save and restore the upath registry state around a test.""" + from upath.registry import _registry + + # Save the current state of the registry's mutable mapping + saved_m = _registry._m.maps[0].copy() + try: + yield + finally: + # Restore the registry state + _registry._m.maps[0].clear() + _registry._m.maps[0].update(saved_m) + get_upath_class.cache_clear() + + +def test_subclass_registered(upath_registry_snapshot): + class MyPath(UPath): + pass + + register_implementation("memory", MyPath, clobber=True) + path = MyPath("memory:///test_path") + assert str(path) == "memory:///test_path" assert issubclass(MyPath, UPath) + assert isinstance(path, MyPath) assert isinstance(path, pathlib_abc.ReadablePath) assert isinstance(path, pathlib_abc.WritablePath) assert not isinstance(path, pathlib.Path) From b9d742bd92715cbd22776b174fe6f7508606e95d Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sat, 31 Jan 2026 18:01:02 +0100 Subject: [PATCH 3/8] raise error when instantiating subclass with incorrect protocol --- upath/core.py | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/upath/core.py b/upath/core.py index d4c08a46..fc656efb 100644 --- a/upath/core.py +++ b/upath/core.py @@ -38,6 +38,7 @@ from upath._protocol import compatible_protocol from upath._protocol import get_upath_protocol from upath._stat import UPathStatResult +from upath.registry import available_implementations from upath.registry import get_upath_class from upath.types import UNSET_DEFAULT from upath.types import JoinablePathLike @@ -468,26 +469,19 @@ def __new__( raise RuntimeError("UPath.__new__ expected cls to be subclass of UPath") else: - msg_protocol = repr(pth_protocol) + msg_protocol = pth_protocol if not pth_protocol: msg_protocol += " (empty string)" msg = ( - f"{cls.__name__!s}(...) detected protocol {msg_protocol!s} and" - f" returns a {upath_cls.__name__} instance that isn't a direct" - f" subclass of {cls.__name__}. This will raise an exception in" - " future universal_pathlib versions. To prevent the issue, use" - " UPath(...) to create instances of unrelated protocols or you" - f" can instead derive your subclass {cls.__name__!s}(...) from" - f" {upath_cls.__name__} or alternatively override behavior via" - f" registering the {cls.__name__} implementation with protocol" - f" {msg_protocol!s} replacing the default implementation." + f"{cls.__name__!s}(...) detected protocol {msg_protocol!s}" + f" which is incompatible with {cls.__name__}." ) - warnings.warn( - msg, - DeprecationWarning, - stacklevel=2, - ) - upath_cls = cls + if not pth_protocol or pth_protocol not in available_implementations(): + msg += ( + " Did you forget to register the subclass for this protocol" + " with upath.registry.register_implementation()?" + ) + raise _IncompatibleProtocolError(msg) return object.__new__(upath_cls) From 1e9e21fe656422543a10414c846ffc2d0a94227e Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sat, 31 Jan 2026 19:40:00 +0100 Subject: [PATCH 4/8] upath.registry: add _get_implementation_protocols --- upath/registry.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/upath/registry.py b/upath/registry.py index 9470d43e..02f5c59b 100644 --- a/upath/registry.py +++ b/upath/registry.py @@ -155,6 +155,7 @@ def __setitem__(self, item: str, value: type[upath.UPath] | str) -> None: ) if not item or item in self._m: get_upath_class.cache_clear() # type: ignore[attr-defined] + _get_implementation_protocols.cache_clear() # type: ignore[attr-defined] self._m[item] = value def __delitem__(self, __v: str) -> None: @@ -211,6 +212,25 @@ def register_implementation( _registry[protocol] = cls +@lru_cache # type: ignore[misc] +def _get_implementation_protocols(cls: type[upath.UPath]) -> list[str]: + """return protocols registered for a given UPath class without triggering imports""" + if not issubclass(cls, upath.UPath): + raise ValueError(f"{cls!r} is not a UPath subclass") + loaded = (p for p, c in _registry._m.maps[0].items() if c is cls) + known = ( + p + for p, fqn in _registry.known_implementations.items() + if fqn == f"{cls.__module__}.{cls.__name__}" + ) + eps = ( + p + for p, ep in _registry._entries.items() + if ep.module == cls.__module__ and ep.attr == cls.__name__ + ) + return list(dict.fromkeys((*loaded, *known, *eps))) + + # --- get_upath_class type overloads ------------------------------------------ if TYPE_CHECKING: # noqa: C901 From c1a19816d0ce64b923909451171e392b21fd0f9a Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sat, 31 Jan 2026 19:42:12 +0100 Subject: [PATCH 5/8] upath.core: fix protocol handling when instantiating subclasses directly --- upath/core.py | 40 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/upath/core.py b/upath/core.py index fc656efb..3ba9c4c7 100644 --- a/upath/core.py +++ b/upath/core.py @@ -38,6 +38,7 @@ from upath._protocol import compatible_protocol from upath._protocol import get_upath_protocol from upath._stat import UPathStatResult +from upath.registry import _get_implementation_protocols from upath.registry import available_implementations from upath.registry import get_upath_class from upath.types import UNSET_DEFAULT @@ -405,7 +406,7 @@ def _fs_factory( _protocol_dispatch: bool | None = None - def __new__( + def __new__( # noqa C901 cls, *args: JoinablePathLike, protocol: str | None = None, @@ -436,6 +437,27 @@ def __new__( if "incompatible with" in str(e): raise _IncompatibleProtocolError(str(e)) from e raise + + # subclasses should default to their own protocol + if protocol is None and cls is not UPath: + impl_protocols = _get_implementation_protocols(cls) + if not pth_protocol and impl_protocols: + pth_protocol = impl_protocols[0] + elif pth_protocol and pth_protocol not in impl_protocols: + msg_protocol = pth_protocol + if not pth_protocol: + msg_protocol = "'' (empty string)" + msg = ( + f"{cls.__name__!s}(...) detected protocol {msg_protocol!s}" + f" which is incompatible with {cls.__name__}." + ) + if not pth_protocol or pth_protocol not in available_implementations(): + msg += ( + " Did you forget to register the subclass for this protocol" + " with upath.registry.register_implementation()?" + ) + raise _IncompatibleProtocolError(msg) + # determine which UPath subclass to dispatch to upath_cls: type[UPath] | None if cls._protocol_dispatch or cls._protocol_dispatch is None: @@ -471,12 +493,17 @@ def __new__( else: msg_protocol = pth_protocol if not pth_protocol: - msg_protocol += " (empty string)" + msg_protocol = "'' (empty string)" msg = ( f"{cls.__name__!s}(...) detected protocol {msg_protocol!s}" f" which is incompatible with {cls.__name__}." ) - if not pth_protocol or pth_protocol not in available_implementations(): + if ( + # find a better way + (not pth_protocol and cls.__name__ not in ["CloudPath", "LocalPath"]) + or pth_protocol + and pth_protocol not in available_implementations() + ): msg += ( " Did you forget to register the subclass for this protocol" " with upath.registry.register_implementation()?" @@ -514,7 +541,6 @@ def __init__( Additional storage options for the path. """ - # todo: avoid duplicating this call from __new__ protocol = get_upath_protocol( args[0] if args else "", @@ -533,6 +559,12 @@ def __init__( if not compatible_protocol(protocol, *args): raise ValueError("can't combine incompatible UPath protocols") + # subclasses should default to their own protocol + if not protocol: + impl_protocols = _get_implementation_protocols(type(self)) + if impl_protocols: + protocol = impl_protocols[0] + if args: args0 = args[0] if isinstance(args0, UPath): From 773b904c836dd42e3b0059b82a8f704d21b5ab4b Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sat, 31 Jan 2026 20:20:46 +0100 Subject: [PATCH 6/8] tests: add protocol incompatibility tests --- upath/tests/test_core.py | 99 +++++++++++++++++++++++++--------- upath/tests/test_extensions.py | 31 +++++++++++ 2 files changed, 106 insertions(+), 24 deletions(-) diff --git a/upath/tests/test_core.py b/upath/tests/test_core.py index dd15c569..c1805df8 100644 --- a/upath/tests/test_core.py +++ b/upath/tests/test_core.py @@ -478,14 +478,48 @@ def test_open_a_local_upath(tmp_path, protocol): @pytest.mark.parametrize( "uri,protocol", [ + # s3 compatible protocols ("s3://bucket/folder", "s3"), - ("gs://bucket/folder", "gs"), + ("s3a://bucket/folder", "s3a"), ("bucket/folder", "s3"), + # gcs compatible + ("gs://bucket/folder", "gs"), + ("gcs://bucket/folder", "gcs"), + ("bucket/folder", "gs"), + # azure compatible + ("az://container/blob", "az"), + ("abfs://container/blob", "abfs"), + ("abfss://container/blob", "abfss"), + ("adl://container/blob", "adl"), + # memory ("memory://folder", "memory"), + ("/folder", "memory"), + # file/local ("file:/tmp/folder", "file"), ("/tmp/folder", "file"), + ("file:/tmp/folder", "local"), + ("/tmp/folder", "local"), ("/tmp/folder", ""), ("a/b/c", ""), + # http/https + ("http://example.com/path", "http"), + ("https://example.com/path", "https"), + # ftp + ("ftp://example.com/path", "ftp"), + # sftp/ssh + ("sftp://example.com/path", "sftp"), + ("ssh://example.com/path", "ssh"), + # smb + ("smb://server/share/path", "smb"), + # hdfs + ("hdfs://namenode/path", "hdfs"), + # webdav - requires base_url, skip for now + # github + ("github://owner:repo@branch/path", "github"), + # data + ("data:text/plain;base64,SGVsbG8=", "data"), + # huggingface + ("hf://datasets/user/repo/path", "hf"), ], ) def test_constructor_compatible_protocol_uri(uri, protocol): @@ -493,33 +527,50 @@ def test_constructor_compatible_protocol_uri(uri, protocol): assert p.protocol == protocol -@pytest.mark.parametrize( - "uri,protocol", - [ - ("s3://bucket/folder", "gs"), - ("gs://bucket/folder", "s3"), - ("memory://folder", "s3"), - ("file:/tmp/folder", "s3"), - ("s3://bucket/folder", ""), - ("memory://folder", ""), - ("file:/tmp/folder", ""), - ], -) +# Protocol to sample URI mapping +_PROTOCOL_URIS = { + "s3": "s3://bucket/folder", + "gs": "gs://bucket/folder", + "az": "az://container/blob", + "memory": "memory://folder", + "file": "file:/tmp/folder", + "http": "http://example.com/path", + "ftp": "ftp://example.com/path", + "sftp": "sftp://example.com/path", + "smb": "smb://server/share/path", + "hdfs": "hdfs://namenode/path", +} + +# Generate incompatible combinations: each protocol with URIs from other protocols +_INCOMPATIBLE_CASES = [ + (_PROTOCOL_URIS[uri_protocol], target_protocol) + for target_protocol in _PROTOCOL_URIS + for uri_protocol in _PROTOCOL_URIS + if target_protocol != uri_protocol +] + +# Also test explicit empty protocol with protocol-prefixed URIs +_INCOMPATIBLE_CASES.extend([(uri, "") for uri in _PROTOCOL_URIS.values()]) + + +@pytest.mark.parametrize("uri,protocol", _INCOMPATIBLE_CASES) def test_constructor_incompatible_protocol_uri(uri, protocol): - with pytest.raises(ValueError, match=r".*incompatible with"): + with pytest.raises(TypeError, match=r".*incompatible with"): UPath(uri, protocol=protocol) -@pytest.mark.parametrize( - "uri,protocol", - [ - ("s3://bucket/folder", "gs"), - ("gs://bucket/folder", "s3"), - ("memory://folder", "s3"), - ("file:/tmp/folder", "s3"), - ], -) +# Test subclass instantiation with incompatible URIs +# Use protocols that have registered implementations we can get via get_upath_class +_SUBCLASS_INCOMPATIBLE_CASES = [ + (_PROTOCOL_URIS[uri_protocol], target_protocol) + for target_protocol in _PROTOCOL_URIS + for uri_protocol in _PROTOCOL_URIS + if target_protocol != uri_protocol +] + + +@pytest.mark.parametrize("uri,protocol", _SUBCLASS_INCOMPATIBLE_CASES) def test_subclass_constructor_incompatible_protocol_uri(uri, protocol): cls = get_upath_class(protocol) - with pytest.raises(ValueError, match=r".*incompatible with"): + with pytest.raises(TypeError, match=r".*incompatible with"): cls(uri) diff --git a/upath/tests/test_extensions.py b/upath/tests/test_extensions.py index 7a3bc8c8..6876454c 100644 --- a/upath/tests/test_extensions.py +++ b/upath/tests/test_extensions.py @@ -214,3 +214,34 @@ class MyPath(UPath): a = MyPath(".", protocol="memory") assert isinstance(a, MyPath) + + +# Protocol to sample URI mapping for compatibility tests +_PROTOCOL_URIS = { + "s3": "s3://bucket/folder", + "gs": "gs://bucket/folder", + "memory": "memory://folder", + "file": "file:/tmp/folder", + "http": "http://example.com/path", + "": "/tmp/folder", +} + +# Generate incompatible combinations +_PROXY_INCOMPATIBLE_CASES = [ + (_PROTOCOL_URIS[uri_protocol], target_protocol) + for target_protocol in _PROTOCOL_URIS + for uri_protocol in _PROTOCOL_URIS + if target_protocol != uri_protocol and uri_protocol != "" +] + + +@pytest.mark.parametrize("uri,protocol", _PROXY_INCOMPATIBLE_CASES) +def test_proxy_subclass_incompatible_protocol_uri(uri, protocol): + """Test that ProxyUPath subclasses raise TypeError for incompatible protocols.""" + + class MyProxyPath(ProxyUPath): + pass + + # ProxyUPath wraps the underlying path, so it should also raise TypeError + with pytest.raises(TypeError, match=r".*incompatible with"): + MyProxyPath(uri, protocol=protocol) From 67b3544555e7b846799da509e043b74f5cc945fb Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sat, 31 Jan 2026 20:51:56 +0100 Subject: [PATCH 7/8] upath._protocol: fix incompatible protocol for partially loaded impls and fallbacks --- upath/_protocol.py | 18 ++++++++++++++++++ upath/registry.py | 3 +++ 2 files changed, 21 insertions(+) diff --git a/upath/_protocol.py b/upath/_protocol.py index 84162491..a0cea386 100644 --- a/upath/_protocol.py +++ b/upath/_protocol.py @@ -55,6 +55,24 @@ def _fsspec_protocol_equals(p0: str, p1: str) -> bool: except KeyError: raise ValueError(f"Protocol not known: {p1!r}") + if o0 == o1: + return True + + if isinstance(o0, dict): + o0 = o0.get("class") + elif isinstance(o0, type): + if o0.__module__ is not None: + o0 = o0.__module__ + "." + o0.__name__ + else: + o0 = o0.__name__ + if isinstance(o1, dict): + o1 = o1.get("class") + elif isinstance(o1, type): + if o1.__module__ is not None: + o1 = o1.__module__ + "." + o1.__name__ + else: + o1 = o1.__name__ + return o0 == o1 diff --git a/upath/registry.py b/upath/registry.py index 02f5c59b..ce30f284 100644 --- a/upath/registry.py +++ b/upath/registry.py @@ -217,6 +217,9 @@ def _get_implementation_protocols(cls: type[upath.UPath]) -> list[str]: """return protocols registered for a given UPath class without triggering imports""" if not issubclass(cls, upath.UPath): raise ValueError(f"{cls!r} is not a UPath subclass") + if cls.__module__ == "upath.implementations._experimental": + # experimental fallback implementations have no registry entry + return [cls.__name__[1:-4].lower()] loaded = (p for p, c in _registry._m.maps[0].items() if c is cls) known = ( p From c67ed16f7ceb87d450360dcf0e4d2dcca462d184 Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sat, 31 Jan 2026 21:05:03 +0100 Subject: [PATCH 8/8] upath: fix typing --- upath/_protocol.py | 4 ++-- upath/registry.py | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/upath/_protocol.py b/upath/_protocol.py index a0cea386..5f109087 100644 --- a/upath/_protocol.py +++ b/upath/_protocol.py @@ -61,14 +61,14 @@ def _fsspec_protocol_equals(p0: str, p1: str) -> bool: if isinstance(o0, dict): o0 = o0.get("class") elif isinstance(o0, type): - if o0.__module__ is not None: + if o0.__module__: o0 = o0.__module__ + "." + o0.__name__ else: o0 = o0.__name__ if isinstance(o1, dict): o1 = o1.get("class") elif isinstance(o1, type): - if o1.__module__ is not None: + if o1.__module__: o1 = o1.__module__ + "." + o1.__name__ else: o1 = o1.__name__ diff --git a/upath/registry.py b/upath/registry.py index ce30f284..fb88ed4b 100644 --- a/upath/registry.py +++ b/upath/registry.py @@ -220,7 +220,11 @@ def _get_implementation_protocols(cls: type[upath.UPath]) -> list[str]: if cls.__module__ == "upath.implementations._experimental": # experimental fallback implementations have no registry entry return [cls.__name__[1:-4].lower()] - loaded = (p for p, c in _registry._m.maps[0].items() if c is cls) + loaded = ( + p + for p, c in _registry._m.maps[0].items() # type: ignore[attr-defined] + if c is cls + ) known = ( p for p, fqn in _registry.known_implementations.items()