37 changes: 37 additions & 0 deletions cuda_bindings/tests/nvml/conftest.py
@@ -2,6 +2,7 @@
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

from collections import namedtuple
from contextlib import contextmanager

import pytest
from cuda.bindings import _nvml as nvml
Expand Down Expand Up @@ -128,3 +129,39 @@ def pci_info(ngpus, handles):
pci_info = [nvml.device_get_pci_info_v3(handles[i]) for i in range(ngpus)]
assert len(pci_info) == ngpus
return pci_info


@contextmanager
Collaborator comment:
The below came from Cursor (after discarding my cuda.bindings.tests.helpers idea I posted offline).

The idea is that you can give this to Cursor on your end to play with the options.


Suggestion: Deduplicating unsupported_before

I noticed we now have two nearly identical unsupported_before context managers:

  • cuda_bindings/tests/nvml/conftest.py
  • cuda_core/tests/system/conftest.py

The logic is the same, but they use different API surfaces:

Aspect       nvml conftest                            cuda.core conftest
Get arch     nvml.device_get_architecture(device)     device.arch
Get name     nvml.device_get_name(device)             device.name
Arch enum    nvml.DeviceArch                          system.DeviceArch
Exception    nvml.NotSupportedError                   system.NotSupportedError

Options to consider

Option 1: Factory pattern in cuda_python_test_helpers (recommended)

Keep cuda_python_test_helpers CUDA-agnostic but share the logic via dependency injection:

# cuda_python_test_helpers/__init__.py
from contextlib import contextmanager

def make_unsupported_before(*, get_arch, get_name, arch_enum, not_supported_error):
    """Factory to create an unsupported_before context manager with injected dependencies."""
    @contextmanager
    def unsupported_before(device, expected_device_arch):
        device_arch = get_arch(device)

        if isinstance(expected_device_arch, arch_enum):
            expected_device_arch_int = int(expected_device_arch)
        elif expected_device_arch == "FERMI":
            expected_device_arch_int = 1
        else:
            expected_device_arch_int = 0

        if (
            expected_device_arch is None
            or expected_device_arch == "HAS_INFOROM"
            or device_arch == arch_enum.UNKNOWN
        ):
            try:
                yield
            except not_supported_error:
                import pytest
                pytest.skip(
                    f"Unsupported call for device architecture {arch_enum(device_arch).name} "
                    f"on device '{get_name(device)}'"
                )
        elif int(device_arch) < expected_device_arch_int:
            import pytest
            with pytest.raises(not_supported_error):
                yield
            pytest.skip(f"Unsupported before {expected_device_arch.name}, got {get_name(device)}")
        else:
            yield

    return unsupported_before

Then in each conftest:

# cuda_bindings/tests/nvml/conftest.py
from cuda_python_test_helpers import make_unsupported_before
from cuda.bindings import _nvml as nvml

unsupported_before = make_unsupported_before(
    get_arch=nvml.device_get_architecture,
    get_name=nvml.device_get_name,
    arch_enum=nvml.DeviceArch,
    not_supported_error=nvml.NotSupportedError,
)

# cuda_core/tests/system/conftest.py
from cuda_python_test_helpers import make_unsupported_before
from cuda.core import system

unsupported_before = make_unsupported_before(
    get_arch=lambda d: d.arch,
    get_name=lambda d: d.name,
    arch_enum=system.DeviceArch,
    not_supported_error=system.NotSupportedError,
)

Pros: Single source of truth for the logic; cuda_python_test_helpers stays CUDA-agnostic.
Cons: Slightly more indirection.
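With either conftest wiring above, the test call sites stay exactly as they are today. A representative example, mirroring test_pynvml.py; the injected context manager behaves the same as the current per-conftest versions:

# Unchanged call site under Option 1.
def test_device_get_total_energy_consumption(ngpus, handles):
    for i in range(ngpus):
        with unsupported_before(handles[i], nvml.DeviceArch.VOLTA):
            energy_mjoules = nvml.device_get_total_energy_consumption(handles[i])
        assert energy_mjoules >= 0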

Option 2: Let cuda_python_test_helpers depend on cuda-bindings

Provide a single nvml-based implementation. cuda.core tests would extract device handles.

Pros: Simpler API.
Cons: Reverses the direction of commit 6afdd5c; ties test helpers to CUDA packages.
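For concreteness, a rough sketch of what Option 2 could look like. This assumes the shared helper imports cuda.bindings directly, and that cuda.core tests can somehow reach the underlying NVML handle (shown below as a hypothetical nvml_handle accessor; today Device._handle is private):

# cuda_python_test_helpers/__init__.py (Option 2 sketch: helpers depend on cuda.bindings)
from contextlib import contextmanager

import pytest
from cuda.bindings import _nvml as nvml


@contextmanager
def unsupported_before(handle, expected_device_arch):
    # Same decision logic as the current nvml conftest, operating on a raw NVML
    # device handle so both test suites can share it.
    device_arch = nvml.device_get_architecture(handle)

    if isinstance(expected_device_arch, nvml.DeviceArch):
        expected_device_arch_int = int(expected_device_arch)
    elif expected_device_arch == "FERMI":
        expected_device_arch_int = 1
    else:
        expected_device_arch_int = 0

    if expected_device_arch in (None, "HAS_INFOROM") or device_arch == nvml.DeviceArch.UNKNOWN:
        # Outcome unknown up front: tolerate NotSupportedError and skip.
        try:
            yield
        except nvml.NotSupportedError:
            pytest.skip(f"Unsupported call on {nvml.DeviceArch(device_arch).name}")
    elif int(device_arch) < expected_device_arch_int:
        # Known-unsupported: assert that the call fails, then skip.
        with pytest.raises(nvml.NotSupportedError):
            yield
        pytest.skip(f"Unsupported before {expected_device_arch.name}")
    else:
        yield

# cuda_core/tests/system tests would then need the raw handle, e.g. via a
# hypothetical accessor (not part of the current Device API):
#
#     with unsupported_before(device.nvml_handle, system.DeviceArch.VOLTA):
#         ...
# (system.DeviceArch aliases nvml.DeviceArch in this PR, so the isinstance check still works.)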

Option 3: Accept the duplication

~40 lines duplicated in 2 files. The implementations use different abstraction layers, so some might argue they're legitimately different.

Pros: No new abstractions.
Cons: Bug fixes / enhancements need to be applied twice.


I'd lean toward Option 1 since it keeps the CUDA-agnostic design while eliminating the duplication. Happy to pair on this if you want to explore it further.

def unsupported_before(device: int, expected_device_arch: nvml.DeviceArch | str | None):
    device_arch = nvml.device_get_architecture(device)

    if isinstance(expected_device_arch, nvml.DeviceArch):
        expected_device_arch_int = int(expected_device_arch)
    elif expected_device_arch == "FERMI":
        expected_device_arch_int = 1
    else:
        expected_device_arch_int = 0

    if expected_device_arch is None or expected_device_arch == "HAS_INFOROM" or device_arch == nvml.DeviceArch.UNKNOWN:
        # In this case, we don't /know/ if it will fail, but we are ok if it
        # does or does not.

        # TODO: There are APIs that are documented as supported only if the
        # device has an InfoROM, but I couldn't find a way to detect that. For
        # now, they are just handled as "possibly failing".

        try:
            yield
        except nvml.NotSupportedError:
            pytest.skip(
                f"Unsupported call for device architecture {nvml.DeviceArch(device_arch).name} "
                f"on device '{nvml.device_get_name(device)}'"
            )
    elif int(device_arch) < expected_device_arch_int:
        # In this case, we /know/ it will fail, and we want to assert that it does.
        with pytest.raises(nvml.NotSupportedError):
            yield
        pytest.skip(f"Unsupported before {expected_device_arch.name}, got {nvml.device_get_name(device)}")
    else:
        # In this case, we /know/ it should work, and if it fails, the test should fail.
        yield
11 changes: 3 additions & 8 deletions cuda_bindings/tests/nvml/test_compute_mode.py
@@ -7,6 +7,8 @@
import pytest
from cuda.bindings import _nvml as nvml

from .conftest import unsupported_before

COMPUTE_MODES = [
nvml.ComputeMode.COMPUTEMODE_DEFAULT,
nvml.ComputeMode.COMPUTEMODE_PROHIBITED,
@@ -16,18 +18,11 @@

@pytest.mark.skipif(sys.platform == "win32", reason="Test not supported on Windows")
def test_compute_mode_supported_nonroot(all_devices):
skip_reasons = set()
for device in all_devices:
try:
with unsupported_before(device, None):
original_compute_mode = nvml.device_get_compute_mode(device)
except nvml.NotSupportedError:
skip_reasons.add(f"nvmlDeviceGetComputeMode not supported for device {device}")
continue

for cm in COMPUTE_MODES:
with pytest.raises(nvml.NoPermissionError):
nvml.device_set_compute_mode(device, cm)
assert original_compute_mode == nvml.device_get_compute_mode(device), "Compute mode shouldn't have changed"

if skip_reasons:
pytest.skip(" ; ".join(skip_reasons))
18 changes: 5 additions & 13 deletions cuda_bindings/tests/nvml/test_gpu.py
@@ -5,6 +5,7 @@
from cuda.bindings import _nvml as nvml

from . import util
from .conftest import unsupported_before


def test_gpu_get_module_id(nvml_init):
@@ -23,23 +24,14 @@ def test_gpu_get_module_id(nvml_init):


def test_gpu_get_platform_info(all_devices):
skip_reasons = set()
for device in all_devices:
if util.is_vgpu(device):
skip_reasons.add(f"Not supported on vGPU device {device}")
continue
pytest.skip(f"Not supported on vGPU device {device}")

# TODO
# if device.feature_dict.board.chip < board_class.Architecture.Blackwell:
# test_utils.skip_test("Not supported on chip before Blackwell")
# Documentation says Blackwell or newer only, but this does seem to pass
# on some newer GPUs.

try:
with unsupported_before(device, None):
platform_info = nvml.device_get_platform_info(device)
except nvml.NotSupportedError:
skip_reasons.add(f"Not supported returned, linkely NVLink is disable for {device}")
continue

assert isinstance(platform_info, nvml.PlatformInfo_v2)

if skip_reasons:
pytest.skip(" ; ".join(skip_reasons))
33 changes: 19 additions & 14 deletions cuda_bindings/tests/nvml/test_pynvml.py
@@ -10,6 +10,7 @@
from cuda.bindings import _nvml as nvml

from . import util
from .conftest import unsupported_before

XFAIL_LEGACY_NVLINK_MSG = "Legacy NVLink test expected to fail."

@@ -66,7 +67,8 @@ def test_device_get_handle_by_pci_bus_id(ngpus, pci_info):
def test_device_get_memory_affinity(handles, scope):
size = 1024
for handle in handles:
node_set = nvml.device_get_memory_affinity(handle, size, scope)
with unsupported_before(handle, nvml.DeviceArch.KEPLER):
node_set = nvml.device_get_memory_affinity(handle, size, scope)
assert node_set is not None
assert len(node_set) == size

@@ -76,7 +78,8 @@ def test_device_get_cpu_affinity_within_scope(handles, scope):
def test_device_get_cpu_affinity_within_scope(handles, scope):
size = 1024
for handle in handles:
cpu_set = nvml.device_get_cpu_affinity_within_scope(handle, size, scope)
with unsupported_before(handle, nvml.DeviceArch.KEPLER):
cpu_set = nvml.device_get_cpu_affinity_within_scope(handle, size, scope)
assert cpu_set is not None
assert len(cpu_set) == size

@@ -136,22 +139,22 @@ def test_device_get_p2p_status(handles, index):

def test_device_get_power_usage(ngpus, handles):
for i in range(ngpus):
try:
# Note: documentation says this is supported on Fermi or newer,
# but in practice it fails on some later architectures.
with unsupported_before(handles[i], None):
power_mwatts = nvml.device_get_power_usage(handles[i])
except nvml.NotSupportedError:
pytest.skip("device_get_power_usage not supported")
assert power_mwatts >= 0.0


def test_device_get_total_energy_consumption(ngpus, handles):
for i in range(ngpus):
try:
with unsupported_before(handles[i], nvml.DeviceArch.VOLTA):
energy_mjoules1 = nvml.device_get_total_energy_consumption(handles[i])
except nvml.NotSupportedError:
pytest.skip("device_get_total_energy_consumption not supported")

for j in range(10): # idle for 150 ms
time.sleep(0.015) # and check for increase every 15 ms
energy_mjoules2 = nvml.device_get_total_energy_consumption(handles[i])
with unsupported_before(handles[i], nvml.DeviceArch.VOLTA):
energy_mjoules2 = nvml.device_get_total_energy_consumption(handles[i])
assert energy_mjoules2 >= energy_mjoules1
if energy_mjoules2 > energy_mjoules1:
break
@@ -182,7 +185,8 @@ def test_device_get_memory_info(ngpus, handles):

def test_device_get_utilization_rates(ngpus, handles):
for i in range(ngpus):
urate = nvml.device_get_utilization_rates(handles[i])
with unsupported_before(handles[i], "FERMI"):
urate = nvml.device_get_utilization_rates(handles[i])
assert urate.gpu >= 0
assert urate.memory >= 0

@@ -239,7 +243,8 @@ def test_device_get_utilization_rates(ngpus, handles):

def test_device_get_pcie_throughput(ngpus, handles):
for i in range(ngpus):
tx_bytes_tp = nvml.device_get_pcie_throughput(handles[i], nvml.PcieUtilCounter.PCIE_UTIL_TX_BYTES)
with unsupported_before(handles[i], nvml.DeviceArch.MAXWELL):
tx_bytes_tp = nvml.device_get_pcie_throughput(handles[i], nvml.PcieUtilCounter.PCIE_UTIL_TX_BYTES)
assert tx_bytes_tp >= 0
rx_bytes_tp = nvml.device_get_pcie_throughput(handles[i], nvml.PcieUtilCounter.PCIE_UTIL_RX_BYTES)
assert rx_bytes_tp >= 0
@@ -271,10 +276,10 @@ def test_device_get_pcie_throughput(ngpus, handles):
def test_device_get_nvlink_capability(ngpus, handles, cap_type):
for i in range(ngpus):
for j in range(nvml.NVLINK_MAX_LINKS):
try:
# According to the documentation, this should be supported on PASCAL or newer,
# but it also seems to fail on some newer architectures.
with unsupported_before(handles[i], None):
cap = nvml.device_get_nvlink_capability(handles[i], j, cap_type)
except nvml.NotSupportedError:
pytest.skip("NVLink capability not supported")
assert cap >= 0


57 changes: 13 additions & 44 deletions cuda_core/cuda/core/system/_device.pyx
@@ -21,6 +21,7 @@ ClocksEventReasons = nvml.ClocksEventReasons
ClockType = nvml.ClockType
CoolerControl = nvml.CoolerControl
CoolerTarget = nvml.CoolerTarget
DeviceArch = nvml.DeviceArch
EventType = nvml.EventType
FanControlPolicy = nvml.FanControlPolicy
FieldId = nvml.FieldId
@@ -45,41 +46,6 @@ include "_performance.pxi"
include "_temperature.pxi"


class DeviceArchitecture:
"""
Device architecture enumeration.
"""

def __init__(self, architecture: int):
try:
self._architecture = nvml.DeviceArch(architecture)
except ValueError:
self._architecture = None

@property
def id(self) -> int:
"""
The numeric id of the device architecture.

Returns -1 if the device is unknown.
"""
if self._architecture is None:
return -1
return int(self._architecture)

@property
def name(self) -> str:
"""
The name of the device architecture.

Returns "Unlisted" if the device is unknown.
"""
if self._architecture is None:
return "Unlisted"
name = self._architecture.name
return name[name.rfind("_") + 1 :].title()


cdef class MemoryInfo:
"""
Memory allocation information for a device.
@@ -952,16 +918,15 @@ cdef class Device:
return [Pstates(x) for x in nvml.device_get_supported_performance_states(self._handle)]

@property
def architecture(self) -> DeviceArchitecture:
def arch(self) -> DeviceArch:
"""
Device architecture. For example, a Tesla V100 will report
``DeviceArchitecture.name == "Volta"``, and RTX A6000 will report
``DeviceArchitecture.name == "Ampere"``. If the device returns an
architecture that is unknown to NVML then ``DeviceArchitecture.name ==
"Unknown"`` is reported, whereas an architecture that is unknown to
cuda.core.system is reported as ``DeviceArchitecture.name == "Unlisted"``.
Device architecture.

For example, a Tesla V100 will report ``Device.arch.name == "VOLTA"``,
and an RTX A6000 will report ``Device.arch.name == "AMPERE"``.
"""
return DeviceArchitecture(nvml.device_get_architecture(self._handle))
return DeviceArch(nvml.device_get_architecture(self._handle))

@property
def bar1_memory_info(self) -> BAR1MemoryInfo:
@@ -1027,6 +992,8 @@ cdef class Device:
"""
Retrieves the globally unique board serial number associated with this
device's board.

For all products with an InfoROM.
"""
return nvml.device_get_serial(self._handle)

@@ -1268,6 +1235,8 @@ cdef class Device:
"""
Get the addressing mode of the device.

For Turing or newer fully supported devices.

Addressing modes can be one of:

- :attr:`AddressingMode.DEVICE_ADDRESSING_MODE_HMM`: System allocated
@@ -1486,7 +1455,7 @@ __all__ = [
"CoolerInfo",
"CoolerTarget",
"Device",
"DeviceArchitecture",
"DeviceArch",
"DeviceAttributes",
"DeviceEvents",
"EventData",
2 changes: 1 addition & 1 deletion cuda_core/docs/source/api.rst
@@ -103,7 +103,7 @@ CUDA system information and NVIDIA Management Library (NVML)
system.CoolerControl
system.CoolerInfo
system.CoolerTarget
system.DeviceArchitecture
system.DeviceArch
system.DeviceAttributes
system.DeviceEvents
system.EventData
39 changes: 39 additions & 0 deletions cuda_core/tests/system/conftest.py
@@ -3,9 +3,48 @@
# SPDX-License-Identifier: Apache-2.0


from contextlib import contextmanager

import pytest
from cuda.core import system

skip_if_nvml_unsupported = pytest.mark.skipif(
not system.CUDA_BINDINGS_NVML_IS_COMPATIBLE, reason="NVML support requires cuda.bindings version 12.9.6+ or 13.1.2+"
)


@contextmanager
def unsupported_before(device, expected_device_arch):
    device_arch = device.arch

    if isinstance(expected_device_arch, system.DeviceArch):
        expected_device_arch_int = int(expected_device_arch)
    elif expected_device_arch == "FERMI":
        expected_device_arch_int = 1
    else:
        expected_device_arch_int = 0

    if (
        expected_device_arch is None
        or expected_device_arch == "HAS_INFOROM"
        or device_arch == system.DeviceArch.UNKNOWN
    ):
        # In this case, we don't /know/ if it will fail, but we are ok if it
        # does or does not.

        # TODO: There are APIs that are documented as supported only if the
        # device has an InfoROM, but I couldn't find a way to detect that. For now, they
        # are just handled as "possibly failing".

        try:
            yield
        except system.NotSupportedError:
            pytest.skip(f"Unsupported call for device architecture {device_arch.name} on device '{device.name}'")
    elif int(device_arch) < expected_device_arch_int:
        # In this case, we /know/ it will fail, and we want to assert that it does.
        with pytest.raises(system.NotSupportedError):
            yield
        pytest.skip(f"Unsupported before {expected_device_arch.name}, got {device_arch.name}")
    else:
        # In this case, we /know/ it should work, and if it fails, the test should fail.
        yield