37 changes: 37 additions & 0 deletions cuda_bindings/tests/nvml/conftest.py
@@ -2,6 +2,7 @@
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

from collections import namedtuple
from contextlib import contextmanager

import pytest
from cuda.bindings import _nvml as nvml
Expand Down Expand Up @@ -128,3 +129,39 @@ def pci_info(ngpus, handles):
pci_info = [nvml.device_get_pci_info_v3(handles[i]) for i in range(ngpus)]
assert len(pci_info) == ngpus
return pci_info


@contextmanager
Collaborator comment:
The below came from Cursor (after discarding my cuda.bindings.tests.helpers idea I posted offline).

The idea is that you can give this to Cursor on your end to play with the options.


Suggestion: Deduplicating unsupported_before

I noticed we now have two nearly identical unsupported_before context managers:

  • cuda_bindings/tests/nvml/conftest.py
  • cuda_core/tests/system/conftest.py

The logic is the same, but they use different API surfaces:

Aspect       nvml conftest                            cuda.core conftest
Get arch     nvml.device_get_architecture(device)     device.arch
Get name     nvml.device_get_name(device)             device.name
Arch enum    nvml.DeviceArch                          system.DeviceArch
Exception    nvml.NotSupportedError                   system.NotSupportedError

Options to consider

Option 1: Factory pattern in cuda_python_test_helpers (recommended)

Keep cuda_python_test_helpers CUDA-agnostic but share the logic via dependency injection:

# cuda_python_test_helpers/__init__.py
from contextlib import contextmanager

def make_unsupported_before(*, get_arch, get_name, arch_enum, not_supported_error):
    """Factory to create an unsupported_before context manager with injected dependencies."""
    @contextmanager
    def unsupported_before(device, expected_device_arch):
        device_arch = get_arch(device)

        if isinstance(expected_device_arch, arch_enum):
            expected_device_arch_int = int(expected_device_arch)
        elif expected_device_arch == "FERMI":
            expected_device_arch_int = 1
        else:
            expected_device_arch_int = 0

        if (
            expected_device_arch is None
            or expected_device_arch == "HAS_INFOROM"
            or device_arch == arch_enum.UNKNOWN
        ):
            try:
                yield
            except not_supported_error:
                import pytest
                pytest.skip(
                    f"Unsupported call for device architecture {arch_enum(device_arch).name} "
                    f"on device '{get_name(device)}'"
                )
        elif int(device_arch) < expected_device_arch_int:
            import pytest
            with pytest.raises(not_supported_error):
                yield
            pytest.skip(f"Unsupported before {expected_device_arch.name}, got {get_name(device)}")
        else:
            yield

    return unsupported_before

Then in each conftest:

# cuda_bindings/tests/nvml/conftest.py
from cuda_python_test_helpers import make_unsupported_before
from cuda.bindings import _nvml as nvml

unsupported_before = make_unsupported_before(
    get_arch=nvml.device_get_architecture,
    get_name=nvml.device_get_name,
    arch_enum=nvml.DeviceArch,
    not_supported_error=nvml.NotSupportedError,
)

# cuda_core/tests/system/conftest.py
from cuda_python_test_helpers import make_unsupported_before
from cuda.core import system

unsupported_before = make_unsupported_before(
    get_arch=lambda d: d.arch,
    get_name=lambda d: d.name,
    arch_enum=system.DeviceArch,
    not_supported_error=system.NotSupportedError,
)

Pros: Single source of truth for the logic; cuda_python_test_helpers stays CUDA-agnostic.
Cons: Slightly more indirection.
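With either conftest wiring above, the test call sites stay exactly as they are today. A representative example, mirroring test_pynvml.py; the injected context manager behaves the same as the current per-conftest versions:

# Unchanged call site under Option 1.
def test_device_get_total_energy_consumption(ngpus, handles):
    for i in range(ngpus):
        with unsupported_before(handles[i], nvml.DeviceArch.VOLTA):
            energy_mjoules = nvml.device_get_total_energy_consumption(handles[i])
        assert energy_mjoules >= 0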

Option 2: Let cuda_python_test_helpers depend on cuda-bindings

Provide a single nvml-based implementation. cuda.core tests would extract device handles.

Pros: Simpler API.
Cons: Reverses the direction of commit 6afdd5c; ties test helpers to CUDA packages.
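For concreteness, a rough sketch of what Option 2 could look like. This assumes the shared helper imports cuda.bindings directly, and that cuda.core tests can somehow reach the underlying NVML handle (shown below as a hypothetical nvml_handle accessor; today Device._handle is private):

# cuda_python_test_helpers/__init__.py (Option 2 sketch: helpers depend on cuda.bindings)
from contextlib import contextmanager

import pytest
from cuda.bindings import _nvml as nvml


@contextmanager
def unsupported_before(handle, expected_device_arch):
    # Same decision logic as the current nvml conftest, operating on a raw NVML
    # device handle so both test suites can share it.
    device_arch = nvml.device_get_architecture(handle)

    if isinstance(expected_device_arch, nvml.DeviceArch):
        expected_device_arch_int = int(expected_device_arch)
    elif expected_device_arch == "FERMI":
        expected_device_arch_int = 1
    else:
        expected_device_arch_int = 0

    if expected_device_arch in (None, "HAS_INFOROM") or device_arch == nvml.DeviceArch.UNKNOWN:
        # Outcome unknown up front: tolerate NotSupportedError and skip.
        try:
            yield
        except nvml.NotSupportedError:
            pytest.skip(f"Unsupported call on {nvml.DeviceArch(device_arch).name}")
    elif int(device_arch) < expected_device_arch_int:
        # Known-unsupported: assert that the call fails, then skip.
        with pytest.raises(nvml.NotSupportedError):
            yield
        pytest.skip(f"Unsupported before {expected_device_arch.name}")
    else:
        yield

# cuda_core/tests/system tests would then need the raw handle, e.g. via a
# hypothetical accessor (not part of the current Device API):
#
#     with unsupported_before(device.nvml_handle, system.DeviceArch.VOLTA):
#         ...
# (system.DeviceArch aliases nvml.DeviceArch in this PR, so the isinstance check still works.)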

Option 3: Accept the duplication

~40 lines duplicated in 2 files. The implementations use different abstraction layers, so some might argue they're legitimately different.

Pros: No new abstractions.
Cons: Bug fixes / enhancements need to be applied twice.


I'd lean toward Option 1 since it keeps the CUDA-agnostic design while eliminating the duplication. Happy to pair on this if you want to explore it further.

def unsupported_before(device: int, expected_device_arch: nvml.DeviceArch | str | None):
    device_arch = nvml.device_get_architecture(device)

    if isinstance(expected_device_arch, nvml.DeviceArch):
        expected_device_arch_int = int(expected_device_arch)
    elif expected_device_arch == "FERMI":
        expected_device_arch_int = 1
    else:
        expected_device_arch_int = 0

    if expected_device_arch is None or expected_device_arch == "HAS_INFOROM" or device_arch == nvml.DeviceArch.UNKNOWN:
        # In this case, we don't /know/ if it will fail, but we are ok if it
        # does or does not.

        # TODO: There are APIs that are documented as supported only if the
        # device has an InfoROM, but I couldn't find a way to detect that. For
        # now, they are just handled as "possibly failing".

        try:
            yield
        except nvml.NotSupportedError:
            pytest.skip(
                f"Unsupported call for device architecture {nvml.DeviceArch(device_arch).name} "
                f"on device '{nvml.device_get_name(device)}'"
            )
    elif int(device_arch) < expected_device_arch_int:
        # In this case, we /know/ it will fail, and we want to assert that it does.
        with pytest.raises(nvml.NotSupportedError):
            yield
        pytest.skip(f"Unsupported before {expected_device_arch.name}, got {nvml.device_get_name(device)}")
    else:
        # In this case, we /know/ it should work, and if it fails, the test should fail.
        yield
11 changes: 3 additions & 8 deletions cuda_bindings/tests/nvml/test_compute_mode.py
@@ -7,6 +7,8 @@
import pytest
from cuda.bindings import _nvml as nvml

from .conftest import unsupported_before

COMPUTE_MODES = [
nvml.ComputeMode.COMPUTEMODE_DEFAULT,
nvml.ComputeMode.COMPUTEMODE_PROHIBITED,
@@ -16,18 +18,11 @@

@pytest.mark.skipif(sys.platform == "win32", reason="Test not supported on Windows")
def test_compute_mode_supported_nonroot(all_devices):
skip_reasons = set()
for device in all_devices:
try:
with unsupported_before(device, None):
original_compute_mode = nvml.device_get_compute_mode(device)
except nvml.NotSupportedError:
skip_reasons.add(f"nvmlDeviceGetComputeMode not supported for device {device}")
continue

for cm in COMPUTE_MODES:
with pytest.raises(nvml.NoPermissionError):
nvml.device_set_compute_mode(device, cm)
assert original_compute_mode == nvml.device_get_compute_mode(device), "Compute mode shouldn't have changed"

if skip_reasons:
pytest.skip(" ; ".join(skip_reasons))
18 changes: 5 additions & 13 deletions cuda_bindings/tests/nvml/test_gpu.py
@@ -5,6 +5,7 @@
from cuda.bindings import _nvml as nvml

from . import util
from .conftest import unsupported_before


def test_gpu_get_module_id(nvml_init):
@@ -23,23 +24,14 @@ def test_gpu_get_module_id(nvml_init):


def test_gpu_get_platform_info(all_devices):
skip_reasons = set()
for device in all_devices:
if util.is_vgpu(device):
skip_reasons.add(f"Not supported on vGPU device {device}")
continue
pytest.skip(f"Not supported on vGPU device {device}")

# TODO
# if device.feature_dict.board.chip < board_class.Architecture.Blackwell:
# test_utils.skip_test("Not supported on chip before Blackwell")
# Documentation says Blackwell or newer only, but this does seem to pass
# on some newer GPUs.

try:
with unsupported_before(device, None):
platform_info = nvml.device_get_platform_info(device)
except nvml.NotSupportedError:
skip_reasons.add(f"Not supported returned, linkely NVLink is disable for {device}")
continue

assert isinstance(platform_info, nvml.PlatformInfo_v2)

if skip_reasons:
pytest.skip(" ; ".join(skip_reasons))
33 changes: 19 additions & 14 deletions cuda_bindings/tests/nvml/test_pynvml.py
@@ -10,6 +10,7 @@
from cuda.bindings import _nvml as nvml

from . import util
from .conftest import unsupported_before

XFAIL_LEGACY_NVLINK_MSG = "Legacy NVLink test expected to fail."

@@ -66,7 +67,8 @@ def test_device_get_handle_by_pci_bus_id(ngpus, pci_info):
def test_device_get_memory_affinity(handles, scope):
size = 1024
for handle in handles:
node_set = nvml.device_get_memory_affinity(handle, size, scope)
with unsupported_before(handle, nvml.DeviceArch.KEPLER):
node_set = nvml.device_get_memory_affinity(handle, size, scope)
assert node_set is not None
assert len(node_set) == size

@@ -76,7 +78,8 @@ def test_device_get_cpu_affinity_within_scope(handles, scope):
def test_device_get_cpu_affinity_within_scope(handles, scope):
size = 1024
for handle in handles:
cpu_set = nvml.device_get_cpu_affinity_within_scope(handle, size, scope)
with unsupported_before(handle, nvml.DeviceArch.KEPLER):
cpu_set = nvml.device_get_cpu_affinity_within_scope(handle, size, scope)
assert cpu_set is not None
assert len(cpu_set) == size

@@ -136,22 +139,22 @@ def test_device_get_p2p_status(handles, index):

def test_device_get_power_usage(ngpus, handles):
for i in range(ngpus):
try:
# Note: documentation says this is supported on Fermi or newer,
# but in practice it fails on some later architectures.
with unsupported_before(handles[i], None):
power_mwatts = nvml.device_get_power_usage(handles[i])
except nvml.NotSupportedError:
pytest.skip("device_get_power_usage not supported")
assert power_mwatts >= 0.0


def test_device_get_total_energy_consumption(ngpus, handles):
for i in range(ngpus):
try:
with unsupported_before(handles[i], nvml.DeviceArch.VOLTA):
energy_mjoules1 = nvml.device_get_total_energy_consumption(handles[i])
except nvml.NotSupportedError:
pytest.skip("device_get_total_energy_consumption not supported")

for j in range(10): # idle for 150 ms
time.sleep(0.015) # and check for increase every 15 ms
energy_mjoules2 = nvml.device_get_total_energy_consumption(handles[i])
with unsupported_before(handles[i], nvml.DeviceArch.VOLTA):
energy_mjoules2 = nvml.device_get_total_energy_consumption(handles[i])
assert energy_mjoules2 >= energy_mjoules1
if energy_mjoules2 > energy_mjoules1:
break
@@ -182,7 +185,8 @@ def test_device_get_memory_info(ngpus, handles):

def test_device_get_utilization_rates(ngpus, handles):
for i in range(ngpus):
urate = nvml.device_get_utilization_rates(handles[i])
with unsupported_before(handles[i], "FERMI"):
urate = nvml.device_get_utilization_rates(handles[i])
assert urate.gpu >= 0
assert urate.memory >= 0

@@ -239,7 +243,8 @@ def test_device_get_utilization_rates(ngpus, handles):

def test_device_get_pcie_throughput(ngpus, handles):
for i in range(ngpus):
tx_bytes_tp = nvml.device_get_pcie_throughput(handles[i], nvml.PcieUtilCounter.PCIE_UTIL_TX_BYTES)
with unsupported_before(handles[i], nvml.DeviceArch.MAXWELL):
tx_bytes_tp = nvml.device_get_pcie_throughput(handles[i], nvml.PcieUtilCounter.PCIE_UTIL_TX_BYTES)
assert tx_bytes_tp >= 0
rx_bytes_tp = nvml.device_get_pcie_throughput(handles[i], nvml.PcieUtilCounter.PCIE_UTIL_RX_BYTES)
assert rx_bytes_tp >= 0
@@ -271,10 +276,10 @@ def test_device_get_pcie_throughput(ngpus, handles):
def test_device_get_nvlink_capability(ngpus, handles, cap_type):
for i in range(ngpus):
for j in range(nvml.NVLINK_MAX_LINKS):
try:
# According to the documentation, this should be supported on PASCAL or newer,
# but it also seems to fail on some newer architectures.
with unsupported_before(handles[i], None):
cap = nvml.device_get_nvlink_capability(handles[i], j, cap_type)
except nvml.NotSupportedError:
pytest.skip("NVLink capability not supported")
assert cap >= 0


57 changes: 13 additions & 44 deletions cuda_core/cuda/core/system/_device.pyx
@@ -21,6 +21,7 @@ ClocksEventReasons = nvml.ClocksEventReasons
ClockType = nvml.ClockType
CoolerControl = nvml.CoolerControl
CoolerTarget = nvml.CoolerTarget
DeviceArch = nvml.DeviceArch
EventType = nvml.EventType
FanControlPolicy = nvml.FanControlPolicy
FieldId = nvml.FieldId
@@ -45,41 +46,6 @@ include "_performance.pxi"
include "_temperature.pxi"


class DeviceArchitecture:
"""
Device architecture enumeration.
"""

def __init__(self, architecture: int):
try:
self._architecture = nvml.DeviceArch(architecture)
except ValueError:
self._architecture = None

@property
def id(self) -> int:
"""
The numeric id of the device architecture.

Returns -1 if the device is unknown.
"""
if self._architecture is None:
return -1
return int(self._architecture)

@property
def name(self) -> str:
"""
The name of the device architecture.

Returns "Unlisted" if the device is unknown.
"""
if self._architecture is None:
return "Unlisted"
name = self._architecture.name
return name[name.rfind("_") + 1 :].title()


cdef class MemoryInfo:
"""
Memory allocation information for a device.
@@ -952,16 +918,15 @@ cdef class Device:
return [Pstates(x) for x in nvml.device_get_supported_performance_states(self._handle)]

@property
def architecture(self) -> DeviceArchitecture:
def arch(self) -> DeviceArch:
"""
Device architecture. For example, a Tesla V100 will report
``DeviceArchitecture.name == "Volta"``, and RTX A6000 will report
``DeviceArchitecture.name == "Ampere"``. If the device returns an
architecture that is unknown to NVML then ``DeviceArchitecture.name ==
"Unknown"`` is reported, whereas an architecture that is unknown to
cuda.core.system is reported as ``DeviceArchitecture.name == "Unlisted"``.
Device architecture.

For example, a Tesla V100 will report ``Device.arch.name == "VOLTA"``,
and an RTX A6000 will report ``Device.arch.name == "AMPERE"``.
"""
return DeviceArchitecture(nvml.device_get_architecture(self._handle))
return DeviceArch(nvml.device_get_architecture(self._handle))

@property
def bar1_memory_info(self) -> BAR1MemoryInfo:
@@ -1027,6 +992,8 @@ cdef class Device:
"""
Retrieves the globally unique board serial number associated with this
device's board.

For all products with an InfoROM.
"""
return nvml.device_get_serial(self._handle)

@@ -1268,6 +1235,8 @@ cdef class Device:
"""
Get the addressing mode of the device.

For Turing or newer fully supported devices.

Addressing modes can be one of:

- :attr:`AddressingMode.DEVICE_ADDRESSING_MODE_HMM`: System allocated
@@ -1486,7 +1455,7 @@ __all__ = [
"CoolerInfo",
"CoolerTarget",
"Device",
"DeviceArchitecture",
"DeviceArch",
"DeviceAttributes",
"DeviceEvents",
"EventData",
2 changes: 1 addition & 1 deletion cuda_core/docs/source/api.rst
@@ -103,7 +103,7 @@ CUDA system information and NVIDIA Management Library (NVML)
system.CoolerControl
system.CoolerInfo
system.CoolerTarget
system.DeviceArchitecture
system.DeviceArch
system.DeviceAttributes
system.DeviceEvents
system.EventData
39 changes: 39 additions & 0 deletions cuda_core/tests/system/conftest.py
@@ -3,9 +3,48 @@
# SPDX-License-Identifier: Apache-2.0


from contextlib import contextmanager

import pytest
from cuda.core import system

skip_if_nvml_unsupported = pytest.mark.skipif(
not system.CUDA_BINDINGS_NVML_IS_COMPATIBLE, reason="NVML support requires cuda.bindings version 12.9.6+ or 13.1.2+"
)


@contextmanager
def unsupported_before(device, expected_device_arch):
    device_arch = device.arch

    if isinstance(expected_device_arch, system.DeviceArch):
        expected_device_arch_int = int(expected_device_arch)
    elif expected_device_arch == "FERMI":
        expected_device_arch_int = 1
    else:
        expected_device_arch_int = 0

    if (
        expected_device_arch is None
        or expected_device_arch == "HAS_INFOROM"
        or device_arch == system.DeviceArch.UNKNOWN
    ):
        # In this case, we don't /know/ if it will fail, but we are ok if it
        # does or does not.

        # TODO: There are APIs that are documented as supported only if the
        # device has an InfoROM, but I couldn't find a way to detect that. For now, they
        # are just handled as "possibly failing".

        try:
            yield
        except system.NotSupportedError:
            pytest.skip(f"Unsupported call for device architecture {device_arch.name} on device '{device.name}'")
    elif int(device_arch) < expected_device_arch_int:
        # In this case, we /know/ it will fail, and we want to assert that it does.
        with pytest.raises(system.NotSupportedError):
            yield
        pytest.skip(f"Unsupported before {expected_device_arch.name}, got {device_arch.name}")
    else:
        # In this case, we /know/ it should work, and if it fails, the test should fail.
        yield