diff --git a/cuda_bindings/cuda/bindings/_nvml.pxd b/cuda_bindings/cuda/bindings/_nvml.pxd
index a0e6ed9ad9..4dd1c728a2 100644
--- a/cuda_bindings/cuda/bindings/_nvml.pxd
+++ b/cuda_bindings/cuda/bindings/_nvml.pxd
@@ -14,6 +14,8 @@ from .cy_nvml cimport *
 ###############################################################################
 
 ctypedef nvmlDramEncryptionInfo_v1_t DramEncryptionInfo_v1
+ctypedef nvmlMarginTemperature_v1_t MarginTemperature_v1
+ctypedef nvmlFanSpeedInfo_v1_t FanSpeedInfo_v1
 ctypedef nvmlConfComputeSetKeyRotationThresholdInfo_v1_t ConfComputeSetKeyRotationThresholdInfo_v1
 ctypedef nvmlSystemDriverBranchInfo_v1_t SystemDriverBranchInfo_v1
 ctypedef nvmlTemperature_v1_t Temperature_v1
@@ -196,14 +198,12 @@ cpdef object device_get_supported_graphics_clocks(intptr_t device, unsigned int
 cpdef tuple device_get_auto_boosted_clocks_enabled(intptr_t device)
 cpdef unsigned int device_get_fan_speed(intptr_t device) except? 0
 cpdef unsigned int device_get_fan_speed_v2(intptr_t device, unsigned int fan) except? 0
-cpdef object device_get_fan_speed_rpm(intptr_t device)
 cpdef unsigned int device_get_target_fan_speed(intptr_t device, unsigned int fan) except? 0
 cpdef tuple device_get_min_max_fan_speed(intptr_t device)
 cpdef unsigned int device_get_fan_control_policy_v2(intptr_t device, unsigned int fan) except *
 cpdef unsigned int device_get_num_fans(intptr_t device) except? 0
 cpdef object device_get_cooler_info(intptr_t device)
 cpdef unsigned int device_get_temperature_threshold(intptr_t device, int threshold_type) except? 0
-cpdef object device_get_margin_temperature(intptr_t device)
 cpdef object device_get_thermal_settings(intptr_t device, unsigned int sensor_ind_ex)
 cpdef int device_get_performance_state(intptr_t device) except? -1
 cpdef unsigned long long device_get_current_clocks_event_reasons(intptr_t device) except? 0
@@ -214,7 +214,6 @@ cpdef int device_get_mem_clk_vf_offset(intptr_t device) except? 0
 cpdef tuple device_get_min_max_clock_of_p_state(intptr_t device, int type, int pstate)
 cpdef tuple device_get_gpc_clk_min_max_vf_offset(intptr_t device)
 cpdef tuple device_get_mem_clk_min_max_vf_offset(intptr_t device)
-cpdef object device_get_clock_offsets(intptr_t device)
 cpdef device_set_clock_offsets(intptr_t device, intptr_t info)
 cpdef object device_get_performance_modes(intptr_t device)
 cpdef object device_get_current_clock_freqs(intptr_t device)
diff --git a/cuda_bindings/cuda/bindings/_nvml.pyx b/cuda_bindings/cuda/bindings/_nvml.pyx
index dbb87e8d0b..ea8f56dc45 100644
--- a/cuda_bindings/cuda/bindings/_nvml.pyx
+++ b/cuda_bindings/cuda/bindings/_nvml.pyx
@@ -1198,16 +1198,16 @@ class PowerMizerMode(_IntEnum):
 
 
 class DeviceArch(_IntEnum):
-    DEVICE_ARCH_KEPLER = 2
-    DEVICE_ARCH_MAXWELL = 3
-    DEVICE_ARCH_PASCAL = 4
-    DEVICE_ARCH_VOLTA = 5
-    DEVICE_ARCH_TURING = 6
-    DEVICE_ARCH_AMPERE = 7
-    DEVICE_ARCH_ADA = 8
-    DEVICE_ARCH_HOPPER = 9
-    DEVICE_ARCH_BLACKWELL = 10
-    DEVICE_ARCH_UNKNOWN = 0xFFFFFFFF
+    KEPLER = 2
+    MAXWELL = 3
+    PASCAL = 4
+    VOLTA = 5
+    TURING = 6
+    AMPERE = 7
+    ADA = 8
+    HOPPER = 9
+    BLACKWELL = 10
+    UNKNOWN = 0xFFFFFFFF
 
 
 class BusType(_IntEnum):
@@ -1361,10 +1361,9 @@ class SystemEventType(_IntEnum):
     SYSTEM_EVENT_TYPE_GPU_DRIVER_BIND = 0x0000000000000002
 
 
-class ClocksEvent(_IntEnum):
+class ClocksEventReasons(_IntEnum):
     CLOCKS_EVENT_REASON_GPU_IDLE = 0x0000000000000001
     CLOCKS_EVENT_REASON_APPLICATIONS_CLOCKS_SETTING = 0x0000000000000002
-    CLOCKS_THROTTLE_REASON_USER_DEFINED_CLOCKS = 0x0000000000000002
     CLOCKS_EVENT_REASON_SW_POWER_CAP = 0x0000000000000004
     CLOCKS_THROTTLE_REASON_HW_SLOWDOWN = 0x0000000000000008
     CLOCKS_EVENT_REASON_SYNC_BOOST = 0x0000000000000010
@@ -1373,13 +1372,6 @@ class ClocksEvent(_IntEnum):
     CLOCKS_THROTTLE_REASON_HW_POWER_BRAKE_SLOWDOWN = 0x0000000000000080
     CLOCKS_EVENT_REASON_DISPLAY_CLOCK_SETTING = 0x0000000000000100
     CLOCKS_EVENT_REASON_NONE = 0x0000000000000000
-    CLOCKS_THROTTLE_REASON_GPU_IDLE = 0x0000000000000001
-    CLOCKS_THROTTLE_REASON_APPLICATIONS_CLOCKS_SETTING = 0x0000000000002
-    CLOCKS_THROTTLE_REASON_SYNC_BOOST = 0x00000000000010
-    CLOCKS_THROTTLE_REASON_SW_POWER_CAP = 0x00000000000004
-    CLOCKS_THROTTLE_REASON_SW_THERMAL_SLOWDOWN = 0x00000000000020
-    CLOCKS_THROTTLE_REASON_DISPLAY_CLOCK_SETTING = 0x00000000000100
-    CLOCKS_THROTTLE_REASON_NONE = 0x0000000000000000
 
 
 class EncoderQuery(_IntEnum):
@@ -4295,138 +4287,6 @@ cdef class CoolerInfo_v1:
         return obj
 
 
-cdef _get_margin_temperature_v1_dtype_offsets():
-    cdef nvmlMarginTemperature_v1_t pod = nvmlMarginTemperature_v1_t()
-    return _numpy.dtype({
-        'names': ['version', 'margin_temperature'],
-        'formats': [_numpy.uint32, _numpy.int32],
-        'offsets': [
-            (<intptr_t>&(pod.version)) - (<intptr_t>&pod),
-            (<intptr_t>&(pod.marginTemperature)) - (<intptr_t>&pod),
-        ],
-        'itemsize': sizeof(nvmlMarginTemperature_v1_t),
-    })
-
-margin_temperature_v1_dtype = _get_margin_temperature_v1_dtype_offsets()
-
-cdef class MarginTemperature_v1:
-    """Empty-initialize an instance of `nvmlMarginTemperature_v1_t`.
-
-
-    .. seealso:: `nvmlMarginTemperature_v1_t`
-    """
-    cdef:
-        nvmlMarginTemperature_v1_t *_ptr
-        object _owner
-        bint _owned
-        bint _readonly
-
-    def __init__(self):
-        self._ptr = <nvmlMarginTemperature_v1_t *>calloc(1, sizeof(nvmlMarginTemperature_v1_t))
-        if self._ptr == NULL:
-            raise MemoryError("Error allocating MarginTemperature_v1")
-        self._owner = None
-        self._owned = True
-        self._readonly = False
-
-    def __dealloc__(self):
-        cdef nvmlMarginTemperature_v1_t *ptr
-        if self._owned and self._ptr != NULL:
-            ptr = self._ptr
-            self._ptr = NULL
-            free(ptr)
-
-    def __repr__(self):
-        return f"<{__name__}.MarginTemperature_v1 object at {hex(id(self))}>"
-
-    @property
-    def ptr(self):
-        """Get the pointer address to the data as Python :class:`int`."""
-        return <intptr_t>(self._ptr)
-
-    cdef intptr_t _get_ptr(self):
-        return <intptr_t>(self._ptr)
-
-    def __int__(self):
-        return <intptr_t>(self._ptr)
-
-    def __eq__(self, other):
-        cdef MarginTemperature_v1 other_
-        if not isinstance(other, MarginTemperature_v1):
-            return False
-        other_ = other
-        return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlMarginTemperature_v1_t)) == 0)
-
-    def __setitem__(self, key, val):
-        if key == 0 and isinstance(val, _numpy.ndarray):
-            self._ptr = <nvmlMarginTemperature_v1_t *>malloc(sizeof(nvmlMarginTemperature_v1_t))
-            if self._ptr == NULL:
-                raise MemoryError("Error allocating MarginTemperature_v1")
-            memcpy(<void*>self._ptr, <void*><intptr_t>val.ctypes.data, sizeof(nvmlMarginTemperature_v1_t))
-            self._owner = None
-            self._owned = True
-            self._readonly = not val.flags.writeable
-        else:
-            setattr(self, key, val)
-
-    @property
-    def version(self):
-        """int: The version number of this struct."""
-        return self._ptr[0].version
-
-    @version.setter
-    def version(self, val):
-        if self._readonly:
-            raise ValueError("This MarginTemperature_v1 instance is read-only")
-        self._ptr[0].version = val
-
-    @property
-    def margin_temperature(self):
-        """int: The margin temperature value."""
-        return self._ptr[0].marginTemperature
-
-    @margin_temperature.setter
-    def margin_temperature(self, val):
-        if self._readonly:
-            raise ValueError("This MarginTemperature_v1 instance is read-only")
-        self._ptr[0].marginTemperature = val
-
-    @staticmethod
-    def from_data(data):
-        """Create an MarginTemperature_v1 instance wrapping the given NumPy array.
-
-        Args:
-            data (_numpy.ndarray): a single-element array of dtype `margin_temperature_v1_dtype` holding the data.
-        """
-        return __from_data(data, "margin_temperature_v1_dtype", margin_temperature_v1_dtype, MarginTemperature_v1)
-
-    @staticmethod
-    def from_ptr(intptr_t ptr, bint readonly=False, object owner=None):
-        """Create an MarginTemperature_v1 instance wrapping the given pointer.
-
-        Args:
-            ptr (intptr_t): pointer address as Python :class:`int` to the data.
-            owner (object): The Python object that owns the pointer. If not provided, data will be copied.
-            readonly (bool): whether the data is read-only (to the user). default is `False`.
-        """
-        if ptr == 0:
-            raise ValueError("ptr must not be null (0)")
-        cdef MarginTemperature_v1 obj = MarginTemperature_v1.__new__(MarginTemperature_v1)
-        if owner is None:
-            obj._ptr = <nvmlMarginTemperature_v1_t *>malloc(sizeof(nvmlMarginTemperature_v1_t))
-            if obj._ptr == NULL:
-                raise MemoryError("Error allocating MarginTemperature_v1")
-            memcpy(<void*>(obj._ptr), <void*>ptr, sizeof(nvmlMarginTemperature_v1_t))
-            obj._owner = None
-            obj._owned = True
-        else:
-            obj._ptr = <nvmlMarginTemperature_v1_t *>ptr
-            obj._owner = owner
-            obj._owned = False
-        obj._readonly = readonly
-        return obj
-
-
 cdef _get_clk_mon_fault_info_dtype_offsets():
     cdef nvmlClkMonFaultInfo_t pod = nvmlClkMonFaultInfo_t()
     return _numpy.dtype({
@@ -4753,150 +4613,6 @@ cdef class ClockOffset_v1:
         return obj
 
 
-cdef _get_fan_speed_info_v1_dtype_offsets():
-    cdef nvmlFanSpeedInfo_v1_t pod = nvmlFanSpeedInfo_v1_t()
-    return _numpy.dtype({
-        'names': ['version', 'fan', 'speed'],
-        'formats': [_numpy.uint32, _numpy.uint32, _numpy.uint32],
-        'offsets': [
-            (<intptr_t>&(pod.version)) - (<intptr_t>&pod),
-            (<intptr_t>&(pod.fan)) - (<intptr_t>&pod),
-            (<intptr_t>&(pod.speed)) - (<intptr_t>&pod),
-        ],
-        'itemsize': sizeof(nvmlFanSpeedInfo_v1_t),
-    })
-
-fan_speed_info_v1_dtype = _get_fan_speed_info_v1_dtype_offsets()
-
-cdef class FanSpeedInfo_v1:
-    """Empty-initialize an instance of `nvmlFanSpeedInfo_v1_t`.
-
-
-    .. seealso:: `nvmlFanSpeedInfo_v1_t`
-    """
-    cdef:
-        nvmlFanSpeedInfo_v1_t *_ptr
-        object _owner
-        bint _owned
-        bint _readonly
-
-    def __init__(self):
-        self._ptr = <nvmlFanSpeedInfo_v1_t *>calloc(1, sizeof(nvmlFanSpeedInfo_v1_t))
-        if self._ptr == NULL:
-            raise MemoryError("Error allocating FanSpeedInfo_v1")
-        self._owner = None
-        self._owned = True
-        self._readonly = False
-
-    def __dealloc__(self):
-        cdef nvmlFanSpeedInfo_v1_t *ptr
-        if self._owned and self._ptr != NULL:
-            ptr = self._ptr
-            self._ptr = NULL
-            free(ptr)
-
-    def __repr__(self):
-        return f"<{__name__}.FanSpeedInfo_v1 object at {hex(id(self))}>"
-
-    @property
-    def ptr(self):
-        """Get the pointer address to the data as Python :class:`int`."""
-        return <intptr_t>(self._ptr)
-
-    cdef intptr_t _get_ptr(self):
-        return <intptr_t>(self._ptr)
-
-    def __int__(self):
-        return <intptr_t>(self._ptr)
-
-    def __eq__(self, other):
-        cdef FanSpeedInfo_v1 other_
-        if not isinstance(other, FanSpeedInfo_v1):
-            return False
-        other_ = other
-        return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlFanSpeedInfo_v1_t)) == 0)
-
-    def __setitem__(self, key, val):
-        if key == 0 and isinstance(val, _numpy.ndarray):
-            self._ptr = <nvmlFanSpeedInfo_v1_t *>malloc(sizeof(nvmlFanSpeedInfo_v1_t))
-            if self._ptr == NULL:
-                raise MemoryError("Error allocating FanSpeedInfo_v1")
-            memcpy(<void*>self._ptr, <void*><intptr_t>val.ctypes.data, sizeof(nvmlFanSpeedInfo_v1_t))
-            self._owner = None
-            self._owned = True
-            self._readonly = not val.flags.writeable
-        else:
-            setattr(self, key, val)
-
-    @property
-    def version(self):
-        """int: the API version number"""
-        return self._ptr[0].version
-
-    @version.setter
-    def version(self, val):
-        if self._readonly:
-            raise ValueError("This FanSpeedInfo_v1 instance is read-only")
-        self._ptr[0].version = val
-
-    @property
-    def fan(self):
-        """int: the fan index"""
-        return self._ptr[0].fan
-
-    @fan.setter
-    def fan(self, val):
-        if self._readonly:
-            raise ValueError("This FanSpeedInfo_v1 instance is read-only")
-        self._ptr[0].fan = val
-
-    @property
-    def speed(self):
-        """int: OUT: the fan speed in RPM."""
-        return self._ptr[0].speed
-
-    @speed.setter
-    def speed(self, val):
-        if self._readonly:
-            raise ValueError("This FanSpeedInfo_v1 instance is read-only")
-        self._ptr[0].speed = val
-
-    @staticmethod
-    def from_data(data):
-        """Create an FanSpeedInfo_v1 instance wrapping the given NumPy array.
-
-        Args:
-            data (_numpy.ndarray): a single-element array of dtype `fan_speed_info_v1_dtype` holding the data.
-        """
-        return __from_data(data, "fan_speed_info_v1_dtype", fan_speed_info_v1_dtype, FanSpeedInfo_v1)
-
-    @staticmethod
-    def from_ptr(intptr_t ptr, bint readonly=False, object owner=None):
-        """Create an FanSpeedInfo_v1 instance wrapping the given pointer.
-
-        Args:
-            ptr (intptr_t): pointer address as Python :class:`int` to the data.
-            owner (object): The Python object that owns the pointer. If not provided, data will be copied.
-            readonly (bool): whether the data is read-only (to the user). default is `False`.
-        """
-        if ptr == 0:
-            raise ValueError("ptr must not be null (0)")
-        cdef FanSpeedInfo_v1 obj = FanSpeedInfo_v1.__new__(FanSpeedInfo_v1)
-        if owner is None:
-            obj._ptr = <nvmlFanSpeedInfo_v1_t *>malloc(sizeof(nvmlFanSpeedInfo_v1_t))
-            if obj._ptr == NULL:
-                raise MemoryError("Error allocating FanSpeedInfo_v1")
-            memcpy(<void*>(obj._ptr), <void*>ptr, sizeof(nvmlFanSpeedInfo_v1_t))
-            obj._owner = None
-            obj._owned = True
-        else:
-            obj._ptr = <nvmlFanSpeedInfo_v1_t *>ptr
-            obj._owner = owner
-            obj._owned = False
-        obj._readonly = readonly
-        return obj
-
-
 cdef _get_device_perf_modes_v1_dtype_offsets():
     cdef nvmlDevicePerfModes_v1_t pod = nvmlDevicePerfModes_v1_t()
     return _numpy.dtype({
@@ -22669,26 +22385,6 @@ cpdef unsigned int device_get_fan_speed_v2(intptr_t device, unsigned int fan) ex
     return speed
 
 
-cpdef object device_get_fan_speed_rpm(intptr_t device):
-    """Retrieves the intended operating speed in rotations per minute (RPM) of the device's specified fan.
-
-    Args:
-        device (intptr_t): The identifier of the target device.
-
-    Returns:
-        nvmlFanSpeedInfo_v1_t: Structure specifying the index of the target fan (input) and retrieved fan speed value (output).
-
-    .. seealso:: `nvmlDeviceGetFanSpeedRPM`
-    """
-    cdef FanSpeedInfo_v1 fan_speed_py = FanSpeedInfo_v1()
-    cdef nvmlFanSpeedInfo_t *fan_speed = <nvmlFanSpeedInfo_t *><intptr_t>(fan_speed_py._get_ptr())
-    fan_speed.version = sizeof(nvmlFanSpeedInfo_v1_t) | (1 << 24)
-    with nogil:
-        __status__ = nvmlDeviceGetFanSpeedRPM(<Device>device, fan_speed)
-    check_status(__status__)
-    return fan_speed_py
-
-
 cpdef unsigned int device_get_target_fan_speed(intptr_t device, unsigned int fan) except? 0:
     """Retrieves the intended target speed of the device's specified fan.
 
@@ -22806,26 +22502,6 @@ cpdef unsigned int device_get_temperature_threshold(intptr_t device, int thresho
     return temp
 
 
-cpdef object device_get_margin_temperature(intptr_t device):
-    """Retrieves the thermal margin temperature (distance to nearest slowdown threshold).
-
-    Args:
-        device (intptr_t): The identifier of the target device.
-
-    Returns:
-        nvmlMarginTemperature_v1_t: Versioned structure in which to return the temperature reading.
-
-    .. seealso:: `nvmlDeviceGetMarginTemperature`
-    """
-    cdef MarginTemperature_v1 margin_temp_info_py = MarginTemperature_v1()
-    cdef nvmlMarginTemperature_t *margin_temp_info = <nvmlMarginTemperature_t *><intptr_t>(margin_temp_info_py._get_ptr())
-    margin_temp_info.version = sizeof(nvmlMarginTemperature_v1_t) | (1 << 24)
-    with nogil:
-        __status__ = nvmlDeviceGetMarginTemperature(<Device>device, margin_temp_info)
-    check_status(__status__)
-    return margin_temp_info_py
-
-
 cpdef object device_get_thermal_settings(intptr_t device, unsigned int sensor_ind_ex):
     """Used to execute a list of thermal system instructions.
 
@@ -23023,26 +22699,6 @@ cpdef tuple device_get_mem_clk_min_max_vf_offset(intptr_t device):
     return (min_offset, max_offset)
 
 
-cpdef object device_get_clock_offsets(intptr_t device):
-    """Retrieve min, max and current clock offset of some clock domain for a given PState.
-
-    Args:
-        device (intptr_t): The identifier of the target device.
-
-    Returns:
-        nvmlClockOffset_v1_t: Structure specifying the clock type (input) and the pstate (input) retrieved clock offset value (output), min clock offset (output) and max clock offset (output).
-
-    .. seealso:: `nvmlDeviceGetClockOffsets`
-    """
-    cdef ClockOffset_v1 info_py = ClockOffset_v1()
-    cdef nvmlClockOffset_t *info = <nvmlClockOffset_t *><intptr_t>(info_py._get_ptr())
-    info.version = sizeof(nvmlClockOffset_v1_t) | (1 << 24)
-    with nogil:
-        __status__ = nvmlDeviceGetClockOffsets(<Device>device, info)
-    check_status(__status__)
-    return info_py
-
-
 cpdef device_set_clock_offsets(intptr_t device, intptr_t info):
     """Control current clock offset of some clock domain for a given PState.
 
@@ -27173,7 +26829,7 @@ cpdef object device_get_topology_nearest_gpus(intptr_t device, unsigned int leve
     check_status_size(__status__)
     if count[0] == 0:
         return view.array(shape=(1,), itemsize=sizeof(intptr_t), format="P", mode="c")[:0]
-    cdef view.array deviceArray = view.array(shape=(deviceCount[0],), itemsize=sizeof(intptr_t), format="P", mode="c")
+    cdef view.array deviceArray = view.array(shape=(count[0],), itemsize=sizeof(intptr_t), format="P", mode="c")
     with nogil:
         __status__ = nvmlDeviceGetTopologyNearestGpus(
             <Device>device,
@@ -27206,15 +26862,13 @@ cpdef object device_get_temperature_v(intptr_t device, nvmlTemperatureSensors_t
     return temperature.temperature
 
 
-cpdef object device_get_supported_performance_states(intptr_t device, unsigned int size):
+cpdef object device_get_supported_performance_states(intptr_t device):
     """Get all supported Performance States (P-States) for the device.
 
     Args:
         device (Device): The identifier of the target device.
-        size (unsigned int): The number of states to return.
     """
-    if size == 0:
-        return view.array(shape=(1,), itemsize=sizeof(unsigned int), format="I", mode="c")[:0]
+    cdef int size = 16  # NVML_MAX_GPU_PERF_STATES
     cdef view.array pstates = view.array(shape=(size,), itemsize=sizeof(unsigned int), format="I", mode="c")
 
     # The header says "size is the size of the pstates array in bytes".
@@ -28066,3 +27720,67 @@ cpdef object system_event_set_wait(intptr_t event_set, unsigned int timeout_ms,
     check_status(__status__)
     event_data._data.resize((request[0].numEvent,))
     return event_data
+
+
+cpdef unsigned int device_get_fan_speed_rpm(intptr_t device, unsigned int fan):
+    """Retrieves the intended operating speed in rotations per minute (RPM) of the device's specified fan.
+
+    Args:
+        device (intptr_t): The identifier of the target device.
+        fan (unsigned int): The index of the fan to query.
+
+    Returns:
+        rpm (unsigned int): The fan speed in RPM.
+
+    .. seealso:: `nvmlDeviceGetFanSpeedRPM`
+    """
+    cdef nvmlFanSpeedInfo_v1_t[1] fan_speed
+    fan_speed[0].version = sizeof(nvmlFanSpeedInfo_v1_t) | (1 << 24)
+    fan_speed[0].fan = fan
+    with nogil:
+        __status__ = nvmlDeviceGetFanSpeedRPM(<Device>device, fan_speed)
+    check_status(__status__)
+    return fan_speed[0].speed
+
+
+cpdef int device_get_margin_temperature(intptr_t device):
+    """Retrieves the thermal margin temperature (distance to nearest slowdown threshold).
+
+    Args:
+        device (intptr_t): The identifier of the target device.
+
+    Returns:
+        margin_temperature (int): The margin temperature value.
+
+    .. seealso:: `nvmlDeviceGetMarginTemperature`
+    """
+    cdef nvmlMarginTemperature_v1_t[1] margin_temp_info
+    margin_temp_info[0].version = sizeof(nvmlMarginTemperature_v1_t) | (1 << 24)
+    with nogil:
+        __status__ = nvmlDeviceGetMarginTemperature(<Device>device, margin_temp_info)
+    check_status(__status__)
+    return margin_temp_info[0].marginTemperature
+
+
+cpdef object device_get_clock_offsets(intptr_t device, nvmlClockType_t clock_type, nvmlPstates_t pstate):
+    """Retrieve min, max and current clock offset of some clock domain for a given PState.
+
+    Args:
+        device (intptr_t): The identifier of the target device.
+        clock_type (nvmlClockType_t): The clock domain to query; written to the ``type`` field of the request.
+        pstate (nvmlPstates_t): The performance state to query; written to the ``pstate`` field of the request.
+
+    Returns:
+        nvmlClockOffset_v1_t: Structure specifying the clock type (input) and the pstate (input) retrieved clock offset value (output), min clock offset (output) and max clock offset (output).
+
+    .. seealso:: `nvmlDeviceGetClockOffsets`
+    """
+    cdef ClockOffset_v1 info_py = ClockOffset_v1()
+    cdef nvmlClockOffset_v1_t *info = <nvmlClockOffset_v1_t *><intptr_t>(info_py._get_ptr())
+    info.version = sizeof(nvmlClockOffset_v1_t) | (1 << 24)
+    info.type = clock_type
+    info.pstate = pstate
+    with nogil:
+        __status__ = nvmlDeviceGetClockOffsets(<Device>device, info)
+    check_status(__status__)
+    return info_py
diff --git a/cuda_core/cuda/core/system/_clock.pxi b/cuda_core/cuda/core/system/_clock.pxi
new file mode 100644
index 0000000000..911ef4ce72
--- /dev/null
+++ b/cuda_core/cuda/core/system/_clock.pxi
@@ -0,0 +1,130 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+
+cdef class ClockOffsets:
+    """
+    Read-only view of the current, minimum and maximum clock offsets, in MHz.
+    """
+
+    cdef object _clock_offset  # wrapped clock-offset object whose fields the properties read
+
+    def __init__(self, clock_offset: nvml.ClockOffset_v1):
+        self._clock_offset = clock_offset
+
+    @property
+    def clock_offset_mhz(self) -> int:
+        """
+        The current clock offset in MHz.
+        """
+        return self._clock_offset.clock_offset_m_hz
+
+    @property
+    def max_offset_mhz(self) -> int:
+        """
+        The maximum clock offset in MHz.
+        """
+        return self._clock_offset.max_clock_offset_m_hz
+
+    @property
+    def min_offset_mhz(self) -> int:
+        """
+        The minimum clock offset in MHz.
+        """
+        return self._clock_offset.min_clock_offset_m_hz
+
+
+cdef class ClockInfo:
+    """
+    Accesses clock information for one clock domain of a device.
+    """
+
+    cdef intptr_t _handle  # device handle passed to every nvml call below
+    cdef int _clock_type  # clock domain, stored as int(clock_type)
+
+    def __init__(self, handle, clock_type: ClockType):
+        self._handle = handle
+        self._clock_type = int(clock_type)
+
+    def get_current_mhz(self, clock_id: ClockId = ClockId.CURRENT) -> int:
+        """
+        Get the clock speed of this clock domain for a given clock ID, in MHz.
+
+        For Kepler™ or newer fully supported devices.
+
+        Parameters
+        ----------
+        clock_id: :class:`ClockId`
+            The clock ID to query.  Defaults to :attr:`ClockId.CURRENT`.
+
+        Returns
+        -------
+        int
+            The clock speed in MHz.
+        """
+        return nvml.device_get_clock(self._handle, self._clock_type, clock_id)
+
+    def get_max_mhz(self) -> int:
+        """
+        Get the maximum clock speed of this clock domain, in MHz.
+
+        For Fermi™ or newer fully supported devices.
+
+        Current P0 clocks (reported by :meth:`get_current_mhz`) can differ from
+        max clocks by a few MHz.
+
+        Returns
+        -------
+        int
+            The maximum clock speed in MHz.
+        """
+        return nvml.device_get_max_clock_info(self._handle, self._clock_type)
+
+    def get_max_customer_boost_mhz(self) -> int:
+        """
+        Get the maximum customer boost clock speed of this clock domain, in MHz.
+
+        For Pascal™ or newer fully supported devices.
+
+        Returns
+        -------
+        int
+            The maximum customer boost clock speed in MHz.
+        """
+        return nvml.device_get_max_customer_boost_clock(self._handle, self._clock_type)
+
+    def get_min_max_clock_of_pstate_mhz(self, pstate: Pstates) -> tuple[int, int]:
+        """
+        Get the minimum and maximum clock speeds for this clock domain
+        at a given performance state (Pstate), in MHz.
+
+        Parameters
+        ----------
+        pstate: :class:`Pstates`
+            The performance state to query.
+
+        Returns
+        -------
+        tuple[int, int]
+            A tuple containing the minimum and maximum clock speeds in MHz.
+        """
+        return nvml.device_get_min_max_clock_of_p_state(self._handle, self._clock_type, pstate)
+
+    def get_offsets(self, pstate: Pstates) -> ClockOffsets:
+        """
+        Retrieve min, max and current clock offset of this clock domain for a given Pstate.
+
+        For Maxwell™ or newer fully supported devices.
+
+        Parameters
+        ----------
+        pstate: :class:`Pstates`
+            The performance state to query.
+
+        Returns
+        -------
+        ClockOffsets
+            An object with the min, max and current clock offset.
+        """
+        return ClockOffsets(nvml.device_get_clock_offsets(self._handle, self._clock_type, pstate))
diff --git a/cuda_core/cuda/core/system/_cooler.pxi b/cuda_core/cuda/core/system/_cooler.pxi
new file mode 100644
index 0000000000..4d49f7ae9e
--- /dev/null
+++ b/cuda_core/cuda/core/system/_cooler.pxi
@@ -0,0 +1,31 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+
+cdef class CoolerInfo:
+    cdef object _cooler_info  # wrapped cooler-info object read by the properties below
+
+    def __init__(self, cooler_info: nvml.CoolerInfo_v1):
+        self._cooler_info = cooler_info
+
+    @property
+    def signal_type(self) -> CoolerControl:
+        """
+        The cooler's control signal characteristics.
+
+        The possible types are restricted, variable and toggle.  See
+        :class:`CoolerControl` for details.
+        """
+        return CoolerControl(self._cooler_info.signal_type)
+
+    @property
+    def target(self) -> list[CoolerTarget]:
+        """
+        The targets that the cooler controls, one entry per bit set in the target bitmask.
+
+        Targets may be GPU, Memory, Power Supply, or all of these.  See
+        :class:`CoolerTarget` for details.
+        """
+        cdef uint64_t[1] targets = [self._cooler_info.target]
+        return [CoolerTarget(1 << ev) for ev in _unpack_bitmask(targets)]
diff --git a/cuda_core/cuda/core/system/_device.pyx b/cuda_core/cuda/core/system/_device.pyx
index b013ef79ca..71cb35b907 100644
--- a/cuda_core/cuda/core/system/_device.pyx
+++ b/cuda_core/cuda/core/system/_device.pyx
@@ -12,19 +12,37 @@ from cuda.bindings import _nvml as nvml
 
 from ._nvml_context cimport initialize
 
-include "_device_utils.pxi"
-include "_inforom.pxi"
-
 
 AddressingMode = nvml.DeviceAddressingModeType
+AffinityScope = nvml.AffinityScope
 BrandType = nvml.BrandType
+ClockId = nvml.ClockId
+ClocksEventReasons = nvml.ClocksEventReasons
+ClockType = nvml.ClockType
+CoolerControl = nvml.CoolerControl
+CoolerTarget = nvml.CoolerTarget
 EventType = nvml.EventType
+FanControlPolicy = nvml.FanControlPolicy
 FieldId = nvml.FieldId
 GpuP2PCapsIndex = nvml.GpuP2PCapsIndex
 GpuP2PStatus = nvml.GpuP2PStatus
 GpuTopologyLevel = nvml.GpuTopologyLevel
 InforomObject = nvml.InforomObject
 PcieUtilCounter = nvml.PcieUtilCounter
+Pstates = nvml.Pstates
+TemperatureSensors = nvml.TemperatureSensors
+TemperatureThresholds = nvml.TemperatureThresholds
+ThermalController = nvml.ThermalController
+ThermalTarget = nvml.ThermalTarget
+
+
+include "_clock.pxi"
+include "_cooler.pxi"
+include "_device_utils.pxi"
+include "_fan.pxi"
+include "_inforom.pxi"
+include "_performance.pxi"
+include "_temperature.pxi"
 
 
 class DeviceArchitecture:
@@ -752,6 +770,187 @@ cdef class Device:
             device._handle = handle
             yield device
 
+    def get_memory_affinity(self, scope: AffinityScope=AffinityScope.NODE) -> list[int]:
+        """
+        Return the indices of the NUMA nodes or CPU sockets that have the ideal
+        memory affinity for this device.
+
+        For Kepler™ or newer fully supported devices.
+
+        Supported on Linux only.
+
+        When the requested ``scope`` does not apply to the target topology,
+        the memory affinity of the device's immediate non-I/O ancestor is
+        reported instead.
+        """
+        cdef unsigned int mask_words = <unsigned int>ceil(cpu_count() / 64)
+        affinity_mask = nvml.device_get_memory_affinity(
+            self._handle,
+            mask_words,
+            scope,
+        )
+        return _unpack_bitmask(affinity_mask)
+
+    def get_cpu_affinity(self, scope: AffinityScope=AffinityScope.NODE) -> list[int]:
+        """
+        Retrieves a list of indices of the CPUs with the ideal CPU affinity
+        for the device, within the given NUMA node or socket scope.
+
+        For Kepler™ or newer fully supported devices.
+
+        Supported on Linux only.
+
+        If requested scope is not applicable to the target topology, the API
+        will fall back to reporting the CPU affinity for the immediate non-I/O
+        ancestor of the device.
+        """
+        return _unpack_bitmask(
+            nvml.device_get_cpu_affinity_within_scope(
+                self._handle,
+                <unsigned int>ceil(cpu_count() / 64),
+                scope,
+            )
+        )
+
+    def set_cpu_affinity(self) -> None:
+        """
+        Sets the ideal CPU affinity for the calling thread and this device.
+
+        For Kepler™ or newer fully supported devices.
+
+        Supported on Linux only.
+        """
+        nvml.device_set_cpu_affinity(self._handle)
+
+    def clear_cpu_affinity(self) -> None:
+        """
+        Clears all CPU affinity bindings for the calling thread.
+
+        For Kepler™ or newer fully supported devices.
+
+        Supported on Linux only.
+        """
+        nvml.device_clear_cpu_affinity(self._handle)
+
+    @property
+    def numa_node_id(self) -> int:
+        """
+        The NUMA node ID of this GPU device.
+
+        This only applies to platforms where the GPUs are NUMA nodes.
+        """
+        return nvml.device_get_numa_node_id(self._handle)
+
+    def clock(self, clock_type: ClockType) -> ClockInfo:
+        """
+        Get an object to query and manage the clock of the given type on this device.
+        """
+        return ClockInfo(self._handle, clock_type)
+
+    def get_auto_boosted_clocks_enabled(self) -> tuple[bool, bool]:
+        """
+        Report whether auto boosted clocks are enabled on this device.
+
+        For Kepler™ or newer fully supported devices.
+
+        Auto Boosted clocks are enabled by default on some hardware, allowing
+        the GPU to run at higher clock rates to maximize performance as thermal
+        limits allow.
+
+        On Pascal™ and newer hardware, Auto Boosted clocks are controlled
+        through application clocks. Use :meth:`set_application_clocks` and
+        :meth:`reset_application_clocks` to control Auto Boost behavior.
+
+        Returns
+        -------
+        bool
+            The current state of Auto Boosted clocks
+        bool
+            The default Auto Boosted clocks behavior
+        """
+        enabled = nvml.EnableState.FEATURE_ENABLED
+        is_on, is_default_on = nvml.device_get_auto_boosted_clocks_enabled(self._handle)
+        return is_on == enabled, is_default_on == enabled
+
+    def get_current_clock_event_reasons(self) -> list[ClocksEventReasons]:
+        """
+        Return the clocks event reasons that are currently active.
+
+        For all fully supported products.
+        """
+        cdef uint64_t[1] mask
+        mask[0] = nvml.device_get_current_clocks_event_reasons(self._handle)
+        return [ClocksEventReasons(1 << bit) for bit in _unpack_bitmask(mask)]
+
+    def get_supported_clock_event_reasons(self) -> list[ClocksEventReasons]:
+        """
+        Return the clocks event reasons that this device can report through
+        :meth:`get_current_clock_event_reasons`.
+
+        For all fully supported products.
+
+        This method is not supported in virtual machines running virtual GPU (vGPU).
+        """
+        cdef uint64_t[1] mask
+        mask[0] = nvml.device_get_supported_clocks_event_reasons(self._handle)
+        return [ClocksEventReasons(1 << bit) for bit in _unpack_bitmask(mask)]
+
+    def fan(self, fan: int = 0) -> FanInfo:
+        """Get an object to query and manage fan ``fan``; raise ``ValueError`` if out of range."""
+        # Query the fan count once: it feeds both the range check and the message.
+        num_fans = self.num_fans
+        if fan < 0 or fan >= num_fans:
+            raise ValueError(f"Fan index {fan} is out of range [0, {num_fans})")
+        return FanInfo(self._handle, fan)
+
+    @property
+    def num_fans(self) -> int:
+        """
+        The number of fans on this device.
+        """
+        return nvml.device_get_num_fans(self._handle)
+
+    @property
+    def cooler(self) -> CoolerInfo:
+        """
+        Information about the cooler on this device.
+        """
+        return CoolerInfo(nvml.device_get_cooler_info(self._handle))
+
+    @property
+    def temperature(self) -> Temperature:
+        """
+        Accessor for temperature information on this device.
+        """
+        return Temperature(self._handle)
+
+    @property
+    def performance_state(self) -> Pstates:
+        """
+        The current performance state of this device.
+
+        For Fermi™ or newer fully supported devices.
+
+        See :class:`Pstates` for possible performance states.
+        """
+        return Pstates(nvml.device_get_performance_state(self._handle))
+
+    @property
+    def dynamic_pstates_info(self) -> GpuDynamicPstatesInfo:
+        """
+        Performance monitor samples from the associated subdevice.
+        """
+        return GpuDynamicPstatesInfo(nvml.device_get_dynamic_pstates_info(self._handle))
+
+    def get_supported_pstates(self) -> list[Pstates]:
+        """
+        Return every Performance State (P-State) supported by this device.
+
+        The returned list contains a contiguous list of valid P-States supported by
+        the device.
+        """
+        return list(map(Pstates, nvml.device_get_supported_performance_states(self._handle)))
+
     @property
     def architecture(self) -> DeviceArchitecture:
         """
@@ -775,22 +974,6 @@ cdef class Device:
         """
         return BAR1MemoryInfo(nvml.device_get_bar1_memory_info(self._handle))
 
-    @property
-    def cpu_affinity(self) -> list[int]:
-        """
-        Get a list containing the CPU indices to which the GPU is directly connected.
-
-        Examples
-        --------
-        >>> Device(index=0).cpu_affinity
-        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
-         40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59]
-        """
-        return _unpack_bitmask(nvml.device_get_cpu_affinity(
-            self._handle,
-            <unsigned int>ceil(cpu_count() / 64),
-        ))
-
     @property
     def cuda_compute_capability(self) -> tuple[int, int]:
         """
@@ -1038,6 +1221,130 @@ cdef class Device:
             device._handle = handle
             yield device
 
+    @property
+    def index(self) -> int:
+        """
+        The NVML index of this device.
+
+        Valid indices are derived from the count returned by
+        :meth:`Device.get_device_count`.  For example, if ``get_device_count()``
+        returns 2, the valid indices are 0 and 1, corresponding to GPU 0 and GPU
+        1.
+
+        The order in which NVML enumerates devices has no guarantees of
+        consistency between reboots. For that reason, it is recommended that
+        devices be looked up by their PCI IDs or GPU UUID.
+
+        Note: The NVML index may not correlate with other APIs, such as the CUDA
+        device index.
+        """
+        return nvml.device_get_index(self._handle)
+
+    @property
+    def module_id(self) -> int:
+        """
+        A unique identifier for the device module on the baseboard.
+
+        This API retrieves a unique identifier for each GPU module that exists
+        on a given baseboard.  For non-baseboard products, this ID is always
+        0.
+        """
+        return nvml.device_get_module_id(self._handle)
+
+    @property
+    def minor_number(self) -> int:
+        """
+        The minor number of this device.
+
+        Supported on Linux only.
+
+        The minor number is used by the Linux device driver to identify the
+        device node in ``/dev/nvidiaX``.
+        """
+        return nvml.device_get_minor_number(self._handle)
+
+    @property
+    def addressing_mode(self) -> AddressingMode:
+        """
+        The addressing mode of this device.
+
+        Addressing modes can be one of:
+
+        - :attr:`AddressingMode.DEVICE_ADDRESSING_MODE_HMM`: System allocated
+          memory (``malloc``, ``mmap``) is addressable from the device (GPU), via
+          software-based mirroring of the CPU's page tables, on the GPU.
+        - :attr:`AddressingMode.DEVICE_ADDRESSING_MODE_ATS`: System allocated
+          memory (``malloc``, ``mmap``) is addressable from the device (GPU), via
+          Address Translation Services. This means that there is (effectively) a
+          single set of page tables, and the CPU and GPU both use them.
+        - :attr:`AddressingMode.DEVICE_ADDRESSING_MODE_NONE`: Neither HMM nor ATS
+          is active.
+        """
+        return AddressingMode(nvml.device_get_addressing_mode(self._handle).value)
+
+    @property
+    def display_mode(self) -> bool:
+        """
+        The display mode for this device.
+
+        ``True`` when a physical display (e.g. monitor) is currently connected
+        to any of the device's connectors, ``False`` otherwise.
+        """
+        return nvml.device_get_display_mode(self._handle) == nvml.EnableState.FEATURE_ENABLED
+
+    @property
+    def display_active(self) -> bool:
+        """
+        The display active status for this device.
+
+        ``True`` when a display is initialized on the device.  For example,
+        when X Server is attached to this device and has allocated memory for
+        the screen.
+
+        Display can be active even when no monitor is physically attached.
+        """
+        return nvml.device_get_display_active(self._handle) == nvml.EnableState.FEATURE_ENABLED
+
+    @property
+    def repair_status(self) -> RepairStatus:
+        """
+        The repair status for TPC/Channel repair.
+
+        For Ampere™ or newer fully supported devices.
+        """
+        return RepairStatus(self._handle)
+
+    @property
+    def inforom(self) -> InforomInfo:
+        """
+        Accessor for this device's InfoROM information.
+
+        For all products with an InfoROM.
+        """
+        return InforomInfo(self)
+
+    def get_topology_nearest_gpus(self, level: GpuTopologyLevel) -> Iterable[Device]:
+        """
+        Yield the GPUs nearest to this device at the given interconnectivity level.
+
+        Supported on Linux only.
+
+        Parameters
+        ----------
+        level: :class:`GpuTopologyLevel`
+            The topology level.
+
+        Returns
+        -------
+        Iterable of :class:`Device`
+            The nearest devices at the given topology level.
+        """
+        cdef Device neighbor
+        for gpu_handle in nvml.device_get_topology_nearest_gpus(self._handle, level):
+            neighbor = Device.__new__(Device)
+            neighbor._handle = gpu_handle
+            yield neighbor
+
     @property
     def attributes(self) -> DeviceAttributes:
         """
@@ -1167,17 +1474,32 @@ def get_p2p_status(device1: Device, device2: Device, index: GpuP2PCapsIndex) ->
 
 __all__ = [
     "AddressingMode",
+    "AffinityScope",
     "BAR1MemoryInfo",
     "BrandType",
+    "ClockId",
+    "ClockInfo",
+    "ClockOffsets",
+    "ClocksEventReasons",
+    "ClockType",
+    "CoolerControl",
+    "CoolerInfo",
+    "CoolerTarget",
     "Device",
     "DeviceArchitecture",
     "DeviceAttributes",
     "DeviceEvents",
     "EventData",
     "EventType",
+    "FanControlPolicy",
+    "FanInfo",
     "FieldId",
     "FieldValue",
     "FieldValues",
+    "get_p2p_status",
+    "get_topology_common_ancestor",
+    "GpuDynamicPstatesInfo",
+    "GpuDynamicPstatesUtilization",
     "GpuP2PCapsIndex",
     "GpuP2PStatus",
     "GpuTopologyLevel",
@@ -1186,7 +1508,13 @@ __all__ = [
     "MemoryInfo",
     "PcieUtilCounter",
     "PciInfo",
+    "Pstates",
     "RepairStatus",
-    "get_p2p_status",
-    "get_topology_common_ancestor",
+    "Temperature",
+    "TemperatureSensors",
+    "TemperatureThresholds",
+    "ThermalController",
+    "ThermalSensor",
+    "ThermalSettings",
+    "ThermalTarget",
 ]
diff --git a/cuda_core/cuda/core/system/_fan.pxi b/cuda_core/cuda/core/system/_fan.pxi
new file mode 100644
index 0000000000..18525a21b1
--- /dev/null
+++ b/cuda_core/cuda/core/system/_fan.pxi
@@ -0,0 +1,103 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+
+cdef class FanInfo:
+    """
+    Manages information related to a specific fan on a specific device.
+    """
+
+    cdef intptr_t _handle
+    cdef int _fan
+
+    def __init__(self, handle: int, fan: int):
+        self._handle = handle
+        self._fan = fan
+
+    @property
+    def speed(self) -> int:
+        """
+        Get/set the intended operating speed of the device's fan.
+
+        For all discrete products with dedicated fans.
+
+        Note: The reported speed is the intended fan speed.  If the fan is
+        physically blocked and unable to spin, the output will not match the
+        actual fan speed.
+
+        The fan speed is expressed as a percentage of the product's maximum
+        noise tolerance fan speed.  This value may exceed 100% in certain cases.
+        """
+        return nvml.device_get_fan_speed_v2(self._handle, self._fan)
+
+    @speed.setter
+    def speed(self, speed: int) -> None:
+        nvml.device_set_fan_speed_v2(self._handle, self._fan, speed)
+
+    @property
+    def speed_rpm(self) -> int:
+        """
+        The intended operating speed of the device's fan in rotations per minute
+        (RPM).
+
+        For Maxwell™ or newer fully supported devices.
+
+        For all discrete products with dedicated fans.
+
+        Note: The reported speed is the intended fan speed.  If the fan is
+        physically blocked and unable to spin, the output will not match the
+        actual fan speed.
+        """
+        return nvml.device_get_fan_speed_rpm(self._handle, self._fan)
+
+    @property
+    def target_speed(self) -> int:
+        """
+        The intended target speed of the device's specified fan.
+
+        For all discrete products with dedicated fans.
+
+        Normally, the driver dynamically adjusts the fan based on
+        the needs of the GPU.  But when the user sets the fan speed using
+        :attr:`speed`, the driver will attempt to make the fan achieve that
+        setting.  The actual current speed of the fan is reported in
+        :attr:`speed`.
+
+        The fan speed is expressed as a percentage of the product's maximum
+        noise tolerance fan speed.  This value may exceed 100% in certain cases.
+        """
+        return nvml.device_get_target_fan_speed(self._handle, self._fan)
+
+    @property
+    def min_max_speed(self) -> tuple[int, int]:
+        """
+        The minimum and maximum fan speed for all of the device's fans.
+
+        For all discrete products with dedicated fans.
+
+        Returns
+        -------
+        tuple[int, int]
+            A tuple of (min_speed, max_speed)
+        """
+        return nvml.device_get_min_max_fan_speed(self._handle)
+
+    @property
+    def control_policy(self) -> FanControlPolicy:
+        """
+        The current fan control policy.
+
+        For Maxwell™ or newer fully supported devices.
+
+        For all CUDA-capable discrete products with fans.
+        """
+        return FanControlPolicy(nvml.device_get_fan_control_policy_v2(self._handle, self._fan))
+
+    def set_default_fan_speed(self) -> None:
+        """
+        Set the speed of the fan control policy to default.
+
+        For all CUDA-capable discrete products with fans.
+        """
+        nvml.device_set_default_fan_speed_v2(self._handle, self._fan)
diff --git a/cuda_core/cuda/core/system/_performance.pxi b/cuda_core/cuda/core/system/_performance.pxi
new file mode 100644
index 0000000000..6ba1d40f9c
--- /dev/null
+++ b/cuda_core/cuda/core/system/_performance.pxi
@@ -0,0 +1,74 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+
+# In cuda.bindings.nvml, this is an anonymous struct inside nvmlGpuDynamicPstatesInfo_t.
+
+
+ctypedef struct _GpuDynamicPstatesUtilization:
+    unsigned int bIsPresent  # read by GpuDynamicPstatesUtilization.is_present
+    unsigned int percentage  # read by GpuDynamicPstatesUtilization.percentage
+    unsigned int incThreshold  # read by GpuDynamicPstatesUtilization.inc_threshold
+    unsigned int decThreshold  # read by GpuDynamicPstatesUtilization.dec_threshold
+
+
+cdef class GpuDynamicPstatesUtilization:
+    """Utilization data for one domain, read in place from a buffer held by its owner."""
+    cdef _GpuDynamicPstatesUtilization *_ptr
+    cdef object _owner
+
+    def __init__(self, ptr: int, owner: object):
+        # `ptr` addresses a slice of the numpy buffer that `owner` holds; storing
+        # `owner` keeps that buffer alive for as long as this view exists.
+        self._owner = owner
+        self._ptr = <_GpuDynamicPstatesUtilization *><intptr_t>ptr
+
+    @property
+    def is_present(self) -> bool:
+        """
+        Whether the utilization domain is present on this GPU.
+        """
+        return bool(self._ptr.bIsPresent)
+
+    @property
+    def percentage(self) -> int:
+        """
+        Share of the last 1-second interval, in percent, where the domain was considered busy.
+        """
+        return self._ptr.percentage
+
+    @property
+    def inc_threshold(self) -> int:
+        """
+        Utilization threshold whose crossing can trigger a perf-increasing P-State change.
+        """
+        return self._ptr.incThreshold
+
+    @property
+    def dec_threshold(self) -> int:
+        """
+        Utilization threshold whose crossing can trigger a perf-decreasing P-State change.
+        """
+        return self._ptr.decThreshold
+
+
+cdef class GpuDynamicPstatesInfo:
+    """
+    Sequence-like access to the performance monitor samples of a device.
+    """
+    cdef object _gpu_dynamic_pstates_info
+
+    def __init__(self, gpu_dynamic_pstates_info: nvml.GpuDynamicPstatesInfo):
+        self._gpu_dynamic_pstates_info = gpu_dynamic_pstates_info
+
+    def __len__(self):
+        return nvml.MAX_GPU_UTILIZATIONS
+
+    def __getitem__(self, idx: int) -> GpuDynamicPstatesUtilization:
+        if not 0 <= idx < nvml.MAX_GPU_UTILIZATIONS:
+            raise IndexError("GPU dynamic P-states index out of range")
+        info = self._gpu_dynamic_pstates_info
+        # Address of entry `idx`; `info` is handed over as owner of the buffer.
+        entry_ptr = info.utilization.ptr + idx * sizeof(_GpuDynamicPstatesUtilization)
+        return GpuDynamicPstatesUtilization(entry_ptr, info)
diff --git a/cuda_core/cuda/core/system/_system.pyx b/cuda_core/cuda/core/system/_system.pyx
index c29d20dd20..3e15420dc8 100644
--- a/cuda_core/cuda/core/system/_system.pyx
+++ b/cuda_core/cuda/core/system/_system.pyx
@@ -22,7 +22,7 @@ if CUDA_BINDINGS_NVML_IS_COMPATIBLE:
     from cuda.bindings import _nvml as nvml
     # TODO: We need to be even more specific than version numbers for development.
     # This can be removed once we have a release including everything we need.
-    for member in ["FieldId"]:
+    for member in ["FieldId", "ClocksEventReasons"]:
         if not hasattr(nvml, member):
             CUDA_BINDINGS_NVML_IS_COMPATIBLE = False
             break
diff --git a/cuda_core/cuda/core/system/_temperature.pxi b/cuda_core/cuda/core/system/_temperature.pxi
new file mode 100644
index 0000000000..20e5f6f99e
--- /dev/null
+++ b/cuda_core/cuda/core/system/_temperature.pxi
@@ -0,0 +1,140 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+
+# In cuda.bindings.nvml, this is an anonymous struct inside nvmlThermalSettings_t.
+
+
+ctypedef struct _ThermalSensor:
+    int controller  # wrapped as ThermalController by ThermalSensor.controller
+    int defaultMinTemp  # read by ThermalSensor.default_min_temp
+    int defaultMaxTemp  # read by ThermalSensor.default_max_temp
+    int currentTemp  # read by ThermalSensor.current_temp
+    int target  # wrapped as ThermalTarget by ThermalSensor.target
+
+
+cdef class ThermalSensor:
+    """Data for one thermal sensor, read in place from a buffer held by its owner."""
+    cdef _ThermalSensor *_ptr
+    cdef object _owner
+
+    def __init__(self, ptr: int, owner: object):
+        # `ptr` addresses a slice of the numpy buffer that `owner` holds; storing
+        # `owner` keeps that buffer alive for as long as this view exists.
+        self._owner = owner
+        self._ptr = <_ThermalSensor *><intptr_t>ptr
+
+    @property
+    def controller(self) -> ThermalController:
+        return ThermalController(self._ptr.controller)
+
+    @property
+    def default_min_temp(self) -> int:
+        return self._ptr.defaultMinTemp
+
+    @property
+    def default_max_temp(self) -> int:
+        return self._ptr.defaultMaxTemp
+
+    @property
+    def current_temp(self) -> int:
+        return self._ptr.currentTemp
+
+    @property
+    def target(self) -> ThermalTarget:
+        return ThermalTarget(self._ptr.target)
+
+
+cdef class ThermalSettings:
+    """Sequence-like view over the thermal sensors held by an NVML thermal settings object."""
+    cdef object _thermal_settings
+
+    def __init__(self, thermal_settings: nvml.ThermalSettings):
+        self._thermal_settings = thermal_settings
+
+    def __len__(self):
+        # MAX_THERMAL_SENSORS_PER_GPU is 3
+        return min(self._thermal_settings.count, 3)
+
+    def __getitem__(self, idx: int) -> ThermalSensor:
+        if idx < 0 or idx >= len(self):
+            raise IndexError("Thermal sensor index out of range")
+        # The settings object is passed as owner so the buffer the sensor points into stays alive.
+        settings = self._thermal_settings
+        return ThermalSensor(settings.sensor.ptr + idx * sizeof(_ThermalSensor), settings)
+
+
+cdef class Temperature:
+    cdef intptr_t _handle
+
+    def __init__(self, handle: int):
+        self._handle = handle
+
+    def sensor(
+        self,
+        sensor: TemperatureSensors = TemperatureSensors.TEMPERATURE_GPU
+    ) -> int:
+        """
+        Get the temperature reading from a specific sensor on the device, in
+        degrees Celsius.
+
+        Parameters
+        ----------
+        sensor: :class:`TemperatureSensors`, optional
+            The temperature sensor to query.
+
+        Returns
+        -------
+        int
+            The temperature in degrees Celsius.
+        """
+        return nvml.device_get_temperature_v(self._handle, sensor)
+
+    def threshold(self, threshold_type: TemperatureThresholds) -> int:
+        """
+        Retrieves the temperature threshold for this GPU with the specified
+        threshold type, in degrees Celsius.
+
+        For Kepler™ or newer fully supported devices.
+
+        See :class:`TemperatureThresholds` for possible threshold types.
+
+        Note: This API is no longer the preferred interface for retrieving the
+        following temperature thresholds on Ada and later architectures:
+        ``NVML_TEMPERATURE_THRESHOLD_SHUTDOWN``,
+        ``NVML_TEMPERATURE_THRESHOLD_SLOWDOWN``,
+        ``NVML_TEMPERATURE_THRESHOLD_MEM_MAX`` and
+        ``NVML_TEMPERATURE_THRESHOLD_GPU_MAX``.
+
+        Support for reading these temperature thresholds for Ada and later
+        architectures will be removed from this API in future releases. Please
+        use :meth:`get_field_values` with ``NVML_FI_DEV_TEMPERATURE_*`` fields
+        to retrieve temperature thresholds on these architectures.
+        """
+        return nvml.device_get_temperature_threshold(self._handle, threshold_type)
+
+    @property
+    def margin(self) -> int:
+        """
+        The thermal margin temperature (distance to nearest slowdown threshold) for the device.
+        """
+        return nvml.device_get_margin_temperature(self._handle)
+
+    def thermal_settings(self, sensor_index: ThermalTarget) -> ThermalSettings:
+        """
+        Used to execute a list of thermal system instructions.
+
+        TODO: The above docstring is from the NVML header, but it doesn't seem to make sense.
+
+        Parameters
+        ----------
+        sensor_index: :class:`ThermalTarget`
+            The index of the thermal sensor.
+
+        Returns
+        -------
+        :class:`ThermalSettings`
+            The thermal settings for the specified sensor.
+        """
+        return ThermalSettings(nvml.device_get_thermal_settings(self._handle, sensor_index))
diff --git a/cuda_core/docs/source/api.rst b/cuda_core/docs/source/api.rst
index 1c10bb7298..9772b78786 100644
--- a/cuda_core/docs/source/api.rst
+++ b/cuda_core/docs/source/api.rst
@@ -92,16 +92,29 @@ CUDA system information and NVIDIA Management Library (NVML)
 
    system.Device
    system.AddressingMode
+   system.AffinityScope
    system.BAR1MemoryInfo
    system.BrandType
+   system.ClockId
+   system.ClockInfo
+   system.ClockOffsets
+   system.ClocksEventReasons
+   system.ClockType
+   system.CoolerControl
+   system.CoolerInfo
+   system.CoolerTarget
    system.DeviceArchitecture
    system.DeviceAttributes
    system.DeviceEvents
    system.EventData
    system.EventType
+   system.FanControlPolicy
+   system.FanInfo
    system.FieldId
    system.FieldValue
    system.FieldValues
+   system.GpuDynamicPstatesInfo
+   system.GpuDynamicPstatesUtilization
    system.GpuP2PCapsIndex
    system.GpuP2PStatus
    system.GpuTopologyLevel
@@ -110,7 +123,15 @@ CUDA system information and NVIDIA Management Library (NVML)
    system.MemoryInfo
    system.PcieUtilCounter
    system.PciInfo
+   system.Pstates
    system.RepairStatus
+   system.Temperature
+   system.TemperatureSensors
+   system.TemperatureThresholds
+   system.ThermalController
+   system.ThermalSensor
+   system.ThermalSettings
+   system.ThermalTarget
 
 .. module:: cuda.core.utils
 
diff --git a/cuda_core/tests/system/test_system_device.py b/cuda_core/tests/system/test_system_device.py
index 8f07b2ee27..2e762ce860 100644
--- a/cuda_core/tests/system/test_system_device.py
+++ b/cuda_core/tests/system/test_system_device.py
@@ -64,11 +64,12 @@ def test_device_bar1_memory():
         assert free + used == total
 
 
+@pytest.mark.skipif(helpers.IS_WSL or helpers.IS_WINDOWS, reason="Device attributes not supported on WSL or Windows")
 def test_device_cpu_affinity():
     skip_reasons = set()
     for device in system.Device.get_all_devices():
         try:
-            affinity = device.cpu_affinity
+            affinity = device.get_cpu_affinity(system.AffinityScope.NODE)
         except system.NotSupportedError:
             skip_reasons.add(f"CPU affinity not supported on '{device.name}'")
         else:
@@ -79,6 +80,42 @@ def test_device_cpu_affinity():
         pytest.skip(" ; ".join(skip_reasons))
 
 
+@pytest.mark.skipif(helpers.IS_WSL or helpers.IS_WINDOWS, reason="Device attributes not supported on WSL or Windows")
+def test_affinity():
+    skip_reasons = set()
+    for device in system.Device.get_all_devices():
+        for scope in (system.AffinityScope.NODE, system.AffinityScope.SOCKET):
+            try:
+                affinity = device.get_cpu_affinity(scope)
+            except system.NotSupportedError:
+                skip_reasons.add(f"CPU affinity not supported on '{device.name}'")
+            else:
+                assert isinstance(affinity, list)
+
+            try:
+                affinity = device.get_memory_affinity(scope)
+            except system.NotSupportedError:
+                skip_reasons.add(f"Memory affinity not supported on '{device.name}'")
+            else:
+                assert isinstance(affinity, list)
+    if skip_reasons:
+        pytest.skip(" ; ".join(skip_reasons))
+
+
+def test_numa_node_id():
+    skip_reasons = set()
+    for device in system.Device.get_all_devices():
+        try:
+            numa_node_id = device.numa_node_id
+        except system.NotSupportedError:
+            skip_reasons.add(f"NUMA node ID not supported by device '{device.name}'")
+        else:
+            assert isinstance(numa_node_id, int)
+            assert numa_node_id >= -1
+    if skip_reasons:
+        pytest.skip(" ; ".join(skip_reasons))
+
+
 def test_device_cuda_compute_capability():
     for device in system.Device.get_all_devices():
         cuda_compute_capability = device.cuda_compute_capability
@@ -390,7 +427,7 @@ def test_get_all_devices_with_cpu_affinity():
     try:
         for i in range(multiprocessing.cpu_count()):
             for device in system.Device.get_all_devices_with_cpu_affinity(i):
-                affinity = device.cpu_affinity
+                affinity = device.get_cpu_affinity()
                 assert isinstance(affinity, list)
                 assert i in affinity
     except system.NotSupportedError:
@@ -522,3 +559,179 @@ def test_get_inforom_version():
             assert len(board_part_number) > 0
 
         inforom.validate()
+
+
+def test_clock():  # smoke-test auto-boost flags and each ClockType's readings on every device
+    for device in system.Device.get_all_devices():
+        try:
+            current, default = device.get_auto_boosted_clocks_enabled()
+        except system.NotSupportedError:
+            pass  # unsupported auto-boost query: only this check is skipped
+        else:
+            assert isinstance(current, bool)
+            assert isinstance(default, bool)
+
+        for clock_type in system.ClockType:
+            clock = device.clock(clock_type)
+            assert isinstance(clock, system.ClockInfo)
+
+            try:
+                current_mhz = clock.get_current_mhz()
+            except system.NotSupportedError:
+                continue  # no current reading: skip every remaining check for this clock type
+            assert isinstance(current_mhz, int)
+            assert current_mhz >= 0
+
+            current_mhz = clock.get_current_mhz(system.ClockId.CURRENT)  # explicit ClockId form
+            assert isinstance(current_mhz, int)
+            assert current_mhz >= 0
+
+            max_mhz = clock.get_max_mhz()
+            assert isinstance(max_mhz, int)
+            assert max_mhz >= 0
+
+            try:
+                max_customer_boost = clock.get_max_customer_boost_mhz()
+            except system.NotSupportedError:
+                pass  # customer boost is optional; absence is not a failure
+            else:
+                assert isinstance(max_customer_boost, int)
+                assert max_customer_boost >= 0
+
+            pstate = device.performance_state  # current P-state feeds the two queries below
+
+            min_, max_ = clock.get_min_max_clock_of_pstate_mhz(pstate)
+            assert isinstance(min_, int)
+            assert min_ >= 0
+            assert isinstance(max_, int)
+            assert max_ >= 0
+
+            try:
+                offsets = clock.get_offsets(pstate)
+            except system.InvalidArgumentError:  # fall back to offsets wrapping a default struct
+                offsets = system.ClockOffsets(nvml.ClockOffset_v1())
+            assert isinstance(offsets, system.ClockOffsets)
+            assert isinstance(offsets.clock_offset_mhz, int)
+            assert isinstance(offsets.max_offset_mhz, int)
+            assert isinstance(offsets.min_offset_mhz, int)
+
+
+def test_clock_event_reasons():  # current and supported reasons are ClocksEventReasons members
+    for gpu in system.Device.get_all_devices():
+        current = gpu.get_current_clock_event_reasons()
+        assert all(isinstance(item, system.ClocksEventReasons) for item in current)
+
+        supported = gpu.get_supported_clock_event_reasons()
+        assert all(isinstance(item, system.ClocksEventReasons) for item in supported)
+
+
+def test_fan():  # exercise every FanInfo property on each fan of each device
+    for device in system.Device.get_all_devices():
+        for fan_idx in range(device.num_fans):
+            fan_info = device.fan(fan_idx)
+            assert isinstance(fan_info, system.FanInfo)
+
+            try:  # NOTE(review): the body writes fan speed on real hardware; confirm CI permits it
+                speed = fan_info.speed
+                assert isinstance(speed, int)
+                assert 0 <= speed <= 200
+
+                fan_info.speed = 50  # exercise the setter with a fixed value ...
+                fan_info.speed = speed  # ... then write back the value read above
+
+                speed_rpm = fan_info.speed_rpm
+                assert isinstance(speed_rpm, int)
+                assert speed_rpm >= 0
+
+                target_speed = fan_info.target_speed
+                assert isinstance(target_speed, int)
+                assert speed <= target_speed * 2  # loose bound between measured and target speed
+
+                min_, max_ = fan_info.min_max_speed
+                assert isinstance(min_, int)
+                assert isinstance(max_, int)
+                assert min_ <= max_
+                if speed > 0:  # the range check is not applied when speed reads as 0
+                    assert min_ <= speed <= max_
+
+                control_policy = fan_info.control_policy
+                assert isinstance(control_policy, system.FanControlPolicy)
+            finally:  # runs whether or not an assertion above failed
+                fan_info.set_default_fan_speed()
+
+
+def test_cooler():  # validate CoolerInfo on every device; skip, with reasons, if any lacks it
+    skip_reasons = set()  # collected so one unsupported device cannot hide the others
+    for device in system.Device.get_all_devices():
+        try:
+            cooler_info = device.cooler
+        except system.NotSupportedError:
+            skip_reasons.add(f"CoolerInfo not supported on '{device.name}'")
+            continue  # keep checking the remaining devices instead of aborting the test
+
+        assert isinstance(cooler_info, system.CoolerInfo)
+        assert isinstance(cooler_info.signal_type, system.CoolerControl)
+        assert all(isinstance(t, system.CoolerTarget) for t in cooler_info.target)
+    if skip_reasons:  # report the skip only after every device has been exercised
+        pytest.skip(" ; ".join(skip_reasons))
+
+
+def test_temperature():  # sensor reading, thresholds, margin and thermal settings per device
+    for device in system.Device.get_all_devices():
+        temperature = device.temperature
+        assert isinstance(temperature, system.Temperature)
+
+        sensor = temperature.sensor()
+        assert isinstance(sensor, int)
+        assert sensor >= 0
+
+        # NOTE(review): [:-1] drops the last enum member, presumably a COUNT sentinel; confirm
+        for threshold in list(system.TemperatureThresholds)[:-1]:
+            try:
+                t = temperature.threshold(threshold)
+            except system.NotSupportedError:
+                continue  # this threshold kind is unsupported; try the next one
+            else:
+                assert isinstance(t, int)
+                assert t >= 0
+        try:
+            margin = temperature.margin
+        except system.NotSupportedError:
+            pass  # margin is optional; absence is not a failure
+        else:
+            assert isinstance(margin, int)
+            assert margin >= 0
+
+        thermals = temperature.thermal_settings(system.ThermalTarget.ALL)
+        assert isinstance(thermals, system.ThermalSettings)
+
+        for entry in thermals:  # distinct name so the integer `sensor` above is not rebound
+            assert isinstance(entry, system.ThermalSensor)
+            assert isinstance(entry.target, system.ThermalTarget)
+            assert isinstance(entry.controller, system.ThermalController)
+            assert isinstance(entry.default_min_temp, int)
+            assert entry.default_min_temp >= 0
+            assert isinstance(entry.default_max_temp, int)
+            assert entry.default_max_temp >= entry.default_min_temp
+            assert isinstance(entry.current_temp, int)
+            assert entry.default_min_temp <= entry.current_temp <= entry.default_max_temp
+
+
+def test_pstates():  # current/supported P-states and the dynamic P-state utilization entries
+    for gpu in system.Device.get_all_devices():
+        current = gpu.performance_state
+        assert isinstance(current, system.Pstates)
+
+        supported = gpu.get_supported_pstates()
+        assert all(isinstance(state, system.Pstates) for state in supported)
+
+        dyn_info = gpu.dynamic_pstates_info
+        assert isinstance(dyn_info, system.GpuDynamicPstatesInfo)
+
+        assert len(dyn_info) == nvml.MAX_GPU_UTILIZATIONS  # one slot per utilization domain
+
+        for entry in dyn_info:
+            assert isinstance(entry.is_present, bool)
+            assert isinstance(entry.percentage, int)
+            assert isinstance(entry.inc_threshold, int)
+            assert isinstance(entry.dec_threshold, int)