Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions modelopt/onnx/quantization/autotune/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -534,13 +534,25 @@ def _alloc_pinned_host(size: int, dtype: np.dtype) -> tuple[Any, np.ndarray, Any
(host_ptr, arr, err): On success err is cudaSuccess; on failure host_ptr/arr
may be None and err is the CUDA error code.
"""
nbytes = size * np.dtype(dtype).itemsize
dtype = np.dtype(dtype)
nbytes = size * dtype.itemsize
err, host_ptr = cudart.cudaMallocHost(nbytes)
if err != cudart.cudaError_t.cudaSuccess:
return (None, None, err)
addr = int(host_ptr) if hasattr(host_ptr, "__int__") else host_ptr
ctype = np.ctypeslib.as_ctypes_type(dtype)
arr = np.ctypeslib.as_array((ctype * size).from_address(addr))
try:
ctype = np.ctypeslib.as_ctypes_type(dtype)
arr = np.ctypeslib.as_array((ctype * size).from_address(addr))
except NotImplementedError as e:
# float16/bfloat16 have no ctypes equivalent; use same-size type and view
if dtype.itemsize == 2:
ctype = ctypes.c_uint16
else:
raise TypeError(
f"Pinned host allocation for dtype {dtype} is not supported: "
"no ctypes mapping and no fallback for this itemsize"
) from e
arr = np.ctypeslib.as_array((ctype * size).from_address(addr)).view(dtype)
return (host_ptr, arr, cudart.cudaError_t.cudaSuccess)

@staticmethod
Expand Down