diff --git a/modelopt/onnx/quantization/autotune/benchmark.py b/modelopt/onnx/quantization/autotune/benchmark.py
index 700cb97eb..7613a119a 100644
--- a/modelopt/onnx/quantization/autotune/benchmark.py
+++ b/modelopt/onnx/quantization/autotune/benchmark.py
@@ -534,13 +534,28 @@ def _alloc_pinned_host(size: int, dtype: np.dtype) -> tuple[Any, np.ndarray, Any
         (host_ptr, arr, err): On success err is cudaSuccess; on failure
             host_ptr/arr may be None and err is the CUDA error code.
         """
-        nbytes = size * np.dtype(dtype).itemsize
+        dtype = np.dtype(dtype)
+        nbytes = size * dtype.itemsize
         err, host_ptr = cudart.cudaMallocHost(nbytes)
         if err != cudart.cudaError_t.cudaSuccess:
             return (None, None, err)
         addr = int(host_ptr) if hasattr(host_ptr, "__int__") else host_ptr
-        ctype = np.ctypeslib.as_ctypes_type(dtype)
-        arr = np.ctypeslib.as_array((ctype * size).from_address(addr))
+        try:
+            ctype = np.ctypeslib.as_ctypes_type(dtype)
+            arr = np.ctypeslib.as_array((ctype * size).from_address(addr))
+        except NotImplementedError as e:
+            # float16/bfloat16 have no ctypes equivalent; use same-size type and view
+            if dtype.itemsize == 2:
+                ctype = ctypes.c_uint16
+            else:
+                # cudaMallocHost already succeeded at this point: free the
+                # page-locked buffer before raising so this path does not leak.
+                cudart.cudaFreeHost(host_ptr)
+                raise TypeError(
+                    f"Pinned host allocation for dtype {dtype} is not supported: "
+                    "no ctypes mapping and no fallback for this itemsize"
+                ) from e
+            arr = np.ctypeslib.as_array((ctype * size).from_address(addr)).view(dtype)
         return (host_ptr, arr, cudart.cudaError_t.cudaSuccess)
 
     @staticmethod