diff --git a/.github/actions/fetch_ctk/action.yml b/.github/actions/fetch_ctk/action.yml index 559c6d5a8b..001e3a84d8 100644 --- a/.github/actions/fetch_ctk/action.yml +++ b/.github/actions/fetch_ctk/action.yml @@ -14,7 +14,7 @@ inputs: cuda-components: description: "A list of the CTK components to install as a comma-separated list. e.g. 'cuda_nvcc,cuda_nvrtc,cuda_cudart'" required: false - default: "cuda_nvcc,cuda_cudart,cuda_crt,libnvvm,cuda_nvrtc,cuda_profiler_api,cuda_cccl,libnvjitlink,libcufile" + default: "cuda_nvcc,cuda_cudart,cuda_crt,libnvvm,cuda_nvrtc,cuda_profiler_api,cuda_cccl,libnvjitlink,libcufile,libnvfatbin" cuda-path: description: "where the CTK components will be installed to, relative to $PWD" required: false diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd b/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd new file mode 100644 index 0000000000..9f3b187560 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin.pxd @@ -0,0 +1,24 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated across versions from 12.4.1 to 13.1.0. Do not modify it directly. + +from ..cynvfatbin cimport * + + +############################################################################### +# Wrapper functions +############################################################################### + +cdef const char* _nvFatbinGetErrorString(nvFatbinResult result) except?NULL nogil +cdef nvFatbinResult _nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinAddCubin(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinAddLTOIR(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult _nvFatbinAddTileIR(nvFatbinHandle handle, const void* code, size_t size, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx new file mode 100644 index 0000000000..1e1e8d9a12 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin_linux.pyx @@ -0,0 +1,344 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated across versions from 12.4.1 to 13.1.0. Do not modify it directly. + +from libc.stdint cimport intptr_t, uintptr_t + +import threading +from .utils import FunctionNotFoundError, NotSupportedError + +from cuda.pathfinder import load_nvidia_dynamic_lib + + +############################################################################### +# Extern +############################################################################### + +# You must 'from .utils import NotSupportedError' before using this template + +cdef extern from "" nogil: + void* dlopen(const char*, int) + char* dlerror() + void* dlsym(void*, const char*) + int dlclose(void*) + + enum: + RTLD_LAZY + RTLD_NOW + RTLD_GLOBAL + RTLD_LOCAL + + const void* RTLD_DEFAULT 'RTLD_DEFAULT' + +cdef int get_cuda_version(): + cdef void* handle = NULL + cdef int err, driver_ver = 0 + + # Load driver to check version + handle = dlopen('libcuda.so.1', RTLD_NOW | RTLD_GLOBAL) + if handle == NULL: + err_msg = dlerror() + raise NotSupportedError(f'CUDA driver is not found ({err_msg.decode()})') + cuDriverGetVersion = dlsym(handle, "cuDriverGetVersion") + if cuDriverGetVersion == NULL: + raise RuntimeError('Did not find cuDriverGetVersion symbol in libcuda.so.1') + err = (cuDriverGetVersion)(&driver_ver) + if err != 0: + raise RuntimeError(f'cuDriverGetVersion returned error code {err}') + + return driver_ver + + + +############################################################################### +# Wrapper init +############################################################################### + +cdef object __symbol_lock = threading.Lock() +cdef bint __py_nvfatbin_init = False + +cdef void* __nvFatbinGetErrorString = NULL +cdef void* __nvFatbinCreate = NULL +cdef void* __nvFatbinDestroy = NULL +cdef void* __nvFatbinAddPTX = NULL +cdef void* __nvFatbinAddCubin = NULL +cdef void* __nvFatbinAddLTOIR = NULL +cdef void* __nvFatbinSize = NULL +cdef void* __nvFatbinGet = NULL +cdef void* __nvFatbinVersion = NULL +cdef void* __nvFatbinAddReloc = NULL +cdef void* __nvFatbinAddTileIR = NULL + + +cdef void* load_library() except* with gil: + cdef uintptr_t handle = load_nvidia_dynamic_lib("nvfatbin")._handle_uint + return handle + + +cdef int _init_nvfatbin() except -1 nogil: + global __py_nvfatbin_init + + cdef void* handle = NULL + + with gil, __symbol_lock: + # Recheck the flag after obtaining the locks + if __py_nvfatbin_init: + return 0 + + # Load function + global __nvFatbinGetErrorString + __nvFatbinGetErrorString = dlsym(RTLD_DEFAULT, 'nvFatbinGetErrorString') + if __nvFatbinGetErrorString == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinGetErrorString = dlsym(handle, 'nvFatbinGetErrorString') + + global __nvFatbinCreate + __nvFatbinCreate = dlsym(RTLD_DEFAULT, 'nvFatbinCreate') + if __nvFatbinCreate == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinCreate = dlsym(handle, 'nvFatbinCreate') + + global __nvFatbinDestroy + __nvFatbinDestroy = dlsym(RTLD_DEFAULT, 'nvFatbinDestroy') + if __nvFatbinDestroy == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinDestroy = dlsym(handle, 'nvFatbinDestroy') + + global __nvFatbinAddPTX + __nvFatbinAddPTX = dlsym(RTLD_DEFAULT, 'nvFatbinAddPTX') + if __nvFatbinAddPTX == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinAddPTX = dlsym(handle, 'nvFatbinAddPTX') + + global __nvFatbinAddCubin + __nvFatbinAddCubin = dlsym(RTLD_DEFAULT, 'nvFatbinAddCubin') + if __nvFatbinAddCubin == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinAddCubin = dlsym(handle, 'nvFatbinAddCubin') + + global __nvFatbinAddLTOIR + __nvFatbinAddLTOIR = dlsym(RTLD_DEFAULT, 'nvFatbinAddLTOIR') + if __nvFatbinAddLTOIR == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinAddLTOIR = dlsym(handle, 'nvFatbinAddLTOIR') + + global __nvFatbinSize + __nvFatbinSize = dlsym(RTLD_DEFAULT, 'nvFatbinSize') + if __nvFatbinSize == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinSize = dlsym(handle, 'nvFatbinSize') + + global __nvFatbinGet + __nvFatbinGet = dlsym(RTLD_DEFAULT, 'nvFatbinGet') + if __nvFatbinGet == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinGet = dlsym(handle, 'nvFatbinGet') + + global __nvFatbinVersion + __nvFatbinVersion = dlsym(RTLD_DEFAULT, 'nvFatbinVersion') + if __nvFatbinVersion == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinVersion = dlsym(handle, 'nvFatbinVersion') + + global __nvFatbinAddReloc + __nvFatbinAddReloc = dlsym(RTLD_DEFAULT, 'nvFatbinAddReloc') + if __nvFatbinAddReloc == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinAddReloc = dlsym(handle, 'nvFatbinAddReloc') + + global __nvFatbinAddTileIR + __nvFatbinAddTileIR = dlsym(RTLD_DEFAULT, 'nvFatbinAddTileIR') + if __nvFatbinAddTileIR == NULL: + if handle == NULL: + handle = load_library() + __nvFatbinAddTileIR = dlsym(handle, 'nvFatbinAddTileIR') + + __py_nvfatbin_init = True + return 0 + + +cdef inline int _check_or_init_nvfatbin() except -1 nogil: + if __py_nvfatbin_init: + return 0 + + return _init_nvfatbin() + +cdef dict func_ptrs = None + + +cpdef dict _inspect_function_pointers(): + global func_ptrs + if func_ptrs is not None: + return func_ptrs + + _check_or_init_nvfatbin() + cdef dict data = {} + + global __nvFatbinGetErrorString + data["__nvFatbinGetErrorString"] = __nvFatbinGetErrorString + + global __nvFatbinCreate + data["__nvFatbinCreate"] = __nvFatbinCreate + + global __nvFatbinDestroy + data["__nvFatbinDestroy"] = __nvFatbinDestroy + + global __nvFatbinAddPTX + data["__nvFatbinAddPTX"] = __nvFatbinAddPTX + + global __nvFatbinAddCubin + data["__nvFatbinAddCubin"] = __nvFatbinAddCubin + + global __nvFatbinAddLTOIR + data["__nvFatbinAddLTOIR"] = __nvFatbinAddLTOIR + + global __nvFatbinSize + data["__nvFatbinSize"] = __nvFatbinSize + + global __nvFatbinGet + data["__nvFatbinGet"] = __nvFatbinGet + + global __nvFatbinVersion + data["__nvFatbinVersion"] = __nvFatbinVersion + + global __nvFatbinAddReloc + data["__nvFatbinAddReloc"] = __nvFatbinAddReloc + + global __nvFatbinAddTileIR + data["__nvFatbinAddTileIR"] = __nvFatbinAddTileIR + + func_ptrs = data + return data + + +cpdef _inspect_function_pointer(str name): + global func_ptrs + if func_ptrs is None: + func_ptrs = _inspect_function_pointers() + return func_ptrs[name] + + +############################################################################### +# Wrapper functions +############################################################################### + +cdef const char* _nvFatbinGetErrorString(nvFatbinResult result) except?NULL nogil: + global __nvFatbinGetErrorString + _check_or_init_nvfatbin() + if __nvFatbinGetErrorString == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinGetErrorString is not found") + return (__nvFatbinGetErrorString)( + result) + + +cdef nvFatbinResult _nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinCreate + _check_or_init_nvfatbin() + if __nvFatbinCreate == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinCreate is not found") + return (__nvFatbinCreate)( + handle_indirect, options, optionsCount) + + +cdef nvFatbinResult _nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinDestroy + _check_or_init_nvfatbin() + if __nvFatbinDestroy == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinDestroy is not found") + return (__nvFatbinDestroy)( + handle_indirect) + + +cdef nvFatbinResult _nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddPTX + _check_or_init_nvfatbin() + if __nvFatbinAddPTX == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddPTX is not found") + return (__nvFatbinAddPTX)( + handle, code, size, arch, identifier, optionsCmdLine) + + +cdef nvFatbinResult _nvFatbinAddCubin(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddCubin + _check_or_init_nvfatbin() + if __nvFatbinAddCubin == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddCubin is not found") + return (__nvFatbinAddCubin)( + handle, code, size, arch, identifier) + + +cdef nvFatbinResult _nvFatbinAddLTOIR(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddLTOIR + _check_or_init_nvfatbin() + if __nvFatbinAddLTOIR == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddLTOIR is not found") + return (__nvFatbinAddLTOIR)( + handle, code, size, arch, identifier, optionsCmdLine) + + +cdef nvFatbinResult _nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinSize + _check_or_init_nvfatbin() + if __nvFatbinSize == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinSize is not found") + return (__nvFatbinSize)( + handle, size) + + +cdef nvFatbinResult _nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinGet + _check_or_init_nvfatbin() + if __nvFatbinGet == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinGet is not found") + return (__nvFatbinGet)( + handle, buffer) + + +cdef nvFatbinResult _nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinVersion + _check_or_init_nvfatbin() + if __nvFatbinVersion == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinVersion is not found") + return (__nvFatbinVersion)( + major, minor) + + +cdef nvFatbinResult _nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddReloc + _check_or_init_nvfatbin() + if __nvFatbinAddReloc == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddReloc is not found") + return (__nvFatbinAddReloc)( + handle, code, size) + + +cdef nvFatbinResult _nvFatbinAddTileIR(nvFatbinHandle handle, const void* code, size_t size, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddTileIR + _check_or_init_nvfatbin() + if __nvFatbinAddTileIR == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddTileIR is not found") + return (__nvFatbinAddTileIR)( + handle, code, size, identifier, optionsCmdLine) diff --git a/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx new file mode 100644 index 0000000000..b0b3f94e5a --- /dev/null +++ b/cuda_bindings/cuda/bindings/_internal/nvfatbin_windows.pyx @@ -0,0 +1,315 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated across versions from 12.4.1 to 13.1.0. Do not modify it directly. + +from libc.stdint cimport intptr_t + +import threading +from .utils import FunctionNotFoundError, NotSupportedError + +from cuda.pathfinder import load_nvidia_dynamic_lib + +from libc.stddef cimport wchar_t +from libc.stdint cimport uintptr_t +from cpython cimport PyUnicode_AsWideCharString, PyMem_Free + +# You must 'from .utils import NotSupportedError' before using this template + +cdef extern from "windows.h" nogil: + ctypedef void* HMODULE + ctypedef void* HANDLE + ctypedef void* FARPROC + ctypedef unsigned long DWORD + ctypedef const wchar_t *LPCWSTR + ctypedef const char *LPCSTR + + cdef DWORD LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800 + cdef DWORD LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000 + cdef DWORD LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100 + + HMODULE _LoadLibraryExW "LoadLibraryExW"( + LPCWSTR lpLibFileName, + HANDLE hFile, + DWORD dwFlags + ) + + FARPROC _GetProcAddress "GetProcAddress"(HMODULE hModule, LPCSTR lpProcName) + +cdef inline uintptr_t LoadLibraryExW(str path, HANDLE hFile, DWORD dwFlags): + cdef uintptr_t result + cdef wchar_t* wpath = PyUnicode_AsWideCharString(path, NULL) + with nogil: + result = _LoadLibraryExW( + wpath, + hFile, + dwFlags + ) + PyMem_Free(wpath) + return result + +cdef inline void *GetProcAddress(uintptr_t hModule, const char* lpProcName) nogil: + return _GetProcAddress(hModule, lpProcName) + +cdef int get_cuda_version(): + cdef int err, driver_ver = 0 + + # Load driver to check version + handle = LoadLibraryExW("nvcuda.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32) + if handle == 0: + raise NotSupportedError('CUDA driver is not found') + cuDriverGetVersion = GetProcAddress(handle, 'cuDriverGetVersion') + if cuDriverGetVersion == NULL: + raise RuntimeError('Did not find cuDriverGetVersion symbol in nvcuda.dll') + err = (cuDriverGetVersion)(&driver_ver) + if err != 0: + raise RuntimeError(f'cuDriverGetVersion returned error code {err}') + + return driver_ver + + + +############################################################################### +# Wrapper init +############################################################################### + +cdef object __symbol_lock = threading.Lock() +cdef bint __py_nvfatbin_init = False + +cdef void* __nvFatbinGetErrorString = NULL +cdef void* __nvFatbinCreate = NULL +cdef void* __nvFatbinDestroy = NULL +cdef void* __nvFatbinAddPTX = NULL +cdef void* __nvFatbinAddCubin = NULL +cdef void* __nvFatbinAddLTOIR = NULL +cdef void* __nvFatbinSize = NULL +cdef void* __nvFatbinGet = NULL +cdef void* __nvFatbinVersion = NULL +cdef void* __nvFatbinAddReloc = NULL +cdef void* __nvFatbinAddTileIR = NULL + + +cdef int _init_nvfatbin() except -1 nogil: + global __py_nvfatbin_init + + with gil, __symbol_lock: + # Recheck the flag after obtaining the locks + if __py_nvfatbin_init: + return 0 + + # Load library + handle = load_nvidia_dynamic_lib("nvfatbin")._handle_uint + + # Load function + global __nvFatbinGetErrorString + __nvFatbinGetErrorString = GetProcAddress(handle, 'nvFatbinGetErrorString') + + global __nvFatbinCreate + __nvFatbinCreate = GetProcAddress(handle, 'nvFatbinCreate') + + global __nvFatbinDestroy + __nvFatbinDestroy = GetProcAddress(handle, 'nvFatbinDestroy') + + global __nvFatbinAddPTX + __nvFatbinAddPTX = GetProcAddress(handle, 'nvFatbinAddPTX') + + global __nvFatbinAddCubin + __nvFatbinAddCubin = GetProcAddress(handle, 'nvFatbinAddCubin') + + global __nvFatbinAddLTOIR + __nvFatbinAddLTOIR = GetProcAddress(handle, 'nvFatbinAddLTOIR') + + global __nvFatbinSize + __nvFatbinSize = GetProcAddress(handle, 'nvFatbinSize') + + global __nvFatbinGet + __nvFatbinGet = GetProcAddress(handle, 'nvFatbinGet') + + global __nvFatbinVersion + __nvFatbinVersion = GetProcAddress(handle, 'nvFatbinVersion') + + global __nvFatbinAddReloc + __nvFatbinAddReloc = GetProcAddress(handle, 'nvFatbinAddReloc') + + global __nvFatbinAddTileIR + __nvFatbinAddTileIR = GetProcAddress(handle, 'nvFatbinAddTileIR') + + __py_nvfatbin_init = True + return 0 + + +cdef inline int _check_or_init_nvfatbin() except -1 nogil: + if __py_nvfatbin_init: + return 0 + + return _init_nvfatbin() + + +cdef dict func_ptrs = None + + +cpdef dict _inspect_function_pointers(): + global func_ptrs + if func_ptrs is not None: + return func_ptrs + + _check_or_init_nvfatbin() + cdef dict data = {} + + global __nvFatbinGetErrorString + data["__nvFatbinGetErrorString"] = __nvFatbinGetErrorString + + global __nvFatbinCreate + data["__nvFatbinCreate"] = __nvFatbinCreate + + global __nvFatbinDestroy + data["__nvFatbinDestroy"] = __nvFatbinDestroy + + global __nvFatbinAddPTX + data["__nvFatbinAddPTX"] = __nvFatbinAddPTX + + global __nvFatbinAddCubin + data["__nvFatbinAddCubin"] = __nvFatbinAddCubin + + global __nvFatbinAddLTOIR + data["__nvFatbinAddLTOIR"] = __nvFatbinAddLTOIR + + global __nvFatbinSize + data["__nvFatbinSize"] = __nvFatbinSize + + global __nvFatbinGet + data["__nvFatbinGet"] = __nvFatbinGet + + global __nvFatbinVersion + data["__nvFatbinVersion"] = __nvFatbinVersion + + global __nvFatbinAddReloc + data["__nvFatbinAddReloc"] = __nvFatbinAddReloc + + global __nvFatbinAddTileIR + data["__nvFatbinAddTileIR"] = __nvFatbinAddTileIR + + func_ptrs = data + return data + + +cpdef _inspect_function_pointer(str name): + global func_ptrs + if func_ptrs is None: + func_ptrs = _inspect_function_pointers() + return func_ptrs[name] + + +############################################################################### +# Wrapper functions +############################################################################### + +cdef const char* _nvFatbinGetErrorString(nvFatbinResult result) except?NULL nogil: + global __nvFatbinGetErrorString + _check_or_init_nvfatbin() + if __nvFatbinGetErrorString == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinGetErrorString is not found") + return (__nvFatbinGetErrorString)( + result) + + +cdef nvFatbinResult _nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinCreate + _check_or_init_nvfatbin() + if __nvFatbinCreate == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinCreate is not found") + return (__nvFatbinCreate)( + handle_indirect, options, optionsCount) + + +cdef nvFatbinResult _nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinDestroy + _check_or_init_nvfatbin() + if __nvFatbinDestroy == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinDestroy is not found") + return (__nvFatbinDestroy)( + handle_indirect) + + +cdef nvFatbinResult _nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddPTX + _check_or_init_nvfatbin() + if __nvFatbinAddPTX == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddPTX is not found") + return (__nvFatbinAddPTX)( + handle, code, size, arch, identifier, optionsCmdLine) + + +cdef nvFatbinResult _nvFatbinAddCubin(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddCubin + _check_or_init_nvfatbin() + if __nvFatbinAddCubin == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddCubin is not found") + return (__nvFatbinAddCubin)( + handle, code, size, arch, identifier) + + +cdef nvFatbinResult _nvFatbinAddLTOIR(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddLTOIR + _check_or_init_nvfatbin() + if __nvFatbinAddLTOIR == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddLTOIR is not found") + return (__nvFatbinAddLTOIR)( + handle, code, size, arch, identifier, optionsCmdLine) + + +cdef nvFatbinResult _nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinSize + _check_or_init_nvfatbin() + if __nvFatbinSize == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinSize is not found") + return (__nvFatbinSize)( + handle, size) + + +cdef nvFatbinResult _nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinGet + _check_or_init_nvfatbin() + if __nvFatbinGet == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinGet is not found") + return (__nvFatbinGet)( + handle, buffer) + + +cdef nvFatbinResult _nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinVersion + _check_or_init_nvfatbin() + if __nvFatbinVersion == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinVersion is not found") + return (__nvFatbinVersion)( + major, minor) + + +cdef nvFatbinResult _nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddReloc + _check_or_init_nvfatbin() + if __nvFatbinAddReloc == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddReloc is not found") + return (__nvFatbinAddReloc)( + handle, code, size) + + +cdef nvFatbinResult _nvFatbinAddTileIR(nvFatbinHandle handle, const void* code, size_t size, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + global __nvFatbinAddTileIR + _check_or_init_nvfatbin() + if __nvFatbinAddTileIR == NULL: + with gil: + raise FunctionNotFoundError("function nvFatbinAddTileIR is not found") + return (__nvFatbinAddTileIR)( + handle, code, size, identifier, optionsCmdLine) diff --git a/cuda_bindings/cuda/bindings/cynvfatbin.pxd b/cuda_bindings/cuda/bindings/cynvfatbin.pxd new file mode 100644 index 0000000000..5969fafee1 --- /dev/null +++ b/cuda_bindings/cuda/bindings/cynvfatbin.pxd @@ -0,0 +1,55 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated across versions from 12.4.1 to 13.1.0. Do not modify it directly. + +from libc.stdint cimport intptr_t, uint32_t + + +############################################################################### +# Types (structs, enums, ...) +############################################################################### + +# enums +ctypedef enum nvFatbinResult "nvFatbinResult": + NVFATBIN_SUCCESS "NVFATBIN_SUCCESS" = 0 + NVFATBIN_ERROR_INTERNAL "NVFATBIN_ERROR_INTERNAL" + NVFATBIN_ERROR_ELF_ARCH_MISMATCH "NVFATBIN_ERROR_ELF_ARCH_MISMATCH" + NVFATBIN_ERROR_ELF_SIZE_MISMATCH "NVFATBIN_ERROR_ELF_SIZE_MISMATCH" + NVFATBIN_ERROR_MISSING_PTX_VERSION "NVFATBIN_ERROR_MISSING_PTX_VERSION" + NVFATBIN_ERROR_NULL_POINTER "NVFATBIN_ERROR_NULL_POINTER" + NVFATBIN_ERROR_COMPRESSION_FAILED "NVFATBIN_ERROR_COMPRESSION_FAILED" + NVFATBIN_ERROR_COMPRESSED_SIZE_EXCEEDED "NVFATBIN_ERROR_COMPRESSED_SIZE_EXCEEDED" + NVFATBIN_ERROR_UNRECOGNIZED_OPTION "NVFATBIN_ERROR_UNRECOGNIZED_OPTION" + NVFATBIN_ERROR_INVALID_ARCH "NVFATBIN_ERROR_INVALID_ARCH" + NVFATBIN_ERROR_INVALID_NVVM "NVFATBIN_ERROR_INVALID_NVVM" + NVFATBIN_ERROR_EMPTY_INPUT "NVFATBIN_ERROR_EMPTY_INPUT" + NVFATBIN_ERROR_MISSING_PTX_ARCH "NVFATBIN_ERROR_MISSING_PTX_ARCH" + NVFATBIN_ERROR_PTX_ARCH_MISMATCH "NVFATBIN_ERROR_PTX_ARCH_MISMATCH" + NVFATBIN_ERROR_MISSING_FATBIN "NVFATBIN_ERROR_MISSING_FATBIN" + NVFATBIN_ERROR_INVALID_INDEX "NVFATBIN_ERROR_INVALID_INDEX" + NVFATBIN_ERROR_IDENTIFIER_REUSE "NVFATBIN_ERROR_IDENTIFIER_REUSE" + NVFATBIN_ERROR_INTERNAL_PTX_OPTION "NVFATBIN_ERROR_INTERNAL_PTX_OPTION" + _NVFATBINRESULT_INTERNAL_LOADING_ERROR "_NVFATBINRESULT_INTERNAL_LOADING_ERROR" = -42 + + +# types +ctypedef void* nvFatbinHandle 'nvFatbinHandle' + + +############################################################################### +# Functions +############################################################################### + +cdef const char* nvFatbinGetErrorString(nvFatbinResult result) except?NULL nogil +cdef nvFatbinResult nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinAddCubin(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinAddLTOIR(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil +cdef nvFatbinResult nvFatbinAddTileIR(nvFatbinHandle handle, const void* code, size_t size, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil diff --git a/cuda_bindings/cuda/bindings/cynvfatbin.pyx b/cuda_bindings/cuda/bindings/cynvfatbin.pyx new file mode 100644 index 0000000000..f0f8300cb0 --- /dev/null +++ b/cuda_bindings/cuda/bindings/cynvfatbin.pyx @@ -0,0 +1,55 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated across versions from 12.4.1 to 13.1.0. Do not modify it directly. + +from ._internal cimport nvfatbin as _nvfatbin + + +############################################################################### +# Wrapper functions +############################################################################### + +cdef const char* nvFatbinGetErrorString(nvFatbinResult result) except?NULL nogil: + return _nvfatbin._nvFatbinGetErrorString(result) + + +cdef nvFatbinResult nvFatbinCreate(nvFatbinHandle* handle_indirect, const char** options, size_t optionsCount) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinCreate(handle_indirect, options, optionsCount) + + +cdef nvFatbinResult nvFatbinDestroy(nvFatbinHandle* handle_indirect) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinDestroy(handle_indirect) + + +cdef nvFatbinResult nvFatbinAddPTX(nvFatbinHandle handle, const char* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinAddPTX(handle, code, size, arch, identifier, optionsCmdLine) + + +cdef nvFatbinResult nvFatbinAddCubin(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinAddCubin(handle, code, size, arch, identifier) + + +cdef nvFatbinResult nvFatbinAddLTOIR(nvFatbinHandle handle, const void* code, size_t size, const char* arch, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinAddLTOIR(handle, code, size, arch, identifier, optionsCmdLine) + + +cdef nvFatbinResult nvFatbinSize(nvFatbinHandle handle, size_t* size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinSize(handle, size) + + +cdef nvFatbinResult nvFatbinGet(nvFatbinHandle handle, void* buffer) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinGet(handle, buffer) + + +cdef nvFatbinResult nvFatbinVersion(unsigned int* major, unsigned int* minor) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinVersion(major, minor) + + +cdef nvFatbinResult nvFatbinAddReloc(nvFatbinHandle handle, const void* code, size_t size) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinAddReloc(handle, code, size) + + +cdef nvFatbinResult nvFatbinAddTileIR(nvFatbinHandle handle, const void* code, size_t size, const char* identifier, const char* optionsCmdLine) except?_NVFATBINRESULT_INTERNAL_LOADING_ERROR nogil: + return _nvfatbin._nvFatbinAddTileIR(handle, code, size, identifier, optionsCmdLine) diff --git a/cuda_bindings/cuda/bindings/nvfatbin.pxd b/cuda_bindings/cuda/bindings/nvfatbin.pxd new file mode 100644 index 0000000000..e0744efbd7 --- /dev/null +++ b/cuda_bindings/cuda/bindings/nvfatbin.pxd @@ -0,0 +1,38 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated across versions from 12.4.1 to 13.1.0. Do not modify it directly. + +from libc.stdint cimport intptr_t, uint32_t + +from .cynvfatbin cimport * + + +############################################################################### +# Types +############################################################################### + +ctypedef nvFatbinHandle Handle + + +############################################################################### +# Enum +############################################################################### + +ctypedef nvFatbinResult _Result + + +############################################################################### +# Functions +############################################################################### + +cpdef intptr_t create(options, size_t options_count) except -1 +cpdef add_ptx(intptr_t handle, code, size_t size, arch, identifier, options_cmd_line) +cpdef add_cubin(intptr_t handle, code, size_t size, arch, identifier) +cpdef add_ltoir(intptr_t handle, code, size_t size, arch, identifier, options_cmd_line) +cpdef size_t size(intptr_t handle) except? 0 +cpdef get(intptr_t handle, buffer) +cpdef tuple version() +cpdef add_reloc(intptr_t handle, code, size_t size) +cpdef add_tile_ir(intptr_t handle, code, size_t size, identifier, options_cmd_line) diff --git a/cuda_bindings/cuda/bindings/nvfatbin.pyx b/cuda_bindings/cuda/bindings/nvfatbin.pyx new file mode 100644 index 0000000000..32cdeb300b --- /dev/null +++ b/cuda_bindings/cuda/bindings/nvfatbin.pyx @@ -0,0 +1,309 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated across versions from 12.4.1 to 13.1.0. Do not modify it directly. + +cimport cython # NOQA + +from ._internal.utils cimport (get_resource_ptr, get_nested_resource_ptr, nested_resource, nullable_unique_ptr, + get_buffer_pointer, get_resource_ptrs) + +from enum import IntEnum as _IntEnum +from libcpp.vector cimport vector + + +############################################################################### +# Enum +############################################################################### + +class Result(_IntEnum): + """See `nvFatbinResult`.""" + SUCCESS = NVFATBIN_SUCCESS + ERROR_INTERNAL = NVFATBIN_ERROR_INTERNAL + ERROR_ELF_ARCH_MISMATCH = NVFATBIN_ERROR_ELF_ARCH_MISMATCH + ERROR_ELF_SIZE_MISMATCH = NVFATBIN_ERROR_ELF_SIZE_MISMATCH + ERROR_MISSING_PTX_VERSION = NVFATBIN_ERROR_MISSING_PTX_VERSION + ERROR_NULL_POINTER = NVFATBIN_ERROR_NULL_POINTER + ERROR_COMPRESSION_FAILED = NVFATBIN_ERROR_COMPRESSION_FAILED + ERROR_COMPRESSED_SIZE_EXCEEDED = NVFATBIN_ERROR_COMPRESSED_SIZE_EXCEEDED + ERROR_UNRECOGNIZED_OPTION = NVFATBIN_ERROR_UNRECOGNIZED_OPTION + ERROR_INVALID_ARCH = NVFATBIN_ERROR_INVALID_ARCH + ERROR_INVALID_NVVM = NVFATBIN_ERROR_INVALID_NVVM + ERROR_EMPTY_INPUT = NVFATBIN_ERROR_EMPTY_INPUT + ERROR_MISSING_PTX_ARCH = NVFATBIN_ERROR_MISSING_PTX_ARCH + ERROR_PTX_ARCH_MISMATCH = NVFATBIN_ERROR_PTX_ARCH_MISMATCH + ERROR_MISSING_FATBIN = NVFATBIN_ERROR_MISSING_FATBIN + ERROR_INVALID_INDEX = NVFATBIN_ERROR_INVALID_INDEX + ERROR_IDENTIFIER_REUSE = NVFATBIN_ERROR_IDENTIFIER_REUSE + ERROR_INTERNAL_PTX_OPTION = NVFATBIN_ERROR_INTERNAL_PTX_OPTION + + +############################################################################### +# Error handling +############################################################################### + +class nvFatbinError(Exception): + + def __init__(self, status): + self.status = status + s = Result(status) + cdef str err = f"{s.name} ({s.value})" + super(nvFatbinError, self).__init__(err) + + def __reduce__(self): + return (type(self), (self.status,)) + + +@cython.profile(False) +cdef int check_status(int status) except 1 nogil: + if status != 0: + with gil: + raise nvFatbinError(status) + return status + + +############################################################################### +# Wrapper functions +############################################################################### + +cpdef destroy(intptr_t handle): + """nvFatbinDestroy frees the memory associated with the given handle. + + Args: + handle (intptr_t): nvFatbin handle. + + .. seealso:: `nvFatbinDestroy` + """ + cdef Handle h = handle + with nogil: + status = nvFatbinDestroy(&h) + check_status(status) + + +cpdef str get_error_string(int result): + """nvFatbinGetErrorString returns an error description string for each error code. + + Args: + result (Result): error code. + + .. seealso:: `nvFatbinGetErrorString` + """ + cdef const char* _output_ + cdef bytes _output_bytes_ + _output_ = nvFatbinGetErrorString(<_Result>result) + + if _output_ == NULL: + return "" + + _output_bytes_ = _output_ + return _output_bytes_.decode() + + +cpdef intptr_t create(options, size_t options_count) except -1: + """nvFatbinCreate creates a new handle. + + Args: + options (object): An array of strings, each containing a single option. It can be: + + - an :class:`int` as the pointer address to the nested sequence, or + - a Python sequence of :class:`int`\s, each of which is a pointer address + to a valid sequence of 'char', or + - a nested Python sequence of ``str``. + + options_count (size_t): Number of options. + + Returns: + intptr_t: Address of nvFatbin handle. + + .. seealso:: `nvFatbinCreate` + """ + cdef nested_resource[ char ] _options_ + get_nested_resource_ptr[char](_options_, options, NULL) + cdef Handle handle_indirect + with nogil: + __status__ = nvFatbinCreate(&handle_indirect, (_options_.ptrs.data()), options_count) + check_status(__status__) + return handle_indirect + + +cpdef add_ptx(intptr_t handle, code, size_t size, arch, identifier, options_cmd_line): + """nvFatbinAddPTX adds PTX to the fatbinary. + + Args: + handle (intptr_t): nvFatbin handle. + code (bytes): The PTX code. + size (size_t): The size of the PTX code. + arch (str): The numerical architecture that this PTX is for (the XX of any sm_XX, lto_XX, or compute_XX). + identifier (str): Name of the PTX, useful when extracting the fatbin with tools like cuobjdump. + options_cmd_line (str): Options used during JIT compilation. + + .. seealso:: `nvFatbinAddPTX` + """ + cdef void* _code_ = get_buffer_pointer(code, size, readonly=True) + if not isinstance(arch, str): + raise TypeError("arch must be a Python str") + cdef bytes _temp_arch_ = (arch).encode() + cdef char* _arch_ = _temp_arch_ + if not isinstance(identifier, str): + raise TypeError("identifier must be a Python str") + cdef bytes _temp_identifier_ = (identifier).encode() + cdef char* _identifier_ = _temp_identifier_ + if not isinstance(options_cmd_line, str): + raise TypeError("options_cmd_line must be a Python str") + cdef bytes _temp_options_cmd_line_ = (options_cmd_line).encode() + cdef char* _options_cmd_line_ = _temp_options_cmd_line_ + with nogil: + __status__ = nvFatbinAddPTX(handle, _code_, size, _arch_, _identifier_, _options_cmd_line_) + check_status(__status__) + + +cpdef add_cubin(intptr_t handle, code, size_t size, arch, identifier): + """nvFatbinAddCubin adds a CUDA binary to the fatbinary. + + Args: + handle (intptr_t): nvFatbin handle. + code (bytes): The cubin. + size (size_t): The size of the cubin. + arch (str): The numerical architecture that this cubin is for (the XX of any sm_XX, lto_XX, or compute_XX). + identifier (str): Name of the cubin, useful when extracting the fatbin with tools like cuobjdump. + + .. seealso:: `nvFatbinAddCubin` + """ + cdef void* _code_ = get_buffer_pointer(code, size, readonly=True) + if not isinstance(arch, str): + raise TypeError("arch must be a Python str") + cdef bytes _temp_arch_ = (arch).encode() + cdef char* _arch_ = _temp_arch_ + if not isinstance(identifier, str): + raise TypeError("identifier must be a Python str") + cdef bytes _temp_identifier_ = (identifier).encode() + cdef char* _identifier_ = _temp_identifier_ + with nogil: + __status__ = nvFatbinAddCubin(handle, _code_, size, _arch_, _identifier_) + check_status(__status__) + + +cpdef add_ltoir(intptr_t handle, code, size_t size, arch, identifier, options_cmd_line): + """nvFatbinAddLTOIR adds LTOIR to the fatbinary. + + Args: + handle (intptr_t): nvFatbin handle. + code (bytes): The LTOIR code. + size (size_t): The size of the LTOIR code. + arch (str): The numerical architecture that this LTOIR is for (the XX of any sm_XX, lto_XX, or compute_XX). + identifier (str): Name of the LTOIR, useful when extracting the fatbin with tools like cuobjdump. + options_cmd_line (str): Options used during JIT compilation. + + .. seealso:: `nvFatbinAddLTOIR` + """ + cdef void* _code_ = get_buffer_pointer(code, size, readonly=True) + if not isinstance(arch, str): + raise TypeError("arch must be a Python str") + cdef bytes _temp_arch_ = (arch).encode() + cdef char* _arch_ = _temp_arch_ + if not isinstance(identifier, str): + raise TypeError("identifier must be a Python str") + cdef bytes _temp_identifier_ = (identifier).encode() + cdef char* _identifier_ = _temp_identifier_ + if not isinstance(options_cmd_line, str): + raise TypeError("options_cmd_line must be a Python str") + cdef bytes _temp_options_cmd_line_ = (options_cmd_line).encode() + cdef char* _options_cmd_line_ = _temp_options_cmd_line_ + with nogil: + __status__ = nvFatbinAddLTOIR(handle, _code_, size, _arch_, _identifier_, _options_cmd_line_) + check_status(__status__) + + +cpdef size_t size(intptr_t handle) except? 0: + """nvFatbinSize returns the fatbinary's size. + + Args: + handle (intptr_t): nvFatbin handle. + + Returns: + size_t: The fatbinary's size. + + .. seealso:: `nvFatbinSize` + """ + cdef size_t size + with nogil: + __status__ = nvFatbinSize(handle, &size) + check_status(__status__) + return size + + +cpdef get(intptr_t handle, buffer): + """nvFatbinGet returns the completed fatbinary. + + Args: + handle (intptr_t): nvFatbin handle. + buffer (bytes): memory to store fatbinary. + + .. seealso:: `nvFatbinGet` + """ + cdef void* _buffer_ = get_buffer_pointer(buffer, -1, readonly=False) + with nogil: + __status__ = nvFatbinGet(handle, _buffer_) + check_status(__status__) + + +cpdef tuple version(): + """nvFatbinVersion returns the current version of nvFatbin. + + Returns: + A 2-tuple containing: + + - unsigned int: The major version. + - unsigned int: The minor version. + + .. seealso:: `nvFatbinVersion` + """ + cdef unsigned int major + cdef unsigned int minor + with nogil: + __status__ = nvFatbinVersion(&major, &minor) + check_status(__status__) + return (major, minor) + + +cpdef add_reloc(intptr_t handle, code, size_t size): + """nvFatbinAddReloc adds relocatable PTX entries from a host object to the fatbinary. + + Args: + handle (intptr_t): nvFatbin handle. + code (bytes): The host object image. + size (size_t): The size of the host object image code. + + .. seealso:: `nvFatbinAddReloc` + """ + cdef void* _code_ = get_buffer_pointer(code, size, readonly=True) + with nogil: + __status__ = nvFatbinAddReloc(handle, _code_, size) + check_status(__status__) + + +cpdef add_tile_ir(intptr_t handle, code, size_t size, identifier, options_cmd_line): + """nvFatbinAddTileIR adds Tile IR to the fatbinary. + + Args: + handle (intptr_t): nvFatbin handle. + code (bytes): The Tile IR. + size (size_t): The size of the Tile IR. + identifier (str): Name of the Tile IR, useful when extracting the fatbin with tools like cuobjdump. + options_cmd_line (str): Options used during JIT compilation. + + .. seealso:: `nvFatbinAddTileIR` + """ + cdef void* _code_ = get_buffer_pointer(code, size, readonly=True) + if not isinstance(identifier, str): + raise TypeError("identifier must be a Python str") + cdef bytes _temp_identifier_ = (identifier).encode() + cdef char* _identifier_ = _temp_identifier_ + if not isinstance(options_cmd_line, str): + raise TypeError("options_cmd_line must be a Python str") + cdef bytes _temp_options_cmd_line_ = (options_cmd_line).encode() + cdef char* _options_cmd_line_ = _temp_options_cmd_line_ + with nogil: + __status__ = nvFatbinAddTileIR(handle, _code_, size, _identifier_, _options_cmd_line_) + check_status(__status__) diff --git a/cuda_bindings/docs/source/api.rst b/cuda_bindings/docs/source/api.rst index 4277bc745b..e6ee4b99dd 100644 --- a/cuda_bindings/docs/source/api.rst +++ b/cuda_bindings/docs/source/api.rst @@ -14,5 +14,6 @@ CUDA Python API Reference module/nvrtc module/nvjitlink module/nvvm + module/nvfatbin module/cufile module/utils diff --git a/cuda_bindings/docs/source/module/nvfatbin.rst b/cuda_bindings/docs/source/module/nvfatbin.rst new file mode 100644 index 0000000000..297d4baa85 --- /dev/null +++ b/cuda_bindings/docs/source/module/nvfatbin.rst @@ -0,0 +1,89 @@ +.. SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +.. SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +.. default-role:: cpp:any + +nvfatbin +======== + +Note +---- + +The nvfatbin bindings are not supported on nvFatbin installations <12.4. Ensure the installed CUDA toolkit's nvFatbin version is >=12.4. + +The Tile IR API (:func:`cuda.bindings.nvfatbin.add_tile_ir`) is only available in CUDA 13.1+. + +Functions +--------- + +NvFatbin defines the following functions for creating and populating fatbinaries. + +.. autofunction:: cuda.bindings.nvfatbin.create +.. autofunction:: cuda.bindings.nvfatbin.destroy +.. autofunction:: cuda.bindings.nvfatbin.add_ptx +.. autofunction:: cuda.bindings.nvfatbin.add_cubin +.. autofunction:: cuda.bindings.nvfatbin.add_ltoir +.. autofunction:: cuda.bindings.nvfatbin.add_reloc +.. autofunction:: cuda.bindings.nvfatbin.add_tile_ir +.. autofunction:: cuda.bindings.nvfatbin.size +.. autofunction:: cuda.bindings.nvfatbin.get +.. autofunction:: cuda.bindings.nvfatbin.get_error_string +.. autofunction:: cuda.bindings.nvfatbin.version + +Types +--------- +.. autoclass:: cuda.bindings.nvfatbin.Result + + .. autoattribute:: cuda.bindings.nvfatbin.Result.SUCCESS + + + .. autoattribute:: cuda.bindings.nvfatbin.Result.ERROR_INTERNAL + + + .. autoattribute:: cuda.bindings.nvfatbin.Result.ERROR_ELF_ARCH_MISMATCH + + + .. autoattribute:: cuda.bindings.nvfatbin.Result.ERROR_ELF_SIZE_MISMATCH + + + .. autoattribute:: cuda.bindings.nvfatbin.Result.ERROR_MISSING_PTX_VERSION + + + .. autoattribute:: cuda.bindings.nvfatbin.Result.ERROR_NULL_POINTER + + + .. autoattribute:: cuda.bindings.nvfatbin.Result.ERROR_COMPRESSION_FAILED + + + .. autoattribute:: cuda.bindings.nvfatbin.Result.ERROR_COMPRESSED_SIZE_EXCEEDED + + + .. autoattribute:: cuda.bindings.nvfatbin.Result.ERROR_UNRECOGNIZED_OPTION + + + .. autoattribute:: cuda.bindings.nvfatbin.Result.ERROR_INVALID_ARCH + + + .. autoattribute:: cuda.bindings.nvfatbin.Result.ERROR_INVALID_NVVM + + + .. autoattribute:: cuda.bindings.nvfatbin.Result.ERROR_EMPTY_INPUT + + + .. autoattribute:: cuda.bindings.nvfatbin.Result.ERROR_MISSING_PTX_ARCH + + + .. autoattribute:: cuda.bindings.nvfatbin.Result.ERROR_PTX_ARCH_MISMATCH + + + .. autoattribute:: cuda.bindings.nvfatbin.Result.ERROR_MISSING_FATBIN + + + .. autoattribute:: cuda.bindings.nvfatbin.Result.ERROR_INVALID_INDEX + + + .. autoattribute:: cuda.bindings.nvfatbin.Result.ERROR_IDENTIFIER_REUSE + + + .. autoattribute:: cuda.bindings.nvfatbin.Result.ERROR_INTERNAL_PTX_OPTION + diff --git a/cuda_bindings/docs/source/release/13.1.X-notes.rst b/cuda_bindings/docs/source/release/13.1.X-notes.rst index 21323682b1..92a3f8dbcb 100644 --- a/cuda_bindings/docs/source/release/13.1.X-notes.rst +++ b/cuda_bindings/docs/source/release/13.1.X-notes.rst @@ -9,6 +9,8 @@ Highlights ---------- +* Add ``nvFatbin`` bindings. (PR #1467 _) + Experimental ------------ diff --git a/cuda_bindings/pyproject.toml b/cuda_bindings/pyproject.toml index 7c4bddb434..614f7bb63a 100644 --- a/cuda_bindings/pyproject.toml +++ b/cuda_bindings/pyproject.toml @@ -34,7 +34,7 @@ dependencies = ["cuda-pathfinder ~=1.1"] [project.optional-dependencies] all = [ - "cuda-toolkit[nvrtc,nvjitlink,nvvm]==13.*", + "cuda-toolkit[nvrtc,nvjitlink,nvvm,nvfatbin]==13.*", "cuda-toolkit[cufile]==13.*; sys_platform == 'linux'", ] diff --git a/cuda_bindings/tests/test_nvfatbin.py b/cuda_bindings/tests/test_nvfatbin.py new file mode 100644 index 0000000000..c3b25db2f2 --- /dev/null +++ b/cuda_bindings/tests/test_nvfatbin.py @@ -0,0 +1,321 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + + +import base64 +import binascii +import shutil +import subprocess + +import pytest +from cuda.bindings import nvfatbin, nvrtc + +ARCHITECTURES = ["sm_75", "sm_80", "sm_90", "sm_100"] +PTX_VERSIONS = ["6.4", "7.0", "8.5", "8.8"] + +PTX_TEMPLATE = """ +.version {PTX_VERSION} +.target {ARCH} +.address_size 64 + + // .globl _Z6kernelPi + +.visible .entry _Z6kernelPi( + .param .u64 _Z6kernelPi_param_0 +) +{{ + .reg .b32 %r<7>; + .reg .b64 %rd<5>; + + + ld.param.u64 %rd1, [_Z6kernelPi_param_0]; + cvta.to.global.u64 %rd2, %rd1; + mov.u32 %r1, %tid.x; + mov.u32 %r2, %ctaid.x; + mov.u32 %r3, %ntid.x; + mad.lo.s32 %r4, %r2, %r3, %r1; + mul.wide.s32 %rd3, %r4, 4; + add.s64 %rd4, %rd2, %rd3; + ld.global.u32 %r5, [%rd4]; + add.s32 %r6, %r5, 1; + st.global.u32 [%rd4], %r6; + ret; + +}} +""" + +CODE = """ +int __device__ inc(int x) { + return x + 1; +} +""" + +# Base64 encoded TileIR generated by the toolshed/dump_cutile_b64.py script. +TILEIR_b64 = ( + "f1RpbGVJUgANAQAAgo0BCAECBgYBCwEECgCBAUQHBgQIEAAABgUMAQABBgUIEAALQwEICgEMAAYE" + "CBAAAwYFDAEABAYFCBAAD0MBCA4BEAAGBAgQAAYGBQwBAAcGBQgQABNDAQgSARQAMAUFBUIJDT4C" + "CgcEABkBFglCCRE+AgoHBAAcARYJAgoAABodQgkVZgEHBAAfIAEWCVwAAIQICADLy8vLy8vLg5oC" + "CMvLy8sBy8vLAAAAABfLy8vLy8vLBAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAA" + "AAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAA" + "AAAABAAAAAAAAAAEAAAAAAAAAAUAAAAAAAAABgAAAAAAAAAGAAAAAAAAAAcAAAAAAAAABwAAAAAA" + "AAAIAAAAAAAAAAkAAAAAAAAACQAAAAAAAAAEAAAAAAAAAAnLy8sAAAAAAwAAAAUAAAAMAAAAEQAA" + "ABYAAAAbAAAAIAAAACUAAAACAAEBAQUBFwICAhcEAwMYBAQDAxkTBAMDGhEEAwMbEQQDAx0WBAMD" + "HgiFdATLy8sLy8vLAAAAAAEAAAACAAAAAwAAAAUAAAAIAAAACwAAABcAAAAYAAAALAAAADkAAAAA" + "AwcMAg0DAA0BABAJBAUFBAUFBAUFABEOAgEAAAAAAAAAgAEBAAAAAAAAAA8BEAAAAAgBAAAAAAAN" + "AgEQAAAAAAAAAIGIAQQFy8vLAAAAABIAAAAsAAAAPQAAAGoAAABkdW1wX2N1dGlsZV9iNjQucHkv" + "bG9jYWxob21lL2xvY2FsLXdhbmdtL3RveXZlY3Rvcl9hZGRfa2VybmVsL2xvY2FsaG9tZS9sb2Nh" + "bC13YW5nbS90b3kvZHVtcF9jdXRpbGVfYjY0LnB5c21fMTIwAA==" +) + + +def get_version() -> tuple[int, int]: + return nvfatbin.version() + + +@pytest.fixture(params=ARCHITECTURES) +def arch(request): + return request.param + + +@pytest.fixture(params=PTX_VERSIONS) +def ptx_version(request): + return request.param + + +@pytest.fixture +def PTX(arch, ptx_version): + return PTX_TEMPLATE.format(PTX_VERSION=ptx_version, ARCH=arch) + + +@pytest.fixture +def nvcc_smoke(tmpdir) -> str: + # TODO: Use cuda-pathfinder to locate nvcc on system. + nvcc = shutil.which("nvcc") + if nvcc is None: + pytest.skip("nvcc not found on PATH") + + # Smoke test: make sure nvcc is actually usable (toolkit + host compiler are set up), + # not merely present on PATH. + src = tmpdir / "nvcc_smoke.cu" + out = tmpdir / "nvcc_smoke.o" + with open(src, "w") as f: + f.write("") + try: + subprocess.run( # noqa: S603 + [nvcc, "-c", str(src), "-o", str(out)], + check=True, + capture_output=True, + ) + except subprocess.CalledProcessError as e: + stdout = (e.stdout or b"").decode(errors="replace") + stderr = (e.stderr or b"").decode(errors="replace") + pytest.skip( + "nvcc found on PATH but failed to compile a trivial input.\n" + f"command: {[nvcc, '-c', str(src), '-o', str(out)]!r}\n" + f"exit_code: {e.returncode}\n" + f"stdout:\n{stdout}\n" + f"stderr:\n{stderr}\n" + ) + + return nvcc + + +@pytest.fixture +def CUBIN(arch): + def CHECK_NVRTC(err): + if err != nvrtc.nvrtcResult.NVRTC_SUCCESS: + raise RuntimeError(repr(err)) + + err, program_handle = nvrtc.nvrtcCreateProgram(CODE.encode(), b"", 0, [], []) + CHECK_NVRTC(err) + err = nvrtc.nvrtcCompileProgram(program_handle, 1, [f"-arch={arch}".encode()])[0] + CHECK_NVRTC(err) + err, size = nvrtc.nvrtcGetCUBINSize(program_handle) + CHECK_NVRTC(err) + cubin = b" " * size + (err,) = nvrtc.nvrtcGetCUBIN(program_handle, cubin) + CHECK_NVRTC(err) + (err,) = nvrtc.nvrtcDestroyProgram(program_handle) + CHECK_NVRTC(err) + return cubin + + +# create a valid LTOIR input for testing +@pytest.fixture +def LTOIR(arch): + arch = arch.replace("sm", "compute") + + def CHECK_NVRTC(err): + if err != nvrtc.nvrtcResult.NVRTC_SUCCESS: + raise RuntimeError(repr(err)) + + empty_cplusplus_kernel = "__global__ void A() {}" + err, program_handle = nvrtc.nvrtcCreateProgram(empty_cplusplus_kernel.encode(), b"", 0, [], []) + CHECK_NVRTC(err) + err = nvrtc.nvrtcCompileProgram(program_handle, 1, [b"-dlto", f"-arch={arch}".encode()])[0] + CHECK_NVRTC(err) + err, size = nvrtc.nvrtcGetLTOIRSize(program_handle) + CHECK_NVRTC(err) + empty_kernel_ltoir = b" " * size + (err,) = nvrtc.nvrtcGetLTOIR(program_handle, empty_kernel_ltoir) + CHECK_NVRTC(err) + (err,) = nvrtc.nvrtcDestroyProgram(program_handle) + CHECK_NVRTC(err) + return empty_kernel_ltoir + + +@pytest.fixture +def OBJECT(arch, tmpdir, nvcc_smoke): + empty_cplusplus_kernel = "__global__ void A() {}" + with open(tmpdir / "object.cu", "w") as f: + f.write(empty_cplusplus_kernel) + + nvcc = nvcc_smoke + + # This is a test fixture that intentionally invokes a trusted tool (`nvcc`) to + # compile a temporary CUDA translation unit. + cmd = [nvcc, "-c", "-arch", arch, "-o", str(tmpdir / "object.o"), str(tmpdir / "object.cu")] + try: + subprocess.run( # noqa: S603 + cmd, + check=True, + capture_output=True, + ) + except subprocess.CalledProcessError as e: + stdout = (e.stdout or b"").decode(errors="replace") + stderr = (e.stderr or b"").decode(errors="replace") + raise RuntimeError( + "nvcc smoke test passed, but nvcc failed while compiling the test object.\n" + f"command: {cmd!r}\n" + f"exit_code: {e.returncode}\n" + f"stdout:\n{stdout}\n" + f"stderr:\n{stderr}\n" + ) from e + with open(tmpdir / "object.o", "rb") as f: + object = f.read() + + return object + + +@pytest.fixture +def TILEIR(tmpdir): + try: + binary_data = base64.b64decode(TILEIR_b64) + except binascii.Error as e: + raise ValueError( + "Base64 encoded TileIR is corrupted. Please regenerate the TileIR" + "by executing the toolshed/dump_cutile_b64.py script." + ) from e + return binary_data + + +@pytest.mark.parametrize("error_enum", nvfatbin.Result) +def test_get_error_string(error_enum): + es = nvfatbin.get_error_string(error_enum) + + if error_enum is nvfatbin.Result.SUCCESS: + assert es == "" + else: + assert es != "" + + +def test_nvfatbin_get_version(): + major, minor = nvfatbin.version() + assert major is not None + assert minor is not None + + +def test_nvfatbin_empty_create_and_destroy(): + handle = nvfatbin.create([], 0) + assert handle is not None + nvfatbin.destroy(handle) + + +def test_nvfatbin_invalid_input_create(): + with pytest.raises(nvfatbin.nvFatbinError, match="ERROR_UNRECOGNIZED_OPTION"): + nvfatbin.create(["--unsupported_option"], 1) + + +def test_nvfatbin_get_empty(): + handle = nvfatbin.create([], 0) + size = nvfatbin.size(handle) + + buffer = bytearray(size) + nvfatbin.get(handle, buffer) + + nvfatbin.destroy(handle) + + +def test_nvfatbin_add_ptx(PTX, arch): + arch_numeric = arch.split("_")[1] + + handle = nvfatbin.create([], 0) + nvfatbin.add_ptx(handle, PTX.encode(), len(PTX), arch_numeric, "add", f"-arch={arch}") + + buffer = bytearray(nvfatbin.size(handle)) + + nvfatbin.get(handle, buffer) + nvfatbin.destroy(handle) + + +@pytest.mark.parametrize("arch", ["sm_80"], indirect=True) +def test_nvfatbin_add_cubin_ELF_SIZE_MISMATCH(CUBIN, arch): + handle = nvfatbin.create([], 0) + with pytest.raises(nvfatbin.nvFatbinError, match="ERROR_ELF_ARCH_MISMATCH"): + nvfatbin.add_cubin(handle, CUBIN, len(CUBIN), "75", "inc") + + nvfatbin.destroy(handle) + + +def test_nvfatbin_add_cubin(CUBIN, arch): + arch_numeric = arch.split("_")[1] + + handle = nvfatbin.create([], 0) + nvfatbin.add_cubin(handle, CUBIN, len(CUBIN), arch_numeric, "inc") + + buffer = bytearray(nvfatbin.size(handle)) + + nvfatbin.get(handle, buffer) + nvfatbin.destroy(handle) + + +@pytest.mark.parametrize("arch", ["sm_80"], indirect=True) +def test_nvfatbin_add_cubin_ELF_ARCH_MISMATCH(CUBIN, arch): + handle = nvfatbin.create([], 0) + with pytest.raises(nvfatbin.nvFatbinError, match="ERROR_ELF_ARCH_MISMATCH"): + nvfatbin.add_cubin(handle, CUBIN, len(CUBIN), "75", "inc") + + nvfatbin.destroy(handle) + + +def test_nvdfatbin_add_ltoir(LTOIR, arch): + arch_numeric = arch.split("_")[1] + + handle = nvfatbin.create([], 0) + nvfatbin.add_ltoir(handle, LTOIR, len(LTOIR), arch_numeric, "inc", "") + + buffer = bytearray(nvfatbin.size(handle)) + + nvfatbin.get(handle, buffer) + nvfatbin.destroy(handle) + + +def test_nvfatbin_add_reloc(OBJECT): + handle = nvfatbin.create([], 0) + nvfatbin.add_reloc(handle, OBJECT, len(OBJECT)) + + buffer = bytearray(nvfatbin.size(handle)) + + nvfatbin.get(handle, buffer) + nvfatbin.destroy(handle) + + +@pytest.mark.skipif(get_version() < (13, 1), reason="TileIR API is not supported in CUDA < 13.1") +def test_nvfatbin_add_tile_ir(TILEIR): + handle = nvfatbin.create([], 0) + nvfatbin.add_tile_ir(handle, TILEIR, len(TILEIR), "VectorAdd", "") + + buffer = bytearray(nvfatbin.size(handle)) + + nvfatbin.get(handle, buffer) + nvfatbin.destroy(handle) diff --git a/toolshed/dump_cutile_b64.py b/toolshed/dump_cutile_b64.py new file mode 100644 index 0000000000..4ce5a82a9f --- /dev/null +++ b/toolshed/dump_cutile_b64.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python3 + +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +""" +Embeds a sample cuTile kernel, executes it with CUDA_TILE_DUMP_BYTECODE=., +loads the resulting .cutile file, and prints its base64-encoded content. +""" + +import base64 +import glob +import os +import sys + +import cuda.tile as ct +import cupy + + +def _run_sample_cutile_kernel() -> None: + # Import after env var setup so CUDA_TILE_DUMP_BYTECODE is honored. + TILE_SIZE = 16 + + @ct.kernel + def vector_add_kernel(a, b, result): + block_id = ct.bid(0) + a_tile = ct.load(a, index=(block_id,), shape=(TILE_SIZE,)) + b_tile = ct.load(b, index=(block_id,), shape=(TILE_SIZE,)) + + result_tile = a_tile + b_tile + ct.store(result, index=(block_id,), tile=result_tile) + + a = cupy.arange(128, dtype="float32") + b = cupy.arange(128, dtype="float32") + result = cupy.zeros_like(a) + + grid = (ct.cdiv(a.shape[0], TILE_SIZE), 1, 1) + ct.launch(cupy.cuda.get_current_stream(), grid, vector_add_kernel, (a, b, result)) + + cupy.cuda.get_current_stream().synchronize() + + assert result[-1] == 254 + + +def main(): + # CUDA_TILE_DUMP_BYTECODE=. means dump to current directory + os.environ["CUDA_TILE_DUMP_BYTECODE"] = "." + + try: + _run_sample_cutile_kernel() + except Exception as e: + print(f"Sample kernel execution failed: {e}", file=sys.stderr) + raise + + # Find the .cutile file in current directory + cutile_files = glob.glob("./*.cutile") + if not cutile_files: + print("No .cutile file found in current directory", file=sys.stderr) + sys.exit(1) + + # Use the most recently modified one if multiple exist + cutile_path = max(cutile_files, key=os.path.getmtime) + + # Read the binary content + with open(cutile_path, "rb") as f: + binary_content = f.read() + + # Encode with base64 in ASCII mode + b64_encoded = base64.b64encode(binary_content).decode("ascii") + + # Print with lines less than 79 characters, wrapped with quotes + line_width = 76 # 78 - 2 for the quotes on both sides + for i in range(0, len(b64_encoded), line_width): + chunk = b64_encoded[i : i + line_width] + print(f'"{chunk}"') + + +if __name__ == "__main__": + main()