@@ -10,7 +10,7 @@ export @cuda, cudaconvert, cufunction, dynamic_cufunction, nearest_warpsize
1010function split_kwargs (kwargs)
1111 macro_kws = [:dynamic ]
1212 compiler_kws = [:minthreads , :maxthreads , :blocks_per_sm , :maxregs , :name ]
13- call_kws = [:cooperative , :blocks , :threads , :shmem , :stream ]
13+ call_kws = [:cooperative , :blocks , :threads , :config , : shmem , :stream ]
1414 macro_kwargs = []
1515 compiler_kwargs = []
1616 call_kwargs = []
@@ -226,6 +226,9 @@ The following keyword arguments are supported:
226226- `threads` (defaults to 1)
227227- `blocks` (defaults to 1)
228228- `shmem` (defaults to 0)
229+ - `config`: callback function to dynamically compute the launch configuration.
230+ It should accept a `HostKernel` and return a named tuple with any of the above as fields.
231+ This functionality is intended to be used in combination with the CUDA occupancy API.
229232- `stream` (defaults to the default stream)
230233"""
231234AbstractKernel
269272
270273@doc (@doc AbstractKernel) HostKernel
271274
272- @inline cudacall (kernel:: HostKernel , tt, args... ; kwargs... ) =
273- CUDAdrv. cudacall (kernel. fun, tt, args... ; kwargs... )
275+ @inline function cudacall (kernel:: HostKernel , tt, args... ; config= nothing , kwargs... )
276+ if config != = nothing
277+ CUDAdrv. cudacall (kernel. fun, tt, args... ; kwargs... , config (kernel)... )
278+ else
279+ CUDAdrv. cudacall (kernel. fun, tt, args... ; kwargs... )
280+ end
281+ end
274282
275283"""
276284 version(k::HostKernel)
0 commit comments