@@ -122,15 +122,6 @@ function codegen(target::Symbol, job::CompilerJob;
122122 end
123123
124124 kernel_fn = LLVM. name (kernel)
125-
126- if libraries
127- # linking the device run-time library requires use of the CUDA linker,
128- # which in turn switches compilation to device relocatable code (-rdc) mode.
129- #
130- # even if not doing any actual calls that need -rdc (i.e., calls to the run-time
131- # library), this significantly hurts performance, so don't do it unconditionally
132- need_libcudadevrt = ! isempty (decls (ir))
133- end
134125 end
135126
136127 # dynamic parallelism
@@ -236,16 +227,25 @@ function codegen(target::Symbol, job::CompilerJob;
236227 jit_options[CUDAdrv. GENERATE_DEBUG_INFO] = true
237228 end
238229
239- if libraries && need_libcudadevrt
240- # link the CUDA device library
241- @timeit to[] " linking" begin
242- linker = CUDAdrv. CuLink (jit_options)
243- CUDAdrv. add_file! (linker, libcudadevrt, CUDAdrv. LIBRARY)
244- CUDAdrv. add_data! (linker, kernel_fn, asm)
245- image = CUDAdrv. complete (linker)
230+ # link the CUDA device library
231+ image = asm
232+ if libraries
233+ # linking the device runtime library requires use of the CUDA linker,
234+ # which in turn switches compilation to device relocatable code (-rdc) mode.
235+ #
236+ # even if not doing any actual calls that need -rdc (i.e., calls to the runtime
237+ # library), this significantly hurts performance, so don't do it unconditionally
238+ undefined_fns = LLVM. name .(decls (ir))
239+ intrinsic_fns = [" vprintf" , " malloc" , " free" , " __assertfail" ,
240+ " __nvvm_reflect" #= TODO : should have been optimized away =# ]
241+ if ! isempty (setdiff (undefined_fns, intrinsic_fns))
242+ @timeit to[] " device runtime library" begin
243+ linker = CUDAdrv. CuLink (jit_options)
244+ CUDAdrv. add_file! (linker, libcudadevrt, CUDAdrv. LIBRARY)
245+ CUDAdrv. add_data! (linker, kernel_fn, asm)
246+ image = CUDAdrv. complete (linker)
247+ end
246248 end
247- else
248- image = asm
249249 end
250250
251251 @timeit to[] " compilation" begin
0 commit comments