Skip to content
This repository was archived by the owner on May 27, 2021. It is now read-only.

Commit 67107e3

Browse files
committed
Don't link devrt for system calls.
1 parent fc6889a commit 67107e3

File tree

1 file changed

+18
-18
lines changed

1 file changed

+18
-18
lines changed

src/compiler/driver.jl

Lines changed: 18 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -122,15 +122,6 @@ function codegen(target::Symbol, job::CompilerJob;
122122
end
123123

124124
kernel_fn = LLVM.name(kernel)
125-
126-
if libraries
127-
# linking the device run-time library requires use of the CUDA linker,
128-
# which in turn switches compilation to device relocatable code (-rdc) mode.
129-
#
130-
# even if not doing any actual calls that need -rdc (i.e., calls to the run-time
131-
# library), this significantly hurts performance, so don't do it unconditionally
132-
need_libcudadevrt = !isempty(decls(ir))
133-
end
134125
end
135126

136127
# dynamic parallelism
@@ -236,16 +227,25 @@ function codegen(target::Symbol, job::CompilerJob;
236227
jit_options[CUDAdrv.GENERATE_DEBUG_INFO] = true
237228
end
238229

239-
if libraries && need_libcudadevrt
240-
# link the CUDA device library
241-
@timeit to[] "linking" begin
242-
linker = CUDAdrv.CuLink(jit_options)
243-
CUDAdrv.add_file!(linker, libcudadevrt, CUDAdrv.LIBRARY)
244-
CUDAdrv.add_data!(linker, kernel_fn, asm)
245-
image = CUDAdrv.complete(linker)
230+
# link the CUDA device library
231+
image = asm
232+
if libraries
233+
# linking the device runtime library requires use of the CUDA linker,
234+
# which in turn switches compilation to device relocatable code (-rdc) mode.
235+
#
236+
# even if not doing any actual calls that need -rdc (i.e., calls to the runtime
237+
# library), this significantly hurts performance, so don't do it unconditionally
238+
undefined_fns = LLVM.name.(decls(ir))
239+
intrinsic_fns = ["vprintf", "malloc", "free", "__assertfail",
240+
"__nvvm_reflect" #= TODO: should have been optimized away =#]
241+
if !isempty(setdiff(undefined_fns, intrinsic_fns))
242+
@timeit to[] "device runtime library" begin
243+
linker = CUDAdrv.CuLink(jit_options)
244+
CUDAdrv.add_file!(linker, libcudadevrt, CUDAdrv.LIBRARY)
245+
CUDAdrv.add_data!(linker, kernel_fn, asm)
246+
image = CUDAdrv.complete(linker)
247+
end
246248
end
247-
else
248-
image = asm
249249
end
250250

251251
@timeit to[] "compilation" begin

0 commit comments

Comments
 (0)