Skip to content
This repository was archived by the owner on May 27, 2021. It is now read-only.

Commit fc6889a

Browse files
committed
Don't unconditionally link and trigger rdc compilation.
1 parent 3972441 commit fc6889a

File tree

1 file changed

+19
-6
lines changed

1 file changed

+19
-6
lines changed

src/compiler/driver.jl

Lines changed: 19 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -122,6 +122,15 @@ function codegen(target::Symbol, job::CompilerJob;
122122
end
123123

124124
kernel_fn = LLVM.name(kernel)
125+
126+
if libraries
127+
# linking the device run-time library requires use of the CUDA linker,
128+
# which in turn switches compilation to relocatable device code (-rdc) mode.
129+
#
130+
# even if not doing any actual calls that need -rdc (i.e., calls to the run-time
131+
# library), this significantly hurts performance, so don't do it unconditionally
132+
need_libcudadevrt = !isempty(decls(ir))
133+
end
125134
end
126135

127136
# dynamic parallelism
@@ -227,12 +236,16 @@ function codegen(target::Symbol, job::CompilerJob;
227236
jit_options[CUDAdrv.GENERATE_DEBUG_INFO] = true
228237
end
229238

230-
# link the CUDA device library
231-
@timeit to[] "linking" begin
232-
linker = CUDAdrv.CuLink(jit_options)
233-
CUDAdrv.add_file!(linker, libcudadevrt, CUDAdrv.LIBRARY)
234-
CUDAdrv.add_data!(linker, kernel_fn, asm)
235-
image = CUDAdrv.complete(linker)
239+
if libraries && need_libcudadevrt
240+
# link the CUDA device library
241+
@timeit to[] "linking" begin
242+
linker = CUDAdrv.CuLink(jit_options)
243+
CUDAdrv.add_file!(linker, libcudadevrt, CUDAdrv.LIBRARY)
244+
CUDAdrv.add_data!(linker, kernel_fn, asm)
245+
image = CUDAdrv.complete(linker)
246+
end
247+
else
248+
image = asm
236249
end
237250

238251
@timeit to[] "compilation" begin

0 commit comments

Comments
 (0)