@@ -12,6 +12,7 @@ module Runtime
1212using .. CUDAnative
1313using LLVM
1414using LLVM. Interop
15+ using CUDAdrv
1516
1617
1718# # representation of a runtime method instance
@@ -251,4 +252,85 @@ for (T, t) in [Int8 => :int8, Int16 => :int16, Int32 => :int32, Int64 =>
251252 end
252253end
253254
255+ # # Bump allocator.
256+
# Produces a `Ptr{T}` holding the address of the LLVM global called
# `global_name`. The global is looked up in the LLVM module that owns the
# generated thunk; when no global with that name is present, one of type `T`
# is declared in address space 1 with link-once ODR linkage and a null
# initializer.
@generated function get_global_pointer(::Val{global_name}, ::Type{T})::Ptr{T} where {global_name, T}
    llvm_elem_type = convert(LLVMType, T)
    llvm_ptr_type = convert(LLVMType, Ptr{T})

    # The thunk is an LLVM function whose body just returns the global's address.
    thunk, _ = create_function(llvm_ptr_type)
    thunk_module = LLVM.parent(thunk)

    # Reuse an existing global of that name; declare it only when it is missing.
    symbol_name = String(global_name)
    module_globals = LLVM.globals(thunk_module)
    if !haskey(module_globals, symbol_name)
        # Address space one is the global memory address space.
        gv = GlobalVariable(thunk_module, llvm_elem_type, symbol_name, 1)
        linkage!(gv, LLVM.API.LLVMLinkOnceODRLinkage)
        initializer!(gv, LLVM.null(llvm_elem_type))
    else
        gv = module_globals[symbol_name]
    end

    # Emit the thunk body: a single block that casts the global to the result
    # type and returns it.
    Builder(JuliaContext()) do builder
        position!(builder, BasicBlock(thunk, "entry", JuliaContext()))
        ret!(builder, ptrtoint!(builder, gv, llvm_ptr_type))
    end

    # The generated code is a call to the thunk, yielding a `Ptr{T}`.
    return call_function(thunk, Ptr{T})
end
294+
# Expands to an expression that yields a `DevicePtr` to the global named `name`
# with element type `type`.
#
# `name` is converted to `Val(Symbol(name))` while the macro expands, so it
# should be a literal. `type` is escaped so that it resolves in the caller's
# scope; it is used both as the `DevicePtr` type parameter and as the element
# type passed to `get_global_pointer`.
macro cuda_global_ptr(name, type)
    elem_type = esc(type)
    global_id = Val(Symbol(name))
    # The pointer's type parameter must be the caller-supplied type. A bare `T`
    # here would refer to a global named `T` in this module instead.
    return :(convert(
        DevicePtr{$elem_type},
        get_global_pointer($global_id, $elem_type)))
end
302+
# Allocates `bytesize` bytes of storage by bumping the global bump
# allocator pointer.
#
# Returns a pointer to the first byte of the reserved chunk, or a null pointer
# when the chunk does not fit entirely before the address stored in the
# `bump_alloc_end` global. The allocator pointer is advanced in either case;
# a rejected request is not rolled back.
function bump_alloc(bytesize::Csize_t)::Ptr{UInt8}
    ptr = @cuda_global_ptr("bump_alloc_ptr", Csize_t)
    # The result of `atomic_add!` is used as the start address of the chunk,
    # i.e. it is taken to be the value the pointer held before the addition.
    chunk_address = atomic_add!(ptr, bytesize)
    end_ptr = unsafe_load(@cuda_global_ptr("bump_alloc_end", Csize_t))
    # The whole chunk has to lie inside the buffer, not just its first byte.
    # The subtraction only runs once `chunk_address < end_ptr` holds, so the
    # unsigned arithmetic cannot wrap around.
    if chunk_address < end_ptr && bytesize <= end_ptr - chunk_address
        return convert(Ptr{UInt8}, chunk_address)
    else
        return C_NULL
    end
end
315+
# Pass `bump_alloc` to `compile` together with its return type and its
# argument-type tuple, mirroring the signature `bump_alloc(::Csize_t)::Ptr{UInt8}`.
# NOTE(review): `compile` is defined outside this view; presumably it registers
# the function with the device runtime. Confirm against its definition.
compile(bump_alloc, Ptr{UInt8}, (Csize_t,))
317+
# Writes `value` to the global called `name` in `kernel.mod`, if that global
# exists. A `CUDAdrv.CuError` carrying the `ERROR_NOT_FOUND` code is ignored;
# every other exception is rethrown.
function maybe_set_global(kernel, name, value::T) where T
    try
        set(CuGlobal{T}(kernel.mod, name), value)
    catch err
        # The global may never have been declared (because it is unused), in
        # which case there is nothing to set.
        global_is_missing = isa(err, CUDAdrv.CuError) &&
                            err.code == CUDAdrv.ERROR_NOT_FOUND.code
        if !global_is_missing
            rethrow()
        end
    end
end
330+
# Sets up the bump allocator's globals for `kernel`: `bump_alloc_ptr` receives
# `buffer_start` and `bump_alloc_end` receives `buffer_start + buffer_size`.
# Each write goes through `maybe_set_global`.
function bump_alloc_init!(kernel, buffer_start, buffer_size)
    maybe_set_global(kernel, "bump_alloc_ptr", buffer_start)
    buffer_end = buffer_start + buffer_size
    return maybe_set_global(kernel, "bump_alloc_end", buffer_end)
end
335+
254336end
0 commit comments