Skip to content

Commit a808d3f

Browse files
PawelJurek authored and
pszymich committed
Raytracing: change functions that use implicit dispatch globals to use subdevice (aka Tile) id
On multi-tile GPUs with implicit scaling enabled, when the implicit rtDispatchGlobals pointer is used, the runtime sends an array of pointers, one per tile.
1 parent c4328f4 commit a808d3f

File tree

11 files changed

+149
-88
lines changed

11 files changed

+149
-88
lines changed
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
/*========================== begin_copyright_notice ============================
2+
3+
Copyright (C) 2023 Intel Corporation
4+
5+
SPDX-License-Identifier: MIT
6+
7+
============================= end_copyright_notice ===========================*/
8+
9+
// This file stores constants that we use in OpenCL-C builtins for Raytracing and share with Neo.
10+
11+
#pragma once
12+
13+
// Dispatch globals passed as an array will be aligned up to page size = 64 kilobytes.
14+
const int DISPATCH_GLOBALS_STRIDE = 65536;

IGC/BiFModule/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -450,6 +450,7 @@ set(IGC_BUILD__BIF_OCL_COMMON_INC_DIRS
450450
"${IGC_OPTION__BIF_SRC_OCL_DIR}/Languages/OpenCL/PointerSize"
451451
"${IGC_OPTION__BIF_SRC_OCL_DIR}/Languages/OpenCL/Raytracing"
452452
"${IGC_OPTION__BIF_SRC_OCL_DIR}/Headers"
453+
"${CMAKE_CURRENT_SOURCE_DIR}/../AdaptorOCL/ocl_igc_shared"
453454
"${CMAKE_CURRENT_SOURCE_DIR}/../AdaptorOCL/ocl_igc_shared/device_enqueue"
454455
)
455456

IGC/BiFModule/Headers/spirv.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5541,5 +5541,8 @@ void SPIRV_OVERLOADABLE SPIRV_OCL_BUILTIN(prefetch, _p1v16f64_i64, )( global dou
55415541
uint __builtin_spirv_OpReadClockKHR_i32_i32(uint scope);
55425542
ulong __builtin_spirv_OpReadClockKHR_i64_i32(uint scope);
55435543

5544+
int SPIRV_OVERLOADABLE SPIRV_BUILTIN_NO_OP(BuiltInSubDeviceIDINTEL, , )(void);
5545+
int SPIRV_OVERLOADABLE SPIRV_BUILTIN_NO_OP(GlobalHWThreadIDINTEL, , )(void);
5546+
55445547
#endif // __SPIRV_H__
55455548

IGC/BiFModule/Implementation/IGCBiF_Intrinsics.cl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -905,7 +905,7 @@ void* __builtin_IB_intel_get_global_btd_stack(rtglobals_t rt_dispatch_globals);
905905
rtfence_t __builtin_IB_intel_dispatch_trace_ray_query(
906906
rtglobals_t rt_dispatch_globals, uint bvh_level, uint traceTayCtrl);
907907
void __builtin_IB_intel_rt_sync(rtfence_t fence);
908-
global void* __builtin_IB_intel_get_implicit_dispatch_globals();
908+
global void* __builtin_IB_intel_get_rt_global_buffer();
909909
#endif // defined(cl_intel_pvc_rt_validation) || defined(cl_intel_rt_production)
910910

911911
void __builtin_IB_hdc_uncompressed_write_uchar(__global uchar *buf, uchar val);

IGC/BiFModule/Languages/OpenCL/PreRelease/IBiF_intel_rt_validation.cl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ SPDX-License-Identifier: MIT
88

99
#if defined(cl_intel_pvc_rt_validation)
1010

11+
#include "IBiF_intel_rt_utils.h"
12+
1113
void* intel_get_rt_stack(rtglobals_t rt_dispatch_globals)
1214
{
1315
return __builtin_IB_intel_get_rt_stack(rt_dispatch_globals);
@@ -37,7 +39,7 @@ void intel_rt_sync(rtfence_t fence)
3739

3840
global void* intel_get_implicit_dispatch_globals()
3941
{
40-
return __builtin_IB_intel_get_implicit_dispatch_globals();
42+
return __getImplicitDispatchGlobals();
4143
}
4244

4345
#endif // defined(cl_intel_pvc_rt_validation)

IGC/BiFModule/Languages/OpenCL/Raytracing/IBiF_intel_rt_production.cl

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@ SPDX-License-Identifier: MIT
66
77
============================= end_copyright_notice ===========================*/
88

9-
#include "IBiF_intel_rt_struct_defs.cl"
9+
#include "IBiF_intel_rt_struct_defs.h"
10+
#include "IBiF_intel_rt_utils.h"
1011

1112
#if defined(cl_intel_rt_production)
1213

@@ -21,7 +22,7 @@ intel_ray_query_t intel_ray_query_init(
2122
global HWAccel* hwaccel = to_global((HWAccel*)accel);
2223
unsigned int bvh_level = 0;
2324

24-
rtglobals_t dispatchGlobalsPtr = (rtglobals_t)__builtin_IB_intel_get_implicit_dispatch_globals();
25+
rtglobals_t dispatchGlobalsPtr = (rtglobals_t) __getImplicitDispatchGlobals();
2526
global RTStack* rtStack =
2627
to_global((RTStack*)__builtin_IB_intel_get_rt_stack(dispatchGlobalsPtr));
2728

IGC/BiFModule/Languages/OpenCL/Raytracing/IBiF_intel_rt_struct_defs.cl renamed to IGC/BiFModule/Languages/OpenCL/Raytracing/IBiF_intel_rt_struct_defs.h

Lines changed: 73 additions & 73 deletions
Large diffs are not rendered by default.

IGC/BiFModule/Languages/OpenCL/Raytracing/IBiF_intel_rt_utils.cl

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,35 @@
11
/*========================== begin_copyright_notice ============================
22
3-
Copyright (C) 2022 Intel Corporation
3+
Copyright (C) 2022-2023 Intel Corporation
44
55
SPDX-License-Identifier: MIT
66
77
============================= end_copyright_notice ===========================*/
88

9+
#include "raytracing/constants.h"
10+
#include "IBiF_intel_rt_utils.h"
11+
912
#define sizeofbits(val) (8 * sizeof(val))
1013

1114
// === --------------------------------------------------------------------===
1215
// === Bitfield accessors
1316
// === --------------------------------------------------------------------===
1417

15-
ushort getBits16(ushort value, uint startBit, uint width)
18+
// Extracts a bitfield from a 16-bit value.
//   value    - word to read from
//   startBit - index of the lowest bit of the field
//   width    - number of bits in the field
// Returns the field shifted down so that its lowest bit is bit 0.
// NOTE(review): presumably startBit + width <= 16 — confirm against callers.
ushort __getBits16(ushort value, uint startBit, uint width)
1619
{
1720
// Move the field down to bit 0.
ushort value_aligned = value >> startBit;
1821
// Mask with the low `width` bits set.
ushort mask = USHRT_MAX >> (sizeofbits(ushort) - width);
1922
return value_aligned & mask;
2023
}
2124

22-
uint getBits32(uint value, uint startBit, uint width)
25+
// Extracts a bitfield from a 32-bit value.
//   value    - word to read from
//   startBit - index of the lowest bit of the field
//   width    - number of bits in the field
// Returns the field shifted down so that its lowest bit is bit 0.
// NOTE(review): width == 0 makes the mask shift count equal to the full bit
// width of uint — confirm callers never pass width == 0.
uint __getBits32(uint value, uint startBit, uint width)
2326
{
2427
// Move the field down to bit 0.
uint value_aligned = value >> startBit;
2528
// Mask with the low `width` bits set.
uint mask = UINT_MAX >> (sizeofbits(uint) - width);
2629
return value_aligned & mask;
2730
}
2831

29-
ulong getBits64(ulong value, uint startBit, uint width)
32+
ulong __getBits64(ulong value, uint startBit, uint width)
3033
{
3134
ulong value_aligned = value >> startBit;
3235
ulong mask = ULONG_MAX >> (sizeofbits(ulong) - width);
@@ -42,23 +45,33 @@ ulong getBits64(ulong value, uint startBit, uint width)
4245
// slotWidthMask = 00000111
4346
// valueSlitMask = 11100011
4447

45-
ushort setBits16(ushort value, ushort slot, uint startBit, uint width)
48+
// Writes a bitfield into a 16-bit value.
//   value    - word to modify
//   slot     - new field contents; only its low `width` bits are used
//   startBit - index of the lowest bit of the field
//   width    - number of bits in the field
// Returns `value` with the field replaced and all other bits unchanged.
// NOTE(review): presumably startBit + width <= 16 — confirm against callers.
ushort __setBits16(ushort value, ushort slot, uint startBit, uint width)
4649
{
4750
// Low `width` bits set.
ushort slotWidthMask = USHRT_MAX >> (sizeofbits(ushort) - width);
4851
// All bits set except the field being written.
ushort valueSlitMask = ~(slotWidthMask << startBit);
4952
// Clear the field in `value`, then OR in the truncated, shifted `slot`.
return (value & valueSlitMask) | ((slot & slotWidthMask) << startBit);
5053
}
5154

52-
uint setBits32(uint value, uint slot, uint startBit, uint width)
55+
// Writes a bitfield into a 32-bit value.
//   value    - word to modify
//   slot     - new field contents; only its low `width` bits are used
//   startBit - index of the lowest bit of the field
//   width    - number of bits in the field
// Returns `value` with the field replaced and all other bits unchanged.
// NOTE(review): width == 0 makes the mask shift count equal to the full bit
// width of uint — confirm callers never pass width == 0.
uint __setBits32(uint value, uint slot, uint startBit, uint width)
5356
{
5457
// Low `width` bits set.
uint slotWidthMask = UINT_MAX >> (sizeofbits(uint) - width);
5558
// All bits set except the field being written.
uint valueSlitMask = ~(slotWidthMask << startBit);
5659
// Clear the field in `value`, then OR in the truncated, shifted `slot`.
return (value & valueSlitMask) | ((slot & slotWidthMask) << startBit);
5760
}
5861

59-
ulong setBits64(ulong value, ulong slot, uint startBit, uint width)
62+
// Writes a bitfield into a 64-bit value.
//   value    - word to modify
//   slot     - new field contents; only its low `width` bits are used
//   startBit - index of the lowest bit of the field
//   width    - number of bits in the field
// Returns `value` with the field replaced and all other bits unchanged.
// NOTE(review): width == 0 makes the mask shift count equal to the full bit
// width of ulong — confirm callers never pass width == 0.
ulong __setBits64(ulong value, ulong slot, uint startBit, uint width)
6063
{
6164
// Low `width` bits set.
ulong slotWidthMask = ULONG_MAX >> (sizeofbits(ulong) - width);
6265
// All bits set except the field being written.
ulong valueSlitMask = ~(slotWidthMask << startBit);
6366
// Clear the field in `value`, then OR in the truncated, shifted `slot`.
return (value & valueSlitMask) | ((slot & slotWidthMask) << startBit);
6467
}
68+
69+
// === --------------------------------------------------------------------===
70+
// === Helper functions
71+
// === --------------------------------------------------------------------===
72+
// Returns the address inside the RT global buffer that belongs to the current
// sub-device: the buffer base plus subDeviceID * DISPATCH_GLOBALS_STRIDE bytes.
global void* __getImplicitDispatchGlobals()
73+
{
74+
// Base pointer, typed as char* so the offset below is in bytes.
global char* globalBuffer = __builtin_IB_intel_get_rt_global_buffer();
75+
// ID of the sub-device, as reported by the BuiltInSubDeviceIDINTEL builtin.
int subDeviceID = SPIRV_BUILTIN_NO_OP(BuiltInSubDeviceIDINTEL, , )();
76+
// Step to this sub-device's entry in the array.
return globalBuffer + subDeviceID * DISPATCH_GLOBALS_STRIDE;
77+
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/*========================== begin_copyright_notice ============================
2+
3+
Copyright (C) 2023 Intel Corporation
4+
5+
SPDX-License-Identifier: MIT
6+
7+
============================= end_copyright_notice ===========================*/
8+
#pragma once
9+
10+
// === --------------------------------------------------------------------===
11+
// === Bitfield accessors
12+
// === --------------------------------------------------------------------===
13+
ushort __getBits16(ushort value, uint startBit, uint width);
14+
uint __getBits32(uint value, uint startBit, uint width);
15+
ulong __getBits64(ulong value, uint startBit, uint width);
16+
17+
// === --------------------------------------------------------------------===
18+
// === Bitfield setters
19+
// === --------------------------------------------------------------------===
20+
ushort __setBits16(ushort value, ushort slot, uint startBit, uint width);
21+
uint __setBits32(uint value, uint slot, uint startBit, uint width);
22+
ulong __setBits64(ulong value, ulong slot, uint startBit, uint width);
23+
24+
// === --------------------------------------------------------------------===
25+
// === Helper functions
26+
// === --------------------------------------------------------------------===
27+
global void* __getImplicitDispatchGlobals();

IGC/Compiler/Optimizer/OpenCLPasses/RayTracing/ResolveOCLRaytracingBuiltins.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ namespace {
3838
{"__builtin_IB_intel_get_global_btd_stack", &ResolveOCLRaytracingBuiltins::handleGetGlobalBTDStack },
3939
{"__builtin_IB_intel_dispatch_trace_ray_query", &ResolveOCLRaytracingBuiltins::handleDispatchTraceRayQuery },
4040
{"__builtin_IB_intel_rt_sync", &ResolveOCLRaytracingBuiltins::handleRTSync },
41-
{"__builtin_IB_intel_get_implicit_dispatch_globals", &ResolveOCLRaytracingBuiltins::handleGetImplicitDG },
41+
{"__builtin_IB_intel_get_rt_global_buffer", &ResolveOCLRaytracingBuiltins::handleGetRTGlobalBuffer },
4242

4343
// Handling for builtins operating on intel_ray_query_t from intel_rt_production extension
4444
{"__builtin_IB_intel_init_ray_query", &ResolveOCLRaytracingBuiltins::handleInitRayQuery },
@@ -374,12 +374,12 @@ void ResolveOCLRaytracingBuiltins::handleRTSync(CallInst& callInst) {
374374

375375
/*
376376
Handler for
377-
void __builtin_IB_intel_get_implicit_dispatch_globals();
377+
global void* __builtin_IB_intel_get_rt_global_buffer();
378378
379379
Description:
380380
Returns IMPLICIT_RT_GLOBAL_BUFFER implicit argument.
381381
*/
382-
void ResolveOCLRaytracingBuiltins::handleGetImplicitDG(llvm::CallInst& callInst) {
382+
void ResolveOCLRaytracingBuiltins::handleGetRTGlobalBuffer(llvm::CallInst& callInst) {
383383
RTBuilder rtbuilder(m_builder->getContext(), *m_pCtx);
384384
rtbuilder.SetInsertPoint(&callInst);
385385
auto v = rtbuilder.getGlobalBufferPtr();

0 commit comments

Comments
 (0)