Skip to content

Commit c1fba87

Browse files
vsemenov368 and igcbot
authored and committed
Enable integer 64-bit SLM atomic emulation
Emulate integer 64-bit SLM atomic instructions such as INC, DEC, ADD, SUB, SMIN, SMAX, UMIN, UMAX, AND, OR, XOR using the compare-exchange instruction.
1 parent 6428ca7 commit c1fba87

File tree

9 files changed

+391
-1
lines changed

9 files changed

+391
-1
lines changed

IGC/VectorCompiler/CMCL/lib/Headers/cm-cl/detail/builtins.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,33 @@ void __cm_cl_scatter(vector_impl<T, width> data, int addrspace,
7676
vector_impl<uint64_t, width> ptrs, int alignment,
7777
vector_impl<char, width> mask);
7878

79+
// vector BTI atomic
// Declaration only (no body in this header). All vector operands share the
// same `width`: `mask`, `index`, `src1`, `src2` and `passthru` each carry one
// element per lane, and the result has the same element type and width as the
// sources. The surface is selected by the scalar `bti`; `index` is a vector of
// int, `scale` and `offset` are scalars.
// NOTE(review): `opcode`, `addrsize` and `elementsize` presumably take LSC_OP,
// LSC_ADDR_SIZE and LSC_DATA_SIZE values from visa_igc_common_header.h, and
// `passthru` presumably supplies the result for lanes disabled by `mask` --
// confirm against the intrinsic definition.
80+
template <typename T, int width>
81+
vector_impl<T, width> __cm_cl_vector_atomic_bti(
82+
vector_impl<char, width> mask, char opcode, char addrsize, char elementsize,
83+
char l1cachecontrol, char l3cachecontrol, int bti,
84+
vector_impl<int, width> index, short scale, int offset,
85+
vector_impl<T, width> src1, vector_impl<T, width> src2,
86+
vector_impl<T, width> passthru);
87+
88+
// vector SLM atomic
// Declaration only (no body in this header). All vector operands share the
// same `width`: `mask`, `index`, `src1`, `src2` and `passthru` each carry one
// element per lane, and the result has the same element type and width as the
// sources. `base` is a scalar int and `index` a vector of int; `scale` and
// `offset` are scalars.
// NOTE(review): `opcode`, `addrsize` and `elementsize` presumably take LSC_OP,
// LSC_ADDR_SIZE and LSC_DATA_SIZE values from visa_igc_common_header.h, and
// `passthru` presumably supplies the result for lanes disabled by `mask` --
// confirm against the intrinsic definition.
89+
template <typename T, int width>
90+
vector_impl<T, width> __cm_cl_vector_atomic_slm(
91+
vector_impl<char, width> mask, char opcode, char addrsize, char elementsize,
92+
char l1cachecontrol, char l3cachecontrol, int base,
93+
vector_impl<int, width> index, short scale, int offset,
94+
vector_impl<T, width> src1, vector_impl<T, width> src2,
95+
vector_impl<T, width> passthru);
96+
97+
// vector UGM atomic
// Declaration only (no body in this header). All vector operands share the
// same `width`: `mask`, `index`, `src1`, `src2` and `passthru` each carry one
// element per lane, and the result has the same element type and width as the
// sources. Unlike the BTI and SLM variants declared alongside it, `base` is a
// scalar long and `index` a vector of long; `scale` and `offset` are scalars.
// NOTE(review): `opcode`, `addrsize` and `elementsize` presumably take LSC_OP,
// LSC_ADDR_SIZE and LSC_DATA_SIZE values from visa_igc_common_header.h, and
// `passthru` presumably supplies the result for lanes disabled by `mask` --
// confirm against the intrinsic definition.
98+
template <typename T, int width>
99+
vector_impl<T, width> __cm_cl_vector_atomic_ugm(
100+
vector_impl<char, width> mask, char opcode, char addrsize, char elementsize,
101+
char l1cachecontrol, char l3cachecontrol, long base,
102+
vector_impl<long, width> index, short scale, int offset,
103+
vector_impl<T, width> src1, vector_impl<T, width> src2,
104+
vector_impl<T, width> passthru);
105+
79106
uint32_t __cm_cl_lzd(uint32_t src);
80107
template <int width>
81108
vector_impl<uint32_t, width> __cm_cl_lzd(vector_impl<uint32_t, width> src);

IGC/VectorCompiler/CMCL/lib/Support/BuiltinTranslator.cpp

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,82 @@ Value &createMainInst<BuiltinID::Scatter>(const std::vector<Value *> &Operands,
384384
return *ScatterV;
385385
}
386386

387+
template <>
388+
Value &
389+
createMainInst<BuiltinID::AtomicBti>(const std::vector<Value *> &Operands,
390+
Type &RetTy, IRBuilder<> &IRB) {
391+
assert(Operands.size() == AtomicBtiOperand::Size &&
392+
"builtin operands should be trasformed into VC intrinsic "
393+
"vector_atomic_bti "
394+
"intrinsic operands without changes");
395+
396+
auto *RetVTy = cast<IGCLLVM::FixedVectorType>(&RetTy);
397+
auto *MaskVTy =
398+
IGCLLVM::FixedVectorType::get(IRB.getInt1Ty(), RetVTy->getNumElements());
399+
auto *MaskV = IRB.CreateTrunc(Operands[AtomicBtiOperand::Mask], MaskVTy);
400+
401+
std::vector<Value *> Args = Operands;
402+
403+
Args[AtomicBtiOperand::Mask] = MaskV;
404+
405+
auto *Decl = getAnyDeclarationForIdFromArgs(
406+
RetTy, Args, IntrinsicForBuiltin[BuiltinID::AtomicBti],
407+
*IRB.GetInsertBlock()->getModule());
408+
auto *CI = IRB.CreateCall(Decl, Args, RetTy.isVoidTy() ? "" : "cmcl.builtin");
409+
return *CI;
410+
}
411+
412+
template <>
413+
Value &
414+
createMainInst<BuiltinID::AtomicSlm>(const std::vector<Value *> &Operands,
415+
Type &RetTy, IRBuilder<> &IRB) {
416+
assert(Operands.size() == AtomicSlmOperand::Size &&
417+
"builtin operands should be trasformed into VC intrinsic "
418+
"vector_atomic_slm "
419+
"intrinsic operands without changes");
420+
421+
auto *RetVTy = cast<IGCLLVM::FixedVectorType>(&RetTy);
422+
auto *MaskVTy =
423+
IGCLLVM::FixedVectorType::get(IRB.getInt1Ty(), RetVTy->getNumElements());
424+
auto *MaskV = IRB.CreateTrunc(Operands[AtomicSlmOperand::Mask], MaskVTy);
425+
426+
std::vector<Value *> Args = Operands;
427+
428+
Args[AtomicSlmOperand::Mask] = MaskV;
429+
430+
auto *Decl = getAnyDeclarationForIdFromArgs(
431+
RetTy, Args, IntrinsicForBuiltin[BuiltinID::AtomicSlm],
432+
*IRB.GetInsertBlock()->getModule());
433+
auto *CI = IRB.CreateCall(Decl, Args, RetTy.isVoidTy() ? "" : "cmcl.builtin");
434+
return *CI;
435+
}
436+
437+
template <>
438+
Value &
439+
createMainInst<BuiltinID::AtomicUgm>(const std::vector<Value *> &Operands,
440+
Type &RetTy, IRBuilder<> &IRB) {
441+
assert(Operands.size() == AtomicUgmOperand::Size &&
442+
"builtin operands should be trasformed into VC intrinsic "
443+
"vector_atomic_ugm "
444+
"intrinsic operands without changes");
445+
446+
auto *RetVTy = cast<IGCLLVM::FixedVectorType>(&RetTy);
447+
auto *MaskVTy =
448+
IGCLLVM::FixedVectorType::get(IRB.getInt1Ty(), RetVTy->getNumElements());
449+
auto *MaskV = IRB.CreateTrunc(Operands[AtomicUgmOperand::Mask], MaskVTy);
450+
451+
std::vector<Value *> Args = Operands;
452+
453+
Args[AtomicUgmOperand::Mask] = MaskV;
454+
455+
auto *Decl = getAnyDeclarationForIdFromArgs(
456+
RetTy, Args, IntrinsicForBuiltin[BuiltinID::AtomicUgm],
457+
*IRB.GetInsertBlock()->getModule());
458+
auto *CI = IRB.CreateCall(Decl, Args, RetTy.isVoidTy() ? "" : "cmcl.builtin");
459+
return *CI;
460+
}
461+
462+
387463
//----------------------- Rounding operations ----------------------------//
388464
template <>
389465
Value &createMainInst<BuiltinID::Ceil>(const std::vector<Value *> &Operands,

IGC/VectorCompiler/CMCL/lib/Support/TranslationDescription.json

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,117 @@
218218
]
219219
}
220220
},
221+
"AtomicBti": {
222+
"Name": "vector_atomic_bti",
223+
"Operands": [
224+
{"Name": "Mask", "Kind": "Input"},
225+
{"Name": "Opcode", "Kind": "Input"},
226+
{"Name": "AddrSize", "Kind": "Input"},
227+
{"Name": "ElemSize", "Kind": "Input"},
228+
{"Name": "L1CacheControl", "Kind": "Input"},
229+
{"Name": "L3CacheControl", "Kind": "Input"},
230+
{"Name": "Bti", "Kind": "Input"},
231+
{"Name": "Index", "Kind": "Input"},
232+
{"Name": "Scale", "Kind": "Input"},
233+
{"Name": "Offset", "Kind": "Input"},
234+
{"Name": "Src1", "Kind": "Input"},
235+
{"Name": "Src2", "Kind": "Input"},
236+
{"Name": "Passthru", "Kind": "Input"}
237+
],
238+
"TranslateInto": {
239+
"VC-Intrinsic": "lsc_atomic_bti",
240+
"ReturnType": {"GetBuiltinReturnType": []},
241+
"Operands": [
242+
{"GetBuiltinOperand": ["Mask"]},
243+
{"GetBuiltinOperand": ["Opcode"]},
244+
{"GetBuiltinOperand": ["AddrSize"]},
245+
{"GetBuiltinOperand": ["ElemSize"]},
246+
{"GetBuiltinOperand": ["L1CacheControl"]},
247+
{"GetBuiltinOperand": ["L3CacheControl"]},
248+
{"GetBuiltinOperand": ["Bti"]},
249+
{"GetBuiltinOperand": ["Index"]},
250+
{"GetBuiltinOperand": ["Scale"]},
251+
{"GetBuiltinOperand": ["Offset"]},
252+
{"GetBuiltinOperand": ["Src1"]},
253+
{"GetBuiltinOperand": ["Src2"]},
254+
{"GetBuiltinOperand": ["Passthru"]}
255+
]
256+
}
257+
},
258+
"AtomicSlm": {
259+
"Name": "vector_atomic_slm",
260+
"Operands": [
261+
{"Name": "Mask", "Kind": "Input"},
262+
{"Name": "Opcode", "Kind": "Input"},
263+
{"Name": "AddrSize", "Kind": "Input"},
264+
{"Name": "ElemSize", "Kind": "Input"},
265+
{"Name": "L1CacheControl", "Kind": "Input"},
266+
{"Name": "L3CacheControl", "Kind": "Input"},
267+
{"Name": "Base", "Kind": "Input"},
268+
{"Name": "Index", "Kind": "Input"},
269+
{"Name": "Scale", "Kind": "Input"},
270+
{"Name": "Offset", "Kind": "Input"},
271+
{"Name": "Src1", "Kind": "Input"},
272+
{"Name": "Src2", "Kind": "Input"},
273+
{"Name": "Passthru", "Kind": "Input"}
274+
],
275+
"TranslateInto": {
276+
"VC-Intrinsic": "lsc_atomic_slm",
277+
"ReturnType": {"GetBuiltinReturnType": []},
278+
"Operands": [
279+
{"GetBuiltinOperand": ["Mask"]},
280+
{"GetBuiltinOperand": ["Opcode"]},
281+
{"GetBuiltinOperand": ["AddrSize"]},
282+
{"GetBuiltinOperand": ["ElemSize"]},
283+
{"GetBuiltinOperand": ["L1CacheControl"]},
284+
{"GetBuiltinOperand": ["L3CacheControl"]},
285+
{"GetBuiltinOperand": ["Base"]},
286+
{"GetBuiltinOperand": ["Index"]},
287+
{"GetBuiltinOperand": ["Scale"]},
288+
{"GetBuiltinOperand": ["Offset"]},
289+
{"GetBuiltinOperand": ["Src1"]},
290+
{"GetBuiltinOperand": ["Src2"]},
291+
{"GetBuiltinOperand": ["Passthru"]}
292+
]
293+
}
294+
},
295+
"AtomicUgm": {
296+
"Name": "vector_atomic_ugm",
297+
"Operands": [
298+
{"Name": "Mask", "Kind": "Input"},
299+
{"Name": "Opcode", "Kind": "Input"},
300+
{"Name": "AddrSize", "Kind": "Input"},
301+
{"Name": "ElemSize", "Kind": "Input"},
302+
{"Name": "L1CacheControl", "Kind": "Input"},
303+
{"Name": "L3CacheControl", "Kind": "Input"},
304+
{"Name": "Base", "Kind": "Input"},
305+
{"Name": "Index", "Kind": "Input"},
306+
{"Name": "Scale", "Kind": "Input"},
307+
{"Name": "Offset", "Kind": "Input"},
308+
{"Name": "Src1", "Kind": "Input"},
309+
{"Name": "Src2", "Kind": "Input"},
310+
{"Name": "Passthru", "Kind": "Input"}
311+
],
312+
"TranslateInto": {
313+
"VC-Intrinsic": "lsc_atomic_ugm",
314+
"ReturnType": {"GetBuiltinReturnType": []},
315+
"Operands": [
316+
{"GetBuiltinOperand": ["Mask"]},
317+
{"GetBuiltinOperand": ["Opcode"]},
318+
{"GetBuiltinOperand": ["AddrSize"]},
319+
{"GetBuiltinOperand": ["ElemSize"]},
320+
{"GetBuiltinOperand": ["L1CacheControl"]},
321+
{"GetBuiltinOperand": ["L3CacheControl"]},
322+
{"GetBuiltinOperand": ["Base"]},
323+
{"GetBuiltinOperand": ["Index"]},
324+
{"GetBuiltinOperand": ["Scale"]},
325+
{"GetBuiltinOperand": ["Offset"]},
326+
{"GetBuiltinOperand": ["Src1"]},
327+
{"GetBuiltinOperand": ["Src2"]},
328+
{"GetBuiltinOperand": ["Passthru"]}
329+
]
330+
}
331+
},
221332
"All": {
222333
"Name": "all",
223334
"Operands": [

IGC/VectorCompiler/lib/BiF/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,9 @@ set(BUILTIN_SOURCES
5353

5454
Library/Math/Integer/i64divrem.cpp
5555
Library/Math/Integer/sdivrem.cpp
56-
Library/Math/Integer/udivrem.cpp)
56+
Library/Math/Integer/udivrem.cpp
57+
58+
Library/Atomics/Local/binop.cpp)
5759

5860
vc_embed_optimized_bif(BUILTINS_CPP_PATH "${BUILTIN_SOURCES}" VCBuiltins 64)
5961

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
/*========================== begin_copyright_notice ============================
2+
3+
Copyright (C) 2023 Intel Corporation
4+
5+
SPDX-License-Identifier: MIT
6+
7+
============================= end_copyright_notice ===========================*/
8+
9+
#include <cm-cl/vector.h>
10+
11+
using namespace cm;
12+
13+
namespace {
14+
15+
// Atomic operation selectors, listed in ascending opcode order. The numeric
// values are taken from the LSC_OP enum in
// Source/visa/include/visa_igc_common_header.h.
enum class AtomicOp : char {
  Inc = 0x08,
  Dec = 0x09,
  Load = 0x0A,
  Xchg = 0x0B,
  Add = 0x0C,
  Sub = 0x0D,
  SMin = 0x0E,
  SMax = 0x0F,
  UMin = 0x10,
  UMax = 0x11,
  And = 0x18,
  Or = 0x19,
  Xor = 0x1A,
};
32+
33+
template <int N>
34+
CM_NODEBUG CM_INLINE vector<uint64_t, N>
35+
__impl_atomic_local_binop(mask<N> pred, AtomicOp op, char l1cachecontrol,
36+
char l3cachecontrol, int base, vector<int, N> index,
37+
short scale, int offset, vector<uint64_t, N> src,
38+
vector<uint64_t, N> passthru) {
39+
vector<int, N> addr = base + index * scale + offset;
40+
vector<uint64_t, N> laddr = addr;
41+
vector<uint64_t, N> orig =
42+
detail::__cm_cl_gather(3, laddr.cl_vector(), sizeof(uint64_t),
43+
pred.cl_vector(), passthru.cl_vector());
44+
45+
// Value should be equal to LSC_ATOMIC_ICAS from
46+
// Source/visa/include/visa_igc_common_header.h
47+
constexpr char OpcodeICAS = 0x12;
48+
49+
// Value should be equal to LSC_ADDR_SIZE_32b from
50+
// Source/visa/include/visa_igc_common_header.h
51+
constexpr char AddrSize = 2;
52+
53+
// Value should be equal to LSC_DATA_SIZE_64b from
54+
// Source/visa/include/visa_igc_common_header.h
55+
constexpr char DataSize = 4;
56+
57+
do {
58+
vector<uint64_t, N> newval = orig;
59+
switch (op) {
60+
case AtomicOp::Add:
61+
newval += src;
62+
break;
63+
case AtomicOp::Sub:
64+
newval -= src;
65+
break;
66+
case AtomicOp::And:
67+
newval &= src;
68+
break;
69+
case AtomicOp::Or:
70+
newval |= src;
71+
break;
72+
case AtomicOp::Xor:
73+
newval ^= src;
74+
break;
75+
case AtomicOp::Xchg:
76+
newval = src;
77+
break;
78+
case AtomicOp::SMin: {
79+
vector<int64_t, N> ssrc = src.template format<int64_t>();
80+
vector<int64_t, N> snewval = newval.template format<int64_t>();
81+
newval.merge(src, ssrc < snewval);
82+
} break;
83+
case AtomicOp::SMax: {
84+
vector<int64_t, N> ssrc = src.template format<int64_t>();
85+
vector<int64_t, N> snewval = newval.template format<int64_t>();
86+
newval.merge(src, ssrc > snewval);
87+
} break;
88+
case AtomicOp::UMin:
89+
newval.merge(src, src < newval);
90+
break;
91+
case AtomicOp::UMax:
92+
newval.merge(src, src > newval);
93+
break;
94+
case AtomicOp::Inc:
95+
newval = newval + 1;
96+
break;
97+
case AtomicOp::Dec:
98+
newval = newval - 1;
99+
break;
100+
case AtomicOp::Load:
101+
break;
102+
default:
103+
break;
104+
}
105+
106+
vector<uint64_t, N> res = detail::__cm_cl_vector_atomic_slm(
107+
pred.cl_vector(), OpcodeICAS, AddrSize, DataSize, l1cachecontrol,
108+
l3cachecontrol, 0, addr.cl_vector(), 1, 0, orig.cl_vector(),
109+
newval.cl_vector(), orig.cl_vector());
110+
pred &= res != orig;
111+
orig = res;
112+
} while (pred.any());
113+
114+
return orig;
115+
}
116+
117+
} // namespace
118+
119+
// Defines the extern "C" entry point __vc_builtin_atomic_slm_v<WIDTH>i64.
// It wraps the raw cl_vector arguments into mask/vector objects, forwards
// everything to __impl_atomic_local_binop<WIDTH> and unwraps the result.
#define ATOMIC(WIDTH)                                                          \
  CM_NODEBUG CM_INLINE extern "C" cl_vector<uint64_t, WIDTH>                   \
      __vc_builtin_atomic_slm_v##WIDTH##i64(                                   \
          cl_vector<char, WIDTH> pred, AtomicOp op, char l1cachecontrol,       \
          char l3cachecontrol, int base, cl_vector<int, WIDTH> index,          \
          short scale, int offset, cl_vector<uint64_t, WIDTH> src,             \
          cl_vector<uint64_t, WIDTH> passthru) {                               \
    vector<uint64_t, WIDTH> result = __impl_atomic_local_binop<WIDTH>(         \
        mask<WIDTH>{pred}, op, l1cachecontrol, l3cachecontrol, base,           \
        vector<int, WIDTH>{index}, scale, offset,                              \
        vector<uint64_t, WIDTH>{src}, vector<uint64_t, WIDTH>{passthru});      \
    return result.cl_vector();                                                 \
  }

// One entry point per vector width: 1, 2, 4, 8, 16 and 32.
ATOMIC(1)
ATOMIC(2)
ATOMIC(4)
ATOMIC(8)
ATOMIC(16)
ATOMIC(32)

0 commit comments

Comments
 (0)