Skip to content

Commit 5eb8d29

Browse files
authored
[AMDGPU][GlobalISel] Add RegBankLegalize support for G_BLOCK_ADDR and G_GLOBAL_VALUE (#165340)
1 parent e67ac07 commit 5eb8d29

File tree

7 files changed

+136
-6
lines changed

7 files changed

+136
-6
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -873,6 +873,7 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) {
873873
case Sgpr128:
874874
case Vgpr128:
875875
return LLT::scalar(128);
876+
case SgprP0:
876877
case VgprP0:
877878
return LLT::pointer(0, 64);
878879
case SgprP1:
@@ -887,6 +888,8 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) {
887888
case SgprP5:
888889
case VgprP5:
889890
return LLT::pointer(5, 32);
891+
case SgprP8:
892+
return LLT::pointer(8, 128);
890893
case SgprV2S16:
891894
case VgprV2S16:
892895
case UniInVgprV2S16:
@@ -972,10 +975,12 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
972975
case Sgpr32_WF:
973976
case Sgpr64:
974977
case Sgpr128:
978+
case SgprP0:
975979
case SgprP1:
976980
case SgprP3:
977981
case SgprP4:
978982
case SgprP5:
983+
case SgprP8:
979984
case SgprPtr32:
980985
case SgprPtr64:
981986
case SgprPtr128:
@@ -1055,10 +1060,12 @@ void RegBankLegalizeHelper::applyMappingDst(
10551060
case Sgpr32:
10561061
case Sgpr64:
10571062
case Sgpr128:
1063+
case SgprP0:
10581064
case SgprP1:
10591065
case SgprP3:
10601066
case SgprP4:
10611067
case SgprP5:
1068+
case SgprP8:
10621069
case SgprV2S16:
10631070
case SgprV2S32:
10641071
case SgprV4S32:
@@ -1198,10 +1205,12 @@ void RegBankLegalizeHelper::applyMappingSrc(
11981205
case Sgpr32:
11991206
case Sgpr64:
12001207
case Sgpr128:
1208+
case SgprP0:
12011209
case SgprP1:
12021210
case SgprP3:
12031211
case SgprP4:
12041212
case SgprP5:
1213+
case SgprP8:
12051214
case SgprV2S16:
12061215
case SgprV2S32:
12071216
case SgprV4S32: {

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
6666
return MRI.getType(Reg) == LLT::pointer(4, 64);
6767
case P5:
6868
return MRI.getType(Reg) == LLT::pointer(5, 32);
69+
case P8:
70+
return MRI.getType(Reg) == LLT::pointer(8, 128);
6971
case Ptr32:
7072
return isAnyPtr(MRI.getType(Reg), 32);
7173
case Ptr64:
@@ -108,6 +110,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
108110
return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isUniform(Reg);
109111
case UniP5:
110112
return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isUniform(Reg);
113+
case UniP8:
114+
return MRI.getType(Reg) == LLT::pointer(8, 128) && MUI.isUniform(Reg);
111115
case UniPtr32:
112116
return isAnyPtr(MRI.getType(Reg), 32) && MUI.isUniform(Reg);
113117
case UniPtr64:
@@ -918,6 +922,15 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
918922
addRulesForGOpcs({G_READSTEADYCOUNTER, G_READCYCLECOUNTER}, Standard)
919923
.Uni(S64, {{Sgpr64}, {}});
920924

925+
addRulesForGOpcs({G_BLOCK_ADDR}).Any({{UniP0}, {{SgprP0}, {}}});
926+
927+
addRulesForGOpcs({G_GLOBAL_VALUE})
928+
.Any({{UniP0}, {{SgprP0}, {}}})
929+
.Any({{UniP1}, {{SgprP1}, {}}})
930+
.Any({{UniP3}, {{SgprP3}, {}}})
931+
.Any({{UniP4}, {{SgprP4}, {}}})
932+
.Any({{UniP8}, {{SgprP8}, {}}});
933+
921934
bool hasSALUFloat = ST->hasSALUFloatInsts();
922935

923936
addRulesForGOpcs({G_FADD}, Standard)

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ enum UniformityLLTOpPredicateID {
6363
P3,
6464
P4,
6565
P5,
66+
P8,
6667
Ptr32,
6768
Ptr64,
6869
Ptr128,
@@ -72,6 +73,7 @@ enum UniformityLLTOpPredicateID {
7273
UniP3,
7374
UniP4,
7475
UniP5,
76+
UniP8,
7577
UniPtr32,
7678
UniPtr64,
7779
UniPtr128,
@@ -136,10 +138,12 @@ enum RegBankLLTMappingApplyID {
136138
Sgpr32,
137139
Sgpr64,
138140
Sgpr128,
141+
SgprP0,
139142
SgprP1,
140143
SgprP3,
141144
SgprP4,
142145
SgprP5,
146+
SgprP8,
143147
SgprPtr32,
144148
SgprPtr64,
145149
SgprPtr128,
(newly added LLVM IR test file; its path header was not captured in this extract)

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
3+
4+
@flat = external global i32, align 4
5+
@global = external addrspace(1) global i32, align 4
6+
@lds = addrspace(3) global i32 poison, align 4
7+
@constant = external addrspace(4) constant i32, align 4
8+
@buf = external addrspace(8) global i8
9+
10+
define ptr @global_value_as0_external() {
11+
; GCN-LABEL: global_value_as0_external:
12+
; GCN: ; %bb.0:
13+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14+
; GCN-NEXT: s_getpc_b64 s[4:5]
15+
; GCN-NEXT: s_add_u32 s4, s4, flat@gotpcrel32@lo+4
16+
; GCN-NEXT: s_addc_u32 s5, s5, flat@gotpcrel32@hi+12
17+
; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
18+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
19+
; GCN-NEXT: v_mov_b32_e32 v0, s4
20+
; GCN-NEXT: v_mov_b32_e32 v1, s5
21+
; GCN-NEXT: s_setpc_b64 s[30:31]
22+
ret ptr @flat
23+
}
24+
25+
define ptr addrspace(1) @global_value_as1_external() {
26+
; GCN-LABEL: global_value_as1_external:
27+
; GCN: ; %bb.0:
28+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
29+
; GCN-NEXT: s_getpc_b64 s[4:5]
30+
; GCN-NEXT: s_add_u32 s4, s4, global@gotpcrel32@lo+4
31+
; GCN-NEXT: s_addc_u32 s5, s5, global@gotpcrel32@hi+12
32+
; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
33+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
34+
; GCN-NEXT: v_mov_b32_e32 v0, s4
35+
; GCN-NEXT: v_mov_b32_e32 v1, s5
36+
; GCN-NEXT: s_setpc_b64 s[30:31]
37+
ret ptr addrspace(1) @global
38+
}
39+
40+
define ptr addrspace(4) @global_value_as4_external() {
41+
; GCN-LABEL: global_value_as4_external:
42+
; GCN: ; %bb.0:
43+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
44+
; GCN-NEXT: s_getpc_b64 s[4:5]
45+
; GCN-NEXT: s_add_u32 s4, s4, constant@gotpcrel32@lo+4
46+
; GCN-NEXT: s_addc_u32 s5, s5, constant@gotpcrel32@hi+12
47+
; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
48+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
49+
; GCN-NEXT: v_mov_b32_e32 v0, s4
50+
; GCN-NEXT: v_mov_b32_e32 v1, s5
51+
; GCN-NEXT: s_setpc_b64 s[30:31]
52+
ret ptr addrspace(4) @constant
53+
}
54+
55+
define amdgpu_kernel void @global_value_as3_lds_kernel(ptr addrspace(1) %out) {
56+
; GCN-LABEL: global_value_as3_lds_kernel:
57+
; GCN: ; %bb.0:
58+
; GCN-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
59+
; GCN-NEXT: v_mov_b32_e32 v0, 0
60+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
61+
; GCN-NEXT: global_store_dword v0, v0, s[0:1]
62+
; GCN-NEXT: s_endpgm
63+
%addr = ptrtoint ptr addrspace(3) @lds to i32
64+
store i32 %addr, ptr addrspace(1) %out
65+
ret void
66+
}
67+
68+
define void @global_value_as8_buffer_store(i32 %val) {
69+
; GCN-LABEL: global_value_as8_buffer_store:
70+
; GCN: ; %bb.0:
71+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
72+
; GCN-NEXT: s_getpc_b64 s[8:9]
73+
; GCN-NEXT: s_add_u32 s8, s8, buf@gotpcrel32@lo+4
74+
; GCN-NEXT: s_addc_u32 s9, s9, buf@gotpcrel32@hi+12
75+
; GCN-NEXT: s_load_dwordx4 s[4:7], s[8:9], 0x0
76+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
77+
; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
78+
; GCN-NEXT: s_waitcnt vmcnt(0)
79+
; GCN-NEXT: s_setpc_b64 s[30:31]
80+
call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %val, ptr addrspace(8) @buf, i32 0, i32 0, i32 0)
81+
ret void
82+
}
83+
84+
define i32 @global_value_as8_buffer_load(i32 %offset) {
85+
; GCN-LABEL: global_value_as8_buffer_load:
86+
; GCN: ; %bb.0:
87+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
88+
; GCN-NEXT: s_getpc_b64 s[8:9]
89+
; GCN-NEXT: s_add_u32 s8, s8, buf@gotpcrel32@lo+4
90+
; GCN-NEXT: s_addc_u32 s9, s9, buf@gotpcrel32@hi+12
91+
; GCN-NEXT: s_load_dwordx4 s[4:7], s[8:9], 0x0
92+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
93+
; GCN-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen
94+
; GCN-NEXT: s_waitcnt vmcnt(0)
95+
; GCN-NEXT: s_setpc_b64 s[30:31]
96+
%val = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) @buf, i32 %offset, i32 0, i32 0)
97+
ret i32 %val
98+
}
99+
100+
declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8) nocapture writeonly, i32, i32, i32 immarg) #0
101+
declare i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) nocapture readonly, i32, i32, i32 immarg) #1
102+
103+
attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) }
104+
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: read) }

llvm/test/CodeGen/AMDGPU/GlobalISel/lds-relocs.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -amdgpu-enable-lower-module-lds=0 -show-mc-encoding < %s | FileCheck -check-prefixes=GCN %s
1+
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -amdgpu-enable-lower-module-lds=0 -show-mc-encoding < %s | FileCheck -check-prefixes=GCN %s
22
; FIXME: Merge with DAG test
33

44
@lds.external = external unnamed_addr addrspace(3) global [0 x i32]

llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
; RUN: llc -mtriple=amdgcn -mcpu=tahiti -global-isel -stop-after=instruction-select -o - %s | FileCheck -check-prefixes=GCN,GFX8 %s
2-
; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel -stop-after=instruction-select -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s
1+
; RUN: llc -mtriple=amdgcn -mcpu=tahiti -global-isel -new-reg-bank-select -stop-after=instruction-select -o - %s | FileCheck -check-prefixes=GCN,GFX8 %s
2+
; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel -new-reg-bank-select -stop-after=instruction-select -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s
33

4-
; RUN: not llc -mtriple=amdgcn -mcpu=tahiti -global-isel < %s 2>&1 | FileCheck %s
5-
; RUN: not llc -mtriple=amdgcn -mcpu=tonga -global-isel < %s 2>&1 | FileCheck %s
4+
; RUN: not llc -mtriple=amdgcn -mcpu=tahiti -global-isel -new-reg-bank-select < %s 2>&1 | FileCheck %s
5+
; RUN: not llc -mtriple=amdgcn -mcpu=tonga -global-isel -new-reg-bank-select < %s 2>&1 | FileCheck %s
66

77
; CHECK: error: lds: unsupported initializer for address space
88

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-block-addr.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -O0 -march amdgcn -mcpu=fiji -run-pass=regbankselect %s -o - | FileCheck %s
2+
# RUN: llc -O0 -march amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -o - | FileCheck %s
33

44
--- |
55

0 commit comments

Comments
 (0)