Skip to content

Commit 36e9a0b

Browse files
authored
[AArch64][GlobalISel] Correct instructions for 64bit fneg constant vectors. (#166537)
This code was assuming that the vectors were 128-bit. Add handling for 64-bit vectors. Some of the tests do not apply yet because non-splat vectors are not matched. Fixes #166400.
1 parent d4b41b9 commit 36e9a0b

File tree

2 files changed

+120
-26
lines changed

2 files changed

+120
-26
lines changed

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5666,6 +5666,9 @@ AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
56665666
MachineRegisterInfo &MRI) {
56675667
LLT DstTy = MRI.getType(Dst);
56685668
unsigned DstSize = DstTy.getSizeInBits();
5669+
assert((DstSize == 64 || DstSize == 128) &&
5670+
"Unexpected vector constant size");
5671+
56695672
if (CV->isNullValue()) {
56705673
if (DstSize == 128) {
56715674
auto Mov =
@@ -5735,17 +5738,24 @@ AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
57355738
// Try to create the new constants with MOVI, and if so generate a fneg
57365739
// for it.
57375740
if (auto *NewOp = TryMOVIWithBits(NegBits)) {
5738-
Register NewDst = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
5741+
Register NewDst = MRI.createVirtualRegister(
5742+
DstSize == 64 ? &AArch64::FPR64RegClass : &AArch64::FPR128RegClass);
57395743
NewOp->getOperand(0).setReg(NewDst);
57405744
return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst});
57415745
}
57425746
return nullptr;
57435747
};
57445748
MachineInstr *R;
5745-
if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
5746-
(R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
5749+
if ((R = TryWithFNeg(DefBits, 32,
5750+
DstSize == 64 ? AArch64::FNEGv2f32
5751+
: AArch64::FNEGv4f32)) ||
5752+
(R = TryWithFNeg(DefBits, 64,
5753+
DstSize == 64 ? AArch64::FNEGDr
5754+
: AArch64::FNEGv2f64)) ||
57475755
(STI.hasFullFP16() &&
5748-
(R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
5756+
(R = TryWithFNeg(DefBits, 16,
5757+
DstSize == 64 ? AArch64::FNEGv4f16
5758+
: AArch64::FNEGv8f16))))
57495759
return R;
57505760
}
57515761

llvm/test/CodeGen/AArch64/neon-mov.ll

Lines changed: 106 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,15 @@ define <2 x i32> @movi2s_lsl16() {
7676
ret <2 x i32> <i32 16711680, i32 16711680>
7777
}
7878

79+
define <2 x i32> @movi2s_fneg() {
80+
; CHECK-LABEL: movi2s_fneg:
81+
; CHECK: // %bb.0:
82+
; CHECK-NEXT: movi v0.2s, #240, lsl #8
83+
; CHECK-NEXT: fneg v0.2s, v0.2s
84+
; CHECK-NEXT: ret
85+
ret <2 x i32> <i32 2147545088, i32 2147545088>
86+
}
87+
7988
define <2 x i32> @movi2s_lsl24() {
8089
; CHECK-LABEL: movi2s_lsl24:
8190
; CHECK: // %bb.0:
@@ -149,6 +158,33 @@ define <4 x i16> @movi4h_lsl8() {
149158
ret <4 x i16> <i16 65280, i16 65280, i16 65280, i16 65280>
150159
}
151160

161+
define <4 x i16> @movi4h_fneg() {
162+
; CHECK-NOFP16-SD-LABEL: movi4h_fneg:
163+
; CHECK-NOFP16-SD: // %bb.0:
164+
; CHECK-NOFP16-SD-NEXT: movi v0.4h, #127, lsl #8
165+
; CHECK-NOFP16-SD-NEXT: fneg v0.2s, v0.2s
166+
; CHECK-NOFP16-SD-NEXT: ret
167+
;
168+
; CHECK-FP16-SD-LABEL: movi4h_fneg:
169+
; CHECK-FP16-SD: // %bb.0:
170+
; CHECK-FP16-SD-NEXT: movi v0.4h, #127, lsl #8
171+
; CHECK-FP16-SD-NEXT: fneg v0.2s, v0.2s
172+
; CHECK-FP16-SD-NEXT: ret
173+
;
174+
; CHECK-NOFP16-GI-LABEL: movi4h_fneg:
175+
; CHECK-NOFP16-GI: // %bb.0:
176+
; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI18_0
177+
; CHECK-NOFP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI18_0]
178+
; CHECK-NOFP16-GI-NEXT: ret
179+
;
180+
; CHECK-FP16-GI-LABEL: movi4h_fneg:
181+
; CHECK-FP16-GI: // %bb.0:
182+
; CHECK-FP16-GI-NEXT: adrp x8, .LCPI18_0
183+
; CHECK-FP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI18_0]
184+
; CHECK-FP16-GI-NEXT: ret
185+
ret <4 x i16> <i16 32512, i16 65280, i16 32512, i16 65280>
186+
}
187+
152188
define <8 x i16> @movi8h_lsl0() {
153189
; CHECK-LABEL: movi8h_lsl0:
154190
; CHECK: // %bb.0:
@@ -180,14 +216,14 @@ define <8 x i16> @movi8h_fneg() {
180216
;
181217
; CHECK-NOFP16-GI-LABEL: movi8h_fneg:
182218
; CHECK-NOFP16-GI: // %bb.0:
183-
; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI19_0
184-
; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI19_0]
219+
; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI21_0
220+
; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI21_0]
185221
; CHECK-NOFP16-GI-NEXT: ret
186222
;
187223
; CHECK-FP16-GI-LABEL: movi8h_fneg:
188224
; CHECK-FP16-GI: // %bb.0:
189-
; CHECK-FP16-GI-NEXT: adrp x8, .LCPI19_0
190-
; CHECK-FP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI19_0]
225+
; CHECK-FP16-GI-NEXT: adrp x8, .LCPI21_0
226+
; CHECK-FP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI21_0]
191227
; CHECK-FP16-GI-NEXT: ret
192228
ret <8 x i16> <i16 32512, i16 65280, i16 32512, i16 65280, i16 32512, i16 65280, i16 32512, i16 65280>
193229
}
@@ -275,6 +311,27 @@ define <4 x i16> @mvni4h_lsl8() {
275311
ret <4 x i16> <i16 61439, i16 61439, i16 61439, i16 61439>
276312
}
277313

314+
define <4 x i16> @mvni4h_neg() {
315+
; CHECK-NOFP16-SD-LABEL: mvni4h_neg:
316+
; CHECK-NOFP16-SD: // %bb.0:
317+
; CHECK-NOFP16-SD-NEXT: mov w8, #33008 // =0x80f0
318+
; CHECK-NOFP16-SD-NEXT: dup v0.4h, w8
319+
; CHECK-NOFP16-SD-NEXT: ret
320+
;
321+
; CHECK-FP16-LABEL: mvni4h_neg:
322+
; CHECK-FP16: // %bb.0:
323+
; CHECK-FP16-NEXT: movi v0.4h, #240
324+
; CHECK-FP16-NEXT: fneg v0.4h, v0.4h
325+
; CHECK-FP16-NEXT: ret
326+
;
327+
; CHECK-NOFP16-GI-LABEL: mvni4h_neg:
328+
; CHECK-NOFP16-GI: // %bb.0:
329+
; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI32_0
330+
; CHECK-NOFP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI32_0]
331+
; CHECK-NOFP16-GI-NEXT: ret
332+
ret <4 x i16> <i16 33008, i16 33008, i16 33008, i16 33008>
333+
}
334+
278335
define <8 x i16> @mvni8h_lsl0() {
279336
; CHECK-LABEL: mvni8h_lsl0:
280337
; CHECK: // %bb.0:
@@ -306,8 +363,8 @@ define <8 x i16> @mvni8h_neg() {
306363
;
307364
; CHECK-NOFP16-GI-LABEL: mvni8h_neg:
308365
; CHECK-NOFP16-GI: // %bb.0:
309-
; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI32_0
310-
; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI32_0]
366+
; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI35_0
367+
; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI35_0]
311368
; CHECK-NOFP16-GI-NEXT: ret
312369
ret <8 x i16> <i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008>
313370
}
@@ -486,6 +543,33 @@ define <2 x double> @fmov2d_neg0() {
486543
ret <2 x double> <double -0.0, double -0.0>
487544
}
488545

546+
define <1 x double> @fmov1d_neg0() {
547+
; CHECK-NOFP16-SD-LABEL: fmov1d_neg0:
548+
; CHECK-NOFP16-SD: // %bb.0:
549+
; CHECK-NOFP16-SD-NEXT: movi d0, #0000000000000000
550+
; CHECK-NOFP16-SD-NEXT: fneg d0, d0
551+
; CHECK-NOFP16-SD-NEXT: ret
552+
;
553+
; CHECK-FP16-SD-LABEL: fmov1d_neg0:
554+
; CHECK-FP16-SD: // %bb.0:
555+
; CHECK-FP16-SD-NEXT: movi d0, #0000000000000000
556+
; CHECK-FP16-SD-NEXT: fneg d0, d0
557+
; CHECK-FP16-SD-NEXT: ret
558+
;
559+
; CHECK-NOFP16-GI-LABEL: fmov1d_neg0:
560+
; CHECK-NOFP16-GI: // %bb.0:
561+
; CHECK-NOFP16-GI-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
562+
; CHECK-NOFP16-GI-NEXT: fmov d0, x8
563+
; CHECK-NOFP16-GI-NEXT: ret
564+
;
565+
; CHECK-FP16-GI-LABEL: fmov1d_neg0:
566+
; CHECK-FP16-GI: // %bb.0:
567+
; CHECK-FP16-GI-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
568+
; CHECK-FP16-GI-NEXT: fmov d0, x8
569+
; CHECK-FP16-GI-NEXT: ret
570+
ret <1 x double> <double -0.0>
571+
}
572+
489573
define <2 x i32> @movi1d_1() {
490574
; CHECK-NOFP16-SD-LABEL: movi1d_1:
491575
; CHECK-NOFP16-SD: // %bb.0:
@@ -499,14 +583,14 @@ define <2 x i32> @movi1d_1() {
499583
;
500584
; CHECK-NOFP16-GI-LABEL: movi1d_1:
501585
; CHECK-NOFP16-GI: // %bb.0:
502-
; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI52_0
503-
; CHECK-NOFP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI52_0]
586+
; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI56_0
587+
; CHECK-NOFP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI56_0]
504588
; CHECK-NOFP16-GI-NEXT: ret
505589
;
506590
; CHECK-FP16-GI-LABEL: movi1d_1:
507591
; CHECK-FP16-GI: // %bb.0:
508-
; CHECK-FP16-GI-NEXT: adrp x8, .LCPI52_0
509-
; CHECK-FP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI52_0]
592+
; CHECK-FP16-GI-NEXT: adrp x8, .LCPI56_0
593+
; CHECK-FP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI56_0]
510594
; CHECK-FP16-GI-NEXT: ret
511595
ret <2 x i32> <i32 -65536, i32 65535>
512596
}
@@ -517,31 +601,31 @@ define <2 x i32> @movi1d() {
517601
; CHECK-NOFP16-SD-LABEL: movi1d:
518602
; CHECK-NOFP16-SD: // %bb.0:
519603
; CHECK-NOFP16-SD-NEXT: movi d1, #0x00ffffffff0000
520-
; CHECK-NOFP16-SD-NEXT: adrp x8, .LCPI53_0
521-
; CHECK-NOFP16-SD-NEXT: ldr d0, [x8, :lo12:.LCPI53_0]
604+
; CHECK-NOFP16-SD-NEXT: adrp x8, .LCPI57_0
605+
; CHECK-NOFP16-SD-NEXT: ldr d0, [x8, :lo12:.LCPI57_0]
522606
; CHECK-NOFP16-SD-NEXT: b test_movi1d
523607
;
524608
; CHECK-FP16-SD-LABEL: movi1d:
525609
; CHECK-FP16-SD: // %bb.0:
526610
; CHECK-FP16-SD-NEXT: movi d1, #0x00ffffffff0000
527-
; CHECK-FP16-SD-NEXT: adrp x8, .LCPI53_0
528-
; CHECK-FP16-SD-NEXT: ldr d0, [x8, :lo12:.LCPI53_0]
611+
; CHECK-FP16-SD-NEXT: adrp x8, .LCPI57_0
612+
; CHECK-FP16-SD-NEXT: ldr d0, [x8, :lo12:.LCPI57_0]
529613
; CHECK-FP16-SD-NEXT: b test_movi1d
530614
;
531615
; CHECK-NOFP16-GI-LABEL: movi1d:
532616
; CHECK-NOFP16-GI: // %bb.0:
533-
; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI53_1
534-
; CHECK-NOFP16-GI-NEXT: adrp x9, .LCPI53_0
535-
; CHECK-NOFP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI53_1]
536-
; CHECK-NOFP16-GI-NEXT: ldr d1, [x9, :lo12:.LCPI53_0]
617+
; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI57_1
618+
; CHECK-NOFP16-GI-NEXT: adrp x9, .LCPI57_0
619+
; CHECK-NOFP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI57_1]
620+
; CHECK-NOFP16-GI-NEXT: ldr d1, [x9, :lo12:.LCPI57_0]
537621
; CHECK-NOFP16-GI-NEXT: b test_movi1d
538622
;
539623
; CHECK-FP16-GI-LABEL: movi1d:
540624
; CHECK-FP16-GI: // %bb.0:
541-
; CHECK-FP16-GI-NEXT: adrp x8, .LCPI53_1
542-
; CHECK-FP16-GI-NEXT: adrp x9, .LCPI53_0
543-
; CHECK-FP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI53_1]
544-
; CHECK-FP16-GI-NEXT: ldr d1, [x9, :lo12:.LCPI53_0]
625+
; CHECK-FP16-GI-NEXT: adrp x8, .LCPI57_1
626+
; CHECK-FP16-GI-NEXT: adrp x9, .LCPI57_0
627+
; CHECK-FP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI57_1]
628+
; CHECK-FP16-GI-NEXT: ldr d1, [x9, :lo12:.LCPI57_0]
545629
; CHECK-FP16-GI-NEXT: b test_movi1d
546630
%1 = tail call <2 x i32> @test_movi1d(<2 x i32> <i32 -2147483648, i32 2147450880>, <2 x i32> <i32 -65536, i32 65535>)
547631
ret <2 x i32> %1

0 commit comments

Comments
 (0)