@@ -27,9 +27,8 @@ define <4 x float> @fadd_v4f32(<4 x i1> %b, <4 x float> noundef %x, <4 x float>
2727; AVX512VL: # %bb.0:
2828; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0
2929; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k1
30- ; AVX512VL-NEXT: vbroadcastss {{.*#+}} xmm0 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
31- ; AVX512VL-NEXT: vmovaps %xmm2, %xmm0 {%k1}
32- ; AVX512VL-NEXT: vaddps %xmm0, %xmm1, %xmm0
30+ ; AVX512VL-NEXT: vaddps %xmm2, %xmm1, %xmm1 {%k1}
31+ ; AVX512VL-NEXT: vmovaps %xmm1, %xmm0
3332; AVX512VL-NEXT: retq
3433 %s = select <4 x i1 > %b , <4 x float > %y , <4 x float > <float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 >
3534 %r = fadd <4 x float > %x , %s
@@ -62,9 +61,8 @@ define <8 x float> @fadd_v8f32_commute(<8 x i1> %b, <8 x float> noundef %x, <8 x
6261; AVX512VL-NEXT: vpmovsxwd %xmm0, %ymm0
6362; AVX512VL-NEXT: vpslld $31, %ymm0, %ymm0
6463; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k1
65- ; AVX512VL-NEXT: vbroadcastss {{.*#+}} ymm0 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
66- ; AVX512VL-NEXT: vmovaps %ymm2, %ymm0 {%k1}
67- ; AVX512VL-NEXT: vaddps %ymm1, %ymm0, %ymm0
64+ ; AVX512VL-NEXT: vaddps %ymm2, %ymm1, %ymm1 {%k1}
65+ ; AVX512VL-NEXT: vmovaps %ymm1, %ymm0
6866; AVX512VL-NEXT: retq
6967 %s = select <8 x i1 > %b , <8 x float > %y , <8 x float > <float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 >
7068 %r = fadd <8 x float > %s , %x
@@ -92,8 +90,8 @@ define <16 x float> @fadd_v16f32_swap(<16 x i1> %b, <16 x float> noundef %x, <16
9290; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
9391; AVX512-NEXT: vpslld $31, %zmm0, %zmm0
9492; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k1
95- ; AVX512-NEXT: vbroadcastss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2 {%k1}
9693; AVX512-NEXT: vaddps %zmm2, %zmm1, %zmm0
94+ ; AVX512-NEXT: vmovaps %zmm1, %zmm0 {%k1}
9795; AVX512-NEXT: retq
9896 %s = select <16 x i1 > %b , <16 x float > <float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 >, <16 x float > %y
9997 %r = fadd <16 x float > %x , %s
@@ -121,8 +119,8 @@ define <16 x float> @fadd_v16f32_commute_swap(<16 x i1> %b, <16 x float> noundef
121119; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
122120; AVX512-NEXT: vpslld $31, %zmm0, %zmm0
123121; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k1
124- ; AVX512-NEXT: vbroadcastss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2 {%k1}
125- ; AVX512-NEXT: vaddps %zmm1, %zmm2, %zmm0
122+ ; AVX512-NEXT: vaddps %zmm2, %zmm1, %zmm0
123+ ; AVX512-NEXT: vmovaps %zmm1, %zmm0 {%k1}
126124; AVX512-NEXT: retq
127125 %s = select <16 x i1 > %b , <16 x float > <float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 >, <16 x float > %y
128126 %r = fadd <16 x float > %s , %x
@@ -152,14 +150,16 @@ define <4 x float> @fsub_v4f32(<4 x i1> %b, <4 x float> noundef %x, <4 x float>
152150; AVX512VL: # %bb.0:
153151; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0
154152; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k1
155- ; AVX512VL-NEXT: vmovaps %xmm2, %xmm0 {%k1} {z}
156- ; AVX512VL-NEXT: vsubps %xmm0, %xmm1, %xmm0
153+ ; AVX512VL-NEXT: vsubps %xmm2, %xmm1, %xmm1 {%k1}
154+ ; AVX512VL-NEXT: vmovaps %xmm1, %xmm0
157155; AVX512VL-NEXT: retq
158156 %s = select <4 x i1 > %b , <4 x float > %y , <4 x float > zeroinitializer
159157 %r = fsub <4 x float > %x , %s
160158 ret <4 x float > %r
161159}
162160
161+ ; negative test - fsub is not commutative; there is no identity constant for operand 0
162+
163163define <8 x float > @fsub_v8f32_commute (<8 x i1 > %b , <8 x float > noundef %x , <8 x float > noundef %y ) {
164164; AVX2-LABEL: fsub_v8f32_commute:
165165; AVX2: # %bb.0:
@@ -214,15 +214,17 @@ define <16 x float> @fsub_v16f32_swap(<16 x i1> %b, <16 x float> noundef %x, <16
214214; AVX512: # %bb.0:
215215; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
216216; AVX512-NEXT: vpslld $31, %zmm0, %zmm0
217- ; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k1
218- ; AVX512-NEXT: vmovaps %zmm2, %zmm0 {%k1} {z}
219- ; AVX512-NEXT: vsubps %zmm0, %zmm1, %zmm0
217+ ; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k1
218+ ; AVX512-NEXT: vsubps %zmm2, %zmm1, %zmm0
219+ ; AVX512-NEXT: vmovaps %zmm1, %zmm0 {%k1}
220220; AVX512-NEXT: retq
221221 %s = select <16 x i1 > %b , <16 x float > zeroinitializer , <16 x float > %y
222222 %r = fsub <16 x float > %x , %s
223223 ret <16 x float > %r
224224}
225225
226+ ; negative test - fsub is not commutative; there is no identity constant for operand 0
227+
226228define <16 x float > @fsub_v16f32_commute_swap (<16 x i1 > %b , <16 x float > noundef %x , <16 x float > noundef %y ) {
227229; AVX2-LABEL: fsub_v16f32_commute_swap:
228230; AVX2: # %bb.0:
@@ -570,9 +572,7 @@ define <8 x float> @fadd_v8f32_cast_cond(i8 noundef zeroext %pb, <8 x float> nou
570572; AVX512VL-LABEL: fadd_v8f32_cast_cond:
571573; AVX512VL: # %bb.0:
572574; AVX512VL-NEXT: kmovw %edi, %k1
573- ; AVX512VL-NEXT: vbroadcastss {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
574- ; AVX512VL-NEXT: vmovaps %ymm1, %ymm2 {%k1}
575- ; AVX512VL-NEXT: vaddps %ymm2, %ymm0, %ymm0
575+ ; AVX512VL-NEXT: vaddps %ymm1, %ymm0, %ymm0 {%k1}
576576; AVX512VL-NEXT: retq
577577 %b = bitcast i8 %pb to <8 x i1 >
578578 %s = select <8 x i1 > %b , <8 x float > %y , <8 x float > <float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 , float -0 .0 >
@@ -636,9 +636,7 @@ define <8 x double> @fadd_v8f64_cast_cond(i8 noundef zeroext %pb, <8 x double> n
636636; AVX512-LABEL: fadd_v8f64_cast_cond:
637637; AVX512: # %bb.0:
638638; AVX512-NEXT: kmovw %edi, %k1
639- ; AVX512-NEXT: vbroadcastsd {{.*#+}} zmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
640- ; AVX512-NEXT: vmovapd %zmm1, %zmm2 {%k1}
641- ; AVX512-NEXT: vaddpd %zmm2, %zmm0, %zmm0
639+ ; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1}
642640; AVX512-NEXT: retq
643641 %b = bitcast i8 %pb to <8 x i1 >
644642 %s = select <8 x i1 > %b , <8 x double > %y , <8 x double > <double -0 .0 , double -0 .0 , double -0 .0 , double -0 .0 , double -0 .0 , double -0 .0 , double -0 .0 , double -0 .0 >
@@ -709,8 +707,7 @@ define <8 x float> @fsub_v8f32_cast_cond(i8 noundef zeroext %pb, <8 x float> nou
709707; AVX512VL-LABEL: fsub_v8f32_cast_cond:
710708; AVX512VL: # %bb.0:
711709; AVX512VL-NEXT: kmovw %edi, %k1
712- ; AVX512VL-NEXT: vmovaps %ymm1, %ymm1 {%k1} {z}
713- ; AVX512VL-NEXT: vsubps %ymm1, %ymm0, %ymm0
710+ ; AVX512VL-NEXT: vsubps %ymm1, %ymm0, %ymm0 {%k1}
714711; AVX512VL-NEXT: retq
715712 %b = bitcast i8 %pb to <8 x i1 >
716713 %s = select <8 x i1 > %b , <8 x float > %y , <8 x float > zeroinitializer
@@ -775,8 +772,7 @@ define <8 x double> @fsub_v8f64_cast_cond(i8 noundef zeroext %pb, <8 x double> n
775772; AVX512-LABEL: fsub_v8f64_cast_cond:
776773; AVX512: # %bb.0:
777774; AVX512-NEXT: kmovw %edi, %k1
778- ; AVX512-NEXT: vmovapd %zmm1, %zmm1 {%k1} {z}
779- ; AVX512-NEXT: vsubpd %zmm1, %zmm0, %zmm0
775+ ; AVX512-NEXT: vsubpd %zmm1, %zmm0, %zmm0 {%k1}
780776; AVX512-NEXT: retq
781777 %b = bitcast i8 %pb to <8 x i1 >
782778 %s = select <8 x i1 > %b , <8 x double > %y , <8 x double > zeroinitializer
0 commit comments