; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -slp-vectorizer -S -mtriple=x86_64-- | FileCheck %s

4- define <4 x i8 > @pr52275 (<4 x i8 > %v , i8* %x ) {
5- ; CHECK-LABEL: @pr52275(
6- ; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i64 1
7- ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[X]] to <2 x i8>*
8- ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 4
9- ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i8> [[TMP2]], <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
10- ; CHECK-NEXT: [[V11:%.*]] = shufflevector <4 x i8> [[V:%.*]], <4 x i8> [[TMP3]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
11- ; CHECK-NEXT: [[V2:%.*]] = add <4 x i8> [[V11]], [[V11]]
12- ; CHECK-NEXT: ret <4 x i8> [[V2]]
13- ;
4+ define <4 x i8 > @test (<4 x i8 > %v , i8* %x ) {
145 %x0 = load i8 , i8* %x , align 4
156 %g1 = getelementptr inbounds i8 , i8* %x , i64 1
167 %x1 = load i8 , i8* %g1 , align 4
@@ -19,3 +10,139 @@ define <4 x i8> @pr52275(<4 x i8> %v, i8* %x) {
1910 %v2 = add <4 x i8 > %v0 , %v1
2011 ret <4 x i8 > %v2
2112}
13+
; Two adjacent i32 values are loaded from %t1, truncated to i8 and inserted
; into lanes 0 and 1 of %t6. The intermediate vector %t8 (only lane 0 written)
; is also an operand of the final add, so it has a use outside the
; insertelement chain.
; Both check blocks mirror the input instruction-for-instruction, i.e. the
; output is expected to stay scalar.
; NOTE(review): the FORCE_SLP checks only take effect if some RUN line passes
; --check-prefix(es)=FORCE_SLP to FileCheck -- confirm such a RUN line exists.
define <2 x i8> @test2(<2 x i8> %t6, i32* %t1) {
; CHECK-LABEL: @test2(
; CHECK-NEXT:    [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
; CHECK-NEXT:    [[T5:%.*]] = load i32, i32* [[T4]], align 4
; CHECK-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
; CHECK-NEXT:    [[T8:%.*]] = insertelement <2 x i8> [[T6:%.*]], i8 [[T7]], i64 0
; CHECK-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
; CHECK-NEXT:    [[T10:%.*]] = insertelement <2 x i8> [[T8]], i8 [[T9]], i64 1
; CHECK-NEXT:    [[T11:%.*]] = add <2 x i8> [[T10]], [[T8]]
; CHECK-NEXT:    ret <2 x i8> [[T11]]
;
; FORCE_SLP-LABEL: @test2(
; FORCE_SLP-NEXT:    [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
; FORCE_SLP-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
; FORCE_SLP-NEXT:    [[T5:%.*]] = load i32, i32* [[T4]], align 4
; FORCE_SLP-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
; FORCE_SLP-NEXT:    [[T8:%.*]] = insertelement <2 x i8> [[T6:%.*]], i8 [[T7]], i64 0
; FORCE_SLP-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
; FORCE_SLP-NEXT:    [[T10:%.*]] = insertelement <2 x i8> [[T8]], i8 [[T9]], i64 1
; FORCE_SLP-NEXT:    [[T11:%.*]] = add <2 x i8> [[T10]], [[T8]]
; FORCE_SLP-NEXT:    ret <2 x i8> [[T11]]
;
  %t3 = load i32, i32* %t1, align 4
  %t4 = getelementptr inbounds i32, i32* %t1, i64 1
  %t5 = load i32, i32* %t4, align 4
  %t7 = trunc i32 %t3 to i8
  %t8 = insertelement <2 x i8> %t6, i8 %t7, i64 0
  %t9 = trunc i32 %t5 to i8
  %t10 = insertelement <2 x i8> %t8, i8 %t9, i64 1
  %t11 = add <2 x i8> %t10, %t8
  ret <2 x i8> %t11
}
47+
; Same shape as a two-lane insertelement chain fed by truncated adjacent i32
; loads, but the lanes are written in reverse order: the first loaded value
; goes to lane 1 and the second to lane 0. The intermediate vector %t8 (only
; lane 1 written) is also an operand of the final add.
; Both check blocks mirror the input instruction-for-instruction, i.e. the
; output is expected to stay scalar.
; NOTE(review): the FORCE_SLP checks only take effect if some RUN line passes
; --check-prefix(es)=FORCE_SLP to FileCheck -- confirm such a RUN line exists.
define <2 x i8> @test_reorder(<2 x i8> %t6, i32* %t1) {
; CHECK-LABEL: @test_reorder(
; CHECK-NEXT:    [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
; CHECK-NEXT:    [[T5:%.*]] = load i32, i32* [[T4]], align 4
; CHECK-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
; CHECK-NEXT:    [[T8:%.*]] = insertelement <2 x i8> [[T6:%.*]], i8 [[T7]], i64 1
; CHECK-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
; CHECK-NEXT:    [[T10:%.*]] = insertelement <2 x i8> [[T8]], i8 [[T9]], i64 0
; CHECK-NEXT:    [[T11:%.*]] = add <2 x i8> [[T10]], [[T8]]
; CHECK-NEXT:    ret <2 x i8> [[T11]]
;
; FORCE_SLP-LABEL: @test_reorder(
; FORCE_SLP-NEXT:    [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
; FORCE_SLP-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
; FORCE_SLP-NEXT:    [[T5:%.*]] = load i32, i32* [[T4]], align 4
; FORCE_SLP-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
; FORCE_SLP-NEXT:    [[T8:%.*]] = insertelement <2 x i8> [[T6:%.*]], i8 [[T7]], i64 1
; FORCE_SLP-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
; FORCE_SLP-NEXT:    [[T10:%.*]] = insertelement <2 x i8> [[T8]], i8 [[T9]], i64 0
; FORCE_SLP-NEXT:    [[T11:%.*]] = add <2 x i8> [[T10]], [[T8]]
; FORCE_SLP-NEXT:    ret <2 x i8> [[T11]]
;
  %t3 = load i32, i32* %t1, align 4
  %t4 = getelementptr inbounds i32, i32* %t1, i64 1
  %t5 = load i32, i32* %t4, align 4
  %t7 = trunc i32 %t3 to i8
  %t8 = insertelement <2 x i8> %t6, i8 %t7, i64 1
  %t9 = trunc i32 %t5 to i8
  %t10 = insertelement <2 x i8> %t8, i8 %t9, i64 0
  %t11 = add <2 x i8> %t10, %t8
  ret <2 x i8> %t11
}
81+
; Two adjacent i32 values are loaded from %t1, truncated to i8 and inserted
; into lanes 0 and 1 of the four-lane vector %t6, so only part of the vector
; is overwritten. The intermediate vector %t8 (only lane 0 written) is also an
; operand of the final add.
; Both check blocks mirror the input instruction-for-instruction, i.e. the
; output is expected to stay scalar.
; NOTE(review): the FORCE_SLP checks only take effect if some RUN line passes
; --check-prefix(es)=FORCE_SLP to FileCheck -- confirm such a RUN line exists.
define <4 x i8> @test_subvector(<4 x i8> %t6, i32* %t1) {
; CHECK-LABEL: @test_subvector(
; CHECK-NEXT:    [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
; CHECK-NEXT:    [[T5:%.*]] = load i32, i32* [[T4]], align 4
; CHECK-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
; CHECK-NEXT:    [[T8:%.*]] = insertelement <4 x i8> [[T6:%.*]], i8 [[T7]], i64 0
; CHECK-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
; CHECK-NEXT:    [[T10:%.*]] = insertelement <4 x i8> [[T8]], i8 [[T9]], i64 1
; CHECK-NEXT:    [[T11:%.*]] = add <4 x i8> [[T10]], [[T8]]
; CHECK-NEXT:    ret <4 x i8> [[T11]]
;
; FORCE_SLP-LABEL: @test_subvector(
; FORCE_SLP-NEXT:    [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
; FORCE_SLP-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
; FORCE_SLP-NEXT:    [[T5:%.*]] = load i32, i32* [[T4]], align 4
; FORCE_SLP-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
; FORCE_SLP-NEXT:    [[T8:%.*]] = insertelement <4 x i8> [[T6:%.*]], i8 [[T7]], i64 0
; FORCE_SLP-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
; FORCE_SLP-NEXT:    [[T10:%.*]] = insertelement <4 x i8> [[T8]], i8 [[T9]], i64 1
; FORCE_SLP-NEXT:    [[T11:%.*]] = add <4 x i8> [[T10]], [[T8]]
; FORCE_SLP-NEXT:    ret <4 x i8> [[T11]]
;
  %t3 = load i32, i32* %t1, align 4
  %t4 = getelementptr inbounds i32, i32* %t1, i64 1
  %t5 = load i32, i32* %t4, align 4
  %t7 = trunc i32 %t3 to i8
  %t8 = insertelement <4 x i8> %t6, i8 %t7, i64 0
  %t9 = trunc i32 %t5 to i8
  %t10 = insertelement <4 x i8> %t8, i8 %t9, i64 1
  %t11 = add <4 x i8> %t10, %t8
  ret <4 x i8> %t11
}
115+
; Two adjacent i32 values are loaded from %t1, truncated to i8 and inserted
; into the upper lanes of the four-lane vector %t6 in reverse order: the first
; loaded value goes to lane 3 and the second to lane 2. The intermediate
; vector %t8 (only lane 3 written) is also an operand of the final add.
; Both check blocks mirror the input instruction-for-instruction, i.e. the
; output is expected to stay scalar.
; NOTE(review): the FORCE_SLP checks only take effect if some RUN line passes
; --check-prefix(es)=FORCE_SLP to FileCheck -- confirm such a RUN line exists.
define <4 x i8> @test_subvector_reorder(<4 x i8> %t6, i32* %t1) {
; CHECK-LABEL: @test_subvector_reorder(
; CHECK-NEXT:    [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
; CHECK-NEXT:    [[T5:%.*]] = load i32, i32* [[T4]], align 4
; CHECK-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
; CHECK-NEXT:    [[T8:%.*]] = insertelement <4 x i8> [[T6:%.*]], i8 [[T7]], i64 3
; CHECK-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
; CHECK-NEXT:    [[T10:%.*]] = insertelement <4 x i8> [[T8]], i8 [[T9]], i64 2
; CHECK-NEXT:    [[T11:%.*]] = add <4 x i8> [[T10]], [[T8]]
; CHECK-NEXT:    ret <4 x i8> [[T11]]
;
; FORCE_SLP-LABEL: @test_subvector_reorder(
; FORCE_SLP-NEXT:    [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
; FORCE_SLP-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
; FORCE_SLP-NEXT:    [[T5:%.*]] = load i32, i32* [[T4]], align 4
; FORCE_SLP-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
; FORCE_SLP-NEXT:    [[T8:%.*]] = insertelement <4 x i8> [[T6:%.*]], i8 [[T7]], i64 3
; FORCE_SLP-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
; FORCE_SLP-NEXT:    [[T10:%.*]] = insertelement <4 x i8> [[T8]], i8 [[T9]], i64 2
; FORCE_SLP-NEXT:    [[T11:%.*]] = add <4 x i8> [[T10]], [[T8]]
; FORCE_SLP-NEXT:    ret <4 x i8> [[T11]]
;
  %t3 = load i32, i32* %t1, align 4
  %t4 = getelementptr inbounds i32, i32* %t1, i64 1
  %t5 = load i32, i32* %t4, align 4
  %t7 = trunc i32 %t3 to i8
  %t8 = insertelement <4 x i8> %t6, i8 %t7, i64 3
  %t9 = trunc i32 %t5 to i8
  %t10 = insertelement <4 x i8> %t8, i8 %t9, i64 2
  %t11 = add <4 x i8> %t10, %t8
  ret <4 x i8> %t11
}
0 commit comments