@@ -11,19 +11,32 @@ void square2(double** x, int sstart, int send, int sinc, int tstart, int tend, i
1111
1212
1313// CHECK: func @square2(%arg0: memref<?xmemref<?xf64>>, %arg1: i32, %arg2: i32, %arg3: i32, %arg4: i32, %arg5: i32, %arg6: i32) attributes {llvm.linkage = #llvm.linkage<external>} {
14- // CHECK-NEXT: %0 = arith.index_cast %arg1 : i32 to index
15- // CHECK-NEXT: %1 = arith.index_cast %arg4 : i32 to index
16- // CHECK-NEXT: %2 = arith.index_cast %arg2 : i32 to index
17- // CHECK-NEXT: %3 = arith.index_cast %arg5 : i32 to index
18- // CHECK-NEXT: %4 = arith.index_cast %arg3 : i32 to index
19- // CHECK-NEXT: %5 = arith.index_cast %arg6 : i32 to index
20- // CHECK-NEXT: scf.parallel (%arg7, %arg8) = (%0, %1) to (%2, %3) step (%4, %5) {
21- // CHECK-NEXT: %6 = arith.index_cast %arg7 : index to i64
22- // CHECK-NEXT: %7 = arith.index_cast %arg8 : index to i64
23- // CHECK-NEXT: %8 = memref.load %arg0[%arg7] : memref<?xmemref<?xf64>>
24- // CHECK-NEXT: %9 = arith.addi %6, %7 : i64
25- // CHECK-NEXT: %10 = arith.sitofp %9 : i64 to f64
26- // CHECK-NEXT: memref.store %10, %8[%arg8] : memref<?xf64>
14+ // CHECK-NEXT: %c1 = arith.constant 1 : index
15+ // CHECK-DAG: %[[i0:.+]] = arith.index_cast %arg1 : i32 to index
16+ // CHECK-DAG: %[[i1:.+]] = arith.index_cast %arg2 : i32 to index
17+ // CHECK-DAG: %[[i2:.+]] = arith.index_cast %arg3 : i32 to index
18+ // CHECK-DAG: %[[i3:.+]] = arith.index_cast %arg4 : i32 to index
19+ // CHECK-DAG: %[[i4:.+]] = arith.index_cast %arg5 : i32 to index
20+ // CHECK-DAG: %[[i5:.+]] = arith.index_cast %arg6 : i32 to index
21+ // CHECK-DAG: %6 = arith.subi %[[i1]], %[[i0]] : index
22+ // CHECK-NEXT: %7 = arith.subi %6, %c1 : index
23+ // CHECK-NEXT: %8 = arith.addi %7, %[[i2]] : index
24+ // CHECK-NEXT: %9 = arith.divui %8, %[[i2]] : index
25+ // CHECK-NEXT: %10 = arith.muli %9, %[[i2]] : index
26+ // CHECK-NEXT: %11 = arith.addi %[[i0]], %10 : index
27+ // CHECK-NEXT: %12 = arith.subi %[[i4]], %[[i3]] : index
28+ // CHECK-NEXT: %13 = arith.subi %12, %c1 : index
29+ // CHECK-NEXT: %14 = arith.addi %13, %[[i5]] : index
30+ // CHECK-NEXT: %15 = arith.divui %14, %[[i5]] : index
31+ // CHECK-NEXT: %16 = arith.muli %15, %[[i5]] : index
32+ // CHECK-NEXT: %17 = arith.addi %[[i3]], %16 : index
33+ // CHECK-NEXT: scf.parallel (%arg7, %arg8) = (%[[i0]], %[[i3]]) to (%11, %17) step (%[[i2]], %[[i5]]) {
34+ // CHECK-NEXT: %18 = arith.index_cast %arg7 : index to i64
35+ // CHECK-NEXT: %19 = arith.index_cast %arg8 : index to i64
36+ // CHECK-NEXT: %20 = memref.load %arg0[%arg7] : memref<?xmemref<?xf64>>
37+ // CHECK-NEXT: %21 = arith.addi %18, %19 : i64
38+ // CHECK-NEXT: %22 = arith.sitofp %21 : i64 to f64
39+ // CHECK-NEXT: memref.store %22, %20[%arg8] : memref<?xf64>
2740// CHECK-NEXT: scf.yield
2841// CHECK-NEXT: }
2942// CHECK-NEXT: return
0 commit comments