Skip to content

Commit 6408703

Browse files
authored
[Polly] Retain vectorization for fallback loop when RTC is unsatisfiable (#165525)
When Polly generates a runtime condition (RTC) that is constant false, the associated Polly-generated loop is never executed and is eventually eliminated. As a result, the fallback loop becomes the only execution path, so disabling vectorization for it is counterproductive. This patch ensures that vectorization is disabled on the fallback loop only when the RTC is not constant false (i.e., when code generation did not fail).
1 parent f613801 commit 6408703

File tree

2 files changed

+43
-9
lines changed

2 files changed

+43
-9
lines changed

polly/lib/CodeGen/CodeGeneration.cpp

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -235,15 +235,6 @@ static bool generateCode(Scop &S, IslAstInfo &AI, LoopInfo &LI,
235235
NodeBuilder.allocateNewArrays(StartExitBlocks);
236236
Annotator.buildAliasScopes(S);
237237

238-
// The code below annotates the "llvm.loop.vectorize.enable" to false
239-
// for the code flow taken when RTCs fail. Because we don't want the
240-
// Loop Vectorizer to come in later and vectorize the original fall back
241-
// loop when Polly is enabled.
242-
for (Loop *L : LI.getLoopsInPreorder()) {
243-
if (S.contains(L))
244-
addStringMetadataToLoop(L, "llvm.loop.vectorize.enable", 0);
245-
}
246-
247238
if (PerfMonitoring) {
248239
PerfMonitor P(S, EnteringBB->getParent()->getParent());
249240
P.initialize();
@@ -285,6 +276,21 @@ static bool generateCode(Scop &S, IslAstInfo &AI, LoopInfo &LI,
285276

286277
Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC);
287278

279+
auto *CI = dyn_cast<ConstantInt>(RTC);
280+
// The code below annotates the "llvm.loop.vectorize.enable" to false
281+
// for the code flow taken when RTCs fail, because we don't want the
282+
// Loop Vectorizer to come in later and vectorize the original fall back
283+
// loop when Polly is enabled. This avoids loop versioning on fallback
284+
// loop by Loop Vectorizer. Don't do this when Polly's RTC value is
285+
// false (due to code generation failure), as we are left with only one
286+
// version of the loop.
287+
if (!(CI && CI->isZero())) {
288+
for (Loop *L : LI.getLoopsInPreorder()) {
289+
if (S.contains(L))
290+
addStringMetadataToLoop(L, "llvm.loop.vectorize.enable", 0);
291+
}
292+
}
293+
288294
// Explicitly set the insert point to the end of the block to avoid that a
289295
// split at the builder's current
290296
// insert position would move the malloc calls to the wrong BasicBlock.
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
; RUN: opt %loadNPMPolly -S -passes=polly-codegen -polly-annotate-metadata-vectorize < %s | FileCheck %s
2+
; RUN: opt %loadNPMPolly -S -passes=polly-codegen < %s | FileCheck %s
3+
4+
; Verify that vectorization is not disabled when Polly's RTC is false.
5+
6+
; CHECK: attributes {{.*}} = { "polly-optimized" }
7+
; CHECK-NOT: {{.*}} = !{!"llvm.loop.vectorize.enable", i32 0}
8+
9+
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
10+
target triple = "aarch64-unknown-linux-android10000"
11+
12+
define void @ham(i64 %arg) {
13+
bb:
14+
br label %bb1
15+
16+
bb1: ; preds = %bb3, %bb
17+
%phi = phi ptr [ %getelementptr4, %bb3 ], [ null, %bb ]
18+
br label %bb2
19+
20+
bb2: ; preds = %bb2, %bb1
21+
%getelementptr = getelementptr i8, ptr %phi, i64 1
22+
store i8 0, ptr %getelementptr, align 1
23+
br i1 false, label %bb2, label %bb3
24+
25+
bb3: ; preds = %bb2
26+
%getelementptr4 = getelementptr i8, ptr %phi, i64 %arg
27+
br label %bb1
28+
}

0 commit comments

Comments
 (0)