[llvm] [LoopVectorize] In LoopVectorize.cpp start using getSymbolicMaxBackedgeTakenCount (PR #108833)
David Sherwood via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 1 09:57:30 PDT 2024
https://github.com/david-arm updated https://github.com/llvm/llvm-project/pull/108833
>From d8c2e12d224f775ad0d101190bf54c2f6f88f32b Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Mon, 16 Sep 2024 14:09:02 +0000
Subject: [PATCH 1/4] [LoopVectorize] In LoopVectorize start using
getSymbolicMaxBackedgeTakenCount
LoopVectorizationLegality currently only treats a loop as legal
to vectorise if PredicatedScalarEvolution::getBackedgeTakenCount
returns a valid SCEV, or more precisely, if the loop has
an exact backedge taken count. Therefore, in LoopVectorize.cpp
we can safely replace all calls to getBackedgeTakenCount with
calls to getSymbolicMaxBackedgeTakenCount, since the result is
the same.
This also helps prepare the loop vectoriser for PR #88385.
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 7 ++++++-
llvm/lib/Transforms/Vectorize/VPlan.cpp | 13 +++++++++----
2 files changed, 15 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 08e78cb49c69fc..353c35efea776d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4054,7 +4054,12 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
unsigned MaxVFtimesIC =
UserIC ? *MaxPowerOf2RuntimeVF * UserIC : *MaxPowerOf2RuntimeVF;
ScalarEvolution *SE = PSE.getSE();
- const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount();
+ // Currently only loops with countable exits are vectorized so it's safe to
+ // use getSymbolicMaxBackedgeTakenCount as it should give the same result
+ // as getBackedgeTakenCount.
+ const SCEV *BackedgeTakenCount = PSE.getSymbolicMaxBackedgeTakenCount();
+ assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCount) &&
+ "Invalid loop count");
const SCEV *ExitCount = SE->getAddExpr(
BackedgeTakenCount, SE->getOne(BackedgeTakenCount->getType()));
const SCEV *Rem = SE->getURemExpr(
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 6ddbfcf0ecfe58..4316c7dd9b6430 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -880,11 +880,16 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
auto Plan = std::make_unique<VPlan>(Entry, VecPreheader);
// Create SCEV and VPValue for the trip count.
- const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount();
- assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCount) && "Invalid loop count");
+
+ // Using getSymbolicMaxBackedgeTakenCount instead of getBackedgeTakenCount,
+ // since they should be identical as we currently only vectorize loops when
+ // all exits are countable.
+ const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount();
+ assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) &&
+ "Invalid loop count");
ScalarEvolution &SE = *PSE.getSE();
- const SCEV *TripCount =
- SE.getTripCountFromExitCount(BackedgeTakenCount, InductionTy, TheLoop);
+ const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV,
+ InductionTy, TheLoop);
Plan->TripCount =
vputils::getOrCreateVPValueForSCEVExpr(*Plan, TripCount, SE);
>From d12dbae1750515389753d7b3d3c39f1c9d78ce06 Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Mon, 30 Sep 2024 12:12:56 +0000
Subject: [PATCH 2/4] Address review comment
* Updated comments around calls to getSymbolicMaxBackedgeTakenCount
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 7 ++++---
llvm/lib/Transforms/Vectorize/VPlan.cpp | 7 ++++---
2 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 353c35efea776d..b02ea32348107a 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4054,9 +4054,10 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
unsigned MaxVFtimesIC =
UserIC ? *MaxPowerOf2RuntimeVF * UserIC : *MaxPowerOf2RuntimeVF;
ScalarEvolution *SE = PSE.getSE();
- // Currently only loops with countable exits are vectorized so it's safe to
- // use getSymbolicMaxBackedgeTakenCount as it should give the same result
- // as getBackedgeTakenCount.
+ // Currently only loops with countable exits are vectorized, but calling
+ // getSymbolicMaxBackedgeTakenCount allows enablement work for loops with
+ // uncountable exits whilst also ensuring the symbolic maximum and known
+ // back-edge taken count remain identical for loops with countable exits.
const SCEV *BackedgeTakenCount = PSE.getSymbolicMaxBackedgeTakenCount();
assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCount) &&
"Invalid loop count");
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 4316c7dd9b6430..096ad3ba4768cb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -881,9 +881,10 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
// Create SCEV and VPValue for the trip count.
- // Using getSymbolicMaxBackedgeTakenCount instead of getBackedgeTakenCount,
- // since they should be identical as we currently only vectorize loops when
- // all exits are countable.
+ // Currently only loops with countable exits are vectorized, but calling
+ // getSymbolicMaxBackedgeTakenCount allows enablement work for loops with
+ // uncountable exits whilst also ensuring the symbolic maximum and known
+ // back-edge taken count remain identical for loops with countable exits.
const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount();
assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) &&
"Invalid loop count");
>From 4c124211ebe0d632f44c19f7f1a1abaa4294f701 Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Tue, 1 Oct 2024 09:11:16 +0000
Subject: [PATCH 3/4] Address review comments
* Updated assert.
* Add outer loop vectorisation test that has an early exit in the
outer loop. I checked and there are no existing tests for this. It
fails in LoopVectorizationLegality so we never reach the point
in LoopVectorize.cpp where we attempt to calculate the backedge taken
count.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 2 +-
llvm/lib/Transforms/Vectorize/VPlan.cpp | 2 +-
.../LoopVectorize/outer_loop_early_exit.ll | 49 +++++++++++++++++++
3 files changed, 51 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/Transforms/LoopVectorize/outer_loop_early_exit.ll
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b02ea32348107a..d1a4b657b9369b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4059,7 +4059,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
// uncountable exits whilst also ensuring the symbolic maximum and known
// back-edge taken count remain identical for loops with countable exits.
const SCEV *BackedgeTakenCount = PSE.getSymbolicMaxBackedgeTakenCount();
- assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCount) &&
+ assert(BackedgeTakenCount == PSE.getBackedgeTakenCount() &&
"Invalid loop count");
const SCEV *ExitCount = SE->getAddExpr(
BackedgeTakenCount, SE->getOne(BackedgeTakenCount->getType()));
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 096ad3ba4768cb..6e2d6e92c076aa 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -886,7 +886,7 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
// uncountable exits whilst also ensuring the symbolic maximum and known
// back-edge taken count remain identical for loops with countable exits.
const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount();
- assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) &&
+ assert(BackedgeTakenCountSCEV == PSE.getBackedgeTakenCount() &&
"Invalid loop count");
ScalarEvolution &SE = *PSE.getSE();
const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV,
diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_early_exit.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_early_exit.ll
new file mode 100644
index 00000000000000..e4aa82c0678524
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/outer_loop_early_exit.ll
@@ -0,0 +1,49 @@
+; REQUIRES: asserts
+; RUN: opt -S -passes=loop-vectorize -enable-vplan-native-path -disable-output -debug 2>&1 < %s | FileCheck %s
+
+; CHECK-LABEL: LV: Found a loop: for.body
+; CHECK: LV: Not vectorizing: Unsupported conditional branch.
+; CHECK: loop not vectorized: loop control flow is not understood by vectorizer
+; CHECK: LV: Not vectorizing: Unsupported outer loop.
+
+ at arr2 = external global [8 x i32], align 16
+ at arr = external global [8 x [8 x i32]], align 16
+
+define i32 @foo(i32 %n) {
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv21 = phi i64 [ 0, %entry ], [ %indvars.iv.next22, %for.inc ]
+ %arrayidx = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, i64 %indvars.iv21
+ %ld1 = load i32, ptr %arrayidx, align 4
+ %0 = trunc i64 %indvars.iv21 to i32
+ store i32 %0, ptr %arrayidx, align 4
+ %1 = trunc i64 %indvars.iv21 to i32
+ %add = add nsw i32 %1, %n
+ %cmp.early = icmp eq i32 %ld1, 3
+ br i1 %cmp.early, label %for.early, label %for.body.inner
+
+for.body.inner:
+ %indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body.inner ]
+ %arrayidx7 = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, i64 %indvars.iv, i64 %indvars.iv21
+ store i32 %add, ptr %arrayidx7, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 8
+ br i1 %exitcond, label %for.inc, label %for.body.inner
+
+for.inc:
+ %indvars.iv.next22 = add nuw nsw i64 %indvars.iv21, 1
+ %exitcond23 = icmp eq i64 %indvars.iv.next22, 8
+ br i1 %exitcond23, label %for.end, label %for.body, !llvm.loop !1
+
+for.early:
+ ret i32 1
+
+for.end:
+ ret i32 0
+}
+
+!1 = distinct !{!1, !2, !3}
+!2 = !{!"llvm.loop.vectorize.width", i32 4}
+!3 = !{!"llvm.loop.vectorize.enable", i1 true}
>From ef5f9634a7b6543effc5c5c6cdd3bcde9c301bf4 Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Tue, 1 Oct 2024 16:54:09 +0000
Subject: [PATCH 4/4] Re-add
"assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCount)"
---
llvm/lib/Transforms/Vectorize/VPlan.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 6e2d6e92c076aa..a7fa1d32843c10 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -886,7 +886,8 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
// uncountable exits whilst also ensuring the symbolic maximum and known
// back-edge taken count remain identical for loops with countable exits.
const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount();
- assert(BackedgeTakenCountSCEV == PSE.getBackedgeTakenCount() &&
+ assert((!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) &&
+ BackedgeTakenCountSCEV == PSE.getBackedgeTakenCount()) &&
"Invalid loop count");
ScalarEvolution &SE = *PSE.getSE();
const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV,
More information about the llvm-commits
mailing list