[llvm] [LV] Ignore some costs when loop gets fully unrolled (PR #106699)
Igor Kirillov via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 9 08:02:57 PST 2024
https://github.com/igogo-x86 updated https://github.com/llvm/llvm-project/pull/106699
>From d7a1335d2453d3a5c05f98616e8906320016a554 Mon Sep 17 00:00:00 2001
From: Igor Kirillov <igor.kirillov at arm.com>
Date: Fri, 30 Aug 2024 09:22:21 +0000
Subject: [PATCH 1/9] [LV] Ignore some costs when loop gets fully unrolled
When VF has a fixed width and equals the number of iterations, and we are not
tail folding by masking, the comparison instruction and induction operation
will be removed later by dead-code elimination (DCE).
Ignoring the costs of these instructions improves the cost model.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 20 +++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 37118702762956..b8c89a4683ee20 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7281,6 +7281,26 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
continue;
IVInsts.push_back(CI);
}
+
+ // If the given VF loop gets fully unrolled, ignore the costs of comparison
+ // and increment instruction, as they'll get simplified away
+ auto TC = CM.PSE.getSE()->getSmallConstantTripCount(OrigLoop);
+ auto *Cmp = OrigLoop->getLatchCmpInst();
+ if (Cmp && VF.isFixed() && VF.getFixedValue() == TC) {
+ CostCtx.SkipCostComputation.insert(Cmp);
+ for (Instruction *IVInst : IVInsts) {
+ bool IsSimplifiedAway = true;
+ for (auto *UIV : IVInst->users()) {
+ if (!Legal->isInductionVariable(UIV) && UIV != Cmp) {
+ IsSimplifiedAway = false;
+ break;
+ }
+ }
+ if (IsSimplifiedAway)
+ CostCtx.SkipCostComputation.insert(IVInst);
+ }
+ }
+
for (Instruction *IVInst : IVInsts) {
if (CostCtx.skipCostComputation(IVInst, VF.isVector()))
continue;
>From 22528cacb9cef4e9007f80684ae328dd7e0b2761 Mon Sep 17 00:00:00 2001
From: Igor Kirillov <igor.kirillov at arm.com>
Date: Wed, 2 Oct 2024 09:05:33 +0000
Subject: [PATCH 2/9] Update to pass assertion comparing two cost models
---
.../Transforms/Vectorize/LoopVectorize.cpp | 36 +++++++++++--------
1 file changed, 22 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b8c89a4683ee20..1eadf275d14187 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5592,6 +5592,20 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
InstructionCost Cost;
+ // If with the given VF loop gets fully unrolled, ignore the costs of
+ // comparison and induction instructions, as they'll get simplified away
+ SmallPtrSet<const Value *, 16> ValuesToIgnoreForVF;
+ auto TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
+ auto *Cmp = TheLoop->getLatchCmpInst();
+ if (Cmp && TC == VF.getKnownMinValue()) {
+ ValuesToIgnoreForVF.insert(Cmp);
+ for (const auto &[IV, IndDesc] : Legal->getInductionVars()) {
+ Instruction *IVInc = cast<Instruction>(
+ IV->getIncomingValueForBlock(TheLoop->getLoopLatch()));
+ ValuesToIgnoreForVF.insert(IVInc);
+ }
+ }
+
// For each block.
for (BasicBlock *BB : TheLoop->blocks()) {
InstructionCost BlockCost;
@@ -5599,7 +5613,7 @@ InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
// For each instruction in the old loop.
for (Instruction &I : BB->instructionsWithoutDebug()) {
// Skip ignored values.
- if (ValuesToIgnore.count(&I) ||
+ if (ValuesToIgnore.count(&I) || ValuesToIgnoreForVF.count(&I) ||
(VF.isVector() && VecValuesToIgnore.count(&I)))
continue;
@@ -7282,22 +7296,16 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
IVInsts.push_back(CI);
}
- // If the given VF loop gets fully unrolled, ignore the costs of comparison
- // and increment instruction, as they'll get simplified away
+ // If with the given VF loop gets fully unrolled, ignore the costs of
+ // comparison and induction instructions, as they'll get simplified away
auto TC = CM.PSE.getSE()->getSmallConstantTripCount(OrigLoop);
auto *Cmp = OrigLoop->getLatchCmpInst();
- if (Cmp && VF.isFixed() && VF.getFixedValue() == TC) {
+ if (Cmp && TC == VF.getKnownMinValue()) {
CostCtx.SkipCostComputation.insert(Cmp);
- for (Instruction *IVInst : IVInsts) {
- bool IsSimplifiedAway = true;
- for (auto *UIV : IVInst->users()) {
- if (!Legal->isInductionVariable(UIV) && UIV != Cmp) {
- IsSimplifiedAway = false;
- break;
- }
- }
- if (IsSimplifiedAway)
- CostCtx.SkipCostComputation.insert(IVInst);
+ for (const auto &[IV, IndDesc] : Legal->getInductionVars()) {
+ Instruction *IVInc = cast<Instruction>(
+ IV->getIncomingValueForBlock(OrigLoop->getLoopLatch()));
+ CostCtx.SkipCostComputation.insert(IVInc);
}
}
>From 59f8c86a099204ab4093b78463a7b58a39af06a2 Mon Sep 17 00:00:00 2001
From: Igor Kirillov <igor.kirillov at arm.com>
Date: Tue, 12 Nov 2024 13:51:21 +0000
Subject: [PATCH 3/9] Check that induction variable has no unsimplifiable users
Add AArch64 test
---
.../Transforms/Vectorize/LoopVectorize.cpp | 59 ++++++++++++-------
1 file changed, 37 insertions(+), 22 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1eadf275d14187..71d88da52328c9 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2682,6 +2682,33 @@ static Value *getExpandedStep(const InductionDescriptor &ID,
return I->second;
}
+/// Knowing that loop \p L would be fully unrolled after vectorisation, add
+/// instructions that will get simplified and thus should not have any cost to
+/// \p InstsToIgnore
+static void AddFullyUnrolledInstructionsToIgnore(
+ Loop *L, const LoopVectorizationLegality::InductionList &IL,
+ SmallPtrSetImpl<Instruction *> &InstsToIgnore) {
+ auto *Cmp = L->getLatchCmpInst();
+ if (!Cmp)
+ return;
+ InstsToIgnore.insert(Cmp);
+ for (const auto &[IV, IndDesc] : IL) {
+ // Get next iteration value of the induction variable
+ Instruction *IVInst =
+ cast<Instruction>(IV->getIncomingValueForBlock(L->getLoopLatch()));
+ bool IsSimplifiedAway = true;
+ // Check that this value used only to exit the loop
+ for (auto *UIV : IVInst->users()) {
+ if (UIV != IV && UIV != Cmp) {
+ IsSimplifiedAway = false;
+ break;
+ }
+ }
+ if (IsSimplifiedAway)
+ InstsToIgnore.insert(IVInst);
+ }
+}
+
void InnerLoopVectorizer::createInductionResumeVPValues(
const SCEV2ValueTy &ExpandedSCEVs, Value *MainVectorTripCount,
SmallPtrSetImpl<PHINode *> *IVSubset) {
@@ -5592,19 +5619,13 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
InstructionCost Cost;
- // If with the given VF loop gets fully unrolled, ignore the costs of
- // comparison and induction instructions, as they'll get simplified away
- SmallPtrSet<const Value *, 16> ValuesToIgnoreForVF;
+ // If with the given fixed width VF loop gets fully unrolled, ignore the costs
+ // of comparison and induction instructions, as they'll get simplified away
+ SmallPtrSet<Instruction *, 2> ValuesToIgnoreForVF;
auto TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
- auto *Cmp = TheLoop->getLatchCmpInst();
- if (Cmp && TC == VF.getKnownMinValue()) {
- ValuesToIgnoreForVF.insert(Cmp);
- for (const auto &[IV, IndDesc] : Legal->getInductionVars()) {
- Instruction *IVInc = cast<Instruction>(
- IV->getIncomingValueForBlock(TheLoop->getLoopLatch()));
- ValuesToIgnoreForVF.insert(IVInc);
- }
- }
+ if (VF.isFixed() && TC == VF.getFixedValue())
+ AddFullyUnrolledInstructionsToIgnore(TheLoop, Legal->getInductionVars(),
+ ValuesToIgnoreForVF);
// For each block.
for (BasicBlock *BB : TheLoop->blocks()) {
@@ -7298,16 +7319,10 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
// If with the given VF loop gets fully unrolled, ignore the costs of
// comparison and induction instructions, as they'll get simplified away
- auto TC = CM.PSE.getSE()->getSmallConstantTripCount(OrigLoop);
- auto *Cmp = OrigLoop->getLatchCmpInst();
- if (Cmp && TC == VF.getKnownMinValue()) {
- CostCtx.SkipCostComputation.insert(Cmp);
- for (const auto &[IV, IndDesc] : Legal->getInductionVars()) {
- Instruction *IVInc = cast<Instruction>(
- IV->getIncomingValueForBlock(OrigLoop->getLoopLatch()));
- CostCtx.SkipCostComputation.insert(IVInc);
- }
- }
+ auto TC = PSE.getSE()->getSmallConstantTripCount(OrigLoop);
+ if (VF.isFixed() && TC == VF.getFixedValue())
+ AddFullyUnrolledInstructionsToIgnore(OrigLoop, Legal->getInductionVars(),
+ CostCtx.SkipCostComputation);
for (Instruction *IVInst : IVInsts) {
if (CostCtx.skipCostComputation(IVInst, VF.isVector()))
>From a4948b859b34a34c37460617809b1ef0dba9b5f4 Mon Sep 17 00:00:00 2001
From: Igor Kirillov <igor.kirillov at arm.com>
Date: Fri, 15 Nov 2024 14:35:22 +0000
Subject: [PATCH 4/9] Addressing suggestions
* Fixing comments
* Adding more tests
* Remove cmp latch presence requirements
---
.../Transforms/Vectorize/LoopVectorize.cpp | 42 +++++++++----------
1 file changed, 19 insertions(+), 23 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 71d88da52328c9..3c1baf3d2e32f8 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2682,29 +2682,21 @@ static Value *getExpandedStep(const InductionDescriptor &ID,
return I->second;
}
-/// Knowing that loop \p L would be fully unrolled after vectorisation, add
-/// instructions that will get simplified and thus should not have any cost to
-/// \p InstsToIgnore
-static void AddFullyUnrolledInstructionsToIgnore(
+/// Knowing that loop \p L executes a single vector iteration, add instructions
+/// that will get simplified and thus should not have any cost to \p
+/// InstsToIgnore.
+static void addFullyUnrolledInstructionsToIgnore(
Loop *L, const LoopVectorizationLegality::InductionList &IL,
SmallPtrSetImpl<Instruction *> &InstsToIgnore) {
auto *Cmp = L->getLatchCmpInst();
- if (!Cmp)
- return;
- InstsToIgnore.insert(Cmp);
+ if (Cmp)
+ InstsToIgnore.insert(Cmp);
for (const auto &[IV, IndDesc] : IL) {
- // Get next iteration value of the induction variable
+ // Get next iteration value of the induction variable.
Instruction *IVInst =
cast<Instruction>(IV->getIncomingValueForBlock(L->getLoopLatch()));
- bool IsSimplifiedAway = true;
- // Check that this value used only to exit the loop
- for (auto *UIV : IVInst->users()) {
- if (UIV != IV && UIV != Cmp) {
- IsSimplifiedAway = false;
- break;
- }
- }
- if (IsSimplifiedAway)
+ if (all_of(IVInst->users(),
+ [&](const User *U) { return U == IV || U == Cmp; }))
InstsToIgnore.insert(IVInst);
}
}
@@ -5619,12 +5611,13 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
InstructionCost Cost;
- // If with the given fixed width VF loop gets fully unrolled, ignore the costs
- // of comparison and induction instructions, as they'll get simplified away
+ // If the vector loop gets executed exactly once with the given VF, ignore the
+ // costs of comparison and induction instructions, as they'll get simplified
+ // away.
SmallPtrSet<Instruction *, 2> ValuesToIgnoreForVF;
auto TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
if (VF.isFixed() && TC == VF.getFixedValue())
- AddFullyUnrolledInstructionsToIgnore(TheLoop, Legal->getInductionVars(),
+ addFullyUnrolledInstructionsToIgnore(TheLoop, Legal->getInductionVars(),
ValuesToIgnoreForVF);
// For each block.
@@ -7317,11 +7310,14 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
IVInsts.push_back(CI);
}
- // If with the given VF loop gets fully unrolled, ignore the costs of
- // comparison and induction instructions, as they'll get simplified away
+ // If the vector loop gets executed exactly once with the given VF, ignore
+ // the costs of comparison and induction instructions, as they'll get
+ // simplified away.
+ // TODO: Remove this code after stepping away from the legacy cost model and
+ // adding code to simplify VPlans before calculating their costs.
auto TC = PSE.getSE()->getSmallConstantTripCount(OrigLoop);
if (VF.isFixed() && TC == VF.getFixedValue())
- AddFullyUnrolledInstructionsToIgnore(OrigLoop, Legal->getInductionVars(),
+ addFullyUnrolledInstructionsToIgnore(OrigLoop, Legal->getInductionVars(),
CostCtx.SkipCostComputation);
for (Instruction *IVInst : IVInsts) {
>From dac9e7ee51236ffa19e861871ad01e46bc9b6137 Mon Sep 17 00:00:00 2001
From: Igor Kirillov <igor.kirillov at arm.com>
Date: Tue, 19 Nov 2024 12:19:27 +0000
Subject: [PATCH 5/9] Reduce cost only when not tail-folding
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3c1baf3d2e32f8..9d195fcb3ff78c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5616,7 +5616,7 @@ InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
// away.
SmallPtrSet<Instruction *, 2> ValuesToIgnoreForVF;
auto TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
- if (VF.isFixed() && TC == VF.getFixedValue())
+ if (VF.isFixed() && TC == VF.getFixedValue() && !foldTailByMasking())
addFullyUnrolledInstructionsToIgnore(TheLoop, Legal->getInductionVars(),
ValuesToIgnoreForVF);
@@ -7316,7 +7316,7 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
// TODO: Remove this code after stepping away from the legacy cost model and
// adding code to simplify VPlans before calculating their costs.
auto TC = PSE.getSE()->getSmallConstantTripCount(OrigLoop);
- if (VF.isFixed() && TC == VF.getFixedValue())
+ if (VF.isFixed() && TC == VF.getFixedValue() && !CM.foldTailByMasking())
addFullyUnrolledInstructionsToIgnore(OrigLoop, Legal->getInductionVars(),
CostCtx.SkipCostComputation);
>From 4f6f7023103469e347a43d9893c19cbaa6bca125 Mon Sep 17 00:00:00 2001
From: Igor Kirillov <igor.kirillov at arm.com>
Date: Mon, 2 Dec 2024 15:17:30 +0000
Subject: [PATCH 6/9] tmp
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 9d195fcb3ff78c..ee7a190f9be000 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5616,9 +5616,11 @@ InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
// away.
SmallPtrSet<Instruction *, 2> ValuesToIgnoreForVF;
auto TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
- if (VF.isFixed() && TC == VF.getFixedValue() && !foldTailByMasking())
+ if (VF.isFixed() && TC == VF.getFixedValue()) {
+ assert(!foldTailByMasking());
addFullyUnrolledInstructionsToIgnore(TheLoop, Legal->getInductionVars(),
ValuesToIgnoreForVF);
+ }
// For each block.
for (BasicBlock *BB : TheLoop->blocks()) {
>From e9763fab28fcfa1997d456500da1f1e47fa087b6 Mon Sep 17 00:00:00 2001
From: Igor Kirillov <igor.kirillov at arm.com>
Date: Tue, 3 Dec 2024 16:59:11 +0000
Subject: [PATCH 7/9] Rebase and update after pre-committing tests
---
.../LoopVectorize/AArch64/fully-unrolled-cost.ll | 14 ++++----------
1 file changed, 4 insertions(+), 10 deletions(-)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll
index aea5467c0edb12..199995f1988554 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll
@@ -12,12 +12,10 @@ define i64 @test(ptr %a, ptr %b) #0 {
; CHECK-NEXT: Cost of 1 for VF 8: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16
; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK: Cost for VF 8: 26
-; CHECK-NEXT: Cost of 1 for VF 16: induction instruction %i.iv.next = add nuw nsw i64 %i.iv, 1
; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
-; CHECK-NEXT: Cost of 1 for VF 16: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16
; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
-; CHECK: Cost for VF 16: 50
-; CHECK: LV: Selecting VF: vscale x 2
+; CHECK: Cost for VF 16: 48
+; CHECK: LV: Selecting VF: 16
entry:
br label %for.body
@@ -50,9 +48,8 @@ define i64 @test_external_iv_user(ptr %a, ptr %b) #0 {
; CHECK: Cost for VF 8: 26
; CHECK-NEXT: Cost of 1 for VF 16: induction instruction %i.iv.next = add nuw nsw i64 %i.iv, 1
; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
-; CHECK-NEXT: Cost of 1 for VF 16: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16
; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
-; CHECK: Cost for VF 16: 50
+; CHECK: Cost for VF 16: 49
; CHECK: LV: Selecting VF: vscale x 2
entry:
br label %for.body
@@ -86,13 +83,10 @@ define i64 @test_two_ivs(ptr %a, ptr %b, i64 %start) #0 {
; CHECK-NEXT: Cost of 1 for VF 8: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16
; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK: Cost for VF 8: 27
-; CHECK-NEXT: Cost of 1 for VF 16: induction instruction %i.iv.next = add nuw nsw i64 %i.iv, 1
; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
-; CHECK-NEXT: Cost of 1 for VF 16: induction instruction %j.iv.next = add nuw nsw i64 %j.iv, 1
; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %j.iv = phi i64 [ %start, %entry ], [ %j.iv.next, %for.body ]
-; CHECK-NEXT: Cost of 1 for VF 16: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16
; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
-; CHECK: Cost for VF 16: 51
+; CHECK: Cost for VF 16: 48
; CHECK: LV: Selecting VF: 16
entry:
br label %for.body
>From d2f9ad9e9fdeb0cc08c882f342c2cab93c90ef89 Mon Sep 17 00:00:00 2001
From: Igor Kirillov <igor.kirillov at arm.com>
Date: Thu, 5 Dec 2024 10:55:08 +0000
Subject: [PATCH 8/9] Remove randomly introduced change
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index ee7a190f9be000..9d195fcb3ff78c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5616,11 +5616,9 @@ InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
// away.
SmallPtrSet<Instruction *, 2> ValuesToIgnoreForVF;
auto TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
- if (VF.isFixed() && TC == VF.getFixedValue()) {
- assert(!foldTailByMasking());
+ if (VF.isFixed() && TC == VF.getFixedValue() && !foldTailByMasking())
addFullyUnrolledInstructionsToIgnore(TheLoop, Legal->getInductionVars(),
ValuesToIgnoreForVF);
- }
// For each block.
for (BasicBlock *BB : TheLoop->blocks()) {
>From 7417e083e6829439746c8436a0e0f51589315627 Mon Sep 17 00:00:00 2001
From: Igor Kirillov <igor.kirillov at arm.com>
Date: Mon, 9 Dec 2024 16:02:17 +0000
Subject: [PATCH 9/9] Added test with extra user of cmp instruction
---
.../Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll
index 199995f1988554..f5ffc731eac842 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll
@@ -119,11 +119,9 @@ define i1 @test_extra_cmp_user(ptr nocapture noundef %dst, ptr nocapture noundef
; CHECK-NEXT: Cost of 4 for VF 8: exit condition instruction %exitcond.not = icmp eq i64 %indvars.iv.next, 16
; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK: Cost for VF 8: 12
-; CHECK-NEXT: Cost of 8 for VF 16: induction instruction %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-; CHECK-NEXT: Cost of 8 for VF 16: exit condition instruction %exitcond.not = icmp eq i64 %indvars.iv.next, 16
; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
-; CHECK: Cost for VF 16: 20
+; CHECK: Cost for VF 16: 4
; CHECK: LV: Selecting VF: 16
entry:
br label %for.body
More information about the llvm-commits
mailing list