[llvm] [LV] Ignore some costs when loop gets fully unrolled (PR #106699)
Igor Kirillov via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 5 03:29:43 PST 2024
https://github.com/igogo-x86 updated https://github.com/llvm/llvm-project/pull/106699
>From def2ca767206201a49feac404aa7d063e43adf90 Mon Sep 17 00:00:00 2001
From: Igor Kirillov <igor.kirillov at arm.com>
Date: Fri, 30 Aug 2024 09:22:21 +0000
Subject: [PATCH 1/8] [LV] Ignore some costs when loop gets fully unrolled
When VF has a fixed width and equals the number of iterations, and we are not
tail folding by masking, the comparison instruction and induction operation
will be DCEed later.
Ignoring the costs of these instructions improves the cost model.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 20 +++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3c7c044a042719..29a20a67fd783f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7248,6 +7248,26 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
continue;
IVInsts.push_back(CI);
}
+
+ // If the given VF loop gets fully unrolled, ignore the costs of comparison
+ // and increment instruction, as they'll get simplified away
+ auto TC = CM.PSE.getSE()->getSmallConstantTripCount(OrigLoop);
+ auto *Cmp = OrigLoop->getLatchCmpInst();
+ if (Cmp && VF.isFixed() && VF.getFixedValue() == TC) {
+ CostCtx.SkipCostComputation.insert(Cmp);
+ for (Instruction *IVInst : IVInsts) {
+ bool IsSimplifiedAway = true;
+ for (auto *UIV : IVInst->users()) {
+ if (!Legal->isInductionVariable(UIV) && UIV != Cmp) {
+ IsSimplifiedAway = false;
+ break;
+ }
+ }
+ if (IsSimplifiedAway)
+ CostCtx.SkipCostComputation.insert(IVInst);
+ }
+ }
+
for (Instruction *IVInst : IVInsts) {
if (CostCtx.skipCostComputation(IVInst, VF.isVector()))
continue;
>From 4eb13cc8ae798b632aa490ac546b7e5957f15d33 Mon Sep 17 00:00:00 2001
From: Igor Kirillov <igor.kirillov at arm.com>
Date: Wed, 2 Oct 2024 09:05:33 +0000
Subject: [PATCH 2/8] Update to pass assertion comparing two cost models
---
.../Transforms/Vectorize/LoopVectorize.cpp | 36 +++++++++++--------
1 file changed, 22 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 29a20a67fd783f..af9c6aa3fa033a 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5559,6 +5559,20 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
InstructionCost Cost;
+ // If with the given VF loop gets fully unrolled, ignore the costs of
+ // comparison and induction instructions, as they'll get simplified away
+ SmallPtrSet<const Value *, 16> ValuesToIgnoreForVF;
+ auto TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
+ auto *Cmp = TheLoop->getLatchCmpInst();
+ if (Cmp && TC == VF.getKnownMinValue()) {
+ ValuesToIgnoreForVF.insert(Cmp);
+ for (const auto &[IV, IndDesc] : Legal->getInductionVars()) {
+ Instruction *IVInc = cast<Instruction>(
+ IV->getIncomingValueForBlock(TheLoop->getLoopLatch()));
+ ValuesToIgnoreForVF.insert(IVInc);
+ }
+ }
+
// For each block.
for (BasicBlock *BB : TheLoop->blocks()) {
InstructionCost BlockCost;
@@ -5566,7 +5580,7 @@ InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
// For each instruction in the old loop.
for (Instruction &I : BB->instructionsWithoutDebug()) {
// Skip ignored values.
- if (ValuesToIgnore.count(&I) ||
+ if (ValuesToIgnore.count(&I) || ValuesToIgnoreForVF.count(&I) ||
(VF.isVector() && VecValuesToIgnore.count(&I)))
continue;
@@ -7249,22 +7263,16 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
IVInsts.push_back(CI);
}
- // If the given VF loop gets fully unrolled, ignore the costs of comparison
- // and increment instruction, as they'll get simplified away
+ // If with the given VF loop gets fully unrolled, ignore the costs of
+ // comparison and induction instructions, as they'll get simplified away
auto TC = CM.PSE.getSE()->getSmallConstantTripCount(OrigLoop);
auto *Cmp = OrigLoop->getLatchCmpInst();
- if (Cmp && VF.isFixed() && VF.getFixedValue() == TC) {
+ if (Cmp && TC == VF.getKnownMinValue()) {
CostCtx.SkipCostComputation.insert(Cmp);
- for (Instruction *IVInst : IVInsts) {
- bool IsSimplifiedAway = true;
- for (auto *UIV : IVInst->users()) {
- if (!Legal->isInductionVariable(UIV) && UIV != Cmp) {
- IsSimplifiedAway = false;
- break;
- }
- }
- if (IsSimplifiedAway)
- CostCtx.SkipCostComputation.insert(IVInst);
+ for (const auto &[IV, IndDesc] : Legal->getInductionVars()) {
+ Instruction *IVInc = cast<Instruction>(
+ IV->getIncomingValueForBlock(OrigLoop->getLoopLatch()));
+ CostCtx.SkipCostComputation.insert(IVInc);
}
}
>From f3149247a8e46c13b5593462be2a027bc46569ed Mon Sep 17 00:00:00 2001
From: Igor Kirillov <igor.kirillov at arm.com>
Date: Tue, 12 Nov 2024 13:51:21 +0000
Subject: [PATCH 3/8] Check that induction variable has no unsimplifiable users
Add AArch64 test
---
.../Transforms/Vectorize/LoopVectorize.cpp | 59 ++++++++++++-------
1 file changed, 37 insertions(+), 22 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index af9c6aa3fa033a..0d715e0ee07cb9 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2652,6 +2652,33 @@ static Value *getExpandedStep(const InductionDescriptor &ID,
return I->second;
}
+/// Knowing that loop \p L would be fully unrolled after vectorisation, add
+/// instructions that will get simplified and thus should not have any cost to
+/// \p InstsToIgnore
+static void AddFullyUnrolledInstructionsToIgnore(
+ Loop *L, const LoopVectorizationLegality::InductionList &IL,
+ SmallPtrSetImpl<Instruction *> &InstsToIgnore) {
+ auto *Cmp = L->getLatchCmpInst();
+ if (!Cmp)
+ return;
+ InstsToIgnore.insert(Cmp);
+ for (const auto &[IV, IndDesc] : IL) {
+ // Get next iteration value of the induction variable
+ Instruction *IVInst =
+ cast<Instruction>(IV->getIncomingValueForBlock(L->getLoopLatch()));
+ bool IsSimplifiedAway = true;
+ // Check that this value used only to exit the loop
+ for (auto *UIV : IVInst->users()) {
+ if (UIV != IV && UIV != Cmp) {
+ IsSimplifiedAway = false;
+ break;
+ }
+ }
+ if (IsSimplifiedAway)
+ InstsToIgnore.insert(IVInst);
+ }
+}
+
void InnerLoopVectorizer::createInductionResumeValues(
const SCEV2ValueTy &ExpandedSCEVs,
std::pair<BasicBlock *, Value *> AdditionalBypass) {
@@ -5559,19 +5586,13 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
InstructionCost Cost;
- // If with the given VF loop gets fully unrolled, ignore the costs of
- // comparison and induction instructions, as they'll get simplified away
- SmallPtrSet<const Value *, 16> ValuesToIgnoreForVF;
+ // If with the given fixed width VF loop gets fully unrolled, ignore the costs
+ // of comparison and induction instructions, as they'll get simplified away
+ SmallPtrSet<Instruction *, 2> ValuesToIgnoreForVF;
auto TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
- auto *Cmp = TheLoop->getLatchCmpInst();
- if (Cmp && TC == VF.getKnownMinValue()) {
- ValuesToIgnoreForVF.insert(Cmp);
- for (const auto &[IV, IndDesc] : Legal->getInductionVars()) {
- Instruction *IVInc = cast<Instruction>(
- IV->getIncomingValueForBlock(TheLoop->getLoopLatch()));
- ValuesToIgnoreForVF.insert(IVInc);
- }
- }
+ if (VF.isFixed() && TC == VF.getFixedValue())
+ AddFullyUnrolledInstructionsToIgnore(TheLoop, Legal->getInductionVars(),
+ ValuesToIgnoreForVF);
// For each block.
for (BasicBlock *BB : TheLoop->blocks()) {
@@ -7265,16 +7286,10 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
// If with the given VF loop gets fully unrolled, ignore the costs of
// comparison and induction instructions, as they'll get simplified away
- auto TC = CM.PSE.getSE()->getSmallConstantTripCount(OrigLoop);
- auto *Cmp = OrigLoop->getLatchCmpInst();
- if (Cmp && TC == VF.getKnownMinValue()) {
- CostCtx.SkipCostComputation.insert(Cmp);
- for (const auto &[IV, IndDesc] : Legal->getInductionVars()) {
- Instruction *IVInc = cast<Instruction>(
- IV->getIncomingValueForBlock(OrigLoop->getLoopLatch()));
- CostCtx.SkipCostComputation.insert(IVInc);
- }
- }
+ auto TC = PSE.getSE()->getSmallConstantTripCount(OrigLoop);
+ if (VF.isFixed() && TC == VF.getFixedValue())
+ AddFullyUnrolledInstructionsToIgnore(OrigLoop, Legal->getInductionVars(),
+ CostCtx.SkipCostComputation);
for (Instruction *IVInst : IVInsts) {
if (CostCtx.skipCostComputation(IVInst, VF.isVector()))
>From fa1246a1449d449fac6fcfe3c700555cde46f236 Mon Sep 17 00:00:00 2001
From: Igor Kirillov <igor.kirillov at arm.com>
Date: Fri, 15 Nov 2024 14:35:22 +0000
Subject: [PATCH 4/8] Addressing suggestions
* Fixing comments
* Adding more tests
* Remove cmp latch presence requirements
---
.../Transforms/Vectorize/LoopVectorize.cpp | 42 +++++++++----------
1 file changed, 19 insertions(+), 23 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0d715e0ee07cb9..823fd603554d65 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2652,29 +2652,21 @@ static Value *getExpandedStep(const InductionDescriptor &ID,
return I->second;
}
-/// Knowing that loop \p L would be fully unrolled after vectorisation, add
-/// instructions that will get simplified and thus should not have any cost to
-/// \p InstsToIgnore
-static void AddFullyUnrolledInstructionsToIgnore(
+/// Knowing that loop \p L executes a single vector iteration, add instructions
+/// that will get simplified and thus should not have any cost to \p
+/// InstsToIgnore.
+static void addFullyUnrolledInstructionsToIgnore(
Loop *L, const LoopVectorizationLegality::InductionList &IL,
SmallPtrSetImpl<Instruction *> &InstsToIgnore) {
auto *Cmp = L->getLatchCmpInst();
- if (!Cmp)
- return;
- InstsToIgnore.insert(Cmp);
+ if (Cmp)
+ InstsToIgnore.insert(Cmp);
for (const auto &[IV, IndDesc] : IL) {
- // Get next iteration value of the induction variable
+ // Get next iteration value of the induction variable.
Instruction *IVInst =
cast<Instruction>(IV->getIncomingValueForBlock(L->getLoopLatch()));
- bool IsSimplifiedAway = true;
- // Check that this value used only to exit the loop
- for (auto *UIV : IVInst->users()) {
- if (UIV != IV && UIV != Cmp) {
- IsSimplifiedAway = false;
- break;
- }
- }
- if (IsSimplifiedAway)
+ if (all_of(IVInst->users(),
+ [&](const User *U) { return U == IV || U == Cmp; }))
InstsToIgnore.insert(IVInst);
}
}
@@ -5586,12 +5578,13 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
InstructionCost Cost;
- // If with the given fixed width VF loop gets fully unrolled, ignore the costs
- // of comparison and induction instructions, as they'll get simplified away
+ // If the vector loop gets executed exactly once with the given VF, ignore the
+ // costs of comparison and induction instructions, as they'll get simplified
+ // away.
SmallPtrSet<Instruction *, 2> ValuesToIgnoreForVF;
auto TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
if (VF.isFixed() && TC == VF.getFixedValue())
- AddFullyUnrolledInstructionsToIgnore(TheLoop, Legal->getInductionVars(),
+ addFullyUnrolledInstructionsToIgnore(TheLoop, Legal->getInductionVars(),
ValuesToIgnoreForVF);
// For each block.
@@ -7284,11 +7277,14 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
IVInsts.push_back(CI);
}
- // If with the given VF loop gets fully unrolled, ignore the costs of
- // comparison and induction instructions, as they'll get simplified away
+ // If the vector loop gets executed exactly once with the given VF, ignore
+ // the costs of comparison and induction instructions, as they'll get
+ // simplified away.
+ // TODO: Remove this code after stepping away from the legacy cost model and
+ // adding code to simplify VPlans before calculating their costs.
auto TC = PSE.getSE()->getSmallConstantTripCount(OrigLoop);
if (VF.isFixed() && TC == VF.getFixedValue())
- AddFullyUnrolledInstructionsToIgnore(OrigLoop, Legal->getInductionVars(),
+ addFullyUnrolledInstructionsToIgnore(OrigLoop, Legal->getInductionVars(),
CostCtx.SkipCostComputation);
for (Instruction *IVInst : IVInsts) {
>From 8024cd97c96aac899ad4451e5ceec9b5ec8f2df0 Mon Sep 17 00:00:00 2001
From: Igor Kirillov <igor.kirillov at arm.com>
Date: Tue, 19 Nov 2024 12:19:27 +0000
Subject: [PATCH 5/8] Reduce cost only when not tail-folding
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 823fd603554d65..6cc177380ee9e2 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5583,7 +5583,7 @@ InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
// away.
SmallPtrSet<Instruction *, 2> ValuesToIgnoreForVF;
auto TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
- if (VF.isFixed() && TC == VF.getFixedValue())
+ if (VF.isFixed() && TC == VF.getFixedValue() && !foldTailByMasking())
addFullyUnrolledInstructionsToIgnore(TheLoop, Legal->getInductionVars(),
ValuesToIgnoreForVF);
@@ -7283,7 +7283,7 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
// TODO: Remove this code after stepping away from the legacy cost model and
// adding code to simplify VPlans before calculating their costs.
auto TC = PSE.getSE()->getSmallConstantTripCount(OrigLoop);
- if (VF.isFixed() && TC == VF.getFixedValue())
+ if (VF.isFixed() && TC == VF.getFixedValue() && !CM.foldTailByMasking())
addFullyUnrolledInstructionsToIgnore(OrigLoop, Legal->getInductionVars(),
CostCtx.SkipCostComputation);
>From b8c866f0e7796552ff57c1f1bd6c6c69a2fc9d3d Mon Sep 17 00:00:00 2001
From: Igor Kirillov <igor.kirillov at arm.com>
Date: Mon, 2 Dec 2024 15:17:30 +0000
Subject: [PATCH 6/8] tmp
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 6cc177380ee9e2..815f0c666ee20e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5583,9 +5583,11 @@ InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
// away.
SmallPtrSet<Instruction *, 2> ValuesToIgnoreForVF;
auto TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
- if (VF.isFixed() && TC == VF.getFixedValue() && !foldTailByMasking())
+ if (VF.isFixed() && TC == VF.getFixedValue()) {
+ assert(!foldTailByMasking());
addFullyUnrolledInstructionsToIgnore(TheLoop, Legal->getInductionVars(),
ValuesToIgnoreForVF);
+ }
// For each block.
for (BasicBlock *BB : TheLoop->blocks()) {
>From 940f2280ddda57209cf52a3c67e232483734c1b6 Mon Sep 17 00:00:00 2001
From: Igor Kirillov <igor.kirillov at arm.com>
Date: Tue, 3 Dec 2024 16:59:11 +0000
Subject: [PATCH 7/8] Rebase and update after pre-committing tests
---
.../LoopVectorize/AArch64/fully-unrolled-cost.ll | 14 ++++----------
1 file changed, 4 insertions(+), 10 deletions(-)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll
index ab29bf8d2d52a3..c1ede2410238ca 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll
@@ -12,12 +12,10 @@ define i64 @test(ptr %a, ptr %b) #0 {
; CHECK-NEXT: Cost of 1 for VF 8: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16
; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK: Cost for VF 8: 26
-; CHECK-NEXT: Cost of 1 for VF 16: induction instruction %i.iv.next = add nuw nsw i64 %i.iv, 1
; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
-; CHECK-NEXT: Cost of 1 for VF 16: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16
; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
-; CHECK: Cost for VF 16: 50
-; CHECK: LV: Selecting VF: vscale x 2
+; CHECK: Cost for VF 16: 48
+; CHECK: LV: Selecting VF: 16
entry:
br label %for.body
@@ -50,9 +48,8 @@ define i64 @test_external_iv_user(ptr %a, ptr %b) #0 {
; CHECK: Cost for VF 8: 26
; CHECK-NEXT: Cost of 1 for VF 16: induction instruction %i.iv.next = add nuw nsw i64 %i.iv, 1
; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
-; CHECK-NEXT: Cost of 1 for VF 16: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16
; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
-; CHECK: Cost for VF 16: 50
+; CHECK: Cost for VF 16: 49
; CHECK: LV: Selecting VF: vscale x 2
entry:
br label %for.body
@@ -86,13 +83,10 @@ define i64 @test_two_ivs(ptr %a, ptr %b, i64 %start) #0 {
; CHECK-NEXT: Cost of 1 for VF 8: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16
; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK: Cost for VF 8: 27
-; CHECK-NEXT: Cost of 1 for VF 16: induction instruction %i.iv.next = add nuw nsw i64 %i.iv, 1
; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
-; CHECK-NEXT: Cost of 1 for VF 16: induction instruction %j.iv.next = add nuw nsw i64 %j.iv, 1
; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %j.iv = phi i64 [ %start, %entry ], [ %j.iv.next, %for.body ]
-; CHECK-NEXT: Cost of 1 for VF 16: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16
; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
-; CHECK: Cost for VF 16: 51
+; CHECK: Cost for VF 16: 48
; CHECK: LV: Selecting VF: 16
entry:
br label %for.body
>From 86b9f1eead97358e50b6c3d9ff38075329fe15f4 Mon Sep 17 00:00:00 2001
From: Igor Kirillov <igor.kirillov at arm.com>
Date: Thu, 5 Dec 2024 10:55:08 +0000
Subject: [PATCH 8/8] Remove randomly introduced change
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 815f0c666ee20e..6cc177380ee9e2 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5583,11 +5583,9 @@ InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
// away.
SmallPtrSet<Instruction *, 2> ValuesToIgnoreForVF;
auto TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
- if (VF.isFixed() && TC == VF.getFixedValue()) {
- assert(!foldTailByMasking());
+ if (VF.isFixed() && TC == VF.getFixedValue() && !foldTailByMasking())
addFullyUnrolledInstructionsToIgnore(TheLoop, Legal->getInductionVars(),
ValuesToIgnoreForVF);
- }
// For each block.
for (BasicBlock *BB : TheLoop->blocks()) {
More information about the llvm-commits
mailing list