[llvm] [VPlan] Only use selectVectorizationFactor for cross-check (NFCI). (PR #103033)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 19 05:30:25 PDT 2024
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/103033
>From 8608a2edba7996bb101ce140c56f3bd205a3727e Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 7 Aug 2024 19:49:11 +0100
Subject: [PATCH 1/3] [VPlan] Only use selectVectorizationFactor for
cross-check (NFCI).
Use getBestVF to select VF up-front and only use
selectVectorizationFactor to get the legacy VF to check the
vectorization decision matches the VPlan-based cost model.
---
.../Vectorize/LoopVectorizationPlanner.h | 9 ++-
.../Transforms/Vectorize/LoopVectorize.cpp | 81 ++++++++-----------
.../RISCV/riscv-vector-reverse.ll | 2 -
3 files changed, 39 insertions(+), 53 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index f6271300539320..edc0ace31ac048 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -354,9 +354,10 @@ class LoopVectorizationPlanner {
: OrigLoop(L), LI(LI), DT(DT), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
IAI(IAI), PSE(PSE), Hints(Hints), ORE(ORE) {}
- /// Plan how to best vectorize, return the best VF and its cost, or
- /// std::nullopt if vectorization and interleaving should be avoided up front.
- std::optional<VectorizationFactor> plan(ElementCount UserVF, unsigned UserIC);
+ /// Build VPlans for the specified \p UserVF and \p UserIC if they are
+ /// non-zero or all applicable candidate VFs otherwise. If vectorization and
+ /// interleaving should be avoided up-front, no plans are generated.
+ void plan(ElementCount UserVF, unsigned UserIC);
/// Use the VPlan-native path to plan how to best vectorize, return the best
/// VF and its cost.
@@ -367,7 +368,7 @@ class LoopVectorizationPlanner {
/// Return the most profitable vectorization factor. Also collect all
/// profitable VFs in ProfitableVFs.
- ElementCount getBestVF();
+ VectorizationFactor getBestVF();
/// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
/// according to the best selected \p VF and \p UF.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0d1262fa187298..020faa69015dd5 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6953,15 +6953,14 @@ LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) {
return VectorizationFactor::Disabled();
}
-std::optional<VectorizationFactor>
-LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
+void LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
assert(OrigLoop->isInnermost() && "Inner loop expected.");
CM.collectValuesToIgnore();
CM.collectElementTypesForWidening();
FixedScalableVFPair MaxFactors = CM.computeMaxVF(UserVF, UserIC);
if (!MaxFactors) // Cases that should not to be vectorized nor interleaved.
- return std::nullopt;
+ return;
// Invalidate interleave groups if all blocks of loop will be predicated.
if (CM.blockNeedsPredicationForAnyReason(OrigLoop->getHeader()) &&
@@ -6995,11 +6994,11 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
if (!hasPlanWithVF(UserVF)) {
LLVM_DEBUG(dbgs() << "LV: No VPlan could be built for " << UserVF
<< ".\n");
- return std::nullopt;
+ return;
}
LLVM_DEBUG(printPlans(dbgs()));
- return {{UserVF, 0, 0}};
+ return;
} else
reportVectorizationInfo("UserVF ignored because of invalid costs.",
"InvalidCost", ORE, OrigLoop);
@@ -7029,24 +7028,6 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
buildVPlansWithVPRecipes(ElementCount::getScalable(1), MaxFactors.ScalableVF);
LLVM_DEBUG(printPlans(dbgs()));
- if (VPlans.empty())
- return std::nullopt;
- if (all_of(VPlans,
- [](std::unique_ptr<VPlan> &P) { return P->hasScalarVFOnly(); }))
- return VectorizationFactor::Disabled();
-
- // Select the optimal vectorization factor according to the legacy cost-model.
- // This is now only used to verify the decisions by the new VPlan-based
- // cost-model and will be retired once the VPlan-based cost-model is
- // stabilized.
- VectorizationFactor VF = selectVectorizationFactor();
- assert((VF.Width.isScalar() || VF.ScalarCost > 0) && "when vectorizing, the scalar cost must be non-zero.");
- if (!hasPlanWithVF(VF.Width)) {
- LLVM_DEBUG(dbgs() << "LV: No VPlan could be built for " << VF.Width
- << ".\n");
- return std::nullopt;
- }
- return VF;
}
InstructionCost VPCostContext::getLegacyCost(Instruction *UI,
@@ -7217,11 +7198,13 @@ InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
return Cost;
}
-ElementCount LoopVectorizationPlanner::getBestVF() {
+VectorizationFactor LoopVectorizationPlanner::getBestVF() {
+ if (VPlans.empty())
+ return VectorizationFactor::Disabled();
// If there is a single VPlan with a single VF, return it directly.
VPlan &FirstPlan = *VPlans[0];
if (VPlans.size() == 1 && size(FirstPlan.vectorFactors()) == 1)
- return *FirstPlan.vectorFactors().begin();
+ return {*FirstPlan.vectorFactors().begin(), 0, 0};
ElementCount ScalarVF = ElementCount::getFixed(1);
assert(hasPlanWithVF(ScalarVF) &&
@@ -7229,6 +7212,7 @@ ElementCount LoopVectorizationPlanner::getBestVF() {
// TODO: Compute scalar cost using VPlan-based cost model.
InstructionCost ScalarCost = CM.expectedCost(ScalarVF);
+ LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << ScalarCost << ".\n");
VectorizationFactor ScalarFactor(ScalarVF, ScalarCost, ScalarCost);
VectorizationFactor BestFactor = ScalarFactor;
@@ -7262,7 +7246,19 @@ ElementCount LoopVectorizationPlanner::getBestVF() {
ProfitableVFs.push_back(CurrentFactor);
}
}
- return BestFactor.Width;
+
+#ifndef NDEBUG
+ // Select the optimal vectorization factor according to the legacy cost-model.
+ // This is now only used to verify the decisions by the new VPlan-based
+ // cost-model and will be retired once the VPlan-based cost-model is
+ // stabilized.
+ VectorizationFactor LegacyVF = selectVectorizationFactor();
+ assert(BestFactor.Width == LegacyVF.Width);
+ assert((BestFactor.Width.isScalar() || BestFactor.ScalarCost > 0) &&
+ "when vectorizing, the scalar cost must be non-zero.");
+#endif
+
+ return BestFactor;
}
VPlan &LoopVectorizationPlanner::getBestPlanFor(ElementCount VF) const {
@@ -9883,20 +9879,18 @@ bool LoopVectorizePass::processLoop(Loop *L) {
unsigned UserIC = Hints.getInterleave();
// Plan how to best vectorize, return the best VF and its cost.
- std::optional<VectorizationFactor> MaybeVF = LVP.plan(UserVF, UserIC);
+ LVP.plan(UserVF, UserIC);
+ VectorizationFactor VF = LVP.getBestVF();
+ unsigned IC = 1;
if (ORE->allowExtraAnalysis(LV_NAME))
LVP.emitInvalidCostRemarks(ORE);
- VectorizationFactor VF = VectorizationFactor::Disabled();
- unsigned IC = 1;
-
bool AddBranchWeights =
hasBranchWeightMD(*L->getLoopLatch()->getTerminator());
GeneratedRTChecks Checks(*PSE.getSE(), DT, LI, TTI,
F->getDataLayout(), AddBranchWeights);
- if (MaybeVF) {
- VF = *MaybeVF;
+ if (LVP.hasPlanWithVF(VF.Width)) {
// Select the interleave count.
IC = CM.selectInterleaveCount(VF.Width, VF.Cost);
@@ -9936,7 +9930,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
VectorizeLoop = false;
}
- if (!MaybeVF && UserIC > 1) {
+ if (!LVP.hasPlanWithVF(VF.Width) && UserIC > 1) {
// Tell the user interleaving was avoided up-front, despite being explicitly
// requested.
LLVM_DEBUG(dbgs() << "LV: Ignoring UserIC, because vectorization and "
@@ -10018,11 +10012,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL,
&CM, BFI, PSI, Checks);
- ElementCount BestVF = LVP.getBestVF();
- assert(BestVF.isScalar() &&
- "VPlan cost model and legacy cost model disagreed");
- VPlan &BestPlan = LVP.getBestPlanFor(BestVF);
- LVP.executePlan(BestVF, IC, BestPlan, Unroller, DT, false);
+ VPlan &BestPlan = LVP.getBestPlanFor(VF.Width);
+ LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT, false);
ORE->emit([&]() {
return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(),
@@ -10033,20 +10024,16 @@ bool LoopVectorizePass::processLoop(Loop *L) {
} else {
// If we decided that it is *legal* to vectorize the loop, then do it.
- ElementCount BestVF = LVP.getBestVF();
- LLVM_DEBUG(dbgs() << "VF picked by VPlan cost model: " << BestVF << "\n");
- assert(VF.Width == BestVF &&
- "VPlan cost model and legacy cost model disagreed");
- VPlan &BestPlan = LVP.getBestPlanFor(BestVF);
+ VPlan &BestPlan = LVP.getBestPlanFor(VF.Width);
// Consider vectorizing the epilogue too if it's profitable.
VectorizationFactor EpilogueVF =
- LVP.selectEpilogueVectorizationFactor(BestVF, IC);
+ LVP.selectEpilogueVectorizationFactor(VF.Width, IC);
if (EpilogueVF.Width.isVector()) {
// The first pass vectorizes the main loop and creates a scalar epilogue
// to be vectorized by executing the plan (potentially with a different
// factor) again shortly afterwards.
- EpilogueLoopVectorizationInfo EPI(BestVF, IC, EpilogueVF.Width, 1);
+ EpilogueLoopVectorizationInfo EPI(VF.Width, IC, EpilogueVF.Width, 1);
EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE,
EPI, &LVL, &CM, BFI, PSI, Checks);
@@ -10141,10 +10128,10 @@ bool LoopVectorizePass::processLoop(Loop *L) {
if (!MainILV.areSafetyChecksAdded())
DisableRuntimeUnroll = true;
} else {
- InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, BestVF,
+ InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width,
VF.MinProfitableTripCount, IC, &LVL, &CM, BFI,
PSI, Checks);
- LVP.executePlan(BestVF, IC, BestPlan, LB, DT, false);
+ LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false);
++LoopsVectorized;
// Add metadata to disable runtime unrolling a scalar loop when there
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
index 9f70a891efe76b..38af580e25c9cc 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
@@ -133,7 +133,6 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-NEXT: LV: Not Interleaving.
; CHECK-NEXT: LV: Interleaving is not beneficial.
; CHECK-NEXT: LV: Found a vectorizable loop (vscale x 4) in <stdin>
-; CHECK-NEXT: VF picked by VPlan cost model: vscale x 4
; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop
; CHECK-NEXT: Executing best plan with VF=vscale x 4, UF=1
; CHECK-NEXT: VPlan 'Final VPlan for VF={vscale x 4},UF>=1' {
@@ -336,7 +335,6 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-NEXT: LV: Not Interleaving.
; CHECK-NEXT: LV: Interleaving is not beneficial.
; CHECK-NEXT: LV: Found a vectorizable loop (vscale x 4) in <stdin>
-; CHECK-NEXT: VF picked by VPlan cost model: vscale x 4
; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop
; CHECK-NEXT: Executing best plan with VF=vscale x 4, UF=1
; CHECK-NEXT: VPlan 'Final VPlan for VF={vscale x 4},UF>=1' {
>From 7c5c96f806aaa2ab73aeebdbf1ed61a15b2f2e6e Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 19 Aug 2024 13:11:50 +0100
Subject: [PATCH 2/3] !fixup address comments, thanks!
---
.../Transforms/Vectorize/LoopVectorizationPlanner.h | 2 ++
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 13 ++++++-------
llvm/lib/Transforms/Vectorize/VPlan.cpp | 4 ++++
3 files changed, 12 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index f5f136f81f539f..3bb7a8e651a3f6 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -451,12 +451,14 @@ class LoopVectorizationPlanner {
VPRecipeBuilder &RecipeBuilder,
ElementCount MinVF);
+#ifndef NDEBUG
/// \return The most profitable vectorization factor for the available VPlans
/// and the cost of that VF.
/// This is now only used to verify the decisions by the new VPlan-based
/// cost-model and will be retired once the VPlan-based cost-model is
/// stabilized.
VectorizationFactor selectVectorizationFactor();
+#endif
/// Returns true if the per-lane cost of VectorizationFactor A is lower than
/// that of B.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index ffee8a2341e778..809a9d21ce32f7 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4546,6 +4546,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
return false;
}
+#ifndef NDEBUG
VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
InstructionCost ExpectedCost = CM.expectedCost(ElementCount::getFixed(1));
LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << ExpectedCost << ".\n");
@@ -4578,7 +4579,6 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
InstructionCost C = CM.expectedCost(VF);
VectorizationFactor Candidate(VF, C, ScalarCost.ScalarCost);
-#ifndef NDEBUG
unsigned AssumedMinimumVscale =
getVScaleForTuning(OrigLoop, TTI).value_or(1);
unsigned Width =
@@ -4591,7 +4591,6 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
LLVM_DEBUG(dbgs() << " (assuming a minimum vscale of "
<< AssumedMinimumVscale << ")");
LLVM_DEBUG(dbgs() << ".\n");
-#endif
if (!ForceVectorization && !willGenerateVectors(*P, VF, TTI)) {
LLVM_DEBUG(
@@ -4621,6 +4620,7 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
LLVM_DEBUG(dbgs() << "LV: Selecting VF: " << ChosenFactor.Width << ".\n");
return ChosenFactor;
}
+#endif
bool LoopVectorizationPlanner::isCandidateForEpilogueVectorization(
ElementCount VF) const {
@@ -7030,7 +7030,6 @@ void LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
if (!hasPlanWithVF(UserVF)) {
LLVM_DEBUG(dbgs()
<< "LV: No VPlan could be built for " << UserVF << ".\n");
- return std::nullopt;
}
LLVM_DEBUG(printPlans(dbgs()));
@@ -7236,7 +7235,7 @@ InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
return Cost;
}
-ElementCount LoopVectorizationPlanner::computeBestVF() {
+VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
if (VPlans.empty())
return VectorizationFactor::Disabled();
// If there is a single VPlan with a single VF, return it directly.
@@ -7291,7 +7290,7 @@ ElementCount LoopVectorizationPlanner::computeBestVF() {
// cost-model and will be retired once the VPlan-based cost-model is
// stabilized.
VectorizationFactor LegacyVF = selectVectorizationFactor();
- assert(BestFactor.Width == LegacyVF.Width);
+ assert(BestFactor.Width == LegacyVF.Width && " VPlan cost model and legacy cost model disagreed");
assert((BestFactor.Width.isScalar() || BestFactor.ScalarCost > 0) &&
"when vectorizing, the scalar cost must be non-zero.");
#endif
@@ -9824,9 +9823,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
ElementCount UserVF = Hints.getWidth();
unsigned UserIC = Hints.getInterleave();
- // Plan how to best vectorize, return the best VF and its cost.
+ // Plan how to best vectorize.
LVP.plan(UserVF, UserIC);
- VectorizationFactor VF = LVP.getBestVF();
+ VectorizationFactor VF = LVP.computeBestVF();
unsigned IC = 1;
if (ORE->allowExtraAnalysis(LV_NAME))
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 15acfc00251e29..e9bea93a085790 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1695,6 +1695,10 @@ VPlan &LoopVectorizationPlanner::getPlanFor(ElementCount VF) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LoopVectorizationPlanner::printPlans(raw_ostream &O) {
+ if (VPlans.empty()) {
+ O << "LV: Not VPlans built.";
+ return;
+ }
for (const auto &Plan : VPlans)
if (PrintVPlansInDotFormat)
Plan->printDOT(O);
>From 119925475906a70f78944bf933ca80776b2875a3 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 19 Aug 2024 13:29:58 +0100
Subject: [PATCH 3/3] !fixup fix formatting
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 809a9d21ce32f7..0eca685dd0ee89 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7290,7 +7290,8 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
// cost-model and will be retired once the VPlan-based cost-model is
// stabilized.
VectorizationFactor LegacyVF = selectVectorizationFactor();
- assert(BestFactor.Width == LegacyVF.Width && " VPlan cost model and legacy cost model disagreed");
+ assert(BestFactor.Width == LegacyVF.Width &&
+ " VPlan cost model and legacy cost model disagreed");
assert((BestFactor.Width.isScalar() || BestFactor.ScalarCost > 0) &&
"when vectorizing, the scalar cost must be non-zero.");
#endif
More information about the llvm-commits
mailing list