[llvm] 4e04286 - [VPlan] Only use selectVectorizationFactor for cross-check (NFCI). (#103033)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 21 04:09:04 PDT 2024
Author: Florian Hahn
Date: 2024-08-21T13:09:01+02:00
New Revision: 4e04286d61edfb56338ca3a6d0735c5384508b00
URL: https://github.com/llvm/llvm-project/commit/4e04286d61edfb56338ca3a6d0735c5384508b00
DIFF: https://github.com/llvm/llvm-project/commit/4e04286d61edfb56338ca3a6d0735c5384508b00.diff
LOG: [VPlan] Only use selectVectorizationFactor for cross-check (NFCI). (#103033)
Use getBestVF to select VF up-front and only use
selectVectorizationFactor to get the VF legacy VF to check the
vectorization decision matches the VPlan-based cost model.
PR: https://github.com/llvm/llvm-project/pull/103033
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/VPlan.cpp
llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll
llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/Inputs/x86-loopvectorize-costmodel.ll.expected
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 7d242082172c62..3bb7a8e651a3f6 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -354,9 +354,10 @@ class LoopVectorizationPlanner {
: OrigLoop(L), LI(LI), DT(DT), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
IAI(IAI), PSE(PSE), Hints(Hints), ORE(ORE) {}
- /// Plan how to best vectorize, return the best VF and its cost, or
- /// std::nullopt if vectorization and interleaving should be avoided up front.
- std::optional<VectorizationFactor> plan(ElementCount UserVF, unsigned UserIC);
+ /// Build VPlans for the specified \p UserVF and \p UserIC if they are
+ /// non-zero or all applicable candidate VFs otherwise. If vectorization and
+ /// interleaving should be avoided up-front, no plans are generated.
+ void plan(ElementCount UserVF, unsigned UserIC);
/// Use the VPlan-native path to plan how to best vectorize, return the best
/// VF and its cost.
@@ -368,7 +369,7 @@ class LoopVectorizationPlanner {
/// Compute and return the most profitable vectorization factor. Also collect
/// all profitable VFs in ProfitableVFs.
- ElementCount computeBestVF();
+ VectorizationFactor computeBestVF();
/// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
/// according to the best selected \p VF and \p UF.
@@ -450,12 +451,14 @@ class LoopVectorizationPlanner {
VPRecipeBuilder &RecipeBuilder,
ElementCount MinVF);
+#ifndef NDEBUG
/// \return The most profitable vectorization factor for the available VPlans
/// and the cost of that VF.
/// This is now only used to verify the decisions by the new VPlan-based
/// cost-model and will be retired once the VPlan-based cost-model is
/// stabilized.
VectorizationFactor selectVectorizationFactor();
+#endif
/// Returns true if the per-lane cost of VectorizationFactor A is lower than
/// that of B.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 364166b3ab5380..e3049da1b22188 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4546,6 +4546,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
return false;
}
+#ifndef NDEBUG
VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
InstructionCost ExpectedCost = CM.expectedCost(ElementCount::getFixed(1));
LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << ExpectedCost << ".\n");
@@ -4578,7 +4579,6 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
InstructionCost C = CM.expectedCost(VF);
VectorizationFactor Candidate(VF, C, ScalarCost.ScalarCost);
-#ifndef NDEBUG
unsigned AssumedMinimumVscale =
getVScaleForTuning(OrigLoop, TTI).value_or(1);
unsigned Width =
@@ -4591,7 +4591,6 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
LLVM_DEBUG(dbgs() << " (assuming a minimum vscale of "
<< AssumedMinimumVscale << ")");
LLVM_DEBUG(dbgs() << ".\n");
-#endif
if (!ForceVectorization && !willGenerateVectors(*P, VF, TTI)) {
LLVM_DEBUG(
@@ -4621,6 +4620,7 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
LLVM_DEBUG(dbgs() << "LV: Selecting VF: " << ChosenFactor.Width << ".\n");
return ChosenFactor;
}
+#endif
bool LoopVectorizationPlanner::isCandidateForEpilogueVectorization(
ElementCount VF) const {
@@ -6985,15 +6985,14 @@ LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) {
return VectorizationFactor::Disabled();
}
-std::optional<VectorizationFactor>
-LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
+void LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
assert(OrigLoop->isInnermost() && "Inner loop expected.");
CM.collectValuesToIgnore();
CM.collectElementTypesForWidening();
FixedScalableVFPair MaxFactors = CM.computeMaxVF(UserVF, UserIC);
if (!MaxFactors) // Cases that should not to be vectorized nor interleaved.
- return std::nullopt;
+ return;
// Invalidate interleave groups if all blocks of loop will be predicated.
if (CM.blockNeedsPredicationForAnyReason(OrigLoop->getHeader()) &&
@@ -7028,14 +7027,8 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
if (CM.selectUserVectorizationFactor(UserVF)) {
LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
buildVPlansWithVPRecipes(UserVF, UserVF);
- if (!hasPlanWithVF(UserVF)) {
- LLVM_DEBUG(dbgs()
- << "LV: No VPlan could be built for " << UserVF << ".\n");
- return std::nullopt;
- }
-
LLVM_DEBUG(printPlans(dbgs()));
- return {{UserVF, 0, 0}};
+ return;
} else
reportVectorizationInfo("UserVF ignored because of invalid costs.",
"InvalidCost", ORE, OrigLoop);
@@ -7066,24 +7059,6 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
buildVPlansWithVPRecipes(ElementCount::getScalable(1), MaxFactors.ScalableVF);
LLVM_DEBUG(printPlans(dbgs()));
- if (VPlans.empty())
- return std::nullopt;
- if (all_of(VPlans,
- [](std::unique_ptr<VPlan> &P) { return P->hasScalarVFOnly(); }))
- return VectorizationFactor::Disabled();
-
- // Select the optimal vectorization factor according to the legacy cost-model.
- // This is now only used to verify the decisions by the new VPlan-based
- // cost-model and will be retired once the VPlan-based cost-model is
- // stabilized.
- VectorizationFactor VF = selectVectorizationFactor();
- assert((VF.Width.isScalar() || VF.ScalarCost > 0) && "when vectorizing, the scalar cost must be non-zero.");
- if (!hasPlanWithVF(VF.Width)) {
- LLVM_DEBUG(dbgs() << "LV: No VPlan could be built for " << VF.Width
- << ".\n");
- return std::nullopt;
- }
- return VF;
}
InstructionCost VPCostContext::getLegacyCost(Instruction *UI,
@@ -7255,11 +7230,13 @@ InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
return Cost;
}
-ElementCount LoopVectorizationPlanner::computeBestVF() {
+VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
+ if (VPlans.empty())
+ return VectorizationFactor::Disabled();
// If there is a single VPlan with a single VF, return it directly.
VPlan &FirstPlan = *VPlans[0];
if (VPlans.size() == 1 && size(FirstPlan.vectorFactors()) == 1)
- return *FirstPlan.vectorFactors().begin();
+ return {*FirstPlan.vectorFactors().begin(), 0, 0};
ElementCount ScalarVF = ElementCount::getFixed(1);
assert(hasPlanWithVF(ScalarVF) &&
@@ -7267,6 +7244,7 @@ ElementCount LoopVectorizationPlanner::computeBestVF() {
// TODO: Compute scalar cost using VPlan-based cost model.
InstructionCost ScalarCost = CM.expectedCost(ScalarVF);
+ LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << ScalarCost << ".\n");
VectorizationFactor ScalarFactor(ScalarVF, ScalarCost, ScalarCost);
VectorizationFactor BestFactor = ScalarFactor;
@@ -7300,7 +7278,20 @@ ElementCount LoopVectorizationPlanner::computeBestVF() {
ProfitableVFs.push_back(CurrentFactor);
}
}
- return BestFactor.Width;
+
+#ifndef NDEBUG
+ // Select the optimal vectorization factor according to the legacy cost-model.
+ // This is now only used to verify the decisions by the new VPlan-based
+ // cost-model and will be retired once the VPlan-based cost-model is
+ // stabilized.
+ VectorizationFactor LegacyVF = selectVectorizationFactor();
+ assert(BestFactor.Width == LegacyVF.Width &&
+ " VPlan cost model and legacy cost model disagreed");
+ assert((BestFactor.Width.isScalar() || BestFactor.ScalarCost > 0) &&
+ "when vectorizing, the scalar cost must be computed.");
+#endif
+
+ return BestFactor;
}
static void AddRuntimeUnrollDisableMetaData(Loop *L) {
@@ -9971,21 +9962,19 @@ bool LoopVectorizePass::processLoop(Loop *L) {
ElementCount UserVF = Hints.getWidth();
unsigned UserIC = Hints.getInterleave();
- // Plan how to best vectorize, return the best VF and its cost.
- std::optional<VectorizationFactor> MaybeVF = LVP.plan(UserVF, UserIC);
+ // Plan how to best vectorize.
+ LVP.plan(UserVF, UserIC);
+ VectorizationFactor VF = LVP.computeBestVF();
+ unsigned IC = 1;
if (ORE->allowExtraAnalysis(LV_NAME))
LVP.emitInvalidCostRemarks(ORE);
- VectorizationFactor VF = VectorizationFactor::Disabled();
- unsigned IC = 1;
-
bool AddBranchWeights =
hasBranchWeightMD(*L->getLoopLatch()->getTerminator());
GeneratedRTChecks Checks(*PSE.getSE(), DT, LI, TTI,
F->getDataLayout(), AddBranchWeights);
- if (MaybeVF) {
- VF = *MaybeVF;
+ if (LVP.hasPlanWithVF(VF.Width)) {
// Select the interleave count.
IC = CM.selectInterleaveCount(VF.Width, VF.Cost);
@@ -10025,7 +10014,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
VectorizeLoop = false;
}
- if (!MaybeVF && UserIC > 1) {
+ if (!LVP.hasPlanWithVF(VF.Width) && UserIC > 1) {
// Tell the user interleaving was avoided up-front, despite being explicitly
// requested.
LLVM_DEBUG(dbgs() << "LV: Ignoring UserIC, because vectorization and "
@@ -10107,11 +10096,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL,
&CM, BFI, PSI, Checks);
- ElementCount BestVF = LVP.computeBestVF();
- assert(BestVF.isScalar() &&
- "VPlan cost model and legacy cost model disagreed");
- VPlan &BestPlan = LVP.getPlanFor(BestVF);
- LVP.executePlan(BestVF, IC, BestPlan, Unroller, DT, false);
+ VPlan &BestPlan = LVP.getPlanFor(VF.Width);
+ LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT, false);
ORE->emit([&]() {
return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(),
@@ -10122,20 +10108,16 @@ bool LoopVectorizePass::processLoop(Loop *L) {
} else {
// If we decided that it is *legal* to vectorize the loop, then do it.
- ElementCount BestVF = LVP.computeBestVF();
- LLVM_DEBUG(dbgs() << "VF picked by VPlan cost model: " << BestVF << "\n");
- assert(VF.Width == BestVF &&
- "VPlan cost model and legacy cost model disagreed");
- VPlan &BestPlan = LVP.getPlanFor(BestVF);
+ VPlan &BestPlan = LVP.getPlanFor(VF.Width);
// Consider vectorizing the epilogue too if it's profitable.
VectorizationFactor EpilogueVF =
- LVP.selectEpilogueVectorizationFactor(BestVF, IC);
+ LVP.selectEpilogueVectorizationFactor(VF.Width, IC);
if (EpilogueVF.Width.isVector()) {
// The first pass vectorizes the main loop and creates a scalar epilogue
// to be vectorized by executing the plan (potentially with a
diff erent
// factor) again shortly afterwards.
- EpilogueLoopVectorizationInfo EPI(BestVF, IC, EpilogueVF.Width, 1);
+ EpilogueLoopVectorizationInfo EPI(VF.Width, IC, EpilogueVF.Width, 1);
EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE,
EPI, &LVL, &CM, BFI, PSI, Checks);
@@ -10230,10 +10212,10 @@ bool LoopVectorizePass::processLoop(Loop *L) {
if (!MainILV.areSafetyChecksAdded())
DisableRuntimeUnroll = true;
} else {
- InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, BestVF,
+ InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width,
VF.MinProfitableTripCount, IC, &LVL, &CM, BFI,
PSI, Checks);
- LVP.executePlan(BestVF, IC, BestPlan, LB, DT, false);
+ LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false);
++LoopsVectorized;
// Add metadata to disable runtime unrolling a scalar loop when there
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 15acfc00251e29..f487b1de1e5b37 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1695,6 +1695,10 @@ VPlan &LoopVectorizationPlanner::getPlanFor(ElementCount VF) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LoopVectorizationPlanner::printPlans(raw_ostream &O) {
+ if (VPlans.empty()) {
+ O << "LV: No VPlans built.\n";
+ return;
+ }
for (const auto &Plan : VPlans)
if (PrintVPlansInDotFormat)
Plan->printDOT(O);
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
index 9f70a891efe76b..38af580e25c9cc 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
@@ -133,7 +133,6 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-NEXT: LV: Not Interleaving.
; CHECK-NEXT: LV: Interleaving is not beneficial.
; CHECK-NEXT: LV: Found a vectorizable loop (vscale x 4) in <stdin>
-; CHECK-NEXT: VF picked by VPlan cost model: vscale x 4
; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop
; CHECK-NEXT: Executing best plan with VF=vscale x 4, UF=1
; CHECK-NEXT: VPlan 'Final VPlan for VF={vscale x 4},UF>=1' {
@@ -336,7 +335,6 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-NEXT: LV: Not Interleaving.
; CHECK-NEXT: LV: Interleaving is not beneficial.
; CHECK-NEXT: LV: Found a vectorizable loop (vscale x 4) in <stdin>
-; CHECK-NEXT: VF picked by VPlan cost model: vscale x 4
; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop
; CHECK-NEXT: Executing best plan with VF=vscale x 4, UF=1
; CHECK-NEXT: VPlan 'Final VPlan for VF={vscale x 4},UF>=1' {
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll
index 66a50f6e3f373f..6522ed2b52b4fb 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll
@@ -154,7 +154,7 @@ exit:
; FOR (for.y) should be moved which is not currently supported.
define i32 @test_chained_first_order_recurrences_4(ptr %base) {
; CHECK-LABEL: 'test_chained_first_order_recurrences_4'
-; CHECK: No VPlan could be built for
+; CHECK: No VPlans built.
entry:
br label %loop
diff --git a/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/Inputs/x86-loopvectorize-costmodel.ll.expected b/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/Inputs/x86-loopvectorize-costmodel.ll.expected
index 5aa270e76f4c80..4b146a5c4bc3c5 100644
--- a/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/Inputs/x86-loopvectorize-costmodel.ll.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/Inputs/x86-loopvectorize-costmodel.ll.expected
@@ -11,13 +11,13 @@ target triple = "x86_64-unknown-linux-gnu"
define void @test() {
; CHECK-LABEL: 'test'
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
+; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
; CHECK: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
; CHECK: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
; CHECK: LV: Found an estimated cost of 3 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
; CHECK: LV: Found an estimated cost of 5 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
; CHECK: LV: Found an estimated cost of 22 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
; CHECK: LV: Found an estimated cost of 92 for VF 64 For instruction: %v0 = load float, ptr %in0, align 4
-; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
;
entry:
br label %for.body
More information about the llvm-commits
mailing list