[llvm] f40a790 - [LV] Move selecting vectorization factor logic to LVP (NFC).
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sat May 13 04:29:07 PDT 2023
Author: Florian Hahn
Date: 2023-05-13T12:28:14+01:00
New Revision: f40a7901d1762d2715e14074825bd195b3675874
URL: https://github.com/llvm/llvm-project/commit/f40a7901d1762d2715e14074825bd195b3675874
DIFF: https://github.com/llvm/llvm-project/commit/f40a7901d1762d2715e14074825bd195b3675874.diff
LOG: [LV] Move selecting vectorization factor logic to LVP (NFC).
Split off from D143938. This moves the planning logic to select the
vectorization factor to LoopVectorizationPlanner as a step towards only
computing costs for individual VFs in LoopVectorizationCostModel and do
planning in LVP.
Reviewed By: Ayal
Differential Revision: https://reviews.llvm.org/D150197
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 106a2476d070..d67772129cef 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -25,6 +25,7 @@
#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
#include "VPlan.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/Support/InstructionCost.h"
namespace llvm {
@@ -217,6 +218,16 @@ struct VectorizationFactor {
}
};
+/// ElementCountComparator creates a total ordering for ElementCount
+/// for the purposes of using it in a set structure.
+struct ElementCountComparator {
+ bool operator()(const ElementCount &LHS, const ElementCount &RHS) const {
+ return std::make_tuple(LHS.isScalable(), LHS.getKnownMinValue()) <
+ std::make_tuple(RHS.isScalable(), RHS.getKnownMinValue());
+ }
+};
+using ElementCountSet = SmallSet<ElementCount, 16, ElementCountComparator>;
+
/// A class that represents two vectorization factors (initialized with 0 by
/// default). One for fixed-width vectorization and one for scalable
/// vectorization. This can be used by the vectorizer to choose from a range of
@@ -280,6 +291,9 @@ class LoopVectorizationPlanner {
SmallVector<VPlanPtr, 4> VPlans;
+ /// Profitable vector factors.
+ SmallVector<VectorizationFactor, 8> ProfitableVFs;
+
/// A builder used to construct the current plan.
VPBuilder Builder;
@@ -342,6 +356,12 @@ class LoopVectorizationPlanner {
/// Check if the number of runtime checks exceeds the threshold.
bool requiresTooManyRuntimeChecks() const;
+ /// \return The most profitable vectorization factor and the cost of that VF
+ /// for vectorizing the epilogue. Returns VectorizationFactor::Disabled if
+ /// epilogue vectorization is not supported for the loop.
+ VectorizationFactor
+ selectEpilogueVectorizationFactor(const ElementCount MaxVF);
+
protected:
/// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
/// according to the information gathered by Legal when it checked if it is
@@ -376,6 +396,20 @@ class LoopVectorizationPlanner {
void adjustRecipesForReductions(VPBasicBlock *LatchVPBB, VPlanPtr &Plan,
VPRecipeBuilder &RecipeBuilder,
ElementCount MinVF);
+
+ /// \return The most profitable vectorization factor and the cost of that VF.
+ /// This method checks every VF in \p CandidateVFs.
+ VectorizationFactor
+ selectVectorizationFactor(const ElementCountSet &CandidateVFs);
+
+ /// Returns true if the per-lane cost of VectorizationFactor A is lower than
+ /// that of B.
+ bool isMoreProfitable(const VectorizationFactor &A,
+ const VectorizationFactor &B) const;
+
+ /// Determines if we have the infrastructure to vectorize the loop and its
+ /// epilogue, assuming the main loop is vectorized by \p VF.
+ bool isCandidateForEpilogueVectorization(const ElementCount VF) const;
};
} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f51679eeadb3..0ca5649c8e49 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1169,16 +1169,6 @@ enum ScalarEpilogueLowering {
CM_ScalarEpilogueNotAllowedUsePredicate
};
-/// ElementCountComparator creates a total ordering for ElementCount
-/// for the purposes of using it in a set structure.
-struct ElementCountComparator {
- bool operator()(const ElementCount &LHS, const ElementCount &RHS) const {
- return std::make_tuple(LHS.isScalable(), LHS.getKnownMinValue()) <
- std::make_tuple(RHS.isScalable(), RHS.getKnownMinValue());
- }
-};
-using ElementCountSet = SmallSet<ElementCount, 16, ElementCountComparator>;
-
using InstructionVFPair = std::pair<Instruction *, ElementCount>;
/// LoopVectorizationCostModel - estimates the expected speedups due to
@@ -1212,18 +1202,6 @@ class LoopVectorizationCostModel {
/// otherwise.
bool runtimeChecksRequired();
- /// \return The most profitable vectorization factor and the cost of that VF.
- /// This method checks every VF in \p CandidateVFs.
- VectorizationFactor
- selectVectorizationFactor(const ElementCountSet &CandidateVFs);
-
- /// \return The most profitable vectorization factor and the cost of that VF
- /// for vectorizing the epilogue. Returns VectorizationFactor::Disabled if
- /// epilogue vectorization is not supported for the loop.
- VectorizationFactor
- selectEpilogueVectorizationFactor(const ElementCount MaxVF,
- const LoopVectorizationPlanner &LVP);
-
/// Setup cost-based decisions for user vectorization factor.
/// \return true if the UserVF is a feasible VF to be chosen.
bool selectUserVectorizationFactor(ElementCount UserVF) {
@@ -1641,11 +1619,6 @@ class LoopVectorizationCostModel {
Function **Variant,
bool *NeedsMask = nullptr) const;
- /// Returns true if the per-lane cost of VectorizationFactor A is lower than
- /// that of B.
- bool isMoreProfitable(const VectorizationFactor &A,
- const VectorizationFactor &B) const;
-
/// Invalidates decisions already taken by the cost model.
void invalidateCostModelingDecisions() {
WideningDecisions.clear();
@@ -1653,6 +1626,29 @@ class LoopVectorizationCostModel {
Scalars.clear();
}
+ /// The vectorization cost is a combination of the cost itself and a boolean
+ /// indicating whether any of the contributing operations will actually
+ /// operate on vector values after type legalization in the backend. If this
+ /// latter value is false, then all operations will be scalarized (i.e. no
+ /// vectorization has actually taken place).
+ using VectorizationCostTy = std::pair<InstructionCost, bool>;
+
+ /// Returns the expected execution cost. The unit of the cost does
+ /// not matter because we use the 'cost' units to compare different
+ /// vector widths. The cost that is returned is *not* normalized by
+ /// the factor width. If \p Invalid is not nullptr, this function
+ /// will add a pair(Instruction*, ElementCount) to \p Invalid for
+ /// each instruction that has an Invalid cost for the given VF.
+ VectorizationCostTy
+ expectedCost(ElementCount VF,
+ SmallVectorImpl<InstructionVFPair> *Invalid = nullptr);
+
+ bool hasPredStores() const { return NumPredStores > 0; }
+
+ /// Returns true if epilogue vectorization is considered profitable, and
+ /// false otherwise.
+ /// \p VF is the vectorization factor chosen for the original loop.
+ bool isEpilogueVectorizationProfitable(const ElementCount VF) const;
private:
unsigned NumPredStores = 0;
@@ -1679,23 +1675,6 @@ class LoopVectorizationCostModel {
/// of elements.
ElementCount getMaxLegalScalableVF(unsigned MaxSafeElements);
- /// The vectorization cost is a combination of the cost itself and a boolean
- /// indicating whether any of the contributing operations will actually
- /// operate on vector values after type legalization in the backend. If this
- /// latter value is false, then all operations will be scalarized (i.e. no
- /// vectorization has actually taken place).
- using VectorizationCostTy = std::pair<InstructionCost, bool>;
-
- /// Returns the expected execution cost. The unit of the cost does
- /// not matter because we use the 'cost' units to compare different
- /// vector widths. The cost that is returned is *not* normalized by
- /// the factor width. If \p Invalid is not nullptr, this function
- /// will add a pair(Instruction*, ElementCount) to \p Invalid for
- /// each instruction that has an Invalid cost for the given VF.
- VectorizationCostTy
- expectedCost(ElementCount VF,
- SmallVectorImpl<InstructionVFPair> *Invalid = nullptr);
-
/// Returns the execution time cost of an instruction for a given vector
/// width. Vector width of one means scalar.
VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF);
@@ -1858,15 +1837,6 @@ class LoopVectorizationCostModel {
Ops, [this, VF](Value *V) { return this->needsExtract(V, VF); }));
}
- /// Determines if we have the infrastructure to vectorize the loop and its
- /// epilogue, assuming the main loop is vectorized by \p VF.
- bool isCandidateForEpilogueVectorization(const ElementCount VF) const;
-
- /// Returns true if epilogue vectorization is considered profitable, and
- /// false otherwise.
- /// \p VF is the vectorization factor chosen for the original loop.
- bool isEpilogueVectorizationProfitable(const ElementCount VF) const;
-
public:
/// The loop that we evaluate.
Loop *TheLoop;
@@ -1912,9 +1882,6 @@ class LoopVectorizationCostModel {
/// All element types found in the loop.
SmallPtrSet<Type *, 16> ElementTypesInLoop;
-
- /// Profitable vector factors.
- SmallVector<VectorizationFactor, 8> ProfitableVFs;
};
} // end namespace llvm
@@ -5353,12 +5320,12 @@ getVScaleForTuning(const Loop *L, const TargetTransformInfo &TTI) {
return TTI.getVScaleForTuning();
}
-bool LoopVectorizationCostModel::isMoreProfitable(
+bool LoopVectorizationPlanner::isMoreProfitable(
const VectorizationFactor &A, const VectorizationFactor &B) const {
InstructionCost CostA = A.Cost;
InstructionCost CostB = B.Cost;
- unsigned MaxTripCount = PSE.getSE()->getSmallConstantMaxTripCount(TheLoop);
+ unsigned MaxTripCount = PSE.getSE()->getSmallConstantMaxTripCount(OrigLoop);
if (!A.Width.isScalable() && !B.Width.isScalable() && MaxTripCount) {
// If the trip count is a known (possibly small) constant, the trip count
@@ -5372,9 +5339,9 @@ bool LoopVectorizationCostModel::isMoreProfitable(
auto GetCostForTC = [MaxTripCount, this](unsigned VF,
InstructionCost VectorCost,
InstructionCost ScalarCost) {
- return foldTailByMasking() ? VectorCost * divideCeil(MaxTripCount, VF)
- : VectorCost * (MaxTripCount / VF) +
- ScalarCost * (MaxTripCount % VF);
+ return CM.foldTailByMasking() ? VectorCost * divideCeil(MaxTripCount, VF)
+ : VectorCost * (MaxTripCount / VF) +
+ ScalarCost * (MaxTripCount % VF);
};
auto RTCostA = GetCostForTC(A.Width.getFixedValue(), CostA, A.ScalarCost);
auto RTCostB = GetCostForTC(B.Width.getFixedValue(), CostB, B.ScalarCost);
@@ -5385,7 +5352,7 @@ bool LoopVectorizationCostModel::isMoreProfitable(
// Improve estimate for the vector width if it is scalable.
unsigned EstimatedWidthA = A.Width.getKnownMinValue();
unsigned EstimatedWidthB = B.Width.getKnownMinValue();
- if (std::optional<unsigned> VScale = getVScaleForTuning(TheLoop, TTI)) {
+ if (std::optional<unsigned> VScale = getVScaleForTuning(OrigLoop, TTI)) {
if (A.Width.isScalable())
EstimatedWidthA *= *VScale;
if (B.Width.isScalable())
@@ -5468,9 +5435,10 @@ static void emitInvalidCostRemarks(SmallVector<InstructionVFPair> InvalidCosts,
} while (!Tail.empty());
}
-VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
+VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor(
const ElementCountSet &VFCandidates) {
- InstructionCost ExpectedCost = expectedCost(ElementCount::getFixed(1)).first;
+ InstructionCost ExpectedCost =
+ CM.expectedCost(ElementCount::getFixed(1)).first;
LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << ExpectedCost << ".\n");
assert(ExpectedCost.isValid() && "Unexpected invalid cost for scalar loop");
assert(VFCandidates.count(ElementCount::getFixed(1)) &&
@@ -5480,7 +5448,7 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
ExpectedCost);
VectorizationFactor ChosenFactor = ScalarCost;
- bool ForceVectorization = Hints->getForce() == LoopVectorizeHints::FK_Enabled;
+ bool ForceVectorization = Hints.getForce() == LoopVectorizeHints::FK_Enabled;
if (ForceVectorization && VFCandidates.size() > 1) {
// Ignore scalar width, because the user explicitly wants vectorization.
// Initialize cost to max so that VF = 2 is, at least, chosen during cost
@@ -5494,12 +5462,13 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
if (i.isScalar())
continue;
- VectorizationCostTy C = expectedCost(i, &InvalidCosts);
+ LoopVectorizationCostModel::VectorizationCostTy C =
+ CM.expectedCost(i, &InvalidCosts);
VectorizationFactor Candidate(i, C.first, ScalarCost.ScalarCost);
#ifndef NDEBUG
unsigned AssumedMinimumVscale = 1;
- if (std::optional<unsigned> VScale = getVScaleForTuning(TheLoop, TTI))
+ if (std::optional<unsigned> VScale = getVScaleForTuning(OrigLoop, TTI))
AssumedMinimumVscale = *VScale;
unsigned Width =
Candidate.Width.isScalable()
@@ -5528,12 +5497,13 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
ChosenFactor = Candidate;
}
- emitInvalidCostRemarks(InvalidCosts, ORE, TheLoop);
+ emitInvalidCostRemarks(InvalidCosts, ORE, OrigLoop);
- if (!EnableCondStoresVectorization && NumPredStores) {
- reportVectorizationFailure("There are conditional stores.",
+ if (!EnableCondStoresVectorization && CM.hasPredStores()) {
+ reportVectorizationFailure(
+ "There are conditional stores.",
"store that is conditionally executed prevents vectorization",
- "ConditionalStore", ORE, TheLoop);
+ "ConditionalStore", ORE, OrigLoop);
ChosenFactor = ScalarCost;
}
@@ -5545,11 +5515,11 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
return ChosenFactor;
}
-bool LoopVectorizationCostModel::isCandidateForEpilogueVectorization(
+bool LoopVectorizationPlanner::isCandidateForEpilogueVectorization(
ElementCount VF) const {
// Cross iteration phis such as reductions need special handling and are
// currently unsupported.
- if (any_of(TheLoop->getHeader()->phis(),
+ if (any_of(OrigLoop->getHeader()->phis(),
[&](PHINode &Phi) { return Legal->isFixedOrderRecurrence(&Phi); }))
return false;
@@ -5558,20 +5528,20 @@ bool LoopVectorizationCostModel::isCandidateForEpilogueVectorization(
for (const auto &Entry : Legal->getInductionVars()) {
// Look for uses of the value of the induction at the last iteration.
Value *PostInc =
- Entry.first->getIncomingValueForBlock(TheLoop->getLoopLatch());
+ Entry.first->getIncomingValueForBlock(OrigLoop->getLoopLatch());
for (User *U : PostInc->users())
- if (!TheLoop->contains(cast<Instruction>(U)))
+ if (!OrigLoop->contains(cast<Instruction>(U)))
return false;
// Look for uses of penultimate value of the induction.
for (User *U : Entry.first->users())
- if (!TheLoop->contains(cast<Instruction>(U)))
+ if (!OrigLoop->contains(cast<Instruction>(U)))
return false;
}
// Epilogue vectorization code has not been auditted to ensure it handles
// non-latch exits properly. It may be fine, but it needs auditted and
// tested.
- if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch())
+ if (OrigLoop->getExitingBlock() != OrigLoop->getLoopLatch())
return false;
return true;
@@ -5601,16 +5571,15 @@ bool LoopVectorizationCostModel::isEpilogueVectorizationProfitable(
return false;
}
-VectorizationFactor
-LoopVectorizationCostModel::selectEpilogueVectorizationFactor(
- const ElementCount MainLoopVF, const LoopVectorizationPlanner &LVP) {
+VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
+ const ElementCount MainLoopVF) {
VectorizationFactor Result = VectorizationFactor::Disabled();
if (!EnableEpilogueVectorization) {
LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization is disabled.\n");
return Result;
}
- if (!isScalarEpilogueAllowed()) {
+ if (!CM.isScalarEpilogueAllowed()) {
LLVM_DEBUG(dbgs() << "LEV: Unable to vectorize epilogue because no "
"epilogue is allowed.\n");
return Result;
@@ -5627,7 +5596,7 @@ LoopVectorizationCostModel::selectEpilogueVectorizationFactor(
if (EpilogueVectorizationForceVF > 1) {
LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization factor is forced.\n");
ElementCount ForcedEC = ElementCount::getFixed(EpilogueVectorizationForceVF);
- if (LVP.hasPlanWithVF(ForcedEC))
+ if (hasPlanWithVF(ForcedEC))
return {ForcedEC, 0, 0};
else {
LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization forced factor is not "
@@ -5636,14 +5605,14 @@ LoopVectorizationCostModel::selectEpilogueVectorizationFactor(
}
}
- if (TheLoop->getHeader()->getParent()->hasOptSize() ||
- TheLoop->getHeader()->getParent()->hasMinSize()) {
+ if (OrigLoop->getHeader()->getParent()->hasOptSize() ||
+ OrigLoop->getHeader()->getParent()->hasMinSize()) {
LLVM_DEBUG(
dbgs() << "LEV: Epilogue vectorization skipped due to opt for size.\n");
return Result;
}
- if (!isEpilogueVectorizationProfitable(MainLoopVF)) {
+ if (!CM.isEpilogueVectorizationProfitable(MainLoopVF)) {
LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization is not profitable for "
"this loop\n");
return Result;
@@ -5655,7 +5624,7 @@ LoopVectorizationCostModel::selectEpilogueVectorizationFactor(
ElementCount EstimatedRuntimeVF = MainLoopVF;
if (MainLoopVF.isScalable()) {
EstimatedRuntimeVF = ElementCount::getFixed(MainLoopVF.getKnownMinValue());
- if (std::optional<unsigned> VScale = getVScaleForTuning(TheLoop, TTI))
+ if (std::optional<unsigned> VScale = getVScaleForTuning(OrigLoop, TTI))
EstimatedRuntimeVF *= *VScale;
}
@@ -5664,7 +5633,7 @@ LoopVectorizationCostModel::selectEpilogueVectorizationFactor(
ElementCount::isKnownLT(NextVF.Width, EstimatedRuntimeVF)) ||
ElementCount::isKnownLT(NextVF.Width, MainLoopVF)) &&
(Result.Width.isScalar() || isMoreProfitable(NextVF, Result)) &&
- LVP.hasPlanWithVF(NextVF.Width))
+ hasPlanWithVF(NextVF.Width))
Result = NextVF;
if (Result != VectorizationFactor::Disabled())
@@ -7624,7 +7593,7 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
return VectorizationFactor::Disabled();
// Select the optimal vectorization factor.
- VectorizationFactor VF = CM.selectVectorizationFactor(VFCandidates);
+ VectorizationFactor VF = selectVectorizationFactor(VFCandidates);
assert((VF.Width.isScalar() || VF.ScalarCost > 0) && "when vectorizing, the scalar cost must be non-zero.");
if (!hasPlanWithVF(VF.Width)) {
LLVM_DEBUG(dbgs() << "LV: No VPlan could be built for " << VF.Width
@@ -10392,7 +10361,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// Consider vectorizing the epilogue too if it's profitable.
VectorizationFactor EpilogueVF =
- CM.selectEpilogueVectorizationFactor(VF.Width, LVP);
+ LVP.selectEpilogueVectorizationFactor(VF.Width);
if (EpilogueVF.Width.isVector()) {
// The first pass vectorizes the main loop and creates a scalar epilogue
More information about the llvm-commits
mailing list