[llvm] This is a draft for enabling opt-in tail-folding on vectorized epilogue. (PR #181401)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 13 11:01:38 PST 2026
github-actions[bot] wrote:
<!--LLVM CODE FORMAT COMMENT: {clang-format}-->
:warning: The C/C++ code formatter, clang-format, found issues in your code. :warning:
<details>
<summary>
You can test this locally with the following command:
</summary>
``````````bash
git-clang-format --diff origin/main HEAD --extensions h,cpp -- llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h llvm/lib/Transforms/Utils/BasicBlockUtils.cpp llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h llvm/lib/Transforms/Vectorize/LoopVectorize.cpp llvm/lib/Transforms/Vectorize/VPlan.cpp llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp llvm/lib/Transforms/Vectorize/VPlanTransforms.h --diff_from_common_commit
``````````
:warning:
The reproduction instructions above might return results for more than one PR
in a stack if you are using a stacked PR workflow. You can limit the results by
changing `origin/main` to the base branch/commit you want to compare against.
:warning:
</details>
<details>
<summary>
View the diff from clang-format here.
</summary>
``````````diff
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index b0eb4dc96..cd8ba5b4b 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -152,7 +152,7 @@ void llvm::DeleteDeadBlocks(ArrayRef <BasicBlock *> BBs, DomTreeUpdater *DTU,
#ifndef NDEBUG
// Make sure that all predecessors of each dead block is also dead.
SmallPtrSet<BasicBlock *, 4> Dead(llvm::from_range, BBs);
-// assert(Dead.size() == BBs.size() && "Duplicating blocks?");
+ // assert(Dead.size() == BBs.size() && "Duplicating blocks?");
for (auto *BB : Dead)
for (BasicBlock *Pred : predecessors(BB))
assert(Dead.count(Pred) && "All predecessors must be dead!");
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 835723362..4a2491958 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -2156,7 +2156,8 @@ void LoopVectorizationLegality::prepareToFoldTailByMasking() {
// Mark all blocks for predication, including those that ordinarily do not
// need predication such as the header block.
for (BasicBlock *BB : TheLoop->blocks()) {
- [[maybe_unused]] bool R = blockCanBePredicated(BB, SafePointers, TailFoldedMaskedOp);
+ [[maybe_unused]] bool R =
+ blockCanBePredicated(BB, SafePointers, TailFoldedMaskedOp);
assert(R && "Must be able to predicate block when tail-folding.");
}
}
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 39f8ffbb2..5cb0ac8e3 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -540,7 +540,8 @@ public:
PredicatedScalarEvolution &PSE, const LoopVectorizeHints &Hints,
OptimizationRemarkEmitter *ORE)
: OrigLoop(L), LI(LI), DT(DT), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
- EpilogueTailFoldedCM(nullptr), IAI(IAI), PSE(PSE), Hints(Hints), ORE(ORE) {}
+ EpilogueTailFoldedCM(nullptr), IAI(IAI), PSE(PSE), Hints(Hints),
+ ORE(ORE) {}
void setEpilogueTailFoldingCM(LoopVectorizationCostModel *Cost) {
EpilogueTailFoldedCM = Cost;
@@ -552,9 +553,7 @@ public:
bool isEpilogueTailFolded() const;
- void disableEpilogueTailFolding() {
- EpilogueTailFoldedCM = nullptr;
- }
+ void disableEpilogueTailFolding() { EpilogueTailFoldedCM = nullptr; }
/// Build VPlans for the specified \p UserVF and \p UserIC if they are
/// non-zero or all applicable candidate VFs otherwise. If vectorization and
@@ -603,8 +602,10 @@ public:
/// Look through the existing plans and return true if we have one with
/// vectorization factor \p VF.
bool hasPlanWithVF(ElementCount VF, bool ForEpilogue = false) const {
- return any_of((ForEpilogue && isEpilogueTailFolded()) ? EpilogueTailFoldedPlans : VPlans,
- [&](const VPlanPtr &Plan) { return Plan->hasVF(VF); });
+ return any_of((ForEpilogue && isEpilogueTailFolded())
+ ? EpilogueTailFoldedPlans
+ : VPlans,
+ [&](const VPlanPtr &Plan) { return Plan->hasVF(VF); });
}
/// Test a \p Predicate on a \p Range of VF's. Return the value of applying
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 6755f8a81..c87af8731 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -204,35 +204,33 @@ static cl::opt<unsigned> VectorizeMemoryCheckThreshold(
// and predicate the instructions accordingly. If tail-folding fails, there are
// different fallback strategies depending on these values:
namespace PreferPredicateTy {
- enum Option {
- ScalarEpilogue = 0,
- PredicateElseScalarEpilogue,
- PredicateOrDontVectorize,
- PredicatedEpilogue
- };
+enum Option {
+ ScalarEpilogue = 0,
+ PredicateElseScalarEpilogue,
+ PredicateOrDontVectorize,
+ PredicatedEpilogue
+};
} // namespace PreferPredicateTy
static cl::opt<PreferPredicateTy::Option> PreferPredicateOverEpilogue(
"prefer-predicate-over-epilogue",
- cl::init(PreferPredicateTy::ScalarEpilogue),
- cl::Hidden,
+ cl::init(PreferPredicateTy::ScalarEpilogue), cl::Hidden,
cl::desc("Tail-folding and predication preferences over creating a scalar "
"epilogue loop."),
- cl::values(clEnumValN(PreferPredicateTy::ScalarEpilogue,
- "scalar-epilogue",
- "Don't tail-predicate loops, create scalar epilogue"),
- clEnumValN(PreferPredicateTy::PredicateElseScalarEpilogue,
- "predicate-else-scalar-epilogue",
- "prefer tail-folding, create scalar epilogue if tail "
- "folding fails."),
- clEnumValN(PreferPredicateTy::PredicateOrDontVectorize,
- "predicate-dont-vectorize",
- "prefers tail-folding, don't attempt vectorization if "
- "tail-folding fails."),
- clEnumValN(PreferPredicateTy::PredicatedEpilogue,
- "predicated-epilogue",
- "prefers predicated vector epilogues, falling back on "
- "scalar epilogues if it fails.")));
+ cl::values(
+ clEnumValN(PreferPredicateTy::ScalarEpilogue, "scalar-epilogue",
+ "Don't tail-predicate loops, create scalar epilogue"),
+ clEnumValN(PreferPredicateTy::PredicateElseScalarEpilogue,
+ "predicate-else-scalar-epilogue",
+ "prefer tail-folding, create scalar epilogue if tail "
+ "folding fails."),
+ clEnumValN(PreferPredicateTy::PredicateOrDontVectorize,
+ "predicate-dont-vectorize",
+ "prefers tail-folding, don't attempt vectorization if "
+ "tail-folding fails."),
+ clEnumValN(PreferPredicateTy::PredicatedEpilogue, "predicated-epilogue",
+ "prefers predicated vector epilogues, falling back on "
+ "scalar epilogues if it fails.")));
static cl::opt<TailFoldingStyle> ForceTailFoldingStyle(
"force-tail-folding-style", cl::desc("Force the tail folding style"),
@@ -672,18 +670,15 @@ protected:
/// epilogues.
class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
public:
- EpilogueVectorizerMainLoop(Loop *OrigLoop, PredicatedScalarEvolution &PSE,
- LoopInfo *LI, DominatorTree *DT,
- const TargetTransformInfo *TTI,
- AssumptionCache *AC,
- EpilogueLoopVectorizationInfo &EPI,
- LoopVectorizationCostModel *CM,
- GeneratedRTChecks &Check, VPlan &Plan,
- bool isEpilogueTailFolded)
+ EpilogueVectorizerMainLoop(
+ Loop *OrigLoop, PredicatedScalarEvolution &PSE, LoopInfo *LI,
+ DominatorTree *DT, const TargetTransformInfo *TTI, AssumptionCache *AC,
+ EpilogueLoopVectorizationInfo &EPI, LoopVectorizationCostModel *CM,
+ GeneratedRTChecks &Check, VPlan &Plan, bool isEpilogueTailFolded)
: InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TTI, AC, EPI, CM,
Check, Plan, EPI.MainLoopVF,
EPI.MainLoopVF, EPI.MainLoopUF,
- isEpilogueTailFolded) {}
+ isEpilogueTailFolded) {}
/// Implements the interface for creating a vectorized skeleton using the
/// *main loop* strategy (i.e., the first pass of VPlan execution).
BasicBlock *createVectorizedLoopSkeleton() final;
@@ -712,14 +707,11 @@ protected:
// their epilogues.
class EpilogueVectorizerEpilogueLoop : public InnerLoopAndEpilogueVectorizer {
public:
- EpilogueVectorizerEpilogueLoop(Loop *OrigLoop, PredicatedScalarEvolution &PSE,
- LoopInfo *LI, DominatorTree *DT,
- const TargetTransformInfo *TTI,
- AssumptionCache *AC,
- EpilogueLoopVectorizationInfo &EPI,
- LoopVectorizationCostModel *CM,
- GeneratedRTChecks &Checks, VPlan &Plan,
- bool isEpilogueTailFolded)
+ EpilogueVectorizerEpilogueLoop(
+ Loop *OrigLoop, PredicatedScalarEvolution &PSE, LoopInfo *LI,
+ DominatorTree *DT, const TargetTransformInfo *TTI, AssumptionCache *AC,
+ EpilogueLoopVectorizationInfo &EPI, LoopVectorizationCostModel *CM,
+ GeneratedRTChecks &Checks, VPlan &Plan, bool isEpilogueTailFolded)
: InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TTI, AC, EPI, CM,
Checks, Plan, EPI.EpilogueVF,
EPI.EpilogueVF, EPI.EpilogueUF,
@@ -1339,12 +1331,14 @@ public:
// the cost model may still decide it's not worth it and should fall back
// on a scalar epilogue.
return ScalarEpilogueStatus == CM_ScalarEpilogueAllowed ||
- ScalarEpilogueStatus == CM_ScalarEpilogueNotNeededUsePredicatedEpilogue;
+ ScalarEpilogueStatus ==
+ CM_ScalarEpilogueNotNeededUsePredicatedEpilogue;
}
/// Returns true if tail-folding is preferred over a scalar epilogue.
bool preferPredicatedLoop() const {
- return ScalarEpilogueStatus == CM_ScalarEpilogueNotNeededUsePredicatedBody ||
+ return ScalarEpilogueStatus ==
+ CM_ScalarEpilogueNotNeededUsePredicatedBody ||
ScalarEpilogueStatus == CM_ScalarEpilogueNotAllowedUsePredicate;
}
@@ -1362,7 +1356,7 @@ public:
/// \param UserIC User specific interleave count.
void setTailFoldingStyles(bool IsScalableVF, unsigned UserIC) {
// TODO: Should probably have separate style for the main and epilogue.
-
+
assert(!ChosenTailFoldingStyle && "Tail folding must not be selected yet.");
if (!Legal->canFoldTailByMasking()) {
ChosenTailFoldingStyle = {TailFoldingStyle::None, TailFoldingStyle::None};
@@ -3690,7 +3684,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
// none were taken so far.
InterleaveInfo.invalidateGroupsRequiringScalarEpilogue();
}
- bool TailFoldedIntoMainBody =
+ bool TailFoldedIntoMainBody =
ScalarEpilogueStatus != CM_ScalarEpilogueNotNeededUsePredicatedEpilogue;
FixedScalableVFPair MaxFactors =
computeFeasibleMaxVF(MaxTC, UserVF, UserIC, TailFoldedIntoMainBody);
@@ -4536,7 +4530,7 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
}
if (!EpilogueTailFoldedCM &&
- !CM.isEpilogueVectorizationProfitable(MainLoopVF, IC)) {
+ !CM.isEpilogueVectorizationProfitable(MainLoopVF, IC)) {
LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization is not profitable for "
"this loop\n");
return Result;
@@ -4596,7 +4590,7 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
for (auto &NextVF : ProfitableVFs) {
// Skip candidate VFs without a corresponding VPlan.
if (!hasPlanWithVF(NextVF.Width,
- /*ForEpilogue*/ true))
+ /*ForEpilogue*/ true))
continue;
// Skip candidate VFs with widths >= the (estimated) runtime VF (scalable
@@ -4605,7 +4599,7 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
if ((!NextVF.Width.isScalable() && MainLoopVF.isScalable() &&
ElementCount::isKnownGE(NextVF.Width, EstimatedRuntimeVF)) ||
(NextVF.Width.isScalable() &&
- ElementCount::isKnownGE(NextVF.Width, MainLoopVF)) ||
+ ElementCount::isKnownGE(NextVF.Width, MainLoopVF)) ||
(!NextVF.Width.isScalable() && !MainLoopVF.isScalable() &&
ElementCount::isKnownGT(NextVF.Width, MainLoopVF)))
continue;
@@ -4638,8 +4632,10 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
// TODO: Need to update last flag passed to isMoreProfitable.
if (Result.Width.isScalar() ||
isMoreProfitable(NextVF, Result, MaxTripCount,
- EpilogueTailFoldedCM ? !EpilogueTailFoldedCM->foldTailByMasking() :
- !CM.foldTailByMasking(), /*IsEpilogue*/ true))
+ EpilogueTailFoldedCM
+ ? !EpilogueTailFoldedCM->foldTailByMasking()
+ : !CM.foldTailByMasking(),
+ /*IsEpilogue*/ true))
Result = NextVF;
}
@@ -6912,28 +6908,32 @@ void LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
// dbgs() << "Preparing additional predicated version of cost model\n";
EpilogueTailFoldedCM->collectValuesToIgnore();
EpilogueTailFoldedCM->collectElementTypesForWidening();
- EpilogueTailFoldedCM->setTailFoldingStyles(MaxFactors.ScalableVF.isNonZero(), UserIC);
+ EpilogueTailFoldedCM->setTailFoldingStyles(
+ MaxFactors.ScalableVF.isNonZero(), UserIC);
// If the max VF is likely to be 2, then there probably isn't much point
// generating a predicated vector epilogue.
unsigned EstimatedMaxVF = MaxFactors.FixedVF.getFixedValue();
if (MaxFactors.ScalableVF.isNonZero()) {
- unsigned EstimatedMaxScalableVF = estimateElementCount(MaxFactors.ScalableVF, CM.getVScaleForTuning());
+ unsigned EstimatedMaxScalableVF =
+ estimateElementCount(MaxFactors.ScalableVF, CM.getVScaleForTuning());
EstimatedMaxVF = std::max(EstimatedMaxVF, EstimatedMaxScalableVF);
}
- if (EpilogueTailFoldedCM->foldTailByMasking() && !EpilogueTailFoldedCM->InterleaveInfo.hasGroups() && EstimatedMaxVF > 2) {
+ if (EpilogueTailFoldedCM->foldTailByMasking() &&
+ !EpilogueTailFoldedCM->InterleaveInfo.hasGroups() &&
+ EstimatedMaxVF > 2) {
EpilogueTailFoldedCM->copyMinimalBitwidths(CM.getMinimalBitwidths());
// Invalidate interleave groups if all blocks of loop will be predicated.
- if (EpilogueTailFoldedCM->blockNeedsPredicationForAnyReason(OrigLoop->getHeader()) &&
+ if (EpilogueTailFoldedCM->blockNeedsPredicationForAnyReason(
+ OrigLoop->getHeader()) &&
!useMaskedInterleavedAccesses(TTI)) {
- LLVM_DEBUG(
- dbgs()
- << "LV: Invalidate all interleaved groups due to fold-tail by masking "
- "which requires masked-interleaved support.\n");
+ LLVM_DEBUG(dbgs() << "LV: Invalidate all interleaved groups due to "
+ "fold-tail by masking "
+ "which requires masked-interleaved support.\n");
if (EpilogueTailFoldedCM->InterleaveInfo.invalidateGroups())
- // Invalidating interleave groups also requires invalidating all decisions
- // based on them, which includes widening decisions and uniform and scalar
- // values.
+ // Invalidating interleave groups also requires invalidating all
+ // decisions based on them, which includes widening decisions and
+ // uniform and scalar values.
EpilogueTailFoldedCM->invalidateCostModelingDecisions();
}
} else
@@ -6941,7 +6941,8 @@ void LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
}
// TODO: Does this only apply for predicated main body?
- if (CM.foldTailByMasking() || (EpilogueTailFoldedCM && EpilogueTailFoldedCM->foldTailByMasking()))
+ if (CM.foldTailByMasking() ||
+ (EpilogueTailFoldedCM && EpilogueTailFoldedCM->foldTailByMasking()))
Legal->prepareToFoldTailByMasking();
ElementCount MaxUserVF =
@@ -7421,55 +7422,59 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
}
}
-if (EpilogueTailFoldedCM) {
- // Find profitable VFs for vector epilogue.
- ProfitableVFs.clear();
-
- for (auto &P : EpilogueTailFoldedPlans) {
- ArrayRef<ElementCount> VFs(P->vectorFactors().begin(),
- P->vectorFactors().end());
+ if (EpilogueTailFoldedCM) {
+ // Find profitable VFs for vector epilogue.
+ ProfitableVFs.clear();
+
+ for (auto &P : EpilogueTailFoldedPlans) {
+ ArrayRef<ElementCount> VFs(P->vectorFactors().begin(),
+ P->vectorFactors().end());
+
+ SmallVector<VPRegisterUsage, 8> RUs;
+ if (EpilogueTailFoldedCM->useMaxBandwidth(
+ TargetTransformInfo::RGK_ScalableVector) ||
+ EpilogueTailFoldedCM->useMaxBandwidth(
+ TargetTransformInfo::RGK_FixedWidthVector))
+ RUs = calculateRegisterUsageForPlan(
+ *P, VFs, TTI, EpilogueTailFoldedCM->ValuesToIgnore);
+
+ for (unsigned I = 0; I < VFs.size(); I++) {
+ ElementCount VF = VFs[I];
+ if (VF.isScalar())
+ continue;
+ if (!ForceVectorization && !willGenerateVectors(*P, VF, TTI)) {
+ LLVM_DEBUG(
+ dbgs()
+ << "LV: Not considering vector loop of width " << VF
+ << " because it will not generate any vector instructions.\n");
+ continue;
+ }
+ if (EpilogueTailFoldedCM->OptForSize && !ForceVectorization &&
+ hasReplicatorRegion(*P)) {
+ LLVM_DEBUG(
+ dbgs()
+ << "LV: Not considering vector loop of width " << VF
+ << " because it would cause replicated blocks to be generated,"
+ << " which isn't allowed when optimizing for size.\n");
+ continue;
+ }
- SmallVector<VPRegisterUsage, 8> RUs;
- if (EpilogueTailFoldedCM->useMaxBandwidth(TargetTransformInfo::RGK_ScalableVector) ||
- EpilogueTailFoldedCM->useMaxBandwidth(TargetTransformInfo::RGK_FixedWidthVector))
- RUs = calculateRegisterUsageForPlan(*P, VFs, TTI, EpilogueTailFoldedCM->ValuesToIgnore);
+ InstructionCost Cost = cost(*P, VF);
+ VectorizationFactor CurrentFactor(VF, Cost, ScalarCost);
- for (unsigned I = 0; I < VFs.size(); I++) {
- ElementCount VF = VFs[I];
- if (VF.isScalar())
- continue;
- if (!ForceVectorization && !willGenerateVectors(*P, VF, TTI)) {
- LLVM_DEBUG(
- dbgs()
- << "LV: Not considering vector loop of width " << VF
- << " because it will not generate any vector instructions.\n");
- continue;
- }
- if (EpilogueTailFoldedCM->OptForSize && !ForceVectorization && hasReplicatorRegion(*P)) {
- LLVM_DEBUG(
- dbgs()
- << "LV: Not considering vector loop of width " << VF
- << " because it would cause replicated blocks to be generated,"
- << " which isn't allowed when optimizing for size.\n");
- continue;
- }
-
- InstructionCost Cost = cost(*P, VF);
- VectorizationFactor CurrentFactor(VF, Cost, ScalarCost);
+ if (EpilogueTailFoldedCM->shouldConsiderRegPressureForVF(VF) &&
+ RUs[I].exceedsMaxNumRegs(TTI, ForceTargetNumVectorRegs)) {
+ LLVM_DEBUG(dbgs() << "LV(REG): Not considering vector loop of width "
+ << VF << " because it uses too many registers\n");
+ continue;
+ }
- if (EpilogueTailFoldedCM->shouldConsiderRegPressureForVF(VF) &&
- RUs[I].exceedsMaxNumRegs(TTI, ForceTargetNumVectorRegs)) {
- LLVM_DEBUG(dbgs() << "LV(REG): Not considering vector loop of width "
- << VF << " because it uses too many registers\n");
- continue;
+ // If profitable add it to ProfitableVF list.
+ if (isMoreProfitable(CurrentFactor, ScalarFactor, P->hasScalarTail()))
+ ProfitableVFs.push_back(CurrentFactor);
}
-
- // If profitable add it to ProfitableVF list.
- if (isMoreProfitable(CurrentFactor, ScalarFactor, P->hasScalarTail()))
- ProfitableVFs.push_back(CurrentFactor);
}
}
-}
#ifndef NDEBUG
// Select the optimal vectorization factor according to the legacy cost-model.
@@ -7820,12 +7825,10 @@ BasicBlock *EpilogueVectorizerMainLoop::emitIterationCountCheck(
}
if (ForEpilogue && isEpilogueTailFolded) {
- BranchInst &BI =
- *BranchInst::Create(Bypass, VectorPH, Builder.getFalse());
+ BranchInst &BI = *BranchInst::Create(Bypass, VectorPH, Builder.getFalse());
ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI);
} else {
- BranchInst &BI =
- *BranchInst::Create(Bypass, VectorPH, CheckMinIters);
+ BranchInst &BI = *BranchInst::Create(Bypass, VectorPH, CheckMinIters);
if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator()))
setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false);
ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI);
@@ -8332,8 +8335,9 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
auto MaxVFTimes2 = MaxVF * 2;
for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
VFRange SubRange = {VF, MaxVFTimes2};
- if (auto Plan = tryToBuildVPlanWithVPRecipes(&CM,
- std::unique_ptr<VPlan>(VPlan0->duplicate()), SubRange, &LVer)) {
+ if (auto Plan = tryToBuildVPlanWithVPRecipes(
+ &CM, std::unique_ptr<VPlan>(VPlan0->duplicate()), SubRange,
+ &LVer)) {
// Now optimize the initial VPlan.
VPlanTransforms::hoistPredicatedLoads(*Plan, PSE, OrigLoop);
VPlanTransforms::sinkPredicatedStores(*Plan, PSE, OrigLoop);
@@ -8357,12 +8361,13 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
VFRange SubRange = {VF, MaxVFTimes2};
if (auto Plan = tryToBuildVPlanWithVPRecipes(
- EpilogueTailFoldedCM, std::unique_ptr<VPlan>(VPlan0->duplicate()), SubRange, &LVer)) {
+ EpilogueTailFoldedCM, std::unique_ptr<VPlan>(VPlan0->duplicate()),
+ SubRange, &LVer)) {
bool HasScalarVF = Plan->hasScalarVFOnly();
// Now optimize the initial VPlan.
if (!HasScalarVF)
- RUN_VPLAN_PASS(VPlanTransforms::truncateToMinimalBitwidths,
- *Plan, EpilogueTailFoldedCM->getMinimalBitwidths());
+ RUN_VPLAN_PASS(VPlanTransforms::truncateToMinimalBitwidths, *Plan,
+ EpilogueTailFoldedCM->getMinimalBitwidths());
RUN_VPLAN_PASS(VPlanTransforms::optimize, *Plan);
assert(!EpilogueTailFoldedCM->foldTailWithEVL());
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
@@ -8594,7 +8599,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// for this VPlan, replace the Recipes widening its memory instructions with a
// single VPInterleaveRecipe at its insertion point.
RUN_VPLAN_PASS(VPlanTransforms::createInterleaveGroups, *Plan,
- InterleaveGroups, RecipeBuilder, Cost->isScalarEpilogueAllowed());
+ InterleaveGroups, RecipeBuilder,
+ Cost->isScalarEpilogueAllowed());
// Replace VPValues for known constant strides.
RUN_VPLAN_PASS(VPlanTransforms::replaceSymbolicStrides, *Plan, PSE,
@@ -8943,7 +8949,8 @@ static ScalarEpilogueLowering getScalarEpilogueLowering(
// 2) If set, obey the directives
if (PreferPredicateOverEpilogue.getNumOccurrences()) {
- if (ForEpilogue && PreferPredicateOverEpilogue == PreferPredicateTy::PredicatedEpilogue)
+ if (ForEpilogue &&
+ PreferPredicateOverEpilogue == PreferPredicateTy::PredicatedEpilogue)
return CM_ScalarEpilogueNotNeededUsePredicatedEpilogue;
switch (PreferPredicateOverEpilogue) {
case PreferPredicateTy::ScalarEpilogue:
@@ -9388,12 +9395,13 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
Value *ResumeV = nullptr;
if (isa<VPActiveLaneMaskPHIRecipe>(&R)) {
// Needs extracting from the start value ActiveLaneMask instruction.
- auto *ALM = cast<VPInstruction>(cast<VPActiveLaneMaskPHIRecipe>(&R)->getOperand(0));
+ auto *ALM = cast<VPInstruction>(
+ cast<VPActiveLaneMaskPHIRecipe>(&R)->getOperand(0));
assert(ALM->getOpcode() == VPInstruction::ActiveLaneMask);
assert(IVResumeVal && "must have a resume value for the canonical IV");
VPValue *VPV = Plan.getOrAddLiveIn(IVResumeVal);
ALM->setOperand(0, VPV);
- continue;
+ continue;
}
// TODO: Move setting of resume values to prepareToExecute.
if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R)) {
@@ -9591,12 +9599,13 @@ static void fixScalarResumeValuesFromBypass(LoopVectorizationPlanner &LVP,
// and runtime checks of the main loop, as well as updating various phis. \p
// InstsToMove contains instructions that need to be moved to the preheader of
// the epilogue vector loop.
-static void connectEpilogueVectorLoop(
- LoopVectorizationPlanner &LVP,
- VPlan &EpiPlan, Loop *L, EpilogueLoopVectorizationInfo &EPI,
- DominatorTree *DT, LoopVectorizationLegality &LVL,
- DenseMap<const SCEV *, Value *> &ExpandedSCEVs, GeneratedRTChecks &Checks,
- ArrayRef<Instruction *> InstsToMove) {
+static void
+connectEpilogueVectorLoop(LoopVectorizationPlanner &LVP, VPlan &EpiPlan,
+ Loop *L, EpilogueLoopVectorizationInfo &EPI,
+ DominatorTree *DT, LoopVectorizationLegality &LVL,
+ DenseMap<const SCEV *, Value *> &ExpandedSCEVs,
+ GeneratedRTChecks &Checks,
+ ArrayRef<Instruction *> InstsToMove) {
BasicBlock *VecEpilogueIterationCountCheck =
cast<VPIRBasicBlock>(EpiPlan.getEntry())->getIRBasicBlock();
@@ -9674,12 +9683,13 @@ static void connectEpilogueVectorLoop(
auto IP = VecEpiloguePreHeader->getFirstNonPHIIt();
for (auto *I : InstsToMove)
I->moveBefore(IP);
-
+
// VecEpilogueIterationCountCheck conditionally skips over the epilogue loop
// after executing the main loop. We need to update the resume values of
// inductions and reductions during epilogue vectorization.
- fixScalarResumeValuesFromBypass(LVP, VecEpilogueIterationCountCheck, L, EpiPlan,
- LVL, ExpandedSCEVs, EPI.VectorTripCount);
+ fixScalarResumeValuesFromBypass(LVP, VecEpilogueIterationCountCheck, L,
+ EpiPlan, LVL, ExpandedSCEVs,
+ EPI.VectorTripCount);
}
bool LoopVectorizePass::processLoop(Loop *L) {
@@ -9871,8 +9881,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
ScalarEpilogueLowering EpilogueTailFoldedSEL =
getScalarEpilogueLowering(F, L, Hints, OptForSize, TTI, TLI, LVL, &IAI,
/*ForEpilogue*/ true);
- LoopVectorizationCostModel EpilogueTailFoldedCM(EpilogueTailFoldedSEL, L, PSE, LI, &LVL, *TTI, TLI, DB, AC, ORE,
- GetBFI, F, &Hints, IAI, OptForSize);
+ LoopVectorizationCostModel EpilogueTailFoldedCM(
+ EpilogueTailFoldedSEL, L, PSE, LI, &LVL, *TTI, TLI, DB, AC, ORE, GetBFI,
+ F, &Hints, IAI, OptForSize);
// Use the planner for vectorization.
LoopVectorizationPlanner LVP(L, LI, DT, TLI, *TTI, &LVL, CM, IAI, PSE, Hints,
@@ -9885,8 +9896,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
UserIC = 1;
// What about predicated hint?
- if (EpilogueTailFoldedSEL == CM_ScalarEpilogueNotNeededUsePredicatedEpilogue
- && !CM.requiresScalarEpilogue(true)) {
+ if (EpilogueTailFoldedSEL ==
+ CM_ScalarEpilogueNotNeededUsePredicatedEpilogue &&
+ !CM.requiresScalarEpilogue(true)) {
bool HasReductions = !LVL.getReductionVars().empty();
bool HasSelectCmpReductions =
HasReductions &&
@@ -9974,8 +9986,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
"Ignoring user-specified interleave count due to possibly "
"unsafe dependencies in the loop."};
InterleaveLoop = false;
- } else if (!LVP.hasPlanWithVF(VF.Width) &&
- UserIC > 1) {
+ } else if (!LVP.hasPlanWithVF(VF.Width) && UserIC > 1) {
// Tell the user interleaving was avoided up-front, despite being explicitly
// requested.
LLVM_DEBUG(dbgs() << "LV: Ignoring UserIC, because vectorization and "
@@ -10113,16 +10124,19 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// Second pass vectorizes the epilogue and adjusts the control flow
// edges from the first pass.
- EpilogueVectorizerEpilogueLoop EpilogILV(L, PSE, LI, DT, TTI, AC, EPI,
- LVP.isEpilogueTailFolded() ? LVP.getEpilogueTailFoldingCM() : &CM,
- Checks, BestEpiPlan, LVP.isEpilogueTailFolded());
+ EpilogueVectorizerEpilogueLoop EpilogILV(
+ L, PSE, LI, DT, TTI, AC, EPI,
+ LVP.isEpilogueTailFolded() ? LVP.getEpilogueTailFoldingCM() : &CM,
+ Checks, BestEpiPlan, LVP.isEpilogueTailFolded());
SmallVector<Instruction *> InstsToMove = preparePlanForEpilogueVectorLoop(
- BestEpiPlan, L, ExpandedSCEVs, EPI, LVP.isEpilogueTailFolded() ? *LVP.getEpilogueTailFoldingCM() : CM, *PSE.getSE());
+ BestEpiPlan, L, ExpandedSCEVs, EPI,
+ LVP.isEpilogueTailFolded() ? *LVP.getEpilogueTailFoldingCM() : CM,
+ *PSE.getSE());
LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV, DT,
true);
if (!LVP.isEpilogueTailFolded())
- connectEpilogueVectorLoop(LVP, BestEpiPlan, L, EPI, DT, LVL, ExpandedSCEVs,
- Checks, InstsToMove);
+ connectEpilogueVectorLoop(LVP, BestEpiPlan, L, EPI, DT, LVL,
+ ExpandedSCEVs, Checks, InstsToMove);
++LoopsEpilogueVectorized;
} else {
InnerLoopVectorizer LB(L, PSE, LI, DT, TTI, AC, VF.Width, IC, &CM, Checks,
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index a0dc62020..009576adc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1598,8 +1598,8 @@ VPlan &LoopVectorizationPlanner::getPlanFor(ElementCount VF,
1 &&
"Multiple VPlans for VF.");
- for (const VPlanPtr &Plan :
- UseEpilogueTailFoldedPlans ? EpilogueTailFoldedPlans : VPlans) {
+ for (const VPlanPtr &Plan :
+ UseEpilogueTailFoldedPlans ? EpilogueTailFoldedPlans : VPlans) {
if (Plan->hasVF(VF))
return *Plan.get();
}
``````````
</details>
https://github.com/llvm/llvm-project/pull/181401
More information about the llvm-commits mailing list