[llvm] [VPlan] Create header phis once, after constructing VPlan0 (NFC). (PR #168291)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sun Dec 14 13:04:22 PST 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/168291
>From 5b739c78e8d48bea689091f6b421458c4046775b Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 16 Nov 2025 21:37:24 +0000
Subject: [PATCH 1/2] [VPlan] Create header phis once, after constructing
VPlan0 (NFC).
---
.../Transforms/Vectorize/LoopVectorize.cpp | 136 ++++--------------
.../Transforms/Vectorize/VPRecipeBuilder.h | 4 -
llvm/lib/Transforms/Vectorize/VPlan.h | 7 +
.../Vectorize/VPlanConstruction.cpp | 102 +++++++++++++
.../Transforms/Vectorize/VPlanTransforms.h | 11 ++
5 files changed, 144 insertions(+), 116 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 082489f70f1c6..dfb39e2b7be20 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1405,6 +1405,11 @@ class LoopVectorizationCostModel {
return InLoopReductions.contains(Phi);
}
+ /// Returns the set of in-loop reduction PHIs.
+ const SmallPtrSetImpl<PHINode *> &getInLoopReductions() const {
+ return InLoopReductions;
+ }
+
/// Returns true if the predicated reduction select should be used to set the
/// incoming value for the reduction phi.
bool usePredicatedReductionSelect() const {
@@ -7683,60 +7688,6 @@ VPWidenMemoryRecipe *VPRecipeBuilder::tryToWidenMemory(VPInstruction *VPI,
Consecutive, Reverse, *VPI, VPI->getDebugLoc());
}
-/// Creates a VPWidenIntOrFpInductionRecipe for \p PhiR. If needed, it will
-/// also insert a recipe to expand the step for the induction recipe.
-static VPWidenIntOrFpInductionRecipe *
-createWidenInductionRecipes(VPInstruction *PhiR,
- const InductionDescriptor &IndDesc, VPlan &Plan,
- ScalarEvolution &SE, Loop &OrigLoop) {
- assert(SE.isLoopInvariant(IndDesc.getStep(), &OrigLoop) &&
- "step must be loop invariant");
-
- VPValue *Start = PhiR->getOperand(0);
- assert((Plan.getLiveIn(IndDesc.getStartValue()) == Start ||
- (SE.isSCEVable(IndDesc.getStartValue()->getType()) &&
- SE.getSCEV(IndDesc.getStartValue()) ==
- vputils::getSCEVExprForVPValue(Start, SE))) &&
- "Start VPValue must match IndDesc's start value");
-
- // It is always safe to copy over the NoWrap and FastMath flags. In
- // particular, when folding tail by masking, the masked-off lanes are never
- // used, so it is safe.
- VPIRFlags Flags = vputils::getFlagsFromIndDesc(IndDesc);
- VPValue *Step =
- vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep());
-
- // Update wide induction increments to use the same step as the corresponding
- // wide induction. This enables detecting induction increments directly in
- // VPlan and removes redundant splats.
- using namespace llvm::VPlanPatternMatch;
- if (match(PhiR->getOperand(1), m_Add(m_Specific(PhiR), m_VPValue())))
- PhiR->getOperand(1)->getDefiningRecipe()->setOperand(1, Step);
-
- PHINode *Phi = cast<PHINode>(PhiR->getUnderlyingInstr());
- return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, &Plan.getVF(),
- IndDesc, Flags, PhiR->getDebugLoc());
-}
-
-VPHeaderPHIRecipe *
-VPRecipeBuilder::tryToOptimizeInductionPHI(VPInstruction *VPI) {
- auto *Phi = cast<PHINode>(VPI->getUnderlyingInstr());
-
- // Check if this is an integer or fp induction. If so, build the recipe that
- // produces its scalar and vector values.
- if (auto *II = Legal->getIntOrFpInductionDescriptor(Phi))
- return createWidenInductionRecipes(VPI, *II, Plan, *PSE.getSE(), *OrigLoop);
-
- // Check if this is pointer induction. If so, build the recipe for it.
- if (auto *II = Legal->getPointerInductionDescriptor(Phi)) {
- VPValue *Step = vputils::getOrCreateVPValueForSCEVExpr(Plan, II->getStep());
- return new VPWidenPointerInductionRecipe(Phi, VPI->getOperand(0), Step,
- &Plan.getVFxUF(), *II,
- VPI->getDebugLoc());
- }
- return nullptr;
-}
-
VPWidenIntOrFpInductionRecipe *
VPRecipeBuilder::tryToOptimizeInductionTruncate(VPInstruction *VPI,
VFRange &Range) {
@@ -8220,54 +8171,12 @@ bool VPRecipeBuilder::getScaledReductions(
VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
VFRange &Range) {
- // First, check for specific widening recipes that deal with inductions, Phi
- // nodes, calls and memory operations.
- VPRecipeBase *Recipe;
- if (auto *PhiR = dyn_cast<VPPhi>(R)) {
- VPBasicBlock *Parent = PhiR->getParent();
- [[maybe_unused]] VPRegionBlock *LoopRegionOf =
- Parent->getEnclosingLoopRegion();
- assert(LoopRegionOf && LoopRegionOf->getEntry() == Parent &&
- "Non-header phis should have been handled during predication");
- auto *Phi = cast<PHINode>(R->getUnderlyingInstr());
- assert(R->getNumOperands() == 2 && "Must have 2 operands for header phis");
- if ((Recipe = tryToOptimizeInductionPHI(PhiR)))
- return Recipe;
-
- assert((Legal->isReductionVariable(Phi) ||
- Legal->isFixedOrderRecurrence(Phi)) &&
- "can only widen reductions and fixed-order recurrences here");
- VPValue *StartV = R->getOperand(0);
- VPValue *BackedgeValue = R->getOperand(1);
- if (Legal->isReductionVariable(Phi)) {
- const RecurrenceDescriptor &RdxDesc = Legal->getRecurrenceDescriptor(Phi);
- assert(RdxDesc.getRecurrenceStartValue() ==
- Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
-
- // If the PHI is used by a partial reduction, set the scale factor.
- bool UseInLoopReduction = CM.isInLoopReduction(Phi);
- bool UseOrderedReductions = CM.useOrderedReductions(RdxDesc);
- unsigned ScaleFactor =
- getScalingForReduction(RdxDesc.getLoopExitInstr()).value_or(1);
-
- return new VPReductionPHIRecipe(
- Phi, RdxDesc.getRecurrenceKind(), *StartV, *BackedgeValue,
- getReductionStyle(UseInLoopReduction, UseOrderedReductions,
- ScaleFactor),
- RdxDesc.hasUsesOutsideReductionChain());
- }
-
- // TODO: Currently fixed-order recurrences are modeled as chains of
- // first-order recurrences. If there are no users of the intermediate
- // recurrences in the chain, the fixed order recurrence should be modeled
- // directly, enabling more efficient codegen.
- return new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV, *BackedgeValue);
- }
-
- assert(!R->isPhi() && "only VPPhi nodes expected at this point");
+ // First, check for specific widening recipes that deal with optimizing
+ // truncates, calls and memory operations.
+ assert(!R->isPhi() && "phis must be handled earlier");
+ VPRecipeBase *Recipe;
auto *VPI = cast<VPInstruction>(R);
- Instruction *Instr = R->getUnderlyingInstr();
if (VPI->getOpcode() == Instruction::Trunc &&
(Recipe = tryToOptimizeInductionTruncate(VPI, Range)))
return Recipe;
@@ -8280,6 +8189,7 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
if (VPI->getOpcode() == Instruction::Call)
return tryToWidenCall(VPI, Range);
+ Instruction *Instr = R->getUnderlyingInstr();
if (VPI->getOpcode() == Instruction::Store)
if (auto HistInfo = Legal->getHistogramInfo(cast<StoreInst>(Instr)))
return tryToWidenHistogram(*HistInfo, VPI);
@@ -8327,6 +8237,9 @@ VPRecipeBuilder::tryToCreatePartialReduction(VPInstruction *Reduction,
cast<VPReductionRecipe>(BinOpRecipe)->isPartialReduction()))
std::swap(BinOp, Accumulator);
+ if (auto *RedPhiR = dyn_cast<VPReductionPHIRecipe>(Accumulator))
+ RedPhiR->setVFScaleFactor(ScaleFactor);
+
assert(ScaleFactor ==
vputils::getVFScaleFactor(Accumulator->getDefiningRecipe()) &&
"all accumulators in chain must have same scale factor");
@@ -8374,6 +8287,12 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
OrigLoop, *LI, Legal->getWidestInductionType(),
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), PSE, &LVer);
+ // Create recipes for header phis.
+ VPlanTransforms::createHeaderPhiRecipes(
+ *VPlan0, *PSE.getSE(), *OrigLoop, Legal->getInductionVars(),
+ Legal->getReductionVars(), Legal->getFixedOrderRecurrences(),
+ CM.getInLoopReductions(), Hints.allowReordering());
+
auto MaxVFTimes2 = MaxVF * 2;
for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
VFRange SubRange = {VF, MaxVFTimes2};
@@ -8496,25 +8415,18 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// Mapping from VPValues in the initial plan to their widened VPValues. Needed
// temporarily to update created block masks.
DenseMap<VPValue *, VPValue *> Old2New;
+
+ // Now process all other blocks and instructions.
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
// Convert input VPInstructions to widened recipes.
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
- auto *SingleDef = cast<VPSingleDefRecipe>(&R);
- auto *UnderlyingValue = SingleDef->getUnderlyingValue();
- // Skip recipes that do not need transforming, including canonical IV,
- // wide canonical IV and VPInstructions without underlying values. The
- // latter are added above for masking.
- // FIXME: Migrate code relying on the underlying instruction from VPlan0
- // to construct recipes below to not use the underlying instruction.
- if (isa<VPCanonicalIVPHIRecipe, VPWidenCanonicalIVRecipe, VPBlendRecipe>(
- &R) ||
- (isa<VPInstruction>(&R) && !UnderlyingValue))
+ auto *SingleDef = dyn_cast<VPInstruction>(&R);
+ if (!SingleDef || !SingleDef->getUnderlyingValue())
continue;
- assert(isa<VPInstruction>(&R) && UnderlyingValue && "unsupported recipe");
// TODO: Gradually replace uses of underlying instruction by analyses on
// VPlan.
- Instruction *Instr = cast<Instruction>(UnderlyingValue);
+ Instruction *Instr = cast<Instruction>(SingleDef->getUnderlyingValue());
Builder.setInsertPoint(SingleDef);
// The stores with invariant address inside the loop will be deleted, and
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index 1808be118cd2a..fb96d0c213f3b 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -94,10 +94,6 @@ class VPRecipeBuilder {
/// recipe that takes an additional VPInstruction for the mask.
VPWidenMemoryRecipe *tryToWidenMemory(VPInstruction *VPI, VFRange &Range);
- /// Check if an induction recipe should be constructed for \p VPI. If so build
- /// and return it. If not, return null.
- VPHeaderPHIRecipe *tryToOptimizeInductionPHI(VPInstruction *VPI);
-
/// Optimize the special case where the operand of \p VPI is a constant
/// integer induction variable.
VPWidenIntOrFpInductionRecipe *
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 60318960353d6..f39fab4256b86 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2471,6 +2471,13 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
return Partial ? Partial->VFScaleFactor : 1;
}
+ /// Set the VFScaleFactor for this reduction phi. Can only be set to a factor
+ /// > 1
+ void setVFScaleFactor(unsigned ScaleFactor) {
+ assert(ScaleFactor > 1 && "must set to scale factor > 1");
+ Style = RdxUnordered{ScaleFactor};
+ }
+
/// Returns the number of incoming values, also number of incoming blocks.
/// Note that at the moment, VPWidenPointerInductionRecipe only has a single
/// incoming value, its start value.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index f9a61969f201f..b56776c462ee7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -594,6 +594,108 @@ VPlanTransforms::buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy,
return VPlan0;
}
+/// Creates a VPWidenIntOrFpInductionRecipe or VPWidenPointerInductionRecipe
+/// for \p Phi based on \p IndDesc.
+static VPHeaderPHIRecipe *
+createWidenInductionRecipe(PHINode *Phi, VPPhi *PhiR, VPValue *Start,
+ const InductionDescriptor &IndDesc, VPlan &Plan,
+ ScalarEvolution &SE, Loop &OrigLoop, DebugLoc DL) {
+ assert(SE.isLoopInvariant(IndDesc.getStep(), &OrigLoop) &&
+ "step must be loop invariant");
+ assert((Plan.getLiveIn(IndDesc.getStartValue()) == Start ||
+ (SE.isSCEVable(IndDesc.getStartValue()->getType()) &&
+ SE.getSCEV(IndDesc.getStartValue()) ==
+ vputils::getSCEVExprForVPValue(Start, SE))) &&
+ "Start VPValue must match IndDesc's start value");
+
+ VPValue *Step =
+ vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep());
+
+ if (IndDesc.getKind() == InductionDescriptor::IK_PtrInduction)
+ return new VPWidenPointerInductionRecipe(Phi, Start, Step, &Plan.getVFxUF(),
+ IndDesc, DL);
+
+ // Update wide induction increments to use the same step as the corresponding
+ // wide induction. This enables detecting induction increments directly in
+ // VPlan and removes redundant splats.
+ using namespace llvm::VPlanPatternMatch;
+ if (match(PhiR->getOperand(1), m_Add(m_Specific(PhiR), m_VPValue())))
+ PhiR->getOperand(1)->getDefiningRecipe()->setOperand(1, Step);
+
+ // It is always safe to copy over the NoWrap and FastMath flags. In
+ // particular, when folding tail by masking, the masked-off lanes are never
+ // used, so it is safe.
+ VPIRFlags Flags = vputils::getFlagsFromIndDesc(IndDesc);
+
+ return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, &Plan.getVF(),
+ IndDesc, Flags, DL);
+}
+
+void VPlanTransforms::createHeaderPhiRecipes(
+ VPlan &Plan, ScalarEvolution &SE, Loop &OrigLoop,
+ const MapVector<PHINode *, InductionDescriptor> &Inductions,
+ const MapVector<PHINode *, RecurrenceDescriptor> &Reductions,
+ const SmallPtrSetImpl<const PHINode *> &FixedOrderRecurrences,
+ const SmallPtrSetImpl<PHINode *> &InLoopReductions, bool AllowReordering) {
+
+ VPBasicBlock *HeaderVPBB = cast<VPBasicBlock>(
+ Plan.getEntry()->getSuccessors()[1]->getSingleSuccessor());
+
+ for (VPRecipeBase &R : make_early_inc_range(*HeaderVPBB)) {
+ if (isa<VPCanonicalIVPHIRecipe>(&R))
+ continue;
+ auto *PhiR = dyn_cast<VPPhi>(&R);
+ if (!PhiR)
+ break;
+
+ // TODO: Gradually replace uses of underlying instruction by analyses on
+ // VPlan.
+ auto *Phi = cast<PHINode>(PhiR->getUnderlyingInstr());
+ assert(PhiR->getNumOperands() == 2 &&
+ "Must have 2 operands for header phis");
+
+ // Extract common values once.
+ VPValue *Start = PhiR->getOperand(0);
+ VPValue *BackedgeValue = PhiR->getOperand(1);
+ DebugLoc DL = PhiR->getDebugLoc();
+
+ VPHeaderPHIRecipe *HeaderPhiR = nullptr;
+ auto InductionIt = Inductions.find(Phi);
+ if (InductionIt != Inductions.end()) {
+ HeaderPhiR = createWidenInductionRecipe(
+ Phi, PhiR, Start, InductionIt->second, Plan, SE, OrigLoop, DL);
+ } else {
+ auto ReductionIt = Reductions.find(Phi);
+ if (ReductionIt != Reductions.end()) {
+ const RecurrenceDescriptor &RdxDesc = ReductionIt->second;
+ assert(RdxDesc.getRecurrenceStartValue() ==
+ Phi->getIncomingValueForBlock(OrigLoop.getLoopPreheader()));
+
+ bool UseOrderedReductions = !AllowReordering && RdxDesc.isOrdered();
+
+ HeaderPhiR = new VPReductionPHIRecipe(
+ Phi, RdxDesc.getRecurrenceKind(), *Start,
+ getReductionStyle(InLoopReductions.contains(Phi),
+ UseOrderedReductions, 1),
+ RdxDesc.hasUsesOutsideReductionChain());
+ HeaderPhiR->addOperand(BackedgeValue);
+ } else {
+ assert(FixedOrderRecurrences.contains(Phi) &&
+ "can only widen reductions and fixed-order recurrences here");
+ // TODO: Currently fixed-order recurrences are modeled as chains of
+ // first-order recurrences. If there are no users of the intermediate
+ // recurrences in the chain, the fixed order recurrence should be
+ // modeled directly, enabling more efficient codegen.
+ HeaderPhiR = new VPFirstOrderRecurrencePHIRecipe(Phi, *Start);
+ HeaderPhiR->addOperand(BackedgeValue);
+ }
+ }
+ HeaderPhiR->insertBefore(PhiR);
+ PhiR->replaceAllUsesWith(HeaderPhiR);
+ PhiR->eraseFromParent();
+ }
+}
+
void VPlanTransforms::handleEarlyExits(VPlan &Plan,
bool HasUncountableEarlyExit) {
auto *MiddleVPBB = cast<VPBasicBlock>(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index afdf1655b4622..1a3ff4f9b9bbc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -102,6 +102,17 @@ struct VPlanTransforms {
buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, DebugLoc IVDL,
PredicatedScalarEvolution &PSE, LoopVersioning *LVer = nullptr);
+ /// Replace VPPhi recipes in \p Plan's header with corresponding
+ /// VPHeaderPHIRecipe subclasses for inductions, reductions, and
+ /// fixed-order recurrences. This processes all header phis and creates
+ /// the appropriate widened recipe for each one.
+ static void createHeaderPhiRecipes(
+ VPlan &Plan, ScalarEvolution &SE, Loop &OrigLoop,
+ const MapVector<PHINode *, InductionDescriptor> &Inductions,
+ const MapVector<PHINode *, RecurrenceDescriptor> &Reductions,
+ const SmallPtrSetImpl<const PHINode *> &FixedOrderRecurrences,
+ const SmallPtrSetImpl<PHINode *> &InLoopReductions, bool AllowReordering);
+
/// Update \p Plan to account for all early exits.
LLVM_ABI_FOR_TEST static void handleEarlyExits(VPlan &Plan,
bool HasUncountableExit);
>From dec335da591f01ae371089c9d936814f3c2dff92 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 14 Dec 2025 17:28:10 +0000
Subject: [PATCH 2/2] !fixup address comments, thanks!
---
.../Transforms/Vectorize/LoopVectorize.cpp | 33 ++++----
.../Transforms/Vectorize/VPRecipeBuilder.h | 14 ++--
llvm/lib/Transforms/Vectorize/VPlan.h | 2 +-
.../Vectorize/VPlanConstruction.cpp | 79 ++++++++++---------
4 files changed, 64 insertions(+), 64 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index dfb39e2b7be20..2ebaa12bc2e3c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8169,11 +8169,12 @@ bool VPRecipeBuilder::getScaledReductions(
return false;
}
-VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
- VFRange &Range) {
+VPRecipeBase *
+VPRecipeBuilder::tryToCreateWidenNonPhiRecipe(VPSingleDefRecipe *R,
+ VFRange &Range) {
+ assert(!R->isPhi() && "phis must be handled earlier");
// First, check for specific widening recipes that deal with optimizing
// truncates, calls and memory operations.
- assert(!R->isPhi() && "phis must be handled earlier");
VPRecipeBase *Recipe;
auto *VPI = cast<VPInstruction>(R);
@@ -8398,8 +8399,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// Construct wide recipes and apply predication for original scalar
// VPInstructions in the loop.
// ---------------------------------------------------------------------------
- VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
- Builder, BlockMaskCache);
+ VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, Builder,
+ BlockMaskCache);
// TODO: Handle partial reductions with EVL tail folding.
if (!CM.foldTailWithEVL())
RecipeBuilder.collectScaledReductions(Range);
@@ -8420,14 +8421,15 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
// Convert input VPInstructions to widened recipes.
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
- auto *SingleDef = dyn_cast<VPInstruction>(&R);
- if (!SingleDef || !SingleDef->getUnderlyingValue())
+ auto *VPI = dyn_cast<VPInstruction>(&R);
+ if (!VPI || !VPI->getUnderlyingValue())
continue;
// TODO: Gradually replace uses of underlying instruction by analyses on
- // VPlan.
- Instruction *Instr = cast<Instruction>(SingleDef->getUnderlyingValue());
- Builder.setInsertPoint(SingleDef);
+ // VPlan. Migrate code relying on the underlying instruction from VPlan0
+ // to construct recipes below to not use the underlying instruction.
+ Instruction *Instr = cast<Instruction>(VPI->getUnderlyingValue());
+ Builder.setInsertPoint(VPI);
// The stores with invariant address inside the loop will be deleted, and
// in the exit block, a uniform store recipe will be created for the final
@@ -8437,7 +8439,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
Legal->isInvariantAddressOfReduction(SI->getPointerOperand())) {
// Only create recipe for the final invariant store of the reduction.
if (Legal->isInvariantStoreOfReduction(SI)) {
- auto *VPI = cast<VPInstruction>(SingleDef);
auto *Recipe = new VPReplicateRecipe(
SI, R.operands(), true /* IsUniform */, nullptr /*Mask*/, *VPI,
*VPI, VPI->getDebugLoc());
@@ -8448,10 +8449,10 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
}
VPRecipeBase *Recipe =
- RecipeBuilder.tryToCreateWidenRecipe(SingleDef, Range);
+ RecipeBuilder.tryToCreateWidenNonPhiRecipe(VPI, Range);
if (!Recipe)
- Recipe = RecipeBuilder.handleReplication(cast<VPInstruction>(SingleDef),
- Range);
+ Recipe =
+ RecipeBuilder.handleReplication(cast<VPInstruction>(VPI), Range);
RecipeBuilder.setRecipe(Instr, Recipe);
if (isa<VPWidenIntOrFpInductionRecipe>(Recipe) && isa<TruncInst>(Instr)) {
@@ -8462,8 +8463,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
Builder.insert(Recipe);
}
if (Recipe->getNumDefinedValues() == 1) {
- SingleDef->replaceAllUsesWith(Recipe->getVPSingleValue());
- Old2New[SingleDef] = Recipe->getVPSingleValue();
+ VPI->replaceAllUsesWith(Recipe->getVPSingleValue());
+ Old2New[VPI] = Recipe->getVPSingleValue();
} else {
assert(Recipe->getNumDefinedValues() == 0 &&
"Unexpected multidef recipe");
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index fb96d0c213f3b..59f72ea29c946 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -63,8 +63,6 @@ class VPRecipeBuilder {
/// The profitablity analysis.
LoopVectorizationCostModel &CM;
- PredicatedScalarEvolution &PSE;
-
VPBuilder &Builder;
/// The mask of each VPBB, generated earlier and used for predicating recipes
@@ -133,11 +131,10 @@ class VPRecipeBuilder {
VPRecipeBuilder(VPlan &Plan, Loop *OrigLoop, const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI,
LoopVectorizationLegality *Legal,
- LoopVectorizationCostModel &CM,
- PredicatedScalarEvolution &PSE, VPBuilder &Builder,
+ LoopVectorizationCostModel &CM, VPBuilder &Builder,
DenseMap<VPBasicBlock *, VPValue *> &BlockMaskCache)
: Plan(Plan), OrigLoop(OrigLoop), TLI(TLI), TTI(TTI), Legal(Legal),
- CM(CM), PSE(PSE), Builder(Builder), BlockMaskCache(BlockMaskCache) {}
+ CM(CM), Builder(Builder), BlockMaskCache(BlockMaskCache) {}
std::optional<unsigned> getScalingForReduction(const Instruction *ExitInst) {
auto It = ScaledReductionMap.find(ExitInst);
@@ -149,9 +146,10 @@ class VPRecipeBuilder {
/// that are valid so recipes can be formed later.
void collectScaledReductions(VFRange &Range);
- /// Create and return a widened recipe for \p R if one can be created within
- /// the given VF \p Range.
- VPRecipeBase *tryToCreateWidenRecipe(VPSingleDefRecipe *R, VFRange &Range);
+ /// Create and return a widened recipe for a non-phi recipe \p R if one can be
+ /// created within the given VF \p Range.
+ VPRecipeBase *tryToCreateWidenNonPhiRecipe(VPSingleDefRecipe *R,
+ VFRange &Range);
/// Create and return a partial reduction recipe for a reduction instruction
/// along with binary operation and reduction phi operands.
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index f39fab4256b86..6def6bfa3c183 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2472,7 +2472,7 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
}
/// Set the VFScaleFactor for this reduction phi. Can only be set to a factor
- /// > 1
+ /// > 1.
void setVFScaleFactor(unsigned ScaleFactor) {
assert(ScaleFactor > 1 && "must set to scale factor > 1");
Style = RdxUnordered{ScaleFactor};
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index b56776c462ee7..1e9ccd5582f1c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -637,17 +637,14 @@ void VPlanTransforms::createHeaderPhiRecipes(
const MapVector<PHINode *, RecurrenceDescriptor> &Reductions,
const SmallPtrSetImpl<const PHINode *> &FixedOrderRecurrences,
const SmallPtrSetImpl<PHINode *> &InLoopReductions, bool AllowReordering) {
-
+ // Retrieve the header manually from the initial plain-CFG VPlan.
VPBasicBlock *HeaderVPBB = cast<VPBasicBlock>(
Plan.getEntry()->getSuccessors()[1]->getSingleSuccessor());
+ assert(VPDominatorTree(Plan).dominates(HeaderVPBB,
+ HeaderVPBB->getPredecessors()[1]) &&
+ "header must dominate its latch");
- for (VPRecipeBase &R : make_early_inc_range(*HeaderVPBB)) {
- if (isa<VPCanonicalIVPHIRecipe>(&R))
- continue;
- auto *PhiR = dyn_cast<VPPhi>(&R);
- if (!PhiR)
- break;
-
+ auto CreateHeaderPhiRecipe = [&](VPPhi *PhiR) -> VPHeaderPHIRecipe * {
// TODO: Gradually replace uses of underlying instruction by analyses on
// VPlan.
auto *Phi = cast<PHINode>(PhiR->getUnderlyingInstr());
@@ -657,39 +654,43 @@ void VPlanTransforms::createHeaderPhiRecipes(
// Extract common values once.
VPValue *Start = PhiR->getOperand(0);
VPValue *BackedgeValue = PhiR->getOperand(1);
- DebugLoc DL = PhiR->getDebugLoc();
- VPHeaderPHIRecipe *HeaderPhiR = nullptr;
- auto InductionIt = Inductions.find(Phi);
- if (InductionIt != Inductions.end()) {
- HeaderPhiR = createWidenInductionRecipe(
- Phi, PhiR, Start, InductionIt->second, Plan, SE, OrigLoop, DL);
- } else {
- auto ReductionIt = Reductions.find(Phi);
- if (ReductionIt != Reductions.end()) {
- const RecurrenceDescriptor &RdxDesc = ReductionIt->second;
- assert(RdxDesc.getRecurrenceStartValue() ==
- Phi->getIncomingValueForBlock(OrigLoop.getLoopPreheader()));
-
- bool UseOrderedReductions = !AllowReordering && RdxDesc.isOrdered();
-
- HeaderPhiR = new VPReductionPHIRecipe(
- Phi, RdxDesc.getRecurrenceKind(), *Start,
- getReductionStyle(InLoopReductions.contains(Phi),
- UseOrderedReductions, 1),
- RdxDesc.hasUsesOutsideReductionChain());
- HeaderPhiR->addOperand(BackedgeValue);
- } else {
- assert(FixedOrderRecurrences.contains(Phi) &&
- "can only widen reductions and fixed-order recurrences here");
- // TODO: Currently fixed-order recurrences are modeled as chains of
- // first-order recurrences. If there are no users of the intermediate
- // recurrences in the chain, the fixed order recurrence should be
- // modeled directly, enabling more efficient codegen.
- HeaderPhiR = new VPFirstOrderRecurrencePHIRecipe(Phi, *Start);
- HeaderPhiR->addOperand(BackedgeValue);
- }
+ if (FixedOrderRecurrences.contains(Phi)) {
+ // TODO: Currently fixed-order recurrences are modeled as chains of
+ // first-order recurrences. If there are no users of the intermediate
+ // recurrences in the chain, the fixed order recurrence should be
+ // modeled directly, enabling more efficient codegen.
+ return new VPFirstOrderRecurrencePHIRecipe(Phi, *Start, *BackedgeValue);
}
+
+ auto InductionIt = Inductions.find(Phi);
+ if (InductionIt != Inductions.end())
+ return createWidenInductionRecipe(Phi, PhiR, Start, InductionIt->second,
+ Plan, SE, OrigLoop,
+ PhiR->getDebugLoc());
+
+ assert(Reductions.contains(Phi) &&
+ "can only widen reductions and fixed-order recurrences here");
+ const RecurrenceDescriptor &RdxDesc = Reductions.lookup(Phi);
+ assert(RdxDesc.getRecurrenceStartValue() ==
+ Phi->getIncomingValueForBlock(OrigLoop.getLoopPreheader()) &&
+ "incoming value must match start value");
+ bool UseOrderedReductions = !AllowReordering && RdxDesc.isOrdered();
+ return new VPReductionPHIRecipe(
+ Phi, RdxDesc.getRecurrenceKind(), *Start, *BackedgeValue,
+ getReductionStyle(InLoopReductions.contains(Phi), UseOrderedReductions,
+ 1),
+ RdxDesc.hasUsesOutsideReductionChain());
+ };
+
+ for (VPRecipeBase &R : make_early_inc_range(HeaderVPBB->phis())) {
+ if (isa<VPCanonicalIVPHIRecipe>(&R))
+ continue;
+ auto *PhiR = dyn_cast<VPPhi>(&R);
+ if (!PhiR)
+ break;
+
+ VPHeaderPHIRecipe *HeaderPhiR = CreateHeaderPhiRecipe(PhiR);
HeaderPhiR->insertBefore(PhiR);
PhiR->replaceAllUsesWith(HeaderPhiR);
PhiR->eraseFromParent();
More information about the llvm-commits
mailing list