[llvm] [VPlan] Create header phis once, after constructing VPlan0 (NFC). (PR #168291)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 23 14:02:58 PST 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/168291
>From 8e26db28f374cc8c699c61b627d87aadda00d086 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 16 Nov 2025 21:37:24 +0000
Subject: [PATCH] [VPlan] Create header phis once, after constructing VPlan0
(NFC).
---
.../Transforms/Vectorize/LoopVectorize.cpp | 122 +++---------------
.../Transforms/Vectorize/VPRecipeBuilder.h | 4 -
llvm/lib/Transforms/Vectorize/VPlan.h | 11 +-
.../Vectorize/VPlanConstruction.cpp | 94 ++++++++++++++
.../Transforms/Vectorize/VPlanTransforms.h | 11 ++
5 files changed, 135 insertions(+), 107 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 277e43a38018e..786f2c8105b08 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1412,6 +1412,11 @@ class LoopVectorizationCostModel {
return InLoopReductions.contains(Phi);
}
+ /// Returns the set of in-loop reduction PHIs.
+ const SmallPtrSetImpl<PHINode *> &getInLoopReductions() const {
+ return InLoopReductions;
+ }
+
/// Returns true if the predicated reduction select should be used to set the
/// incoming value for the reduction phi.
bool usePredicatedReductionSelect() const {
@@ -7627,57 +7632,6 @@ VPWidenMemoryRecipe *VPRecipeBuilder::tryToWidenMemory(VPInstruction *VPI,
Consecutive, Reverse, *VPI, VPI->getDebugLoc());
}
-/// Creates a VPWidenIntOrFpInductionRecipe for \p PhiR. If needed, it will
-/// also insert a recipe to expand the step for the induction recipe.
-static VPWidenIntOrFpInductionRecipe *
-createWidenInductionRecipes(VPInstruction *PhiR,
- const InductionDescriptor &IndDesc, VPlan &Plan,
- ScalarEvolution &SE, Loop &OrigLoop) {
- assert(SE.isLoopInvariant(IndDesc.getStep(), &OrigLoop) &&
- "step must be loop invariant");
-
- VPValue *Start = PhiR->getOperand(0);
- assert(Plan.getLiveIn(IndDesc.getStartValue()) == Start &&
- "Start VPValue must match IndDesc's start value");
-
- // It is always safe to copy over the NoWrap and FastMath flags. In
- // particular, when folding tail by masking, the masked-off lanes are never
- // used, so it is safe.
- VPIRFlags Flags = vputils::getFlagsFromIndDesc(IndDesc);
- VPValue *Step =
- vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep());
-
- // Update wide induction increments to use the same step as the corresponding
- // wide induction. This enables detecting induction increments directly in
- // VPlan and removes redundant splats.
- using namespace llvm::VPlanPatternMatch;
- if (match(PhiR->getOperand(1), m_Add(m_Specific(PhiR), m_VPValue())))
- PhiR->getOperand(1)->getDefiningRecipe()->setOperand(1, Step);
-
- PHINode *Phi = cast<PHINode>(PhiR->getUnderlyingInstr());
- return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, &Plan.getVF(),
- IndDesc, Flags, PhiR->getDebugLoc());
-}
-
-VPHeaderPHIRecipe *
-VPRecipeBuilder::tryToOptimizeInductionPHI(VPInstruction *VPI) {
- auto *Phi = cast<PHINode>(VPI->getUnderlyingInstr());
-
- // Check if this is an integer or fp induction. If so, build the recipe that
- // produces its scalar and vector values.
- if (auto *II = Legal->getIntOrFpInductionDescriptor(Phi))
- return createWidenInductionRecipes(VPI, *II, Plan, *PSE.getSE(), *OrigLoop);
-
- // Check if this is pointer induction. If so, build the recipe for it.
- if (auto *II = Legal->getPointerInductionDescriptor(Phi)) {
- VPValue *Step = vputils::getOrCreateVPValueForSCEVExpr(Plan, II->getStep());
- return new VPWidenPointerInductionRecipe(Phi, VPI->getOperand(0), Step,
- &Plan.getVFxUF(), *II,
- VPI->getDebugLoc());
- }
- return nullptr;
-}
-
VPWidenIntOrFpInductionRecipe *
VPRecipeBuilder::tryToOptimizeInductionTruncate(VPInstruction *VPI,
VFRange &Range) {
@@ -8186,45 +8140,7 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
// First, check for specific widening recipes that deal with inductions, Phi
// nodes, calls and memory operations.
VPRecipeBase *Recipe;
- if (auto *PhiR = dyn_cast<VPPhi>(R)) {
- VPBasicBlock *Parent = PhiR->getParent();
- [[maybe_unused]] VPRegionBlock *LoopRegionOf =
- Parent->getEnclosingLoopRegion();
- assert(LoopRegionOf && LoopRegionOf->getEntry() == Parent &&
- "Non-header phis should have been handled during predication");
- auto *Phi = cast<PHINode>(R->getUnderlyingInstr());
- assert(R->getNumOperands() == 2 && "Must have 2 operands for header phis");
- if ((Recipe = tryToOptimizeInductionPHI(PhiR)))
- return Recipe;
-
- VPHeaderPHIRecipe *PhiRecipe = nullptr;
- assert((Legal->isReductionVariable(Phi) ||
- Legal->isFixedOrderRecurrence(Phi)) &&
- "can only widen reductions and fixed-order recurrences here");
- VPValue *StartV = R->getOperand(0);
- if (Legal->isReductionVariable(Phi)) {
- const RecurrenceDescriptor &RdxDesc = Legal->getRecurrenceDescriptor(Phi);
- assert(RdxDesc.getRecurrenceStartValue() ==
- Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
-
- // If the PHI is used by a partial reduction, set the scale factor.
- unsigned ScaleFactor =
- getScalingForReduction(RdxDesc.getLoopExitInstr()).value_or(1);
- PhiRecipe = new VPReductionPHIRecipe(
- Phi, RdxDesc.getRecurrenceKind(), *StartV, CM.isInLoopReduction(Phi),
- CM.useOrderedReductions(RdxDesc), ScaleFactor);
- } else {
- // TODO: Currently fixed-order recurrences are modeled as chains of
- // first-order recurrences. If there are no users of the intermediate
- // recurrences in the chain, the fixed order recurrence should be modeled
- // directly, enabling more efficient codegen.
- PhiRecipe = new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV);
- }
- // Add backedge value.
- PhiRecipe->addOperand(R->getOperand(1));
- return PhiRecipe;
- }
- assert(!R->isPhi() && "only VPPhi nodes expected at this point");
+ assert(!R->isPhi() && "phis must be handled earlier");
auto *VPI = cast<VPInstruction>(R);
Instruction *Instr = R->getUnderlyingInstr();
@@ -8284,6 +8200,9 @@ VPRecipeBuilder::tryToCreatePartialReduction(VPInstruction *Reduction,
if (isa<VPReductionPHIRecipe>(BinOp) || isa<VPPartialReductionRecipe>(BinOp))
std::swap(BinOp, Accumulator);
+ if (auto *RedPhiR = dyn_cast<VPReductionPHIRecipe>(Accumulator))
+ RedPhiR->setVFScaleFactor(ScaleFactor);
+
assert(ScaleFactor ==
vputils::getVFScaleFactor(Accumulator->getDefiningRecipe()) &&
"all accumulators in chain must have same scale factor");
@@ -8331,6 +8250,12 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
OrigLoop, *LI, Legal->getWidestInductionType(),
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), PSE, &LVer);
+ // Create recipes for header phis.
+ VPlanTransforms::createHeaderPhiRecipes(
+ *VPlan0, *PSE.getSE(), *OrigLoop, Legal->getInductionVars(),
+ Legal->getReductionVars(), Legal->getFixedOrderRecurrences(),
+ CM.getInLoopReductions(), Hints.allowReordering());
+
auto MaxVFTimes2 = MaxVF * 2;
for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
VFRange SubRange = {VF, MaxVFTimes2};
@@ -8451,25 +8376,18 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// Mapping from VPValues in the initial plan to their widened VPValues. Needed
// temporarily to update created block masks.
DenseMap<VPValue *, VPValue *> Old2New;
+
+ // Now process all other blocks and instructions.
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
// Convert input VPInstructions to widened recipes.
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
- auto *SingleDef = cast<VPSingleDefRecipe>(&R);
- auto *UnderlyingValue = SingleDef->getUnderlyingValue();
- // Skip recipes that do not need transforming, including canonical IV,
- // wide canonical IV and VPInstructions without underlying values. The
- // latter are added above for masking.
- // FIXME: Migrate code relying on the underlying instruction from VPlan0
- // to construct recipes below to not use the underlying instruction.
- if (isa<VPCanonicalIVPHIRecipe, VPWidenCanonicalIVRecipe, VPBlendRecipe>(
- &R) ||
- (isa<VPInstruction>(&R) && !UnderlyingValue))
+ auto *SingleDef = dyn_cast<VPInstruction>(&R);
+ if (!SingleDef || !SingleDef->getUnderlyingValue())
continue;
- assert(isa<VPInstruction>(&R) && UnderlyingValue && "unsupported recipe");
// TODO: Gradually replace uses of underlying instruction by analyses on
// VPlan.
- Instruction *Instr = cast<Instruction>(UnderlyingValue);
+ Instruction *Instr = cast<Instruction>(SingleDef->getUnderlyingValue());
Builder.setInsertPoint(SingleDef);
// The stores with invariant address inside the loop will be deleted, and
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index 1808be118cd2a..fb96d0c213f3b 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -94,10 +94,6 @@ class VPRecipeBuilder {
/// recipe that takes an additional VPInstruction for the mask.
VPWidenMemoryRecipe *tryToWidenMemory(VPInstruction *VPI, VFRange &Range);
- /// Check if an induction recipe should be constructed for \p VPI. If so build
- /// and return it. If not, return null.
- VPHeaderPHIRecipe *tryToOptimizeInductionPHI(VPInstruction *VPI);
-
/// Optimize the special case where the operand of \p VPI is a constant
/// integer induction variable.
VPWidenIntOrFpInductionRecipe *
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 405f83a6ce8e5..3e0a08c183690 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2378,8 +2378,10 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)
VPFirstOrderRecurrencePHIRecipe *clone() override {
- return new VPFirstOrderRecurrencePHIRecipe(
+ auto *R = new VPFirstOrderRecurrencePHIRecipe(
cast<PHINode>(getUnderlyingInstr()), *getOperand(0));
+ R->addOperand(getOperand(1));
+ return R;
}
void execute(VPTransformState &State) override;
@@ -2449,6 +2451,13 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
/// Get the factor that the VF of this recipe's output should be scaled by.
unsigned getVFScaleFactor() const { return VFScaleFactor; }
+ /// Set the VFScaleFactor for this reduction phi. Can only be set to a factor
+ /// > 1.
+ void setVFScaleFactor(unsigned ScaleFactor) {
+ assert(ScaleFactor > 1 && "must set to scale factor > 1");
+ VFScaleFactor = ScaleFactor;
+ }
+
/// Returns the number of incoming values, also number of incoming blocks.
/// Note that at the moment, VPWidenPointerInductionRecipe only has a single
/// incoming value, its start value.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 5fbd61a929fe2..3152921a1209e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -575,6 +575,100 @@ VPlanTransforms::buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy,
return VPlan0;
}
+/// Creates a VPWidenIntOrFpInductionRecipe or VPWidenPointerInductionRecipe
+/// for \p Phi based on \p IndDesc.
+static VPHeaderPHIRecipe *
+createWidenInductionRecipe(PHINode *Phi, VPPhi *PhiR, VPValue *Start,
+ const InductionDescriptor &IndDesc, VPlan &Plan,
+ ScalarEvolution &SE, Loop &OrigLoop, DebugLoc DL) {
+ assert(SE.isLoopInvariant(IndDesc.getStep(), &OrigLoop) &&
+ "step must be loop invariant");
+ assert(Plan.getLiveIn(IndDesc.getStartValue()) == Start &&
+ "Start VPValue must match IndDesc's start value");
+
+ VPValue *Step =
+ vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep());
+
+ if (IndDesc.getKind() == InductionDescriptor::IK_PtrInduction)
+ return new VPWidenPointerInductionRecipe(Phi, Start, Step, &Plan.getVFxUF(),
+ IndDesc, DL);
+
+ // Update wide induction increments to use the same step as the corresponding
+ // wide induction. This enables detecting induction increments directly in
+ // VPlan and removes redundant splats.
+ using namespace llvm::VPlanPatternMatch;
+ if (match(PhiR->getOperand(1), m_Add(m_Specific(PhiR), m_VPValue())))
+ PhiR->getOperand(1)->getDefiningRecipe()->setOperand(1, Step);
+
+ // It is always safe to copy over the NoWrap and FastMath flags. In
+ // particular, when folding tail by masking, the masked-off lanes are never
+ // used, so it is safe.
+ VPIRFlags Flags = vputils::getFlagsFromIndDesc(IndDesc);
+
+ return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, &Plan.getVF(),
+ IndDesc, Flags, DL);
+}
+
+void VPlanTransforms::createHeaderPhiRecipes(
+ VPlan &Plan, ScalarEvolution &SE, Loop &OrigLoop,
+ const MapVector<PHINode *, InductionDescriptor> &Inductions,
+ const MapVector<PHINode *, RecurrenceDescriptor> &Reductions,
+ const SmallPtrSetImpl<const PHINode *> &FixedOrderRecurrences,
+ const SmallPtrSetImpl<PHINode *> &InLoopReductions, bool AllowReordering) {
+
+ VPBasicBlock *HeaderVPBB = cast<VPBasicBlock>(
+ Plan.getEntry()->getSuccessors()[1]->getSingleSuccessor());
+
+ for (VPRecipeBase &R : make_early_inc_range(*HeaderVPBB)) {
+ if (isa<VPCanonicalIVPHIRecipe>(&R))
+ continue;
+ auto *PhiR = dyn_cast<VPPhi>(&R);
+ if (!PhiR)
+ break;
+
+ // TODO: Gradually replace uses of underlying instruction by analyses on
+ // VPlan.
+ auto *Phi = cast<PHINode>(PhiR->getUnderlyingInstr());
+ assert(PhiR->getNumOperands() == 2 &&
+ "Must have 2 operands for header phis");
+
+ // Extract common values once.
+ VPValue *Start = PhiR->getOperand(0);
+ VPValue *BackedgeValue = PhiR->getOperand(1);
+ DebugLoc DL = PhiR->getDebugLoc();
+
+ VPHeaderPHIRecipe *HeaderPhiR = nullptr;
+ if (auto InductionIt = Inductions.find(Phi);
+ InductionIt != Inductions.end()) {
+ HeaderPhiR = createWidenInductionRecipe(
+ Phi, PhiR, Start, InductionIt->second, Plan, SE, OrigLoop, DL);
+ } else if (auto ReductionIt = Reductions.find(Phi);
+ ReductionIt != Reductions.end()) {
+ const RecurrenceDescriptor &RdxDesc = ReductionIt->second;
+ assert(RdxDesc.getRecurrenceStartValue() ==
+ Phi->getIncomingValueForBlock(OrigLoop.getLoopPreheader()));
+
+ bool UseOrderedReductions = !AllowReordering && RdxDesc.isOrdered();
+ HeaderPhiR = new VPReductionPHIRecipe(
+ Phi, RdxDesc.getRecurrenceKind(), *Start,
+ InLoopReductions.contains(Phi), UseOrderedReductions);
+ HeaderPhiR->addOperand(BackedgeValue);
+ } else {
+ assert(FixedOrderRecurrences.contains(Phi) &&
+ "can only widen reductions and fixed-order recurrences here");
+ // TODO: Currently fixed-order recurrences are modeled as chains of
+ // first-order recurrences. If there are no users of the intermediate
+ // recurrences in the chain, the fixed order recurrence should be
+ // modeled directly, enabling more efficient codegen.
+ HeaderPhiR = new VPFirstOrderRecurrencePHIRecipe(Phi, *Start);
+ HeaderPhiR->addOperand(BackedgeValue);
+ }
+ HeaderPhiR->insertBefore(PhiR);
+ PhiR->replaceAllUsesWith(HeaderPhiR);
+ PhiR->eraseFromParent();
+ }
+}
+
void VPlanTransforms::handleEarlyExits(VPlan &Plan,
bool HasUncountableEarlyExit) {
auto *MiddleVPBB = cast<VPBasicBlock>(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 5fd3f756c55e3..ad1d63a9c2e81 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -102,6 +102,17 @@ struct VPlanTransforms {
buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, DebugLoc IVDL,
PredicatedScalarEvolution &PSE, LoopVersioning *LVer = nullptr);
+ /// Replace VPPhi recipes in \p Plan's header with corresponding
+ /// VPHeaderPHIRecipe subclasses for inductions, reductions, and
+ /// fixed-order recurrences. This processes all header phis and creates
+ /// the appropriate widened recipe for each one.
+ static void createHeaderPhiRecipes(
+ VPlan &Plan, ScalarEvolution &SE, Loop &OrigLoop,
+ const MapVector<PHINode *, InductionDescriptor> &Inductions,
+ const MapVector<PHINode *, RecurrenceDescriptor> &Reductions,
+ const SmallPtrSetImpl<const PHINode *> &FixedOrderRecurrences,
+ const SmallPtrSetImpl<PHINode *> &InLoopReductions, bool AllowReordering);
+
/// Update \p Plan to account for all early exits.
LLVM_ABI_FOR_TEST static void handleEarlyExits(VPlan &Plan,
bool HasUncountableExit);
More information about the llvm-commits
mailing list