[llvm] [VPlan] Create header phis once, after constructing VPlan0 (NFC). (PR #168291)

Sun Dec 14 13:04:22 PST 2025

https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/168291

>From 5b739c78e8d48bea689091f6b421458c4046775b Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 16 Nov 2025 21:37:24 +0000
Subject: [PATCH 1/2] [VPlan] Create header phis once, after constructing
 VPlan0 (NFC).

---
 .../Transforms/Vectorize/LoopVectorize.cpp    | 136 ++++--------------
 .../Transforms/Vectorize/VPRecipeBuilder.h    |   4 -
 llvm/lib/Transforms/Vectorize/VPlan.h         |   7 +
 .../Vectorize/VPlanConstruction.cpp           | 102 +++++++++++++
 .../Transforms/Vectorize/VPlanTransforms.h    |  11 ++
 5 files changed, 144 insertions(+), 116 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 082489f70f1c6..dfb39e2b7be20 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1405,6 +1405,11 @@ class LoopVectorizationCostModel {
     return InLoopReductions.contains(Phi);
   }
 
+  /// Returns the set of in-loop reduction PHIs.
+  const SmallPtrSetImpl<PHINode *> &getInLoopReductions() const {
+    return InLoopReductions;
+  }
+
   /// Returns true if the predicated reduction select should be used to set the
   /// incoming value for the reduction phi.
   bool usePredicatedReductionSelect() const {
@@ -7683,60 +7688,6 @@ VPWidenMemoryRecipe *VPRecipeBuilder::tryToWidenMemory(VPInstruction *VPI,
                                 Consecutive, Reverse, *VPI, VPI->getDebugLoc());
 }
 
-/// Creates a VPWidenIntOrFpInductionRecipe for \p PhiR. If needed, it will
-/// also insert a recipe to expand the step for the induction recipe.
-static VPWidenIntOrFpInductionRecipe *
-createWidenInductionRecipes(VPInstruction *PhiR,
-                            const InductionDescriptor &IndDesc, VPlan &Plan,
-                            ScalarEvolution &SE, Loop &OrigLoop) {
-  assert(SE.isLoopInvariant(IndDesc.getStep(), &OrigLoop) &&
-         "step must be loop invariant");
-
-  VPValue *Start = PhiR->getOperand(0);
-  assert((Plan.getLiveIn(IndDesc.getStartValue()) == Start ||
-          (SE.isSCEVable(IndDesc.getStartValue()->getType()) &&
-           SE.getSCEV(IndDesc.getStartValue()) ==
-               vputils::getSCEVExprForVPValue(Start, SE))) &&
-         "Start VPValue must match IndDesc's start value");
-
-  // It is always safe to copy over the NoWrap and FastMath flags. In
-  // particular, when folding tail by masking, the masked-off lanes are never
-  // used, so it is safe.
-  VPIRFlags Flags = vputils::getFlagsFromIndDesc(IndDesc);
-  VPValue *Step =
-      vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep());
-
-  // Update wide induction increments to use the same step as the corresponding
-  // wide induction. This enables detecting induction increments directly in
-  // VPlan and removes redundant splats.
-  using namespace llvm::VPlanPatternMatch;
-  if (match(PhiR->getOperand(1), m_Add(m_Specific(PhiR), m_VPValue())))
-    PhiR->getOperand(1)->getDefiningRecipe()->setOperand(1, Step);
-
-  PHINode *Phi = cast<PHINode>(PhiR->getUnderlyingInstr());
-  return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, &Plan.getVF(),
-                                           IndDesc, Flags, PhiR->getDebugLoc());
-}
-
-VPHeaderPHIRecipe *
-VPRecipeBuilder::tryToOptimizeInductionPHI(VPInstruction *VPI) {
-  auto *Phi = cast<PHINode>(VPI->getUnderlyingInstr());
-
-  // Check if this is an integer or fp induction. If so, build the recipe that
-  // produces its scalar and vector values.
-  if (auto *II = Legal->getIntOrFpInductionDescriptor(Phi))
-    return createWidenInductionRecipes(VPI, *II, Plan, *PSE.getSE(), *OrigLoop);
-
-  // Check if this is pointer induction. If so, build the recipe for it.
-  if (auto *II = Legal->getPointerInductionDescriptor(Phi)) {
-    VPValue *Step = vputils::getOrCreateVPValueForSCEVExpr(Plan, II->getStep());
-    return new VPWidenPointerInductionRecipe(Phi, VPI->getOperand(0), Step,
-                                             &Plan.getVFxUF(), *II,
-                                             VPI->getDebugLoc());
-  }
-  return nullptr;
-}
-
 VPWidenIntOrFpInductionRecipe *
 VPRecipeBuilder::tryToOptimizeInductionTruncate(VPInstruction *VPI,
                                                 VFRange &Range) {
@@ -8220,54 +8171,12 @@ bool VPRecipeBuilder::getScaledReductions(
 
 VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
                                                       VFRange &Range) {
-  // First, check for specific widening recipes that deal with inductions, Phi
-  // nodes, calls and memory operations.
-  VPRecipeBase *Recipe;
-  if (auto *PhiR = dyn_cast<VPPhi>(R)) {
-    VPBasicBlock *Parent = PhiR->getParent();
-    [[maybe_unused]] VPRegionBlock *LoopRegionOf =
-        Parent->getEnclosingLoopRegion();
-    assert(LoopRegionOf && LoopRegionOf->getEntry() == Parent &&
-           "Non-header phis should have been handled during predication");
-    auto *Phi = cast<PHINode>(R->getUnderlyingInstr());
-    assert(R->getNumOperands() == 2 && "Must have 2 operands for header phis");
-    if ((Recipe = tryToOptimizeInductionPHI(PhiR)))
-      return Recipe;
-
-    assert((Legal->isReductionVariable(Phi) ||
-            Legal->isFixedOrderRecurrence(Phi)) &&
-           "can only widen reductions and fixed-order recurrences here");
-    VPValue *StartV = R->getOperand(0);
-    VPValue *BackedgeValue = R->getOperand(1);
-    if (Legal->isReductionVariable(Phi)) {
-      const RecurrenceDescriptor &RdxDesc = Legal->getRecurrenceDescriptor(Phi);
-      assert(RdxDesc.getRecurrenceStartValue() ==
-             Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
-
-      // If the PHI is used by a partial reduction, set the scale factor.
-      bool UseInLoopReduction = CM.isInLoopReduction(Phi);
-      bool UseOrderedReductions = CM.useOrderedReductions(RdxDesc);
-      unsigned ScaleFactor =
-          getScalingForReduction(RdxDesc.getLoopExitInstr()).value_or(1);
-
-      return new VPReductionPHIRecipe(
-          Phi, RdxDesc.getRecurrenceKind(), *StartV, *BackedgeValue,
-          getReductionStyle(UseInLoopReduction, UseOrderedReductions,
-                            ScaleFactor),
-          RdxDesc.hasUsesOutsideReductionChain());
-    }
-
-    // TODO: Currently fixed-order recurrences are modeled as chains of
-    // first-order recurrences. If there are no users of the intermediate
-    // recurrences in the chain, the fixed order recurrence should be modeled
-    // directly, enabling more efficient codegen.
-    return new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV, *BackedgeValue);
-  }
-
-  assert(!R->isPhi() && "only VPPhi nodes expected at this point");
+  // First, check for specific widening recipes that deal with optimizing
+  // truncates, calls and memory operations.
+  assert(!R->isPhi() && "phis must be handled earlier");
 
+  VPRecipeBase *Recipe;
   auto *VPI = cast<VPInstruction>(R);
-  Instruction *Instr = R->getUnderlyingInstr();
   if (VPI->getOpcode() == Instruction::Trunc &&
       (Recipe = tryToOptimizeInductionTruncate(VPI, Range)))
     return Recipe;
@@ -8280,6 +8189,7 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
   if (VPI->getOpcode() == Instruction::Call)
     return tryToWidenCall(VPI, Range);
 
+  Instruction *Instr = R->getUnderlyingInstr();
   if (VPI->getOpcode() == Instruction::Store)
     if (auto HistInfo = Legal->getHistogramInfo(cast<StoreInst>(Instr)))
       return tryToWidenHistogram(*HistInfo, VPI);
@@ -8327,6 +8237,9 @@ VPRecipeBuilder::tryToCreatePartialReduction(VPInstruction *Reduction,
        cast<VPReductionRecipe>(BinOpRecipe)->isPartialReduction()))
     std::swap(BinOp, Accumulator);
 
+  if (auto *RedPhiR = dyn_cast<VPReductionPHIRecipe>(Accumulator))
+    RedPhiR->setVFScaleFactor(ScaleFactor);
+
   assert(ScaleFactor ==
              vputils::getVFScaleFactor(Accumulator->getDefiningRecipe()) &&
          "all accumulators in chain must have same scale factor");
@@ -8374,6 +8287,12 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
       OrigLoop, *LI, Legal->getWidestInductionType(),
       getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), PSE, &LVer);
 
+  // Create recipes for header phis.
+  VPlanTransforms::createHeaderPhiRecipes(
+      *VPlan0, *PSE.getSE(), *OrigLoop, Legal->getInductionVars(),
+      Legal->getReductionVars(), Legal->getFixedOrderRecurrences(),
+      CM.getInLoopReductions(), Hints.allowReordering());
+
   auto MaxVFTimes2 = MaxVF * 2;
   for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
     VFRange SubRange = {VF, MaxVFTimes2};
@@ -8496,25 +8415,18 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
   // Mapping from VPValues in the initial plan to their widened VPValues. Needed
   // temporarily to update created block masks.
   DenseMap<VPValue *, VPValue *> Old2New;
+
+  // Now process all other blocks and instructions.
   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
     // Convert input VPInstructions to widened recipes.
     for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
-      auto *SingleDef = cast<VPSingleDefRecipe>(&R);
-      auto *UnderlyingValue = SingleDef->getUnderlyingValue();
-      // Skip recipes that do not need transforming, including canonical IV,
-      // wide canonical IV and VPInstructions without underlying values. The
-      // latter are added above for masking.
-      // FIXME: Migrate code relying on the underlying instruction from VPlan0
-      // to construct recipes below to not use the underlying instruction.
-      if (isa<VPCanonicalIVPHIRecipe, VPWidenCanonicalIVRecipe, VPBlendRecipe>(
-              &R) ||
-          (isa<VPInstruction>(&R) && !UnderlyingValue))
+      auto *SingleDef = dyn_cast<VPInstruction>(&R);
+      if (!SingleDef || !SingleDef->getUnderlyingValue())
         continue;
-      assert(isa<VPInstruction>(&R) && UnderlyingValue && "unsupported recipe");
 
       // TODO: Gradually replace uses of underlying instruction by analyses on
       // VPlan.
-      Instruction *Instr = cast<Instruction>(UnderlyingValue);
+      Instruction *Instr = cast<Instruction>(SingleDef->getUnderlyingValue());
       Builder.setInsertPoint(SingleDef);
 
       // The stores with invariant address inside the loop will be deleted, and
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index 1808be118cd2a..fb96d0c213f3b 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -94,10 +94,6 @@ class VPRecipeBuilder {
   /// recipe that takes an additional VPInstruction for the mask.
   VPWidenMemoryRecipe *tryToWidenMemory(VPInstruction *VPI, VFRange &Range);
 
-  /// Check if an induction recipe should be constructed for \p VPI. If so build
-  /// and return it. If not, return null.
-  VPHeaderPHIRecipe *tryToOptimizeInductionPHI(VPInstruction *VPI);
-
   /// Optimize the special case where the operand of \p VPI is a constant
   /// integer induction variable.
   VPWidenIntOrFpInductionRecipe *
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 60318960353d6..f39fab4256b86 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2471,6 +2471,13 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
     return Partial ? Partial->VFScaleFactor : 1;
   }
 
+  /// Set the VFScaleFactor for this reduction phi. Can only be set to a factor
+  /// > 1
+  void setVFScaleFactor(unsigned ScaleFactor) {
+    assert(ScaleFactor > 1 && "must set to scale factor > 1");
+    Style = RdxUnordered{ScaleFactor};
+  }
+
   /// Returns the number of incoming values, also number of incoming blocks.
   /// Note that at the moment, VPWidenPointerInductionRecipe only has a single
   /// incoming value, its start value.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index f9a61969f201f..b56776c462ee7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -594,6 +594,108 @@ VPlanTransforms::buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy,
   return VPlan0;
 }
 
+/// Creates a VPWidenIntOrFpInductionRecipe or VPWidenPointerInductionRecipe
+/// for \p Phi based on \p IndDesc.
+static VPHeaderPHIRecipe *
+createWidenInductionRecipe(PHINode *Phi, VPPhi *PhiR, VPValue *Start,
+                           const InductionDescriptor &IndDesc, VPlan &Plan,
+                           ScalarEvolution &SE, Loop &OrigLoop, DebugLoc DL) {
+  assert(SE.isLoopInvariant(IndDesc.getStep(), &OrigLoop) &&
+         "step must be loop invariant");
+  assert((Plan.getLiveIn(IndDesc.getStartValue()) == Start ||
+          (SE.isSCEVable(IndDesc.getStartValue()->getType()) &&
+           SE.getSCEV(IndDesc.getStartValue()) ==
+               vputils::getSCEVExprForVPValue(Start, SE))) &&
+         "Start VPValue must match IndDesc's start value");
+
+  VPValue *Step =
+      vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep());
+
+  if (IndDesc.getKind() == InductionDescriptor::IK_PtrInduction)
+    return new VPWidenPointerInductionRecipe(Phi, Start, Step, &Plan.getVFxUF(),
+                                             IndDesc, DL);
+
+  // Update wide induction increments to use the same step as the corresponding
+  // wide induction. This enables detecting induction increments directly in
+  // VPlan and removes redundant splats.
+  using namespace llvm::VPlanPatternMatch;
+  if (match(PhiR->getOperand(1), m_Add(m_Specific(PhiR), m_VPValue())))
+    PhiR->getOperand(1)->getDefiningRecipe()->setOperand(1, Step);
+
+  // It is always safe to copy over the NoWrap and FastMath flags. In
+  // particular, when folding tail by masking, the masked-off lanes are never
+  // used, so it is safe.
+  VPIRFlags Flags = vputils::getFlagsFromIndDesc(IndDesc);
+
+  return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, &Plan.getVF(),
+                                           IndDesc, Flags, DL);
+}
+
+void VPlanTransforms::createHeaderPhiRecipes(
+    VPlan &Plan, ScalarEvolution &SE, Loop &OrigLoop,
+    const MapVector<PHINode *, InductionDescriptor> &Inductions,
+    const MapVector<PHINode *, RecurrenceDescriptor> &Reductions,
+    const SmallPtrSetImpl<const PHINode *> &FixedOrderRecurrences,
+    const SmallPtrSetImpl<PHINode *> &InLoopReductions, bool AllowReordering) {
+
+  VPBasicBlock *HeaderVPBB = cast<VPBasicBlock>(
+      Plan.getEntry()->getSuccessors()[1]->getSingleSuccessor());
+
+  for (VPRecipeBase &R : make_early_inc_range(*HeaderVPBB)) {
+    if (isa<VPCanonicalIVPHIRecipe>(&R))
+      continue;
+    auto *PhiR = dyn_cast<VPPhi>(&R);
+    if (!PhiR)
+      break;
+
+    // TODO: Gradually replace uses of underlying instruction by analyses on
+    // VPlan.
+    auto *Phi = cast<PHINode>(PhiR->getUnderlyingInstr());
+    assert(PhiR->getNumOperands() == 2 &&
+           "Must have 2 operands for header phis");
+
+    // Extract common values once.
+    VPValue *Start = PhiR->getOperand(0);
+    VPValue *BackedgeValue = PhiR->getOperand(1);
+    DebugLoc DL = PhiR->getDebugLoc();
+
+    VPHeaderPHIRecipe *HeaderPhiR = nullptr;
+    auto InductionIt = Inductions.find(Phi);
+    if (InductionIt != Inductions.end()) {
+      HeaderPhiR = createWidenInductionRecipe(
+          Phi, PhiR, Start, InductionIt->second, Plan, SE, OrigLoop, DL);
+    } else {
+      auto ReductionIt = Reductions.find(Phi);
+      if (ReductionIt != Reductions.end()) {
+        const RecurrenceDescriptor &RdxDesc = ReductionIt->second;
+        assert(RdxDesc.getRecurrenceStartValue() ==
+               Phi->getIncomingValueForBlock(OrigLoop.getLoopPreheader()));
+
+        bool UseOrderedReductions = !AllowReordering && RdxDesc.isOrdered();
+
+        HeaderPhiR = new VPReductionPHIRecipe(
+            Phi, RdxDesc.getRecurrenceKind(), *Start,
+            getReductionStyle(InLoopReductions.contains(Phi),
+                              UseOrderedReductions, 1),
+            RdxDesc.hasUsesOutsideReductionChain());
+        HeaderPhiR->addOperand(BackedgeValue);
+      } else {
+        assert(FixedOrderRecurrences.contains(Phi) &&
+               "can only widen reductions and fixed-order recurrences here");
+        // TODO: Currently fixed-order recurrences are modeled as chains of
+        // first-order recurrences. If there are no users of the intermediate
+        // recurrences in the chain, the fixed order recurrence should be
+        // modeled directly, enabling more efficient codegen.
+        HeaderPhiR = new VPFirstOrderRecurrencePHIRecipe(Phi, *Start);
+        HeaderPhiR->addOperand(BackedgeValue);
+      }
+    }
+    HeaderPhiR->insertBefore(PhiR);
+    PhiR->replaceAllUsesWith(HeaderPhiR);
+    PhiR->eraseFromParent();
+  }
+}
+
 void VPlanTransforms::handleEarlyExits(VPlan &Plan,
                                        bool HasUncountableEarlyExit) {
   auto *MiddleVPBB = cast<VPBasicBlock>(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index afdf1655b4622..1a3ff4f9b9bbc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -102,6 +102,17 @@ struct VPlanTransforms {
   buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, DebugLoc IVDL,
               PredicatedScalarEvolution &PSE, LoopVersioning *LVer = nullptr);
 
+  /// Replace VPPhi recipes in \p Plan's header with corresponding
+  /// VPHeaderPHIRecipe subclasses for inductions, reductions, and
+  /// fixed-order recurrences. This processes all header phis and creates
+  /// the appropriate widened recipe for each one.
+  static void createHeaderPhiRecipes(
+      VPlan &Plan, ScalarEvolution &SE, Loop &OrigLoop,
+      const MapVector<PHINode *, InductionDescriptor> &Inductions,
+      const MapVector<PHINode *, RecurrenceDescriptor> &Reductions,
+      const SmallPtrSetImpl<const PHINode *> &FixedOrderRecurrences,
+      const SmallPtrSetImpl<PHINode *> &InLoopReductions, bool AllowReordering);
+
   /// Update \p Plan to account for all early exits.
   LLVM_ABI_FOR_TEST static void handleEarlyExits(VPlan &Plan,
                                                  bool HasUncountableExit);

>From dec335da591f01ae371089c9d936814f3c2dff92 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 14 Dec 2025 17:28:10 +0000
Subject: [PATCH 2/2] !fixup address comments, thanks!

---
 .../Transforms/Vectorize/LoopVectorize.cpp    | 33 ++++----
 .../Transforms/Vectorize/VPRecipeBuilder.h    | 14 ++--
 llvm/lib/Transforms/Vectorize/VPlan.h         |  2 +-
 .../Vectorize/VPlanConstruction.cpp           | 79 ++++++++++---------
 4 files changed, 64 insertions(+), 64 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index dfb39e2b7be20..2ebaa12bc2e3c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8169,11 +8169,12 @@ bool VPRecipeBuilder::getScaledReductions(
   return false;
 }
 
-VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
-                                                      VFRange &Range) {
+VPRecipeBase *
+VPRecipeBuilder::tryToCreateWidenNonPhiRecipe(VPSingleDefRecipe *R,
+                                              VFRange &Range) {
+  assert(!R->isPhi() && "phis must be handled earlier");
   // First, check for specific widening recipes that deal with optimizing
   // truncates, calls and memory operations.
-  assert(!R->isPhi() && "phis must be handled earlier");
 
   VPRecipeBase *Recipe;
   auto *VPI = cast<VPInstruction>(R);
@@ -8398,8 +8399,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
   // Construct wide recipes and apply predication for original scalar
   // VPInstructions in the loop.
   // ---------------------------------------------------------------------------
-  VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
-                                Builder, BlockMaskCache);
+  VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, Builder,
+                                BlockMaskCache);
   // TODO: Handle partial reductions with EVL tail folding.
   if (!CM.foldTailWithEVL())
     RecipeBuilder.collectScaledReductions(Range);
@@ -8420,14 +8421,15 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
     // Convert input VPInstructions to widened recipes.
     for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
-      auto *SingleDef = dyn_cast<VPInstruction>(&R);
-      if (!SingleDef || !SingleDef->getUnderlyingValue())
+      auto *VPI = dyn_cast<VPInstruction>(&R);
+      if (!VPI || !VPI->getUnderlyingValue())
         continue;
 
       // TODO: Gradually replace uses of underlying instruction by analyses on
-      // VPlan.
-      Instruction *Instr = cast<Instruction>(SingleDef->getUnderlyingValue());
-      Builder.setInsertPoint(SingleDef);
+      // VPlan. Migrate code relying on the underlying instruction from VPlan0
+      // to construct recipes below to not use the underlying instruction.
+      Instruction *Instr = cast<Instruction>(VPI->getUnderlyingValue());
+      Builder.setInsertPoint(VPI);
 
       // The stores with invariant address inside the loop will be deleted, and
       // in the exit block, a uniform store recipe will be created for the final
@@ -8437,7 +8439,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
           Legal->isInvariantAddressOfReduction(SI->getPointerOperand())) {
         // Only create recipe for the final invariant store of the reduction.
         if (Legal->isInvariantStoreOfReduction(SI)) {
-          auto *VPI = cast<VPInstruction>(SingleDef);
           auto *Recipe = new VPReplicateRecipe(
               SI, R.operands(), true /* IsUniform */, nullptr /*Mask*/, *VPI,
               *VPI, VPI->getDebugLoc());
@@ -8448,10 +8449,10 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
       }
 
       VPRecipeBase *Recipe =
-          RecipeBuilder.tryToCreateWidenRecipe(SingleDef, Range);
+          RecipeBuilder.tryToCreateWidenNonPhiRecipe(VPI, Range);
       if (!Recipe)
-        Recipe = RecipeBuilder.handleReplication(cast<VPInstruction>(SingleDef),
-                                                 Range);
+        Recipe =
+            RecipeBuilder.handleReplication(cast<VPInstruction>(VPI), Range);
 
       RecipeBuilder.setRecipe(Instr, Recipe);
       if (isa<VPWidenIntOrFpInductionRecipe>(Recipe) && isa<TruncInst>(Instr)) {
@@ -8462,8 +8463,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
         Builder.insert(Recipe);
       }
       if (Recipe->getNumDefinedValues() == 1) {
-        SingleDef->replaceAllUsesWith(Recipe->getVPSingleValue());
-        Old2New[SingleDef] = Recipe->getVPSingleValue();
+        VPI->replaceAllUsesWith(Recipe->getVPSingleValue());
+        Old2New[VPI] = Recipe->getVPSingleValue();
       } else {
         assert(Recipe->getNumDefinedValues() == 0 &&
                "Unexpected multidef recipe");
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index fb96d0c213f3b..59f72ea29c946 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -63,8 +63,6 @@ class VPRecipeBuilder {
   /// The profitablity analysis.
   LoopVectorizationCostModel &CM;
 
-  PredicatedScalarEvolution &PSE;
-
   VPBuilder &Builder;
 
   /// The mask of each VPBB, generated earlier and used for predicating recipes
@@ -133,11 +131,10 @@ class VPRecipeBuilder {
   VPRecipeBuilder(VPlan &Plan, Loop *OrigLoop, const TargetLibraryInfo *TLI,
                   const TargetTransformInfo *TTI,
                   LoopVectorizationLegality *Legal,
-                  LoopVectorizationCostModel &CM,
-                  PredicatedScalarEvolution &PSE, VPBuilder &Builder,
+                  LoopVectorizationCostModel &CM, VPBuilder &Builder,
                   DenseMap<VPBasicBlock *, VPValue *> &BlockMaskCache)
       : Plan(Plan), OrigLoop(OrigLoop), TLI(TLI), TTI(TTI), Legal(Legal),
-        CM(CM), PSE(PSE), Builder(Builder), BlockMaskCache(BlockMaskCache) {}
+        CM(CM), Builder(Builder), BlockMaskCache(BlockMaskCache) {}
 
   std::optional<unsigned> getScalingForReduction(const Instruction *ExitInst) {
     auto It = ScaledReductionMap.find(ExitInst);
@@ -149,9 +146,10 @@ class VPRecipeBuilder {
   /// that are valid so recipes can be formed later.
   void collectScaledReductions(VFRange &Range);
 
-  /// Create and return a widened recipe for \p R if one can be created within
-  /// the given VF \p Range.
-  VPRecipeBase *tryToCreateWidenRecipe(VPSingleDefRecipe *R, VFRange &Range);
+  /// Create and return a widened recipe for a non-phi recipe \p R if one can be
+  /// created within the given VF \p Range.
+  VPRecipeBase *tryToCreateWidenNonPhiRecipe(VPSingleDefRecipe *R,
+                                             VFRange &Range);
 
   /// Create and return a partial reduction recipe for a reduction instruction
   /// along with binary operation and reduction phi operands.
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index f39fab4256b86..6def6bfa3c183 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2472,7 +2472,7 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
   }
 
   /// Set the VFScaleFactor for this reduction phi. Can only be set to a factor
-  /// > 1
+  /// > 1.
   void setVFScaleFactor(unsigned ScaleFactor) {
     assert(ScaleFactor > 1 && "must set to scale factor > 1");
     Style = RdxUnordered{ScaleFactor};
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index b56776c462ee7..1e9ccd5582f1c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -637,17 +637,14 @@ void VPlanTransforms::createHeaderPhiRecipes(
     const MapVector<PHINode *, RecurrenceDescriptor> &Reductions,
     const SmallPtrSetImpl<const PHINode *> &FixedOrderRecurrences,
     const SmallPtrSetImpl<PHINode *> &InLoopReductions, bool AllowReordering) {
-
+  // Retrieve the header manually from the intial plain-CFG VPlan.
   VPBasicBlock *HeaderVPBB = cast<VPBasicBlock>(
       Plan.getEntry()->getSuccessors()[1]->getSingleSuccessor());
+  assert(VPDominatorTree(Plan).dominates(HeaderVPBB,
+                                         HeaderVPBB->getPredecessors()[1]) &&
+         "header must dominate its latch");
 
-  for (VPRecipeBase &R : make_early_inc_range(*HeaderVPBB)) {
-    if (isa<VPCanonicalIVPHIRecipe>(&R))
-      continue;
-    auto *PhiR = dyn_cast<VPPhi>(&R);
-    if (!PhiR)
-      break;
-
+  auto CreateHeaderPhiRecipe = [&](VPPhi *PhiR) -> VPHeaderPHIRecipe * {
     // TODO: Gradually replace uses of underlying instruction by analyses on
     // VPlan.
     auto *Phi = cast<PHINode>(PhiR->getUnderlyingInstr());
@@ -657,39 +654,43 @@ void VPlanTransforms::createHeaderPhiRecipes(
     // Extract common values once.
     VPValue *Start = PhiR->getOperand(0);
     VPValue *BackedgeValue = PhiR->getOperand(1);
-    DebugLoc DL = PhiR->getDebugLoc();
 
-    VPHeaderPHIRecipe *HeaderPhiR = nullptr;
-    auto InductionIt = Inductions.find(Phi);
-    if (InductionIt != Inductions.end()) {
-      HeaderPhiR = createWidenInductionRecipe(
-          Phi, PhiR, Start, InductionIt->second, Plan, SE, OrigLoop, DL);
-    } else {
-      auto ReductionIt = Reductions.find(Phi);
-      if (ReductionIt != Reductions.end()) {
-        const RecurrenceDescriptor &RdxDesc = ReductionIt->second;
-        assert(RdxDesc.getRecurrenceStartValue() ==
-               Phi->getIncomingValueForBlock(OrigLoop.getLoopPreheader()));
-
-        bool UseOrderedReductions = !AllowReordering && RdxDesc.isOrdered();
-
-        HeaderPhiR = new VPReductionPHIRecipe(
-            Phi, RdxDesc.getRecurrenceKind(), *Start,
-            getReductionStyle(InLoopReductions.contains(Phi),
-                              UseOrderedReductions, 1),
-            RdxDesc.hasUsesOutsideReductionChain());
-        HeaderPhiR->addOperand(BackedgeValue);
-      } else {
-        assert(FixedOrderRecurrences.contains(Phi) &&
-               "can only widen reductions and fixed-order recurrences here");
-        // TODO: Currently fixed-order recurrences are modeled as chains of
-        // first-order recurrences. If there are no users of the intermediate
-        // recurrences in the chain, the fixed order recurrence should be
-        // modeled directly, enabling more efficient codegen.
-        HeaderPhiR = new VPFirstOrderRecurrencePHIRecipe(Phi, *Start);
-        HeaderPhiR->addOperand(BackedgeValue);
-      }
+    if (FixedOrderRecurrences.contains(Phi)) {
+      // TODO: Currently fixed-order recurrences are modeled as chains of
+      // first-order recurrences. If there are no users of the intermediate
+      // recurrences in the chain, the fixed order recurrence should be
+      // modeled directly, enabling more efficient codegen.
+      return new VPFirstOrderRecurrencePHIRecipe(Phi, *Start, *BackedgeValue);
     }
+
+    auto InductionIt = Inductions.find(Phi);
+    if (InductionIt != Inductions.end())
+      return createWidenInductionRecipe(Phi, PhiR, Start, InductionIt->second,
+                                        Plan, SE, OrigLoop,
+                                        PhiR->getDebugLoc());
+
+    assert(Reductions.contains(Phi) &&
+           "can only widen reductions and fixed-order recurrences here");
+    const RecurrenceDescriptor &RdxDesc = Reductions.lookup(Phi);
+    assert(RdxDesc.getRecurrenceStartValue() ==
+               Phi->getIncomingValueForBlock(OrigLoop.getLoopPreheader()) &&
+           "incoming value must match start value");
+    bool UseOrderedReductions = !AllowReordering && RdxDesc.isOrdered();
+    return new VPReductionPHIRecipe(
+        Phi, RdxDesc.getRecurrenceKind(), *Start, *BackedgeValue,
+        getReductionStyle(InLoopReductions.contains(Phi), UseOrderedReductions,
+                          1),
+        RdxDesc.hasUsesOutsideReductionChain());
+  };
+
+  for (VPRecipeBase &R : make_early_inc_range(HeaderVPBB->phis())) {
+    if (isa<VPCanonicalIVPHIRecipe>(&R))
+      continue;
+    auto *PhiR = dyn_cast<VPPhi>(&R);
+    if (!PhiR)
+      break;
+
+    VPHeaderPHIRecipe *HeaderPhiR = CreateHeaderPhiRecipe(PhiR);
     HeaderPhiR->insertBefore(PhiR);
     PhiR->replaceAllUsesWith(HeaderPhiR);
     PhiR->eraseFromParent();