[llvm] [VPlan] Introduce scalar loop header in plan, remove VPLiveOut. (PR #109975)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Sat Oct 19 19:18:05 PDT 2024


https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/109975

>From 908577d8b59ccde5c9b68e0224366aa832f008ee Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 25 Sep 2024 13:56:06 +0100
Subject: [PATCH] [VPlan] Introduce scalar loop header in plan, remove
 VPLiveOut.

Update VPlan to include the scalar loop header. This allows retiring
VPLiveOut, as the remaining live-outs can now be handled by adding
operands to the wrapped phis in the scalar loop header.

Note that the current version only includes the scalar loop header, no
other loop blocks and also does not wrap it in a region block. This can
either be included in this PR or in follow-ups as needed.
---
 .../Transforms/Vectorize/LoopVectorize.cpp    | 29 +++----
 llvm/lib/Transforms/Vectorize/VPlan.cpp       | 61 +++++++-------
 llvm/lib/Transforms/Vectorize/VPlan.h         | 76 +++++------------
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 34 +-------
 .../Transforms/Vectorize/VPlanTransforms.cpp  |  2 +-
 llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 13 +--
 .../Transforms/Vectorize/VPlanVerifier.cpp    | 14 ----
 .../AArch64/sve2-histcnt-vplan.ll             | 10 +++
 .../AArch64/synthesize-mask-for-call.ll       | 30 +++++++
 .../widen-call-with-intrinsic-or-libfunc.ll   | 10 +++
 .../RISCV/riscv-vector-reverse.ll             | 24 ++++++
 .../RISCV/vplan-vp-intrinsics-reduction.ll    | 18 ++++
 .../first-order-recurrence-chains-vplan.ll    | 22 +++--
 ...-order-recurrence-sink-replicate-region.ll | 49 ++++++++---
 .../interleave-and-scalarize-only.ll          | 14 +++-
 .../LoopVectorize/vplan-iv-transforms.ll      |  5 ++
 .../LoopVectorize/vplan-predicate-switch.ll   |  5 ++
 .../vplan-printing-before-execute.ll          | 12 +++
 .../vplan-printing-outer-loop.ll              |  7 ++
 .../LoopVectorize/vplan-printing.ll           | 82 ++++++++++++++++++-
 .../vplan-sink-scalars-and-merge-vf1.ll       |  6 ++
 .../vplan-sink-scalars-and-merge.ll           | 18 ++++
 .../Transforms/Vectorize/VPDomTreeTest.cpp    | 14 +++-
 .../Transforms/Vectorize/VPlanHCFGTest.cpp    | 12 +++
 .../Transforms/Vectorize/VPlanTest.cpp        | 48 +++++++----
 .../Vectorize/VPlanVerifierTest.cpp           | 30 +++++--
 26 files changed, 440 insertions(+), 205 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0d35bfb921dc79..eaf5c015f0974a 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2965,10 +2965,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
                    IVEndValues[Entry.first], LoopMiddleBlock, Plan, State);
   }
 
-  // Fix live-out phis not already fixed earlier.
-  for (const auto &KV : Plan.getLiveOuts())
-    KV.second->fixPhi(Plan, State);
-
   for (Instruction *PI : PredicatedInstructions)
     sinkScalarOperands(&*PI);
 
@@ -8856,21 +8852,9 @@ static void addLiveOutsForFirstOrderRecurrences(
   // Start by finding out if middle block branches to scalar preheader, which is
   // not a VPIRBasicBlock, unlike Exit block - the other possible successor of
   // middle block.
-  // TODO: Should be replaced by
-  // Plan->getScalarLoopRegion()->getSinglePredecessor() in the future once the
-  // scalar region is modeled as well.
-  auto *MiddleVPBB = cast<VPBasicBlock>(VectorRegion->getSingleSuccessor());
-  VPBasicBlock *ScalarPHVPBB = nullptr;
-  if (MiddleVPBB->getNumSuccessors() == 2) {
-    // Order is strict: first is the exit block, second is the scalar preheader.
-    ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
-  } else if (ExitUsersToFix.empty()) {
-    ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
-  } else {
-    llvm_unreachable("unsupported CFG in VPlan");
-  }
-
+  VPBasicBlock *ScalarPHVPBB = Plan.getScalarPreheader();
   VPBuilder ScalarPHBuilder(ScalarPHVPBB);
+  auto *MiddleVPBB = cast<VPBasicBlock>(VectorRegion->getSingleSuccessor());
   VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
   VPValue *OneVPV = Plan.getOrAddLiveIn(
       ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1));
@@ -8957,7 +8941,14 @@ static void addLiveOutsForFirstOrderRecurrences(
         VPInstruction::ResumePhi, {Resume, FOR->getStartValue()}, {},
         "scalar.recur.init");
     auto *FORPhi = cast<PHINode>(FOR->getUnderlyingInstr());
-    Plan.addLiveOut(FORPhi, ResumePhiRecipe);
+    for (VPRecipeBase &R :
+         *cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor())) {
+      auto *IRI = cast<VPIRInstruction>(&R);
+      if (&IRI->getInstruction() == FORPhi) {
+        IRI->addOperand(ResumePhiRecipe);
+        break;
+      }
+    }
 
     // Now update VPIRInstructions modeling LCSSA phis in the exit block.
     // Extract the penultimate value of the recurrence and use it as operand for
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index c1b97791331bcf..96d833d5436163 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -456,10 +456,17 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
   State->Builder.SetInsertPoint(getIRBasicBlock()->getTerminator());
   executeRecipes(State, getIRBasicBlock());
   if (getSingleSuccessor()) {
-    assert(isa<UnreachableInst>(getIRBasicBlock()->getTerminator()));
-    auto *Br = State->Builder.CreateBr(getIRBasicBlock());
-    Br->setOperand(0, nullptr);
-    getIRBasicBlock()->getTerminator()->eraseFromParent();
+    auto *SuccVPIRBB = dyn_cast<VPIRBasicBlock>(getSingleSuccessor());
+    if (SuccVPIRBB && SuccVPIRBB->getIRBasicBlock() ==
+                          getIRBasicBlock()->getSingleSuccessor()) {
+      cast<BranchInst>(getIRBasicBlock()->getTerminator())
+          ->setOperand(0, nullptr);
+    } else {
+      assert(isa<UnreachableInst>(getIRBasicBlock()->getTerminator()));
+      auto *Br = State->Builder.CreateBr(getIRBasicBlock());
+      Br->setOperand(0, nullptr);
+      getIRBasicBlock()->getTerminator()->eraseFromParent();
+    }
   }
 
   for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
@@ -663,14 +670,16 @@ void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
 }
 #endif
 
-static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry);
+static std::tuple<VPBlockBase *, VPBlockBase *, VPIRBasicBlock *>
+cloneFrom(VPBlockBase *Entry, VPIRBasicBlock *ScalarHeader = nullptr);
 
 // Clone the CFG for all nodes reachable from \p Entry, this includes cloning
 // the blocks and their recipes. Operands of cloned recipes will NOT be updated.
 // Remapping of operands must be done separately. Returns a pair with the new
 // entry and exiting blocks of the cloned region. If \p Entry isn't part of a
 // region, return nullptr for the exiting block.
-static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) {
+static std::tuple<VPBlockBase *, VPBlockBase *, VPIRBasicBlock *>
+cloneFrom(VPBlockBase *Entry, VPIRBasicBlock *ScalarHeader) {
   DenseMap<VPBlockBase *, VPBlockBase *> Old2NewVPBlocks;
   VPBlockBase *Exiting = nullptr;
   bool InRegion = Entry->getParent();
@@ -716,12 +725,14 @@ static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) {
   }
 #endif
 
-  return std::make_pair(Old2NewVPBlocks[Entry],
-                        Exiting ? Old2NewVPBlocks[Exiting] : nullptr);
+  return std::tuple(
+      Old2NewVPBlocks[Entry], Exiting ? Old2NewVPBlocks[Exiting] : nullptr,
+      ScalarHeader ? cast<VPIRBasicBlock>(Old2NewVPBlocks[ScalarHeader])
+                   : nullptr);
 }
 
 VPRegionBlock *VPRegionBlock::clone() {
-  const auto &[NewEntry, NewExiting] = cloneFrom(getEntry());
+  const auto &[NewEntry, NewExiting, _] = cloneFrom(getEntry());
   auto *NewRegion =
       new VPRegionBlock(NewEntry, NewExiting, getName(), isReplicator());
   for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry))
@@ -843,10 +854,6 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
 #endif
 
 VPlan::~VPlan() {
-  for (auto &KV : LiveOuts)
-    delete KV.second;
-  LiveOuts.clear();
-
   if (Entry) {
     VPValue DummyValue;
     for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
@@ -878,7 +885,9 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
   VPIRBasicBlock *Entry =
       VPIRBasicBlock::fromBasicBlock(TheLoop->getLoopPreheader());
   VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph");
-  auto Plan = std::make_unique<VPlan>(Entry, VecPreheader);
+  VPIRBasicBlock *ScalarHeader =
+      VPIRBasicBlock::fromBasicBlock(TheLoop->getHeader());
+  auto Plan = std::make_unique<VPlan>(Entry, VecPreheader, ScalarHeader);
 
   // Create SCEV and VPValue for the trip count.
 
@@ -909,6 +918,7 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
   VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion);
 
   VPBasicBlock *ScalarPH = new VPBasicBlock("scalar.ph");
+  VPBlockUtils::connectBlocks(ScalarPH, ScalarHeader);
   if (!RequiresScalarEpilogueCheck) {
     VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
     return Plan;
@@ -1054,6 +1064,8 @@ void VPlan::execute(VPTransformState *State) {
   BrInst->insertBefore(MiddleBB->getTerminator());
   MiddleBB->getTerminator()->eraseFromParent();
   State->CFG.DTU.applyUpdates({{DominatorTree::Delete, MiddleBB, ScalarPh}});
+  State->CFG.DTU.applyUpdates(
+      {{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
 
   // Generate code in the loop pre-header and body.
   for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
@@ -1172,12 +1184,6 @@ void VPlan::print(raw_ostream &O) const {
     Block->print(O, "", SlotTracker);
   }
 
-  if (!LiveOuts.empty())
-    O << "\n";
-  for (const auto &KV : LiveOuts) {
-    KV.second->print(O, SlotTracker);
-  }
-
   O << "}\n";
 }
 
@@ -1214,11 +1220,6 @@ LLVM_DUMP_METHOD
 void VPlan::dump() const { print(dbgs()); }
 #endif
 
-void VPlan::addLiveOut(PHINode *PN, VPValue *V) {
-  assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists");
-  LiveOuts.insert({PN, new VPLiveOut(PN, V)});
-}
-
 static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
                           DenseMap<VPValue *, VPValue *> &Old2NewVPValues) {
   // Update the operands of all cloned recipes starting at NewEntry. This
@@ -1260,10 +1261,12 @@ static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
 VPlan *VPlan::duplicate() {
   // Clone blocks.
   VPBasicBlock *NewPreheader = Preheader->clone();
-  const auto &[NewEntry, __] = cloneFrom(Entry);
+  const auto &[NewEntry, __, NewScalarHeader] =
+      cloneFrom(Entry, getScalarHeader());
 
   // Create VPlan, clone live-ins and remap operands in the cloned blocks.
-  auto *NewPlan = new VPlan(NewPreheader, cast<VPBasicBlock>(NewEntry));
+  auto *NewPlan =
+      new VPlan(NewPreheader, cast<VPBasicBlock>(NewEntry), NewScalarHeader);
   DenseMap<VPValue *, VPValue *> Old2NewVPValues;
   for (VPValue *OldLiveIn : VPLiveInsToFree) {
     Old2NewVPValues[OldLiveIn] =
@@ -1286,10 +1289,6 @@ VPlan *VPlan::duplicate() {
   remapOperands(Preheader, NewPreheader, Old2NewVPValues);
   remapOperands(Entry, NewEntry, Old2NewVPValues);
 
-  // Clone live-outs.
-  for (const auto &[_, LO] : LiveOuts)
-    NewPlan->addLiveOut(LO->getPhi(), Old2NewVPValues[LO->getOperand(0)]);
-
   // Initialize remaining fields of cloned VPlan.
   NewPlan->VFs = VFs;
   NewPlan->UFs = UFs;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 59a084401cc9bf..3803a660cf48a8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -675,48 +675,6 @@ class VPBlockBase {
   virtual VPBlockBase *clone() = 0;
 };
 
-/// A value that is used outside the VPlan. The operand of the user needs to be
-/// added to the associated phi node. The incoming block from VPlan is
-/// determined by where the VPValue is defined: if it is defined by a recipe
-/// outside a region, its parent block is used, otherwise the middle block is
-/// used.
-class VPLiveOut : public VPUser {
-  PHINode *Phi;
-
-public:
-  VPLiveOut(PHINode *Phi, VPValue *Op)
-      : VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {}
-
-  static inline bool classof(const VPUser *U) {
-    return U->getVPUserID() == VPUser::VPUserID::LiveOut;
-  }
-
-  /// Fix the wrapped phi node. This means adding an incoming value to exit
-  /// block phi's from the vector loop via middle block (values from scalar loop
-  /// already reach these phi's), and updating the value to scalar header phi's
-  /// from the scalar preheader.
-  void fixPhi(VPlan &Plan, VPTransformState &State);
-
-  /// Returns true if the VPLiveOut uses scalars of operand \p Op.
-  bool usesScalars(const VPValue *Op) const override {
-    assert(is_contained(operands(), Op) &&
-           "Op must be an operand of the recipe");
-    return true;
-  }
-
-  PHINode *getPhi() const { return Phi; }
-
-  /// Live-outs are marked as only using the first part during the transition
-  /// to unrolling directly on VPlan.
-  /// TODO: Remove after unroller transition.
-  bool onlyFirstPartUsed(const VPValue *Op) const override { return true; }
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-  /// Print the VPLiveOut to \p O.
-  void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
-#endif
-};
-
 /// Struct to hold various analysis needed for cost computations.
 struct VPCostContext {
   const TargetTransformInfo &TTI;
@@ -3592,6 +3550,9 @@ class VPlan {
   /// rest of VPlan execution.
   VPBasicBlock *Preheader;
 
+  /// VPIRBasicBlock wrapping the header of the original scalar loop.
+  VPIRBasicBlock *ScalarHeader;
+
   /// Holds the VFs applicable to this VPlan.
   SmallSetVector<ElementCount, 2> VFs;
 
@@ -3627,11 +3588,6 @@ class VPlan {
   /// definitions are VPValues that hold a pointer to their underlying IR.
   SmallVector<VPValue *, 16> VPLiveInsToFree;
 
-  /// Values used outside the plan. It contains live-outs that need fixing. Any
-  /// live-out that is fixed outside VPlan needs to be removed. The remaining
-  /// live-outs are fixed via VPLiveOut::fixPhi.
-  MapVector<PHINode *, VPLiveOut *> LiveOuts;
-
   /// Mapping from SCEVs to the VPValues representing their expansions.
   /// NOTE: This mapping is temporary and will be removed once all users have
   /// been modeled in VPlan directly.
@@ -3642,8 +3598,9 @@ class VPlan {
   /// and \p Entry to the plan. At the moment, \p Preheader and \p Entry need to
   /// be disconnected, as the bypass blocks between them are not yet modeled in
   /// VPlan.
-  VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry)
-      : VPlan(Preheader, Entry) {
+  VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry,
+        VPIRBasicBlock *ScalarHeader)
+      : VPlan(Preheader, Entry, ScalarHeader) {
     TripCount = TC;
   }
 
@@ -3651,13 +3608,16 @@ class VPlan {
   /// the plan. At the moment, \p Preheader and \p Entry need to be
   /// disconnected, as the bypass blocks between them are not yet modeled in
   /// VPlan.
-  VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry)
-      : Entry(Entry), Preheader(Preheader) {
+  VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry,
+        VPIRBasicBlock *ScalarHeader)
+      : Entry(Entry), Preheader(Preheader), ScalarHeader(ScalarHeader) {
     Entry->setPlan(this);
     Preheader->setPlan(this);
     assert(Preheader->getNumSuccessors() == 0 &&
            Preheader->getNumPredecessors() == 0 &&
            "preheader must be disconnected");
+    assert(ScalarHeader->getNumSuccessors() == 0 &&
+           "scalar header must be a leaf node");
   }
 
   ~VPlan();
@@ -3689,6 +3649,14 @@ class VPlan {
   VPBasicBlock *getEntry() { return Entry; }
   const VPBasicBlock *getEntry() const { return Entry; }
 
+  /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
+  VPIRBasicBlock *getScalarHeader() { return ScalarHeader; }
+
+  /// Return the VPBasicBlock for the preheader of the scalar loop.
+  VPBasicBlock *getScalarPreheader() {
+    return cast<VPBasicBlock>(ScalarHeader->getSinglePredecessor());
+  }
+
   /// The trip count of the original loop.
   VPValue *getTripCount() const {
     assert(TripCount && "trip count needs to be set before accessing it");
@@ -3811,12 +3779,6 @@ class VPlan {
     return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
   }
 
-  void addLiveOut(PHINode *PN, VPValue *V);
-
-  const MapVector<PHINode *, VPLiveOut *> &getLiveOuts() const {
-    return LiveOuts;
-  }
-
   VPValue *getSCEVExpansion(const SCEV *S) const {
     return SCEVToExpansion.lookup(S);
   }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 41f13cc2d9a978..ab7318b67d9bb1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -210,35 +210,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
   }
 }
 
-void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
-  VPValue *ExitValue = getOperand(0);
-  VPBasicBlock *MiddleVPBB =
-      cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
-  VPRecipeBase *ExitingRecipe = ExitValue->getDefiningRecipe();
-  auto *ExitingVPBB = ExitingRecipe ? ExitingRecipe->getParent() : nullptr;
-  // Values leaving the vector loop reach live out phi's in the exiting block
-  // via middle block.
-  auto *PredVPBB = !ExitingVPBB || ExitingVPBB->getEnclosingLoopRegion()
-                       ? MiddleVPBB
-                       : ExitingVPBB;
-  BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
-  Value *V = State.get(ExitValue, VPLane(0));
-  if (Phi->getBasicBlockIndex(PredBB) != -1)
-    Phi->setIncomingValueForBlock(PredBB, V);
-  else
-    Phi->addIncoming(V, PredBB);
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void VPLiveOut::print(raw_ostream &O, VPSlotTracker &SlotTracker) const {
-  O << "Live-out ";
-  getPhi()->printAsOperand(O);
-  O << " = ";
-  getOperand(0)->printAsOperand(O, SlotTracker);
-  O << "\n";
-}
-#endif
-
 void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
   assert(!Parent && "Recipe already in some VPBasicBlock");
   assert(InsertPos->getParent() &&
@@ -869,7 +840,10 @@ void VPIRInstruction::execute(VPTransformState &State) {
     State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
     Value *V = State.get(ExitValue, VPLane(Lane));
     auto *Phi = cast<PHINode>(&I);
-    Phi->addIncoming(V, PredBB);
+    if (Phi->getBasicBlockIndex(PredBB) == -1)
+      Phi->addIncoming(V, PredBB);
+    else
+      Phi->setIncomingValueForBlock(PredBB, V);
   }
 
   // Advance the insert point after the wrapped IR instruction. This allows
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index d50f3c0c3f3e04..9d3bb83644077f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -379,7 +379,7 @@ static bool mergeBlocksIntoPredecessors(VPlan &Plan) {
     // Don't fold the exit block of the Plan into its single predecessor for
     // now.
     // TODO: Remove restriction once more of the skeleton is modeled in VPlan.
-    if (VPBB->getNumSuccessors() == 0 && !VPBB->getParent())
+    if (!VPBB->getParent())
       continue;
     auto *PredVPBB =
         dyn_cast_or_null<VPBasicBlock>(VPBB->getSinglePredecessor());
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index ca78f32506ef71..3fa3100b53c730 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -264,6 +264,13 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
     return;
 
   if (auto *VPI = dyn_cast<VPInstruction>(&R)) {
+    VPValue *Op0, *Op1;
+    if (match(VPI, m_VPInstruction<VPInstruction::ResumePhi>(m_VPValue(Op0),
+                                                             m_VPValue(Op1)))) {
+      addUniformForAllParts(VPI);
+      return;
+    }
+
     if (vputils::onlyFirstPartUsed(VPI)) {
       addUniformForAllParts(VPI);
       return;
@@ -449,11 +456,5 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) {
     Part++;
   }
 
-  // Remap the operand of live-outs to the last part.
-  for (const auto &[_, LO] : Plan.getLiveOuts()) {
-    VPValue *In = Unroller.getValueForPart(LO->getOperand(0), UF - 1);
-    LO->setOperand(0, In);
-  }
-
   VPlanTransforms::removeDeadRecipes(Plan);
 }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 7ea5ee341cc547..3b7ba61454899d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -248,14 +248,6 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) {
     return false;
   }
 
-  VPBlockBase *MiddleBB =
-      IRBB->getPlan()->getVectorLoopRegion()->getSingleSuccessor();
-  if (IRBB != IRBB->getPlan()->getPreheader() &&
-      IRBB->getSinglePredecessor() != MiddleBB) {
-    errs() << "VPIRBasicBlock can only be used as pre-header or a successor of "
-              "middle-block at the moment!\n";
-    return false;
-  }
   return true;
 }
 
@@ -420,12 +412,6 @@ bool VPlanVerifier::verify(const VPlan &Plan) {
     return false;
   }
 
-  for (const auto &KV : Plan.getLiveOuts())
-    if (KV.second->getNumOperands() != 1) {
-      errs() << "live outs must have a single operand\n";
-      return false;
-    }
-
   return true;
 }
 
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll
index 9be068ce880ea8..6257d3325f9796 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll
@@ -45,6 +45,11 @@ target triple = "aarch64-unknown-linux-gnu"
 ; CHECK-NEXT: No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<for.body>:
+; CHECK-NEXT:   IR   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+; CHECK:        IR   %exitcond = icmp eq i64 %iv.next, %N
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 
@@ -82,6 +87,11 @@ target triple = "aarch64-unknown-linux-gnu"
 ; CHECK-NEXT: No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<for.body>:
+; CHECK-NEXT:   IR   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+; CHECK:        IR   %exitcond = icmp eq i64 %iv.next, %N
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll
index 74fd76df992597..994f2f5e377632 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll
@@ -43,6 +43,11 @@ target triple = "aarch64-unknown-linux-gnu"
 ; CHECK-NEXT: No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<for.body>:
+; CHECK-NEXT:   IR   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+; CHECK:        IR   %exitcond = icmp eq i64 %indvars.iv.next, 1024
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 
@@ -80,6 +85,11 @@ target triple = "aarch64-unknown-linux-gnu"
 ; CHECK-NEXT: No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<for.body>:
+; CHECK-NEXT:   IR   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+; CHECK:        IR   %exitcond = icmp eq i64 %indvars.iv.next, 1024
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 
@@ -122,6 +132,11 @@ target triple = "aarch64-unknown-linux-gnu"
 ; CHECK-NEXT: No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<for.body>:
+; CHECK-NEXT:   IR   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+; CHECK:        IR   %exitcond = icmp eq i64 %indvars.iv.next, 1024
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 
@@ -159,6 +174,11 @@ target triple = "aarch64-unknown-linux-gnu"
 ; CHECK-NEXT: No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<for.body>:
+; CHECK-NEXT:   IR   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+; CHECK:        IR   %exitcond = icmp eq i64 %indvars.iv.next, 1024
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 
@@ -200,6 +220,11 @@ target triple = "aarch64-unknown-linux-gnu"
 ; CHECK-NEXT: No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<for.body>:
+; CHECK-NEXT:   IR   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+; CHECK:        IR   %exitcond = icmp eq i64 %indvars.iv.next, 1024
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 
@@ -237,6 +262,11 @@ target triple = "aarch64-unknown-linux-gnu"
 ; CHECK-NEXT: No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<for.body>:
+; CHECK-NEXT:   IR   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+; CHECK:        IR   %exitcond = icmp eq i64 %indvars.iv.next, 1024
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll b/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll
index e9303ec9d3eb76..afc2fd5a049ad1 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll
@@ -41,6 +41,11 @@ target triple = "arm64-apple-ios"
 ; CHECK-NEXT: No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT:   IR   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK:        IR   %cmp = icmp ne i64 %iv.next, 1024
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 
@@ -78,6 +83,11 @@ target triple = "arm64-apple-ios"
 ; CHECK-NEXT: No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT:   IR   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK:        IR   %cmp = icmp ne i64 %iv.next, 1024
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
index 9a716f7756072e..70de36441fc961 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
@@ -95,6 +95,12 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:  No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  scalar.ph:
+; CHECK-NEXT:  Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT:  ir-bb<for.body>:
+; CHECK-NEXT:    IR   %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT:    IR   %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK:         IR   %indvars.iv.next = add nsw i64 %indvars.iv, -1
 ; CHECK-NEXT:  No successors
 ; CHECK-NEXT:  }
 ; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
@@ -179,6 +185,12 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:  No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  scalar.ph:
+; CHECK-NEXT:  Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT:  ir-bb<for.body>:
+; CHECK-NEXT:    IR   %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT:    IR   %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK:         IR   %indvars.iv.next = add nsw i64 %indvars.iv, -1
 ; CHECK-NEXT:  No successors
 ; CHECK-NEXT:  }
 ; CHECK-NEXT:  LV: Loop does not require scalar epilogue
@@ -300,6 +312,12 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:  No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  scalar.ph:
+; CHECK-NEXT:  Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT:  ir-bb<for.body>:
+; CHECK-NEXT:    IR   %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT:    IR   %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK:         IR   %indvars.iv.next = add nsw i64 %indvars.iv, -1
 ; CHECK-NEXT:  No successors
 ; CHECK-NEXT:  }
 ; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
@@ -384,6 +402,12 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:  No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  scalar.ph:
+; CHECK-NEXT:  Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT:  ir-bb<for.body>:
+; CHECK-NEXT:    IR   %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT:    IR   %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK:         IR   %indvars.iv.next = add nsw i64 %indvars.iv, -1
 ; CHECK-NEXT:  No successors
 ; CHECK-NEXT:  }
 ; CHECK-NEXT:  LV: Loop does not require scalar epilogue
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
index 1326751a847d7d..5b400acea9ef28 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
@@ -65,6 +65,12 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-INLOOP-NEXT: No successors
 ; IF-EVL-INLOOP-EMPTY:
 ; IF-EVL-INLOOP-NEXT: scalar.ph:
+; IF-EVL-INLOOP-NEXT: Successor(s): ir-bb<for.body>
+; IF-EVL-INLOOP-EMPTY:
+; IF-EVL-INLOOP-NEXT: ir-bb<for.body>:
+; IF-EVL-INLOOP-NEXT:   IR   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+; IF-EVL-INLOOP-NEXT:   IR   %rdx = phi i32 [ %start, %entry ], [ %add, %for.body ]
+; IF-EVL-INLOOP:        IR   %exitcond.not = icmp eq i64 %iv.next, %n
 ; IF-EVL-INLOOP-NEXT: No successors
 ; IF-EVL-INLOOP-NEXT: }
 ;
@@ -104,6 +110,12 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
 ; NO-VP-OUTLOOP-NEXT: No successors
 ; NO-VP-OUTLOOP-EMPTY:
 ; NO-VP-OUTLOOP-NEXT: scalar.ph:
+; NO-VP-OUTLOOP-NEXT: Successor(s): ir-bb<for.body>
+; NO-VP-OUTLOOP-EMPTY:
+; NO-VP-OUTLOOP-NEXT: ir-bb<for.body>:
+; NO-VP-OUTLOOP-NEXT:   IR   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+; NO-VP-OUTLOOP-NEXT:   IR   %rdx = phi i32 [ %start, %entry ], [ %add, %for.body ]
+; NO-VP-OUTLOOP:        IR   %exitcond.not = icmp eq i64 %iv.next, %n
 ; NO-VP-OUTLOOP-NEXT: No successors
 ; NO-VP-OUTLOOP-NEXT: }
 ;
@@ -143,6 +155,12 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
 ; NO-VP-INLOOP-NEXT: No successors
 ; NO-VP-INLOOP-EMPTY:
 ; NO-VP-INLOOP-NEXT: scalar.ph:
+; NO-VP-INLOOP-NEXT: Successor(s): ir-bb<for.body>
+; NO-VP-INLOOP-EMPTY:
+; NO-VP-INLOOP-NEXT: ir-bb<for.body>:
+; NO-VP-INLOOP-NEXT:   IR   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+; NO-VP-INLOOP-NEXT:   IR   %rdx = phi i32 [ %start, %entry ], [ %add, %for.body ]
+; NO-VP-INLOOP:        IR   %exitcond.not = icmp eq i64 %iv.next, %n
 ; NO-VP-INLOOP-NEXT: No successors
 ; NO-VP-INLOOP-NEXT: }
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll
index 9de675b2853097..9c6415cd2c122d 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll
@@ -45,10 +45,14 @@ define void @test_chained_first_order_recurrences_1(ptr %ptr) {
 ; CHECK-NEXT:  scalar.ph
 ; CHECK-NEXT:    EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<22>
 ; CHECK-NEXT:    EMIT vp<[[RESUME_2_P:%.*]]>.1 = resume-phi vp<[[RESUME_2]]>.1, ir<33>
-; CHECK-NEXT:  No successors
+; CHECK-NEXT:  Successor(s): ir-bb<loop>
 ; CHECK-EMPTY:
-; CHECK-NEXT: Live-out i16 %for.1 = vp<[[RESUME_1_P]]>
-; CHECK-NEXT: Live-out i16 %for.2 = vp<[[RESUME_2_P]]>.1
+; CHECK-NEXT:  ir-bb<loop>:
+; CHECK-NEXT:    IR   %for.1 = phi i16 [ 22, %entry ], [ %for.1.next, %loop ] (extra operand: vp<[[RESUME_1_P]]>)
+; CHECK-NEXT:    IR   %for.2 = phi i16 [ 33, %entry ], [ %for.1, %loop ] (extra operand: vp<[[RESUME_2_P]]>.1)
+; CHECK-NEXT:    IR   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK:         IR   %exitcond.not = icmp eq i64 %iv.next, 1000
+; CHECK-NEXT:  No successors
 ; CHECK-NEXT: }
 ;
 entry:
@@ -118,11 +122,15 @@ define void @test_chained_first_order_recurrences_3(ptr %ptr) {
 ; CHECK-NEXT:    EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<22>
 ; CHECK-NEXT:    EMIT vp<[[RESUME_2_P:%.*]]>.1 = resume-phi vp<[[RESUME_2]]>.1, ir<33>
 ; CHECK-NEXT:    EMIT vp<[[RESUME_3_P:%.*]]>.2 = resume-phi vp<[[RESUME_3]]>.2, ir<33>
-; CHECK-NEXT: No successors
+; CHECK-NEXT:  Successor(s): ir-bb<loop>
 ; CHECK-EMPTY:
-; CHECK-NEXT: Live-out i16 %for.1 = vp<[[RESUME_1_P]]>
-; CHECK-NEXT: Live-out i16 %for.2 = vp<[[RESUME_2_P]]>.1
-; CHECK-NEXT: Live-out i16 %for.3 = vp<[[RESUME_3_P]]>.2
+; CHECK-NEXT:  ir-bb<loop>:
+; CHECK-NEXT:    IR   %for.1 = phi i16 [ 22, %entry ], [ %for.1.next, %loop ] (extra operand: vp<[[RESUME_1_P]]>)
+; CHECK-NEXT:    IR   %for.2 = phi i16 [ 33, %entry ], [ %for.1, %loop ] (extra operand: vp<[[RESUME_2_P]]>.1)
+; CHECK-NEXT:    IR   %for.3 = phi i16 [ 33, %entry ], [ %for.2, %loop ] (extra operand: vp<[[RESUME_3_P]]>.2)
+; CHECK-NEXT:    IR   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK:         IR   %exitcond.not = icmp eq i64 %iv.next, 1000
+; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;
 entry:
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
index 8e56614a2e3d5c..4cc8fec60be03f 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
@@ -82,9 +82,13 @@ define void @sink_replicate_region_1(i32 %x, ptr %ptr, ptr noalias %dst) optsize
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph
 ; CHECK-NEXT:   EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0>
-; CHECK-NEXT: No successors
+; CHECK-NEXT: Successor(s): ir-bb<loop>
 ; CHECK-EMPTY:
-; CHECK-NEXT: Live-out i32 %0 = vp<[[RESUME_1_P]]>
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT:   IR   %0 = phi i32 [ 0, %entry ], [ %conv, %loop ] (extra operand: vp<[[RESUME_1_P]]>)
+; CHECK-NEXT:   IR   %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK:        IR   %ec = icmp eq i32 %iv.next, 20001
+; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;
 entry:
@@ -165,9 +169,13 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize {
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph
 ; CHECK-NEXT:   EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0>
-; CHECK-NEXT: No successors
+; CHECK-NEXT: Successor(s): ir-bb<loop>
 ; CHECK-EMPTY:
-; CHECK-NEXT: Live-out i32 %recur = vp<[[RESUME_1_P]]>
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT:   IR   %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ] (extra operand: vp<[[RESUME_1_P]]>)
+; CHECK-NEXT:   IR   %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK:        IR   %ec = icmp eq i32 %iv.next, 20001
+; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;
 entry:
@@ -232,9 +240,14 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize {
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph
 ; CHECK-NEXT:   EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0>
-; CHECK-NEXT: No successors
+; CHECK-NEXT: Successor(s): ir-bb<loop>
 ; CHECK-EMPTY:
-; CHECK-NEXT: Live-out i32 %recur = vp<[[RESUME_1_P]]>
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT:   IR   %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ] (extra operand: vp<[[RESUME_1_P]]>)
+; CHECK-NEXT:   IR   %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-NEXT:   IR   %and.red = phi i32 [ 1234, %entry ], [ %and.red.next, %loop ]
+; CHECK:        IR   %ec = icmp eq i32 %iv.next, 20001
+; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;
 entry:
@@ -338,9 +351,13 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph
 ; CHECK-NEXT:   EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0>
-; CHECK-NEXT: No successors
+; CHECK-NEXT: Successor(s): ir-bb<loop>
 ; CHECK-EMPTY:
-; CHECK-NEXT: Live-out i32 %0 = vp<[[RESUME_1_P]]>
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT:   IR   %0 = phi i32 [ 0, %entry ], [ %conv, %loop ] (extra operand: vp<[[RESUME_1_P]]>)
+; CHECK-NEXT:   IR   %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK:        IR   %ec = icmp eq i32 %iv.next, 20001
+; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;
 entry:
@@ -431,9 +448,13 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph
 ; CHECK-NEXT:   EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0>
-; CHECK-NEXT: No successors
+; CHECK-NEXT: Successor(s): ir-bb<loop>
 ; CHECK-EMPTY:
-; CHECK-NEXT: Live-out i32 %recur = vp<[[RESUME_1_P]]>
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT:   IR   %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ] (extra operand: vp<[[RESUME_1_P]]>)
+; CHECK-NEXT:   IR   %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK:        IR   %C = icmp sgt i32 %iv.next, %recur.next
+; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;
 entry:
@@ -513,9 +534,13 @@ define void @need_new_block_after_sinking_pr56146(i32 %x, ptr %src, ptr noalias
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph
 ; CHECK-NEXT:   EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0>
-; CHECK-NEXT: No successors
+; CHECK-NEXT: Successor(s): ir-bb<loop>
 ; CHECK-EMPTY:
-; CHECK-NEXT: Live-out i32 %.pn = vp<[[RESUME_1_P]]>
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT:   IR   %iv = phi i64 [ 2, %entry ], [ %iv.next, %loop ]
+; CHECK-NEXT:   IR   %.pn = phi i32 [ 0, %entry ], [ %l, %loop ] (extra operand: vp<[[RESUME_1_P]]>)
+; CHECK:        IR   %ec = icmp ugt i64 %iv, 3
+; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;
 entry:
diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
index b7f8ddbfa5d7c1..a71666d8c3167a 100644
--- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
@@ -116,6 +116,12 @@ declare i32 @llvm.smin.i32(i32, i32)
 ; DBG-NEXT: No successors
 ; DBG-EMPTY:
 ; DBG-NEXT: scalar.ph:
+; DBG-NEXT: Successor(s): ir-bb<loop.header>
+; DBG-EMPTY:
+; DBG-NEXT: ir-bb<loop.header>:
+; DBG-NEXT:   IR   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+; DBG-NEXT:   IR   %d = phi i1 [ false, %entry ], [ %d.next, %loop.latch ]
+; DBG-NEXT:   IR   %d.next = xor i1 %d, true
 ; DBG-NEXT: No successors
 ; DBG-NEXT: }
 
@@ -217,9 +223,13 @@ exit:
 ; DBG-EMPTY:
 ; DBG-NEXT: scalar.ph:
 ; DBG-NEXT:  EMIT vp<[[RESUME_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0>
-; DBG-NEXT: No successors
+; DBG-NEXT: Successor(s): ir-bb<loop>
 ; DBG-EMPTY:
-; DBG-NEXT: Live-out i32 %for = vp<[[RESUME_P]]>
+; DBG-NEXT: ir-bb<loop>:
+; DBG-NEXT:   IR   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+; DBG-NEXT:   IR   %for = phi i32 [ 0, %entry ], [ %iv.trunc, %loop ] (extra operand: vp<[[RESUME_P]]>)
+; DBG:        IR   %ec = icmp slt i32 %iv.next.trunc, %n
+; DBG-NEXT: No successors
 ; DBG-NEXT: }
 
 define void @first_order_recurrence_using_induction(i32 %n, ptr %dst) {
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll
index cab784b61c5449..1f815899ed55c2 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll
@@ -35,6 +35,11 @@ define void @iv_no_binary_op_in_descriptor(i1 %c, ptr %dst) {
 ; CHECK-NEXT:  No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  scalar.ph:
+; CHECK-NEXT:  Successor(s): ir-bb<loop.header>
+; CHECK-EMPTY:
+; CHECK-NEXT:  ir-bb<loop.header>:
+; CHECK-NEXT:    IR   %iv = phi i64 [ 0, %entry ], [ %iv.next.p, %loop.latch ]
+; CHECK:         IR   %iv.next = add i64 %iv, 1
 ; CHECK-NEXT:  No successors
 ; CHECK-NEXT: }
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
index a1e28999a40026..53f5a5658fb68d 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
@@ -91,6 +91,11 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
 ; CHECK-NEXT: No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<loop.header>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop.header>:
+; CHECK-NEXT:   IR   %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
+; CHECK-NEXT:   IR   %l = load i8, ptr %ptr.iv, align 1
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
index 81c56f75900793..c9612ced3eee01 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
@@ -46,6 +46,12 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) {
 ; CHECK-NEXT: No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT:   IR   %iv = phi i64 [ %and, %entry ], [ %iv.next, %loop ]
+; CHECK-NEXT:   IR   %p.src = phi ptr [ %A, %entry ], [ %p.src.next, %loop ]
+; CHECK:        IR   %cmp = icmp eq i64 %iv.next, 0
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;
@@ -93,6 +99,12 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) {
 ; CHECK-NEXT: No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT:   IR   %iv = phi i64 [ %and, %entry ], [ %iv.next, %loop ]
+; CHECK-NEXT:   IR   %p.src = phi ptr [ %A, %entry ], [ %p.src.next, %loop ]
+; CHECK:        IR   %cmp = icmp eq i64 %iv.next, 0
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
index 8c7a4e57f9d358..50d406d0c04164 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
@@ -51,6 +51,13 @@ define void @foo(i64 %n) {
 ; CHECK-NEXT: No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<outer.header>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<outer.header>:
+; CHECK-NEXT:   IR   %outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %outer.latch ]
+; CHECK-NEXT:   IR   %gep.1 = getelementptr inbounds [8 x i64], ptr @arr2, i64 0, i64 %outer.iv
+; CHECK-NEXT:   IR   store i64 %outer.iv, ptr %gep.1, align 4
+; CHECK-NEXT:   IR   %add = add nsw i64 %outer.iv, %n
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 entry:
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
index 0dde507d08be74..fe9d2aff3d05ca 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
@@ -42,6 +42,11 @@ define void @print_call_and_memory(i64 %n, ptr noalias %y, ptr noalias %x) nounw
 ; CHECK-NEXT:  No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  scalar.ph
+; CHECK-NEXT:  Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT:  ir-bb<for.body>:
+; CHECK-NEXT:    IR   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %for.body.preheader ]
+; CHECK:         IR   %exitcond = icmp eq i64 %iv.next, %n
 ; CHECK-NEXT:  No successors
 ; CHECK-NEXT: }
 ;
@@ -104,6 +109,11 @@ define void @print_widen_gep_and_select(i64 %n, ptr noalias %y, ptr noalias %x,
 ; CHECK-NEXT:  No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  scalar.ph
+; CHECK-NEXT:  Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT:  ir-bb<for.body>:
+; CHECK-NEXT:    IR   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %for.body.preheader ]
+; CHECK:         IR   %exitcond = icmp eq i64 %iv.next, %n
 ; CHECK-NEXT:  No successors
 ; CHECK-NEXT: }
 ;
@@ -165,6 +175,11 @@ define float @print_reduction(i64 %n, ptr noalias %y) {
 ; CHECK-NEXT: No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph
+; CHECK-NEXT:  Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT:  ir-bb<for.body>:
+; CHECK-NEXT:    IR   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
+; CHECK:         IR   %exitcond = icmp eq i64 %iv.next, %n
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;
@@ -221,6 +236,12 @@ define void @print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr no
 ; CHECK-NEXT: No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph
+; CHECK-NEXT:  Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT:  ir-bb<for.body>:
+; CHECK-NEXT:    IR   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
+; CHECK-NEXT:    IR   %red = phi float [ %red.next, %for.body ], [ 0.000000e+00, %entry ]
+; CHECK:         IR   %exitcond = icmp eq i64 %iv.next, %n
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;
@@ -300,6 +321,11 @@ define void @print_replicate_predicated_phi(i64 %n, ptr %x) {
 ; CHECK-NEXT: No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph
+; CHECK-NEXT:  Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT:  ir-bb<for.body>:
+; CHECK-NEXT:    IR   %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ]
+; CHECK-NEXT:    IR   %cmp = icmp ult i64 %i, 5
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;
@@ -372,6 +398,11 @@ define void @print_interleave_groups(i32 %C, i32 %D) {
 ; CHECK-NEXT: No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph
+; CHECK-NEXT:  Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT:  ir-bb<for.body>:
+; CHECK-NEXT:    IR   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+; CHECK:         IR   %cmp = icmp slt i64 %iv.next, 1024
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;
@@ -447,6 +478,12 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) {
 ; CHECK-NEXT: No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph
+; CHECK-NEXT:  Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT:  ir-bb<for.body>:
+; CHECK-NEXT:    IR   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+; CHECK-NEXT:    IR   %sum.07 = phi float [ 0.000000e+00, %entry ], [ %muladd, %for.body ]
+; CHECK:         IR   %exitcond.not = icmp eq i64 %iv.next, %n
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT:}
 
@@ -529,7 +566,12 @@ define void @debug_loc_vpinstruction(ptr nocapture %asd, ptr nocapture %bsd) !db
 ; CHECK-NEXT: ir-bb<exit>
 ; CHECK-NEXT: No successors
 ; CHECK-EMPTY:
-; CHECK-NEXT: scalar.ph
+; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT:  Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT:  ir-bb<loop>:
+; CHECK-NEXT:    IR   %iv = phi i64 [ 0, %entry ], [ %iv.next, %if.end ]
+; CHECK:         IR   %cmp1 = icmp slt i32 %lsd, 100
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT:}
 ;
@@ -610,6 +652,11 @@ define void @print_expand_scev(i64 %y, ptr %ptr) {
 ; CHECK-NEXT: No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph
+; CHECK-NEXT:  Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT:  ir-bb<loop>:
+; CHECK-NEXT:    IR   %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
+; CHECK:         IR   %iv.next = add i64 %iv, %inc
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;
@@ -670,6 +717,11 @@ define i32 @print_exit_value(ptr %ptr, i32 %off) {
 ; CHECK-NEXT: No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph
+; CHECK-NEXT:  Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT:  ir-bb<loop>:
+; CHECK-NEXT:    IR   %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK:         IR   %ec = icmp eq i32 %iv.next, 1000
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;
@@ -728,6 +780,11 @@ define void @print_fast_math_flags(i64 %n, ptr noalias %y, ptr noalias %x, ptr %
 ; CHECK-NEXT: No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph
+; CHECK-NEXT:  Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT:  ir-bb<loop>:
+; CHECK-NEXT:    IR   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK:         IR   %exitcond = icmp eq i64 %iv.next, %n
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;
@@ -788,6 +845,11 @@ define void @print_exact_flags(i64 %n, ptr noalias %x) {
 ; CHECK-NEXT: No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph
+; CHECK-NEXT:  Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT:  ir-bb<loop>:
+; CHECK-NEXT:    IR   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK:         IR   %exitcond = icmp eq i64 %iv.next, %n
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;
@@ -868,6 +930,11 @@ define void @print_call_flags(ptr readonly %src, ptr noalias %dest, i64 %n) {
 ; CHECK-NEXT: No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph
+; CHECK-NEXT:  Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT:  ir-bb<for.body>:
+; CHECK-NEXT:    IR   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.loop ]
+; CHECK:         IR   %ifcond = fcmp oeq float %ld.value, 5.0
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;
@@ -937,6 +1004,11 @@ define void @print_disjoint_flags(i64 %n, ptr noalias %x) {
 ; CHECK-NEXT: No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph
+; CHECK-NEXT:  Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT:  ir-bb<loop>:
+; CHECK-NEXT:    IR   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK:         IR   %exitcond = icmp eq i64 %iv.next, %n
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;
@@ -1041,9 +1113,13 @@ define i16 @print_first_order_recurrence_and_result(ptr %ptr) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph
 ; CHECK-NEXT:   EMIT vp<[[RESUME_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<22>
-; CHECK-NEXT: No successors
+; CHECK-NEXT:  Successor(s): ir-bb<loop>
 ; CHECK-EMPTY:
-; CHECK-NEXT:   Live-out i16 %for.1 = vp<[[RESUME_P]]>
+; CHECK-NEXT:  ir-bb<loop>:
+; CHECK-NEXT:    IR   %for.1 = phi i16 [ 22, %entry ], [ %for.1.next, %loop ] (extra operand: vp<[[RESUME_P]]>)
+; CHECK-NEXT:    IR   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK:         IR   %exitcond.not = icmp eq i64 %iv.next, 1000
+; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;
 entry:
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll
index f846ba0166b2cb..cdeffeff84d03a 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll
@@ -53,6 +53,12 @@ define void @sink_with_sideeffects(i1 %c, ptr %ptr) {
 ; CHECK-NEXT:  No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  scalar.ph:
+; CHECK-NEXT:  Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT:  ir-bb<for.body>:
+; CHECK-NEXT:    IR   %tmp0 = phi i64 [ %tmp6, %for.inc ], [ 0, %entry ]
+; CHECK-NEXT:    IR   %tmp1 = phi i64 [ %tmp7, %for.inc ], [ 0, %entry ]
+; CHECK:         IR   %tmp5 = trunc i32 %tmp4 to i8
 ; CHECK-NEXT:  No successors
 ; CHECK-NEXT: }
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
index 0f3cd9d4ca4d61..2dddf766cb9cda 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
@@ -1077,6 +1077,17 @@ define void @merge_with_dead_gep_between_regions(i32 %n, ptr noalias %src, ptr n
 ; CHECK-NEXT: No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph
+; CHECK-NEXT: Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT:   IR   %iv = phi i32 [ %n, %entry ], [ %iv.next, %loop ]
+; CHECK-NEXT:   IR   %iv.next = add nsw i32 %iv, -1
+; CHECK-NEXT:   IR   %gep.src = getelementptr inbounds i32, ptr %src, i32 %iv
+; CHECK-NEXT:   IR   %l = load i32, ptr %gep.src, align 16
+; CHECK-NEXT:   IR   %dead_gep = getelementptr inbounds i32, ptr %dst, i64 1
+; CHECK-NEXT:   IR   %gep.dst = getelementptr inbounds i32, ptr %dst, i32 %iv
+; CHECK-NEXT:   IR   store i32 %l, ptr %gep.dst, align 16
+; CHECK-NEXT:   IR   %ec = icmp eq i32 %iv.next, 0
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;
@@ -1156,6 +1167,13 @@ define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) {
 ; CHECK-NEXT: No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<loop.header>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop.header>:
+; CHECK-NEXT:   IR   %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
+; CHECK-NEXT:   IR   %ptr.iv.next = getelementptr inbounds i8, ptr %ptr.iv, i64 -1
+; CHECK-NEXT:   IR   %l = load i8, ptr %ptr.iv.next, align 1
+; CHECK-NEXT:   IR   %c.1 = icmp eq i8 %l, 0
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;
diff --git a/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp b/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp
index bb064b5f63b724..37c505e8b37904 100644
--- a/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp
@@ -41,7 +41,11 @@ TEST(VPDominatorTreeTest, DominanceNoRegionsTest) {
   VPBlockUtils::connectBlocks(VPBB3, VPBB4);
 
   auto TC = std::make_unique<VPValue>();
-  VPlan Plan(VPPH, &*TC, VPBB0);
+  LLVMContext C;
+  auto ScalarHeader = std::make_unique<BasicBlock *>(BasicBlock::Create(C, ""));
+  VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+  VPlan Plan(VPPH, &*TC, VPBB0, ScalarHeaderVPBB);
+
   VPDominatorTree VPDT;
   VPDT.recalculate(Plan);
 
@@ -71,6 +75,8 @@ checkDomChildren(VPDominatorTree &VPDT, VPBlockBase *Src,
 }
 
 TEST(VPDominatorTreeTest, DominanceRegionsTest) {
+  LLVMContext C;
+  auto ScalarHeader = std::make_unique<BasicBlock *>(BasicBlock::Create(C, ""));
   {
     // 2 consecutive regions.
     // VPBB0
@@ -115,7 +121,8 @@ TEST(VPDominatorTreeTest, DominanceRegionsTest) {
     VPBlockUtils::connectBlocks(R1, R2);
 
     auto TC = std::make_unique<VPValue>();
-    VPlan Plan(VPPH, &*TC, VPBB0);
+    VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+    VPlan Plan(VPPH, &*TC, VPBB0, ScalarHeaderVPBB);
     VPDominatorTree VPDT;
     VPDT.recalculate(Plan);
 
@@ -195,7 +202,8 @@ TEST(VPDominatorTreeTest, DominanceRegionsTest) {
     VPBlockUtils::connectBlocks(R1, VPBB2);
 
     auto TC = std::make_unique<VPValue>();
-    VPlan Plan(VPPH, &*TC, VPBB1);
+    VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+    VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
     VPDominatorTree VPDT;
     VPDT.recalculate(Plan);
 
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
index 00a3c737c0e47a..33d21d9f3f94f4 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
@@ -145,6 +145,18 @@ compound=true
   ]
   N6 [label =
     "scalar.ph:\l" +
+    "Successor(s): ir-bb\<for.body\>\l"
+  ]
+  N6 -> N7 [ label=""]
+  N7 [label =
+    "ir-bb\<for.body\>:\l" +
+    "  IR   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\l" +
+    "  IR   %arr.idx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv\l" +
+    "  IR   %l1 = load i32, ptr %arr.idx, align 4\l" +
+    "  IR   %res = add i32 %l1, 10\l" +
+    "  IR   store i32 %res, ptr %arr.idx, align 4\l" +
+    "  IR   %indvars.iv.next = add i64 %indvars.iv, 1\l" +
+    "  IR   %exitcond = icmp ne i64 %indvars.iv.next, %N\l" +
     "No successors\l"
   ]
 }
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
index 0f170efac207b7..ae0122cd67916d 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
@@ -238,6 +238,8 @@ TEST(VPInstructionTest, releaseOperandsAtDeletion) {
   delete VPV2;
 }
 TEST(VPBasicBlockTest, getPlan) {
+  LLVMContext C;
+  auto ScalarHeader = std::make_unique<BasicBlock *>(BasicBlock::Create(C, ""));
   {
     VPBasicBlock *VPPH = new VPBasicBlock("ph");
     VPBasicBlock *VPBB1 = new VPBasicBlock();
@@ -256,7 +258,8 @@ TEST(VPBasicBlockTest, getPlan) {
     VPBlockUtils::connectBlocks(VPBB3, VPBB4);
 
     auto TC = std::make_unique<VPValue>();
-    VPlan Plan(VPPH, &*TC, VPBB1);
+    VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+    VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
 
     EXPECT_EQ(&Plan, VPBB1->getPlan());
     EXPECT_EQ(&Plan, VPBB2->getPlan());
@@ -276,7 +279,8 @@ TEST(VPBasicBlockTest, getPlan) {
     VPBlockUtils::connectBlocks(VPBB1, R1);
 
     auto TC = std::make_unique<VPValue>();
-    VPlan Plan(VPPH, &*TC, VPBB1);
+    VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+    VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
 
     EXPECT_EQ(&Plan, VPBB1->getPlan());
     EXPECT_EQ(&Plan, R1->getPlan());
@@ -306,7 +310,8 @@ TEST(VPBasicBlockTest, getPlan) {
     VPBlockUtils::connectBlocks(R2, VPBB2);
 
     auto TC = std::make_unique<VPValue>();
-    VPlan Plan(VPPH, &*TC, VPBB1);
+    VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+    VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
 
     EXPECT_EQ(&Plan, VPBB1->getPlan());
     EXPECT_EQ(&Plan, R1->getPlan());
@@ -320,6 +325,8 @@ TEST(VPBasicBlockTest, getPlan) {
 }
 
 TEST(VPBasicBlockTest, TraversingIteratorTest) {
+  LLVMContext C;
+  auto ScalarHeader = std::make_unique<BasicBlock *>(BasicBlock::Create(C, ""));
   {
     // VPBasicBlocks only
     //     VPBB1
@@ -347,7 +354,8 @@ TEST(VPBasicBlockTest, TraversingIteratorTest) {
 
     // Use Plan to properly clean up created blocks.
     auto TC = std::make_unique<VPValue>();
-    VPlan Plan(VPPH, &*TC, VPBB1);
+    VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+    VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
   }
 
   {
@@ -447,7 +455,8 @@ TEST(VPBasicBlockTest, TraversingIteratorTest) {
 
     // Use Plan to properly clean up created blocks.
     auto TC = std::make_unique<VPValue>();
-    VPlan Plan(VPPH, &*TC, VPBB0);
+    VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+    VPlan Plan(VPPH, &*TC, VPBB0, ScalarHeaderVPBB);
   }
 
   {
@@ -530,7 +539,8 @@ TEST(VPBasicBlockTest, TraversingIteratorTest) {
 
     // Use Plan to properly clean up created blocks.
     auto TC = std::make_unique<VPValue>();
-    VPlan Plan(VPPH, &*TC, VPBB1);
+    VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+    VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
   }
 
   {
@@ -578,7 +588,8 @@ TEST(VPBasicBlockTest, TraversingIteratorTest) {
 
     // Use Plan to properly clean up created blocks.
     auto TC = std::make_unique<VPValue>();
-    VPlan Plan(VPPH, &*TC, VPBB1);
+    VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+    VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
   }
 
   {
@@ -670,7 +681,8 @@ TEST(VPBasicBlockTest, TraversingIteratorTest) {
 
     // Use Plan to properly clean up created blocks.
     auto TC = std::make_unique<VPValue>();
-    VPlan Plan(VPPH, &*TC, VPBB1);
+    VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+    VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
   }
 }
 
@@ -708,7 +720,10 @@ TEST(VPBasicBlockTest, print) {
     EXPECT_EQ("EMIT br <badref>, <badref>", I3Dump);
   }
 
-  VPlan Plan(VPBB0, TC, VPBB1);
+  LLVMContext C;
+  auto ScalarHeader = std::make_unique<BasicBlock *>(BasicBlock::Create(C, ""));
+  VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+  VPlan Plan(VPBB0, TC, VPBB1, ScalarHeaderVPBB);
   std::string FullDump;
   raw_string_ostream OS(FullDump);
   Plan.printDOT(OS);
@@ -790,7 +805,10 @@ TEST(VPBasicBlockTest, printPlanWithVFsAndUFs) {
   VPBB1->appendRecipe(I1);
   VPBB1->setName("bb1");
 
-  VPlan Plan(VPBB0, TC, VPBB1);
+  LLVMContext C;
+  auto ScalarHeader = std::make_unique<BasicBlock *>(BasicBlock::Create(C, ""));
+  VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+  VPlan Plan(VPBB0, TC, VPBB1, ScalarHeaderVPBB);
   Plan.setName("TestPlan");
   Plan.addVF(ElementCount::getFixed(4));
 
@@ -1250,9 +1268,10 @@ TEST(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) {
 TEST(VPRecipeTest, dumpRecipeInPlan) {
   VPBasicBlock *VPBB0 = new VPBasicBlock("preheader");
   VPBasicBlock *VPBB1 = new VPBasicBlock();
-  VPlan Plan(VPBB0, VPBB1);
-
   LLVMContext C;
+  auto ScalarHeader = std::make_unique<BasicBlock *>(BasicBlock::Create(C, ""));
+  VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+  VPlan Plan(VPBB0, VPBB1, ScalarHeaderVPBB);
 
   IntegerType *Int32 = IntegerType::get(C, 32);
   auto *AI = BinaryOperator::CreateAdd(PoisonValue::get(Int32),
@@ -1319,9 +1338,10 @@ TEST(VPRecipeTest, dumpRecipeInPlan) {
 TEST(VPRecipeTest, dumpRecipeUnnamedVPValuesInPlan) {
   VPBasicBlock *VPBB0 = new VPBasicBlock("preheader");
   VPBasicBlock *VPBB1 = new VPBasicBlock();
-  VPlan Plan(VPBB0, VPBB1);
-
   LLVMContext C;
+  auto ScalarHeader = std::make_unique<BasicBlock *>(BasicBlock::Create(C, ""));
+  VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+  VPlan Plan(VPBB0, VPBB1, ScalarHeaderVPBB);
 
   IntegerType *Int32 = IntegerType::get(C, 32);
   auto *AI = BinaryOperator::CreateAdd(PoisonValue::get(Int32),
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
index 9958d6ea124f81..e70cd271b8d732 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
@@ -28,7 +28,11 @@ TEST(VPVerifierTest, VPInstructionUseBeforeDefSameBB) {
   VPBasicBlock *VPBB2 = new VPBasicBlock();
   VPRegionBlock *R1 = new VPRegionBlock(VPBB2, VPBB2, "R1");
   VPBlockUtils::connectBlocks(VPBB1, R1);
-  VPlan Plan(VPPH, &*TC, VPBB1);
+
+  LLVMContext C;
+  auto ScalarHeader = std::make_unique<BasicBlock *>(BasicBlock::Create(C, ""));
+  VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+  VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
 
 #if GTEST_HAS_STREAM_REDIRECTION
   ::testing::internal::CaptureStderr();
@@ -60,7 +64,10 @@ TEST(VPVerifierTest, VPInstructionUseBeforeDefDifferentBB) {
   VPBlockUtils::connectBlocks(VPBB1, R1);
 
   auto TC = std::make_unique<VPValue>();
-  VPlan Plan(VPPH, &*TC, VPBB1);
+  LLVMContext C;
+  auto ScalarHeader = std::make_unique<BasicBlock *>(BasicBlock::Create(C, ""));
+  VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+  VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
 
 #if GTEST_HAS_STREAM_REDIRECTION
   ::testing::internal::CaptureStderr();
@@ -103,7 +110,9 @@ TEST(VPVerifierTest, VPBlendUseBeforeDefDifferentBB) {
   VPBB3->setParent(R1);
 
   auto TC = std::make_unique<VPValue>();
-  VPlan Plan(VPPH, &*TC, VPBB1);
+  auto ScalarHeader = std::make_unique<BasicBlock *>(BasicBlock::Create(C, ""));
+  VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+  VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
 
 #if GTEST_HAS_STREAM_REDIRECTION
   ::testing::internal::CaptureStderr();
@@ -139,7 +148,10 @@ TEST(VPVerifierTest, DuplicateSuccessorsOutsideRegion) {
   VPBlockUtils::connectBlocks(VPBB1, R1);
 
   auto TC = std::make_unique<VPValue>();
-  VPlan Plan(VPPH, &*TC, VPBB1);
+  LLVMContext C;
+  auto ScalarHeader = std::make_unique<BasicBlock *>(BasicBlock::Create(C, ""));
+  VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+  VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
 
 #if GTEST_HAS_STREAM_REDIRECTION
   ::testing::internal::CaptureStderr();
@@ -176,7 +188,10 @@ TEST(VPVerifierTest, DuplicateSuccessorsInsideRegion) {
   VPBB3->setParent(R1);
 
   auto TC = std::make_unique<VPValue>();
-  VPlan Plan(VPPH, &*TC, VPBB1);
+  LLVMContext C;
+  auto ScalarHeader = std::make_unique<BasicBlock *>(BasicBlock::Create(C, ""));
+  VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+  VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
 
 #if GTEST_HAS_STREAM_REDIRECTION
   ::testing::internal::CaptureStderr();
@@ -205,7 +220,10 @@ TEST(VPVerifierTest, BlockOutsideRegionWithParent) {
   VPBB1->setParent(R1);
 
   auto TC = std::make_unique<VPValue>();
-  VPlan Plan(VPPH, &*TC, VPBB1);
+  LLVMContext C;
+  auto ScalarHeader = std::make_unique<BasicBlock *>(BasicBlock::Create(C, ""));
+  VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+  VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
 
 #if GTEST_HAS_STREAM_REDIRECTION
   ::testing::internal::CaptureStderr();



More information about the llvm-commits mailing list