[llvm] 256c6b0 - [VPlan] Model pre-header explicitly.

Sat Apr 9 05:25:39 PDT 2022

Author: Florian Hahn
Date: 2022-04-09T14:19:47+02:00
New Revision: 256c6b0ba14e8a7ab6373b61b7193ea8c0a3651c

URL: https://github.com/llvm/llvm-project/commit/256c6b0ba14e8a7ab6373b61b7193ea8c0a3651c
DIFF: https://github.com/llvm/llvm-project/commit/256c6b0ba14e8a7ab6373b61b7193ea8c0a3651c.diff

LOG: [VPlan] Model pre-header explicitly.

This patch extends the scope of VPlan to also model the pre-header.
The pre-header can be used to place recipes that should be code-gen'd
outside the loop, like SCEV expansion.

Depends on D121623.

Reviewed By: Ayal

Differential Revision: https://reviews.llvm.org/D121624

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/lib/Transforms/Vectorize/VPlan.cpp
    llvm/lib/Transforms/Vectorize/VPlan.h
    llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
    llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
    llvm/test/Transforms/LoopVectorize/X86/small-size.ll
    llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
    llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll
    llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll
    llvm/test/Transforms/LoopVectorize/vplan-printing.ll
    llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll
    llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 5de2c3299765b..f0c14da981425 100644

--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3102,10 +3102,9 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
   BrInst->setDebugLoc(ScalarLatchTerm->getDebugLoc());
   ReplaceInstWithInst(LoopMiddleBlock->getTerminator(), BrInst);
 
-  SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT, LI,
-             nullptr, Twine(Prefix) + "vector.body");
-
-  // Update dominator for loop exit.
+  // Update dominator for loop exit. During skeleton creation, only the vector
+  // pre-header and the middle block are created. The vector loop is entirely
+  // created during VPlan exection.
   if (!Cost->requiresScalarEpilogue(VF))
     // If there is an epilogue which must run, there's no edge from the
     // middle block to exit blocks  and thus no need to update the immediate
@@ -3244,7 +3243,7 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton() {
   |/    |
   |     v
   |    [  ] \
-  |    [  ]_|   <-- vector loop.
+  |    [  ]_|   <-- vector loop (created during VPlan execution).
   |     |
   |     v
   \   -[ ]   <--- middle-block.
@@ -7600,10 +7599,11 @@ void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF,
 
   // Perform the actual loop transformation.
 
-  // 1. Create a new empty loop. Unlink the old loop and connect the new one.
+  // 1. Set up the skeleton for vectorization, including vector pre-header and
+  // middle block. The vector loop is created during VPlan execution.
   VPTransformState State{BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan};
   Value *CanonicalIVStartValue;
-  std::tie(State.CFG.VectorPreHeader, CanonicalIVStartValue) =
+  std::tie(State.CFG.PrevBB, CanonicalIVStartValue) =
       ILV.createVectorizedLoopSkeleton();
   ILV.collectPoisonGeneratingRecipes(State);
 
@@ -8670,8 +8670,6 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, DebugLoc DL,
   auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DL);
   VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
   VPBasicBlock *Header = TopRegion->getEntryBasicBlock();
-  if (IsVPlanNative)
-    Header = cast<VPBasicBlock>(Header->getSingleSuccessor());
   Header->insert(CanonicalIVPHI, Header->begin());
 
   auto *CanonicalIVIncrement =
@@ -8681,10 +8679,8 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, DebugLoc DL,
   CanonicalIVPHI->addOperand(CanonicalIVIncrement);
 
   VPBasicBlock *EB = TopRegion->getExitBasicBlock();
-  if (IsVPlanNative) {
-    EB = cast<VPBasicBlock>(EB->getSinglePredecessor());
+  if (IsVPlanNative)
     EB->setCondBit(nullptr);
-  }
   EB->appendRecipe(CanonicalIVIncrement);
 
   auto *BranchOnCount =
@@ -8753,12 +8749,17 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
   // visit each basic block after having visited its predecessor basic blocks.
   // ---------------------------------------------------------------------------
 
-  // Create initial VPlan skeleton, with separate header and latch blocks.
-  VPBasicBlock *HeaderVPBB = new VPBasicBlock();
+  // Create initial VPlan skeleton, starting with a block for the pre-header,
+  // followed by a region for the vector loop. The skeleton vector loop region
+  // contains a header and latch block.
+  VPBasicBlock *Preheader = new VPBasicBlock("vector.ph");
+  auto Plan = std::make_unique<VPlan>(Preheader);
+
+  VPBasicBlock *HeaderVPBB = new VPBasicBlock("vector.body");
   VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch");
   VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB);
   auto *TopRegion = new VPRegionBlock(HeaderVPBB, LatchVPBB, "vector loop");
-  auto Plan = std::make_unique<VPlan>(TopRegion);
+  VPBlockUtils::insertBlockAfter(TopRegion, Preheader);
 
   Instruction *DLInst =
       getDebugLocFromInstOrOperands(Legal->getPrimaryInduction());
@@ -8777,7 +8778,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
     // Relevant instructions from basic block BB will be grouped into VPRecipe
     // ingredients and fill a new VPBasicBlock.
     unsigned VPBBsForBB = 0;
-    VPBB->setName(BB->getName());
+    if (VPBB != HeaderVPBB)
+      VPBB->setName(BB->getName());
     Builder.setInsertPoint(VPBB);
 
     // Introduce each ingredient into VPlan.
@@ -9065,6 +9067,31 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
       [this](PHINode *P) { return Legal->getIntOrFpInductionDescriptor(P); },
       DeadInstructions, *PSE.getSE());
 
+  // Update plan to be compatible with the inner loop vectorizer for
+  // code-generation.
+  VPRegionBlock *LoopRegion = Plan->getVectorLoopRegion();
+  VPBasicBlock *Preheader = LoopRegion->getEntryBasicBlock();
+  VPBasicBlock *Exit = LoopRegion->getExitBasicBlock();
+  VPBlockBase *Latch = Exit->getSinglePredecessor();
+  VPBlockBase *Header = Preheader->getSingleSuccessor();
+
+  // 1. Move preheader block out of main vector loop.
+  Preheader->setParent(LoopRegion->getParent());
+  VPBlockUtils::disconnectBlocks(Preheader, Header);
+  VPBlockUtils::connectBlocks(Preheader, LoopRegion);
+  Plan->setEntry(Preheader);
+
+  // 2. Disconnect backedge and exit block.
+  VPBlockUtils::disconnectBlocks(Latch, Header);
+  VPBlockUtils::disconnectBlocks(Latch, Exit);
+
+  // 3. Update entry and exit of main vector loop region.
+  LoopRegion->setEntry(Header);
+  LoopRegion->setExit(Latch);
+
+  // 4. Remove exit block.
+  delete Exit;
+
   addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), DebugLoc(),
                         true, true);
   return Plan;
@@ -9442,13 +9469,14 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
 
   auto &DL = EntryVal->getModule()->getDataLayout();
 
+  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
   // Generate code for the induction step. Note that induction steps are
   // required to be loop-invariant
   auto CreateStepValue = [&](const SCEV *Step) -> Value * {
     if (SE.isSCEVable(IV->getType())) {
       SCEVExpander Exp(SE, DL, "induction");
       return Exp.expandCodeFor(Step, Step->getType(),
-                               State.CFG.VectorPreHeader->getTerminator());
+                               VectorPH->getTerminator());
     }
     return cast<SCEVUnknown>(Step)->getValue();
   };
@@ -9466,7 +9494,7 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
 
   // Construct the initial value of the vector IV in the vector loop preheader
   auto CurrIP = Builder.saveIP();
-  Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator());
+  Builder.SetInsertPoint(VectorPH->getTerminator());
   if (isa<TruncInst>(EntryVal)) {
     assert(Start->getType()->isIntegerTy() &&
            "Truncation requires an integer type");
@@ -9530,13 +9558,13 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
   }
 
   LastInduction->setName("vec.ind.next");
-  VecInd->addIncoming(SteppedStart, State.CFG.VectorPreHeader);
+  VecInd->addIncoming(SteppedStart, VectorPH);
   // Add induction update using an incorrect block temporarily. The phi node
   // will be fixed after VPlan execution. Note that at this point the latch
   // block cannot be used, as it does not exist yet.
   // TODO: Model increment value in VPlan, by turning the recipe into a
   // multi-def and a subclass of VPHeaderPHIRecipe.
-  VecInd->addIncoming(LastInduction, State.CFG.VectorPreHeader);
+  VecInd->addIncoming(LastInduction, VectorPH);
 }
 
 void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
@@ -9591,7 +9619,9 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
   Type *ScStValueType = ScalarStartValue->getType();
   PHINode *NewPointerPhi =
       PHINode::Create(ScStValueType, 2, "pointer.phi", CanonicalIV);
-  NewPointerPhi->addIncoming(ScalarStartValue, State.CFG.VectorPreHeader);
+
+  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
+  NewPointerPhi->addIncoming(ScalarStartValue, VectorPH);
 
   // A pointer induction, performed by using a gep
   const DataLayout &DL = NewPointerPhi->getModule()->getDataLayout();
@@ -9612,7 +9642,7 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
   // block cannot be used, as it does not exist yet.
   // TODO: Model increment value in VPlan, by turning the recipe into a
   // multi-def and a subclass of VPHeaderPHIRecipe.
-  NewPointerPhi->addIncoming(InductionGEP, State.CFG.VectorPreHeader);
+  NewPointerPhi->addIncoming(InductionGEP, VectorPH);
 
   // Create UF many actual address geps that use the pointer
   // phi as base and a vectorized version of the step value

diff  --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 64d582cfbedfc..d531c7a6a761b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -245,6 +245,10 @@ Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
   // set(Def, Extract, Instance);
   return Extract;
 }
+BasicBlock *VPTransformState::CFGState::getPreheaderBBFor(VPRecipeBase *R) {
+  VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion();
+  return VPBB2IRBB[LoopRegion->getPreheaderVPBB()];
+}
 
 BasicBlock *
 VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
@@ -277,20 +281,34 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
     assert(PredBB && "Predecessor basic-block not found building successor.");
     auto *PredBBTerminator = PredBB->getTerminator();
     LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
-    if (isa<UnreachableInst>(PredBBTerminator)) {
+
+    auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
+    if (isa<UnreachableInst>(PredBBTerminator) ||
+        (TermBr && !TermBr->isConditional())) {
       assert(PredVPSuccessors.size() == 1 &&
              "Predecessor ending w/o branch must have single successor.");
-      DebugLoc DL = PredBBTerminator->getDebugLoc();
-      PredBBTerminator->eraseFromParent();
-      auto *Br = BranchInst::Create(NewBB, PredBB);
-      Br->setDebugLoc(DL);
+      if (TermBr) {
+        TermBr->setSuccessor(0, NewBB);
+      } else {
+        DebugLoc DL = PredBBTerminator->getDebugLoc();
+        PredBBTerminator->eraseFromParent();
+        auto *Br = BranchInst::Create(NewBB, PredBB);
+        Br->setDebugLoc(DL);
+      }
     } else {
-      assert(PredVPSuccessors.size() == 2 &&
-             "Predecessor ending with branch must have two successors.");
-      unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
-      assert(!PredBBTerminator->getSuccessor(idx) &&
-             "Trying to reset an existing successor block.");
-      PredBBTerminator->setSuccessor(idx, NewBB);
+      if (PredVPSuccessors.size() == 2) {
+        unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
+        assert(!PredBBTerminator->getSuccessor(idx) &&
+               "Trying to reset an existing successor block.");
+        PredBBTerminator->setSuccessor(idx, NewBB);
+      } else {
+        auto *Reg = dyn_cast<VPRegionBlock>(PredVPBB->getParent());
+        assert(Reg && !Reg->isReplicator());
+        assert(this == Reg->getSingleSuccessor());
+        PredBBTerminator->setSuccessor(0, NewBB);
+        PredBBTerminator->setSuccessor(
+            1, CFG.VPBB2IRBB[Reg->getEntryBasicBlock()]);
+      }
     }
   }
   return NewBB;
@@ -302,40 +320,36 @@ void VPBasicBlock::execute(VPTransformState *State) {
   VPBlockBase *SingleHPred = nullptr;
   BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
 
+  auto IsNonReplicateR = [](VPBlockBase *BB) {
+    auto *R = dyn_cast<VPRegionBlock>(BB);
+    return R && !R->isReplicator();
+  };
+
   // 1. Create an IR basic block, or reuse the last one if possible.
   // The last IR basic block is reused, as an optimization, in three cases:
-  // A. the first VPBB reuses the loop header BB - when PrevVPBB is null;
+  // A. the first VPBB reuses the loop pre-header BB - when PrevVPBB is null;
   // B. when the current VPBB has a single (hierarchical) predecessor which
-  //    is PrevVPBB and the latter has a single (hierarchical) successor; and
+  //    is PrevVPBB and the latter has a single (hierarchical) successor which
+  //    both are in the same non-replicator region; and
   // C. when the current VPBB is an entry of a region replica - where PrevVPBB
   //    is the exit of this region from a previous instance, or the predecessor
   //    of this region.
   if (PrevVPBB && /* A */
       !((SingleHPred = getSingleHierarchicalPredecessor()) &&
         SingleHPred->getExitBasicBlock() == PrevVPBB &&
-        PrevVPBB->getSingleHierarchicalSuccessor()) && /* B */
-      !(Replica && getPredecessors().empty())) {       /* C */
+        PrevVPBB->getSingleHierarchicalSuccessor() &&
+        (SingleHPred->getParent() == getEnclosingLoopRegion() &&
+         !IsNonReplicateR(SingleHPred))) &&      /* B */
+      !(Replica && getPredecessors().empty())) { /* C */
     NewBB = createEmptyBasicBlock(State->CFG);
     State->Builder.SetInsertPoint(NewBB);
     // Temporarily terminate with unreachable until CFG is rewired.
     UnreachableInst *Terminator = State->Builder.CreateUnreachable();
     // Register NewBB in its loop. In innermost loops its the same for all BB's.
-    State->CurrentVectorLoop->addBasicBlockToLoop(NewBB, *State->LI);
+    if (State->CurrentVectorLoop)
+      State->CurrentVectorLoop->addBasicBlockToLoop(NewBB, *State->LI);
     State->Builder.SetInsertPoint(Terminator);
     State->CFG.PrevBB = NewBB;
-  } else {
-    // If the current VPBB is re-using the header block from skeleton creation,
-    // move it to the new vector loop.
-    VPBasicBlock *HeaderVPBB =
-        getPlan()->getVectorLoopRegion()->getEntryBasicBlock();
-    if (EnableVPlanNativePath)
-      HeaderVPBB = cast<VPBasicBlock>(HeaderVPBB->getSingleSuccessor());
-    if (this == HeaderVPBB) {
-      assert(State->CurrentVectorLoop);
-      State->LI->removeBlock(State->CFG.PrevBB);
-      State->CurrentVectorLoop->addBasicBlockToLoop(State->CFG.PrevBB,
-                                                    *State->LI);
-    }
   }
 
   // 2. Fill the IR basic block with IR instructions.
@@ -409,6 +423,16 @@ VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
   return SplitBlock;
 }
 
+VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() {
+  VPRegionBlock *P = getParent();
+  if (P && P->isReplicator()) {
+    P = P->getParent();
+    assert(!cast<VPRegionBlock>(P)->isReplicator() &&
+           "unexpected nested replicate regions");
+  }
+  return P;
+}
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const {
   if (getSuccessors().empty()) {
@@ -465,7 +489,8 @@ void VPRegionBlock::execute(VPTransformState *State) {
     // Create and register the new vector loop.
     Loop *PrevLoop = State->CurrentVectorLoop;
     State->CurrentVectorLoop = State->LI->AllocateLoop();
-    Loop *ParentLoop = State->LI->getLoopFor(State->CFG.VectorPreHeader);
+    BasicBlock *VectorPH = State->CFG.VPBB2IRBB[getPreheaderVPBB()];
+    Loop *ParentLoop = State->LI->getLoopFor(VectorPH);
 
     // Insert the new loop into the loop nest and register the new basic blocks
     // before calling any utilities such as SCEV that require valid LoopInfo.
@@ -476,20 +501,6 @@ void VPRegionBlock::execute(VPTransformState *State) {
 
     // Visit the VPBlocks connected to "this", starting from it.
     for (VPBlockBase *Block : RPOT) {
-      if (EnableVPlanNativePath) {
-        // The inner loop vectorization path does not represent loop preheader
-        // and exit blocks as part of the VPlan. In the VPlan-native path, skip
-        // vectorizing loop preheader block. In future, we may replace this
-        // check with the check for loop preheader.
-        if (Block->getNumPredecessors() == 0)
-          continue;
-
-        // Skip vectorizing loop exit block. In future, we may replace this
-        // check with the check for loop exit.
-        if (Block->getNumSuccessors() == 0)
-          continue;
-      }
-
       LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
       Block->execute(State);
     }
@@ -886,7 +897,7 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
 
   // Check if the backedge taken count is needed, and if so build it.
   if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
-    IRBuilder<> Builder(State.CFG.VectorPreHeader->getTerminator());
+    IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
     auto *TCMO = Builder.CreateSub(TripCountV,
                                    ConstantInt::get(TripCountV->getType(), 1),
                                    "trip.count.minus.1");
@@ -923,9 +934,9 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
   }
 }
 
-/// Generate the code inside the body of the vectorized loop. Assumes a single
-/// LoopVectorBody basic-block was created for this. Introduce additional
-/// basic-blocks as needed, and fill them all.
+/// Generate the code inside the preheader and body of the vectorized loop.
+/// Assumes a single pre-header basic-block was created for this. Introduce
+/// additional basic-blocks as needed, and fill them all.
 void VPlan::execute(VPTransformState *State) {
   // Set the reverse mapping from VPValues to Values for code generation.
   for (auto &Entry : Value2VPValue)
@@ -933,21 +944,11 @@ void VPlan::execute(VPTransformState *State) {
 
   // Initialize CFG state.
   State->CFG.PrevVPBB = nullptr;
-  BasicBlock *VectorHeaderBB = State->CFG.VectorPreHeader->getSingleSuccessor();
-  State->CFG.PrevBB = VectorHeaderBB;
-  State->CFG.ExitBB = VectorHeaderBB->getSingleSuccessor();
-  State->CurrentVectorLoop = State->LI->getLoopFor(VectorHeaderBB);
-
-  // Remove the edge between Header and Latch to allow other connections.
-  // Temporarily terminate with unreachable until CFG is rewired.
-  // Note: this asserts the generated code's assumption that
-  // getFirstInsertionPt() can be dereferenced into an Instruction.
-  VectorHeaderBB->getTerminator()->eraseFromParent();
-  State->Builder.SetInsertPoint(VectorHeaderBB);
-  UnreachableInst *Terminator = State->Builder.CreateUnreachable();
-  State->Builder.SetInsertPoint(Terminator);
-
-  // Generate code in loop body.
+  State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
+  BasicBlock *VectorPreHeader = State->CFG.PrevBB;
+  State->Builder.SetInsertPoint(VectorPreHeader->getTerminator());
+
+  // Generate code in the loop pre-header and body.
   for (VPBlockBase *Block : depth_first(Entry))
     Block->execute(State);
 
@@ -974,10 +975,6 @@ void VPlan::execute(VPTransformState *State) {
   // Fix the latch value of canonical, reduction and first-order recurrences
   // phis in the vector loop.
   VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
-  if (Header->empty()) {
-    assert(EnableVPlanNativePath);
-    Header = cast<VPBasicBlock>(Header->getSingleSuccessor());
-  }
   for (VPRecipeBase &R : Header->phis()) {
     // Skip phi-like recipes that generate their backedege values themselves.
     if (isa<VPWidenPHIRecipe>(&R))
@@ -1029,9 +1026,12 @@ void VPlan::execute(VPTransformState *State) {
   }
 
   // We do not attempt to preserve DT for outer loop vectorization currently.
-  if (!EnableVPlanNativePath)
+  if (!EnableVPlanNativePath) {
+    BasicBlock *VectorHeaderBB = State->CFG.VPBB2IRBB[Header];
+    State->DT->addNewBlock(VectorHeaderBB, VectorPreHeader);
     updateDominatorTree(State->DT, VectorHeaderBB, VectorLatchBB,
                         State->CFG.ExitBB);
+  }
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1452,7 +1452,9 @@ void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
   Value *Start = getStartValue()->getLiveInIRValue();
   PHINode *EntryPart = PHINode::Create(
       Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt());
-  EntryPart->addIncoming(Start, State.CFG.VectorPreHeader);
+
+  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
+  EntryPart->addIncoming(Start, VectorPH);
   EntryPart->setDebugLoc(DL);
   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
     State.set(this, EntryPart, Part);
@@ -1469,11 +1471,12 @@ void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
 
 void VPExpandSCEVRecipe::execute(VPTransformState &State) {
   assert(!State.Instance && "cannot be used in per-lane");
-  const DataLayout &DL =
-      State.CFG.VectorPreHeader->getModule()->getDataLayout();
+  const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
   SCEVExpander Exp(SE, DL, "induction");
-  Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
-                                 State.CFG.VectorPreHeader->getTerminator());
+
+  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
+  Value *Res =
+      Exp.expandCodeFor(Expr, Expr->getType(), VectorPH->getTerminator());
 
   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
     State.set(this, Res, Part);
@@ -1526,11 +1529,12 @@ void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
                     ? VectorInit->getType()
                     : VectorType::get(VectorInit->getType(), State.VF);
 
+  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
   if (State.VF.isVector()) {
     auto *IdxTy = Builder.getInt32Ty();
     auto *One = ConstantInt::get(IdxTy, 1);
     IRBuilder<>::InsertPointGuard Guard(Builder);
-    Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator());
+    Builder.SetInsertPoint(VectorPH->getTerminator());
     auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
     auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
     VectorInit = Builder.CreateInsertElement(
@@ -1540,7 +1544,7 @@ void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
   // Create a phi node for the new recurrence.
   PHINode *EntryPart = PHINode::Create(
       VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt());
-  EntryPart->addIncoming(VectorInit, State.CFG.VectorPreHeader);
+  EntryPart->addIncoming(VectorInit, VectorPH);
   State.set(this, EntryPart, 0);
 }
 
@@ -1576,6 +1580,8 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
     State.set(this, EntryPart, Part);
   }
 
+  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
+
   // Reductions do not have to start at zero. They can start with
   // any loop invariant values.
   VPValue *StartVPV = getStartValue();
@@ -1590,7 +1596,7 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
       Iden = StartV;
     } else {
       IRBuilderBase::InsertPointGuard IPBuilder(Builder);
-      Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator());
+      Builder.SetInsertPoint(VectorPH->getTerminator());
       StartV = Iden =
           Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
     }
@@ -1601,7 +1607,7 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
     if (!ScalarPHI) {
       Iden = Builder.CreateVectorSplat(State.VF, Iden);
       IRBuilderBase::InsertPointGuard IPBuilder(Builder);
-      Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator());
+      Builder.SetInsertPoint(VectorPH->getTerminator());
       Constant *Zero = Builder.getInt32(0);
       StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
     }
@@ -1612,7 +1618,7 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
     // Make sure to add the reduction start value only to the
     // first unroll part.
     Value *StartVal = (Part == 0) ? StartV : Iden;
-    cast<PHINode>(EntryPart)->addIncoming(StartVal, State.CFG.VectorPreHeader);
+    cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
   }
 }
 

diff  --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 3fe981e4c0954..683613097ef4f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -311,13 +311,6 @@ struct VPTransformState {
     /// vector loop.
     BasicBlock *ExitBB = nullptr;
 
-    /// The IR BasicBlock that is the preheader of the vector loop in the output
-    /// IR.
-    /// FIXME: The vector preheader should also be modeled in VPlan, so any code
-    /// that needs to be added to the preheader gets directly generated by
-    /// VPlan. There should be no need to manage a pointer to the IR BasicBlock.
-    BasicBlock *VectorPreHeader = nullptr;
-
     /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
     /// of replication, maps the BasicBlock of the last replica created.
     SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB;
@@ -327,6 +320,10 @@ struct VPTransformState {
     SmallVector<VPBasicBlock *, 8> VPBBsToFix;
 
     CFGState() = default;
+
+    /// Returns the BasicBlock* mapped to the pre-header of the loop region
+    /// containing \p R.
+    BasicBlock *getPreheaderBBFor(VPRecipeBase *R);
   } CFG;
 
   /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
@@ -2077,6 +2074,8 @@ class VPBasicBlock : public VPBlockBase {
   /// SplitAt to the new block. Returns the new block.
   VPBasicBlock *splitAt(iterator SplitAt);
 
+  VPRegionBlock *getEnclosingLoopRegion();
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   /// Print this VPBsicBlock to \p O, prefixing all lines with \p Indent. \p
   /// SlotTracker is used to print unnamed VPValue's using consequtive numbers.
@@ -2170,6 +2169,12 @@ class VPRegionBlock : public VPBlockBase {
     ExitBlock->setParent(this);
   }
 
+  /// Returns the pre-header VPBasicBlock of the loop region.
+  VPBasicBlock *getPreheaderVPBB() {
+    assert(!isReplicator() && "should only get pre-header of loop regions");
+    return getSinglePredecessor()->getExitBasicBlock();
+  }
+
   /// An indicator whether this region is to generate multiple replicated
   /// instances of output IR corresponding to its VPBlockBases.
   bool isReplicator() const { return IsReplicator; }
@@ -2647,10 +2652,14 @@ class VPlan {
 
   /// Returns the VPRegionBlock of the vector loop.
   VPRegionBlock *getVectorLoopRegion() {
-    return cast<VPRegionBlock>(getEntry());
+    if (auto *R = dyn_cast<VPRegionBlock>(getEntry()))
+      return R;
+    return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
   }
   const VPRegionBlock *getVectorLoopRegion() const {
-    return cast<VPRegionBlock>(getEntry());
+    if (auto *R = dyn_cast<VPRegionBlock>(getEntry()))
+      return R;
+    return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
   }
 
   /// Returns the canonical induction recipe of the vector loop.

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
index 024e8e7296cec..6b4cc3f83464b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
@@ -10,6 +10,9 @@ target triple = "aarch64-unknown-linux-gnu"
 ; CHECK:        VPlan 'Initial VPlan for VF={vscale x 2},UF>=1' {
 ; CHECK-NEXT:   Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
 ; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
 ; CHECK-NEXT:   <x1> vector loop: {
 ; CHECK-NEXT:   vector.body:
 ; CHECK-NEXT:     EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
index 93bab6c1673c1..53a95d9a5ed93 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
@@ -19,6 +19,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK-NOT:   LV: Found uniform instruction: %tmp3 = getelementptr inbounds %data, %data* %d, i64 0, i32 0, i64 %i
 ; CHECK-NOT:   LV: Found uniform instruction: %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
 ; CHECK-NOT:   LV: Found uniform instruction: %i.next = add nuw nsw i64 %i, 5
+; CHECK:       define void @PR31671(
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x float> poison, float %x, i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x float> [[BROADCAST_SPLATINSERT]], <16 x float> poison, <16 x i32> zeroinitializer

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
index d11b8fa3dd6bf..8510d956bfb2b 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
@@ -136,8 +136,8 @@ define void @example2(i32 %n, i32 %x) optsize {
 ; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i32 [[N]], 0
 ; CHECK-NEXT:    br i1 [[TMP17]], label [[DOT_CRIT_EDGE:%.*]], label [[DOTLR_PH_PREHEADER:%.*]]
 ; CHECK:       .lr.ph.preheader:
-; CHECK-NEXT:    br i1 false, label [[SCALAR_PH8:%.*]], label [[VECTOR_PH10:%.*]]
-; CHECK:       vector.ph10:
+; CHECK-NEXT:    br i1 false, label [[SCALAR_PH8:%.*]], label [[VECTOR_PH9:%.*]]
+; CHECK:       vector.ph9:
 ; CHECK-NEXT:    [[TMP18:%.*]] = add i32 [[N]], -1
 ; CHECK-NEXT:    [[TMP19:%.*]] = zext i32 [[TMP18]] to i64
 ; CHECK-NEXT:    [[N_RND_UP11:%.*]] = add nuw nsw i64 [[TMP19]], 4
@@ -145,8 +145,8 @@ define void @example2(i32 %n, i32 %x) optsize {
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT18:%.*]] = insertelement <4 x i64> poison, i64 [[TMP19]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT19:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT18]], <4 x i64> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY9:%.*]]
-; CHECK:       vector.body9:
-; CHECK-NEXT:    [[INDEX20:%.*]] = phi i64 [ 0, [[VECTOR_PH10]] ], [ [[INDEX_NEXT31:%.*]], [[PRED_STORE_CONTINUE30:%.*]] ]
+; CHECK:       vector.body19:
+; CHECK-NEXT:    [[INDEX20:%.*]] = phi i64 [ 0, [[VECTOR_PH9]] ], [ [[INDEX_NEXT31:%.*]], [[PRED_STORE_CONTINUE30:%.*]] ]
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 [[I_0_LCSSA]], [[INDEX20]]
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT21:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX20]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT22:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT21]], <4 x i64> poison, <4 x i32> zeroinitializer

diff  --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
index fcbfa22cd74e8..9dd88f724e0aa 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
@@ -13,6 +13,9 @@ define void @sink_replicate_region_1(i32 %x, i8* %ptr) optsize {
 ; CHECK-EMPTY:
 ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
 ; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: vector.body:
 ; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -98,6 +101,9 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, i32* %ptr) optsize {
 ; CHECK-EMPTY:
 ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
 ; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: vector.body:
 ; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -168,6 +174,9 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, i32* %ptr) optsize
 ; CHECK-EMPTY:
 ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
 ; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: vector.body:
 ; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -238,6 +247,9 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, i8*
 ; CHECK-EMPTY:
 ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
 ; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: vector.body:
 ; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -333,6 +345,9 @@ define void @sink_replicate_region_after_replicate_region(i32* %ptr, i32 %x, i8
 ; CHECK-EMPTY:
 ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
 ; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: vector.body:
 ; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION

diff  --git a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll
index 3cdf854b50974..15ff79489b347 100644
--- a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll
+++ b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll
@@ -41,6 +41,9 @@ for.end:
 ; CHECK-EMPTY:
 ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
 ; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: vector.body:
 ; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION

diff  --git a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll
index d2da955ceb820..2e27a90787d92 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll
@@ -7,7 +7,17 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; Verify that -vplan-print-in-dot-format option works.
 
 define void @print_call_and_memory(i64 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
-; CHECK:       subgraph cluster_N0 {
+; CHECK:      digraph VPlan {
+; CHECK-NEXT:  graph [labelloc=t, fontsize=30; label="Vectorization Plan\nInitial VPlan for VF=\{4\},UF\>=1"]
+; CHECK-NEXT:  node [shape=rect, fontname=Courier, fontsize=30]
+; CHECK-NEXT:  edge [fontname=Courier, fontsize=30]
+; CHECK-NEXT:  compound=true
+; CHECK-NEXT:  N0 [label =
+; CHECK-NEXT:    "vector.ph:\l" +
+; CHECK-NEXT:    "Successor(s): vector loop\l"
+; CHECK-NEXT:  ]
+; CHECK-NEXT:  N0 -> N1 [ label="" lhead=cluster_N2]
+; CHECK-NEXT:  subgraph cluster_N2 {
 ; CHECK-NEXT:    fontname=Courier
 ; CHECK-NEXT:    label="\<x1\> vector loop"
 ; CHECK-NEXT:    N1 [label =

diff  --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
index 6ec8fd102de8f..ab61a0e4437a8 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
@@ -11,6 +11,9 @@ define void @print_call_and_memory(i64 %n, float* noalias %y, float* noalias %x)
 ; CHECK:      VPlan 'Initial VPlan for VF={4},UF>=1' {
 ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
 ; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: vector.body:
 ; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -51,6 +54,9 @@ define void @print_widen_gep_and_select(i64 %n, float* noalias %y, float* noalia
 ; CHECK:      VPlan 'Initial VPlan for VF={4},UF>=1' {
 ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
 ; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: vector.body:
 ; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -96,6 +102,9 @@ define float @print_reduction(i64 %n, float* noalias %y) {
 ; CHECK:      VPlan 'Initial VPlan for VF={4},UF>=1' {
 ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
 ; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: vector.body:
 ; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -133,6 +142,9 @@ define void @print_replicate_predicated_phi(i64 %n, i64* %x) {
 ; CHECK:      VPlan 'Initial VPlan for VF={4},UF>=1' {
 ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
 ; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: vector.body:
 ; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -207,6 +219,9 @@ define void @print_interleave_groups(i32 %C, i32 %D) {
 ; CHECK:       VPlan 'Initial VPlan for VF={4},UF>=1' {
 ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
 ; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT:  vector.body:
 ; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -267,6 +282,9 @@ define float @print_fmuladd_strict(float* %a, float* %b, i64 %n) {
 ; CHECK:      VPlan 'Initial VPlan for VF={4},UF>=1' {
 ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
 ; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: vector.body:
 ; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -307,6 +325,9 @@ define void @debug_loc_vpinstruction(i32* nocapture %asd, i32* nocapture %bsd) !
 ; CHECK:    VPlan 'Initial VPlan for VF={4},UF>=1' {
 ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
 ; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT:  vector.body:
 ; CHECK-NEXT:    EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -396,6 +417,9 @@ define void @print_expand_scev(i64 %y, i8* %ptr) {
 ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
 ; CHECK-NEXT: Live-in vp<%0> = vector-trip-count
 ; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT:   vector.body:
 ; CHECK-NEXT:     EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION

diff  --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll
index 93c507618d460..fccc9becf12f7 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll
@@ -10,6 +10,9 @@ define void @sink_with_sideeffects(i1 %c, i8* %ptr) {
 ; CHECK:      VPlan 'Initial VPlan for VF={1},UF>=1' {
 ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
 ; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: vector.body:
 ; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION

diff  --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
index c31a486aa7fe2..89d21f6904dc8 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
@@ -15,6 +15,9 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; CHECK-EMPTY:
 ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
 ; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: vector.body:
 ; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -82,6 +85,9 @@ exit:
 ; CHECK-EMPTY:
 ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
 ; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: vector.body:
 ; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -162,6 +168,9 @@ exit:
 ; CHECK-EMPTY:
 ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
 ; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: vector.body:
 ; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -244,6 +253,9 @@ define void @uniform_gep(i64 %k, i16* noalias %A, i16* noalias %B) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
 ; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: vector.body:
 ; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -335,6 +347,9 @@ define void @pred_cfg1(i32 %k, i32 %j) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
 ; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: vector.body:
 ; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -437,6 +452,9 @@ define void @pred_cfg2(i32 %k, i32 %j) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
 ; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: vector.body:
 ; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -554,6 +572,9 @@ define void @pred_cfg3(i32 %k, i32 %j) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
 ; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: vector.body:
 ; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -665,6 +686,9 @@ define void @merge_3_replicate_region(i32 %k, i32 %j) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
 ; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: vector.body:
 ; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -781,6 +805,9 @@ define void @update_2_uses_in_same_recipe_in_merged_block(i32 %k) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
 ; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: vector.body:
 ; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -849,6 +876,9 @@ define void @recipe_in_merge_candidate_used_by_first_order_recurrence(i32 %k) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
 ; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: vector.body:
 ; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -932,6 +962,9 @@ define void @update_multiple_users(i16* noalias %src, i8* noalias %dst, i1 %c) {
 ; CHECK:      VPlan 'Initial VPlan for VF={2},UF>=1' {
 ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
 ; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: vector.body:
 ; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -1003,6 +1036,9 @@ define void @sinking_requires_duplication(float* %addr) {
 ; CHECK:      VPlan 'Initial VPlan for VF={2},UF>=1' {
 ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
 ; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: vector.body:
 ; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION