[llvm] [VPlan] Hook IR blocks into VPlan during skeleton creation (NFC) (PR #114292)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 4 11:09:58 PST 2024


https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/114292

>From 1b89761c30d273ae665b6da4b2ede0675373f2b1 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 11 Oct 2024 10:04:40 +0100
Subject: [PATCH] [VPlan] Hook IR blocks into VPlan during skeleton creation
 (NFC)

As a first step to move towards modeling the full skeleton in VPlan,
start by wrapping IR blocks created during legacy skeleton creation in
VPIRBasicBlocks and hook them into the VPlan. This means the skeleton
CFG is represented in VPlan, just before execute. This allows moving
parts of skeleton creation into recipes in the VPBBs gradually.

Note that this allows retiring some manual DT updates, as this will be
handled automatically during VPlan execution.
---
 .../Transforms/Vectorize/LoopVectorize.cpp    | 76 +++++++++++++------
 llvm/lib/Transforms/Vectorize/VPlan.cpp       | 67 +++++++++-------
 llvm/lib/Transforms/Vectorize/VPlan.h         | 51 ++++---------
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 16 ++--
 .../Transforms/Vectorize/VPlanTransforms.cpp  |  2 +
 llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp |  2 -
 llvm/lib/Transforms/Vectorize/VPlanUtils.cpp  |  2 +-
 .../Transforms/Vectorize/VPlanVerifier.cpp    |  9 ---
 .../Transforms/Vectorize/VPDomTreeTest.cpp    |  9 ++-
 .../Transforms/Vectorize/VPlanTest.cpp        | 38 +++++++---
 .../Vectorize/VPlanVerifierTest.cpp           | 18 +++--
 11 files changed, 167 insertions(+), 123 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1c64bd2982d764..2dbee4331e238f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2426,6 +2426,26 @@ InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) {
   return VectorTripCount;
 }
 
+static void connectScalarPreheaderInVPlan(VPlan &Plan) {
+  VPBlockBase *VectorPH = Plan.getVectorPreheader();
+  VPBlockBase *ScalarPH = Plan.getScalarPreheader();
+  VPBlockBase *PredVPB = VectorPH->getSinglePredecessor();
+  VPBlockUtils::disconnectBlocks(Plan.getEntry(), VectorPH);
+  VPBlockUtils::connectBlocks(PredVPB, ScalarPH);
+  VPBlockUtils::connectBlocks(PredVPB, VectorPH);
+}
+
+static void connectCheckBlockInVPlan(VPlan &Plan, BasicBlock *CheckIRBB) {
+  VPBlockBase *ScalarPH = Plan.getScalarPreheader();
+  VPBlockBase *VectorPH = Plan.getVectorPreheader();
+  VPBlockBase *PredVPB = VectorPH->getSinglePredecessor();
+  VPBlockUtils::disconnectBlocks(PredVPB, VectorPH);
+  VPIRBasicBlock *CheckVPIRBB = VPIRBasicBlock::fromBasicBlock(CheckIRBB);
+  VPBlockUtils::connectBlocks(PredVPB, CheckVPIRBB);
+  VPBlockUtils::connectBlocks(CheckVPIRBB, ScalarPH);
+  VPBlockUtils::connectBlocks(CheckVPIRBB, VectorPH);
+}
+
 void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
   Value *Count = getTripCount();
   // Reuse existing vector loop preheader for TC checks.
@@ -2511,13 +2531,14 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
          "TC check is expected to dominate Bypass");
 
   // Update dominator for Bypass & LoopExit (if needed).
-  DT->changeImmediateDominator(Bypass, TCCheckBlock);
   BranchInst &BI =
       *BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters);
   if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator()))
     setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false);
   ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI);
   LoopBypassBlocks.push_back(TCCheckBlock);
+
+  connectScalarPreheaderInVPlan(Plan);
 }
 
 BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) {
@@ -2534,6 +2555,8 @@ BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) {
          "Should already be a bypass block due to iteration count check");
   LoopBypassBlocks.push_back(SCEVCheckBlock);
   AddedSafetyChecks = true;
+
+  connectCheckBlockInVPlan(Plan, SCEVCheckBlock);
   return SCEVCheckBlock;
 }
 
@@ -2570,6 +2593,7 @@ BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(BasicBlock *Bypass) {
 
   AddedSafetyChecks = true;
 
+  connectCheckBlockInVPlan(Plan, MemCheckBlock);
   return MemCheckBlock;
 }
 
@@ -7648,10 +7672,10 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
 
   // 0. Generate SCEV-dependent code into the preheader, including TripCount,
   // before making any changes to the CFG.
-  if (!BestVPlan.getPreheader()->empty()) {
+  if (!BestVPlan.getEntry()->empty()) {
     State.CFG.PrevBB = OrigLoop->getLoopPreheader();
     State.Builder.SetInsertPoint(OrigLoop->getLoopPreheader()->getTerminator());
-    BestVPlan.getPreheader()->execute(&State);
+    BestVPlan.getEntry()->execute(&State);
   }
   if (!ILV.getTripCount())
     ILV.setTripCount(State.get(BestVPlan.getTripCount(), VPLane(0)));
@@ -7859,8 +7883,6 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
                                  DT->getNode(Bypass)->getIDom()) &&
            "TC check is expected to dominate Bypass");
 
-    // Update dominator for Bypass.
-    DT->changeImmediateDominator(Bypass, TCCheckBlock);
     LoopBypassBlocks.push_back(TCCheckBlock);
 
     // Save the trip count so we don't have to regenerate it in the
@@ -7875,6 +7897,12 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
     setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false);
   ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI);
 
+  VPBlockBase *VectorPH = Plan.getVectorPreheader();
+  VPBlockBase *PredVPB = VectorPH->getSinglePredecessor();
+  if (PredVPB->getNumSuccessors() == 1)
+    connectScalarPreheaderInVPlan(Plan);
+  else
+    connectCheckBlockInVPlan(Plan, TCCheckBlock);
   return TCCheckBlock;
 }
 
@@ -7905,32 +7933,19 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
   EPI.MainLoopIterationCountCheck->getTerminator()->replaceUsesOfWith(
       VecEpilogueIterationCountCheck, LoopVectorPreHeader);
 
-  DT->changeImmediateDominator(LoopVectorPreHeader,
-                               EPI.MainLoopIterationCountCheck);
-
   EPI.EpilogueIterationCountCheck->getTerminator()->replaceUsesOfWith(
       VecEpilogueIterationCountCheck, LoopScalarPreHeader);
 
   if (EPI.SCEVSafetyCheck)
     EPI.SCEVSafetyCheck->getTerminator()->replaceUsesOfWith(
         VecEpilogueIterationCountCheck, LoopScalarPreHeader);
-  if (EPI.MemSafetyCheck)
+  if (EPI.MemSafetyCheck) {
     EPI.MemSafetyCheck->getTerminator()->replaceUsesOfWith(
         VecEpilogueIterationCountCheck, LoopScalarPreHeader);
-
-  DT->changeImmediateDominator(
-      VecEpilogueIterationCountCheck,
-      VecEpilogueIterationCountCheck->getSinglePredecessor());
+  }
 
   DT->changeImmediateDominator(LoopScalarPreHeader,
                                EPI.EpilogueIterationCountCheck);
-  if (!Cost->requiresScalarEpilogue(EPI.EpilogueVF.isVector()))
-    // If there is an epilogue which must run, there's no edge from the
-    // middle block to exit blocks  and thus no need to update the immediate
-    // dominator of the exit blocks.
-    DT->changeImmediateDominator(LoopExitBlock,
-                                 EPI.EpilogueIterationCountCheck);
-
   // Keep track of bypass blocks, as they feed start values to the induction and
   // reduction phis in the scalar loop preheader.
   if (EPI.SCEVSafetyCheck)
@@ -8033,6 +8048,20 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
   }
   ReplaceInstWithInst(Insert->getTerminator(), &BI);
   LoopBypassBlocks.push_back(Insert);
+
+  // A new entry block has been created for the epilogue VPlan. Hook it in.
+  VPIRBasicBlock *NewEntry = VPIRBasicBlock::fromBasicBlock(Insert);
+  VPBasicBlock *OldEntry = Plan.getEntry();
+  VPBlockUtils::reassociateBlocks(OldEntry, NewEntry);
+  Plan.setEntry(NewEntry);
+  for (auto &R : make_early_inc_range(*NewEntry)) {
+    auto *VPIR = dyn_cast<VPIRInstruction>(&R);
+    if (!VPIR || !isa<PHINode>(VPIR->getInstruction()))
+      break;
+    VPIR->eraseFromParent();
+  }
+
+  connectScalarPreheaderInVPlan(Plan);
   return Insert;
 }
 
@@ -10256,7 +10285,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
         // should be removed once induction resume value creation is done
         // directly in VPlan.
         EpilogILV.setTripCount(MainILV.getTripCount());
-        for (auto &R : make_early_inc_range(*BestEpiPlan.getPreheader())) {
+        for (auto &R : make_early_inc_range(*BestEpiPlan.getEntry())) {
           auto *ExpandR = dyn_cast<VPExpandSCEVRecipe>(&R);
           if (!ExpandR)
             continue;
@@ -10316,8 +10345,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
           cast<VPHeaderPHIRecipe>(&R)->setStartValue(StartVal);
         }
 
-        assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
-               "DT not preserved correctly");
         LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV,
                         DT, true, &ExpandedSCEVs);
         ++LoopsEpilogueVectorized;
@@ -10345,6 +10372,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
       checkMixedPrecision(L, ORE);
   }
 
+  assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
+         "DT not preserved correctly");
+
   std::optional<MDNode *> RemainderLoopID =
       makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
                                       LLVMLoopVectorizeFollowupEpilogue});
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 54aae122877b98..7de8830bd866ce 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -170,9 +170,8 @@ VPBasicBlock *VPBlockBase::getEntryBasicBlock() {
 }
 
 void VPBlockBase::setPlan(VPlan *ParentPlan) {
-  assert(
-      (ParentPlan->getEntry() == this || ParentPlan->getPreheader() == this) &&
-      "Can only set plan on its entry or preheader block.");
+  assert(ParentPlan->getEntry() == this &&
+         "Can only set plan on its entry or preheader block.");
   Plan = ParentPlan;
 }
 
@@ -463,7 +462,6 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
         (getNumSuccessors() == 0 || isa<BranchInst>(IRBB->getTerminator())) &&
         "other blocks must be terminated by a branch");
   }
-
   for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
     VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
     BasicBlock *PredBB = State->CFG.VPBB2IRBB[PredVPBB];
@@ -851,9 +849,6 @@ VPlan::~VPlan() {
       Block->dropAllReferences(&DummyValue);
 
     VPBlockBase::deleteCFG(Entry);
-
-    Preheader->dropAllReferences(&DummyValue);
-    delete Preheader;
   }
   for (VPValue *VPV : VPLiveInsToFree)
     delete VPV;
@@ -876,9 +871,10 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
   VPIRBasicBlock *Entry =
       VPIRBasicBlock::fromBasicBlock(TheLoop->getLoopPreheader());
   VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph");
+  VPBlockUtils::connectBlocks(Entry, VecPreheader);
   VPIRBasicBlock *ScalarHeader =
       VPIRBasicBlock::fromBasicBlock(TheLoop->getHeader());
-  auto Plan = std::make_unique<VPlan>(Entry, VecPreheader, ScalarHeader);
+  auto Plan = std::make_unique<VPlan>(Entry, ScalarHeader);
 
   // Create SCEV and VPValue for the trip count.
 
@@ -1021,8 +1017,9 @@ void VPlan::execute(VPTransformState *State) {
   BasicBlock *VectorPreHeader = State->CFG.PrevBB;
   State->Builder.SetInsertPoint(VectorPreHeader->getTerminator());
 
-  // Disconnect VectorPreHeader from ExitBB in both the CFG and DT.
-  cast<BranchInst>(VectorPreHeader->getTerminator())->setSuccessor(0, nullptr);
+  replaceVPBBWithIRVPBB(
+      cast<VPBasicBlock>(getVectorLoopRegion()->getSinglePredecessor()),
+      VectorPreHeader);
   State->CFG.DTU.applyUpdates(
       {{DominatorTree::Delete, VectorPreHeader, State->CFG.ExitBB}});
 
@@ -1049,8 +1046,10 @@ void VPlan::execute(VPTransformState *State) {
   State->CFG.DTU.applyUpdates(
       {{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
 
+  ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
+      Entry);
   // Generate code in the loop pre-header and body.
-  for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
+  for (VPBlockBase *Block : make_range(RPOT.begin(), RPOT.end()))
     Block->execute(State);
 
   VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock();
@@ -1101,9 +1100,6 @@ void VPlan::execute(VPTransformState *State) {
   }
 
   State->CFG.DTU.flush();
-  assert(State->CFG.DTU.getDomTree().verify(
-             DominatorTree::VerificationLevel::Fast) &&
-         "DT not preserved correctly");
 }
 
 InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) {
@@ -1156,12 +1152,10 @@ void VPlan::print(raw_ostream &O) const {
 
   printLiveIns(O);
 
-  if (!getPreheader()->empty()) {
-    O << "\n";
-    getPreheader()->print(O, "", SlotTracker);
-  }
+  ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<const VPBlockBase *>>
+      RPOT(getEntry());
 
-  for (const VPBlockBase *Block : vp_depth_first_shallow(getEntry())) {
+  for (const VPBlockBase *Block : RPOT) {
     O << '\n';
     Block->print(O, "", SlotTracker);
   }
@@ -1192,6 +1186,20 @@ std::string VPlan::getName() const {
   return Out;
 }
 
+VPRegionBlock *VPlan::getVectorLoopRegion() {
+  for (VPBlockBase *B : vp_depth_first_shallow(getEntry()))
+    if (auto *R = dyn_cast<VPRegionBlock>(B))
+      return R;
+  return nullptr;
+}
+
+const VPRegionBlock *VPlan::getVectorLoopRegion() const {
+  for (const VPBlockBase *B : vp_depth_first_shallow(getEntry()))
+    if (auto *R = dyn_cast<VPRegionBlock>(B))
+      return R;
+  return nullptr;
+}
+
 LLVM_DUMP_METHOD
 void VPlan::printDOT(raw_ostream &O) const {
   VPlanPrinter Printer(O, *this);
@@ -1242,7 +1250,6 @@ static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
 
 VPlan *VPlan::duplicate() {
   // Clone blocks.
-  VPBasicBlock *NewPreheader = Preheader->clone();
   const auto &[NewEntry, __] = cloneFrom(Entry);
 
   BasicBlock *ScalarHeaderIRBB = getScalarHeader()->getIRBasicBlock();
@@ -1252,8 +1259,7 @@ VPlan *VPlan::duplicate() {
         return VPIRBB && VPIRBB->getIRBasicBlock() == ScalarHeaderIRBB;
       }));
   // Create VPlan, clone live-ins and remap operands in the cloned blocks.
-  auto *NewPlan =
-      new VPlan(NewPreheader, cast<VPBasicBlock>(NewEntry), NewScalarHeader);
+  auto *NewPlan = new VPlan(cast<VPBasicBlock>(NewEntry), NewScalarHeader);
   DenseMap<VPValue *, VPValue *> Old2NewVPValues;
   for (VPValue *OldLiveIn : VPLiveInsToFree) {
     Old2NewVPValues[OldLiveIn] =
@@ -1273,7 +1279,6 @@ VPlan *VPlan::duplicate() {
   // else NewTripCount will be created and inserted into Old2NewVPValues when
   // TripCount is cloned. In any case NewPlan->TripCount is updated below.
 
-  remapOperands(Preheader, NewPreheader, Old2NewVPValues);
   remapOperands(Entry, NewEntry, Old2NewVPValues);
 
   // Initialize remaining fields of cloned VPlan.
@@ -1287,6 +1292,19 @@ VPlan *VPlan::duplicate() {
   return NewPlan;
 }
 
+VPBasicBlock *VPlan::getScalarPreheader() {
+  auto *MiddleVPBB =
+      cast<VPBasicBlock>(getVectorLoopRegion()->getSingleSuccessor());
+  if (MiddleVPBB->getNumSuccessors() == 2) {
+    // Order is strict: first is the exit block, second is the scalar preheader.
+    return cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
+  }
+  if (auto *IRVPBB = dyn_cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor()))
+    return IRVPBB;
+
+  return nullptr;
+}
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 
 Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
@@ -1325,8 +1343,6 @@ void VPlanPrinter::dump() {
   OS << "edge [fontname=Courier, fontsize=30]\n";
   OS << "compound=true\n";
 
-  dumpBlock(Plan.getPreheader());
-
   for (const VPBlockBase *Block : vp_depth_first_shallow(Plan.getEntry()))
     dumpBlock(Block);
 
@@ -1587,7 +1603,6 @@ void VPSlotTracker::assignNames(const VPlan &Plan) {
     assignName(Plan.BackedgeTakenCount);
   for (VPValue *LI : Plan.VPLiveInsToFree)
     assignName(LI);
-  assignNames(Plan.getPreheader());
 
   ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>
       RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 035b9e66fd062a..c9393958521295 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3703,11 +3703,6 @@ class VPlan {
   /// preheader of the vector loop.
   VPBasicBlock *Entry;
 
-  /// VPBasicBlock corresponding to the original preheader. Used to place
-  /// VPExpandSCEV recipes for expressions used during skeleton creation and the
-  /// rest of VPlan execution.
-  VPBasicBlock *Preheader;
-
   /// VPIRBasicBlock wrapping the header of the original scalar loop.
   VPIRBasicBlock *ScalarHeader;
 
@@ -3752,36 +3747,27 @@ class VPlan {
   DenseMap<const SCEV *, VPValue *> SCEVToExpansion;
 
 public:
-  /// Construct a VPlan with original preheader \p Preheader, trip count \p TC,
-  /// \p Entry to the plan and with \p ScalarHeader wrapping the original header
-  /// of the scalar loop. At the moment, \p Preheader and \p Entry need to be
-  /// disconnected, as the bypass blocks between them are not yet modeled in
-  /// VPlan.
-  VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry,
-        VPIRBasicBlock *ScalarHeader)
-      : VPlan(Preheader, Entry, ScalarHeader) {
+  /// Construct a VPlan with \p Entry entering the plan and trip count \p TC.
+  VPlan(VPBasicBlock *Entry, VPValue *TC, VPIRBasicBlock *ScalarHeader)
+      : VPlan(Entry, ScalarHeader) {
     TripCount = TC;
   }
 
-  /// Construct a VPlan with original preheader \p Preheader, \p Entry to
-  /// the plan and with \p ScalarHeader wrapping the original header of the
-  /// scalar loop. At the moment, \p Preheader and \p Entry need to be
-  /// disconnected, as the bypass blocks between them are not yet modeled in
-  /// VPlan.
-  VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry,
-        VPIRBasicBlock *ScalarHeader)
-      : Entry(Entry), Preheader(Preheader), ScalarHeader(ScalarHeader) {
+  /// Construct a VPlan with \p Entry to the plan.
+  VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
+      : Entry(Entry), ScalarHeader(ScalarHeader) {
     Entry->setPlan(this);
-    Preheader->setPlan(this);
-    assert(Preheader->getNumSuccessors() == 0 &&
-           Preheader->getNumPredecessors() == 0 &&
-           "preheader must be disconnected");
     assert(ScalarHeader->getNumSuccessors() == 0 &&
            "scalar header must be a leaf node");
   }
 
   ~VPlan();
 
+  void setEntry(VPBasicBlock *VPBB) {
+    Entry = VPBB;
+    VPBB->setPlan(this);
+  }
+
   /// Create initial VPlan, having an "entry" VPBasicBlock (wrapping
   /// original scalar pre-header ) which contains SCEV expansions that need
   /// to happen before the CFG is modified; a VPBasicBlock for the vector
@@ -3932,12 +3918,9 @@ class VPlan {
 #endif
 
   /// Returns the VPRegionBlock of the vector loop.
-  VPRegionBlock *getVectorLoopRegion() {
-    return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
-  }
-  const VPRegionBlock *getVectorLoopRegion() const {
-    return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
-  }
+  VPRegionBlock *getVectorLoopRegion();
+
+  const VPRegionBlock *getVectorLoopRegion() const;
 
   /// Returns the preheader of the vector loop region.
   VPBasicBlock *getVectorPreheader() {
@@ -3963,13 +3946,11 @@ class VPlan {
     SCEVToExpansion[S] = V;
   }
 
-  /// \return The block corresponding to the original preheader.
-  VPBasicBlock *getPreheader() { return Preheader; }
-  const VPBasicBlock *getPreheader() const { return Preheader; }
-
   /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
   /// recipes to refer to the clones, and return it.
   VPlan *duplicate();
+
+  VPBasicBlock *getScalarPreheader();
 };
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 6254ea15191819..39e06736cfb320 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -627,11 +627,11 @@ Value *VPInstruction::generate(VPTransformState &State) {
         Builder.CreatePHI(IncomingFromOtherPreds->getType(), 2, Name);
     BasicBlock *VPlanPred =
         State.CFG
-            .VPBB2IRBB[cast<VPBasicBlock>(getParent()->getSinglePredecessor())];
+            .VPBB2IRBB[cast<VPBasicBlock>(getParent()->getPredecessors()[0])];
     NewPhi->addIncoming(IncomingFromVPlanPred, VPlanPred);
     for (auto *OtherPred : predecessors(Builder.GetInsertBlock())) {
-      assert(OtherPred != VPlanPred &&
-             "VPlan predecessors should not be connected yet");
+      if (OtherPred == VPlanPred)
+        continue;
       NewPhi->addIncoming(IncomingFromOtherPreds, OtherPred);
     }
     return NewPhi;
@@ -3235,13 +3235,17 @@ void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
 
 void VPExpandSCEVRecipe::execute(VPTransformState &State) {
   assert(!State.Lane && "cannot be used in per-lane");
+  if (State.ExpandedSCEVs.contains(Expr)) {
+    State.Builder.SetInsertPoint(State.CFG.VPBB2IRBB[getParent()]);
+    return;
+  }
+
   const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
   SCEVExpander Exp(SE, DL, "induction");
-
   Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
                                  &*State.Builder.GetInsertPoint());
-  assert(!State.ExpandedSCEVs.contains(Expr) &&
-         "Same SCEV expanded multiple times");
+  /*  assert(!State.ExpandedSCEVs.contains(Expr) &&*/
+  /*"Same SCEV expanded multiple times");*/
   State.ExpandedSCEVs[Expr] = Res;
   State.set(this, Res, VPLane(0));
 }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index a4f0df17f5832f..33ef10dbefc507 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -381,6 +381,8 @@ static bool mergeBlocksIntoPredecessors(VPlan &Plan) {
         dyn_cast_or_null<VPBasicBlock>(VPBB->getSinglePredecessor());
     if (!PredVPBB || PredVPBB->getNumSuccessors() != 1)
       continue;
+    if (isa<VPIRBasicBlock>(PredVPBB))
+      continue;
     WorkList.push_back(VPBB);
   }
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index dd005682203b75..ae53774bff98db 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -412,8 +412,6 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) {
 
   UnrollState Unroller(Plan, UF, Ctx);
 
-  Unroller.unrollBlock(Plan.getPreheader());
-
   // Iterate over all blocks in the plan starting from Entry, and unroll
   // recipes inside them. This includes the vector preheader and middle blocks,
   // which may set up or post-process per-part values.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 4621c28b051298..e40af3e2e3d30a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -34,7 +34,7 @@ VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
     Expanded = Plan.getOrAddLiveIn(E->getValue());
   else {
     Expanded = new VPExpandSCEVRecipe(Expr, SE);
-    Plan.getPreheader()->appendRecipe(Expanded->getDefiningRecipe());
+    Plan.getEntry()->appendRecipe(Expanded->getDefiningRecipe());
   }
   Plan.addSCEVExpansion(Expr, Expanded);
   return Expanded;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 8bdb3133243582..fc6b61491f8b5e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -195,15 +195,6 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) {
     RecipeNumbering[&R] = Cnt++;
 
   for (const VPRecipeBase &R : *VPBB) {
-    if (isa<VPIRInstruction>(&R) ^ isa<VPIRBasicBlock>(VPBB)) {
-      errs() << "VPIRInstructions ";
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-      R.dump();
-      errs() << " ";
-#endif
-      errs() << "not in a VPIRBasicBlock!\n";
-      return false;
-    }
     for (const VPValue *V : R.definedValues()) {
       for (const VPUser *U : V->users()) {
         auto *UI = dyn_cast<VPRecipeBase>(U);
diff --git a/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp b/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp
index 86182ccae0b55b..604f87b634bba6 100644
--- a/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp
@@ -44,8 +44,9 @@ TEST(VPDominatorTreeTest, DominanceNoRegionsTest) {
   LLVMContext C;
   auto *ScalarHeader = BasicBlock::Create(C, "");
   VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+  VPBlockUtils::connectBlocks(VPPH, VPBB0);
   VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB);
-  VPlan Plan(VPPH, &*TC, VPBB0, ScalarHeaderVPBB);
+  VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
 
   VPDominatorTree VPDT;
   VPDT.recalculate(Plan);
@@ -124,8 +125,9 @@ TEST(VPDominatorTreeTest, DominanceRegionsTest) {
 
     auto TC = std::make_unique<VPValue>();
     VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+    VPBlockUtils::connectBlocks(VPPH, VPBB0);
     VPBlockUtils::connectBlocks(R2, ScalarHeaderVPBB);
-    VPlan Plan(VPPH, &*TC, VPBB0, ScalarHeaderVPBB);
+    VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
     VPDominatorTree VPDT;
     VPDT.recalculate(Plan);
 
@@ -206,8 +208,9 @@ TEST(VPDominatorTreeTest, DominanceRegionsTest) {
 
     auto TC = std::make_unique<VPValue>();
     VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+    VPBlockUtils::connectBlocks(VPPH, VPBB1);
     VPBlockUtils::connectBlocks(VPBB2, ScalarHeaderVPBB);
-    VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
+    VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
     VPDominatorTree VPDT;
     VPDT.recalculate(Plan);
 
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
index 4a8615cc086b08..81f1707aaf7fba 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
@@ -259,8 +259,9 @@ TEST(VPBasicBlockTest, getPlan) {
 
     auto TC = std::make_unique<VPValue>();
     VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+    VPBlockUtils::connectBlocks(VPPH, VPBB1);
     VPBlockUtils::connectBlocks(VPBB4, ScalarHeaderVPBB);
-    VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
+    VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
 
     EXPECT_EQ(&Plan, VPBB1->getPlan());
     EXPECT_EQ(&Plan, VPBB2->getPlan());
@@ -281,8 +282,9 @@ TEST(VPBasicBlockTest, getPlan) {
 
     auto TC = std::make_unique<VPValue>();
     VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+    VPBlockUtils::connectBlocks(VPPH, VPBB1);
     VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB);
-    VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
+    VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
 
     EXPECT_EQ(&Plan, VPBB1->getPlan());
     EXPECT_EQ(&Plan, R1->getPlan());
@@ -313,8 +315,9 @@ TEST(VPBasicBlockTest, getPlan) {
 
     auto TC = std::make_unique<VPValue>();
     VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+    VPBlockUtils::connectBlocks(VPPH, VPBB1);
     VPBlockUtils::connectBlocks(R2, ScalarHeaderVPBB);
-    VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
+    VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
 
     EXPECT_EQ(&Plan, VPBB1->getPlan());
     EXPECT_EQ(&Plan, R1->getPlan());
@@ -359,8 +362,9 @@ TEST(VPBasicBlockTest, TraversingIteratorTest) {
     // Use Plan to properly clean up created blocks.
     auto TC = std::make_unique<VPValue>();
     VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+    VPBlockUtils::connectBlocks(VPPH, VPBB1);
     VPBlockUtils::connectBlocks(VPBB4, ScalarHeaderVPBB);
-    VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
+    VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
   }
 
   {
@@ -461,8 +465,9 @@ TEST(VPBasicBlockTest, TraversingIteratorTest) {
     // Use Plan to properly clean up created blocks.
     auto TC = std::make_unique<VPValue>();
     VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+    VPBlockUtils::connectBlocks(VPPH, VPBB0);
     VPBlockUtils::connectBlocks(R2, ScalarHeaderVPBB);
-    VPlan Plan(VPPH, &*TC, VPBB0, ScalarHeaderVPBB);
+    VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
   }
 
   {
@@ -546,8 +551,9 @@ TEST(VPBasicBlockTest, TraversingIteratorTest) {
     // Use Plan to properly clean up created blocks.
     auto TC = std::make_unique<VPValue>();
     VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+    VPBlockUtils::connectBlocks(VPPH, VPBB1);
     VPBlockUtils::connectBlocks(VPBB2, ScalarHeaderVPBB);
-    VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
+    VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
   }
 
   {
@@ -596,8 +602,9 @@ TEST(VPBasicBlockTest, TraversingIteratorTest) {
     // Use Plan to properly clean up created blocks.
     auto TC = std::make_unique<VPValue>();
     VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+    VPBlockUtils::connectBlocks(VPPH, VPBB1);
     VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB);
-    VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
+    VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
   }
 
   {
@@ -690,8 +697,9 @@ TEST(VPBasicBlockTest, TraversingIteratorTest) {
     // Use Plan to properly clean up created blocks.
     auto TC = std::make_unique<VPValue>();
     VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+    VPBlockUtils::connectBlocks(VPPH, VPBB1);
     VPBlockUtils::connectBlocks(VPBB2, ScalarHeaderVPBB);
-    VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
+    VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
   }
   delete ScalarHeader;
 }
@@ -734,7 +742,8 @@ TEST(VPBasicBlockTest, print) {
   auto *ScalarHeader = BasicBlock::Create(C, "");
   // FIXME: This looks wrong.
   auto ScalarHeaderVPBB = std::make_unique<VPIRBasicBlock>(ScalarHeader);
-  VPlan Plan(VPBB0, TC, VPBB1, ScalarHeaderVPBB.get());
+  VPBlockUtils::connectBlocks(VPBB0, VPBB1);
+  VPlan Plan(VPBB0, TC, ScalarHeaderVPBB.get());
   std::string FullDump;
   raw_string_ostream OS(FullDump);
   Plan.printDOT(OS);
@@ -820,8 +829,9 @@ TEST(VPBasicBlockTest, printPlanWithVFsAndUFs) {
   LLVMContext C;
   auto *ScalarHeader = BasicBlock::Create(C, "");
   VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+  VPBlockUtils::connectBlocks(VPBB0, VPBB1);
   VPBlockUtils::connectBlocks(VPBB1, ScalarHeaderVPBB);
-  VPlan Plan(VPBB0, TC, VPBB1, ScalarHeaderVPBB);
+  VPlan Plan(VPBB0, TC, ScalarHeaderVPBB);
   Plan.setName("TestPlan");
   Plan.addVF(ElementCount::getFixed(4));
 
@@ -1291,11 +1301,13 @@ TEST(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) {
 TEST(VPRecipeTest, dumpRecipeInPlan) {
   VPBasicBlock *VPBB0 = new VPBasicBlock("preheader");
   VPBasicBlock *VPBB1 = new VPBasicBlock();
+  VPBlockUtils::connectBlocks(VPBB0, VPBB1);
+
   LLVMContext C;
   auto *ScalarHeader = BasicBlock::Create(C, "");
   VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
   VPBlockUtils::connectBlocks(VPBB1, ScalarHeaderVPBB);
-  VPlan Plan(VPBB0, VPBB1, ScalarHeaderVPBB);
+  VPlan Plan(VPBB0, ScalarHeaderVPBB);
 
   IntegerType *Int32 = IntegerType::get(C, 32);
   auto *AI = BinaryOperator::CreateAdd(PoisonValue::get(Int32),
@@ -1363,11 +1375,13 @@ TEST(VPRecipeTest, dumpRecipeInPlan) {
 TEST(VPRecipeTest, dumpRecipeUnnamedVPValuesInPlan) {
   VPBasicBlock *VPBB0 = new VPBasicBlock("preheader");
   VPBasicBlock *VPBB1 = new VPBasicBlock();
+  VPBlockUtils::connectBlocks(VPBB0, VPBB1);
+
   LLVMContext C;
   auto *ScalarHeader = BasicBlock::Create(C, "");
   VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
   VPBlockUtils::connectBlocks(VPBB1, ScalarHeaderVPBB);
-  VPlan Plan(VPBB0, VPBB1, ScalarHeaderVPBB);
+  VPlan Plan(VPBB0, ScalarHeaderVPBB);
 
   IntegerType *Int32 = IntegerType::get(C, 32);
   auto *AI = BinaryOperator::CreateAdd(PoisonValue::get(Int32),
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
index edb3f8a2952942..2c7566c973c4e1 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
@@ -32,8 +32,9 @@ TEST(VPVerifierTest, VPInstructionUseBeforeDefSameBB) {
   LLVMContext C;
   auto *ScalarHeader = BasicBlock::Create(C, "");
   VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+  VPBlockUtils::connectBlocks(VPPH, VPBB1);
   VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB);
-  VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
+  VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
 
 #if GTEST_HAS_STREAM_REDIRECTION
   ::testing::internal::CaptureStderr();
@@ -69,8 +70,9 @@ TEST(VPVerifierTest, VPInstructionUseBeforeDefDifferentBB) {
   LLVMContext C;
   auto *ScalarHeader = BasicBlock::Create(C, "");
   VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+  VPBlockUtils::connectBlocks(VPPH, VPBB1);
   VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB);
-  VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
+  VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
 
 #if GTEST_HAS_STREAM_REDIRECTION
   ::testing::internal::CaptureStderr();
@@ -116,8 +118,9 @@ TEST(VPVerifierTest, VPBlendUseBeforeDefDifferentBB) {
   auto TC = std::make_unique<VPValue>();
   auto *ScalarHeader = BasicBlock::Create(C, "");
   VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+  VPBlockUtils::connectBlocks(VPPH, VPBB1);
   VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB);
-  VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
+  VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
 
 #if GTEST_HAS_STREAM_REDIRECTION
   ::testing::internal::CaptureStderr();
@@ -157,8 +160,9 @@ TEST(VPVerifierTest, DuplicateSuccessorsOutsideRegion) {
   LLVMContext C;
   auto *ScalarHeader = BasicBlock::Create(C, "");
   VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+  VPBlockUtils::connectBlocks(VPPH, VPBB1);
   VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB);
-  VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
+  VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
 
 #if GTEST_HAS_STREAM_REDIRECTION
   ::testing::internal::CaptureStderr();
@@ -199,8 +203,9 @@ TEST(VPVerifierTest, DuplicateSuccessorsInsideRegion) {
   LLVMContext C;
   auto *ScalarHeader = BasicBlock::Create(C, "");
   VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+  VPBlockUtils::connectBlocks(VPPH, VPBB1);
   VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB);
-  VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
+  VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
 
 #if GTEST_HAS_STREAM_REDIRECTION
   ::testing::internal::CaptureStderr();
@@ -233,8 +238,9 @@ TEST(VPVerifierTest, BlockOutsideRegionWithParent) {
   LLVMContext C;
   auto *ScalarHeader = BasicBlock::Create(C, "");
   VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+  VPBlockUtils::connectBlocks(VPPH, R1);
   VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB);
-  VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
+  VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
 
 #if GTEST_HAS_STREAM_REDIRECTION
   ::testing::internal::CaptureStderr();



More information about the llvm-commits mailing list