[llvm] [VPlan] Replace VPRegionBlock with explicit CFG before execute (NFCI). (PR #117506)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 18 15:46:02 PDT 2025


https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/117506

>From d31b3f623800ba1e54a807f5656a6364abbc2058 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 18 Mar 2025 22:37:13 +0000
Subject: [PATCH] [VPlan] Replace VPRegionBlock with explicit CFG before
 execute (NFCI).

---
 .../Transforms/Vectorize/LoopVectorize.cpp    |  29 +--
 llvm/lib/Transforms/Vectorize/VPlan.cpp       | 194 ++++++++++--------
 llvm/lib/Transforms/Vectorize/VPlan.h         |   7 +
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp |  13 +-
 .../Transforms/Vectorize/VPlanTransforms.cpp  |  28 +++
 .../Transforms/Vectorize/VPlanTransforms.h    |   3 +-
 6 files changed, 165 insertions(+), 109 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0214dbb558ade..7271945289d2e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2433,12 +2433,6 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
   // End if-block.
   VPRegionBlock *Parent = RepRecipe->getParent()->getParent();
   bool IfPredicateInstr = Parent ? Parent->isReplicator() : false;
-  assert(
-      (Parent || !RepRecipe->getParent()->getPlan()->getVectorLoopRegion() ||
-       all_of(RepRecipe->operands(),
-              [](VPValue *Op) { return Op->isDefinedOutsideLoopRegions(); })) &&
-      "Expected a recipe is either within a region or all of its operands "
-      "are defined outside the vectorized region.");
   if (IfPredicateInstr)
     PredicatedInstructions.push_back(Cloned);
 }
@@ -2947,14 +2941,18 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
 
   // Don't apply optimizations below when no vector region remains, as they all
   // require a vector loop at the moment.
-  if (!State.Plan->getVectorLoopRegion())
+  VPBasicBlock *HeaderVPBB = find_singleton<VPBasicBlock>(
+      vp_depth_first_shallow(State.Plan->getEntry()),
+      [&State](VPBlockBase *VPB, bool) {
+        auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
+        return VPBB && VPBB->isHeader(State.VPDT) ? VPBB : nullptr;
+      });
+  if (!HeaderVPBB)
     return;
 
   for (Instruction *PI : PredicatedInstructions)
     sinkScalarOperands(&*PI);
 
-  VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion();
-  VPBasicBlock *HeaderVPBB = VectorRegion->getEntryBasicBlock();
   BasicBlock *HeaderBB = State.CFG.VPBB2IRBB[HeaderVPBB];
 
   // Remove redundant induction instructions.
@@ -3055,7 +3053,7 @@ void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {
 }
 
 void InnerLoopVectorizer::fixNonInductionPHIs(VPTransformState &State) {
-  auto Iter = vp_depth_first_deep(Plan.getEntry());
+  auto Iter = vp_depth_first_shallow(Plan.getEntry());
   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
     for (VPRecipeBase &P : VPBB->phis()) {
       VPWidenPHIRecipe *VPPhi = dyn_cast<VPWidenPHIRecipe>(&P);
@@ -7812,14 +7810,19 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
   // 2.6. Maintain Loop Hints
   // Keep all loop hints from the original loop on the vector loop (we'll
   // replace the vectorizer-specific hints below).
-  if (auto *LoopRegion = BestVPlan.getVectorLoopRegion()) {
+  auto BlockRange = vp_depth_first_shallow(BestVPlan.getEntry());
+  auto HeaderVPBBIter = find_if(BlockRange, [&State](VPBlockBase *VPB) {
+    auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
+    return VPBB && VPBB->isHeader(State.VPDT) ? VPBB : nullptr;
+  });
+  if (HeaderVPBBIter != BlockRange.end()) {
+    VPBasicBlock *HeaderVPBB = cast<VPBasicBlock>(*HeaderVPBBIter);
     MDNode *OrigLoopID = OrigLoop->getLoopID();
 
     std::optional<MDNode *> VectorizedLoopID =
         makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
                                         LLVMLoopVectorizeFollowupVectorized});
 
-    VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
     Loop *L = LI->getLoopFor(State.CFG.VPBB2IRBB[HeaderVPBB]);
     if (VectorizedLoopID) {
       L->setLoopID(*VectorizedLoopID);
@@ -7845,7 +7848,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
   ILV.printDebugTracesAtEnd();
 
   // 4. Adjust branch weight of the branch in the middle block.
-  if (BestVPlan.getVectorLoopRegion()) {
+  if (HeaderVPBBIter != BlockRange.end()) {
     auto *MiddleVPBB = BestVPlan.getMiddleBlock();
     auto *MiddleTerm =
         cast<BranchInst>(State.CFG.VPBB2IRBB[MiddleVPBB]->getTerminator());
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index b63cdd04530ab..220065e88faff 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -208,6 +208,11 @@ VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
   return Parent->getEnclosingBlockWithPredecessors();
 }
 
+bool VPBasicBlock::isHeader(const VPDominatorTree &VPDT) const {
+  return getPredecessors().size() == 2 &&
+         VPDT.dominates(this, getPredecessors()[1]);
+}
+
 VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
   iterator It = begin();
   while (It != end() && It->isPhi())
@@ -352,8 +357,7 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
 }
 
 BasicBlock *VPTransformState::CFGState::getPreheaderBBFor(VPRecipeBase *R) {
-  VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion();
-  return VPBB2IRBB[LoopRegion->getPreheaderVPBB()];
+  return VPBB2IRBB[cast<VPBasicBlock>(R->getParent()->getPredecessors()[0])];
 }
 
 void VPTransformState::addNewMetadata(Instruction *To,
@@ -436,7 +440,9 @@ void VPBasicBlock::connectToPredecessors(VPTransformState &State) {
   for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
     VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
     auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
-    BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
+    BasicBlock *PredBB = CFG.VPBB2IRBB.lookup(PredVPBB);
+    if (!PredBB)
+      continue;
 
     assert(PredBB && "Predecessor basic-block not found building successor.");
     auto *PredBBTerminator = PredBB->getTerminator();
@@ -444,6 +450,8 @@ void VPBasicBlock::connectToPredecessors(VPTransformState &State) {
 
     auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
     if (isa<UnreachableInst>(PredBBTerminator)) {
+      if (PredVPSuccessors.size() == 2)
+        continue;
       assert(PredVPSuccessors.size() == 1 &&
              "Predecessor ending w/o branch must have single successor.");
       DebugLoc DL = PredBBTerminator->getDebugLoc();
@@ -499,11 +507,25 @@ void VPBasicBlock::execute(VPTransformState *State) {
   bool Replica = bool(State->Lane);
   BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
 
+  if (isHeader(State->VPDT)) {
+    // Create and register the new vector loop.
+    Loop *PrevParentLoop = State->CurrentParentLoop;
+    State->CurrentParentLoop = State->LI->AllocateLoop();
+
+    // Insert the new loop into the loop nest and register the new basic blocks
+    // before calling any utilities such as SCEV that require valid LoopInfo.
+    if (PrevParentLoop)
+      PrevParentLoop->addChildLoop(State->CurrentParentLoop);
+    else
+      State->LI->addTopLevelLoop(State->CurrentParentLoop);
+  }
+
   auto IsReplicateRegion = [](VPBlockBase *BB) {
     auto *R = dyn_cast_or_null<VPRegionBlock>(BB);
-    return R && R->isReplicator();
+    assert((!R || R->isReplicator()) &&
+           "only replicate region blocks should remain");
+    return R;
   };
-
   // 1. Create an IR basic block.
   if ((Replica && this == getParent()->getEntry()) ||
       IsReplicateRegion(getSingleHierarchicalPredecessor())) {
@@ -537,6 +559,14 @@ void VPBasicBlock::execute(VPTransformState *State) {
 
   // 2. Fill the IR basic block with IR instructions.
   executeRecipes(State, NewBB);
+
+  // If this block is a latch, update CurrentParentLoop.
+  if (any_of(getSuccessors(), [State, this](VPBlockBase *Succ) {
+        auto *VPBB = dyn_cast<VPBasicBlock>(Succ);
+        return VPBB && VPBB->isHeader(State->VPDT) &&
+               State->VPDT.dominates(Succ, this);
+      }))
+    State->CurrentParentLoop = State->CurrentParentLoop->getParentLoop();
 }
 
 VPBasicBlock *VPBasicBlock::clone() {
@@ -747,35 +777,13 @@ VPRegionBlock *VPRegionBlock::clone() {
 }
 
 void VPRegionBlock::execute(VPTransformState *State) {
-  ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
-      RPOT(Entry);
-
-  if (!isReplicator()) {
-    // Create and register the new vector loop.
-    Loop *PrevParentLoop = State->CurrentParentLoop;
-    State->CurrentParentLoop = State->LI->AllocateLoop();
-
-    // Insert the new loop into the loop nest and register the new basic blocks
-    // before calling any utilities such as SCEV that require valid LoopInfo.
-    if (PrevParentLoop)
-      PrevParentLoop->addChildLoop(State->CurrentParentLoop);
-    else
-      State->LI->addTopLevelLoop(State->CurrentParentLoop);
-
-    // Visit the VPBlocks connected to "this", starting from it.
-    for (VPBlockBase *Block : RPOT) {
-      LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
-      Block->execute(State);
-    }
-
-    State->CurrentParentLoop = PrevParentLoop;
-    return;
-  }
-
+  assert(isReplicator() &&
+         "Loop regions should have been lowered to plain CFG");
   assert(!State->Lane && "Replicating a Region with non-null instance.");
-
-  // Enter replicating mode.
   assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
+
+  ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
+      Entry);
   State->Lane = VPLane(0);
   for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
        ++Lane) {
@@ -850,6 +858,22 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
 }
 #endif
 
+void VPRegionBlock::removeRegion() {
+  auto *Header = cast<VPBasicBlock>(getEntry());
+  VPBlockBase *Preheader = getSinglePredecessor();
+  auto *Exiting = cast<VPBasicBlock>(getExiting());
+
+  VPBlockBase *Middle = getSingleSuccessor();
+  VPBlockUtils::disconnectBlocks(Preheader, this);
+  VPBlockUtils::disconnectBlocks(this, Middle);
+
+  for (VPBlockBase *VPB : vp_depth_first_shallow(Entry))
+    VPB->setParent(nullptr);
+
+  VPBlockUtils::connectBlocks(Preheader, Header);
+  VPBlockUtils::connectBlocks(Exiting, Middle);
+}
+
 VPlan::VPlan(Loop *L) {
   setEntry(createVPIRBasicBlock(L->getLoopPreheader()));
   ScalarHeader = createVPIRBasicBlock(L->getHeader());
@@ -961,57 +985,57 @@ void VPlan::execute(VPTransformState *State) {
   for (VPBlockBase *Block : RPOT)
     Block->execute(State);
 
-  State->CFG.DTU.flush();
-
-  auto *LoopRegion = getVectorLoopRegion();
-  if (!LoopRegion)
-    return;
-
-  VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
-  BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
-
   // Fix the latch value of canonical, reduction and first-order recurrences
   // phis in the vector loop.
-  VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
-  for (VPRecipeBase &R : Header->phis()) {
-    // Skip phi-like recipes that generate their backedege values themselves.
-    if (isa<VPWidenPHIRecipe>(&R))
+  for (VPBasicBlock *Header :
+       VPBlockUtils::blocksOnly<VPBasicBlock>(vp_depth_first_shallow(Entry))) {
+    if (!Header->isHeader(State->VPDT))
       continue;
+    for (VPRecipeBase &R : Header->phis()) {
+      if (isa<VPWidenPHIRecipe>(&R))
+        continue;
 
-    if (isa<VPWidenInductionRecipe>(&R)) {
-      PHINode *Phi = nullptr;
-      if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
-        Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
-      } else {
-        auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
-        assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
-               "recipe generating only scalars should have been replaced");
-        auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
-        Phi = cast<PHINode>(GEP->getPointerOperand());
+      auto *LatchVPBB = cast<VPBasicBlock>(Header->getPredecessors()[1]);
+      BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
+
+      if (isa<VPWidenInductionRecipe>(&R)) {
+        PHINode *Phi = nullptr;
+        if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
+          Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
+        } else {
+          auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
+          assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
+                 "recipe generating only scalars should have been replaced");
+          auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
+          Phi = cast<PHINode>(GEP->getPointerOperand());
+        }
+
+        Phi->setIncomingBlock(1, VectorLatchBB);
+
+        // Move the last step to the end of the latch block. This ensures
+        // consistent placement of all induction updates.
+        Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
+        Inc->moveBefore(
+            std::prev(VectorLatchBB->getTerminator()->getIterator()));
+
+        // Use the steps for the last part as backedge value for the induction.
+        if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
+          Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
+        continue;
       }
 
-      Phi->setIncomingBlock(1, VectorLatchBB);
-
-      // Move the last step to the end of the latch block. This ensures
-      // consistent placement of all induction updates.
-      Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
-      Inc->moveBefore(std::prev(VectorLatchBB->getTerminator()->getIterator()));
-
-      // Use the steps for the last part as backedge value for the induction.
-      if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
-        Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
-      continue;
+      auto *PhiR = cast<VPSingleDefRecipe>(&R);
+      // VPInstructions currently model scalar Phis only.
+      bool NeedsScalar = isa<VPInstruction>(PhiR) ||
+                         (isa<VPReductionPHIRecipe>(PhiR) &&
+                          cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
+
+      Value *Phi = State->get(PhiR, NeedsScalar);
+      // VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does
+      // not.
+      Value *Val = State->get(PhiR->getOperand(1), NeedsScalar);
+      cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
     }
-
-    auto *PhiR = cast<VPSingleDefRecipe>(&R);
-    // VPInstructions currently model scalar Phis only.
-    bool NeedsScalar = isa<VPInstruction>(PhiR) ||
-                       (isa<VPReductionPHIRecipe>(PhiR) &&
-                        cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
-    Value *Phi = State->get(PhiR, NeedsScalar);
-    // VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does not.
-    Value *Val = State->get(PhiR->getOperand(1), NeedsScalar);
-    cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
   }
 }
 
@@ -1363,16 +1387,16 @@ void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
 
 #endif
 
-/// Returns true if there is a vector loop region and \p VPV is defined in a
-/// loop region.
-static bool isDefinedInsideLoopRegions(const VPValue *VPV) {
-  const VPRecipeBase *DefR = VPV->getDefiningRecipe();
-  return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() ||
-                  DefR->getParent()->getEnclosingLoopRegion());
-}
-
 bool VPValue::isDefinedOutsideLoopRegions() const {
-  return !isDefinedInsideLoopRegions(this);
+  auto *DefR = getDefiningRecipe();
+  if (!DefR)
+    return true;
+
+  const VPBasicBlock *DefVPBB = DefR->getParent();
+  auto *Plan = DefVPBB->getPlan();
+  if (Plan->getVectorLoopRegion())
+    return !DefR->getParent()->getEnclosingLoopRegion();
+  return DefVPBB == Plan->getEntry();
 }
 void VPValue::replaceAllUsesWith(VPValue *New) {
   replaceUsesWithIf(New, [](VPUser &, unsigned) { return true; });
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 80b3d2a760293..0f45914515e44 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -57,6 +57,7 @@ class SCEV;
 class Type;
 class VPBasicBlock;
 class VPBuilder;
+class VPDominatorTree;
 class VPRegionBlock;
 class VPlan;
 class VPLane;
@@ -3221,6 +3222,8 @@ class VPBasicBlock : public VPBlockBase {
   /// the cloned recipes.
   VPBasicBlock *clone() override;
 
+  bool isHeader(const VPDominatorTree &VPDT) const;
+
 protected:
   /// Execute the recipes in the IR basic block \p BB.
   void executeRecipes(VPTransformState *State, BasicBlock *BB);
@@ -3367,6 +3370,10 @@ class VPRegionBlock : public VPBlockBase {
   /// Clone all blocks in the single-entry single-exit region of the block and
   /// their recipes without updating the operands of the cloned recipes.
   VPRegionBlock *clone() override;
+
+  /// Remove the current region from its VPlan, connecting its predecessor to
+  /// its entry and exiting block to its successor.
+  void removeRegion();
 };
 
 /// VPlan models a candidate for vectorization, encoding various decisions take
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index ad6b75f4deea0..c495862057b4e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -481,10 +481,6 @@ Value *VPInstruction::generate(VPTransformState &State) {
     return Builder.CreateCmp(getPredicate(), A, B, Name);
   }
   case Instruction::PHI: {
-    assert(getParent() ==
-               getParent()->getPlan()->getVectorLoopRegion()->getEntry() &&
-           "VPInstructions with PHI opcodes must be used for header phis only "
-           "at the moment");
     BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
     Value *Start = State.get(getOperand(0), VPLane(0));
     PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, Name);
@@ -580,11 +576,10 @@ Value *VPInstruction::generate(VPTransformState &State) {
     CondBr->setSuccessor(0, nullptr);
     Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
 
-    if (!getParent()->isExiting())
+    VPBasicBlock *Header = cast<VPBasicBlock>(getParent()->getSuccessors()[1]);
+    if (!State.CFG.VPBB2IRBB.contains(Header))
       return CondBr;
 
-    VPRegionBlock *ParentRegion = getParent()->getParent();
-    VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
     CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
     return CondBr;
   }
@@ -595,9 +590,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
     Value *Cond = Builder.CreateICmpEQ(IV, TC);
 
     // Now create the branch.
-    auto *Plan = getParent()->getPlan();
-    VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
-    VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
+    VPBasicBlock *Header = cast<VPBasicBlock>(getParent()->getSuccessors()[1]);
 
     // Replace the temporary unreachable terminator with a new conditional
     // branch, hooking it up to backward destination (the header) now and to the
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index e6f788a188052..57f766675f82f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -992,6 +992,19 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
       TypeInfo.inferScalarType(R.getOperand(1)) ==
           TypeInfo.inferScalarType(R.getVPSingleValue()))
     return R.getVPSingleValue()->replaceAllUsesWith(R.getOperand(1));
+
+  // TODO: Split off.
+  if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
+    if (!RepR->getParent()->getParent() &&
+        RepR->getParent()->getPlan()->getVectorLoopRegion()) {
+      auto *Clone = new VPReplicateRecipe(RepR->getUnderlyingInstr(),
+                                          RepR->operands(), true);
+      Clone->insertBefore(RepR);
+      RepR->replaceAllUsesWith(Clone);
+      RepR->eraseFromParent();
+      return;
+    }
+  }
 }
 
 void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {
@@ -2082,6 +2095,21 @@ void VPlanTransforms::createInterleaveGroups(
 }
 
 void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) {
+  // Replace loop regions with explicity CFG.
+  SmallVector<VPRegionBlock *> LoopRegions;
+  for (VPRegionBlock *R : VPBlockUtils::blocksOnly<VPRegionBlock>(
+           vp_depth_first_deep(Plan.getEntry()))) {
+    if (!R->isReplicator())
+      LoopRegions.push_back(R);
+  }
+  for (VPRegionBlock *R : LoopRegions) {
+    VPBlockBase *Header = R->getEntry();
+    VPBlockBase *Latch = R->getExiting();
+    R->removeRegion();
+    // Add explicit backedge.
+    VPBlockUtils::connectBlocks(Latch, Header);
+  }
+
   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
            vp_depth_first_deep(Plan.getEntry()))) {
     for (VPRecipeBase &R : make_early_inc_range(VPBB->phis())) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 2ec07eea1ecce..1723b745d06d7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -175,7 +175,8 @@ struct VPlanTransforms {
                                          BasicBlock *UncountableExitingBlock,
                                          VPRecipeBuilder &RecipeBuilder);
 
-  /// Lower abstract recipes to concrete ones, that can be codegen'd.
+  /// Lower abstract recipes to concrete ones, that can be codegen'd and replace
+  /// loop regions with explicit CFG.
   static void convertToConcreteRecipes(VPlan &Plan);
 
   /// Perform instcombine-like simplifications on recipes in \p Plan. Use \p



More information about the llvm-commits mailing list