[llvm] bc03d6c - [VPlan] Introduce all loop regions as VPlan transform. (NFC) (#129402)

via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 16 04:30:49 PDT 2025


Author: Florian Hahn
Date: 2025-04-16T13:30:45+02:00
New Revision: bc03d6cce25712601423398350f56114e64e4e29

URL: https://github.com/llvm/llvm-project/commit/bc03d6cce25712601423398350f56114e64e4e29
DIFF: https://github.com/llvm/llvm-project/commit/bc03d6cce25712601423398350f56114e64e4e29.diff

LOG: [VPlan] Introduce all loop regions as VPlan transform. (NFC) (#129402)

Further simplify VPlan CFG builder by moving introduction of inner
regions to a VPlan transform, building on
https://github.com/llvm/llvm-project/pull/128419.

The HCFG builder now only constructs plain CFGs. I will move it to
VPlanConstruction as a follow-up.

Depends on https://github.com/llvm/llvm-project/pull/128419.

PR: https://github.com/llvm/llvm-project/pull/129402

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/lib/Transforms/Vectorize/VPlan.h
    llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
    llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
    llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
    llvm/lib/Transforms/Vectorize/VPlanTransforms.h
    llvm/lib/Transforms/Vectorize/VPlanValue.h
    llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
    llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll
    llvm/unittests/Transforms/Vectorize/VPlanTestBase.h

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index af94dc01c8c5c..dd7f05465a50b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9544,14 +9544,14 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
           Range);
   auto Plan = std::make_unique<VPlan>(OrigLoop);
   // Build hierarchical CFG.
-  // Convert to VPlan-transform and consoliate all transforms for VPlan
+  // TODO: Convert to VPlan-transform and consolidate all transforms for VPlan
   // creation.
   VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
-  HCFGBuilder.buildHierarchicalCFG();
+  HCFGBuilder.buildPlainCFG();
 
-  VPlanTransforms::introduceTopLevelVectorLoopRegion(
-      *Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck,
-      CM.foldTailByMasking(), OrigLoop);
+  VPlanTransforms::createLoopRegions(*Plan, Legal->getWidestInductionType(),
+                                     PSE, RequiresScalarEpilogueCheck,
+                                     CM.foldTailByMasking(), OrigLoop);
 
   // Don't use getDecisionAndClampRange here, because we don't know the UF
   // so this function is better to be conservative, rather than to split
@@ -9851,10 +9851,10 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
   auto Plan = std::make_unique<VPlan>(OrigLoop);
   // Build hierarchical CFG
   VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
-  HCFGBuilder.buildHierarchicalCFG();
+  HCFGBuilder.buildPlainCFG();
 
-  VPlanTransforms::introduceTopLevelVectorLoopRegion(
-      *Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop);
+  VPlanTransforms::createLoopRegions(*Plan, Legal->getWidestInductionType(),
+                                     PSE, true, false, OrigLoop);
 
   for (ElementCount VF : Range)
     Plan->addVF(VF);

diff  --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 94b5167c60089..7084676af6d5b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -57,6 +57,7 @@ class SCEV;
 class Type;
 class VPBasicBlock;
 class VPBuilder;
+class VPDominatorTree;
 class VPRegionBlock;
 class VPlan;
 class VPLane;
@@ -303,6 +304,13 @@ class VPBlockBase {
   /// Remove all the successors of this block.
   void clearSuccessors() { Successors.clear(); }
 
+  /// Swap predecessors of the block. The block must have exactly 2
+  /// predecessors.
+  void swapPredecessors() {
+    assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
+    std::swap(Predecessors[0], Predecessors[1]);
+  }
+
   /// Swap successors of the block. The block must have exactly 2 successors.
   // TODO: This should be part of introducing conditional branch recipes rather
   // than being independent.

diff  --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index f58f0290b5fa9..1e687d0879f18 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -14,26 +14,88 @@
 #include "LoopVectorizationPlanner.h"
 #include "VPlan.h"
 #include "VPlanCFG.h"
+#include "VPlanDominatorTree.h"
 #include "VPlanTransforms.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 
 using namespace llvm;
 
-void VPlanTransforms::introduceTopLevelVectorLoopRegion(
-    VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE,
-    bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop) {
-  // TODO: Generalize to introduce all loop regions.
-  auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor());
-  VPBlockUtils::disconnectBlocks(Plan.getEntry(), HeaderVPBB);
+/// Checks if \p HeaderVPB is a loop header block in the plain CFG; that is, it
+/// has exactly 2 predecessors (preheader and latch), where the block
+/// dominates the latch and the preheader dominates the block. If it is a
+/// header block return true, making sure the preheader appears first and
+/// the latch second. Otherwise return false.
+static bool canonicalHeader(VPBlockBase *HeaderVPB,
+                            const VPDominatorTree &VPDT) {
+  ArrayRef<VPBlockBase *> Preds = HeaderVPB->getPredecessors();
+  if (Preds.size() != 2)
+    return false;
 
-  VPBasicBlock *OriginalLatch =
-      cast<VPBasicBlock>(HeaderVPBB->getSinglePredecessor());
-  VPBlockUtils::disconnectBlocks(OriginalLatch, HeaderVPBB);
-  VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
-  VPBlockUtils::connectBlocks(Plan.getEntry(), VecPreheader);
-  assert(OriginalLatch->getNumSuccessors() == 0 &&
-         "Plan should end at top level latch");
+  auto *PreheaderVPBB = Preds[0];
+  auto *LatchVPBB = Preds[1];
+  if (VPDT.dominates(PreheaderVPBB, HeaderVPB) &&
+      VPDT.dominates(HeaderVPB, LatchVPBB))
+    return true;
+
+  std::swap(PreheaderVPBB, LatchVPBB);
+
+  if (VPDT.dominates(PreheaderVPBB, HeaderVPB) &&
+      VPDT.dominates(HeaderVPB, LatchVPBB)) {
+    // Canonicalize predecessors of header so that preheader is first and latch
+    // second.
+    HeaderVPB->swapPredecessors();
+    for (VPRecipeBase &R : cast<VPBasicBlock>(HeaderVPB)->phis())
+      R.swapOperands();
+    return true;
+  }
+
+  return false;
+}
+
+/// Create a new VPRegionBlock for the loop starting at \p HeaderVPB.
+static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
+  auto *PreheaderVPBB = HeaderVPB->getPredecessors()[0];
+  auto *LatchVPBB = HeaderVPB->getPredecessors()[1];
+
+  VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPB);
+  VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPB);
+  VPBlockBase *Succ = LatchVPBB->getSingleSuccessor();
+  assert(LatchVPBB->getNumSuccessors() <= 1 &&
+         "Latch has more than one successor");
+  if (Succ)
+    VPBlockUtils::disconnectBlocks(LatchVPBB, Succ);
+
+  auto *R = Plan.createVPRegionBlock(HeaderVPB, LatchVPBB, "",
+                                     false /*isReplicator*/);
+  R->setParent(HeaderVPB->getParent());
+  // All VPBBs reachable shallowly from HeaderVPB belong to the loop being
+  // wrapped, because the latch was disconnected from its successors above.
+  for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPB))
+    VPBB->setParent(R);
+
+  VPBlockUtils::insertBlockAfter(R, PreheaderVPBB);
+  if (Succ)
+    VPBlockUtils::connectBlocks(R, Succ);
+}
+
+void VPlanTransforms::createLoopRegions(VPlan &Plan, Type *InductionTy,
+                                        PredicatedScalarEvolution &PSE,
+                                        bool RequiresScalarEpilogueCheck,
+                                        bool TailFolded, Loop *TheLoop) {
+  VPDominatorTree VPDT;
+  VPDT.recalculate(Plan);
+  for (VPBlockBase *HeaderVPB : vp_depth_first_shallow(Plan.getEntry()))
+    if (canonicalHeader(HeaderVPB, VPDT))
+      createLoopRegion(Plan, HeaderVPB);
+
+  VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
+  auto *OrigExiting = TopRegion->getExiting();
+  VPBasicBlock *LatchVPBB = Plan.createVPBasicBlock("vector.latch");
+  VPBlockUtils::insertBlockAfter(LatchVPBB, OrigExiting);
+  TopRegion->setExiting(LatchVPBB);
+  TopRegion->setName("vector loop");
+  TopRegion->getEntryBasicBlock()->setName("vector.body");
 
   // Create SCEV and VPValue for the trip count.
   // We use the symbolic max backedge-taken-count, which works also when
@@ -47,18 +109,9 @@ void VPlanTransforms::introduceTopLevelVectorLoopRegion(
   Plan.setTripCount(
       vputils::getOrCreateVPValueForSCEVExpr(Plan, TripCount, SE));
 
-  // Create VPRegionBlock, with existing header and new empty latch block, to be
-  // filled.
-  VPBasicBlock *LatchVPBB = Plan.createVPBasicBlock("vector.latch");
-  VPBlockUtils::insertBlockAfter(LatchVPBB, OriginalLatch);
-  auto *TopRegion = Plan.createVPRegionBlock(
-      HeaderVPBB, LatchVPBB, "vector loop", false /*isReplicator*/);
-  // All VPBB's reachable shallowly from HeaderVPBB belong to top level loop,
-  // because VPlan is expected to end at top level latch.
-  for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPBB))
-    VPBB->setParent(TopRegion);
-
-  VPBlockUtils::insertBlockAfter(TopRegion, VecPreheader);
+  VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
+  VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry());
+
   VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block");
   VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion);
 

diff  --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
index 4b8a2420b3037..5bacd2d4e6d88 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
@@ -12,9 +12,7 @@
 /// components and steps:
 //
 /// 1. PlainCFGBuilder class: builds a plain VPBasicBlock-based CFG that
-/// faithfully represents the CFG in the incoming IR. A VPRegionBlock (Top
-/// Region) is created to enclose and serve as parent of all the VPBasicBlocks
-/// in the plain CFG.
+/// faithfully represents the CFG in the incoming IR.
 /// NOTE: At this point, there is a direct correspondence between all the
 /// VPBasicBlocks created for the initial plain CFG and the incoming
 /// BasicBlocks. However, this might change in the future.
@@ -57,12 +55,8 @@ class PlainCFGBuilder {
   // Hold phi node's that need to be fixed once the plain CFG has been built.
   SmallVector<PHINode *, 8> PhisToFix;
 
-  /// Maps loops in the original IR to their corresponding region.
-  DenseMap<Loop *, VPRegionBlock *> Loop2Region;
-
   // Utility functions.
   void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB);
-  void setRegionPredsFromBB(VPRegionBlock *VPBB, BasicBlock *BB);
   void fixHeaderPhis();
   VPBasicBlock *getOrCreateVPBB(BasicBlock *BB);
 #ifndef NDEBUG
@@ -83,25 +77,6 @@ class PlainCFGBuilder {
 // Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB
 // must have no predecessors.
 void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) {
-  auto GetLatchOfExit = [this](BasicBlock *BB) -> BasicBlock * {
-    auto *SinglePred = BB->getSinglePredecessor();
-    Loop *LoopForBB = LI->getLoopFor(BB);
-    if (!SinglePred || LI->getLoopFor(SinglePred) == LoopForBB)
-      return nullptr;
-    // The input IR must be in loop-simplify form, ensuring a single predecessor
-    // for exit blocks.
-    assert(SinglePred == LI->getLoopFor(SinglePred)->getLoopLatch() &&
-           "SinglePred must be the only loop latch");
-    return SinglePred;
-  };
-  if (auto *LatchBB = GetLatchOfExit(BB)) {
-    auto *PredRegion = getOrCreateVPBB(LatchBB)->getParent();
-    assert(VPBB == cast<VPBasicBlock>(PredRegion->getSingleSuccessor()) &&
-           "successor must already be set for PredRegion; it must have VPBB "
-           "as single successor");
-    VPBB->setPredecessors({PredRegion});
-    return;
-  }
   // Collect VPBB predecessors.
   SmallVector<VPBlockBase *, 2> VPBBPreds;
   for (BasicBlock *Pred : predecessors(BB))
@@ -113,13 +88,6 @@ static bool isHeaderBB(BasicBlock *BB, Loop *L) {
   return L && BB == L->getHeader();
 }
 
-void PlainCFGBuilder::setRegionPredsFromBB(VPRegionBlock *Region,
-                                           BasicBlock *BB) {
-  // BB is a loop header block. Connect the region to the loop preheader.
-  Loop *LoopOfBB = LI->getLoopFor(BB);
-  Region->setPredecessors({getOrCreateVPBB(LoopOfBB->getLoopPredecessor())});
-}
-
 // Add operands to VPInstructions representing phi nodes from the input IR.
 void PlainCFGBuilder::fixHeaderPhis() {
   for (auto *Phi : PhisToFix) {
@@ -130,43 +98,18 @@ void PlainCFGBuilder::fixHeaderPhis() {
     auto *VPPhi = cast<VPWidenPHIRecipe>(VPVal);
     assert(VPPhi->getNumOperands() == 0 &&
            "Expected VPInstruction with no operands.");
-
-    Loop *L = LI->getLoopFor(Phi->getParent());
-    assert(isHeaderBB(Phi->getParent(), L));
-    // For header phis, make sure the incoming value from the loop
-    // predecessor is the first operand of the recipe.
+    assert(isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent())) &&
+           "Expected Phi in header block.");
     assert(Phi->getNumOperands() == 2 &&
            "header phi must have exactly 2 operands");
-    BasicBlock *LoopPred = L->getLoopPredecessor();
-    VPPhi->addOperand(
-        getOrCreateVPOperand(Phi->getIncomingValueForBlock(LoopPred)));
-    BasicBlock *LoopLatch = L->getLoopLatch();
-    VPPhi->addOperand(
-        getOrCreateVPOperand(Phi->getIncomingValueForBlock(LoopLatch)));
-  }
-}
-
-static bool isHeaderVPBB(VPBasicBlock *VPBB) {
-  return VPBB->getParent() && VPBB->getParent()->getEntry() == VPBB;
-}
-
-/// Return true of \p L loop is contained within \p OuterLoop.
-static bool doesContainLoop(const Loop *L, const Loop *OuterLoop) {
-  if (L->getLoopDepth() < OuterLoop->getLoopDepth())
-    return false;
-  const Loop *P = L;
-  while (P) {
-    if (P == OuterLoop)
-      return true;
-    P = P->getParentLoop();
+    for (BasicBlock *Pred : predecessors(Phi->getParent()))
+      VPPhi->addOperand(
+          getOrCreateVPOperand(Phi->getIncomingValueForBlock(Pred)));
   }
-  return false;
 }
 
-// Create a new empty VPBasicBlock for an incoming BasicBlock in the region
-// corresponding to the containing loop  or retrieve an existing one if it was
-// already created. If no region exists yet for the loop containing \p BB, a new
-// one is created.
+// Create a new empty VPBasicBlock for an incoming BasicBlock or retrieve an
+// existing one if it was already created.
 VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
   if (auto *VPBB = BB2VPBB.lookup(BB)) {
     // Retrieve existing VPBB.
@@ -174,32 +117,10 @@ VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
   }
 
   // Create new VPBB.
-  StringRef Name = isHeaderBB(BB, TheLoop) ? "vector.body" : BB->getName();
+  StringRef Name = BB->getName();
   LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n");
   VPBasicBlock *VPBB = Plan.createVPBasicBlock(Name);
   BB2VPBB[BB] = VPBB;
-
-  // Get or create a region for the loop containing BB, except for the top
-  // region of TheLoop which is created later.
-  Loop *LoopOfBB = LI->getLoopFor(BB);
-  if (!LoopOfBB || LoopOfBB == TheLoop || !doesContainLoop(LoopOfBB, TheLoop))
-    return VPBB;
-
-  auto *RegionOfVPBB = Loop2Region.lookup(LoopOfBB);
-  if (!isHeaderBB(BB, LoopOfBB)) {
-    assert(RegionOfVPBB &&
-           "Region should have been created by visiting header earlier");
-    VPBB->setParent(RegionOfVPBB);
-    return VPBB;
-  }
-
-  assert(!RegionOfVPBB &&
-         "First visit of a header basic block expects to register its region.");
-  // Handle a header - take care of its Region.
-  RegionOfVPBB = Plan.createVPRegionBlock(Name.str(), false /*isReplicator*/);
-  RegionOfVPBB->setParent(Loop2Region[LoopOfBB->getParentLoop()]);
-  RegionOfVPBB->setEntry(VPBB);
-  Loop2Region[LoopOfBB] = RegionOfVPBB;
   return VPBB;
 }
 
@@ -351,6 +272,8 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
 // Main interface to build the plain CFG.
 void PlainCFGBuilder::buildPlainCFG(
     DenseMap<VPBlockBase *, BasicBlock *> &VPB2IRBB) {
+  VPIRBasicBlock *Entry = cast<VPIRBasicBlock>(Plan.getEntry());
+  BB2VPBB[Entry->getIRBasicBlock()] = Entry;
 
   // 1. Scan the body of the loop in a topological order to visit each basic
   // block after having visited its predecessor basic blocks. Create a VPBB for
@@ -376,26 +299,13 @@ void PlainCFGBuilder::buildPlainCFG(
   for (BasicBlock *BB : RPO) {
     // Create or retrieve the VPBasicBlock for this BB.
     VPBasicBlock *VPBB = getOrCreateVPBB(BB);
-    VPRegionBlock *Region = VPBB->getParent();
     Loop *LoopForBB = LI->getLoopFor(BB);
     // Set VPBB predecessors in the same order as they are in the incoming BB.
-    if (!isHeaderBB(BB, LoopForBB)) {
-      setVPBBPredsFromBB(VPBB, BB);
-    } else if (Region) {
-      // BB is a loop header and there's a corresponding region, set the
-      // predecessor for it.
-      setRegionPredsFromBB(Region, BB);
-    }
+    setVPBBPredsFromBB(VPBB, BB);
 
     // Create VPInstructions for BB.
     createVPInstructionsForVPBB(VPBB, BB);
 
-    if (BB == TheLoop->getLoopLatch()) {
-      VPBasicBlock *HeaderVPBB = getOrCreateVPBB(LoopForBB->getHeader());
-      VPBlockUtils::connectBlocks(VPBB, HeaderVPBB);
-      continue;
-    }
-
     // Set VPBB successors. We create empty VPBBs for successors if they don't
     // exist already. Recipes will be created when the successor is visited
     // during the RPO traversal.
@@ -410,10 +320,7 @@ void PlainCFGBuilder::buildPlainCFG(
     auto *BI = cast<BranchInst>(BB->getTerminator());
     unsigned NumSuccs = succ_size(BB);
     if (NumSuccs == 1) {
-      auto *Successor = getOrCreateVPBB(BB->getSingleSuccessor());
-      VPBB->setOneSuccessor(isHeaderVPBB(Successor)
-                                ? Successor->getParent()
-                                : static_cast<VPBlockBase *>(Successor));
+      VPBB->setOneSuccessor(getOrCreateVPBB(BB->getSingleSuccessor()));
       continue;
     }
     assert(BI->isConditional() && NumSuccs == 2 && BI->isConditional() &&
@@ -423,21 +330,11 @@ void PlainCFGBuilder::buildPlainCFG(
     BasicBlock *IRSucc1 = BI->getSuccessor(1);
     VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0);
     VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1);
-    if (BB == LoopForBB->getLoopLatch()) {
-      // For a latch we need to set the successor of the region rather than that
-      // of VPBB and it should be set to the exit, i.e., non-header successor,
-      // except for the top region, which is handled elsewhere.
-      assert(LoopForBB != TheLoop &&
-             "Latch of the top region should have been handled earlier");
-      Region->setOneSuccessor(isHeaderVPBB(Successor0) ? Successor1
-                                                       : Successor0);
-      Region->setExiting(VPBB);
-      continue;
-    }
 
-    // Don't connect any blocks outside the current loop except the latch for
-    // now. The latch is handled above.
-    if (LoopForBB) {
+    // Don't connect any blocks outside the current loop except the latches for
+    // inner loops.
+    // TODO: Also connect exit blocks during initial VPlan construction.
+    if (LoopForBB == TheLoop || BB != LoopForBB->getLoopLatch()) {
       if (!LoopForBB->contains(IRSucc0)) {
         VPBB->setOneSuccessor(Successor1);
         continue;
@@ -456,21 +353,16 @@ void PlainCFGBuilder::buildPlainCFG(
   // corresponding VPlan operands.
   fixHeaderPhis();
 
-  VPBlockUtils::connectBlocks(Plan.getEntry(),
-                              getOrCreateVPBB(TheLoop->getHeader()));
+  Plan.getEntry()->setOneSuccessor(getOrCreateVPBB(TheLoop->getHeader()));
+  Plan.getEntry()->setPlan(&Plan);
 
   for (const auto &[IRBB, VPB] : BB2VPBB)
     VPB2IRBB[VPB] = IRBB;
+
+  LLVM_DEBUG(Plan.setName("Plain CFG\n"); dbgs() << Plan);
 }
 
 void VPlanHCFGBuilder::buildPlainCFG() {
   PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan);
   PCFGBuilder.buildPlainCFG(VPB2IRBB);
 }
-
-// Public interface to build a H-CFG.
-void VPlanHCFGBuilder::buildHierarchicalCFG() {
-  // Build Top Region enclosing the plain CFG.
-  buildPlainCFG();
-  LLVM_DEBUG(Plan.setName("HCFGBuilder: Plain CFG\n"); dbgs() << Plan);
-}

diff  --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
index f7f98ed7b1755..f2e90d3f4d9b3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
@@ -30,7 +30,6 @@ namespace llvm {
 
 class Loop;
 class LoopInfo;
-class VPRegionBlock;
 class VPlan;
 class VPlanTestIRBase;
 class VPBlockBase;
@@ -54,15 +53,12 @@ class VPlanHCFGBuilder {
   /// created for a input IR basic block.
   DenseMap<VPBlockBase *, BasicBlock *> VPB2IRBB;
 
-  /// Build plain CFG for TheLoop and connects it to Plan's entry.
-  void buildPlainCFG();
-
 public:
   VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P)
       : TheLoop(Lp), LI(LI), Plan(P) {}
 
-  /// Build H-CFG for TheLoop and update Plan accordingly.
-  void buildHierarchicalCFG();
+  /// Build a plain CFG for TheLoop and connect it to Plan's entry.
+  void buildPlainCFG();
 
   /// Return the input IR BasicBlock corresponding to \p VPB. Returns nullptr if
   /// there is no such corresponding block.

diff  --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index ee3642a8aff73..a9461b261ddb6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -52,20 +52,19 @@ struct VPlanTransforms {
       verifyVPlanIsValid(Plan);
   }
 
-  /// Introduce the top-level VPRegionBlock for the main loop in \p Plan. Coming
-  /// into this function, \p Plan's top-level loop is modeled using a plain CFG.
-  /// This transform wraps the plain CFG of the top-level loop within a
-  /// VPRegionBlock and creates a VPValue expression for the original trip
-  /// count. It will also introduce a dedicated VPBasicBlock for the vector
-  /// pre-header as well a VPBasicBlock as exit block of the region
-  /// (middle.block). If a check is needed to guard executing the scalar
+  /// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning \p Plan's
+  /// flat CFG into a hierarchical CFG. It also creates a VPValue expression for
+  /// the original trip count. It will also introduce a dedicated VPBasicBlock
+  /// for the vector pre-header as well as a VPBasicBlock as exit block of the
+  /// region (middle.block). If a check is needed to guard executing the scalar
   /// epilogue loop, it will be added to the middle block, together with
   /// VPBasicBlocks for the scalar preheader and exit blocks. \p InductionTy is
   /// the type of the canonical induction and used for related values, like the
   /// trip count expression.
-  static void introduceTopLevelVectorLoopRegion(
-      VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE,
-      bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop);
+  static void createLoopRegions(VPlan &Plan, Type *InductionTy,
+                                PredicatedScalarEvolution &PSE,
+                                bool RequiresScalarEpilogueCheck,
+                                bool TailFolded, Loop *TheLoop);
 
   /// Replaces the VPInstructions in \p Plan with corresponding
   /// widen recipes. Returns false if any VPInstructions could not be converted

diff  --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index ced60a30ad56e..638156eab7a84 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -246,6 +246,12 @@ class VPUser {
     New->addUser(*this);
   }
 
+  /// Swap operands of the VPUser. It must have exactly 2 operands.
+  void swapOperands() {
+    assert(Operands.size() == 2 && "must have 2 operands to swap");
+    std::swap(Operands[0], Operands[1]);
+  }
+
   /// Replaces all uses of \p From in the VPUser with \p To.
   void replaceUsesOfWith(VPValue *From, VPValue *To);
 

diff  --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
index 625a32c098f94..91a5ea6b7fe36 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
@@ -6,35 +6,32 @@
 @arr = external global [8 x [8 x i64]], align 16
 
 define void @foo(i64 %n) {
-; CHECK:      VPlan 'HCFGBuilder: Plain CFG
+; CHECK:      VPlan 'Plain CFG
 ; CHECK-NEXT: {
 ; CHECK-EMPTY:
 ; CHECK-NEXT: ir-bb<entry>:
-; CHECK-NEXT: Successor(s): vector.body
+; CHECK-NEXT: Successor(s): outer.header
 ; CHECK-EMPTY:
-; CHECK-NEXT: vector.body:
-; CHECK-NEXT:   WIDEN-PHI ir<%outer.iv> = phi ir<0>, ir<%outer.iv.next>
+; CHECK-NEXT: outer.header:
+; CHECK-NEXT:   WIDEN-PHI ir<%outer.iv> = phi ir<%outer.iv.next>, ir<0>
 ; CHECK-NEXT:   EMIT ir<%gep.1> = getelementptr ir<@arr2>, ir<0>, ir<%outer.iv>
 ; CHECK-NEXT:   EMIT store ir<%outer.iv>, ir<%gep.1>
 ; CHECK-NEXT:   EMIT ir<%add> = add ir<%outer.iv>, ir<%n>
 ; CHECK-NEXT: Successor(s): inner
 ; CHECK-EMPTY:
-; CHECK-NEXT: <x1> inner: {
-; CHECK-NEXT:   inner:
-; CHECK-NEXT:     WIDEN-PHI ir<%inner.iv> = phi ir<0>, ir<%inner.iv.next>
-; CHECK-NEXT:     EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv>
-; CHECK-NEXT:     EMIT store ir<%add>, ir<%gep.2>
-; CHECK-NEXT:     EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1>
-; CHECK-NEXT:     EMIT ir<%inner.ec> = icmp ir<%inner.iv.next>, ir<8>
-; CHECK-NEXT:     EMIT branch-on-cond ir<%inner.ec>
-; CHECK-NEXT:   No successors
-; CHECK-NEXT: }
-; CHECK-NEXT: Successor(s): outer.latch
+; CHECK-NEXT: inner:
+; CHECK-NEXT:   WIDEN-PHI ir<%inner.iv> = phi ir<%inner.iv.next>, ir<0>
+; CHECK-NEXT:   EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv>
+; CHECK-NEXT:   EMIT store ir<%add>, ir<%gep.2>
+; CHECK-NEXT:   EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1>
+; CHECK-NEXT:   EMIT ir<%inner.ec> = icmp ir<%inner.iv.next>, ir<8>
+; CHECK-NEXT:   EMIT branch-on-cond ir<%inner.ec>
+; CHECK-NEXT: Successor(s): outer.latch, inner
 ; CHECK-EMPTY:
 ; CHECK-NEXT: outer.latch:
 ; CHECK-NEXT:   EMIT ir<%outer.iv.next> = add ir<%outer.iv>, ir<1>
 ; CHECK-NEXT:   EMIT ir<%outer.ec> = icmp ir<%outer.iv.next>, ir<8>
-; CHECK-NEXT: Successor(s): vector.body
+; CHECK-NEXT: Successor(s): outer.header
 ; CHECK-NEXT: }
 entry:
   br label %outer.header

diff  --git a/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll b/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll
index 89eaca0cfa8c8..29aeb7c4e97f9 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll
@@ -4,7 +4,7 @@
 ; Verify that the stress testing flag for the VPlan H-CFG builder works as
 ; expected with and without enabling the VPlan H-CFG Verifier.
 
-; CHECK: VPlan 'HCFGBuilder: Plain CFG
+; CHECK: VPlan 'Plain CFG
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 

diff  --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
index caf5d2357411d..486296535996b 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
@@ -73,9 +73,9 @@ class VPlanTestIRBase : public testing::Test {
     PredicatedScalarEvolution PSE(*SE, *L);
     auto Plan = std::make_unique<VPlan>(L);
     VPlanHCFGBuilder HCFGBuilder(L, LI.get(), *Plan);
-    HCFGBuilder.buildHierarchicalCFG();
-    VPlanTransforms::introduceTopLevelVectorLoopRegion(
-        *Plan, IntegerType::get(*Ctx, 64), PSE, true, false, L);
+    HCFGBuilder.buildPlainCFG();
+    VPlanTransforms::createLoopRegions(*Plan, IntegerType::get(*Ctx, 64), PSE,
+                                       true, false, L);
     return Plan;
   }
 };


        


More information about the llvm-commits mailing list