[llvm] [VPlan] Introduce child regions as VPlan transform. (PR #129402)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Sun Mar 30 07:58:22 PDT 2025


https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/129402

>From 4d6cabdbed0c85b586add3eb932942de11764908 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sat, 1 Mar 2025 21:07:00 +0000
Subject: [PATCH] [VPlan] Introduce child regions as VPlan transform.

Further simplify the VPlan CFG builder by moving the introduction of inner
regions to a VPlan transform, building on
https://github.com/llvm/llvm-project/pull/128419

The HCFG builder now only constructs plain CFGs. I will move it to
VPlanConstruction as a follow-up.
---
 .../Transforms/Vectorize/LoopVectorize.cpp    |   6 +-
 .../Vectorize/VPlanConstruction.cpp           |  33 ++++++
 .../Transforms/Vectorize/VPlanHCFGBuilder.cpp | 104 ++----------------
 .../Transforms/Vectorize/VPlanHCFGBuilder.h   |   8 +-
 .../vplan-printing-outer-loop.ll              |  21 ++--
 .../LoopVectorize/vplan_hcfg_stress_test.ll   |   2 +-
 .../Transforms/Vectorize/VPlanTestBase.h      |   2 +-
 7 files changed, 59 insertions(+), 117 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 4b4a56be19fe5..a541b4772f868 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9308,10 +9308,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
           Range);
   auto Plan = std::make_unique<VPlan>(OrigLoop);
   // Build hierarchical CFG.
-  // Convert to VPlan-transform and consoliate all transforms for VPlan
+  // TODO: Convert to VPlan-transform and consolidate all transforms for VPlan
   // creation.
   VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
-  HCFGBuilder.buildHierarchicalCFG();
+  HCFGBuilder.buildPlainCFG();
 
   VPlanTransforms::introduceTopLevelVectorLoopRegion(
       *Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck,
@@ -9615,7 +9615,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
   auto Plan = std::make_unique<VPlan>(OrigLoop);
   // Build hierarchical CFG
   VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
-  HCFGBuilder.buildHierarchicalCFG();
+  HCFGBuilder.buildPlainCFG();
 
   VPlanTransforms::introduceTopLevelVectorLoopRegion(
       *Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index f58f0290b5fa9..ba68ec99b0c74 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -14,12 +14,43 @@
 #include "LoopVectorizationPlanner.h"
 #include "VPlan.h"
 #include "VPlanCFG.h"
+#include "VPlanDominatorTree.h"
 #include "VPlanTransforms.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 
 using namespace llvm;
 
+/// Introduce VPRegionBlocks for each loop modeled using a plain CFG in \p Plan.
+static void introduceInnerLoopRegions(VPlan &Plan) {
+  VPDominatorTree VPDT;
+  VPDT.recalculate(Plan); // Computed once, before any block is rewired below.
+
+  for (VPBlockBase *HeaderVPBB :
+       vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry())) {
+    if (HeaderVPBB->getNumPredecessors() != 2) // Header candidates only.
+      continue;
+    VPBlockBase *PreheaderVPBB = HeaderVPBB->getPredecessors()[0]; // Pred 0.
+    VPBlockBase *LatchVPBB = HeaderVPBB->getPredecessors()[1]; // Pred 1.
+    if (!VPDT.dominates(HeaderVPBB, LatchVPBB)) // No back edge: not a loop.
+      continue;
+    assert(VPDT.dominates(PreheaderVPBB, HeaderVPBB) &&
+           "preheader must dominate header");
+    VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPBB); // Entry edge.
+    VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPBB); // Back edge.
+    VPBlockBase *Succ = LatchVPBB->getSingleSuccessor(); // NOTE(review): presumably the loop exit; used unchecked below - confirm it cannot be null.
+    VPBlockUtils::disconnectBlocks(LatchVPBB, Succ);
+
+    auto *R = Plan.createVPRegionBlock(HeaderVPBB, LatchVPBB, "", // Unnamed.
+                                       false /*isReplicator*/);
+    for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPBB))
+      VPBB->setParent(R); // Blocks reached from the header now belong to R.
+
+    VPBlockUtils::insertBlockAfter(R, PreheaderVPBB); // R follows preheader.
+    VPBlockUtils::connectBlocks(R, Succ); // R takes over the latch's edge.
+  }
+}
+
 void VPlanTransforms::introduceTopLevelVectorLoopRegion(
     VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE,
     bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop) {
@@ -98,4 +129,6 @@ void VPlanTransforms::introduceTopLevelVectorLoopRegion(
                                ScalarLatchTerm->getDebugLoc(), "cmp.n");
   Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp},
                        ScalarLatchTerm->getDebugLoc());
+
+  introduceInnerLoopRegions(Plan);
 }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
index 4b8a2420b3037..5e31b09bcd7d3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
@@ -12,9 +12,7 @@
 /// components and steps:
 //
 /// 1. PlainCFGBuilder class: builds a plain VPBasicBlock-based CFG that
-/// faithfully represents the CFG in the incoming IR. A VPRegionBlock (Top
-/// Region) is created to enclose and serve as parent of all the VPBasicBlocks
-/// in the plain CFG.
+/// faithfully represents the CFG in the incoming IR.
 /// NOTE: At this point, there is a direct correspondence between all the
 /// VPBasicBlocks created for the initial plain CFG and the incoming
 /// BasicBlocks. However, this might change in the future.
@@ -57,12 +55,8 @@ class PlainCFGBuilder {
   // Hold phi node's that need to be fixed once the plain CFG has been built.
   SmallVector<PHINode *, 8> PhisToFix;
 
-  /// Maps loops in the original IR to their corresponding region.
-  DenseMap<Loop *, VPRegionBlock *> Loop2Region;
-
   // Utility functions.
   void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB);
-  void setRegionPredsFromBB(VPRegionBlock *VPBB, BasicBlock *BB);
   void fixHeaderPhis();
   VPBasicBlock *getOrCreateVPBB(BasicBlock *BB);
 #ifndef NDEBUG
@@ -83,25 +77,6 @@ class PlainCFGBuilder {
 // Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB
 // must have no predecessors.
 void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) {
-  auto GetLatchOfExit = [this](BasicBlock *BB) -> BasicBlock * {
-    auto *SinglePred = BB->getSinglePredecessor();
-    Loop *LoopForBB = LI->getLoopFor(BB);
-    if (!SinglePred || LI->getLoopFor(SinglePred) == LoopForBB)
-      return nullptr;
-    // The input IR must be in loop-simplify form, ensuring a single predecessor
-    // for exit blocks.
-    assert(SinglePred == LI->getLoopFor(SinglePred)->getLoopLatch() &&
-           "SinglePred must be the only loop latch");
-    return SinglePred;
-  };
-  if (auto *LatchBB = GetLatchOfExit(BB)) {
-    auto *PredRegion = getOrCreateVPBB(LatchBB)->getParent();
-    assert(VPBB == cast<VPBasicBlock>(PredRegion->getSingleSuccessor()) &&
-           "successor must already be set for PredRegion; it must have VPBB "
-           "as single successor");
-    VPBB->setPredecessors({PredRegion});
-    return;
-  }
   // Collect VPBB predecessors.
   SmallVector<VPBlockBase *, 2> VPBBPreds;
   for (BasicBlock *Pred : predecessors(BB))
@@ -113,13 +88,6 @@ static bool isHeaderBB(BasicBlock *BB, Loop *L) {
   return L && BB == L->getHeader();
 }
 
-void PlainCFGBuilder::setRegionPredsFromBB(VPRegionBlock *Region,
-                                           BasicBlock *BB) {
-  // BB is a loop header block. Connect the region to the loop preheader.
-  Loop *LoopOfBB = LI->getLoopFor(BB);
-  Region->setPredecessors({getOrCreateVPBB(LoopOfBB->getLoopPredecessor())});
-}
-
 // Add operands to VPInstructions representing phi nodes from the input IR.
 void PlainCFGBuilder::fixHeaderPhis() {
   for (auto *Phi : PhisToFix) {
@@ -150,19 +118,6 @@ static bool isHeaderVPBB(VPBasicBlock *VPBB) {
   return VPBB->getParent() && VPBB->getParent()->getEntry() == VPBB;
 }
 
-/// Return true of \p L loop is contained within \p OuterLoop.
-static bool doesContainLoop(const Loop *L, const Loop *OuterLoop) {
-  if (L->getLoopDepth() < OuterLoop->getLoopDepth())
-    return false;
-  const Loop *P = L;
-  while (P) {
-    if (P == OuterLoop)
-      return true;
-    P = P->getParentLoop();
-  }
-  return false;
-}
-
 // Create a new empty VPBasicBlock for an incoming BasicBlock in the region
 // corresponding to the containing loop  or retrieve an existing one if it was
 // already created. If no region exists yet for the loop containing \p BB, a new
@@ -178,28 +133,6 @@ VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
   LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n");
   VPBasicBlock *VPBB = Plan.createVPBasicBlock(Name);
   BB2VPBB[BB] = VPBB;
-
-  // Get or create a region for the loop containing BB, except for the top
-  // region of TheLoop which is created later.
-  Loop *LoopOfBB = LI->getLoopFor(BB);
-  if (!LoopOfBB || LoopOfBB == TheLoop || !doesContainLoop(LoopOfBB, TheLoop))
-    return VPBB;
-
-  auto *RegionOfVPBB = Loop2Region.lookup(LoopOfBB);
-  if (!isHeaderBB(BB, LoopOfBB)) {
-    assert(RegionOfVPBB &&
-           "Region should have been created by visiting header earlier");
-    VPBB->setParent(RegionOfVPBB);
-    return VPBB;
-  }
-
-  assert(!RegionOfVPBB &&
-         "First visit of a header basic block expects to register its region.");
-  // Handle a header - take care of its Region.
-  RegionOfVPBB = Plan.createVPRegionBlock(Name.str(), false /*isReplicator*/);
-  RegionOfVPBB->setParent(Loop2Region[LoopOfBB->getParentLoop()]);
-  RegionOfVPBB->setEntry(VPBB);
-  Loop2Region[LoopOfBB] = RegionOfVPBB;
   return VPBB;
 }
 
@@ -376,15 +309,13 @@ void PlainCFGBuilder::buildPlainCFG(
   for (BasicBlock *BB : RPO) {
     // Create or retrieve the VPBasicBlock for this BB.
     VPBasicBlock *VPBB = getOrCreateVPBB(BB);
-    VPRegionBlock *Region = VPBB->getParent();
     Loop *LoopForBB = LI->getLoopFor(BB);
     // Set VPBB predecessors in the same order as they are in the incoming BB.
     if (!isHeaderBB(BB, LoopForBB)) {
       setVPBBPredsFromBB(VPBB, BB);
-    } else if (Region) {
-      // BB is a loop header and there's a corresponding region, set the
-      // predecessor for it.
-      setRegionPredsFromBB(Region, BB);
+    } else if (LoopForBB != TheLoop) {
+      VPBB->setPredecessors({getOrCreateVPBB(LoopForBB->getLoopPredecessor()),
+                             getOrCreateVPBB(LoopForBB->getLoopLatch())});
     }
 
     // Create VPInstructions for BB.
@@ -423,21 +354,11 @@ void PlainCFGBuilder::buildPlainCFG(
     BasicBlock *IRSucc1 = BI->getSuccessor(1);
     VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0);
     VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1);
-    if (BB == LoopForBB->getLoopLatch()) {
-      // For a latch we need to set the successor of the region rather than that
-      // of VPBB and it should be set to the exit, i.e., non-header successor,
-      // except for the top region, which is handled elsewhere.
-      assert(LoopForBB != TheLoop &&
-             "Latch of the top region should have been handled earlier");
-      Region->setOneSuccessor(isHeaderVPBB(Successor0) ? Successor1
-                                                       : Successor0);
-      Region->setExiting(VPBB);
-      continue;
-    }
 
-    // Don't connect any blocks outside the current loop except the latch for
-    // now. The latch is handled above.
-    if (LoopForBB) {
+    // Don't connect any blocks outside the current loop except the latch, which
+    // is handled below.
+    if (LoopForBB &&
+        (LoopForBB == TheLoop || BB != LoopForBB->getLoopLatch())) {
       if (!LoopForBB->contains(IRSucc0)) {
         VPBB->setOneSuccessor(Successor1);
         continue;
@@ -461,16 +382,11 @@ void PlainCFGBuilder::buildPlainCFG(
 
   for (const auto &[IRBB, VPB] : BB2VPBB)
     VPB2IRBB[VPB] = IRBB;
+
+  LLVM_DEBUG(Plan.setName("Plain CFG\n"); dbgs() << Plan);
 }
 
 void VPlanHCFGBuilder::buildPlainCFG() {
   PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan);
   PCFGBuilder.buildPlainCFG(VPB2IRBB);
 }
-
-// Public interface to build a H-CFG.
-void VPlanHCFGBuilder::buildHierarchicalCFG() {
-  // Build Top Region enclosing the plain CFG.
-  buildPlainCFG();
-  LLVM_DEBUG(Plan.setName("HCFGBuilder: Plain CFG\n"); dbgs() << Plan);
-}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
index f7f98ed7b1755..f2e90d3f4d9b3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
@@ -30,7 +30,6 @@ namespace llvm {
 
 class Loop;
 class LoopInfo;
-class VPRegionBlock;
 class VPlan;
 class VPlanTestIRBase;
 class VPBlockBase;
@@ -54,15 +53,12 @@ class VPlanHCFGBuilder {
   /// created for a input IR basic block.
   DenseMap<VPBlockBase *, BasicBlock *> VPB2IRBB;
 
-  /// Build plain CFG for TheLoop and connects it to Plan's entry.
-  void buildPlainCFG();
-
 public:
   VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P)
       : TheLoop(Lp), LI(LI), Plan(P) {}
 
-  /// Build H-CFG for TheLoop and update Plan accordingly.
-  void buildHierarchicalCFG();
+  /// Build plain CFG for TheLoop and connect it to Plan's entry.
+  void buildPlainCFG();
 
   /// Return the input IR BasicBlock corresponding to \p VPB. Returns nullptr if
   /// there is no such corresponding block.
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
index 625a32c098f94..b4b6d3d760349 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
@@ -6,7 +6,7 @@
 @arr = external global [8 x [8 x i64]], align 16
 
 define void @foo(i64 %n) {
-; CHECK:      VPlan 'HCFGBuilder: Plain CFG
+; CHECK:      VPlan 'Plain CFG
 ; CHECK-NEXT: {
 ; CHECK-EMPTY:
 ; CHECK-NEXT: ir-bb<entry>:
@@ -19,17 +19,14 @@ define void @foo(i64 %n) {
 ; CHECK-NEXT:   EMIT ir<%add> = add ir<%outer.iv>, ir<%n>
 ; CHECK-NEXT: Successor(s): inner
 ; CHECK-EMPTY:
-; CHECK-NEXT: <x1> inner: {
-; CHECK-NEXT:   inner:
-; CHECK-NEXT:     WIDEN-PHI ir<%inner.iv> = phi ir<0>, ir<%inner.iv.next>
-; CHECK-NEXT:     EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv>
-; CHECK-NEXT:     EMIT store ir<%add>, ir<%gep.2>
-; CHECK-NEXT:     EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1>
-; CHECK-NEXT:     EMIT ir<%inner.ec> = icmp ir<%inner.iv.next>, ir<8>
-; CHECK-NEXT:     EMIT branch-on-cond ir<%inner.ec>
-; CHECK-NEXT:   No successors
-; CHECK-NEXT: }
-; CHECK-NEXT: Successor(s): outer.latch
+; CHECK-NEXT: inner:
+; CHECK-NEXT:   WIDEN-PHI ir<%inner.iv> = phi ir<0>, ir<%inner.iv.next>
+; CHECK-NEXT:   EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv>
+; CHECK-NEXT:   EMIT store ir<%add>, ir<%gep.2>
+; CHECK-NEXT:   EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1>
+; CHECK-NEXT:   EMIT ir<%inner.ec> = icmp ir<%inner.iv.next>, ir<8>
+; CHECK-NEXT:   EMIT branch-on-cond ir<%inner.ec>
+; CHECK-NEXT: Successor(s): outer.latch, inner
 ; CHECK-EMPTY:
 ; CHECK-NEXT: outer.latch:
 ; CHECK-NEXT:   EMIT ir<%outer.iv.next> = add ir<%outer.iv>, ir<1>
diff --git a/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll b/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll
index 89eaca0cfa8c8..29aeb7c4e97f9 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll
@@ -4,7 +4,7 @@
 ; Verify that the stress testing flag for the VPlan H-CFG builder works as
 ; expected with and without enabling the VPlan H-CFG Verifier.
 
-; CHECK: VPlan 'HCFGBuilder: Plain CFG
+; CHECK: VPlan 'Plain CFG
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
index caf5d2357411d..92961e44c5e54 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
@@ -73,7 +73,7 @@ class VPlanTestIRBase : public testing::Test {
     PredicatedScalarEvolution PSE(*SE, *L);
     auto Plan = std::make_unique<VPlan>(L);
     VPlanHCFGBuilder HCFGBuilder(L, LI.get(), *Plan);
-    HCFGBuilder.buildHierarchicalCFG();
+    HCFGBuilder.buildPlainCFG();
     VPlanTransforms::introduceTopLevelVectorLoopRegion(
         *Plan, IntegerType::get(*Ctx, 64), PSE, true, false, L);
     return Plan;



More information about the llvm-commits mailing list