[llvm] [VPlan] Retain exit conditions and edges in initial VPlan (NFC). (PR #137709)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Tue May 6 12:52:29 PDT 2025


https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/137709

>From de0fc2dce15b8cb4f22ea99374b3715c694d6fa1 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 28 Apr 2025 12:12:55 +0100
Subject: [PATCH 1/3] [VPlan] Retain exit conditions and edges in initial VPlan
 (NFC).

Update initial VPlan construction to include exit conditions and
edges.

For now, all early exits are disconnected before forming the regions,
but a follow-up will update uncountable exit handling to also happen
here. This is required to enable VPlan predication and remove the
dependence any IR BBs (https://github.com/llvm/llvm-project/pull/128420).

This includes updates in a few places to use
replaceSuccessor/replacePredecessor to preserve the order of predecessors
and successors, to reduce the need of fixing up phi operand orderings.
This unfortunately required making them public, not sure if there's a
---
 llvm/lib/Transforms/Vectorize/VPlan.h         |  3 +-
 .../Vectorize/VPlanConstruction.cpp           | 78 +++++++++----------
 .../vplan-printing-outer-loop.ll              |  6 +-
 3 files changed, 44 insertions(+), 43 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index c77c944c01a36..53332dce7181d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -117,6 +117,7 @@ class VPBlockBase {
     Predecessors.erase(Pos);
   }
 
+public:
   /// Remove \p Successor from the successors of this block.
   void removeSuccessor(VPBlockBase *Successor) {
     auto Pos = find(Successors, Successor);
@@ -129,8 +130,6 @@ class VPBlockBase {
   void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
     auto I = find(Predecessors, Old);
     assert(I != Predecessors.end());
-    assert(Old->getParent() == New->getParent() &&
-           "replaced predecessor must have the same parent");
     *I = New;
   }
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index c7132e84f689c..49edc43fd6179 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -113,6 +113,9 @@ VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
     return VPBB;
   }
 
+  if (!TheLoop->contains(BB))
+    return Plan->getExitBlock(BB);
+
   // Create new VPBB.
   StringRef Name = BB->getName();
   LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n");
@@ -146,14 +149,6 @@ bool PlainCFGBuilder::isExternalDef(Value *Val) {
     // Instruction definition is in outermost loop PH.
     return false;
 
-  // Check whether Instruction definition is in a loop exit.
-  SmallVector<BasicBlock *> ExitBlocks;
-  TheLoop->getExitBlocks(ExitBlocks);
-  if (is_contained(ExitBlocks, InstParent)) {
-    // Instruction definition is in outermost loop exit.
-    return false;
-  }
-
   // Check whether Instruction definition is in loop body.
   return !TheLoop->contains(Inst);
 }
@@ -202,11 +197,6 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
            "Instruction shouldn't have been visited.");
 
     if (auto *Br = dyn_cast<BranchInst>(Inst)) {
-      if (TheLoop->getLoopLatch() == BB ||
-          any_of(successors(BB),
-                 [this](BasicBlock *Succ) { return !TheLoop->contains(Succ); }))
-        continue;
-
       // Conditional branch instruction are represented using BranchOnCond
       // recipes.
       if (Br->isConditional()) {
@@ -296,7 +286,6 @@ std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG(
   for (BasicBlock *BB : RPO) {
     // Create or retrieve the VPBasicBlock for this BB.
     VPBasicBlock *VPBB = getOrCreateVPBB(BB);
-    Loop *LoopForBB = LI->getLoopFor(BB);
     // Set VPBB predecessors in the same order as they are in the incoming BB.
     setVPBBPredsFromBB(VPBB, BB);
 
@@ -327,24 +316,12 @@ std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG(
     BasicBlock *IRSucc1 = BI->getSuccessor(1);
     VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0);
     VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1);
-
-    // Don't connect any blocks outside the current loop except the latches for
-    // inner loops.
-    // TODO: Also connect exit blocks during initial VPlan construction.
-    if (LoopForBB == TheLoop || BB != LoopForBB->getLoopLatch()) {
-      if (!LoopForBB->contains(IRSucc0)) {
-        VPBB->setOneSuccessor(Successor1);
-        continue;
-      }
-      if (!LoopForBB->contains(IRSucc1)) {
-        VPBB->setOneSuccessor(Successor0);
-        continue;
-      }
-    }
-
     VPBB->setTwoSuccessors(Successor0, Successor1);
   }
 
+  for (auto *EB : Plan->getExitBlocks())
+    setVPBBPredsFromBB(EB, EB->getIRBasicBlock());
+
   // 2. The whole CFG has been built at this point so all the input Values must
   // have a VPlan counterpart. Fix VPlan header phi by adding their
   // corresponding VPlan operands.
@@ -447,19 +424,21 @@ static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
   VPBlockBase *Succ = LatchVPBB->getSingleSuccessor();
   assert(LatchVPBB->getNumSuccessors() <= 1 &&
          "Latch has more than one successor");
-  if (Succ)
-    VPBlockUtils::disconnectBlocks(LatchVPBB, Succ);
+  LatchVPBB->removeSuccessor(Succ);
 
   auto *R = Plan.createVPRegionBlock(HeaderVPB, LatchVPBB, "",
                                      false /*isReplicator*/);
   // All VPBB's reachable shallowly from HeaderVPB belong to top level loop,
   // because VPlan is expected to end at top level latch disconnected above.
+  SmallPtrSet<VPBlockBase *, 2> ExitBlocks(Plan.getExitBlocks().begin(),
+                                           Plan.getExitBlocks().end());
   for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPB))
-    VPBB->setParent(R);
+    if (!ExitBlocks.contains(VPBB))
+      VPBB->setParent(R);
 
   VPBlockUtils::insertBlockAfter(R, PreheaderVPBB);
-  if (Succ)
-    VPBlockUtils::connectBlocks(R, Succ);
+  R->setOneSuccessor(Succ);
+  Succ->replacePredecessor(LatchVPBB, R);
 }
 
 // Add the necessary canonical IV and branch recipes required to control the
@@ -511,12 +490,33 @@ void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy,
   VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry());
 
   VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block");
-  VPBlockUtils::connectBlocks(LatchVPB, MiddleVPBB);
-  LatchVPB->swapSuccessors();
+  VPBlockBase *LatchExitVPB = LatchVPB->getNumSuccessors() == 2
+                                  ? LatchVPB->getSuccessors()[0]
+                                  : nullptr;
+  if (LatchExitVPB) {
+    LatchVPB->getSuccessors()[0] = MiddleVPBB;
+    MiddleVPBB->setPredecessors({LatchVPB});
+    MiddleVPBB->setSuccessors({LatchExitVPB});
+    LatchExitVPB->replacePredecessor(LatchVPB, MiddleVPBB);
+  } else {
+    VPBlockUtils::connectBlocks(LatchVPB, MiddleVPBB);
+    LatchVPB->swapSuccessors();
+  }
 
   addCanonicalIVRecipes(Plan, cast<VPBasicBlock>(HeaderVPB),
                         cast<VPBasicBlock>(LatchVPB), InductionTy, IVDL);
 
+  // Disconnect all edges between exit blocks other than from the latch.
+  // TODO: Uncountable exit blocks should be handled here.
+  for (VPBlockBase *EB : to_vector(Plan.getExitBlocks())) {
+    for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
+      if (Pred == MiddleVPBB)
+        continue;
+      cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent();
+      VPBlockUtils::disconnectBlocks(Pred, EB);
+    }
+  }
+
   // Create SCEV and VPValue for the trip count.
   // We use the symbolic max backedge-taken-count, which works also when
   // vectorizing loops with uncountable early exits.
@@ -541,8 +541,9 @@ void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy,
   //    Thus if tail is to be folded, we know we don't need to run the
   //    remainder and we can set the condition to true.
   // 3) Otherwise, construct a runtime check.
-
   if (!RequiresScalarEpilogueCheck) {
+    if (LatchExitVPB)
+      VPBlockUtils::disconnectBlocks(MiddleVPBB, LatchExitVPB);
     VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
     // The exit blocks are unreachable, remove their recipes to make sure no
     // users remain that may pessimize transforms.
@@ -554,9 +555,6 @@ void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy,
   }
 
   // The connection order corresponds to the operands of the conditional branch.
-  BasicBlock *IRExitBlock = TheLoop->getUniqueLatchExitBlock();
-  auto *VPExitBlock = Plan.getExitBlock(IRExitBlock);
-  VPBlockUtils::connectBlocks(MiddleVPBB, VPExitBlock);
   VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
 
   auto *ScalarLatchTerm = TheLoop->getLoopLatch()->getTerminator();
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
index 91a5ea6b7fe36..fe845ae74cbee 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
@@ -31,7 +31,11 @@ define void @foo(i64 %n) {
 ; CHECK-NEXT: outer.latch:
 ; CHECK-NEXT:   EMIT ir<%outer.iv.next> = add ir<%outer.iv>, ir<1>
 ; CHECK-NEXT:   EMIT ir<%outer.ec> = icmp ir<%outer.iv.next>, ir<8>
-; CHECK-NEXT: Successor(s): outer.header
+; CHECK-NEXT:   EMIT branch-on-cond ir<%outer.ec>
+; CHECK-NEXT: Successor(s): ir-bb<exit>, outer.header
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<exit>:
+; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 entry:
   br label %outer.header

>From 55b4d5e5e2bc0ae80f80eac4531c3501b5eee8f5 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 6 May 2025 19:15:47 +0100
Subject: [PATCH 2/3] !fixup address first set of comments, thanks

---
 .../Vectorize/VPlanConstruction.cpp           | 30 +++++++++++--------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 11f5ee0d2175d..8657f78acd22b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -113,9 +113,6 @@ VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
     return VPBB;
   }
 
-  if (!TheLoop->contains(BB))
-    return Plan->getExitBlock(BB);
-
   // Create new VPBB.
   StringRef Name = BB->getName();
   LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n");
@@ -249,6 +246,8 @@ std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG(
     DenseMap<VPBlockBase *, BasicBlock *> &VPB2IRBB) {
   VPIRBasicBlock *Entry = cast<VPIRBasicBlock>(Plan->getEntry());
   BB2VPBB[Entry->getIRBasicBlock()] = Entry;
+  for (VPIRBasicBlock *ExitVPBB : Plan->getExitBlocks())
+    BB2VPBB[ExitVPBB->getIRBasicBlock()] = ExitVPBB;
 
   // 1. Scan the body of the loop in a topological order to visit each basic
   // block after having visited its predecessor basic blocks. Create a VPBB for
@@ -410,9 +409,12 @@ static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
   VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPB);
   VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPB);
   VPBlockBase *Succ = LatchVPBB->getSingleSuccessor();
-  assert(LatchVPBB->getNumSuccessors() <= 1 &&
-         "Latch has more than one successor");
-  LatchVPBB->removeSuccessor(Succ);
+  assert(Succ && "Latch expected to be left with a single successor");
+
+  auto *PlaceHolder = Plan.createVPBasicBlock("Region place holder");
+  VPBlockUtils::insertOnEdge(LatchVPBB, Succ, PlaceHolder);
+  VPBlockUtils::disconnectBlocks(LatchVPBB, PlaceHolder);
+  VPBlockUtils::connectBlocks(PreheaderVPBB, PlaceHolder);
 
   auto *R = Plan.createVPRegionBlock(HeaderVPB, LatchVPBB, "",
                                      false /*isReplicator*/);
@@ -425,8 +427,9 @@ static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
       VPBB->setParent(R);
 
   VPBlockUtils::insertBlockAfter(R, PreheaderVPBB);
-  R->setOneSuccessor(Succ);
-  Succ->replacePredecessor(LatchVPBB, R);
+  VPBlockUtils::insertOnEdge(PlaceHolder, Succ, R);
+  VPBlockUtils::disconnectBlocks(R, PlaceHolder);
+  VPBlockUtils::disconnectBlocks(PlaceHolder, R);
 }
 
 // Add the necessary canonical IV and branch recipes required to control the
@@ -481,11 +484,12 @@ void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy,
   VPBlockBase *LatchExitVPB = LatchVPB->getNumSuccessors() == 2
                                   ? LatchVPB->getSuccessors()[0]
                                   : nullptr;
-  if (LatchExitVPB) {
-    LatchVPB->getSuccessors()[0] = MiddleVPBB;
-    MiddleVPBB->setPredecessors({LatchVPB});
-    MiddleVPBB->setSuccessors({LatchExitVPB});
-    LatchExitVPB->replacePredecessor(LatchVPB, MiddleVPBB);
+  // Canonical LatchVPB has header block as last successor. If it has another
+  // successor, the latter is an exit block - insert middle block on its edge.
+  // Otherwise, add middle block as another successor retaining header as last.
+  if (LatchVPB->getNumSuccessors() == 2) {
+    VPBlockBase *LatchExitVPB = LatchVPB->getSuccessors()[0];
+    VPBlockUtils::insertOnEdge(LatchVPB, LatchExitVPB, MiddleVPBB);
   } else {
     VPBlockUtils::connectBlocks(LatchVPB, MiddleVPBB);
     LatchVPB->swapSuccessors();

>From ca45f8822607db7ca22453cd67aed2c49f1bdf5c Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 6 May 2025 20:31:14 +0100
Subject: [PATCH 3/3] !fixup address remaining comments, thanks

---
 llvm/lib/Transforms/Vectorize/VPlan.h         |  1 -
 .../Vectorize/VPlanConstruction.cpp           | 23 +++++++++++--------
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 53332dce7181d..2febfe3cc7887 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -117,7 +117,6 @@ class VPBlockBase {
     Predecessors.erase(Pos);
   }
 
-public:
   /// Remove \p Successor from the successors of this block.
   void removeSuccessor(VPBlockBase *Successor) {
     auto Pos = find(Successors, Successor);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 8657f78acd22b..fd58f802bbe9d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -411,6 +411,8 @@ static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
   VPBlockBase *Succ = LatchVPBB->getSingleSuccessor();
   assert(Succ && "Latch expected to be left with a single successor");
 
+  // Use a temporary placeholder between LatchVPBB and its successor, to
+  // preserve the original predecessor/successor order of the blocks.
   auto *PlaceHolder = Plan.createVPBasicBlock("Region place holder");
   VPBlockUtils::insertOnEdge(LatchVPBB, Succ, PlaceHolder);
   VPBlockUtils::disconnectBlocks(LatchVPBB, PlaceHolder);
@@ -418,8 +420,8 @@ static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
 
   auto *R = Plan.createVPRegionBlock(HeaderVPB, LatchVPBB, "",
                                      false /*isReplicator*/);
-  // All VPBB's reachable shallowly from HeaderVPB belong to top level loop,
-  // because VPlan is expected to end at top level latch disconnected above.
+  // All VPBB's reachable shallowly from HeaderVPB belong to the current region,
+  // except the exit blocks reachable via non-latch exiting blocks,
   SmallPtrSet<VPBlockBase *, 2> ExitBlocks(Plan.getExitBlocks().begin(),
                                            Plan.getExitBlocks().end());
   for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPB))
@@ -428,6 +430,8 @@ static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
 
   VPBlockUtils::insertBlockAfter(R, PreheaderVPBB);
   VPBlockUtils::insertOnEdge(PlaceHolder, Succ, R);
+
+  // Remove placeholder block.
   VPBlockUtils::disconnectBlocks(R, PlaceHolder);
   VPBlockUtils::disconnectBlocks(PlaceHolder, R);
 }
@@ -481,9 +485,6 @@ void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy,
   VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry());
 
   VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block");
-  VPBlockBase *LatchExitVPB = LatchVPB->getNumSuccessors() == 2
-                                  ? LatchVPB->getSuccessors()[0]
-                                  : nullptr;
   // Canonical LatchVPB has header block as last successor. If it has another
   // successor, the latter is an exit block - insert middle block on its edge.
   // Otherwise, add middle block as another successor retaining header as last.
@@ -498,8 +499,10 @@ void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy,
   addCanonicalIVRecipes(Plan, cast<VPBasicBlock>(HeaderVPB),
                         cast<VPBasicBlock>(LatchVPB), InductionTy, IVDL);
 
-  // Disconnect all edges between exit blocks other than from the latch.
-  // TODO: Uncountable exit blocks should be handled here.
+  // Disconnect all edges to exit blocks other than from the middle block.
+  // TODO: VPlans with early exits should be explicitly converted to a form only
+  // exiting via the latch here, including adjusting the exit condition, instead
+  // of simplify disconnecting the edges and adjusting the VPlan later.
   for (VPBlockBase *EB : to_vector(Plan.getExitBlocks())) {
     for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
       if (Pred == MiddleVPBB)
@@ -533,8 +536,9 @@ void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy,
   //    Thus if tail is to be folded, we know we don't need to run the
   //    remainder and we can set the condition to true.
   // 3) Otherwise, construct a runtime check.
+
   if (!RequiresScalarEpilogueCheck) {
-    if (LatchExitVPB)
+    if (auto *LatchExitVPB = MiddleVPBB->getSingleSuccessor())
       VPBlockUtils::disconnectBlocks(MiddleVPBB, LatchExitVPB);
     VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
     // The exit blocks are unreachable, remove their recipes to make sure no
@@ -569,7 +573,8 @@ void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy,
 void VPlanTransforms::createLoopRegions(VPlan &Plan) {
   VPDominatorTree VPDT;
   VPDT.recalculate(Plan);
-  for (VPBlockBase *HeaderVPB : vp_depth_first_shallow(Plan.getEntry()))
+  for (VPBlockBase *HeaderVPB : post_order(
+           VPBlockShallowTraversalWrapper<VPBlockBase *>(Plan.getEntry())))
     if (canonicalHeaderAndLatch(HeaderVPB, VPDT))
       createLoopRegion(Plan, HeaderVPB);
 



More information about the llvm-commits mailing list