[llvm] [VPlan] Handle early exit before forming regions. (NFC) (PR #138393)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Fri May 9 11:49:59 PDT 2025


https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/138393

>From d0d2c2ed878bff39d8c222a0939aa6c7ad961837 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 28 Apr 2025 19:35:23 +0100
Subject: [PATCH 1/4] [VPlan] Handle early exit before forming regions. (NFC)

Move early-exit handling up front to original VPlan construction, before
introducing regions.

This builds on https://github.com/llvm/llvm-project/pull/137709, which
adds exiting edges to the original VPlan, instead of adding exit blocks
later.

This retains the exit conditions early, and means we can handle early
exits before forming regions, without the reliance on VPRecipeBuilder.

Once we retain all exits initially, handling early exits before region
construction ensures the regions are valid; otherwise we would leave
edges exiting the region from elsewhere than the latch.

Removing the reliance on VPRecipeBuilder removes the dependence on
mapping IR BBs to VPBBs and unblocks predication as a VPlan transform:
https://github.com/llvm/llvm-project/pull/128420.

Depends on https://github.com/llvm/llvm-project/pull/137709.
---
 .../Transforms/Vectorize/LoopVectorize.cpp    |  12 +-
 .../Vectorize/VPlanConstruction.cpp           |  33 +++---
 .../Transforms/Vectorize/VPlanTransforms.cpp  | 108 +++++++++++-------
 .../Transforms/Vectorize/VPlanTransforms.h    |   8 +-
 .../Transforms/Vectorize/VPlanTestBase.h      |   4 +-
 5 files changed, 94 insertions(+), 71 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 9208fc45a0188..6c5d543ad9fa7 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9384,7 +9384,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
   VPlanTransforms::prepareForVectorization(
       *Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck,
       CM.foldTailByMasking(), OrigLoop,
-      getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()));
+      getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()),
+      Legal->hasUncountableEarlyExit(), Range);
   VPlanTransforms::createLoopRegions(*Plan);
 
   // Don't use getDecisionAndClampRange here, because we don't know the UF
@@ -9582,12 +9583,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
     R->setOperand(1, WideIV->getStepValue());
   }
 
-  if (auto *UncountableExitingBlock =
-          Legal->getUncountableEarlyExitingBlock()) {
-    VPlanTransforms::runPass(VPlanTransforms::handleUncountableEarlyExit, *Plan,
-                             OrigLoop, UncountableExitingBlock, RecipeBuilder,
-                             Range);
-  }
   DenseMap<VPValue *, VPValue *> IVEndValues;
   addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues);
   SetVector<VPIRInstruction *> ExitUsersToFix =
@@ -9685,7 +9680,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
   auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI, VPB2IRBB);
   VPlanTransforms::prepareForVectorization(
       *Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop,
-      getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()));
+      getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), false,
+      Range);
   VPlanTransforms::createLoopRegions(*Plan);
 
   for (ElementCount VF : Range)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index a0edd296caab8..73420b406b8e3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -460,11 +460,10 @@ static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB,
                        {CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL);
 }
 
-void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy,
-                                              PredicatedScalarEvolution &PSE,
-                                              bool RequiresScalarEpilogueCheck,
-                                              bool TailFolded, Loop *TheLoop,
-                                              DebugLoc IVDL) {
+void VPlanTransforms::prepareForVectorization(
+    VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE,
+    bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop,
+    DebugLoc IVDL, bool HandleUncountableExit, VFRange &Range) {
   VPDominatorTree VPDT;
   VPDT.recalculate(Plan);
 
@@ -491,16 +490,20 @@ void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy,
   addCanonicalIVRecipes(Plan, cast<VPBasicBlock>(HeaderVPB),
                         cast<VPBasicBlock>(LatchVPB), InductionTy, IVDL);
 
-  // Disconnect all edges to exit blocks other than from the middle block.
-  // TODO: VPlans with early exits should be explicitly converted to a form
-  // exiting only via the latch here, including adjusting the exit condition,
-  // instead of simply disconnecting the edges and adjusting the VPlan later.
-  for (VPBlockBase *EB : Plan.getExitBlocks()) {
-    for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
-      if (Pred == MiddleVPBB)
-        continue;
-      cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent();
-      VPBlockUtils::disconnectBlocks(Pred, EB);
+  if (HandleUncountableExit) {
+    // Convert VPlans with early exits to a form only exiting via the latch
+    // here, including adjusting the exit condition.
+    handleUncountableEarlyExit(Plan, cast<VPBasicBlock>(HeaderVPB),
+                               cast<VPBasicBlock>(LatchVPB), Range);
+  } else {
+    // Disconnect all edges to exit blocks other than from the middle block.
+    for (VPBlockBase *EB : to_vector(Plan.getExitBlocks())) {
+      for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
+        if (Pred == MiddleVPBB)
+          continue;
+        cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent();
+        VPBlockUtils::disconnectBlocks(Pred, EB);
+      }
     }
   }
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 79ddb8bf0b09b..3a6c5bc02cdf1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2458,64 +2458,86 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan,
     R->eraseFromParent();
 }
 
-void VPlanTransforms::handleUncountableEarlyExit(
-    VPlan &Plan, Loop *OrigLoop, BasicBlock *UncountableExitingBlock,
-    VPRecipeBuilder &RecipeBuilder, VFRange &Range) {
-  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
-  auto *LatchVPBB = cast<VPBasicBlock>(LoopRegion->getExiting());
+void VPlanTransforms::handleUncountableEarlyExit(VPlan &Plan,
+                                                 VPBasicBlock *HeaderVPBB,
+                                                 VPBasicBlock *LatchVPBB,
+                                                 VFRange &Range) {
+  // First find the uncountable early exiting block by looking at the
+  // predecessors of the exit blocks.
+  VPBlockBase *MiddleVPBB = LatchVPBB->getSuccessors()[0];
+  VPBasicBlock *EarlyExitingVPBB = nullptr;
+  VPIRBasicBlock *EarlyExitVPBB = nullptr;
+  for (auto *EB : Plan.getExitBlocks()) {
+    for (VPBlockBase *Pred : EB->getPredecessors()) {
+      if (Pred != MiddleVPBB) {
+        EarlyExitingVPBB = cast<VPBasicBlock>(Pred);
+        EarlyExitVPBB = EB;
+        break;
+      }
+    }
+  }
+  assert(EarlyExitVPBB && "Must have a early exiting block.");
+  assert(all_of(Plan.getExitBlocks(),
+                [EarlyExitingVPBB, MiddleVPBB](VPIRBasicBlock *EB) {
+                  return all_of(
+                      EB->getPredecessors(),
+                      [EarlyExitingVPBB, MiddleVPBB](VPBlockBase *Pred) {
+                        return Pred == EarlyExitingVPBB || Pred == MiddleVPBB;
+                      });
+                }) &&
+         "All exit blocks must only have EarlyExitingVPBB or MiddleVPBB as "
+         "predecessors.");
+
   VPBuilder Builder(LatchVPBB->getTerminator());
-  auto *MiddleVPBB = Plan.getMiddleBlock();
-  VPValue *IsEarlyExitTaken = nullptr;
-
-  // Process the uncountable exiting block. Update IsEarlyExitTaken, which
-  // tracks if the uncountable early exit has been taken. Also split the middle
-  // block and have it conditionally branch to the early exit block if
-  // EarlyExitTaken.
-  auto *EarlyExitingBranch =
-      cast<BranchInst>(UncountableExitingBlock->getTerminator());
-  BasicBlock *TrueSucc = EarlyExitingBranch->getSuccessor(0);
-  BasicBlock *FalseSucc = EarlyExitingBranch->getSuccessor(1);
-  BasicBlock *EarlyExitIRBB =
-      !OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc;
-  VPIRBasicBlock *VPEarlyExitBlock = Plan.getExitBlock(EarlyExitIRBB);
-
-  VPValue *EarlyExitNotTakenCond = RecipeBuilder.getBlockInMask(
-      OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc);
-  auto *EarlyExitTakenCond = Builder.createNot(EarlyExitNotTakenCond);
-  IsEarlyExitTaken =
-      Builder.createNaryOp(VPInstruction::AnyOf, {EarlyExitTakenCond});
+  VPBlockBase *TrueSucc = EarlyExitingVPBB->getSuccessors()[0];
+  VPValue *EarlyExitCond = EarlyExitingVPBB->getTerminator()->getOperand(0);
+  auto *EarlyExitTakenCond = TrueSucc == EarlyExitVPBB
+                                 ? EarlyExitCond
+                                 : Builder.createNot(EarlyExitCond);
+
+  if (!EarlyExitVPBB->getSinglePredecessor() &&
+      EarlyExitVPBB->getPredecessors()[0] != MiddleVPBB) {
+    for (VPRecipeBase &R : EarlyExitVPBB->phis()) {
+      // Early exit operand should always be last, i.e., 0 if EarlyExitVPBB has
+      // a single predecessor and 1 if it has two.
+      // If EarlyExitVPBB has two predecessors, they are already ordered such
+      // that early exit is second (and latch exit is first), by construction.
+      // But its underlying IRBB (EarlyExitIRBB) may have its predecessors
+      // ordered the other way around, and it is the order of the latter which
+      // corresponds to the order of operands of EarlyExitVPBB's phi recipes.
+      // Therefore, if early exit (UncountableExitingBlock) is the first
+      // predecessor of EarlyExitIRBB, we swap the operands of phi recipes,
+      // thereby bringing them to match EarlyExitVPBB's predecessor order,
+      // with early exit being last (second). Otherwise they already match.
+      cast<VPIRPhi>(&R)->swapOperands();
+    }
+  }
 
+  EarlyExitingVPBB->getTerminator()->eraseFromParent();
+  VPBlockUtils::disconnectBlocks(EarlyExitingVPBB, EarlyExitVPBB);
+
+  // Split the middle block and have it conditionally branch to the early exit
+  // block if EarlyExitTaken.
+  VPValue *IsEarlyExitTaken =
+      Builder.createNaryOp(VPInstruction::AnyOf, {EarlyExitTakenCond});
   VPBasicBlock *NewMiddle = Plan.createVPBasicBlock("middle.split");
   VPBasicBlock *VectorEarlyExitVPBB =
       Plan.createVPBasicBlock("vector.early.exit");
-  VPBlockUtils::insertOnEdge(LoopRegion, MiddleVPBB, NewMiddle);
+  VPBlockUtils::insertOnEdge(LatchVPBB, MiddleVPBB, NewMiddle);
   VPBlockUtils::connectBlocks(NewMiddle, VectorEarlyExitVPBB);
   NewMiddle->swapSuccessors();
 
-  VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, VPEarlyExitBlock);
+  VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, EarlyExitVPBB);
 
   // Update the exit phis in the early exit block.
   VPBuilder MiddleBuilder(NewMiddle);
   VPBuilder EarlyExitB(VectorEarlyExitVPBB);
-  for (VPRecipeBase &R : VPEarlyExitBlock->phis()) {
+  for (VPRecipeBase &R : EarlyExitVPBB->phis()) {
     auto *ExitIRI = cast<VPIRPhi>(&R);
-    // Early exit operand should always be last, i.e., 0 if VPEarlyExitBlock has
+    // Early exit operand should always be last, i.e., 0 if EarlyExitVPBB has
     // a single predecessor and 1 if it has two.
     unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
-    if (!VPEarlyExitBlock->getSinglePredecessor()) {
-      // If VPEarlyExitBlock has two predecessors, they are already ordered such
-      // that early exit is second (and latch exit is first), by construction.
-      // But its underlying IRBB (EarlyExitIRBB) may have its predecessors
-      // ordered the other way around, and it is the order of the latter which
-      // corresponds to the order of operands of VPEarlyExitBlock's phi recipes.
-      // Therefore, if early exit (UncountableExitingBlock) is the first
-      // predecessor of EarlyExitIRBB, we swap the operands of phi recipes,
-      // thereby bringing them to match VPEarlyExitBlock's predecessor order,
-      // with early exit being last (second). Otherwise they already match.
-      if (*pred_begin(VPEarlyExitBlock->getIRBasicBlock()) ==
-          UncountableExitingBlock)
-        ExitIRI->swapOperands();
-
+    if (!EarlyExitVPBB->getSinglePredecessor()) {
       // The first of two operands corresponds to the latch exit, via MiddleVPBB
       // predecessor. Extract its last lane.
       ExitIRI->extractLastLaneOfFirstOperand(MiddleBuilder);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 7a05816f2e2da..adb984fc56bac 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -69,7 +69,8 @@ struct VPlanTransforms {
                                       PredicatedScalarEvolution &PSE,
                                       bool RequiresScalarEpilogueCheck,
                                       bool TailFolded, Loop *TheLoop,
-                                      DebugLoc IVDL);
+                                      DebugLoc IVDL, bool HandleUncountableExit,
+                                      VFRange &Range);
 
   /// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning \p Plan's
   /// flat CFG into a hierarchical CFG.
@@ -179,9 +180,8 @@ struct VPlanTransforms {
   ///    exit conditions
   ///  * splitting the original middle block to branch to the early exit block
   ///    if taken.
-  static void handleUncountableEarlyExit(VPlan &Plan, Loop *OrigLoop,
-                                         BasicBlock *UncountableExitingBlock,
-                                         VPRecipeBuilder &RecipeBuilder,
+  static void handleUncountableEarlyExit(VPlan &Plan, VPBasicBlock *HeaderVPBB,
+                                         VPBasicBlock *LatchVPBB,
                                          VFRange &Range);
 
   /// Lower abstract recipes to concrete ones, that can be codegen'd. Use \p
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
index bf67a5596b270..15e21972840f6 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
@@ -13,6 +13,7 @@
 #define LLVM_UNITTESTS_TRANSFORMS_VECTORIZE_VPLANTESTBASE_H
 
 #include "../lib/Transforms/Vectorize/VPlan.h"
+#include "../lib/Transforms/Vectorize/VPlanHelpers.h"
 #include "../lib/Transforms/Vectorize/VPlanTransforms.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
@@ -72,8 +73,9 @@ class VPlanTestIRBase : public testing::Test {
     PredicatedScalarEvolution PSE(*SE, *L);
     DenseMap<VPBlockBase *, BasicBlock *> VPB2IRBB;
     auto Plan = VPlanTransforms::buildPlainCFG(L, *LI, VPB2IRBB);
+    VFRange R(ElementCount::getFixed(1), ElementCount::getFixed(2));
     VPlanTransforms::prepareForVectorization(*Plan, IntegerType::get(*Ctx, 64),
-                                             PSE, true, false, L, {});
+                                             PSE, true, false, L, {}, false, R);
     VPlanTransforms::createLoopRegions(*Plan);
     return Plan;
   }

>From 76c470a914b38e32f7c40b234c23083911db68cc Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 8 May 2025 20:11:33 +0100
Subject: [PATCH 2/4] !fixup address comments, thanks!

---
 .../Vectorize/VPlanConstruction.cpp           | 40 +++++++----
 .../Transforms/Vectorize/VPlanTransforms.cpp  | 72 ++++++-------------
 .../Transforms/Vectorize/VPlanTransforms.h    |  8 ++-
 3 files changed, 54 insertions(+), 66 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 73420b406b8e3..4270564fccec0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -463,7 +463,7 @@ static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB,
 void VPlanTransforms::prepareForVectorization(
     VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE,
     bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop,
-    DebugLoc IVDL, bool HandleUncountableExit, VFRange &Range) {
+    DebugLoc IVDL, bool HasUncountableEarlyExit, VFRange &Range) {
   VPDominatorTree VPDT;
   VPDT.recalculate(Plan);
 
@@ -490,23 +490,35 @@ void VPlanTransforms::prepareForVectorization(
   addCanonicalIVRecipes(Plan, cast<VPBasicBlock>(HeaderVPB),
                         cast<VPBasicBlock>(LatchVPB), InductionTy, IVDL);
 
-  if (HandleUncountableExit) {
-    // Convert VPlans with early exits to a form only exiting via the latch
-    // here, including adjusting the exit condition.
-    handleUncountableEarlyExit(Plan, cast<VPBasicBlock>(HeaderVPB),
-                               cast<VPBasicBlock>(LatchVPB), Range);
-  } else {
-    // Disconnect all edges to exit blocks other than from the middle block.
-    for (VPBlockBase *EB : to_vector(Plan.getExitBlocks())) {
-      for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
-        if (Pred == MiddleVPBB)
-          continue;
-        cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent();
-        VPBlockUtils::disconnectBlocks(Pred, EB);
+  [[maybe_unused]] bool HandledUncountableEarlyExit = false;
+  for (VPIRBasicBlock *EB : Plan.getExitBlocks()) {
+    for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
+      if (Pred == MiddleVPBB)
+        continue;
+
+      if (HasUncountableEarlyExit) {
+        assert(!HandledUncountableEarlyExit &&
+               "can handle exactly one uncountable early exit");
+        // Convert VPlans with early exits to a form exiting only via the latch
+        // here, including adjusting the exit condition of the latch.
+        handleUncountableEarlyExit(cast<VPBasicBlock>(Pred), EB, Plan,
+                                   cast<VPBasicBlock>(HeaderVPB),
+                                   cast<VPBasicBlock>(LatchVPB), Range);
+        HandledUncountableEarlyExit = true;
+        continue;
       }
+
+      // Otherwise all early exits must be countable and we require at least one
+      // iteration in the scalar epilogue. Disconnect all edges to exit blocks
+      // other than from the middle block.
+      cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent();
+      VPBlockUtils::disconnectBlocks(Pred, EB);
     }
   }
 
+  assert((!HasUncountableEarlyExit || HandledUncountableEarlyExit) &&
+         "did not handle uncountable early exit");
+
   // Create SCEV and VPValue for the trip count.
   // We use the symbolic max backedge-taken-count, which works also when
   // vectorizing loops with uncountable early exits.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 3a6c5bc02cdf1..92ff1fd05fc64 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2458,68 +2458,42 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan,
     R->eraseFromParent();
 }
 
-void VPlanTransforms::handleUncountableEarlyExit(VPlan &Plan,
+void VPlanTransforms::handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB,
+                                                 VPBasicBlock *EarlyExitVPBB,
+
+                                                 VPlan &Plan,
                                                  VPBasicBlock *HeaderVPBB,
                                                  VPBasicBlock *LatchVPBB,
                                                  VFRange &Range) {
-  // First find the uncountable early exiting block by looking at the
-  // predecessors of the exit blocks.
-  VPBlockBase *MiddleVPBB = LatchVPBB->getSuccessors()[0];
-  VPBasicBlock *EarlyExitingVPBB = nullptr;
-  VPIRBasicBlock *EarlyExitVPBB = nullptr;
-  for (auto *EB : Plan.getExitBlocks()) {
-    for (VPBlockBase *Pred : EB->getPredecessors()) {
-      if (Pred != MiddleVPBB) {
-        EarlyExitingVPBB = cast<VPBasicBlock>(Pred);
-        EarlyExitVPBB = EB;
-        break;
-      }
-    }
-  }
-  assert(EarlyExitVPBB && "Must have a early exiting block.");
-  assert(all_of(Plan.getExitBlocks(),
-                [EarlyExitingVPBB, MiddleVPBB](VPIRBasicBlock *EB) {
-                  return all_of(
-                      EB->getPredecessors(),
-                      [EarlyExitingVPBB, MiddleVPBB](VPBlockBase *Pred) {
-                        return Pred == EarlyExitingVPBB || Pred == MiddleVPBB;
-                      });
-                }) &&
-         "All exit blocks must only have EarlyExitingVPBB or MiddleVPBB as "
-         "predecessors.");
-
-  VPBuilder Builder(LatchVPBB->getTerminator());
-  VPBlockBase *TrueSucc = EarlyExitingVPBB->getSuccessors()[0];
-  VPValue *EarlyExitCond = EarlyExitingVPBB->getTerminator()->getOperand(0);
-  auto *EarlyExitTakenCond = TrueSucc == EarlyExitVPBB
-                                 ? EarlyExitCond
-                                 : Builder.createNot(EarlyExitCond);
+  using namespace llvm::VPlanPatternMatch;
 
+  VPBlockBase *MiddleVPBB = LatchVPBB->getSuccessors()[0];
   if (!EarlyExitVPBB->getSinglePredecessor() &&
       EarlyExitVPBB->getPredecessors()[0] != MiddleVPBB) {
-    for (VPRecipeBase &R : EarlyExitVPBB->phis()) {
-      // Early exit operand should always be last, i.e., 0 if EarlyExitVPBB has
-      // a single predecessor and 1 if it has two.
-      // If EarlyExitVPBB has two predecessors, they are already ordered such
-      // that early exit is second (and latch exit is first), by construction.
-      // But its underlying IRBB (EarlyExitIRBB) may have its predecessors
-      // ordered the other way around, and it is the order of the latter which
-      // corresponds to the order of operands of EarlyExitVPBB's phi recipes.
-      // Therefore, if early exit (UncountableExitingBlock) is the first
-      // predecessor of EarlyExitIRBB, we swap the operands of phi recipes,
-      // thereby bringing them to match EarlyExitVPBB's predecessor order,
-      // with early exit being last (second). Otherwise they already match.
+    // Early exit operand should always be last phi operand. If EarlyExitVPBB
+    // has two predecessors and MiddleVPBB isn't the first, swap the operands of
+    // the phis.
+    for (VPRecipeBase &R : EarlyExitVPBB->phis())
       cast<VPIRPhi>(&R)->swapOperands();
-    }
   }
 
+  VPBuilder Builder(LatchVPBB->getTerminator());
+  VPBlockBase *TrueSucc = EarlyExitingVPBB->getSuccessors()[0];
+  assert(
+      match(EarlyExitingVPBB->getTerminator(), m_BranchOnCond(m_VPValue())) &&
+      "Terminator must be be BranchOnCond");
+  VPValue *CondOfEarlyExitingVPBB =
+      EarlyExitingVPBB->getTerminator()->getOperand(0);
+  auto *CondToEarlyExit = TrueSucc == EarlyExitVPBB
+                              ? CondOfEarlyExitingVPBB
+                              : Builder.createNot(CondOfEarlyExitingVPBB);
   EarlyExitingVPBB->getTerminator()->eraseFromParent();
   VPBlockUtils::disconnectBlocks(EarlyExitingVPBB, EarlyExitVPBB);
 
   // Split the middle block and have it conditionally branch to the early exit
   // block if EarlyExitTaken.
   VPValue *IsEarlyExitTaken =
-      Builder.createNaryOp(VPInstruction::AnyOf, {EarlyExitTakenCond});
+      Builder.createNaryOp(VPInstruction::AnyOf, {CondToEarlyExit});
   VPBasicBlock *NewMiddle = Plan.createVPBasicBlock("middle.split");
   VPBasicBlock *VectorEarlyExitVPBB =
       Plan.createVPBasicBlock("vector.early.exit");
@@ -2537,7 +2511,7 @@ void VPlanTransforms::handleUncountableEarlyExit(VPlan &Plan,
     // Early exit operand should always be last, i.e., 0 if EarlyExitVPBB has
     // a single predecessor and 1 if it has two.
     unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
-    if (!EarlyExitVPBB->getSinglePredecessor()) {
+    if (ExitIRI->getNumOperands() != 1) {
       // The first of two operands corresponds to the latch exit, via MiddleVPBB
       // predecessor. Extract its last lane.
       ExitIRI->extractLastLaneOfFirstOperand(MiddleBuilder);
@@ -2553,7 +2527,7 @@ void VPlanTransforms::handleUncountableEarlyExit(VPlan &Plan,
         LoopVectorizationPlanner::getDecisionAndClampRange(IsVector, Range)) {
       // Update the incoming value from the early exit.
       VPValue *FirstActiveLane = EarlyExitB.createNaryOp(
-          VPInstruction::FirstActiveLane, {EarlyExitTakenCond}, nullptr,
+          VPInstruction::FirstActiveLane, {CondToEarlyExit}, nullptr,
           "first.active.lane");
       IncomingFromEarlyExit = EarlyExitB.createNaryOp(
           Instruction::ExtractElement, {IncomingFromEarlyExit, FirstActiveLane},
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index adb984fc56bac..2992bc56d8ac8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -174,13 +174,15 @@ struct VPlanTransforms {
   /// Remove dead recipes from \p Plan.
   static void removeDeadRecipes(VPlan &Plan);
 
-  /// Update \p Plan to account for the uncountable early exit block in \p
-  /// UncountableExitingBlock by
+  /// Update \p Plan to account for the uncountable early exit from \p
+  /// EarlyExitingVPBB to \p EarlyExitVPBB by
   ///  * updating the condition exiting the vector loop to include the early
   ///    exit conditions
   ///  * splitting the original middle block to branch to the early exit block
   ///    if taken.
-  static void handleUncountableEarlyExit(VPlan &Plan, VPBasicBlock *HeaderVPBB,
+  static void handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB,
+                                         VPBasicBlock *EarlyExitVPBB,
+                                         VPlan &Plan, VPBasicBlock *HeaderVPBB,
                                          VPBasicBlock *LatchVPBB,
                                          VFRange &Range);
 

>From 56d576a99a564b4a1c0dfe0d61d2f19d1d24888e Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 9 May 2025 16:16:40 +0100
Subject: [PATCH 3/4] !fixup address comments, thanks

---
 llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp | 3 +++
 llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp   | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 4270564fccec0..3a2dc791b024d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -491,6 +491,9 @@ void VPlanTransforms::prepareForVectorization(
                         cast<VPBasicBlock>(LatchVPB), InductionTy, IVDL);
 
   [[maybe_unused]] bool HandledUncountableEarlyExit = false;
+  // Handle the remaining early exits, either by converting the plan to one only
+  // exiting via the latch or by disconnecting all early exiting edges and
+  // requiring a scalar epilogue.
   for (VPIRBasicBlock *EB : Plan.getExitBlocks()) {
     for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
       if (Pred == MiddleVPBB)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 20041fb1194b1..7ec4faee08f62 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2472,6 +2472,9 @@ void VPlanTransforms::handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB,
   VPBlockBase *MiddleVPBB = LatchVPBB->getSuccessors()[0];
   if (!EarlyExitVPBB->getSinglePredecessor() &&
       EarlyExitVPBB->getPredecessors()[0] != MiddleVPBB) {
+    assert(EarlyExitVPBB->getNumPredecessors() == 2 &&
+           EarlyExitVPBB->getPredecessors()[1] == MiddleVPBB &&
+           "unsupported earl exit VPBB");
     // Early exit operand should always be last phi operand. If EarlyExitVPBB
     // has two predecessors and MiddleVPBB isn't the first, swap the operands of
     // the phis.

>From 2289a5e0d01b26c0818000001604ca316efe3e3e Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 9 May 2025 19:49:17 +0100
Subject: [PATCH 4/4] !fixup address latest comments, thanks

---
 .../Transforms/Vectorize/VPlanConstruction.cpp  | 17 ++++++-----------
 .../Transforms/Vectorize/VPlanTransforms.cpp    | 15 ++++++---------
 llvm/lib/Transforms/Vectorize/VPlanTransforms.h |  6 +++---
 3 files changed, 15 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 3a2dc791b024d..b924b14035261 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -491,36 +491,31 @@ void VPlanTransforms::prepareForVectorization(
                         cast<VPBasicBlock>(LatchVPB), InductionTy, IVDL);
 
   [[maybe_unused]] bool HandledUncountableEarlyExit = false;
-  // Handle the remaining early exits, either by converting the plan to one only
-  // exiting via the latch or by disconnecting all early exiting edges and
-  // requiring a scalar epilogue.
+  // Disconnect all early exits from the loop leaving it with a single exit from
+  // the latch. Early exits that are countable are left for a scalar epilog. The
+  // condition of uncountable early exits (currently at most one is supported)
+  // is fused into the latch exit, and used to branch from middle block to the
+  // early exit destination.
   for (VPIRBasicBlock *EB : Plan.getExitBlocks()) {
     for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
       if (Pred == MiddleVPBB)
         continue;
-
       if (HasUncountableEarlyExit) {
         assert(!HandledUncountableEarlyExit &&
                "can handle exactly one uncountable early exit");
-        // Convert VPlans with early exits to a form exiting only via the latch
-        // here, including adjusting the exit condition of the latch.
         handleUncountableEarlyExit(cast<VPBasicBlock>(Pred), EB, Plan,
                                    cast<VPBasicBlock>(HeaderVPB),
                                    cast<VPBasicBlock>(LatchVPB), Range);
         HandledUncountableEarlyExit = true;
-        continue;
       }
 
-      // Otherwise all early exits must be countable and we require at least one
-      // iteration in the scalar epilogue. Disconnect all edges to exit blocks
-      // other than from the middle block.
       cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent();
       VPBlockUtils::disconnectBlocks(Pred, EB);
     }
   }
 
   assert((!HasUncountableEarlyExit || HandledUncountableEarlyExit) &&
-         "did not handle uncountable early exit");
+         "missed an uncountable exit that must be handled");
 
   // Create SCEV and VPValue for the trip count.
   // We use the symbolic max backedge-taken-count, which works also when
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 7ec4faee08f62..c195efe86c806 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2462,7 +2462,6 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan,
 
 void VPlanTransforms::handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB,
                                                  VPBasicBlock *EarlyExitVPBB,
-
                                                  VPlan &Plan,
                                                  VPBasicBlock *HeaderVPBB,
                                                  VPBasicBlock *LatchVPBB,
@@ -2471,13 +2470,13 @@ void VPlanTransforms::handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB,
 
   VPBlockBase *MiddleVPBB = LatchVPBB->getSuccessors()[0];
   if (!EarlyExitVPBB->getSinglePredecessor() &&
-      EarlyExitVPBB->getPredecessors()[0] != MiddleVPBB) {
+      EarlyExitVPBB->getPredecessors()[1] == MiddleVPBB) {
     assert(EarlyExitVPBB->getNumPredecessors() == 2 &&
-           EarlyExitVPBB->getPredecessors()[1] == MiddleVPBB &&
-           "unsupported earl exit VPBB");
+           EarlyExitVPBB->getPredecessors()[0] == EarlyExitingVPBB &&
+           "unsupported early exit VPBB");
     // Early exit operand should always be last phi operand. If EarlyExitVPBB
-    // has two predecessors and MiddleVPBB isn't the first, swap the operands of
-    // the phis.
+    // has two predecessors and EarlyExitingVPBB is the first, swap the operands
+    // of the phis.
     for (VPRecipeBase &R : EarlyExitVPBB->phis())
       cast<VPIRPhi>(&R)->swapOperands();
   }
@@ -2492,11 +2491,9 @@ void VPlanTransforms::handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB,
   auto *CondToEarlyExit = TrueSucc == EarlyExitVPBB
                               ? CondOfEarlyExitingVPBB
                               : Builder.createNot(CondOfEarlyExitingVPBB);
-  EarlyExitingVPBB->getTerminator()->eraseFromParent();
-  VPBlockUtils::disconnectBlocks(EarlyExitingVPBB, EarlyExitVPBB);
 
   // Split the middle block and have it conditionally branch to the early exit
-  // block if EarlyExitTaken.
+  // block if CondToEarlyExit.
   VPValue *IsEarlyExitTaken =
       Builder.createNaryOp(VPInstruction::AnyOf, {CondToEarlyExit});
   VPBasicBlock *NewMiddle = Plan.createVPBasicBlock("middle.split");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 2992bc56d8ac8..530e06d983e23 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -69,7 +69,7 @@ struct VPlanTransforms {
                                       PredicatedScalarEvolution &PSE,
                                       bool RequiresScalarEpilogueCheck,
                                       bool TailFolded, Loop *TheLoop,
-                                      DebugLoc IVDL, bool HandleUncountableExit,
+                                      DebugLoc IVDL, bool HasUncountableExit,
                                       VFRange &Range);
 
   /// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning \p Plan's
@@ -177,9 +177,9 @@ struct VPlanTransforms {
   /// Update \p Plan to account for the uncountable early exit from \p
   /// EarlyExitingVPBB to \p EarlyExitVPBB by
   ///  * updating the condition exiting the vector loop to include the early
-  ///    exit conditions
+  ///    exit condition,
   ///  * splitting the original middle block to branch to the early exit block
-  ///    if taken.
+  ///    conditionally - according to the early exit condition.
   static void handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB,
                                          VPBasicBlock *EarlyExitVPBB,
                                          VPlan &Plan, VPBasicBlock *HeaderVPBB,



More information about the llvm-commits mailing list