[llvm] 2f55123 - [VPlan] Handle early exit before forming regions. (NFC) (#138393)
via llvm-commits
llvm-commits at lists.llvm.org
Mon May 12 04:53:23 PDT 2025
Author: Florian Hahn
Date: 2025-05-12T12:53:20+01:00
New Revision: 2f55123cbb52a4c845a8d9ff837d8be90968175c
URL: https://github.com/llvm/llvm-project/commit/2f55123cbb52a4c845a8d9ff837d8be90968175c
DIFF: https://github.com/llvm/llvm-project/commit/2f55123cbb52a4c845a8d9ff837d8be90968175c.diff
LOG: [VPlan] Handle early exit before forming regions. (NFC) (#138393)
Move early-exit handling up front to original VPlan construction, before
forming regions.
This builds on https://github.com/llvm/llvm-project/pull/137709, which
adds exiting edges to the original VPlan, instead of adding exit blocks
later.
This retains the exit conditions early, and means we can handle early
exits before forming regions, without the reliance on VPRecipeBuilder.
Once we retain all exits initially, handling early exits before region
construction ensures the regions are valid; otherwise we would leave
edges exiting the region from elsewhere than the latch.
Removing the reliance on VPRecipeBuilder removes the dependence on
mapping IR BBs to VPBBs and unblocks predication as a VPlan transform:
https://github.com/llvm/llvm-project/pull/128420.
Depends on https://github.com/llvm/llvm-project/pull/137709 (included in
PR).
PR: https://github.com/llvm/llvm-project/pull/138393
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
llvm/lib/Transforms/Vectorize/VPlanTransforms.h
llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index cc58d014b5df9..79474b5bf7f61 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9383,7 +9383,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range,
VPlanTransforms::prepareForVectorization(
*Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck,
CM.foldTailByMasking(), OrigLoop,
- getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()));
+ getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()),
+ Legal->hasUncountableEarlyExit(), Range);
VPlanTransforms::createLoopRegions(*Plan);
// Don't use getDecisionAndClampRange here, because we don't know the UF
@@ -9584,12 +9585,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range,
R->setOperand(1, WideIV->getStepValue());
}
- if (auto *UncountableExitingBlock =
- Legal->getUncountableEarlyExitingBlock()) {
- VPlanTransforms::runPass(VPlanTransforms::handleUncountableEarlyExit, *Plan,
- OrigLoop, UncountableExitingBlock, RecipeBuilder,
- Range);
- }
DenseMap<VPValue *, VPValue *> IVEndValues;
addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues);
SetVector<VPIRInstruction *> ExitUsersToFix =
@@ -9687,7 +9682,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI, VPB2IRBB);
VPlanTransforms::prepareForVectorization(
*Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop,
- getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()));
+ getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), false,
+ Range);
VPlanTransforms::createLoopRegions(*Plan);
for (ElementCount VF : Range)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 35e5415a6d4e3..287bc93ce496a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -460,11 +460,10 @@ static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB,
{CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL);
}
-void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy,
- PredicatedScalarEvolution &PSE,
- bool RequiresScalarEpilogueCheck,
- bool TailFolded, Loop *TheLoop,
- DebugLoc IVDL) {
+void VPlanTransforms::prepareForVectorization(
+ VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE,
+ bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop,
+ DebugLoc IVDL, bool HasUncountableEarlyExit, VFRange &Range) {
VPDominatorTree VPDT;
VPDT.recalculate(Plan);
@@ -491,19 +490,33 @@ void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy,
addCanonicalIVRecipes(Plan, cast<VPBasicBlock>(HeaderVPB),
cast<VPBasicBlock>(LatchVPB), InductionTy, IVDL);
- // Disconnect all edges to exit blocks other than from the middle block.
- // TODO: VPlans with early exits should be explicitly converted to a form
- // exiting only via the latch here, including adjusting the exit condition,
- // instead of simply disconnecting the edges and adjusting the VPlan later.
- for (VPBlockBase *EB : Plan.getExitBlocks()) {
+ [[maybe_unused]] bool HandledUncountableEarlyExit = false;
+ // Disconnect all early exits from the loop leaving it with a single exit from
+ // the latch. Early exits that are countable are left for a scalar epilog. The
+ // condition of uncountable early exits (currently at most one is supported)
+ // is fused into the latch exit, and used to branch from middle block to the
+ // early exit destination.
+ for (VPIRBasicBlock *EB : Plan.getExitBlocks()) {
for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
if (Pred == MiddleVPBB)
continue;
+ if (HasUncountableEarlyExit) {
+ assert(!HandledUncountableEarlyExit &&
+ "can handle exactly one uncountable early exit");
+ handleUncountableEarlyExit(cast<VPBasicBlock>(Pred), EB, Plan,
+ cast<VPBasicBlock>(HeaderVPB),
+ cast<VPBasicBlock>(LatchVPB), Range);
+ HandledUncountableEarlyExit = true;
+ }
+
cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent();
VPBlockUtils::disconnectBlocks(Pred, EB);
}
}
+ assert((!HasUncountableEarlyExit || HandledUncountableEarlyExit) &&
+ "missed an uncountable exit that must be handled");
+
// Create SCEV and VPValue for the trip count.
// We use the symbolic max backedge-taken-count, which works also when
// vectorizing loops with uncountable early exits.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index b10b47cc1282a..806c20ef8cf73 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2461,63 +2461,56 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan,
}
void VPlanTransforms::handleUncountableEarlyExit(
- VPlan &Plan, Loop *OrigLoop, BasicBlock *UncountableExitingBlock,
- VPRecipeBuilder &RecipeBuilder, VFRange &Range) {
- VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
- auto *LatchVPBB = cast<VPBasicBlock>(LoopRegion->getExiting());
- VPBuilder Builder(LatchVPBB->getTerminator());
- auto *MiddleVPBB = Plan.getMiddleBlock();
- VPValue *IsEarlyExitTaken = nullptr;
-
- // Process the uncountable exiting block. Update IsEarlyExitTaken, which
- // tracks if the uncountable early exit has been taken. Also split the middle
- // block and have it conditionally branch to the early exit block if
- // EarlyExitTaken.
- auto *EarlyExitingBranch =
- cast<BranchInst>(UncountableExitingBlock->getTerminator());
- BasicBlock *TrueSucc = EarlyExitingBranch->getSuccessor(0);
- BasicBlock *FalseSucc = EarlyExitingBranch->getSuccessor(1);
- BasicBlock *EarlyExitIRBB =
- !OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc;
- VPIRBasicBlock *VPEarlyExitBlock = Plan.getExitBlock(EarlyExitIRBB);
-
- VPValue *EarlyExitNotTakenCond = RecipeBuilder.getBlockInMask(
- OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc);
- auto *EarlyExitTakenCond = Builder.createNot(EarlyExitNotTakenCond);
- IsEarlyExitTaken =
- Builder.createNaryOp(VPInstruction::AnyOf, {EarlyExitTakenCond});
+ VPBasicBlock *EarlyExitingVPBB, VPBasicBlock *EarlyExitVPBB, VPlan &Plan,
+ VPBasicBlock *HeaderVPBB, VPBasicBlock *LatchVPBB, VFRange &Range) {
+ using namespace llvm::VPlanPatternMatch;
+ VPBlockBase *MiddleVPBB = LatchVPBB->getSuccessors()[0];
+ if (!EarlyExitVPBB->getSinglePredecessor() &&
+ EarlyExitVPBB->getPredecessors()[1] == MiddleVPBB) {
+ assert(EarlyExitVPBB->getNumPredecessors() == 2 &&
+ EarlyExitVPBB->getPredecessors()[0] == EarlyExitingVPBB &&
+ "unsupported early exit VPBB");
+ // Early exit operand should always be last phi operand. If EarlyExitVPBB
+ // has two predecessors and EarlyExitingVPBB is the first, swap the operands
+ // of the phis.
+ for (VPRecipeBase &R : EarlyExitVPBB->phis())
+ cast<VPIRPhi>(&R)->swapOperands();
+ }
+
+ VPBuilder Builder(LatchVPBB->getTerminator());
+ VPBlockBase *TrueSucc = EarlyExitingVPBB->getSuccessors()[0];
+ assert(
+ match(EarlyExitingVPBB->getTerminator(), m_BranchOnCond(m_VPValue())) &&
+ "Terminator must be be BranchOnCond");
+ VPValue *CondOfEarlyExitingVPBB =
+ EarlyExitingVPBB->getTerminator()->getOperand(0);
+ auto *CondToEarlyExit = TrueSucc == EarlyExitVPBB
+ ? CondOfEarlyExitingVPBB
+ : Builder.createNot(CondOfEarlyExitingVPBB);
+
+ // Split the middle block and have it conditionally branch to the early exit
+ // block if CondToEarlyExit.
+ VPValue *IsEarlyExitTaken =
+ Builder.createNaryOp(VPInstruction::AnyOf, {CondToEarlyExit});
VPBasicBlock *NewMiddle = Plan.createVPBasicBlock("middle.split");
VPBasicBlock *VectorEarlyExitVPBB =
Plan.createVPBasicBlock("vector.early.exit");
- VPBlockUtils::insertOnEdge(LoopRegion, MiddleVPBB, NewMiddle);
+ VPBlockUtils::insertOnEdge(LatchVPBB, MiddleVPBB, NewMiddle);
VPBlockUtils::connectBlocks(NewMiddle, VectorEarlyExitVPBB);
NewMiddle->swapSuccessors();
- VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, VPEarlyExitBlock);
+ VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, EarlyExitVPBB);
// Update the exit phis in the early exit block.
VPBuilder MiddleBuilder(NewMiddle);
VPBuilder EarlyExitB(VectorEarlyExitVPBB);
- for (VPRecipeBase &R : VPEarlyExitBlock->phis()) {
+ for (VPRecipeBase &R : EarlyExitVPBB->phis()) {
auto *ExitIRI = cast<VPIRPhi>(&R);
- // Early exit operand should always be last, i.e., 0 if VPEarlyExitBlock has
+ // Early exit operand should always be last, i.e., 0 if EarlyExitVPBB has
// a single predecessor and 1 if it has two.
unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
- if (!VPEarlyExitBlock->getSinglePredecessor()) {
- // If VPEarlyExitBlock has two predecessors, they are already ordered such
- // that early exit is second (and latch exit is first), by construction.
- // But its underlying IRBB (EarlyExitIRBB) may have its predecessors
- // ordered the other way around, and it is the order of the latter which
- // corresponds to the order of operands of VPEarlyExitBlock's phi recipes.
- // Therefore, if early exit (UncountableExitingBlock) is the first
- // predecessor of EarlyExitIRBB, we swap the operands of phi recipes,
- // thereby bringing them to match VPEarlyExitBlock's predecessor order,
- // with early exit being last (second). Otherwise they already match.
- if (*pred_begin(VPEarlyExitBlock->getIRBasicBlock()) ==
- UncountableExitingBlock)
- ExitIRI->swapOperands();
-
+ if (ExitIRI->getNumOperands() != 1) {
// The first of two operands corresponds to the latch exit, via MiddleVPBB
// predecessor. Extract its last lane.
ExitIRI->extractLastLaneOfFirstOperand(MiddleBuilder);
@@ -2533,7 +2526,7 @@ void VPlanTransforms::handleUncountableEarlyExit(
LoopVectorizationPlanner::getDecisionAndClampRange(IsVector, Range)) {
// Update the incoming value from the early exit.
VPValue *FirstActiveLane = EarlyExitB.createNaryOp(
- VPInstruction::FirstActiveLane, {EarlyExitTakenCond}, nullptr,
+ VPInstruction::FirstActiveLane, {CondToEarlyExit}, nullptr,
"first.active.lane");
IncomingFromEarlyExit = EarlyExitB.createNaryOp(
Instruction::ExtractElement, {IncomingFromEarlyExit, FirstActiveLane},
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index cb127d37661c7..d284d916633c8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -69,7 +69,8 @@ struct VPlanTransforms {
PredicatedScalarEvolution &PSE,
bool RequiresScalarEpilogueCheck,
bool TailFolded, Loop *TheLoop,
- DebugLoc IVDL);
+ DebugLoc IVDL, bool HasUncountableExit,
+ VFRange &Range);
/// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning \p Plan's
/// flat CFG into a hierarchical CFG.
@@ -173,15 +174,16 @@ struct VPlanTransforms {
/// Remove dead recipes from \p Plan.
static void removeDeadRecipes(VPlan &Plan);
- /// Update \p Plan to account for the uncountable early exit block in \p
- /// UncountableExitingBlock by
- /// * updating the condition exiting the vector loop to include the early
- /// exit conditions
+ /// Update \p Plan to account for the uncountable early exit from \p
+ /// EarlyExitingVPBB to \p EarlyExitVPBB by
+ /// * updating the condition exiting the loop via the latch to include the
+ /// early exit condition,
/// * splitting the original middle block to branch to the early exit block
- /// if taken.
- static void handleUncountableEarlyExit(VPlan &Plan, Loop *OrigLoop,
- BasicBlock *UncountableExitingBlock,
- VPRecipeBuilder &RecipeBuilder,
+ /// conditionally - according to the early exit condition.
+ static void handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB,
+ VPBasicBlock *EarlyExitVPBB,
+ VPlan &Plan, VPBasicBlock *HeaderVPBB,
+ VPBasicBlock *LatchVPBB,
VFRange &Range);
/// Lower abstract recipes to concrete ones, that can be codegen'd. Use \p
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
index d9bd413bcc186..2a15e907e5fa5 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
@@ -13,6 +13,7 @@
#define LLVM_UNITTESTS_TRANSFORMS_VECTORIZE_VPLANTESTBASE_H
#include "../lib/Transforms/Vectorize/VPlan.h"
+#include "../lib/Transforms/Vectorize/VPlanHelpers.h"
#include "../lib/Transforms/Vectorize/VPlanTransforms.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
@@ -72,8 +73,9 @@ class VPlanTestIRBase : public testing::Test {
PredicatedScalarEvolution PSE(*SE, *L);
DenseMap<const VPBlockBase *, BasicBlock *> VPB2IRBB;
auto Plan = VPlanTransforms::buildPlainCFG(L, *LI, VPB2IRBB);
+ VFRange R(ElementCount::getFixed(1), ElementCount::getFixed(2));
VPlanTransforms::prepareForVectorization(*Plan, IntegerType::get(*Ctx, 64),
- PSE, true, false, L, {});
+ PSE, true, false, L, {}, false, R);
VPlanTransforms::createLoopRegions(*Plan);
return Plan;
}
More information about the llvm-commits
mailing list