[llvm] 0f00a96 - [VPlan] Simplify branch on False in VPlan transform (NFC). (#140409)
via llvm-commits
llvm-commits at lists.llvm.org
Sat May 31 12:32:48 PDT 2025
Author: Florian Hahn
Date: 2025-05-31T20:32:45+01:00
New Revision: 0f00a96fede09ceb7a764c95243a2cf673d0c466
URL: https://github.com/llvm/llvm-project/commit/0f00a96fede09ceb7a764c95243a2cf673d0c466
DIFF: https://github.com/llvm/llvm-project/commit/0f00a96fede09ceb7a764c95243a2cf673d0c466.diff
LOG: [VPlan] Simplify branch on False in VPlan transform (NFC). (#140409)
Simplify branch on false, starting with the branch from the middle block
to the scalar preheader. Initially this helps simplifying the initial
VPlan construction.
Depends on https://github.com/llvm/llvm-project/pull/140405.
PR: https://github.com/llvm/llvm-project/pull/140409
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/VPlan.h
llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
llvm/lib/Transforms/Vectorize/VPlanValue.h
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 9421bd84b3cb8..e9ace195684b3 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2380,10 +2380,12 @@ void InnerLoopVectorizer::introduceCheckBlockInVPlan(BasicBlock *CheckIRBB) {
// We just connected a new block to the scalar preheader. Update all
// VPPhis by adding an incoming value for it, replicating the last value.
+ unsigned NumPredecessors = ScalarPH->getNumPredecessors();
for (VPRecipeBase &R : cast<VPBasicBlock>(ScalarPH)->phis()) {
- auto *ResumePhi = cast<VPPhi>(&R);
- ResumePhi->addOperand(
- ResumePhi->getOperand(ResumePhi->getNumOperands() - 1));
+ assert(isa<VPPhi>(&R) && "Phi expected to be VPPhi");
+ assert(cast<VPPhi>(&R)->getNumIncoming() == NumPredecessors - 1 &&
+ "must have incoming values for all operands");
+ R.addOperand(R.getOperand(NumPredecessors - 2));
}
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 389a63c8ecf9f..44f0b6d964a6e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1136,6 +1136,10 @@ class VPPhiAccessors {
return getAsRecipe()->getNumOperands();
}
+ /// Removes the incoming value for \p IncomingBlock, which must be a
+ /// predecessor.
+ void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const;
@@ -3545,14 +3549,13 @@ template <> struct CastIsPossible<VPPhiAccessors, const VPRecipeBase *> {
};
/// Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the
/// recipe types implementing VPPhiAccessors. Used by cast<>, dyn_cast<> & co.
-template <>
-struct CastInfo<VPPhiAccessors, const VPRecipeBase *>
- : public CastIsPossible<VPPhiAccessors, const VPRecipeBase *> {
+template <typename SrcTy>
+struct CastInfoVPPhiAccessors : public CastIsPossible<VPPhiAccessors, SrcTy> {
- using Self = CastInfo<VPPhiAccessors, const VPRecipeBase *>;
+ using Self = CastInfo<VPPhiAccessors, SrcTy>;
/// doCast is used by cast<>.
- static inline VPPhiAccessors *doCast(const VPRecipeBase *R) {
+ static inline VPPhiAccessors *doCast(SrcTy R) {
return const_cast<VPPhiAccessors *>([R]() -> const VPPhiAccessors * {
switch (R->getVPDefID()) {
case VPDef::VPInstructionSC:
@@ -3568,12 +3571,18 @@ struct CastInfo<VPPhiAccessors, const VPRecipeBase *>
}
/// doCastIfPossible is used by dyn_cast<>.
- static inline VPPhiAccessors *doCastIfPossible(const VPRecipeBase *f) {
+ static inline VPPhiAccessors *doCastIfPossible(SrcTy f) {
if (!Self::isPossible(f))
return nullptr;
return doCast(f);
}
};
+template <>
+struct CastInfo<VPPhiAccessors, VPRecipeBase *>
+ : CastInfoVPPhiAccessors<VPRecipeBase *> {};
+template <>
+struct CastInfo<VPPhiAccessors, const VPRecipeBase *>
+ : CastInfoVPPhiAccessors<const VPRecipeBase *> {};
/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
/// holds a sequence of zero or more VPRecipe's each representing a sequence of
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index b92ea757060cb..593e5063802ba 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -501,8 +501,10 @@ void VPlanTransforms::prepareForVectorization(
cast<VPBasicBlock>(HeaderVPB),
cast<VPBasicBlock>(LatchVPB), Range);
HandledUncountableEarlyExit = true;
+ } else {
+ for (VPRecipeBase &R : EB->phis())
+ cast<VPIRPhi>(&R)->removeIncomingValueFor(Pred);
}
-
cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent();
VPBlockUtils::disconnectBlocks(Pred, EB);
}
@@ -526,32 +528,6 @@ void VPlanTransforms::prepareForVectorization(
VPBasicBlock *ScalarPH = Plan.createVPBasicBlock("scalar.ph");
VPBlockUtils::connectBlocks(ScalarPH, Plan.getScalarHeader());
- // If needed, add a check in the middle block to see if we have completed
- // all of the iterations in the first vector loop. Three cases:
- // 1) If we require a scalar epilogue, there is no conditional branch as
- // we unconditionally branch to the scalar preheader. Remove the recipes
- // from the exit blocks.
- // 2) If (N - N%VF) == N, then we *don't* need to run the remainder.
- // Thus if tail is to be folded, we know we don't need to run the
- // remainder and we can set the condition to true.
- // 3) Otherwise, construct a runtime check.
-
- if (!RequiresScalarEpilogueCheck) {
- if (auto *LatchExitVPB = MiddleVPBB->getSingleSuccessor())
- VPBlockUtils::disconnectBlocks(MiddleVPBB, LatchExitVPB);
- VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
- VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH);
- Plan.getEntry()->swapSuccessors();
-
- // The exit blocks are unreachable, remove their recipes to make sure no
- // users remain that may pessimize transforms.
- for (auto *EB : Plan.getExitBlocks()) {
- for (VPRecipeBase &R : make_early_inc_range(*EB))
- R.eraseFromParent();
- }
- return;
- }
-
// The connection order corresponds to the operands of the conditional branch,
// with the middle block already connected to the exit block.
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
@@ -561,21 +537,45 @@ void VPlanTransforms::prepareForVectorization(
VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH);
Plan.getEntry()->swapSuccessors();
- auto *ScalarLatchTerm = TheLoop->getLoopLatch()->getTerminator();
- // Here we use the same DebugLoc as the scalar loop latch terminator instead
- // of the corresponding compare because they may have ended up with
- // different line numbers and we want to avoid awkward line stepping while
- // debugging. Eg. if the compare has got a line number inside the loop.
+ // If MiddleVPBB has a single successor then the original loop does not exit
+ // via the latch and the single successor must be the scalar preheader.
+ // There's no need to add a runtime check to MiddleVPBB.
+ if (MiddleVPBB->getNumSuccessors() == 1) {
+ assert(MiddleVPBB->getSingleSuccessor() == ScalarPH &&
+ "must have ScalarPH as single successor");
+ return;
+ }
+
+ assert(MiddleVPBB->getNumSuccessors() == 2 && "must have 2 successors");
+
+ // Add a check in the middle block to see if we have completed all of the
+ // iterations in the first vector loop.
+ //
+ // Three cases:
+ // 1) If we require a scalar epilogue, the scalar ph must execute. Set the
+ // condition to false.
+ // 2) If (N - N%VF) == N, then we *don't* need to run the
+ // remainder. Thus if tail is to be folded, we know we don't need to run
+ // the remainder and we can set the condition to true.
+ // 3) Otherwise, construct a runtime check.
+
+ // We use the same DebugLoc as the scalar loop latch terminator instead of
+ // the corresponding compare because they may have ended up with different
+ // line numbers and we want to avoid awkward line stepping while debugging.
+ // E.g., if the compare has got a line number inside the loop.
+ DebugLoc LatchDL = TheLoop->getLoopLatch()->getTerminator()->getDebugLoc();
VPBuilder Builder(MiddleVPBB);
- VPValue *Cmp =
- TailFolded
- ? Plan.getOrAddLiveIn(ConstantInt::getTrue(
- IntegerType::getInt1Ty(TripCount->getType()->getContext())))
- : Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
- &Plan.getVectorTripCount(),
- ScalarLatchTerm->getDebugLoc(), "cmp.n");
- Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp},
- ScalarLatchTerm->getDebugLoc());
+ VPValue *Cmp;
+ if (!RequiresScalarEpilogueCheck)
+ Cmp = Plan.getOrAddLiveIn(ConstantInt::getFalse(
+ IntegerType::getInt1Ty(TripCount->getType()->getContext())));
+ else if (TailFolded)
+ Cmp = Plan.getOrAddLiveIn(ConstantInt::getTrue(
+ IntegerType::getInt1Ty(TripCount->getType()->getContext())));
+ else
+ Cmp = Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
+ &Plan.getVectorTripCount(), LatchDL, "cmp.n");
+ Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp}, LatchDL);
}
void VPlanTransforms::createLoopRegions(VPlan &Plan) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index ca9876175d3c0..a4831ea7c11f7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1185,6 +1185,14 @@ void VPIRPhi::execute(VPTransformState &State) {
State.Builder.SetInsertPoint(Phi->getParent(), std::next(Phi->getIterator()));
}
+void VPPhiAccessors::removeIncomingValueFor(VPBlockBase *IncomingBlock) const {
+ VPRecipeBase *R = const_cast<VPRecipeBase *>(getAsRecipe());
+ assert(R->getNumOperands() == R->getParent()->getNumPredecessors() &&
+ "Number of phi operands must match number of predecessors");
+ unsigned Position = R->getParent()->getIndexForPredecessor(IncomingBlock);
+ R->removeOperand(Position);
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPPhiAccessors::printPhiOperands(raw_ostream &O,
VPSlotTracker &SlotTracker) const {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 6ea61c43f9528..5c8849be3d23e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1841,40 +1841,37 @@ void VPlanTransforms::truncateToMinimalBitwidths(
}
}
-/// Remove BranchOnCond recipes with true conditions together with removing
-/// dead edges to their successors.
-static void removeBranchOnCondTrue(VPlan &Plan) {
+/// Remove BranchOnCond recipes with true or false conditions together with
+/// removing dead edges to their successors.
+static void removeBranchOnConst(VPlan &Plan) {
using namespace llvm::VPlanPatternMatch;
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(Plan.getEntry()))) {
+ VPValue *Cond;
if (VPBB->getNumSuccessors() != 2 || VPBB == Plan.getEntry() ||
- !match(&VPBB->back(), m_BranchOnCond(m_True())))
+ !match(&VPBB->back(), m_BranchOnCond(m_VPValue(Cond))))
continue;
- VPBasicBlock *RemovedSucc = cast<VPBasicBlock>(VPBB->getSuccessors()[1]);
- unsigned DeadIdx = RemovedSucc->getIndexForPredecessor(VPBB);
-
- // Values coming from VPBB into ResumePhi recipes of RemoveSucc are removed
- // from these recipes.
- for (VPRecipeBase &R : make_early_inc_range(*RemovedSucc)) {
- assert((!isa<VPIRInstruction>(&R) ||
- !isa<PHINode>(cast<VPIRInstruction>(&R)->getInstruction())) &&
- !isa<VPHeaderPHIRecipe>(&R) &&
- "Cannot update VPIRInstructions wrapping phis or header phis yet");
- auto *VPI = dyn_cast<VPPhi>(&R);
- if (!VPI)
- break;
- VPBuilder B(VPI);
- SmallVector<VPValue *> NewOperands;
- // Create new operand list, with the dead incoming value filtered out.
- for (const auto &[Idx, Op] : enumerate(VPI->operands())) {
- if (Idx == DeadIdx)
- continue;
- NewOperands.push_back(Op);
- }
- VPI->replaceAllUsesWith(
- B.createScalarPhi(NewOperands, VPI->getDebugLoc(), VPI->getName()));
- VPI->eraseFromParent();
+ unsigned RemovedIdx;
+ if (match(Cond, m_True()))
+ RemovedIdx = 1;
+ else if (match(Cond, m_False()))
+ RemovedIdx = 0;
+ else
+ continue;
+
+ VPBasicBlock *RemovedSucc =
+ cast<VPBasicBlock>(VPBB->getSuccessors()[RemovedIdx]);
+ const auto &Preds = RemovedSucc->getPredecessors();
+ assert(count(Preds, VPBB) == 1 &&
+ "There must be a single edge between VPBB and its successor");
+ // Values coming from VPBB into phi recipes of RemoveSucc are removed from
+ // these recipes.
+ for (VPRecipeBase &R : RemovedSucc->phis()) {
+ auto *Phi = cast<VPPhiAccessors>(&R);
+ assert((!isa<VPIRPhi>(&R) || RemovedSucc->getNumPredecessors() == 1) &&
+ "VPIRPhis must have a single predecessor");
+ Phi->removeIncomingValueFor(VPBB);
}
// Disconnect blocks and remove the terminator. RemovedSucc will be deleted
// automatically on VPlan destruction if it becomes unreachable.
@@ -1894,7 +1891,7 @@ void VPlanTransforms::optimize(VPlan &Plan) {
runPass(legalizeAndOptimizeInductions, Plan);
runPass(removeRedundantExpandSCEVRecipes, Plan);
runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType());
- runPass(removeBranchOnCondTrue, Plan);
+ runPass(removeBranchOnConst, Plan);
runPass(removeDeadRecipes, Plan);
runPass(createAndOptimizeReplicateRegions, Plan);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 01e8bf78ef04c..ad347ed6f50f8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -38,6 +38,7 @@ class VPSlotTracker;
class VPUser;
class VPRecipeBase;
class VPInterleaveRecipe;
+class VPPhiAccessors;
// This is the base class of the VPlan Def/Use graph, used for modeling the data
// flow into, within and out of the VPlan. VPValues can stand for live-ins
@@ -199,8 +200,18 @@ raw_ostream &operator<<(raw_ostream &OS, const VPRecipeBase &R);
/// This class augments VPValue with operands which provide the inverse def-use
/// edges from VPValue's users to their defs.
class VPUser {
+ /// Grant access to removeOperand for VPPhiAccessors, the only supported user.
+ friend class VPPhiAccessors;
+
SmallVector<VPValue *, 2> Operands;
+ /// Removes the operand at index \p Idx. This also removes the VPUser from the
+ /// use-list of the operand.
+ void removeOperand(unsigned Idx) {
+ getOperand(Idx)->removeUser(*this);
+ Operands.erase(Operands.begin() + Idx);
+ }
+
protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the operands to \p O.
More information about the llvm-commits
mailing list