[llvm] [LV] Transform to handle exits in the scalar loop (PR #148626)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 14 06:03:00 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Graham Hunter (huntergr-arm)
<details>
<summary>Changes</summary>
In preparation for supporting stores in early exit loops, this transform replicates the uncounted exit condition in a new check block and adjusts the in-loop recipes so that if the next vector iteration would encounter an exit, we instead transfer to the scalar loop.
This can be used with architectures that do not have advanced masking features for state-changing operations (like stores), or in cases where creating multiple mask partitions in the vector loop could be expensive.
---
Patch is 28.61 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/148626.diff
10 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+18-6)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.cpp (+2)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+24)
- (modified) llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp (+3)
- (modified) llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h (+31)
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+1-1)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+212-43)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.h (+11)
- (modified) llvm/lib/Transforms/Vectorize/VPlanValue.h (+2)
- (added) llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit_scalar_exits.ll (+79)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 5380a0fc6498a..368ed47a8e590 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -400,6 +400,10 @@ static cl::opt<bool> EnableEarlyExitVectorization(
cl::desc(
"Enable vectorization of early exit loops with uncountable exits."));
+static cl::opt<bool> HandleEarlyExitsInScalarTail(
+ "handle-early-exits-in-scalar-tail", cl::init(false), cl::Hidden,
+ cl::desc("Use the scalar tail to deal with early exit logic"));
+
// Likelyhood of bypassing the vectorized loop because there are zero trips left
// after prolog. See `emitIterationCountCheck`.
static constexpr uint32_t MinItersBypassWeights[] = {1, 127};
@@ -491,8 +495,8 @@ class InnerLoopVectorizer {
AC(AC), ORE(ORE), VF(VecWidth),
MinProfitableTripCount(MinProfitableTripCount), UF(UnrollFactor),
Builder(PSE.getSE()->getContext()), Cost(CM), BFI(BFI), PSI(PSI),
- RTChecks(RTChecks), Plan(Plan),
- VectorPHVPB(Plan.getVectorLoopRegion()->getSinglePredecessor()) {}
+ RTChecks(RTChecks), Plan(Plan), VectorPHVPB(Plan.getVectorPreheader()) {
+ }
virtual ~InnerLoopVectorizer() = default;
@@ -8322,6 +8326,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
auto MaxVFTimes2 = MaxVF * 2;
auto VPlan0 = VPlanTransforms::buildPlainCFG(OrigLoop, *LI);
+ VPlan0->setEarlyExitContinuesInScalarLoop(HandleEarlyExitsInScalarTail);
for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
VFRange SubRange = {VF, MaxVFTimes2};
if (auto Plan = tryToBuildVPlanWithVPRecipes(
@@ -8338,6 +8343,14 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
!VPlanTransforms::runPass(VPlanTransforms::tryAddExplicitVectorLength,
*Plan, CM.getMaxSafeElements()))
break;
+ // See if we can convert an early exit vplan to bail out to a scalar
+ // loop if state-changing operations (like stores) are present and
+ // an exit will be taken in the next vector iteration.
+ // If not, discard the plan.
+ if (HandleEarlyExitsInScalarTail && !HasScalarVF &&
+ !VPlanTransforms::runPass(VPlanTransforms::handleExitsInScalarLoop,
+ *Plan))
+ break;
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
VPlans.push_back(std::move(Plan));
}
@@ -8391,8 +8404,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
auto *ScalarPH = Plan.getScalarPreheader();
auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getPredecessors()[0]);
VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
- VPBuilder VectorPHBuilder(
- cast<VPBasicBlock>(VectorRegion->getSinglePredecessor()));
+ VPBuilder VectorPHBuilder(cast<VPBasicBlock>(Plan.getVectorPreheader()));
VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
VPBuilder ScalarPHBuilder(ScalarPH);
for (VPRecipeBase &ScalarPhiR : Plan.getScalarHeader()->phis()) {
@@ -8839,8 +8851,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
auto CanUseVersionedStride = [&Plan](VPUser &U, unsigned) {
auto *R = cast<VPRecipeBase>(&U);
return R->getParent()->getParent() ||
- R->getParent() ==
- Plan->getVectorLoopRegion()->getSinglePredecessor();
+ R->getParent() == Plan->getVectorPreheader();
};
for (auto [_, Stride] : Legal->getLAI()->getSymbolicStrides()) {
auto *StrideV = cast<SCEVUnknown>(Stride)->getValue();
@@ -8906,6 +8917,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
assert(EnableVPlanNativePath && "VPlan-native path is not enabled.");
auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI);
+ Plan->setEarlyExitContinuesInScalarLoop(HandleEarlyExitsInScalarTail);
VPlanTransforms::prepareForVectorization(
*Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop,
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), false,
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 40a55656bfa7e..2d966702148e0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1291,6 +1291,8 @@ VPlan *VPlan::duplicate() {
NewPlan->ExitBlocks.push_back(cast<VPIRBasicBlock>(VPB));
}
+ NewPlan->setEarlyExitContinuesInScalarLoop(EarlyExitContinuesInScalarLoop);
+
return NewPlan;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 85741b977bb77..834ede74a5b50 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3883,6 +3883,14 @@ class VPlan {
/// VPlan is destroyed.
SmallVector<VPBlockBase *> CreatedBlocks;
+ /// The entry block in a vplan, which may be a check block that needs to
+ /// be wired up in the right place with existing check blocks.
+ std::optional<VPBasicBlock *> EarlyExitPreheader;
+
+ /// Indicates that an early exit loop will exit before the condition is
+ /// reached, and that the scalar loop must perform the last few iterations.
+ bool EarlyExitContinuesInScalarLoop = false;
+
/// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
/// wrapping the original header of the scalar loop.
VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
@@ -3929,12 +3937,17 @@ class VPlan {
/// Returns the preheader of the vector loop region, if one exists, or null
/// otherwise.
VPBasicBlock *getVectorPreheader() {
+ if (EarlyExitPreheader)
+ return *EarlyExitPreheader;
VPRegionBlock *VectorRegion = getVectorLoopRegion();
return VectorRegion
? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())
: nullptr;
}
+ /// Overrides the current vplan preheader block.
+ void setEarlyExitPreheader(VPBasicBlock *BB) { EarlyExitPreheader = BB; }
+
/// Returns the VPRegionBlock of the vector loop.
LLVM_ABI_FOR_TEST VPRegionBlock *getVectorLoopRegion();
LLVM_ABI_FOR_TEST const VPRegionBlock *getVectorLoopRegion() const;
@@ -4187,6 +4200,17 @@ class VPlan {
(ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1);
}
+ /// Returns true if the vector iteration containing an exit should be handled
+ /// in the scalar loop instead of by masking.
+ bool shouldEarlyExitContinueInScalarLoop() const {
+ return EarlyExitContinuesInScalarLoop;
+ }
+
+ /// If set to true, early exits should be handled in the scalar loop.
+ void setEarlyExitContinuesInScalarLoop(bool Continues) {
+ EarlyExitContinuesInScalarLoop = Continues;
+ }
+
/// Returns true if the scalar tail may execute after the vector loop. Note
/// that this relies on unneeded branches to the scalar tail loop being
/// removed.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 52eecb000d0c2..decf7a01c3ae9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -502,6 +502,9 @@ void VPlanTransforms::prepareForVectorization(
cast<VPBasicBlock>(HeaderVPB),
cast<VPBasicBlock>(LatchVPB), Range);
HandledUncountableEarlyExit = true;
+ if (Plan.shouldEarlyExitContinueInScalarLoop())
+ for (VPRecipeBase &R : EB->phis())
+ cast<VPIRPhi>(&R)->removeIncomingValueFor(Pred);
} else {
for (VPRecipeBase &R : EB->phis())
cast<VPIRPhi>(&R)->removeIncomingValueFor(Pred);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index d133610ef4f75..26fca9ae99cca 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -632,6 +632,37 @@ m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3) {
return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2), m_Argument<3>(Op3));
}
+struct loop_invariant_vpvalue {
+ template <typename ITy> bool match(ITy *V) const {
+ VPValue *Val = dyn_cast<VPValue>(V);
+ return Val && Val->isDefinedOutsideLoopRegions();
+ }
+};
+
+inline loop_invariant_vpvalue m_LoopInvVPValue() {
+ return loop_invariant_vpvalue();
+}
+
+template <typename Op0_t>
+inline UnaryVPInstruction_match<Op0_t, VPInstruction::AnyOf>
+m_AnyOf(const Op0_t &Op0) {
+ return m_VPInstruction<VPInstruction::AnyOf>(Op0);
+}
+
+template <typename SubPattern_t> struct OneUse_match {
+ SubPattern_t SubPattern;
+
+ OneUse_match(const SubPattern_t &SP) : SubPattern(SP) {}
+
+ template <typename OpTy> bool match(OpTy *V) {
+ return V->hasOneUse() && SubPattern.match(V);
+ }
+};
+
+template <typename T> inline OneUse_match<T> m_OneUse(const T &SubPattern) {
+ return SubPattern;
+}
+
} // namespace VPlanPatternMatch
} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 3c367664a0988..cf30852394f21 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -285,7 +285,7 @@ bool VPRecipeBase::isPhi() const {
return (getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC) ||
(isa<VPInstruction>(this) &&
cast<VPInstruction>(this)->getOpcode() == Instruction::PHI) ||
- isa<VPIRPhi>(this);
+ isa<VPPhi>(this) || isa<VPIRPhi>(this);
}
bool VPRecipeBase::isScalarCast() const {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 6a3b3e6e41955..af420911ae6b7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1490,7 +1490,7 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
HeaderPhiR->eraseFromParent();
}
- VPBlockBase *Preheader = VectorRegion->getSinglePredecessor();
+ VPBlockBase *Preheader = Plan.getVectorPreheader();
VPBlockBase *Exit = VectorRegion->getSingleSuccessor();
VPBlockUtils::disconnectBlocks(Preheader, VectorRegion);
VPBlockUtils::disconnectBlocks(VectorRegion, Exit);
@@ -2612,7 +2612,11 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR,
}
// If the phi is truncated, truncate the start and step values.
- VPBuilder Builder(Plan->getVectorPreheader());
+ VPBasicBlock *VectorPH = Plan->getVectorPreheader();
+ VPBuilder Builder(VectorPH);
+ if (VPRecipeBase *Br = VectorPH->getTerminator())
+ Builder.setInsertPoint(Br);
+
Type *StepTy = TypeInfo.inferScalarType(Step);
if (Ty->getScalarSizeInBits() < StepTy->getScalarSizeInBits()) {
assert(StepTy->isIntegerTy() && "Truncation requires an integer type");
@@ -2776,8 +2780,9 @@ void VPlanTransforms::handleUncountableEarlyExit(
// Early exit operand should always be last phi operand. If EarlyExitVPBB
// has two predecessors and EarlyExitingVPBB is the first, swap the operands
// of the phis.
- for (VPRecipeBase &R : EarlyExitVPBB->phis())
- cast<VPIRPhi>(&R)->swapOperands();
+ if (!Plan.shouldEarlyExitContinueInScalarLoop())
+ for (VPRecipeBase &R : EarlyExitVPBB->phis())
+ cast<VPIRPhi>(&R)->swapOperands();
}
VPBuilder Builder(LatchVPBB->getTerminator());
@@ -2795,48 +2800,51 @@ void VPlanTransforms::handleUncountableEarlyExit(
// block if CondToEarlyExit.
VPValue *IsEarlyExitTaken =
Builder.createNaryOp(VPInstruction::AnyOf, {CondToEarlyExit});
- VPBasicBlock *NewMiddle = Plan.createVPBasicBlock("middle.split");
- VPBasicBlock *VectorEarlyExitVPBB =
- Plan.createVPBasicBlock("vector.early.exit");
- VPBlockUtils::insertOnEdge(LatchVPBB, MiddleVPBB, NewMiddle);
- VPBlockUtils::connectBlocks(NewMiddle, VectorEarlyExitVPBB);
- NewMiddle->swapSuccessors();
-
- VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, EarlyExitVPBB);
-
- // Update the exit phis in the early exit block.
- VPBuilder MiddleBuilder(NewMiddle);
- VPBuilder EarlyExitB(VectorEarlyExitVPBB);
- for (VPRecipeBase &R : EarlyExitVPBB->phis()) {
- auto *ExitIRI = cast<VPIRPhi>(&R);
- // Early exit operand should always be last, i.e., 0 if EarlyExitVPBB has
- // a single predecessor and 1 if it has two.
- unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
- if (ExitIRI->getNumOperands() != 1) {
- // The first of two operands corresponds to the latch exit, via MiddleVPBB
- // predecessor. Extract its last lane.
- ExitIRI->extractLastLaneOfFirstOperand(MiddleBuilder);
- }
+ if (!Plan.shouldEarlyExitContinueInScalarLoop()) {
+ VPBasicBlock *NewMiddle = Plan.createVPBasicBlock("middle.split");
+ VPBasicBlock *VectorEarlyExitVPBB =
+ Plan.createVPBasicBlock("vector.early.exit");
+ VPBlockUtils::insertOnEdge(LatchVPBB, MiddleVPBB, NewMiddle);
+ VPBlockUtils::connectBlocks(NewMiddle, VectorEarlyExitVPBB);
+ NewMiddle->swapSuccessors();
+
+ VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, EarlyExitVPBB);
+
+ // Update the exit phis in the early exit block.
+ VPBuilder MiddleBuilder(NewMiddle);
+ VPBuilder EarlyExitB(VectorEarlyExitVPBB);
+ for (VPRecipeBase &R : EarlyExitVPBB->phis()) {
+ auto *ExitIRI = cast<VPIRPhi>(&R);
+ // Early exit operand should always be last, i.e., 0 if EarlyExitVPBB has
+ // a single predecessor and 1 if it has two.
+ unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
+ if (ExitIRI->getNumOperands() != 1) {
+ // The first of two operands corresponds to the latch exit, via
+ // MiddleVPBB predecessor. Extract its last lane.
+ ExitIRI->extractLastLaneOfFirstOperand(MiddleBuilder);
+ }
- VPValue *IncomingFromEarlyExit = ExitIRI->getOperand(EarlyExitIdx);
- auto IsVector = [](ElementCount VF) { return VF.isVector(); };
- // When the VFs are vectors, need to add `extract` to get the incoming value
- // from early exit. When the range contains scalar VF, limit the range to
- // scalar VF to prevent mis-compilation for the range containing both scalar
- // and vector VFs.
- if (!IncomingFromEarlyExit->isLiveIn() &&
- LoopVectorizationPlanner::getDecisionAndClampRange(IsVector, Range)) {
- // Update the incoming value from the early exit.
- VPValue *FirstActiveLane = EarlyExitB.createNaryOp(
- VPInstruction::FirstActiveLane, {CondToEarlyExit}, nullptr,
- "first.active.lane");
- IncomingFromEarlyExit = EarlyExitB.createNaryOp(
- Instruction::ExtractElement, {IncomingFromEarlyExit, FirstActiveLane},
- nullptr, "early.exit.value");
- ExitIRI->setOperand(EarlyExitIdx, IncomingFromEarlyExit);
+ VPValue *IncomingFromEarlyExit = ExitIRI->getOperand(EarlyExitIdx);
+ auto IsVector = [](ElementCount VF) { return VF.isVector(); };
+ // When the VFs are vectors, need to add `extract` to get the incoming
+ // value from early exit. When the range contains scalar VF, limit the
+ // range to scalar VF to prevent mis-compilation for the range containing
+ // both scalar and vector VFs.
+ if (!IncomingFromEarlyExit->isLiveIn() &&
+ LoopVectorizationPlanner::getDecisionAndClampRange(IsVector, Range)) {
+ // Update the incoming value from the early exit.
+ VPValue *FirstActiveLane = EarlyExitB.createNaryOp(
+ VPInstruction::FirstActiveLane, {CondToEarlyExit}, nullptr,
+ "first.active.lane");
+ IncomingFromEarlyExit =
+ EarlyExitB.createNaryOp(Instruction::ExtractElement,
+ {IncomingFromEarlyExit, FirstActiveLane},
+ nullptr, "early.exit.value");
+ ExitIRI->setOperand(EarlyExitIdx, IncomingFromEarlyExit);
+ }
}
+ MiddleBuilder.createNaryOp(VPInstruction::BranchOnCond, {IsEarlyExitTaken});
}
- MiddleBuilder.createNaryOp(VPInstruction::BranchOnCond, {IsEarlyExitTaken});
// Replace the condition controlling the non-early exit from the vector loop
// with one exiting if either the original condition of the vector latch is
@@ -2853,6 +2861,167 @@ void VPlanTransforms::handleUncountableEarlyExit(
LatchExitingBranch->eraseFromParent();
}
+bool VPlanTransforms::handleExitsInScalarLoop(VPlan &Plan) {
+ // We can abandon a vplan entirely if we return false here, so we shouldn't
+ // crash if some earlier assumptions on scalar IR don't hold for the vplan
+ // version of the loop.
+ if (Plan.hasScalarVFOnly())
+ return false;
+ auto *Region = Plan.getVectorLoopRegion();
+ using namespace llvm::VPlanPatternMatch;
+ VPCanonicalIVPHIRecipe *IV = Plan.getCanonicalIV();
+ VPInstruction *IVUpdate = dyn_cast<VPInstruction>(IV->getBackedgeValue());
+ if (!IVUpdate)
+ return false;
+
+ // Find the uncounted loop exit condition.
+ VPValue *Uncounted = nullptr;
+ if (!match(Region->getExitingBasicBlock()->getTerminator(),
+ m_BranchOnCond(m_OneUse(m_c_BinaryOr(
+ m_OneUse(m_AnyOf(m_VPValue(Uncounted))), m_VPValue())))))
+ return false;
+
+ // Extract the recipes needed to create the uncountable exit condition.
+ // Looking for icmp(load(gep(base, iv)), loop_inv) or similar
+ SmallVector<VPValue *, 4> Worklist;
+ SmallDenseMap<VPValue *, VPRecipeBase *, 8> CloneMap;
+ SmallVector<VPReplicateRecipe *, 2> GEPs;
+ SmallVector<VPRecipeBase *, 8> ConditionRecipes;
+ bool LoadFound = false;
+ bool IVLinkedGEP = false;
+
+ Worklist.push_back(Uncounted);
+ while (!Worklist.empty()) {
+ VPValue *V = Worklist.pop_back_val();
+ if (V->isDefinedOutsideLoopRegions())
+ continue;
+ if (V->getNumUsers() > 1)
+ return false;
+ if (auto *Cmp = dyn_cast<VPWidenRecipe>(V)) {
+ if (Cmp->getOpcode() != Instruction::ICmp)
+ return false;
+ Worklist.push_back(Cmp->getOperand(0));
+ Worklist.push_back(Cmp->getOperand(1));
+ ConditionRecipes.push_back(Cmp);
+ } else if (auto *Load = dyn_cast<VPWidenLoadRecipe>(V)) {
+ if (!Load->isConsecutive() || Load->isMasked())
+ return false;
+ Worklist.push_back(Load->getAddr());
+ ConditionRecipes.push_back(Load);
+ LoadFound = true;
+ } else if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(V)) {
+ Worklist.push_back(VecPtr->getOperand(0));
+ ConditionRecipes.push_back(VecPtr);
+ } else if (auto *GEP = dyn_cast<VPReplicateRecipe>(V)) {
+ if (GEP->getNumOperands() != 2)
+ return false;
+ if (!match(GEP, m_GetElementPtr(
+ m_LoopInvVPValue(),
+ m_ScalarIVSteps(m_Specific(IV), m_SpecificInt(1),
+ m_Specific(&Plan.getVF())))))
+ return false;
+ GEPs.push_back(GEP);
+ ConditionRecipes.push_back(GEP);
+ IVLinkedGEP = true;
+ } else
+ return false;
+ }
+
+ // If we didn't find any recipes, didn't find a load, or didn't find a
+ // GEP linked to the IV, bail out.
+ if (ConditionRecipes.empty() || !LoadFound || !IVLinkedGEP)
+ return false;
+
+ // Clone the condition recipes into the preheader
+ VPBasicBlock *VectorPH = Plan.getVectorPreheader();
+ for (VPRecipeBase *R : reverse(ConditionRecipes)) {
+ VPRecipeBase *Clone = nullptr;
+ Clone = R->clone();
+ VectorPH->appendRecipe(Clone);
+ CloneMap[R->getVPSingleValue()] = Clone;
+ }
+
+ // Remap the cloned recipes to use the corresponding operands.
+ for (VPRecipeBase *R : ConditionRecipes) {
+ auto *Clone = CloneMap.at(R->getVPSingleValue());
+ for (unsigned I = 0; I < R->getNumOperands(); ++I)
+ if (VPRecipeBase *OpR = CloneMap.lookup(R->getOperand(I)))
+ Clone->setOperand(I, OpR->getVPSingleValue());
+ }
+
+ // Adjust preheader GEPs
+ for (auto *GEP : GEPs)
+ CloneMap[GEP]->setOperand(
+ 1, Plan.getOrAddLiveIn(ConstantInt::get(IV->getScalarType(), 0)));
+
+ // Split vector preheader to form a new bypass block.
+ VPBasicBlock *NewPH = VectorPH->splitAt(VectorPH->end());
+ VPB...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/148626
More information about the llvm-commits
mailing list