[llvm] [LV] Add epilogue minimum iteration check in VPlan as well. (PR #189372)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 30 06:00:17 PDT 2026
https://github.com/fhahn created https://github.com/llvm/llvm-project/pull/189372
Update LV to also use the VPlan-based addMinimumIterationCheck for the epilogue's iteration count check.
As the VPlan-based addMinimumIterationCheck uses VPExpandSCEV, those expansion recipes need to be placed in the entry block for now, moving the vscale * VF * IC computation to the entry for scalable vectors.
The new logic also fails to simplify some checks involving PtrToInt, because previously they were only simplified by first generating IR, folding some PtrToInt instructions in that IR, and then constructing SCEVs again. But those checks should be cleaned up by later combines, and there is not really much we can do other than going back to generating the checks through IR.
>From 8e8a57f7fd44f12ccc2643d2b3b8a2ee4df66ea5 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 30 Mar 2026 13:58:24 +0100
Subject: [PATCH] [LV] Add epilogue minimum iteration check in VPlan as well.
Update LV to also use the VPlan-based addMinimumIterationCheck for the
epilogue's iteration count check.
As the VPlan-based addMinimumIterationCheck uses VPExpandSCEV, those
expansion recipes need to be placed in the entry block for now, moving
the vscale * VF * IC computation to the entry for scalable vectors.
The new logic also fails to simplify some checks involving PtrToInt,
because previously they were only simplified by first generating IR,
folding some PtrToInt instructions in that IR, and then constructing
SCEVs again. But those checks should be cleaned up by later combines,
and there is not really much we can do other than going back to
generating the checks through IR.
---
.../Vectorize/LoopVectorizationPlanner.h | 13 +-
.../Transforms/Vectorize/LoopVectorize.cpp | 230 +++++-------------
.../Vectorize/VPlanConstruction.cpp | 32 ++-
.../Transforms/Vectorize/VPlanTransforms.h | 13 +-
.../AArch64/epilog-vectorization-factors.ll | 6 +-
.../AArch64/induction-costs-sve.ll | 7 +-
.../AArch64/load-cast-context.ll | 9 +-
.../AArch64/low_trip_count_predicates.ll | 8 +-
.../partial-reduce-sub-epilogue-vec.ll | 8 +-
.../AArch64/reduction-recurrence-costs-sve.ll | 7 +-
.../LoopVectorize/AArch64/store-costs-sve.ll | 12 +-
.../sve-epilog-vect-inloop-reductions.ll | 4 +-
.../AArch64/sve-epilog-vect-reductions.ll | 4 +-
.../sve-epilog-vect-strict-reductions.ll | 4 +-
.../LoopVectorize/AArch64/sve-epilog-vect.ll | 30 +--
.../AArch64/sve-epilog-vscale-fixed.ll | 11 +-
.../AArch64/sve2-histcnt-epilogue.ll | 7 +-
.../LoopVectorize/AArch64/sve2-histcnt.ll | 7 +-
...row-interleave-to-widen-memory-scalable.ll | 9 +-
.../LoopVectorize/AArch64/vector-reverse.ll | 4 +-
...interleave-to-widen-memory-epilogue-vec.ll | 4 +-
.../epilog-vectorization-reductions.ll | 6 +-
22 files changed, 182 insertions(+), 253 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 7652ee37fe686..e05628926c738 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -620,6 +620,15 @@ class LoopVectorizationPlanner {
void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF,
ElementCount MinProfitableTripCount) const;
+ /// Add a new check block before the vector preheader to \p Plan to check
+ /// if the main vector loop should be executed (TC >= VF * UF).
+ void addIterationCountCheckBlock(VPlan &Plan, ElementCount VF,
+ unsigned UF) const;
+
+ /// Attach the runtime checks of \p RTChecks to \p Plan.
+ void attachRuntimeChecks(VPlan &Plan, GeneratedRTChecks &RTChecks,
+ bool HasBranchWeights) const;
+
/// Update loop metadata and profile info for both the scalar remainder loop
/// and \p VectorLoop, if it exists. Keeps all loop hints from the original
/// loop on the vector loop and replaces vectorizer-specific metadata. The
@@ -672,10 +681,6 @@ class LoopVectorizationPlanner {
VPRecipeBuilder &RecipeBuilder,
ElementCount MinVF);
- /// Attach the runtime checks of \p RTChecks to \p Plan.
- void attachRuntimeChecks(VPlan &Plan, GeneratedRTChecks &RTChecks,
- bool HasBranchWeights) const;
-
#ifndef NDEBUG
/// \return The most profitable vectorization factor for the available VPlans
/// and the cost of that VF.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 9c009ec7c4f7b..f8e8acf6c9a36 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -711,25 +711,8 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
: InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TTI, AC, EPI, CM,
Check, Plan, EPI.MainLoopVF,
EPI.MainLoopVF, EPI.MainLoopUF) {}
- /// Implements the interface for creating a vectorized skeleton using the
- /// *main loop* strategy (i.e., the first pass of VPlan execution).
- BasicBlock *createVectorizedLoopSkeleton() final;
protected:
- /// Introduces a new VPIRBasicBlock for \p CheckIRBB to Plan between the
- /// vector preheader and its predecessor, also connecting the new block to the
- /// scalar preheader.
- void introduceCheckBlockInVPlan(BasicBlock *CheckIRBB);
-
- // Create a check to see if the main vector loop should be executed
- Value *createIterationCountCheck(BasicBlock *VectorPH, ElementCount VF,
- unsigned UF) const;
-
- /// Emits an iteration count bypass check once for the main loop (when \p
- /// ForEpilogue is false) and once for the epilogue loop (when \p
- /// ForEpilogue is true).
- BasicBlock *emitIterationCountCheck(BasicBlock *VectorPH, BasicBlock *Bypass,
- bool ForEpilogue);
void printDebugTracesAtStart() override;
void printDebugTracesAtEnd() override;
};
@@ -2330,89 +2313,6 @@ static bool useMaskedInterleavedAccesses(const TargetTransformInfo &TTI) {
return TTI.enableMaskedInterleavedAccessVectorization();
}
-void EpilogueVectorizerMainLoop::introduceCheckBlockInVPlan(
- BasicBlock *CheckIRBB) {
- // Note: The block with the minimum trip-count check is already connected
- // during earlier VPlan construction.
- VPBlockBase *ScalarPH = Plan.getScalarPreheader();
- VPBlockBase *PreVectorPH = VectorPHVPBB->getSinglePredecessor();
- assert(PreVectorPH->getNumSuccessors() == 2 && "Expected 2 successors");
- assert(PreVectorPH->getSuccessors()[0] == ScalarPH && "Unexpected successor");
- VPIRBasicBlock *CheckVPIRBB = Plan.createVPIRBasicBlock(CheckIRBB);
- VPBlockUtils::insertOnEdge(PreVectorPH, VectorPHVPBB, CheckVPIRBB);
- PreVectorPH = CheckVPIRBB;
- VPBlockUtils::connectBlocks(PreVectorPH, ScalarPH);
- PreVectorPH->swapSuccessors();
-
- // We just connected a new block to the scalar preheader. Update all
- // VPPhis by adding an incoming value for it, replicating the last value.
- unsigned NumPredecessors = ScalarPH->getNumPredecessors();
- for (VPRecipeBase &R : cast<VPBasicBlock>(ScalarPH)->phis()) {
- assert(isa<VPPhi>(&R) && "Phi expected to be VPPhi");
- assert(cast<VPPhi>(&R)->getNumIncoming() == NumPredecessors - 1 &&
- "must have incoming values for all operands");
- R.addOperand(R.getOperand(NumPredecessors - 2));
- }
-}
-
-Value *EpilogueVectorizerMainLoop::createIterationCountCheck(
- BasicBlock *VectorPH, ElementCount VF, unsigned UF) const {
- // Generate code to check if the loop's trip count is less than VF * UF, or
- // equal to it in case a scalar epilogue is required; this implies that the
- // vector trip count is zero. This check also covers the case where adding one
- // to the backedge-taken count overflowed leading to an incorrect trip count
- // of zero. In this case we will also jump to the scalar loop.
- auto P = Cost->requiresScalarEpilogue(VF.isVector()) ? ICmpInst::ICMP_ULE
- : ICmpInst::ICMP_ULT;
-
- // Reuse existing vector loop preheader for TC checks.
- // Note that new preheader block is generated for vector loop.
- BasicBlock *const TCCheckBlock = VectorPH;
- IRBuilder<InstSimplifyFolder> Builder(
- TCCheckBlock->getContext(),
- InstSimplifyFolder(TCCheckBlock->getDataLayout()));
- Builder.SetInsertPoint(TCCheckBlock->getTerminator());
-
- // If tail is to be folded, vector loop takes care of all iterations.
- Value *Count = getTripCount();
- Type *CountTy = Count->getType();
- Value *CheckMinIters = Builder.getFalse();
- auto CreateStep = [&]() -> Value * {
- // Create step with max(MinProTripCount, UF * VF).
- if (UF * VF.getKnownMinValue() >= MinProfitableTripCount.getKnownMinValue())
- return createStepForVF(Builder, CountTy, VF, UF);
-
- Value *MinProfTC =
- Builder.CreateElementCount(CountTy, MinProfitableTripCount);
- if (!VF.isScalable())
- return MinProfTC;
- return Builder.CreateBinaryIntrinsic(
- Intrinsic::umax, MinProfTC, createStepForVF(Builder, CountTy, VF, UF));
- };
-
- TailFoldingStyle Style = Cost->getTailFoldingStyle();
- if (Style == TailFoldingStyle::None) {
- Value *Step = CreateStep();
- ScalarEvolution &SE = *PSE.getSE();
- // TODO: Emit unconditional branch to vector preheader instead of
- // conditional branch with known condition.
- const SCEV *TripCountSCEV = SE.applyLoopGuards(SE.getSCEV(Count), OrigLoop);
- // Check if the trip count is < the step.
- if (SE.isKnownPredicate(P, TripCountSCEV, SE.getSCEV(Step))) {
- // TODO: Ensure step is at most the trip count when determining max VF and
- // UF, w/o tail folding.
- CheckMinIters = Builder.getTrue();
- } else if (!SE.isKnownPredicate(CmpInst::getInversePredicate(P),
- TripCountSCEV, SE.getSCEV(Step))) {
- // Generate the minimum iteration check only if we cannot prove the
- // check is known to be true, or known to be false.
- CheckMinIters = Builder.CreateICmp(P, Count, Step, "min.iters.check");
- } // else step known to be < trip count, use CheckMinIters preset to false.
- }
-
- return CheckMinIters;
-}
-
/// Replace \p VPBB with a VPIRBasicBlock wrapping \p IRBB. All recipes from \p
/// VPBB are moved to the end of the newly created VPIRBasicBlock. All
/// predecessors and successors of VPBB, if any, are rewired to the new
@@ -7417,10 +7317,6 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
BestVPlan, BestVF, VScale);
}
- // Checks are the same for all VPlans, added to BestVPlan only for
- // compactness.
- attachRuntimeChecks(BestVPlan, ILV.RTChecks, HasBranchWeights);
-
// Retrieving VectorPH now when it's easier while VPlan still has Regions.
VPBasicBlock *VectorPH = cast<VPBasicBlock>(BestVPlan.getVectorPreheader());
@@ -7471,6 +7367,9 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
// making any changes to the CFG.
DenseMap<const SCEV *, Value *> ExpandedSCEVs =
VPlanTransforms::expandSCEVs(BestVPlan, *PSE.getSE());
+ // Check if the epilogue VPlan is executed (where the trip count was already
+ // set when executing the main plan).
+ bool IsEpilogueVectorization = VectorizingEpilogue && ILV.getTripCount();
if (!ILV.getTripCount()) {
ILV.setTripCount(BestVPlan.getTripCount()->getLiveInIRValue());
} else {
@@ -7541,7 +7440,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
updateLoopMetadataAndProfileInfo(
HeaderVPBB ? LI->getLoopFor(State.CFG.VPBB2IRBB.lookup(HeaderVPBB))
: nullptr,
- HeaderVPBB, BestVPlan, VectorizingEpilogue, LID, OrigAverageTripCount,
+ HeaderVPBB, BestVPlan, IsEpilogueVectorization, LID, OrigAverageTripCount,
OrigLoopInvocationWeight,
estimateElementCount(BestVF * BestUF, CM.getVScaleForTuning()),
DisableRuntimeUnroll);
@@ -7559,33 +7458,6 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
// EpilogueVectorizerMainLoop
//===--------------------------------------------------------------------===//
-/// This function is partially responsible for generating the control flow
-/// depicted in https://llvm.org/docs/Vectorizers.html#epilogue-vectorization.
-BasicBlock *EpilogueVectorizerMainLoop::createVectorizedLoopSkeleton() {
- BasicBlock *ScalarPH = createScalarPreheader("");
- BasicBlock *VectorPH = ScalarPH->getSinglePredecessor();
-
- // Generate the code to check the minimum iteration count of the vector
- // epilogue (see below).
- EPI.EpilogueIterationCountCheck =
- emitIterationCountCheck(VectorPH, ScalarPH, true);
- EPI.EpilogueIterationCountCheck->setName("iter.check");
-
- VectorPH = cast<CondBrInst>(EPI.EpilogueIterationCountCheck->getTerminator())
- ->getSuccessor(1);
- // Generate the iteration count check for the main loop, *after* the check
- // for the epilogue loop, so that the path-length is shorter for the case
- // that goes directly through the vector epilogue. The longer-path length for
- // the main loop is compensated for, by the gain from vectorizing the larger
- // trip count. Note: the branch will get updated later on when we vectorize
- // the epilogue.
- EPI.MainLoopIterationCountCheck =
- emitIterationCountCheck(VectorPH, ScalarPH, false);
-
- return cast<CondBrInst>(EPI.MainLoopIterationCountCheck->getTerminator())
- ->getSuccessor(1);
-}
-
void EpilogueVectorizerMainLoop::printDebugTracesAtStart() {
LLVM_DEBUG({
dbgs() << "Create Skeleton for epilogue vectorized loop (first pass)\n"
@@ -7603,46 +7475,6 @@ void EpilogueVectorizerMainLoop::printDebugTracesAtEnd() {
});
}
-BasicBlock *EpilogueVectorizerMainLoop::emitIterationCountCheck(
- BasicBlock *VectorPH, BasicBlock *Bypass, bool ForEpilogue) {
- assert(Bypass && "Expected valid bypass basic block.");
- Value *Count = getTripCount();
- MinProfitableTripCount = ElementCount::getFixed(0);
- Value *CheckMinIters = createIterationCountCheck(
- VectorPH, ForEpilogue ? EPI.EpilogueVF : EPI.MainLoopVF,
- ForEpilogue ? EPI.EpilogueUF : EPI.MainLoopUF);
-
- BasicBlock *const TCCheckBlock = VectorPH;
- if (!ForEpilogue)
- TCCheckBlock->setName("vector.main.loop.iter.check");
-
- // Create new preheader for vector loop.
- VectorPH = SplitBlock(TCCheckBlock, TCCheckBlock->getTerminator(),
- static_cast<DominatorTree *>(nullptr), LI, nullptr,
- "vector.ph");
- if (ForEpilogue) {
- // Save the trip count so we don't have to regenerate it in the
- // vec.epilog.iter.check. This is safe to do because the trip count
- // generated here dominates the vector epilog iter check.
- EPI.TripCount = Count;
- } else {
- VectorPHVPBB = replaceVPBBWithIRVPBB(VectorPHVPBB, VectorPH);
- }
-
- CondBrInst &BI = *CondBrInst::Create(CheckMinIters, Bypass, VectorPH);
- if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator()))
- setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false);
- ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI);
-
- // When vectorizing the main loop, its trip-count check is placed in a new
- // block, whereas the overall trip-count check is placed in the VPlan entry
- // block. When vectorizing the epilogue loop, its trip-count check is placed
- // in the VPlan entry block.
- if (!ForEpilogue)
- introduceCheckBlockInVPlan(TCCheckBlock);
- return TCCheckBlock;
-}
-
//===--------------------------------------------------------------------===//
// EpilogueVectorizerEpilogueLoop
//===--------------------------------------------------------------------===//
@@ -8700,6 +8532,19 @@ void LoopVectorizationPlanner::addMinimumIterationCheck(
OrigLoop->getLoopPredecessor()->getTerminator()->getDebugLoc(), PSE);
}
+void LoopVectorizationPlanner::addIterationCountCheckBlock(VPlan &Plan,
+ ElementCount VF,
+ unsigned UF) const {
+ const uint32_t *BranchWeights =
+ hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())
+ ? &MinItersBypassWeights[0]
+ : nullptr;
+ VPlanTransforms::addIterationCountCheckBlock(
+ Plan, VF, UF, CM.requiresScalarEpilogue(VF.isVector()), OrigLoop,
+ BranchWeights,
+ OrigLoop->getLoopPredecessor()->getTerminator()->getDebugLoc(), PSE);
+}
+
// Determine how to lower the scalar epilogue, which depends on 1) optimising
// for minimum code-size, 2) predicate compiler options, 3) loop hints forcing
// predication, and 4) a TTI hook that analyses whether the loop is suitable
@@ -8797,6 +8642,9 @@ static bool processLoopInVPlanNativePath(
<< L->getHeader()->getParent()->getName() << "\"\n");
LVP.addMinimumIterationCheck(BestPlan, VF.Width, /*UF=*/1,
VF.MinProfitableTripCount);
+ LVP.attachRuntimeChecks(
+ BestPlan, Checks,
+ hasBranchWeightMD(*L->getLoopLatch()->getTerminator()));
LVP.executePlan(VF.Width, /*UF=*/1, BestPlan, LB, DT, false);
}
@@ -9811,18 +9659,51 @@ bool LoopVectorizePass::processLoop(Loop *L) {
preparePlanForMainVectorLoop(*BestMainPlan, BestEpiPlan);
EpilogueLoopVectorizationInfo EPI(VF.Width, IC, EpilogueVF.Width, 1,
BestEpiPlan);
+
+ // Add minimum iteration check for the epilogue plan, followed by runtime
+ // checks for the main plan.
+ LVP.addMinimumIterationCheck(*BestMainPlan, EPI.EpilogueVF, EPI.EpilogueUF,
+ ElementCount::getFixed(0));
+ bool HasBranchWeights =
+ hasBranchWeightMD(*L->getLoopLatch()->getTerminator());
+ LVP.attachRuntimeChecks(*BestMainPlan, Checks, HasBranchWeights);
+ LVP.addIterationCountCheckBlock(*BestMainPlan, EPI.MainLoopVF,
+ EPI.MainLoopUF);
+
EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TTI, AC, EPI, &CM,
Checks, *BestMainPlan);
auto ExpandedSCEVs = LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF,
- *BestMainPlan, MainILV, DT, false);
+ *BestMainPlan, MainILV, DT, true);
++LoopsVectorized;
+ // Derive EPI fields from VPlan-generated IR.
+ EPI.TripCount = MainILV.getTripCount();
+ BasicBlock *EntryBB =
+ cast<VPIRBasicBlock>(BestMainPlan->getEntry())->getIRBasicBlock();
+ EntryBB->setName("iter.check");
+ EPI.EpilogueIterationCountCheck = EntryBB;
+ // The check chain is: Entry -> [SCEV] -> [Mem] -> MainCheck -> VecPH.
+ // MainCheck is the non-bypass successor of the last runtime check block
+ // (or Entry if there are no runtime checks).
+ BasicBlock *LastCheck = EntryBB;
+ if (BasicBlock *MemBB = Checks.getMemRuntimeChecks().second)
+ LastCheck = MemBB;
+ else if (BasicBlock *SCEVBB = Checks.getSCEVChecks().second)
+ LastCheck = SCEVBB;
+ BasicBlock *ScalarPH = L->getLoopPreheader();
+ auto *BI = cast<CondBrInst>(LastCheck->getTerminator());
+ EPI.MainLoopIterationCountCheck = BI->getSuccessor(0) == ScalarPH
+ ? BI->getSuccessor(1)
+ : BI->getSuccessor(0);
+
// Second pass vectorizes the epilogue and adjusts the control flow
// edges from the first pass.
EpilogueVectorizerEpilogueLoop EpilogILV(L, PSE, LI, DT, TTI, AC, EPI, &CM,
Checks, BestEpiPlan);
+ EpilogILV.setTripCount(EPI.TripCount);
SmallVector<Instruction *> InstsToMove = preparePlanForEpilogueVectorLoop(
BestEpiPlan, L, ExpandedSCEVs, EPI, CM, *PSE.getSE());
+ LVP.attachRuntimeChecks(BestEpiPlan, Checks, HasBranchWeights);
LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV, DT,
true);
connectEpilogueVectorLoop(BestEpiPlan, L, EPI, DT, Checks, InstsToMove,
@@ -9837,6 +9718,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
BestPlan, VF.Width, IC, PSE);
LVP.addMinimumIterationCheck(BestPlan, VF.Width, IC,
VF.MinProfitableTripCount);
+ LVP.attachRuntimeChecks(
+ BestPlan, Checks,
+ hasBranchWeightMD(*L->getLoopLatch()->getTerminator()));
LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false);
++LoopsVectorized;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 2747b7e8c92cb..ea259433b3fde 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -1216,11 +1216,21 @@ void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond,
addBypassBranch(Plan, CheckBlockVPBB, CondVPV, AddBranchWeights);
}
+/// Return an insert point in \p EntryVPBB after existing VPIRPhi,
+/// VPIRInstruction and VPExpandSCEVRecipe recipes.
+static VPBasicBlock::iterator getExpandSCEVInsertPt(VPBasicBlock *EntryVPBB) {
+ auto InsertPt = EntryVPBB->begin();
+ while (InsertPt != EntryVPBB->end() &&
+ isa<VPExpandSCEVRecipe, VPIRPhi, VPIRInstruction>(&*InsertPt))
+ ++InsertPt;
+ return InsertPt;
+}
+
void VPlanTransforms::addMinimumIterationCheck(
VPlan &Plan, ElementCount VF, unsigned UF,
ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue,
bool TailFolded, Loop *OrigLoop, const uint32_t *MinItersBypassWeights,
- DebugLoc DL, PredicatedScalarEvolution &PSE) {
+ DebugLoc DL, PredicatedScalarEvolution &PSE, VPBasicBlock *CheckBlock) {
// Generate code to check if the loop's trip count is less than VF * UF, or
// equal to it in case a scalar epilogue is required; this implies that the
// vector trip count is zero. This check also covers the case where adding one
@@ -1248,7 +1258,9 @@ void VPlanTransforms::addMinimumIterationCheck(
};
VPBasicBlock *EntryVPBB = Plan.getEntry();
- VPBuilder Builder(EntryVPBB);
+ // Place compare and branch in CheckBlock if given, ExpandSCEVs in Entry.
+ VPBasicBlock *CheckVPBB = CheckBlock ? CheckBlock : EntryVPBB;
+ VPBuilder Builder(CheckVPBB);
VPValue *TripCountCheck = Plan.getFalse();
const SCEV *Step = GetMinTripCount();
// TripCountCheck = false, folding tail implies positive vector trip
@@ -1266,7 +1278,9 @@ void VPlanTransforms::addMinimumIterationCheck(
TripCount, Step)) {
// Generate the minimum iteration check only if we cannot prove the
// check is known to be true, or known to be false.
- VPValue *MinTripCountVPV = Builder.createExpandSCEV(Step);
+ // ExpandSCEV must be placed in Entry.
+ VPBuilder SCEVBuilder(EntryVPBB, getExpandSCEVInsertPt(EntryVPBB));
+ VPValue *MinTripCountVPV = SCEVBuilder.createExpandSCEV(Step);
TripCountCheck = Builder.createICmp(
CmpPred, TripCountVPV, MinTripCountVPV, DL, "min.iters.check");
} // else step known to be < trip count, use TripCountCheck preset to false.
@@ -1281,6 +1295,18 @@ void VPlanTransforms::addMinimumIterationCheck(
}
}
+void VPlanTransforms::addIterationCountCheckBlock(
+ VPlan &Plan, ElementCount VF, unsigned UF, bool RequiresScalarEpilogue,
+ Loop *OrigLoop, const uint32_t *MinItersBypassWeights, DebugLoc DL,
+ PredicatedScalarEvolution &PSE) {
+ auto *CheckBlock = Plan.createVPBasicBlock("vector.main.loop.iter.check");
+ insertCheckBlockBeforeVectorLoop(Plan, CheckBlock);
+ addMinimumIterationCheck(Plan, VF, UF, ElementCount::getFixed(0),
+ RequiresScalarEpilogue, /*TailFolded=*/false,
+ OrigLoop, MinItersBypassWeights, DL, PSE,
+ CheckBlock);
+}
+
void VPlanTransforms::addMinimumVectorEpilogueIterationCheck(
VPlan &Plan, Value *TripCount, Value *VectorTripCount,
bool RequiresScalarEpilogue, ElementCount EpilogueVF, unsigned EpilogueUF,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 09abfd5366aa6..e108e9690ec19 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -163,11 +163,22 @@ struct VPlanTransforms {
LLVM_ABI_FOR_TEST static void addMiddleCheck(VPlan &Plan, bool TailFolded);
// Create a check to \p Plan to see if the vector loop should be executed.
+ // If \p CheckBlock is non-null, the compare and branch are placed there;
+ // ExpandSCEV recipes are always placed in Entry.
static void addMinimumIterationCheck(
VPlan &Plan, ElementCount VF, unsigned UF,
ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue,
bool TailFolded, Loop *OrigLoop, const uint32_t *MinItersBypassWeights,
- DebugLoc DL, PredicatedScalarEvolution &PSE);
+ DebugLoc DL, PredicatedScalarEvolution &PSE,
+ VPBasicBlock *CheckBlock = nullptr);
+
+ /// Add a new check block before the vector preheader to \p Plan to check if
+ /// the main vector loop should be executed (TC >= VF * UF).
+ static void
+ addIterationCountCheckBlock(VPlan &Plan, ElementCount VF, unsigned UF,
+ bool RequiresScalarEpilogue, Loop *OrigLoop,
+ const uint32_t *MinItersBypassWeights,
+ DebugLoc DL, PredicatedScalarEvolution &PSE);
/// Add a check to \p Plan to see if the epilogue vector loop should be
/// executed.
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll
index efa64c661ebc0..28684fe527016 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll
@@ -433,7 +433,8 @@ define void @trip_count_based_on_ptrtoint(i64 %x) "target-cpu"="apple-m1" {
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[ADD]], [[X]]
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
-; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; CHECK: vector.scevcheck:
; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[ADD]] to i2
; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[X]] to i2
@@ -442,7 +443,8 @@ define void @trip_count_based_on_ptrtoint(i64 %x) "target-cpu"="apple-m1" {
; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP6]], 0
; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK: vector.main.loop.iter.check:
-; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP2]], 16
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 16
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
index 42fb0970a1fb4..960768434d261 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
@@ -14,7 +14,10 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
; DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
; DEFAULT-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; DEFAULT-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2
+; DEFAULT-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP1]], 4
; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
+; DEFAULT-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; DEFAULT-NEXT: [[TMP31:%.*]] = shl nuw i64 [[TMP7]], 2
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; DEFAULT: [[VECTOR_MEMCHECK]]:
; DEFAULT-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
@@ -24,8 +27,6 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
; DEFAULT-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]]
; DEFAULT-NEXT: br i1 [[DIFF_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
; DEFAULT: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
-; DEFAULT-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; DEFAULT-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 4
; DEFAULT-NEXT: [[MIN_ITERS_CHECK3:%.*]] = icmp ult i64 [[TMP0]], [[TMP8]]
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK3]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
; DEFAULT: [[VECTOR_PH]]:
@@ -65,7 +66,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; DEFAULT-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
; DEFAULT: [[VEC_EPILOG_ITER_CHECK]]:
-; DEFAULT-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], [[TMP2]]
+; DEFAULT-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], [[TMP31]]
; DEFAULT-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
; DEFAULT: [[VEC_EPILOG_PH]]:
; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/load-cast-context.ll b/llvm/test/Transforms/LoopVectorize/AArch64/load-cast-context.ll
index 2b0684f3cda01..0d2f6ba3fb1aa 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/load-cast-context.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/load-cast-context.ll
@@ -57,7 +57,10 @@ define i32 @sext_of_non_memory_op(ptr %src, i32 %offset, i64 %n) #0 {
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 3
+; CHECK-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP1]], 6
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
+; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP43:%.*]] = shl nuw i64 [[TMP9]], 3
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
; CHECK: [[VECTOR_SCEVCHECK]]:
; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[N]] to i32
@@ -67,9 +70,7 @@ define i32 @sext_of_non_memory_op(ptr %src, i32 %offset, i64 %n) #0 {
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
; CHECK-NEXT: br i1 [[TMP7]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
-; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 6
-; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], [[TMP9]]
+; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], [[TMP8]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
@@ -116,7 +117,7 @@ define i32 @sext_of_non_memory_op(ptr %src, i32 %offset, i64 %n) #0 {
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
; CHECK: [[VEC_EPILOG_ITER_CHECK]]:
-; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], [[TMP2]]
+; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], [[TMP43]]
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF4:![0-9]+]]
; CHECK: [[VEC_EPILOG_PH]]:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll
index 945012311aa89..feecffd82622b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll
@@ -48,6 +48,8 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef
; CHECK-VS1-NEXT: [[TMP1:%.*]] = add i32 [[TC]], 1
; CHECK-VS1-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
; CHECK-VS1-NEXT: [[TMP3:%.*]] = sub i64 20, [[TMP2]]
+; CHECK-VS1-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-VS1-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 4
; CHECK-VS1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
; CHECK-VS1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
; CHECK-VS1: [[VECTOR_SCEVCHECK]]:
@@ -61,8 +63,6 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef
; CHECK-VS1-NEXT: [[TMP13:%.*]] = or i1 [[TMP11]], [[TMP12]]
; CHECK-VS1-NEXT: br i1 [[TMP13]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
; CHECK-VS1: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
-; CHECK-VS1-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VS1-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 4
; CHECK-VS1-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP3]], [[TMP15]]
; CHECK-VS1-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VS1: [[VECTOR_PH]]:
@@ -141,6 +141,8 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef
; CHECK-VS2-NEXT: [[TMP1:%.*]] = add i32 [[TC]], 1
; CHECK-VS2-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
; CHECK-VS2-NEXT: [[TMP3:%.*]] = sub i64 20, [[TMP2]]
+; CHECK-VS2-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-VS2-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 3
; CHECK-VS2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
; CHECK-VS2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
; CHECK-VS2: [[VECTOR_SCEVCHECK]]:
@@ -154,8 +156,6 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef
; CHECK-VS2-NEXT: [[TMP13:%.*]] = or i1 [[TMP11]], [[TMP12]]
; CHECK-VS2-NEXT: br i1 [[TMP13]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
; CHECK-VS2: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
-; CHECK-VS2-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VS2-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 3
; CHECK-VS2-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP3]], [[TMP15]]
; CHECK-VS2-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VS2: [[VECTOR_PH]]:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub-epilogue-vec.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub-epilogue-vec.ll
index 766ce9ffce2e1..8db3a3294d7bc 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub-epilogue-vec.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub-epilogue-vec.ll
@@ -8,10 +8,10 @@ define i32 @sub_reduction(i32 %startval, ptr %src1, ptr %src2) #0 {
; CHECK-EPI-LABEL: define i32 @sub_reduction(
; CHECK-EPI-SAME: i32 [[STARTVAL:%.*]], ptr [[SRC1:%.*]], ptr [[SRC2:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-EPI-NEXT: [[ITER_CHECK:.*]]:
+; CHECK-EPI-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-EPI-NEXT: [[TMP1:%.*]] = shl nuw nsw i32 [[TMP0]], 4
; CHECK-EPI-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
; CHECK-EPI: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
-; CHECK-EPI-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-EPI-NEXT: [[TMP1:%.*]] = shl nuw i32 [[TMP0]], 4
; CHECK-EPI-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 39, [[TMP1]]
; CHECK-EPI-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-EPI: [[VECTOR_PH]]:
@@ -87,10 +87,10 @@ define i32 @sub_reduction(i32 %startval, ptr %src1, ptr %src2) #0 {
; CHECK-PARTIAL-RED-EPI-LABEL: define i32 @sub_reduction(
; CHECK-PARTIAL-RED-EPI-SAME: i32 [[STARTVAL:%.*]], ptr [[SRC1:%.*]], ptr [[SRC2:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-PARTIAL-RED-EPI-NEXT: [[ITER_CHECK:.*]]:
+; CHECK-PARTIAL-RED-EPI-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-PARTIAL-RED-EPI-NEXT: [[TMP1:%.*]] = shl nuw nsw i32 [[TMP0]], 4
; CHECK-PARTIAL-RED-EPI-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
; CHECK-PARTIAL-RED-EPI: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
-; CHECK-PARTIAL-RED-EPI-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-PARTIAL-RED-EPI-NEXT: [[TMP1:%.*]] = shl nuw i32 [[TMP0]], 4
; CHECK-PARTIAL-RED-EPI-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 39, [[TMP1]]
; CHECK-PARTIAL-RED-EPI-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-PARTIAL-RED-EPI: [[VECTOR_PH]]:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
index 586e156a07139..8179adfec8201 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
@@ -340,11 +340,12 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 {
; VSCALEFORTUNING2-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
; VSCALEFORTUNING2-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; VSCALEFORTUNING2-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP1]], 1
+; VSCALEFORTUNING2-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 3
; VSCALEFORTUNING2-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], [[TMP5]]
+; VSCALEFORTUNING2-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
+; VSCALEFORTUNING2-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP6]], 1
; VSCALEFORTUNING2-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
; VSCALEFORTUNING2: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
-; VSCALEFORTUNING2-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
-; VSCALEFORTUNING2-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP6]], 3
; VSCALEFORTUNING2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
; VSCALEFORTUNING2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
; VSCALEFORTUNING2: [[VECTOR_PH]]:
@@ -377,7 +378,7 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 {
; VSCALEFORTUNING2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; VSCALEFORTUNING2-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
; VSCALEFORTUNING2: [[VEC_EPILOG_ITER_CHECK]]:
-; VSCALEFORTUNING2-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], [[TMP5]]
+; VSCALEFORTUNING2-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], [[TMP9]]
; VSCALEFORTUNING2-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF5:![0-9]+]]
; VSCALEFORTUNING2: [[VEC_EPILOG_PH]]:
; VSCALEFORTUNING2-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll
index a916701093f39..9ee7e82a48c4e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll
@@ -11,11 +11,12 @@ define void @cost_store_i8(ptr %dst) #0 {
; DEFAULT-NEXT: iter.check:
; DEFAULT-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; DEFAULT-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP10]], 2
+; DEFAULT-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP10]], 5
; DEFAULT-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 101, [[TMP13]]
+; DEFAULT-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
+; DEFAULT-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP6]], 2
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; DEFAULT: vector.main.loop.iter.check:
-; DEFAULT-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; DEFAULT-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5
; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 101, [[TMP1]]
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; DEFAULT: vector.ph:
@@ -38,7 +39,7 @@ define void @cost_store_i8(ptr %dst) #0 {
; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 101, [[N_VEC]]
; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; DEFAULT: vec.epilog.iter.check:
-; DEFAULT-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], [[TMP13]]
+; DEFAULT-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], [[TMP15]]
; DEFAULT-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
; DEFAULT: vec.epilog.ph:
; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
@@ -122,10 +123,7 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 {
; DEFAULT-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; DEFAULT-NEXT: br i1 [[FOUND_CONFLICT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; DEFAULT: vector.main.loop.iter.check:
-; DEFAULT-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; DEFAULT-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5
-; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1000, [[TMP1]]
-; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
+; DEFAULT-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; DEFAULT: vector.ph:
; DEFAULT-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; DEFAULT-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP2]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll
index b9bbc272fc63a..217c96c8a2492 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll
@@ -7,11 +7,11 @@
define i64 @int_reduction_and(ptr noalias nocapture %a, i64 %N) {
; CHECK-LABEL: @int_reduction_and(
; CHECK-NEXT: iter.check:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK: vector.main.loop.iter.check:
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll
index 347ebeadb40a3..ac64e2a4cf7d5 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll
@@ -7,11 +7,11 @@
define i64 @int_reduction_add(ptr %a, i64 %N) {
; CHECK-LABEL: @int_reduction_add(
; CHECK-NEXT: iter.check:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK: vector.main.loop.iter.check:
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll
index 9d636067c59aa..b78fee37eb623 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll
@@ -7,11 +7,11 @@
define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) {
; CHECK-LABEL: @fadd_strict(
; CHECK-NEXT: iter.check:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK: vector.main.loop.iter.check:
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
index afd0fe4c9b39a..52e3bd7167fa9 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
@@ -51,10 +51,10 @@ define void @main_vf_vscale_x_16(ptr %A) #0 {
;
; CHECK-VF8-LABEL: @main_vf_vscale_x_16(
; CHECK-VF8-NEXT: iter.check:
-; CHECK-VF8-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
-; CHECK-VF8: vector.main.loop.iter.check:
; CHECK-VF8-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF8-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5
+; CHECK-VF8-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
+; CHECK-VF8: vector.main.loop.iter.check:
; CHECK-VF8-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; CHECK-VF8-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-VF8: vector.ph:
@@ -142,10 +142,7 @@ define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) {
; CHECK-VF8-NEXT: iter.check:
; CHECK-VF8-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK-VF8: vector.main.loop.iter.check:
-; CHECK-VF8-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
-; CHECK-VF8-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
-; CHECK-VF8-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-VF8-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-VF8: vector.ph:
; CHECK-VF8-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF8-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP2]], 1
@@ -217,11 +214,11 @@ exit:
define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) {
; CHECK-LABEL: @main_vf_vscale_x_2(
; CHECK-NEXT: iter.check:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK: vector.main.loop.iter.check:
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
@@ -268,11 +265,11 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) {
;
; CHECK-VF8-LABEL: @main_vf_vscale_x_2(
; CHECK-VF8-NEXT: iter.check:
+; CHECK-VF8-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-VF8-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-VF8-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8
; CHECK-VF8-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK-VF8: vector.main.loop.iter.check:
-; CHECK-VF8-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
; CHECK-VF8-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; CHECK-VF8-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-VF8: vector.ph:
@@ -337,10 +334,10 @@ exit:
define void @test_pr57912_pointer_induction(ptr %start) #0 {
; CHECK-LABEL: @test_pr57912_pointer_induction(
; CHECK-NEXT: iter.check:
-; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
-; CHECK: vector.main.loop.iter.check:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5
+; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
+; CHECK: vector.main.loop.iter.check:
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 10000, [[TMP1]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
@@ -387,10 +384,10 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 {
;
; CHECK-VF8-LABEL: @test_pr57912_pointer_induction(
; CHECK-VF8-NEXT: iter.check:
-; CHECK-VF8-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
-; CHECK-VF8: vector.main.loop.iter.check:
; CHECK-VF8-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF8-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5
+; CHECK-VF8-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
+; CHECK-VF8: vector.main.loop.iter.check:
; CHECK-VF8-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 10000, [[TMP1]]
; CHECK-VF8-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-VF8: vector.ph:
@@ -459,10 +456,7 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
; CHECK-NEXT: [[N:%.*]] = mul nuw nsw i64 [[V]], 1033
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK: vector.main.loop.iter.check:
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP2]], 2
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vscale-fixed.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vscale-fixed.ll
index 061f9dc551e22..e141aaca75173 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vscale-fixed.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vscale-fixed.ll
@@ -27,11 +27,11 @@ target triple = "aarch64-linux-gnu"
define void @main_vf_vscale_x_16(ptr %A, i64 %n) #0 {
; CHECK-LABEL: @main_vf_vscale_x_16(
; CHECK-NEXT: iter.check:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N:%.*]], 8
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK: vector.main.loop.iter.check:
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
@@ -80,11 +80,12 @@ define void @main_vf_vscale_x_16(ptr %A, i64 %n) #0 {
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: iter.check:
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
+; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP0]], 5
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
+; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP8]], 3
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK-EPILOG-PREFER-SCALABLE: vector.main.loop.iter.check:
-; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 5
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], [[TMP3]]
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-EPILOG-PREFER-SCALABLE: vector.ph:
@@ -107,7 +108,7 @@ define void @main_vf_vscale_x_16(ptr %A, i64 %n) #0 {
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK-EPILOG-PREFER-SCALABLE: vec.epilog.iter.check:
-; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], [[TMP1]]
+; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], [[TMP11]]
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
; CHECK-EPILOG-PREFER-SCALABLE: vec.epilog.ph:
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-epilogue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-epilogue.ll
index 1aac78b88e9d2..8d77b3c900d96 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-epilogue.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-epilogue.ll
@@ -11,10 +11,11 @@ define void @simple_histogram(ptr noalias %buckets, ptr readonly %indices, i64 %
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 1
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.main.loop.iter.check:
-; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP6]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP0]], 2
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], [[TMP3]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH1:%.*]]
; CHECK: vector.ph:
@@ -37,7 +38,7 @@ define void @simple_histogram(ptr noalias %buckets, ptr readonly %indices, i64 %
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
-; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], [[TMP1]]
+; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], [[TMP5]]
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_PH]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll
index 71e9b6ac30df3..611143e287c22 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll
@@ -242,10 +242,11 @@ define void @histogram_8bit(ptr noalias %buckets, ptr readonly %indices, i64 %N)
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP5]], 3
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP9]]
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP2]], 3
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
; CHECK: vector.main.loop.iter.check:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP2]], 4
+; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], [[TMP6]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
@@ -269,7 +270,7 @@ define void @histogram_8bit(ptr noalias %buckets, ptr readonly %indices, i64 %N)
; CHECK-NEXT: [[CMP_N1:%.*]] = icmp eq i64 [[N_VEC]], 0
; CHECK-NEXT: br i1 [[CMP_N1]], label [[FOR_EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
-; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC]], [[TMP9]]
+; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC]], [[TMP7]]
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF11:![0-9]+]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC1]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ENTRY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll
index 3becb967ba109..fd143625d02a4 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll
@@ -226,7 +226,10 @@ define void @test_masked_interleave_group(i32 %N, ptr %mask, ptr %src, ptr %dst)
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[UMAX:%.*]] = shl nuw i64 [[TMP2]], 3
+; CHECK-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP2]], 4
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], [[UMAX]]
+; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP29:%.*]] = shl nuw i64 [[TMP20]], 3
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[N]] to i64
@@ -245,9 +248,7 @@ define void @test_masked_interleave_group(i32 %N, ptr %mask, ptr %src, ptr %dst)
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]]
; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
-; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP20:%.*]] = shl nuw i64 [[TMP8]], 4
-; CHECK-NEXT: [[MIN_ITERS_CHECK6:%.*]] = icmp ult i64 [[TMP1]], [[TMP20]]
+; CHECK-NEXT: [[MIN_ITERS_CHECK6:%.*]] = icmp ult i64 [[TMP1]], [[TMP8]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK6]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
@@ -287,7 +288,7 @@ define void @test_masked_interleave_group(i32 %N, ptr %mask, ptr %src, ptr %dst)
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
; CHECK: [[VEC_EPILOG_ITER_CHECK]]:
-; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], [[UMAX]]
+; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], [[TMP29]]
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF17:![0-9]+]]
; CHECK: [[VEC_EPILOG_PH]]:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll
index 74bcfcd85614c..518a502a08502 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll
@@ -136,11 +136,11 @@ define i32 @reverse_store_with_partial_reduction(ptr noalias %dst, ptr noalias %
; CHECK-SAME: ptr noalias [[DST:%.*]], ptr noalias [[SRC:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: [[ITER_CHECK:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 5
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
-; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 5
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/transform-narrow-interleave-to-widen-memory-epilogue-vec.ll b/llvm/test/Transforms/LoopVectorize/X86/transform-narrow-interleave-to-widen-memory-epilogue-vec.ll
index b626bb804845b..9ef242258ca8a 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/transform-narrow-interleave-to-widen-memory-epilogue-vec.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/transform-narrow-interleave-to-widen-memory-epilogue-vec.ll
@@ -203,7 +203,7 @@ define void @test_non_unit_step_resume_values(ptr %p) #1 {
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br label %[[VEC_EPILOG_ITER_CHECK:.*]]
+; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
; CHECK: [[VEC_EPILOG_ITER_CHECK]]:
; CHECK-NEXT: br i1 true, label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3]]
; CHECK: [[VEC_EPILOG_PH]]:
@@ -221,7 +221,7 @@ define void @test_non_unit_step_resume_values(ptr %p) #1 {
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT12]], 99
; CHECK-NEXT: br i1 [[TMP15]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
-; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[VEC_EPILOG_SCALAR_PH]]
+; CHECK-NEXT: br i1 true, label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i64 [ 396, %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 384, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL14:%.*]] = phi i64 [ 0, %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 3, %[[VEC_EPILOG_ITER_CHECK]] ], [ 99, %[[ITER_CHECK]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
index 213e2f47805a2..f08f7ac0fd4e2 100644
--- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
@@ -1019,7 +1019,8 @@ define i64 @reduction_with_ptr_iv_inttoptr_exit_cond(ptr %base, ptr %src) {
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 768614336404564651
; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3
; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1
-; CHECK-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP4]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
; CHECK: [[VECTOR_SCEVCHECK]]:
; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[ADD]] to i3
; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[PTRTOINT]] to i3
@@ -1028,7 +1029,8 @@ define i64 @reduction_with_ptr_iv_inttoptr_exit_cond(ptr %base, ptr %src) {
; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP8]], 0
; CHECK-NEXT: br i1 [[IDENT_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
-; CHECK-NEXT: br i1 false, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP4]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF]]
More information about the llvm-commits
mailing list