[llvm] [VPlan] Add VPlan-based addMinIterCheck, replace ILV for non-epilogue. (PR #153643)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 26 07:09:58 PDT 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/153643
>From cd7a170a30b2d2cdde4f0a860064c520b2b65e7b Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 31 Jul 2025 17:18:25 +0100
Subject: [PATCH 1/7] [VPlan] Add VPlan-based addMinIterCheck, replace ILV for
non-epilogue.
This patch adds a new VPlan-based addMinimumIterationCheck, which
replaces the ILV version for the non-epilogue case.

The VPlan-based version constructs a SCEV expression to compute the
minimum number of iterations and uses it to determine whether the check
is known to be true or false. Otherwise it creates a VPExpandSCEV
recipe and emits a compare-and-branch.

When using epilogue vectorization, we still need to create the minimum
trip-count check during the legacy skeleton creation. The patch moves
those definitions out of ILV.
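
For readers skimming the diff, the core of the non-tail-folded path in
the new VPlanTransforms::addMinimumIterationCheck (added in
VPlanConstruction.cpp below) boils down to the condensed sketch that
follows. Identifiers mirror the patch hunk; this is illustrative only
and adds no functionality beyond what the diff contains:

  // Step = max(MinProfitableTripCount, VF * UF), built as a SCEV so the
  // minimum-iteration check can be folded before any IR is emitted.
  const SCEV *VFxUF = SE.getElementCount(CountTy, VF * UF, SCEV::FlagNUW);
  const SCEV *MinProfTC =
      SE.getElementCount(CountTy, MinProfitableTripCount, SCEV::FlagNUW);
  const SCEV *Step;
  if (UF * VF.getKnownMinValue() >= MinProfitableTripCount.getKnownMinValue())
    Step = VFxUF;
  else if (!VF.isScalable())
    Step = MinProfTC;                        // max is known at compile time
  else
    Step = SE.getUMaxExpr(MinProfTC, VFxUF); // runtime max for scalable VFs

  // P is ULE if a scalar epilogue is required, ULT otherwise. Apply loop
  // guards to try to fold the check to a constant; otherwise expand the
  // step via a VPExpandSCEV recipe and emit the compare.
  VPValue *CheckMinIters = Plan.getFalse();
  const SCEV *TC = SE.applyLoopGuards(Count, OrigLoop);
  if (SE.isKnownPredicate(P, TC, Step))
    CheckMinIters = Plan.getTrue();          // vector loop is always skipped
  else if (!SE.isKnownPredicate(CmpInst::getInversePredicate(P), TC, Step))
    CheckMinIters = Builder.createICmp(P, Plan.getTripCount(),
                                       Builder.expandSCEV(Step, SE), DL,
                                       "min.iters.check");
  // else: the check is known false and the conditional branch on it can
  // be simplified away later.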
---
llvm/include/llvm/Analysis/ScalarEvolution.h | 4 +-
llvm/lib/Analysis/ScalarEvolution.cpp | 5 +-
.../Vectorize/LoopVectorizationPlanner.h | 8 +
.../Transforms/Vectorize/LoopVectorize.cpp | 188 ++++++++----------
.../Vectorize/VPlanConstruction.cpp | 74 +++++++
.../Transforms/Vectorize/VPlanTransforms.h | 7 +
.../AArch64/conditional-branches-cost.ll | 2 +-
.../AArch64/divs-with-scalable-vfs.ll | 2 +-
.../AArch64/eliminate-tail-predication.ll | 18 +-
.../AArch64/epilog-iv-select-cmp.ll | 4 +-
.../AArch64/partial-reduce-chained.ll | 36 ++--
.../partial-reduce-dot-product-mixed.ll | 8 -
.../AArch64/partial-reduce-dot-product.ll | 58 +-----
.../AArch64/partial-reduce-sub.ll | 6 -
.../LoopVectorize/AArch64/partial-reduce.ll | 16 +-
.../AArch64/pr60831-sve-inv-store-crash.ll | 2 -
.../AArch64/scalable-strict-fadd.ll | 30 +--
.../AArch64/simple_early_exit.ll | 5 +-
.../AArch64/single-early-exit-interleave.ll | 2 +-
.../LoopVectorize/AArch64/sve-epilog-vect.ll | 6 -
.../LoopVectorize/AArch64/sve-fneg.ll | 2 +-
.../LoopVectorize/AArch64/sve-inv-store.ll | 4 +-
.../sve-runtime-check-size-based-threshold.ll | 2 +-
.../LoopVectorize/AArch64/sve-tail-folding.ll | 2 -
.../AArch64/sve-vscale-based-trip-counts.ll | 18 +-
.../LoopVectorize/RISCV/dead-ops-cost.ll | 2 +-
.../LoopVectorize/RISCV/fminimumnum.ll | 16 +-
.../RISCV/tail-folding-bin-unary-ops-args.ll | 36 ++--
.../RISCV/tail-folding-call-intrinsics.ll | 18 +-
.../RISCV/tail-folding-cast-intrinsics.ll | 20 +-
.../LoopVectorize/epilog-iv-select-cmp.ll | 8 +-
.../preserve-dbg-loc-and-loop-metadata.ll | 4 +-
.../LoopVectorize/vplan-iv-transforms.ll | 13 +-
.../LoopVectorize/vplan-predicate-switch.ll | 13 +-
.../vplan-printing-before-execute.ll | 5 +-
35 files changed, 314 insertions(+), 330 deletions(-)
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index 167845ce646b9..858c1d5392071 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -569,7 +569,9 @@ class ScalarEvolution {
LLVM_ABI const SCEV *getTruncateExpr(const SCEV *Op, Type *Ty,
unsigned Depth = 0);
LLVM_ABI const SCEV *getVScale(Type *Ty);
- LLVM_ABI const SCEV *getElementCount(Type *Ty, ElementCount EC);
+ LLVM_ABI const SCEV *
+ getElementCount(Type *Ty, ElementCount EC,
+ SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap);
LLVM_ABI const SCEV *getZeroExtendExpr(const SCEV *Op, Type *Ty,
unsigned Depth = 0);
LLVM_ABI const SCEV *getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index d2c445f1ffaa0..db7b036ddba8a 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -501,10 +501,11 @@ const SCEV *ScalarEvolution::getVScale(Type *Ty) {
return S;
}
-const SCEV *ScalarEvolution::getElementCount(Type *Ty, ElementCount EC) {
+const SCEV *ScalarEvolution::getElementCount(Type *Ty, ElementCount EC,
+ SCEV::NoWrapFlags Flags) {
const SCEV *Res = getConstant(Ty, EC.getKnownMinValue());
if (EC.isScalable())
- Res = getMulExpr(Res, getVScale(Ty));
+ Res = getMulExpr(Res, getVScale(Ty), Flags);
return Res;
}
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 838476dcae661..fba39d00d0367 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -334,6 +334,10 @@ class VPBuilder {
FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags(), DL));
}
+ VPExpandSCEVRecipe *expandSCEV(const SCEV *Expr, ScalarEvolution &SE) {
+ return tryInsertInstruction(new VPExpandSCEVRecipe(Expr, SE));
+ }
+
//===--------------------------------------------------------------------===//
// RAII helpers.
//===--------------------------------------------------------------------===//
@@ -559,6 +563,10 @@ class LoopVectorizationPlanner {
/// Emit remarks for recipes with invalid costs in the available VPlans.
void emitInvalidCostRemarks(OptimizationRemarkEmitter *ORE);
+ /// Add a check to \p Plan to see if the vector loop should be executed.
+ void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF,
+ ElementCount MinProfitableTripCount) const;
+
protected:
/// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
/// according to the information gathered by Legal when it checked if it is
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 70f884016d08c..c9b348405cc2d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -500,26 +500,23 @@ class InnerLoopVectorizer {
InnerLoopVectorizer(Loop *OrigLoop, PredicatedScalarEvolution &PSE,
LoopInfo *LI, DominatorTree *DT,
const TargetTransformInfo *TTI, AssumptionCache *AC,
- ElementCount VecWidth,
- ElementCount MinProfitableTripCount,
- unsigned UnrollFactor, LoopVectorizationCostModel *CM,
- BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
- GeneratedRTChecks &RTChecks, VPlan &Plan)
+ ElementCount VecWidth, unsigned UnrollFactor,
+ LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI,
+ ProfileSummaryInfo *PSI, GeneratedRTChecks &RTChecks,
+ VPlan &Plan)
: OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TTI(TTI), AC(AC),
- VF(VecWidth), MinProfitableTripCount(MinProfitableTripCount),
- UF(UnrollFactor), Builder(PSE.getSE()->getContext()), Cost(CM),
- BFI(BFI), PSI(PSI), RTChecks(RTChecks), Plan(Plan),
+ VF(VecWidth), UF(UnrollFactor), Builder(PSE.getSE()->getContext()),
+ Cost(CM), BFI(BFI), PSI(PSI), RTChecks(RTChecks), Plan(Plan),
VectorPHVPBB(cast<VPBasicBlock>(
Plan.getVectorLoopRegion()->getSinglePredecessor())) {}
virtual ~InnerLoopVectorizer() = default;
- /// Create a new empty loop that will contain vectorized instructions later
- /// on, while the old loop will be used as the scalar remainder. Control flow
- /// is generated around the vectorized (and scalar epilogue) loops consisting
- /// of various checks and bypasses. Return the pre-header block of the new
- /// loop. In the case of epilogue vectorization, this function is overriden to
- /// handle the more complex control flow around the loops.
+ /// Create new basic blocks needed as entries and exits of the code generated
+ /// by executing a VPlan. For epilogue vectorization, it will create blocks
+ /// for the minimum iteration checks and IR basic blocks for the vector and
+ /// scalar preheaders. Otherwise it will create a basic block for the scalar
+ /// preheader only.
virtual BasicBlock *createVectorizedLoopSkeleton();
/// Fix the vectorized code, taking care of header phi's, and more.
@@ -547,16 +544,8 @@ class InnerLoopVectorizer {
protected:
friend class LoopVectorizationPlanner;
- // Create a check to see if the vector loop should be executed
- Value *createIterationCountCheck(ElementCount VF, unsigned UF) const;
-
- /// Emit a bypass check to see if the vector trip count is zero, including if
- /// it overflows.
- void emitIterationCountCheck(BasicBlock *Bypass);
-
- /// Emit basic blocks (prefixed with \p Prefix) for the iteration check,
- /// vector loop preheader, middle block and scalar preheader.
- void createVectorLoopSkeleton(StringRef Prefix);
+ /// Create a new IR basic block for the scalar preheader.
+ void createScalarPreheader(StringRef Prefix);
/// Allow subclasses to override and print debug traces before/after vplan
/// execution, when trace information is requested.
@@ -592,8 +581,6 @@ class InnerLoopVectorizer {
/// vector elements.
ElementCount VF;
- ElementCount MinProfitableTripCount;
-
/// The vectorization unroll factor to use. Each scalar is vectorized to this
/// many different vector instructions.
unsigned UF;
@@ -679,9 +666,8 @@ class InnerLoopAndEpilogueVectorizer : public InnerLoopVectorizer {
GeneratedRTChecks &Checks, VPlan &Plan, ElementCount VecWidth,
ElementCount MinProfitableTripCount, unsigned UnrollFactor)
: InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TTI, AC, VecWidth,
- MinProfitableTripCount, UnrollFactor, CM, BFI, PSI,
- Checks, Plan),
- EPI(EPI) {}
+ UnrollFactor, CM, BFI, PSI, Checks, Plan),
+ EPI(EPI), MinProfitableTripCount(MinProfitableTripCount) {}
// Override this function to handle the more complex control flow around the
// three loops.
@@ -701,6 +687,9 @@ class InnerLoopAndEpilogueVectorizer : public InnerLoopVectorizer {
/// iteration count of the loop is so small that the main vector loop is
/// completely skipped.
EpilogueLoopVectorizationInfo &EPI;
+
+protected:
+ ElementCount MinProfitableTripCount;
};
/// A specialized derived class of inner loop vectorizer that performs
@@ -724,6 +713,9 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
BasicBlock *createEpilogueVectorizedLoopSkeleton() final;
protected:
+ // Create a check to see if the vector loop should be executed
+ Value *createIterationCountCheck(ElementCount VF, unsigned UF) const;
+
/// Emits an iteration count bypass check once for the main loop (when \p
/// ForEpilogue is false) and once for the epilogue loop (when \p
/// ForEpilogue is true).
@@ -2292,7 +2284,8 @@ void InnerLoopVectorizer::introduceCheckBlockInVPlan(BasicBlock *CheckIRBB) {
}
}
-Value *InnerLoopVectorizer::createIterationCountCheck(ElementCount VF,
+Value *
+EpilogueVectorizerMainLoop::createIterationCountCheck(ElementCount VF,
unsigned UF) const {
// Generate code to check if the loop's trip count is less than VF * UF, or
// equal to it in case a scalar epilogue is required; this implies that the
@@ -2363,25 +2356,6 @@ Value *InnerLoopVectorizer::createIterationCountCheck(ElementCount VF,
return CheckMinIters;
}
-void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
- BasicBlock *const TCCheckBlock = LoopVectorPreHeader;
- Value *CheckMinIters = createIterationCountCheck(VF, UF);
- // Create new preheader for vector loop.
- LoopVectorPreHeader = SplitBlock(TCCheckBlock, TCCheckBlock->getTerminator(),
- static_cast<DominatorTree *>(nullptr), LI,
- nullptr, "vector.ph");
-
- BranchInst &BI =
- *BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters);
- if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator()))
- setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false);
- ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI);
-
- assert(cast<VPIRBasicBlock>(Plan.getEntry())->getIRBasicBlock() ==
- TCCheckBlock &&
- "Plan's entry must be TCCCheckBlock");
-}
-
/// Replace \p VPBB with a VPIRBasicBlock wrapping \p IRBB. All recipes from \p
/// VPBB are moved to the end of the newly created VPIRBasicBlock. VPBB must
/// have a single predecessor, which is rewired to the new VPIRBasicBlock. All
@@ -2402,7 +2376,7 @@ static VPIRBasicBlock *replaceVPBBWithIRVPBB(VPBasicBlock *VPBB,
return IRVPBB;
}
-void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
+void InnerLoopVectorizer::createScalarPreheader(StringRef Prefix) {
LoopVectorPreHeader = OrigLoop->getLoopPreheader();
assert(LoopVectorPreHeader && "Invalid loop structure");
assert((OrigLoop->getUniqueLatchExitBlock() ||
@@ -2414,8 +2388,8 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
LI, nullptr, Twine(Prefix) + "scalar.ph");
// NOTE: The Plan's scalar preheader VPBB isn't replaced with a VPIRBasicBlock
// wrapping LoopScalarPreHeader here at the moment, because the Plan's scalar
- // preheader may be unreachable at this point. Instead it is replaced in
- // createVectorizedLoopSkeleton.
+ // preheader may be unreachable at this point and SCEV expansion may add to it
+ // later.
}
/// Return the expanded step for \p ID using \p ExpandedSCEVs to look up SCEV
@@ -2456,53 +2430,9 @@ static void addFullyUnrolledInstructionsToIgnore(
}
BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
- /*
- In this function we generate a new loop. The new loop will contain
- the vectorized instructions while the old loop will continue to run the
- scalar remainder.
-
- [ ] <-- old preheader - loop iteration number check and SCEVs in Plan's
- / | preheader are expanded here. Eventually all required SCEV
- / | expansion should happen here.
- / v
- | [ ] <-- vector loop bypass (may consist of multiple blocks).
- | / |
- | / v
- || [ ] <-- vector pre header.
- |/ |
- | v
- | [ ] \
- | [ ]_| <-- vector loop (created during VPlan execution).
- | |
- | v
- \ -[ ] <--- middle-block (wrapped in VPIRBasicBlock with the branch to
- | | successors created during VPlan execution)
- \/ |
- /\ v
- | ->[ ] <--- new preheader (wrapped in VPIRBasicBlock).
- | |
- (opt) v <-- edge from middle to exit iff epilogue is not required.
- | [ ] \
- | [ ]_| <-- old scalar loop to handle remainder (scalar epilogue, header
- | | wrapped in VPIRBasicBlock).
- \ |
- \ v
- >[ ] <-- exit block(s). (wrapped in VPIRBasicBlock)
- ...
- */
-
- // Create an empty vector loop, and prepare basic blocks for the runtime
- // checks.
- createVectorLoopSkeleton("");
-
- // Now, compare the new count to zero. If it is zero skip the vector loop and
- // jump to the scalar loop. This check also covers the case where the
- // backedge-taken count is uint##_max: adding one to it will overflow leading
- // to an incorrect trip count of zero. In this (rare) case we will also jump
- // to the scalar loop.
- emitIterationCountCheck(LoopScalarPreHeader);
-
- replaceVPBBWithIRVPBB(VectorPHVPBB, LoopVectorPreHeader);
+ // Create a new IR basic block for the scalar preheader.
+ createScalarPreheader("");
+
return LoopVectorPreHeader;
}
@@ -7327,6 +7257,17 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
BestVPlan.resetTripCount(Exp);
ExpSCEV->eraseFromParent();
}
+ // SCEV expansion will add new instructions in the IRBB wrapped by Entry.
+ // Remove existing VPIRInstructions/VPIRPhi and re-create them to make sure
+ // all IR instructions are wrapped. Otherwise VPInstructions may be inserted
+ // at the wrong place.
+ for (VPRecipeBase &R : make_early_inc_range(*Entry)) {
+ if (!isa<VPIRInstruction, VPIRPhi>(&R))
+ continue;
+ R.eraseFromParent();
+ }
+ for (Instruction &I : drop_begin(reverse(*Entry->getIRBasicBlock())))
+ VPIRInstruction::create(I)->insertBefore(*Entry, Entry->begin());
if (!ILV.getTripCount())
ILV.setTripCount(State.get(BestVPlan.getTripCount(), VPLane(0)));
@@ -7470,7 +7411,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
/// This function is partially responsible for generating the control flow
/// depicted in https://llvm.org/docs/Vectorizers.html#epilogue-vectorization.
BasicBlock *EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton() {
- createVectorLoopSkeleton("");
+ createScalarPreheader("");
// Generate the code to check the minimum iteration count of the vector
// epilogue (see below).
@@ -7557,7 +7498,7 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
/// depicted in https://llvm.org/docs/Vectorizers.html#epilogue-vectorization.
BasicBlock *
EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() {
- createVectorLoopSkeleton("vec.epilog.");
+ createScalarPreheader("vec.epilog.");
// Now, compare the remaining count and if there aren't enough iterations to
// execute the vectorized epilogue skip to the scalar part.
@@ -9363,6 +9304,28 @@ void LoopVectorizationPlanner::attachRuntimeChecks(
}
}
+void LoopVectorizationPlanner::addMinimumIterationCheck(
+ VPlan &Plan, ElementCount VF, unsigned UF,
+ ElementCount MinProfitableTripCount) const {
+ // vscale is not necessarily a power-of-2, which means we cannot guarantee
+ // an overflow to zero when updating induction variables and so an
+ // additional overflow check is required before entering the vector loop.
+ bool CheckNeededWithTailFolding =
+ VF.isScalable() && !TTI.isVScaleKnownToBeAPowerOfTwo() &&
+ !isIndvarOverflowCheckKnownFalse(&CM, VF, 1) &&
+ CM.getTailFoldingStyle() !=
+ TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck;
+ VPlanTransforms::addMinimumIterationCheck(
+ Plan, VF, UF, MinProfitableTripCount,
+ CM.requiresScalarEpilogue(VF.isVector()), CM.foldTailByMasking(),
+ CheckNeededWithTailFolding, OrigLoop,
+ hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())
+ ? &MinItersBypassWeights[0]
+ : nullptr,
+ OrigLoop->getLoopPredecessor()->getTerminator()->getDebugLoc(),
+ *PSE.getSE());
+}
+
void VPDerivedIVRecipe::execute(VPTransformState &State) {
assert(!State.Lane && "VPDerivedIVRecipe being replicated.");
@@ -9478,10 +9441,13 @@ static bool processLoopInVPlanNativePath(
{
GeneratedRTChecks Checks(PSE, DT, LI, TTI, F->getDataLayout(), CM.CostKind);
- InnerLoopVectorizer LB(L, PSE, LI, DT, TTI, AC, VF.Width, VF.Width, 1, &CM,
- BFI, PSI, Checks, BestPlan);
+ InnerLoopVectorizer LB(L, PSE, LI, DT, TTI, AC, VF.Width, 1, &CM, BFI, PSI,
+ Checks, BestPlan);
LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \""
<< L->getHeader()->getParent()->getName() << "\"\n");
+ LVP.addMinimumIterationCheck(BestPlan, VF.Width, 1,
+ VF.MinProfitableTripCount);
+
LVP.executePlan(VF.Width, 1, BestPlan, LB, DT, false);
}
@@ -10260,16 +10226,17 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// If we decided that it is not legal to vectorize the loop, then
// interleave it.
VPlan &BestPlan = LVP.getPlanFor(VF.Width);
- InnerLoopVectorizer Unroller(
- L, PSE, LI, DT, TTI, AC, ElementCount::getFixed(1),
- ElementCount::getFixed(1), IC, &CM, BFI, PSI, Checks, BestPlan);
+ InnerLoopVectorizer Unroller(L, PSE, LI, DT, TTI, AC,
+ ElementCount::getFixed(1), IC, &CM, BFI, PSI,
+ Checks, BestPlan);
// TODO: Move to general VPlan pipeline once epilogue loops are also
// supported.
VPlanTransforms::runPass(
VPlanTransforms::materializeConstantVectorTripCount, BestPlan,
VF.Width, IC, PSE);
-
+ LVP.addMinimumIterationCheck(BestPlan, VF.Width, IC,
+ VF.MinProfitableTripCount);
LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT, false);
ORE->emit([&]() {
@@ -10330,14 +10297,15 @@ bool LoopVectorizePass::processLoop(Loop *L) {
if (!Checks.hasChecks())
DisableRuntimeUnroll = true;
} else {
- InnerLoopVectorizer LB(L, PSE, LI, DT, TTI, AC, VF.Width,
- VF.MinProfitableTripCount, IC, &CM, BFI, PSI,
- Checks, BestPlan);
+ InnerLoopVectorizer LB(L, PSE, LI, DT, TTI, AC, VF.Width, IC, &CM, BFI,
+ PSI, Checks, BestPlan);
// TODO: Move to general VPlan pipeline once epilogue loops are also
// supported.
VPlanTransforms::runPass(
VPlanTransforms::materializeConstantVectorTripCount, BestPlan,
VF.Width, IC, PSE);
+ LVP.addMinimumIterationCheck(BestPlan, VF.Width, IC,
+ VF.MinProfitableTripCount);
LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false);
++LoopsVectorized;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index b231a8429503f..4f36de870d067 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -670,6 +670,80 @@ void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond,
}
}
+void VPlanTransforms::addMinimumIterationCheck(
+ VPlan &Plan, ElementCount VF, unsigned UF,
+ ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue,
+ bool TailFolded, bool CheckNeededWithTailFolding, Loop *OrigLoop,
+ const uint32_t *MinItersBypassWeights, DebugLoc DL, ScalarEvolution &SE) {
+ // Generate code to check if the loop's trip count is less than VF * UF, or
+ // equal to it in case a scalar epilogue is required; this implies that the
+ // vector trip count is zero. This check also covers the case where adding one
+ // to the backedge-taken count overflowed leading to an incorrect trip count
+ // of zero. In this case we will also jump to the scalar loop.
+ auto P = RequiresScalarEpilogue ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
+ // If tail is to be folded, vector loop takes care of all iterations.
+ const SCEV *Count = vputils::getSCEVExprForVPValue(Plan.getTripCount(), SE);
+ Type *CountTy = Count->getType();
+ auto CreateStep = [&]() -> const SCEV * {
+ const SCEV *VFxUF = SE.getElementCount(CountTy, (VF * UF), SCEV::FlagNUW);
+ // Create step with max(MinProfitableTripCount, UF * VF).
+ if (UF * VF.getKnownMinValue() >= MinProfitableTripCount.getKnownMinValue())
+ return VFxUF;
+
+ const SCEV *MinProfTC =
+ SE.getElementCount(CountTy, MinProfitableTripCount, SCEV::FlagNUW);
+ if (!VF.isScalable())
+ return MinProfTC;
+ return SE.getUMaxExpr(MinProfTC, VFxUF);
+ };
+
+ VPBasicBlock *EntryVPBB = Plan.getEntry();
+ VPBuilder Builder(EntryVPBB);
+ VPValue *CheckMinIters = Plan.getFalse();
+ const SCEV *Step = CreateStep();
+ if (!TailFolded) {
+ // TODO: Emit unconditional branch to vector preheader instead of
+ // conditional branch with known condition.
+ const SCEV *TripCountSCEV = SE.applyLoopGuards(Count, OrigLoop);
+ // Check if the trip count is < the step.
+ if (SE.isKnownPredicate(P, TripCountSCEV, Step)) {
+ // TODO: Ensure step is at most the trip count when determining max VF and
+ // UF, w/o tail folding.
+ CheckMinIters = Plan.getTrue();
+ } else if (!SE.isKnownPredicate(CmpInst::getInversePredicate(P),
+ TripCountSCEV, Step)) {
+ // Generate the minimum iteration check only if we cannot prove the
+ // check is known to be true, or known to be false.
+ CheckMinIters = Builder.createICmp(P, Plan.getTripCount(),
+ Builder.expandSCEV(Step, SE), DL,
+ "min.iters.check");
+ } // else step known to be < trip count, use CheckMinIters preset to false.
+ } else if (CheckNeededWithTailFolding) {
+ // vscale is not necessarily a power-of-2, which means we cannot guarantee
+ // an overflow to zero when updating induction variables and so an
+ // additional overflow check is required before entering the vector loop.
+
+ // Get the maximum unsigned value for the type.
+ VPValue *MaxUIntTripCount = Plan.getOrAddLiveIn(
+ ConstantInt::get(CountTy, cast<IntegerType>(CountTy)->getMask()));
+ VPValue *LHS = Builder.createNaryOp(Instruction::Sub,
+ {MaxUIntTripCount, Plan.getTripCount()},
+ DebugLoc::getUnknown());
+
+ // Don't execute the vector loop if (UMax - n) < (VF * UF).
+ CheckMinIters = Builder.createICmp(ICmpInst::ICMP_ULT, LHS,
+ Builder.expandSCEV(Step, SE), DL);
+ }
+ VPInstruction *Term =
+ Builder.createNaryOp(VPInstruction::BranchOnCond, {CheckMinIters}, DL);
+ if (MinItersBypassWeights) {
+ MDBuilder MDB(Plan.getContext());
+ MDNode *BranchWeights = MDB.createBranchWeights(
+ ArrayRef(MinItersBypassWeights, 2), /*IsExpected=*/false);
+ Term->addMetadata(LLVMContext::MD_prof, BranchWeights);
+ }
+}
+
bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
auto GetMinMaxCompareValue = [](VPReductionPHIRecipe *RedPhiR) -> VPValue * {
auto *MinMaxR = dyn_cast<VPRecipeWithIRFlags>(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 5b3d18b237efb..8710da53f0ed2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -79,6 +79,13 @@ struct VPlanTransforms {
bool RequiresScalarEpilogueCheck,
bool TailFolded);
+ /// Add a check to \p Plan to see if the vector loop should be executed.
+ static void addMinimumIterationCheck(
+ VPlan &Plan, ElementCount VF, unsigned UF,
+ ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue,
+ bool TailFolded, bool CheckNeededWithTailFolding, Loop *OrigLoop,
+ const uint32_t *MinItersBypassWeights, DebugLoc DL, ScalarEvolution &SE);
+
/// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning \p Plan's
/// flat CFG into a hierarchical CFG.
LLVM_ABI_FOR_TEST static void createLoopRegions(VPlan &Plan);
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
index 01f2ea080377d..a8a786fb551fe 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
@@ -1366,7 +1366,7 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 {
; DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
; DEFAULT-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; DEFAULT-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2
-; DEFAULT-NEXT: [[TMP3:%.*]] = call i64 @llvm.umax.i64(i64 8, i64 [[TMP2]])
+; DEFAULT-NEXT: [[TMP3:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP2]], i64 8)
; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP3]]
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; DEFAULT: [[VECTOR_MEMCHECK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
index bfebbdad5af0b..a44cc09b8a8ea 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
@@ -9,7 +9,7 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) {
; CHECK-NEXT: [[CONV61:%.*]] = zext i32 [[X]] to i64
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP10]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 8, i64 [[TMP1]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 8)
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
; CHECK: [[VECTOR_SCEVCHECK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll
index 7363f86cdab7a..2ade55c10db36 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll
@@ -9,22 +9,20 @@ define void @f1(ptr %A) #0 {
; CHECK-LABEL: define void @f1
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
-; CHECK-NEXT: store <vscale x 4 x i32> splat (i32 1), ptr [[TMP4]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: store <vscale x 4 x i32> splat (i32 1), ptr [[TMP2]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll
index 8013a8f6e0d82..3a46944712567 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll
@@ -8,8 +8,8 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) {
; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[N]], -1
; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 1
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 8
; CHECK-NEXT: [[FR:%.*]] = freeze i8 [[START]]
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 8
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP2]], 32
@@ -127,8 +127,8 @@ define i32 @select_icmp_var_start_iv_trunc(i32 %N, i32 %start) #0 {
; CHECK-NEXT: [[N_POS:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT: call void @llvm.assume(i1 [[N_POS]])
; CHECK-NEXT: [[N_EXT:%.*]] = zext i32 [[N]] to i64
-; CHECK-NEXT: [[FR:%.*]] = freeze i32 [[START]]
; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i64 [[N_EXT]], 1
+; CHECK-NEXT: [[FR:%.*]] = freeze i32 [[START]]
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll
index 330c22d736809..c444d5bcc82c7 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll
@@ -53,7 +53,7 @@ define i32 @chained_partial_reduce_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
; CHECK-SVE-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1
; CHECK-SVE-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64
; CHECK-SVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
+; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-SVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]]
; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE: vector.ph:
@@ -94,7 +94,7 @@ define i32 @chained_partial_reduce_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
; CHECK-SVE-MAXBW-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1
; CHECK-SVE-MAXBW-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64
; CHECK-SVE-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
+; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
; CHECK-SVE-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]]
; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE-MAXBW: vector.ph:
@@ -205,7 +205,7 @@ define i32 @chained_partial_reduce_add_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
; CHECK-SVE-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1
; CHECK-SVE-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64
; CHECK-SVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
+; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-SVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]]
; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE: vector.ph:
@@ -246,7 +246,7 @@ define i32 @chained_partial_reduce_add_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
; CHECK-SVE-MAXBW-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1
; CHECK-SVE-MAXBW-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64
; CHECK-SVE-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
+; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
; CHECK-SVE-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]]
; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE-MAXBW: vector.ph:
@@ -357,7 +357,7 @@ define i32 @chained_partial_reduce_sub_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
; CHECK-SVE-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1
; CHECK-SVE-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64
; CHECK-SVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
+; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-SVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]]
; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE: vector.ph:
@@ -398,7 +398,7 @@ define i32 @chained_partial_reduce_sub_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
; CHECK-SVE-MAXBW-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1
; CHECK-SVE-MAXBW-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64
; CHECK-SVE-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
+; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
; CHECK-SVE-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]]
; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE-MAXBW: vector.ph:
@@ -513,7 +513,7 @@ define i32 @chained_partial_reduce_sub_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
; CHECK-SVE-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1
; CHECK-SVE-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64
; CHECK-SVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
+; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-SVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]]
; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE: vector.ph:
@@ -554,7 +554,7 @@ define i32 @chained_partial_reduce_sub_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
; CHECK-SVE-MAXBW-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1
; CHECK-SVE-MAXBW-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64
; CHECK-SVE-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
+; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
; CHECK-SVE-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]]
; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE-MAXBW: vector.ph:
@@ -671,7 +671,7 @@ define i32 @chained_partial_reduce_add_add_add(ptr %a, ptr %b, ptr %c, i32 %N) #
; CHECK-SVE-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1
; CHECK-SVE-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64
; CHECK-SVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
+; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-SVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]]
; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE: vector.ph:
@@ -714,7 +714,7 @@ define i32 @chained_partial_reduce_add_add_add(ptr %a, ptr %b, ptr %c, i32 %N) #
; CHECK-SVE-MAXBW-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1
; CHECK-SVE-MAXBW-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64
; CHECK-SVE-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
+; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
; CHECK-SVE-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]]
; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE-MAXBW: vector.ph:
@@ -835,7 +835,7 @@ define i32 @chained_partial_reduce_sub_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) #
; CHECK-SVE-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1
; CHECK-SVE-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64
; CHECK-SVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
+; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-SVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]]
; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE: vector.ph:
@@ -878,7 +878,7 @@ define i32 @chained_partial_reduce_sub_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) #
; CHECK-SVE-MAXBW-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1
; CHECK-SVE-MAXBW-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64
; CHECK-SVE-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
+; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
; CHECK-SVE-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]]
; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE-MAXBW: vector.ph:
@@ -997,7 +997,7 @@ define i32 @chained_partial_reduce_madd_extadd(ptr %a, ptr %b, ptr %c, i32 %N) #
; CHECK-SVE-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1
; CHECK-SVE-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64
; CHECK-SVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
+; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-SVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]]
; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE: vector.ph:
@@ -1037,7 +1037,7 @@ define i32 @chained_partial_reduce_madd_extadd(ptr %a, ptr %b, ptr %c, i32 %N) #
; CHECK-SVE-MAXBW-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1
; CHECK-SVE-MAXBW-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64
; CHECK-SVE-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
+; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
; CHECK-SVE-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]]
; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE-MAXBW: vector.ph:
@@ -1141,7 +1141,7 @@ define i32 @chained_partial_reduce_extadd_extadd(ptr %a, ptr %b, i32 %N) #0 {
; CHECK-SVE-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1
; CHECK-SVE-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64
; CHECK-SVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
+; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-SVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]]
; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE: vector.ph:
@@ -1177,7 +1177,7 @@ define i32 @chained_partial_reduce_extadd_extadd(ptr %a, ptr %b, i32 %N) #0 {
; CHECK-SVE-MAXBW-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1
; CHECK-SVE-MAXBW-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64
; CHECK-SVE-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
+; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
; CHECK-SVE-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]]
; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE-MAXBW: vector.ph:
@@ -1278,7 +1278,7 @@ define i32 @chained_partial_reduce_extadd_madd(ptr %a, ptr %b, ptr %c, i32 %N) #
; CHECK-SVE-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1
; CHECK-SVE-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64
; CHECK-SVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
+; CHECK-SVE-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-SVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]]
; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE: vector.ph:
@@ -1318,7 +1318,7 @@ define i32 @chained_partial_reduce_extadd_madd(ptr %a, ptr %b, ptr %c, i32 %N) #
; CHECK-SVE-MAXBW-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1
; CHECK-SVE-MAXBW-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64
; CHECK-SVE-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
+; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
; CHECK-SVE-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]]
; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE-MAXBW: vector.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll
index 82ea025700c37..8766d6540ed19 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll
@@ -9,8 +9,6 @@ define i32 @sudot(ptr %a, ptr %b) #0 {
; CHECK-LABEL: define i32 @sudot(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -55,8 +53,6 @@ define i32 @sudot(ptr %a, ptr %b) #0 {
; CHECK-NOI8MM-LABEL: define i32 @sudot(
; CHECK-NOI8MM-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NOI8MM-NEXT: entry:
-; CHECK-NOI8MM-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NOI8MM-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4
; CHECK-NOI8MM-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-NOI8MM: vector.ph:
; CHECK-NOI8MM-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -124,8 +120,6 @@ define i32 @usdot(ptr %a, ptr %b) #0 {
; CHECK-LABEL: define i32 @usdot(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -170,8 +164,6 @@ define i32 @usdot(ptr %a, ptr %b) #0 {
; CHECK-NOI8MM-LABEL: define i32 @usdot(
; CHECK-NOI8MM-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NOI8MM-NEXT: entry:
-; CHECK-NOI8MM-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NOI8MM-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4
; CHECK-NOI8MM-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-NOI8MM: vector.ph:
; CHECK-NOI8MM-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
index 938d58bf5e1e3..792249d7829b0 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
@@ -10,8 +10,6 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1-LABEL: define i32 @dotp(
; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-INTERLEAVE1-NEXT: entry:
-; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
; CHECK-INTERLEAVE1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVE1: vector.ph:
; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -42,8 +40,6 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-LABEL: define i32 @dotp(
; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-INTERLEAVED-NEXT: entry:
-; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
; CHECK-INTERLEAVED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -88,8 +84,6 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
; CHECK-MAXBW-LABEL: define i32 @dotp(
; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-MAXBW-NEXT: entry:
-; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -143,8 +137,6 @@ define i64 @not_dotp_i8_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %b
; CHECK-INTERLEAVE1-LABEL: define i64 @not_dotp_i8_to_i64_has_neon_dotprod(
; CHECK-INTERLEAVE1-SAME: ptr readonly [[A:%.*]], ptr readonly [[B:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-INTERLEAVE1-NEXT: entry:
-; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP7]], 1
; CHECK-INTERLEAVE1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVE1: vector.ph:
; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
@@ -177,8 +169,6 @@ define i64 @not_dotp_i8_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %b
; CHECK-INTERLEAVED-LABEL: define i64 @not_dotp_i8_to_i64_has_neon_dotprod(
; CHECK-INTERLEAVED-SAME: ptr readonly [[A:%.*]], ptr readonly [[B:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-INTERLEAVED-NEXT: entry:
-; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP7]], 2
; CHECK-INTERLEAVED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
@@ -225,8 +215,6 @@ define i64 @not_dotp_i8_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %b
; CHECK-MAXBW-LABEL: define i64 @not_dotp_i8_to_i64_has_neon_dotprod(
; CHECK-MAXBW-SAME: ptr readonly [[A:%.*]], ptr readonly [[B:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-MAXBW-NEXT: entry:
-; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -284,8 +272,6 @@ define i64 @not_dotp_i16_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %
; CHECK-INTERLEAVE1-LABEL: define i64 @not_dotp_i16_to_i64_has_neon_dotprod(
; CHECK-INTERLEAVE1-SAME: ptr readonly [[A:%.*]], ptr readonly [[B:%.*]]) #[[ATTR1]] {
; CHECK-INTERLEAVE1-NEXT: entry:
-; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP7]], 1
; CHECK-INTERLEAVE1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVE1: vector.ph:
; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
@@ -322,8 +308,6 @@ define i64 @not_dotp_i16_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %
; CHECK-INTERLEAVED-LABEL: define i64 @not_dotp_i16_to_i64_has_neon_dotprod(
; CHECK-INTERLEAVED-SAME: ptr readonly [[A:%.*]], ptr readonly [[B:%.*]]) #[[ATTR1]] {
; CHECK-INTERLEAVED-NEXT: entry:
-; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP7]], 2
; CHECK-INTERLEAVED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
@@ -374,8 +358,6 @@ define i64 @not_dotp_i16_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %
; CHECK-MAXBW-LABEL: define i64 @not_dotp_i16_to_i64_has_neon_dotprod(
; CHECK-MAXBW-SAME: ptr readonly [[A:%.*]], ptr readonly [[B:%.*]]) #[[ATTR1]] {
; CHECK-MAXBW-NEXT: entry:
-; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -793,8 +775,6 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1-LABEL: define i32 @not_dotp_not_loop_carried(
; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-INTERLEAVE1-NEXT: entry:
-; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
; CHECK-INTERLEAVE1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVE1: vector.ph:
; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -837,8 +817,6 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-LABEL: define i32 @not_dotp_not_loop_carried(
; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-INTERLEAVED-NEXT: entry:
-; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4
; CHECK-INTERLEAVED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -887,8 +865,6 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) #0 {
; CHECK-MAXBW-LABEL: define i32 @not_dotp_not_loop_carried(
; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-MAXBW-NEXT: entry:
-; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -954,8 +930,6 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1-LABEL: define i32 @not_dotp_not_phi(
; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-INTERLEAVE1-NEXT: entry:
-; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
; CHECK-INTERLEAVE1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVE1: vector.ph:
; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -992,8 +966,6 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-LABEL: define i32 @not_dotp_not_phi(
; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-INTERLEAVED-NEXT: entry:
-; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4
; CHECK-INTERLEAVED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -1036,8 +1008,6 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 {
; CHECK-MAXBW-LABEL: define i32 @not_dotp_not_phi(
; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-MAXBW-NEXT: entry:
-; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -1098,7 +1068,7 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1-SAME: i32 [[NUM_OUT:%.*]], i64 [[NUM_IN:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-INTERLEAVE1-NEXT: entry:
; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP13]], 2
+; CHECK-INTERLEAVE1-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP13]], 2
; CHECK-INTERLEAVE1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUM_IN]], [[TMP15]]
; CHECK-INTERLEAVE1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVE1: vector.ph:
@@ -1164,7 +1134,7 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-SAME: i32 [[NUM_OUT:%.*]], i64 [[NUM_IN:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-INTERLEAVED-NEXT: entry:
; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP13]], 3
+; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP13]], 3
; CHECK-INTERLEAVED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUM_IN]], [[TMP15]]
; CHECK-INTERLEAVED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
@@ -1286,7 +1256,7 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 {
; CHECK-MAXBW-SAME: i32 [[NUM_OUT:%.*]], i64 [[NUM_IN:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-MAXBW-NEXT: entry:
; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
+; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
; CHECK-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUM_IN]], [[TMP1]]
; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
@@ -1541,8 +1511,6 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1-LABEL: define i32 @not_dotp_extend_user(
; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-INTERLEAVE1-NEXT: entry:
-; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
; CHECK-INTERLEAVE1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVE1: vector.ph:
; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
@@ -1577,8 +1545,6 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-LABEL: define i32 @not_dotp_extend_user(
; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-INTERLEAVED-NEXT: entry:
-; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
; CHECK-INTERLEAVED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
@@ -1627,8 +1593,6 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) #0 {
; CHECK-MAXBW-LABEL: define i32 @not_dotp_extend_user(
; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-MAXBW-NEXT: entry:
-; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -1687,8 +1651,6 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1-LABEL: define i64 @dotp_cost_disagreement(
; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-INTERLEAVE1-NEXT: entry:
-; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1
; CHECK-INTERLEAVE1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVE1: vector.ph:
; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -1721,7 +1683,7 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-INTERLEAVED-NEXT: entry:
; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
+; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-INTERLEAVED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 41, [[TMP1]]
; CHECK-INTERLEAVED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
@@ -1769,7 +1731,7 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 {
; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-MAXBW-NEXT: entry:
; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
+; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
; CHECK-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 41, [[TMP1]]
; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
@@ -2076,7 +2038,7 @@ define i64 @not_dotp_ext_outside_plan(ptr %a, i16 %b, i64 %n) #0 {
; CHECK-MAXBW: for.ph:
; CHECK-MAXBW-NEXT: [[EXT_B:%.*]] = zext i16 [[B]] to i64
; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
+; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
@@ -2209,7 +2171,7 @@ define i64 @not_dotp_ext_outside_plan2(ptr %a, i16 %b, i64 %n) #0 {
; CHECK-MAXBW: for.ph:
; CHECK-MAXBW-NEXT: [[EXT_B:%.*]] = zext i16 [[B]] to i64
; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
+; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
@@ -2271,7 +2233,7 @@ define dso_local i32 @not_dotp_vscale1(ptr %a, ptr %b, i32 %n, i64 %cost) #0 {
; CHECK-INTERLEAVE1: for.body.preheader:
; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 1
+; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 1
; CHECK-INTERLEAVE1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
; CHECK-INTERLEAVE1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVE1: vector.ph:
@@ -2312,7 +2274,7 @@ define dso_local i32 @not_dotp_vscale1(ptr %a, ptr %b, i32 %n, i64 %cost) #0 {
; CHECK-INTERLEAVED: for.body.preheader:
; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2
+; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
; CHECK-INTERLEAVED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
; CHECK-INTERLEAVED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
@@ -2367,7 +2329,7 @@ define dso_local i32 @not_dotp_vscale1(ptr %a, ptr %b, i32 %n, i64 %cost) #0 {
; CHECK-MAXBW: for.body.preheader:
; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 3
+; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
; CHECK-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll
index 89b5689845612..64cb33181cc1e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll
@@ -10,8 +10,6 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1-LABEL: define i32 @dotp(
; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-INTERLEAVE1-NEXT: entry:
-; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
; CHECK-INTERLEAVE1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVE1: vector.ph:
; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -43,8 +41,6 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-LABEL: define i32 @dotp(
; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-INTERLEAVED-NEXT: entry:
-; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
; CHECK-INTERLEAVED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -91,8 +87,6 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
; CHECK-MAXBW-LABEL: define i32 @dotp(
; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-MAXBW-NEXT: entry:
-; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
; CHECK-MAXBW: vector.ph:
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll
index c3a7f2c477db8..370bfc641001a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll
@@ -60,8 +60,6 @@ define i32 @zext_add_reduc_i8_i32_sve(ptr %a) #0 {
; CHECK-MAXBW-LABEL: define i32 @zext_add_reduc_i8_i32_sve(
; CHECK-MAXBW-SAME: ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-MAXBW-NEXT: entry:
-; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4
; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -244,8 +242,6 @@ define i64 @zext_add_reduc_i8_i64(ptr %a) #0 {
; CHECK-MAXBW-LABEL: define i64 @zext_add_reduc_i8_i64(
; CHECK-MAXBW-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-MAXBW-NEXT: entry:
-; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4
; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -340,8 +336,6 @@ define i64 @zext_add_reduc_i16_i64(ptr %a) #0 {
; CHECK-MAXBW-LABEL: define i64 @zext_add_reduc_i16_i64(
; CHECK-MAXBW-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-MAXBW-NEXT: entry:
-; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -436,8 +430,6 @@ define i32 @zext_add_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 {
; CHECK-MAXBW-LABEL: define i32 @zext_add_reduc_i8_i32_has_neon_dotprod(
; CHECK-MAXBW-SAME: ptr [[A:%.*]]) #[[ATTR2:[0-9]+]] {
; CHECK-MAXBW-NEXT: entry:
-; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4
; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -718,8 +710,6 @@ define i32 @zext_sub_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 {
; CHECK-MAXBW-LABEL: define i32 @zext_sub_reduc_i8_i32_has_neon_dotprod(
; CHECK-MAXBW-SAME: ptr [[A:%.*]]) #[[ATTR2]] {
; CHECK-MAXBW-NEXT: entry:
-; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -814,8 +804,6 @@ define i32 @sext_add_reduc_i8_i32(ptr %a) #0 {
; CHECK-MAXBW-LABEL: define i32 @sext_add_reduc_i8_i32(
; CHECK-MAXBW-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-MAXBW-NEXT: entry:
-; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4
; CHECK-MAXBW-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -932,7 +920,7 @@ define i32 @add_of_zext_outside_loop(i32 %a, ptr noalias %b, i8 %c, i32 %d) #0 {
; CHECK-MAXBW-NEXT: [[CONV1:%.*]] = zext i8 [[C]] to i32
; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = sub i32 1024, [[D]]
; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 4
+; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[TMP1]], 4
; CHECK-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], [[TMP2]]
; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
@@ -1053,7 +1041,7 @@ define i32 @add_of_loop_invariant_zext(i32 %a, ptr %b, i8 %c, i32 %d) #0 {
; CHECK-MAXBW-NEXT: entry:
; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = sub i32 1024, [[D]]
; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 4
+; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[TMP1]], 4
; CHECK-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], [[TMP2]]
; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
index 94172dea8ee6a..db55c5ca107a6 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
@@ -8,8 +8,6 @@ target triple = "aarch64-unknown-linux-gnu"
define void @test_invar_gep(ptr %dst) #0 {
; CHECK-LABEL: @test_invar_gep(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
index 9583c7f174333..375e412861777 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
@@ -34,7 +34,7 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-UNORDERED-SAME: (ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-UNORDERED-NEXT: entry:
; CHECK-UNORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
+; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
; CHECK-UNORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-UNORDERED: vector.ph:
@@ -77,7 +77,7 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-ORDERED-SAME: (ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-ORDERED-NEXT: entry:
; CHECK-ORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
+; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
; CHECK-ORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-ORDERED: vector.ph:
@@ -202,7 +202,7 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-UNORDERED-SAME: (ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-UNORDERED-NEXT: entry:
; CHECK-UNORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5
+; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 5
; CHECK-UNORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-UNORDERED: vector.ph:
@@ -266,7 +266,7 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-ORDERED-SAME: (ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-ORDERED-NEXT: entry:
; CHECK-ORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5
+; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 5
; CHECK-ORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-ORDERED: vector.ph:
@@ -468,7 +468,7 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali
; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
; CHECK-UNORDERED-NEXT: [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2
+; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2
; CHECK-UNORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP4]]
; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-UNORDERED: vector.ph:
@@ -536,7 +536,7 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali
; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
; CHECK-ORDERED-NEXT: [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2
+; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2
; CHECK-ORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP4]]
; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-ORDERED: vector.ph:
@@ -725,7 +725,7 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocaptu
; CHECK-UNORDERED-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK-UNORDERED: for.body.preheader:
; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2
+; CHECK-UNORDERED-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
; CHECK-UNORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-UNORDERED: vector.ph:
@@ -782,7 +782,7 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocaptu
; CHECK-ORDERED-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK-ORDERED: for.body.preheader:
; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2
+; CHECK-ORDERED-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
; CHECK-ORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-ORDERED: vector.ph:
@@ -945,7 +945,7 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no
; CHECK-UNORDERED-SAME: (ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-UNORDERED-NEXT: entry:
; CHECK-UNORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
+; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-UNORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-UNORDERED: vector.ph:
@@ -1000,7 +1000,7 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no
; CHECK-ORDERED-SAME: (ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-ORDERED-NEXT: entry:
; CHECK-ORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
+; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-ORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-ORDERED: vector.ph:
@@ -1165,7 +1165,7 @@ define float @fadd_multiple(ptr noalias nocapture %a, ptr noalias nocapture %b,
; CHECK-UNORDERED-SAME: (ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-UNORDERED-NEXT: entry:
; CHECK-UNORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
+; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
; CHECK-UNORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-UNORDERED: vector.ph:
@@ -1300,7 +1300,7 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 {
; CHECK-UNORDERED-SAME: (ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-UNORDERED-NEXT: entry:
; CHECK-UNORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5
+; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 5
; CHECK-UNORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-UNORDERED: vector.ph:
@@ -1380,7 +1380,7 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 {
; CHECK-ORDERED-SAME: (ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-ORDERED-NEXT: entry:
; CHECK-ORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5
+; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 5
; CHECK-ORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-ORDERED: vector.ph:
@@ -1610,7 +1610,7 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 {
; CHECK-UNORDERED-SAME: (ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-UNORDERED-NEXT: entry:
; CHECK-UNORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5
+; CHECK-UNORDERED-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 5
; CHECK-UNORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-UNORDERED: vector.ph:
@@ -1690,7 +1690,7 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 {
; CHECK-ORDERED-SAME: (ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-ORDERED-NEXT: entry:
; CHECK-ORDERED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5
+; CHECK-ORDERED-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 5
; CHECK-ORDERED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-ORDERED: vector.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
index b7016ff4abf88..eb63b9cbd8427 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
@@ -14,7 +14,7 @@ define i64 @same_exit_block_pre_inc_use1() #1 {
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 64, [[TMP1]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
@@ -251,9 +251,6 @@ define i64 @loop_contains_safe_div() #1 {
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
-; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP12:%.*]] = shl nuw i64 [[TMP11]], 2
-; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.umax.i64(i64 8, i64 [[TMP12]])
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll b/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll
index 46493e0bcd417..be5864958afc2 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll
@@ -14,7 +14,7 @@ define i64 @same_exit_block_pre_inc_use1() #0 {
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 6
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 6
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 510, [[TMP1]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
index 04b0075e5a5b2..6b0da1bb2ed82 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
@@ -145,8 +145,6 @@ exit:
define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) {
; CHECK-LABEL: @main_vf_vscale_x_2_no_epi_iteration(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -589,8 +587,6 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
; CHECK-VF8-NEXT: entry:
; CHECK-VF8-NEXT: [[V:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-VF8-NEXT: [[N:%.*]] = mul nuw nsw i64 [[V]], 1033
-; CHECK-VF8-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
; CHECK-VF8-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-VF8: vector.ph:
; CHECK-VF8-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -735,8 +731,6 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
; CHECK-VF8-NEXT: entry:
; CHECK-VF8-NEXT: [[V:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-VF8-NEXT: [[N:%.*]] = mul nuw nsw i64 [[V]], 1024
-; CHECK-VF8-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
; CHECK-VF8-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-VF8: vector.ph:
; CHECK-VF8-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll
index 95153a4a59f1a..51743cf636a14 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll
@@ -16,7 +16,7 @@ define void @fneg(ptr nocapture noundef writeonly %d, ptr nocapture noundef read
; CHECK: for.body.preheader:
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
index 7dc6a6aa890f9..4ae49354e6443 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
@@ -7,7 +7,7 @@ define void @inv_store_i16(ptr noalias %dst, ptr noalias readonly %src, i64 %N)
; CHECK-LABEL: @inv_store_i16(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
@@ -54,7 +54,7 @@ define void @cond_inv_store_i32(ptr noalias %dst, ptr noalias readonly %src, i64
; CHECK-LABEL: @cond_inv_store_i32(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll
index 3c667ccbdedfc..4c7f70ad4d15e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll
@@ -15,7 +15,7 @@ define void @min_trip_count_due_to_runtime_checks_1(ptr %dst.1, ptr %dst.2, ptr
; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1)
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 20, i64 [[TMP1]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 20)
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX]], [[TMP2]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
index 4cd7dac62e79f..552d9e23c33c5 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
@@ -536,8 +536,6 @@ while.end.loopexit: ; preds = %while.body
define void @simple_memset_trip1024(i32 %val, ptr %ptr, i64 %n) #0 {
; CHECK-LABEL: @simple_memset_trip1024(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll
index 8dafa3453dcf5..4444be36c3567 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll
@@ -9,8 +9,6 @@ define void @vscale_mul_4(ptr noalias noundef readonly captures(none) %a, ptr no
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP5]]
@@ -62,8 +60,6 @@ define void @vscale_mul_8(ptr noalias noundef readonly captures(none) %a, ptr n
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[MUL1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
-; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[MUL1]], [[TMP4]]
@@ -128,8 +124,7 @@ define void @vscale_mul_12(ptr noalias noundef readonly captures(none) %a, ptr n
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i64 [[TMP0]], 12
-; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[MUL1]], [[TMP2]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
@@ -196,8 +191,7 @@ define void @vscale_mul_31(ptr noalias noundef readonly captures(none) %a, ptr n
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i64 [[TMP0]], 31
-; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP0]], 3
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[MUL1]], [[TMP2]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
@@ -277,8 +271,7 @@ define void @vscale_mul_64(ptr noalias noundef readonly captures(none) %a, ptr n
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i64 [[TMP0]], 64
-; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP0]], 3
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[MUL1]], [[TMP2]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
@@ -360,8 +353,7 @@ define void @trip_count_with_overflow(ptr noalias noundef readonly captures(none
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP0]], 3
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], [[TMP3]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
@@ -442,8 +434,6 @@ define void @trip_count_too_big_for_element_count(ptr noalias noundef readonly c
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 32
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP5]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
index 29d901f084bdb..f4ec3db58cf1f 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
@@ -85,7 +85,7 @@ define i8 @dead_live_out_due_to_scalar_epilogue_required(ptr %src, ptr %dst) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i32 [[TMP0]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.umax.i32(i32 6, i32 [[TMP1]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP1]], i32 6)
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 252, [[TMP2]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll b/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll
index 8b6db703d8274..c1bdba8cc5f21 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll
@@ -12,7 +12,7 @@ define void @fmin32(ptr noundef readonly captures(none) %input1, ptr noundef rea
; CHECK-NEXT: [[OUTPUT1:%.*]] = ptrtoint ptr [[OUTPUT]] to i64
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP8]], 2
-; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 15, i64 [[TMP13]])
+; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP13]], i64 15)
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP14]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
@@ -73,7 +73,7 @@ define void @fmin32(ptr noundef readonly captures(none) %input1, ptr noundef rea
; ZVFHMIN-NEXT: [[OUTPUT1:%.*]] = ptrtoint ptr [[OUTPUT]] to i64
; ZVFHMIN-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; ZVFHMIN-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
-; ZVFHMIN-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 15, i64 [[TMP1]])
+; ZVFHMIN-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 15)
; ZVFHMIN-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP2]]
; ZVFHMIN-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; ZVFHMIN: [[VECTOR_MEMCHECK]]:
@@ -157,7 +157,7 @@ define void @fmax32(ptr noundef readonly captures(none) %input1, ptr noundef rea
; CHECK-NEXT: [[OUTPUT1:%.*]] = ptrtoint ptr [[OUTPUT]] to i64
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP8]], 2
-; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 15, i64 [[TMP13]])
+; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP13]], i64 15)
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP14]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
@@ -218,7 +218,7 @@ define void @fmax32(ptr noundef readonly captures(none) %input1, ptr noundef rea
; ZVFHMIN-NEXT: [[OUTPUT1:%.*]] = ptrtoint ptr [[OUTPUT]] to i64
; ZVFHMIN-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; ZVFHMIN-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
-; ZVFHMIN-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 15, i64 [[TMP1]])
+; ZVFHMIN-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 15)
; ZVFHMIN-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP2]]
; ZVFHMIN-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; ZVFHMIN: [[VECTOR_MEMCHECK]]:
@@ -302,7 +302,7 @@ define void @fmin64(ptr noundef readonly captures(none) %input1, ptr noundef rea
; CHECK-NEXT: [[OUTPUT1:%.*]] = ptrtoint ptr [[OUTPUT]] to i64
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP8]], 1
-; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 15, i64 [[TMP13]])
+; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP13]], i64 15)
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP14]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
@@ -363,7 +363,7 @@ define void @fmin64(ptr noundef readonly captures(none) %input1, ptr noundef rea
; ZVFHMIN-NEXT: [[OUTPUT1:%.*]] = ptrtoint ptr [[OUTPUT]] to i64
; ZVFHMIN-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; ZVFHMIN-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1
-; ZVFHMIN-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 15, i64 [[TMP1]])
+; ZVFHMIN-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 15)
; ZVFHMIN-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP2]]
; ZVFHMIN-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; ZVFHMIN: [[VECTOR_MEMCHECK]]:
@@ -447,7 +447,7 @@ define void @fmax64(ptr noundef readonly captures(none) %input1, ptr noundef rea
; CHECK-NEXT: [[OUTPUT1:%.*]] = ptrtoint ptr [[OUTPUT]] to i64
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP8]], 1
-; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 15, i64 [[TMP13]])
+; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP13]], i64 15)
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP14]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
@@ -508,7 +508,7 @@ define void @fmax64(ptr noundef readonly captures(none) %input1, ptr noundef rea
; ZVFHMIN-NEXT: [[OUTPUT1:%.*]] = ptrtoint ptr [[OUTPUT]] to i64
; ZVFHMIN-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; ZVFHMIN-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1
-; ZVFHMIN-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 15, i64 [[TMP1]])
+; ZVFHMIN-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 15)
; ZVFHMIN-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP2]]
; ZVFHMIN-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; ZVFHMIN: [[VECTOR_MEMCHECK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll
index e6ed26cfa1109..82a357676a591 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll
@@ -62,7 +62,7 @@ define void @test_and(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4
-; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 32)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -178,7 +178,7 @@ define void @test_or(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4
-; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 32)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -294,7 +294,7 @@ define void @test_xor(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4
-; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 32)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -410,7 +410,7 @@ define void @test_shl(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4
-; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 32)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -526,7 +526,7 @@ define void @test_lshr(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4
-; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 32)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -642,7 +642,7 @@ define void @test_ashr(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4
-; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 32)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -758,7 +758,7 @@ define void @test_add(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4
-; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 32)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -874,7 +874,7 @@ define void @test_sub(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4
-; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 32)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -990,7 +990,7 @@ define void @test_mul(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4
-; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 32)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -1106,7 +1106,7 @@ define void @test_sdiv(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4
-; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 32)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -1222,7 +1222,7 @@ define void @test_udiv(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4
-; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 32)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -1338,7 +1338,7 @@ define void @test_srem(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4
-; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 32)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -1454,7 +1454,7 @@ define void @test_urem(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 4
-; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 32)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -1573,7 +1573,7 @@ define void @test_fadd(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 2
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 16)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP2]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -1691,7 +1691,7 @@ define void @test_fsub(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 2
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 16)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP2]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -1809,7 +1809,7 @@ define void @test_fmul(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 2
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 16)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP2]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -1927,7 +1927,7 @@ define void @test_fdiv(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 2
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 16)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP2]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -2098,7 +2098,7 @@ define void @test_fneg(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 2
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 16)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP2]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll
index 0ce9ef650379d..feafabd7c2a19 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll
@@ -72,7 +72,7 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) {
; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64
; NO-VP-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP12]], 2
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP11]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP11]], i64 16)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -211,7 +211,7 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) {
; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64
; NO-VP-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP12]], 2
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP11]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP11]], i64 16)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -350,7 +350,7 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) {
; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64
; NO-VP-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP12]], 2
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP11]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP11]], i64 16)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -489,7 +489,7 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) {
; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64
; NO-VP-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP12]], 2
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP11]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP11]], i64 16)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -619,7 +619,7 @@ define void @vp_ctlz(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64
; NO-VP-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP10]], 2
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 8, i64 [[TMP9]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP9]], i64 8)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -738,7 +738,7 @@ define void @vp_cttz(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64
; NO-VP-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP10]], 2
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 8, i64 [[TMP9]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP9]], i64 8)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -861,7 +861,7 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64
; NO-VP-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP10]], 2
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP9]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP9]], i64 16)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -990,7 +990,7 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64
; NO-VP-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP10]], 2
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP9]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP9]], i64 16)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -1115,7 +1115,7 @@ define void @vp_abs(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64
; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 2
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 8, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 8)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll
index 7ea462eed42db..79113acf88e31 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll
@@ -60,7 +60,7 @@ define void @vp_sext(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: [[ENTRY:.*]]:
; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 1
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 20, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 20)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -178,7 +178,7 @@ define void @vp_zext(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: [[ENTRY:.*]]:
; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 1
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 20, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 20)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -296,7 +296,7 @@ define void @vp_trunc(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: [[ENTRY:.*]]:
; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 1
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 20, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 20)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -414,7 +414,7 @@ define void @vp_fpext(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: [[ENTRY:.*]]:
; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 1
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 16)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -532,7 +532,7 @@ define void @vp_fptrunc(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: [[ENTRY:.*]]:
; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP7]], 1
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 16)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -652,7 +652,7 @@ define void @vp_sitofp(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64
; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 2
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 16)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -770,7 +770,7 @@ define void @vp_uitofp(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64
; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 2
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 16)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -888,7 +888,7 @@ define void @vp_fptosi(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64
; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 2
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 16)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -1006,7 +1006,7 @@ define void @vp_fptoui(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64
; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 2
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 16)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
@@ -1124,7 +1124,7 @@ define void @vp_inttoptr(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64
; NO-VP-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP10]], 1
-; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP9]])
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP9]], i64 16)
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; NO-VP: [[VECTOR_MEMCHECK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll
index 968e107d04f8f..2d75576bc36ee 100644
--- a/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll
+++ b/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll
@@ -212,12 +212,12 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) {
; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[N]], -1
; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 1
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 4
; CHECK-NEXT: [[FR:%.*]] = freeze i8 [[START]]
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
-; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP2]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
+; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
+; CHECK-NEXT: [[MIN_ITERS_CHECK2:%.*]] = icmp ult i32 [[TMP2]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK2]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF]]
diff --git a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll
index 372c703f4cb23..aa5fca88da9d4 100644
--- a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll
+++ b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll
@@ -448,7 +448,7 @@ define void @scalar_cast_dbg(ptr nocapture %a, i32 %start, i64 %k) {
; DEBUGLOC: [[VECTOR_PH]]:
; DEBUGLOC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[K]], 4
; DEBUGLOC-NEXT: [[N_VEC:%.*]] = sub i64 [[K]], [[N_MOD_VF]]
-; DEBUGLOC-NEXT: br label %[[VECTOR_BODY:.*]], !dbg [[DBG77]]
+; DEBUGLOC-NEXT: br label %[[VECTOR_BODY:.*]], !dbg [[DBG78]]
; DEBUGLOC: [[VECTOR_BODY]]:
; DEBUGLOC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ], !dbg [[DBG78]]
; DEBUGLOC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ], !dbg [[DBG79:![0-9]+]]
@@ -556,7 +556,7 @@ define void @widen_intrinsic_dbg(i64 %n, ptr %y, ptr %x) {
; DEBUGLOC: [[VECTOR_PH]]:
; DEBUGLOC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; DEBUGLOC-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; DEBUGLOC-NEXT: br label %[[VECTOR_BODY:.*]], !dbg [[DBG97]]
+; DEBUGLOC-NEXT: br label %[[VECTOR_BODY:.*]], !dbg [[DBG98]]
; DEBUGLOC: [[VECTOR_BODY]]:
; DEBUGLOC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ], !dbg [[DBG98]]
; DEBUGLOC-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 [[INDEX]], !dbg [[DBG99:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll
index cae5c4af13792..0892500da90e3 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll
@@ -88,7 +88,14 @@ define void @iv_expand(ptr %p, i64 %n) {
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK: VPlan 'Final VPlan for VF={8},UF={1}'
-; CHECK: ir-bb<vector.ph>:
+; CHECK-NEXT: Live-in ir<%n> = original trip-count
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<entry>:
+; CHECK-NEXT: EMIT vp<%min.iters.check> = icmp ult ir<%n>, ir<8>
+; CHECK-NEXT: EMIT branch-on-cond vp<%min.iters.check>
+; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, vector.ph
+; CHECK-EMPTY:
+; CHECK: vector.ph:
; CHECK-NEXT: EMIT vp<%n.mod.vf> = urem ir<%n>, ir<8>
; CHECK-NEXT: EMIT vp<%n.vec> = sub ir<%n>, vp<%n.mod.vf>
; CHECK-NEXT: EMIT vp<[[STEP_VECTOR:%.+]]> = step-vector
@@ -100,8 +107,8 @@ define void @iv_expand(ptr %p, i64 %n) {
; CHECK-NEXT: Successor(s): vector.body
; CHECK-EMPTY:
; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT-SCALAR vp<[[SCALAR_PHI:%.+]]> = phi [ ir<0>, ir-bb<vector.ph> ], [ vp<%index.next>, vector.body ]
-; CHECK-NEXT: WIDEN-PHI ir<%iv> = phi [ vp<[[INDUCTION]]>, ir-bb<vector.ph> ], [ vp<%vec.ind.next>, vector.body ]
+; CHECK-NEXT: EMIT-SCALAR vp<[[SCALAR_PHI:%.+]]> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ]
+; CHECK-NEXT: WIDEN-PHI ir<%iv> = phi [ vp<[[INDUCTION]]>, vector.ph ], [ vp<%vec.ind.next>, vector.body ]
; CHECK-NEXT: CLONE ir<%q> = getelementptr ir<%p>, vp<[[SCALAR_PHI]]>
; CHECK-NEXT: WIDEN ir<%x> = load ir<%q>
; CHECK-NEXT: WIDEN ir<%y> = add ir<%x>, ir<%iv>
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
index 32aadcfb164e6..3d05ee7f27b5c 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
@@ -6,16 +6,21 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; CHECK-NEXT: ir<%0> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
-; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, ir-bb<vector.ph>
-; CHECK-EMPTY:
-; CHECK-NEXT: ir-bb<vector.ph>:
+; CHECK-NEXT: IR %start2 = ptrtoint ptr %start to i64
+; CHECK-NEXT: IR %end1 = ptrtoint ptr %end to i64
+; CHECK-NEXT: IR %0 = sub i64 %end1, %start2
+; CHECK-NEXT: EMIT vp<%min.iters.check> = icmp ult ir<%0>, ir<2>
+; CHECK-NEXT: EMIT branch-on-cond vp<%min.iters.check>
+; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, vector.ph
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
; CHECK-NEXT: EMIT vp<%n.mod.vf> = urem ir<%0>, ir<2>
; CHECK-NEXT: EMIT vp<[[VTC:%.+]]> = sub ir<%0>, vp<%n.mod.vf>
; CHECK-NEXT: vp<[[END:%.+]]> = DERIVED-IV ir<%start> + vp<[[VTC]]> * ir<1>
; CHECK-NEXT: Successor(s): vector.body
; CHECK-EMPTY:
; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT-SCALAR vp<[[CAN_IV:%.+]]> = phi [ ir<0>, ir-bb<vector.ph> ], [ vp<[[CAN_IV_NEXT:%.+]]>, default.2 ]
+; CHECK-NEXT: EMIT-SCALAR vp<[[CAN_IV:%.+]]> = phi [ ir<0>, vector.ph ], [ vp<[[CAN_IV_NEXT:%.+]]>, default.2 ]
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, ir<2>
; CHECK-NEXT: EMIT vp<[[PTR:%.+]]> = ptradd ir<%start>, vp<[[STEPS]]>
; CHECK-NEXT: WIDEN ir<%l> = load vp<[[PTR]]>
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
index c65d9368d595f..2a7ffec27c2f9 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
@@ -66,9 +66,10 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) {
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: IR %and = and i64 %N, 15
-; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, ir-bb<vector.ph>
+; CHECK-NEXT: EMIT branch-on-cond ir<true>
+; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, vector.ph
; CHECK-EMPTY:
-; CHECK-NEXT: ir-bb<vector.ph>:
+; CHECK-NEXT: vector.ph:
; CHECK-NEXT: EMIT vp<%n.mod.vf> = urem ir<%and>, ir<16>
; CHECK-NEXT: EMIT vp<[[VTC:%.+]]> = sub ir<%and>, vp<%n.mod.vf>
; CHECK-NEXT: vp<[[END1:%.+]]> = DERIVED-IV ir<%and> + vp<[[VTC]]> * ir<-1>
>From a457b67e6d10f2733425911605677e74daffc97a Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 21 Aug 2025 18:38:55 +0100
Subject: [PATCH 2/7] !fixup address latest comments, thanks
---
.../Vectorize/LoopVectorizationPlanner.h | 5 +-
.../Transforms/Vectorize/LoopVectorize.cpp | 40 ++++-------
.../Vectorize/VPlanConstruction.cpp | 70 +++++++++++--------
3 files changed, 57 insertions(+), 58 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index fba39d00d0367..3e039ce6de96d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -334,7 +334,7 @@ class VPBuilder {
FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags(), DL));
}
- VPExpandSCEVRecipe *expandSCEV(const SCEV *Expr, ScalarEvolution &SE) {
+ VPExpandSCEVRecipe *createExpandSCEV(const SCEV *Expr, ScalarEvolution &SE) {
return tryInsertInstruction(new VPExpandSCEVRecipe(Expr, SE));
}
@@ -563,7 +563,8 @@ class LoopVectorizationPlanner {
/// Emit remarks for recipes with invalid costs in the available VPlans.
void emitInvalidCostRemarks(OptimizationRemarkEmitter *ORE);
- /// Create a check to \p Plan to see if the vector loop should be executed.
+ /// Create a check to \p Plan to see if the vector loop should be executed
+ /// based on its trip count.
void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF,
ElementCount MinProfitableTripCount) const;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 008a3e752e380..eb051cc07c5f8 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -544,7 +544,8 @@ class InnerLoopVectorizer {
protected:
friend class LoopVectorizationPlanner;
- /// Create a new IR basic block for the scalar preheader.
+ /// Create a new IR basic block for the scalar preheader whose name is
+ /// prefixed with \p Prefix.
void createScalarPreheader(StringRef Prefix);
/// Allow subclasses to override and print debug traces before/after vplan
@@ -669,17 +670,6 @@ class InnerLoopAndEpilogueVectorizer : public InnerLoopVectorizer {
UnrollFactor, CM, BFI, PSI, Checks, Plan),
EPI(EPI), MinProfitableTripCount(MinProfitableTripCount) {}
- // Override this function to handle the more complex control flow around the
- // three loops.
- BasicBlock *createVectorizedLoopSkeleton() final {
- return createEpilogueVectorizedLoopSkeleton();
- }
-
- /// The interface for creating a vectorized skeleton using one of two
- /// different strategies, each corresponding to one execution of the vplan
- /// as described above.
- virtual BasicBlock *createEpilogueVectorizedLoopSkeleton() = 0;
-
/// Holds and updates state information required to vectorize the main loop
/// and its epilogue in two separate passes. This setup helps us avoid
/// regenerating and recomputing runtime safety checks. It also helps us to
@@ -710,10 +700,10 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
EPI.MainLoopVF, EPI.MainLoopUF) {}
/// Implements the interface for creating a vectorized skeleton using the
/// *main loop* strategy (ie the first pass of vplan execution).
- BasicBlock *createEpilogueVectorizedLoopSkeleton() final;
+ BasicBlock *createVectorizedLoopSkeleton() final;
protected:
- // Create a check to see if the vector loop should be executed
+ // Create a check to see if the main vector loop should be executed
Value *createIterationCountCheck(ElementCount VF, unsigned UF) const;
/// Emits an iteration count bypass check once for the main loop (when \p
@@ -742,7 +732,7 @@ class EpilogueVectorizerEpilogueLoop : public InnerLoopAndEpilogueVectorizer {
}
/// Implements the interface for creating a vectorized skeleton using the
/// *epilogue loop* strategy (ie the second pass of vplan execution).
- BasicBlock *createEpilogueVectorizedLoopSkeleton() final;
+ BasicBlock *createVectorizedLoopSkeleton() final;
protected:
/// Emits an iteration count bypass check after the main vector loop has
@@ -7268,7 +7258,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
// at the wrong place.
for (VPRecipeBase &R : make_early_inc_range(*Entry)) {
if (!isa<VPIRInstruction, VPIRPhi>(&R))
- continue;
+ break;
R.eraseFromParent();
}
for (Instruction &I : drop_begin(reverse(*Entry->getIRBasicBlock())))
@@ -7415,7 +7405,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
/// This function is partially responsible for generating the control flow
/// depicted in https://llvm.org/docs/Vectorizers.html#epilogue-vectorization.
-BasicBlock *EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton() {
+BasicBlock *EpilogueVectorizerMainLoop::createVectorizedLoopSkeleton() {
createScalarPreheader("");
// Generate the code to check the minimum iteration count of the vector
@@ -7501,8 +7491,7 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
/// This function is partially responsible for generating the control flow
/// depicted in https://llvm.org/docs/Vectorizers.html#epilogue-vectorization.
-BasicBlock *
-EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() {
+BasicBlock *EpilogueVectorizerEpilogueLoop::createVectorizedLoopSkeleton() {
createScalarPreheader("vec.epilog.");
// Now, compare the remaining count and if there aren't enough iterations to
@@ -9315,18 +9304,19 @@ void LoopVectorizationPlanner::addMinimumIterationCheck(
// vscale is not necessarily a power-of-2, which means we cannot guarantee
// an overflow to zero when updating induction variables and so an
// additional overflow check is required before entering the vector loop.
- bool CheckNeededWithTailFolding =
+ bool IsIndvarOverflowCheckNeededForVF =
VF.isScalable() && !TTI.isVScaleKnownToBeAPowerOfTwo() &&
- !isIndvarOverflowCheckKnownFalse(&CM, VF, 1) &&
+ !isIndvarOverflowCheckKnownFalse(&CM, VF, UF) &&
CM.getTailFoldingStyle() !=
TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck;
+  const uint32_t *BranchWeights =
+ hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())
+ ? &MinItersBypassWeights[0]
+ : nullptr;
VPlanTransforms::addMinimumIterationCheck(
Plan, VF, UF, MinProfitableTripCount,
CM.requiresScalarEpilogue(VF.isVector()), CM.foldTailByMasking(),
- CheckNeededWithTailFolding, OrigLoop,
- hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())
- ? &MinItersBypassWeights[0]
- : nullptr,
+      IsIndvarOverflowCheckNeededForVF, OrigLoop, BranchWeights,
OrigLoop->getLoopPredecessor()->getTerminator()->getDebugLoc(),
*PSE.getSE());
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 4f36de870d067..f7e039f86e93b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -680,62 +680,70 @@ void VPlanTransforms::addMinimumIterationCheck(
// vector trip count is zero. This check also covers the case where adding one
// to the backedge-taken count overflowed leading to an incorrect trip count
// of zero. In this case we will also jump to the scalar loop.
- auto P = RequiresScalarEpilogue ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
+ CmpInst::Predicate CmpPred =
+ RequiresScalarEpilogue ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
// If tail is to be folded, vector loop takes care of all iterations.
- const SCEV *Count = vputils::getSCEVExprForVPValue(Plan.getTripCount(), SE);
- Type *CountTy = Count->getType();
- auto CreateStep = [&]() -> const SCEV * {
- const SCEV *VFxUF = SE.getElementCount(CountTy, (VF * UF), SCEV::FlagNUW);
- // Create step with max(MinProTripCount, UF * VF).
- if (UF * VF.getKnownMinValue() >= MinProfitableTripCount.getKnownMinValue())
+ VPValue *TripCountVPV = Plan.getTripCount();
+ const SCEV *TripCount = vputils::getSCEVExprForVPValue(TripCountVPV, SE);
+ Type *TripCountTy = TripCount->getType();
+ auto CreateMinTripCount = [&]() -> const SCEV * {
+ // Create or get max(MinProfitableTripCount, UF * VF) and return it.
+ const SCEV *VFxUF =
+ SE.getElementCount(TripCountTy, (VF * UF), SCEV::FlagNUW);
+ const SCEV *MinProfitableTripCountSCEV =
+ SE.getElementCount(TripCountTy, MinProfitableTripCount, SCEV::FlagNUW);
+ const SCEV *Max = SE.getUMaxExpr(MinProfitableTripCountSCEV, VFxUF);
+ if (!VF.isScalable())
+ return Max;
+
+ if (UF * VF.getKnownMinValue() >=
+ MinProfitableTripCount.getKnownMinValue()) {
+ // TODO: SCEV should be able to simplify test.
return VFxUF;
+ }
- const SCEV *MinProfTC =
- SE.getElementCount(CountTy, MinProfitableTripCount, SCEV::FlagNUW);
- if (!VF.isScalable())
- return MinProfTC;
- return SE.getUMaxExpr(MinProfTC, VFxUF);
+ return Max;
};
VPBasicBlock *EntryVPBB = Plan.getEntry();
VPBuilder Builder(EntryVPBB);
- VPValue *CheckMinIters = Plan.getFalse();
- const SCEV *Step = CreateStep();
+ VPValue *TripCountCheck = Plan.getFalse();
+ const SCEV *Step = CreateMinTripCount();
if (!TailFolded) {
// TODO: Emit unconditional branch to vector preheader instead of
// conditional branch with known condition.
- const SCEV *TripCountSCEV = SE.applyLoopGuards(Count, OrigLoop);
+ TripCount = SE.applyLoopGuards(TripCount, OrigLoop);
// Check if the trip count is < the step.
- if (SE.isKnownPredicate(P, TripCountSCEV, Step)) {
+ if (SE.isKnownPredicate(CmpPred, TripCount, Step)) {
// TODO: Ensure step is at most the trip count when determining max VF and
// UF, w/o tail folding.
- CheckMinIters = Plan.getTrue();
- } else if (!SE.isKnownPredicate(CmpInst::getInversePredicate(P),
- TripCountSCEV, Step)) {
+ TripCountCheck = Plan.getTrue();
+ } else if (!SE.isKnownPredicate(CmpInst::getInversePredicate(CmpPred),
+ TripCount, Step)) {
// Generate the minimum iteration check only if we cannot prove the
// check is known to be true, or known to be false.
- CheckMinIters = Builder.createICmp(P, Plan.getTripCount(),
- Builder.expandSCEV(Step, SE), DL,
- "min.iters.check");
- } // else step known to be < trip count, use CheckMinIters preset to false.
+ VPValue *MinTripCountVPV = Builder.createExpandSCEV(Step, SE);
+ TripCountCheck = Builder.createICmp(
+ CmpPred, TripCountVPV, MinTripCountVPV, DL, "min.iters.check");
+ } // else step known to be < trip count, use TripCountCheck preset to false.
} else if (CheckNeededWithTailFolding) {
// vscale is not necessarily a power-of-2, which means we cannot guarantee
// an overflow to zero when updating induction variables and so an
// additional overflow check is required before entering the vector loop.
// Get the maximum unsigned value for the type.
- VPValue *MaxUIntTripCount = Plan.getOrAddLiveIn(
- ConstantInt::get(CountTy, cast<IntegerType>(CountTy)->getMask()));
- VPValue *LHS = Builder.createNaryOp(Instruction::Sub,
- {MaxUIntTripCount, Plan.getTripCount()},
- DebugLoc::getUnknown());
+ VPValue *MaxUIntTripCount = Plan.getOrAddLiveIn(ConstantInt::get(
+ TripCountTy, cast<IntegerType>(TripCountTy)->getMask()));
+ VPValue *DistanceToMax =
+ Builder.createNaryOp(Instruction::Sub, {MaxUIntTripCount, TripCountVPV},
+ DebugLoc::getUnknown());
// Don't execute the vector loop if (UMax - n) < (VF * UF).
- CheckMinIters = Builder.createICmp(ICmpInst::ICMP_ULT, LHS,
- Builder.expandSCEV(Step, SE), DL);
+ TripCountCheck = Builder.createICmp(ICmpInst::ICMP_ULT, DistanceToMax,
+ Builder.createExpandSCEV(Step, SE), DL);
}
VPInstruction *Term =
- Builder.createNaryOp(VPInstruction::BranchOnCond, {CheckMinIters}, DL);
+ Builder.createNaryOp(VPInstruction::BranchOnCond, {TripCountCheck}, DL);
if (MinItersBypassWeights) {
MDBuilder MDB(Plan.getContext());
MDNode *BranchWeights = MDB.createBranchWeights(
>From 20aafa186b2d235206669424f6ef8e93a159a009 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 21 Aug 2025 20:50:35 +0100
Subject: [PATCH 3/7] [VPlan] Create VPIRInstructions for expanded SCEVs.
---
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 677d97e54d556..980f73da09c2a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3493,6 +3493,7 @@ VPlanTransforms::expandSCEVs(VPlan &Plan, ScalarEvolution &SE) {
auto *Entry = cast<VPIRBasicBlock>(Plan.getEntry());
BasicBlock *EntryBB = Entry->getIRBasicBlock();
DenseMap<const SCEV *, Value *> ExpandedSCEVs;
+ VPBasicBlock::iterator InsertPt;
for (VPRecipeBase &R : make_early_inc_range(*Entry)) {
if (isa<VPIRInstruction, VPIRPhi>(&R))
continue;
@@ -3507,8 +3508,16 @@ VPlanTransforms::expandSCEVs(VPlan &Plan, ScalarEvolution &SE) {
ExpSCEV->replaceAllUsesWith(Exp);
if (Plan.getTripCount() == ExpSCEV)
Plan.resetTripCount(Exp);
+
+ InsertPt = std::next(ExpSCEV->getIterator());
ExpSCEV->eraseFromParent();
}
+ for (Instruction &I : *EntryBB) {
+ if (!Expander.isInsertedInstruction(&I) || isa<PHINode>(I))
+ continue;
+ VPIRInstruction::create(I)->insertBefore(*Entry, InsertPt);
+ }
+
return ExpandedSCEVs;
}
>From 1ae4d5fb338ba2b0af34597e0a7c25373b13d4cd Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sat, 23 Aug 2025 14:45:23 +0100
Subject: [PATCH 4/7] !fixup address comments, thanks
---
.../Transforms/Vectorize/LoopVectorize.cpp | 6 +--
.../Vectorize/VPlanConstruction.cpp | 2 +
.../Transforms/Vectorize/VPlanTransforms.cpp | 3 ++
.../Transforms/Vectorize/VPlanTransforms.h | 37 +++++++++++++++++--
4 files changed, 42 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 9bb1c27f74bd4..76e185d5a6f65 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9409,11 +9409,11 @@ static bool processLoopInVPlanNativePath(
{
GeneratedRTChecks Checks(PSE, DT, LI, TTI, F->getDataLayout(), CM.CostKind);
- InnerLoopVectorizer LB(L, PSE, LI, DT, TTI, AC, VF.Width, 1, &CM, BFI, PSI,
- Checks, BestPlan);
+ InnerLoopVectorizer LB(L, PSE, LI, DT, TTI, AC, VF.Width, /*UF=*/1, &CM,
+ BFI, PSI, Checks, BestPlan);
LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \""
<< L->getHeader()->getParent()->getName() << "\"\n");
- LVP.addMinimumIterationCheck(BestPlan, VF.Width, 1,
+ LVP.addMinimumIterationCheck(BestPlan, VF.Width, /*UF=*/1,
VF.MinProfitableTripCount);
LVP.executePlan(VF.Width, 1, BestPlan, LB, DT, false);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 34d33345b5612..92c2fc055d0e8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -740,6 +740,8 @@ void VPlanTransforms::addMinimumIterationCheck(
DebugLoc::getUnknown());
// Don't execute the vector loop if (UMax - n) < (VF * UF).
+ // FIXME: Should only check VF * UF, but currently checks Step=max(VF*UF,
+ // minProfitableTripCount).
TripCountCheck = Builder.createICmp(ICmpInst::ICMP_ULT, DistanceToMax,
Builder.createExpandSCEV(Step), DL);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 980f73da09c2a..050c728e56240 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3512,6 +3512,9 @@ VPlanTransforms::expandSCEVs(VPlan &Plan, ScalarEvolution &SE) {
InsertPt = std::next(ExpSCEV->getIterator());
ExpSCEV->eraseFromParent();
}
+ assert(none_of(*Entry, IsaPred<VPExpandSCEVRecipe>) &&
+ "VPExpandSCEVRecipes must be be at the beginning of the entry block, "
+ "after any VPIRInstructions");
for (Instruction &I : *EntryBB) {
if (!Expander.isInsertedInstruction(&I) || isa<PHINode>(I))
continue;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 66aa4d1f7b535..4b4576c420dfc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -62,9 +62,40 @@ struct VPlanTransforms {
/// The created loop is wrapped in an initial skeleton to facilitate
/// vectorization, consisting of a vector pre-header, an exit block for the
/// main vector loop (middle.block) and a new block as preheader of the scalar
- /// loop (scalar.ph). It also adds a canonical IV and its increment, using \p
- /// InductionTy and \p IVDL, and creates a VPValue expression for the original
- /// trip count.
+ /// loop (scalar.ph). See below for an illustration. It also adds a canonical
+ /// IV and its increment, using \p InductionTy and \p IVDL, and creates a
+ /// VPValue expression for the original trip count.
+ ///
+ /// [ ] <-- Plan's entry VPIRBasicBlock, wrapping the original loop's
+ /// / \ old preheader. Will contain iteration number check and SCEV
+ /// | | expansions.
+ /// | |
+ /// / v
+ /// | [ ] <-- vector loop bypass (may consist of multiple blocks) will be
+ /// | / | added later.
+ /// | / v
+ /// || [ ] <-- vector pre header.
+ /// |/ |
+ /// | v
+ /// | [ ] \ <-- plain CFG loop wrapping original loop to be vectorized.
+ /// | [ ]_|
+ /// | |
+ /// | v
+ /// | [ ] <--- middle-block with the branch to successors
+ /// | / |
+ /// | / |
+ /// | | v
+  ///   \--->[ ]     <--- scalar preheader (initially a VPBasicBlock, which will
+  ///   |    |            be replaced later by a VPIRBasicBlock wrapping the
+  ///   |    |            scalar preheader basic block).
+ /// | |
+ /// v <-- edge from middle to exit iff epilogue is not required.
+ /// | [ ] \
+ /// | [ ]_| <-- old scalar loop to handle remainder (scalar epilogue,
+ /// | | header wrapped in VPIRBasicBlock).
+ /// \ |
+ /// \ v
+ /// >[ ] <-- original loop exit block(s), wrapped in VPIRBasicBlocks.
LLVM_ABI_FOR_TEST static std::unique_ptr<VPlan>
buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, DebugLoc IVDL,
PredicatedScalarEvolution &PSE);
>From 91a1dff23fa7d15f0e6356fe19d48dc0781380c1 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 24 Aug 2025 10:34:32 +0100
Subject: [PATCH 5/7] !fixup address latest comments, thanks
---
.../Transforms/Vectorize/LoopVectorize.cpp | 12 ++--
.../Vectorize/VPlanConstruction.cpp | 58 +++++++++----------
.../Transforms/Vectorize/VPlanTransforms.cpp | 3 +-
3 files changed, 35 insertions(+), 38 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 9cc11b46a5951..8decc5115b1ee 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -512,11 +512,9 @@ class InnerLoopVectorizer {
virtual ~InnerLoopVectorizer() = default;
- /// Create new basic blocks needed as entries and exits of the code generated
- /// by executing a VPlan. For epilogue vectorization, it will create blocks
- /// for the minimum iteration checks and IR basic blocks for the vector and
- /// scalar preheaders. Otherwise it will create a basic block for the scalar
- /// preheader only.
+ /// Creates a basic block for the scalar preheader. Both
+  /// EpilogueVectorizerMainLoop and EpilogueVectorizerEpilogueLoop override
+ /// the method to create blocks and checks needed for epilogue vectorization.
virtual BasicBlock *createVectorizedLoopSkeleton();
/// Fix the vectorized code, taking care of header phi's, and more.
@@ -2386,8 +2384,8 @@ void InnerLoopVectorizer::createScalarPreheader(StringRef Prefix) {
LI, nullptr, Twine(Prefix) + "scalar.ph");
// NOTE: The Plan's scalar preheader VPBB isn't replaced with a VPIRBasicBlock
// wrapping LoopScalarPreHeader here at the moment, because the Plan's scalar
- // preheader may be unreachable at this point and SCEV expansion may add to it
- // later.
+ // preheader may be unreachable at this point. Instead it is replaced in
+ // executePlan.
}
/// Return the expanded step for \p ID using \p ExpandedSCEVs to look up SCEV
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 92c2fc055d0e8..a226088f72c26 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -687,30 +687,47 @@ void VPlanTransforms::addMinimumIterationCheck(
VPValue *TripCountVPV = Plan.getTripCount();
const SCEV *TripCount = vputils::getSCEVExprForVPValue(TripCountVPV, SE);
Type *TripCountTy = TripCount->getType();
- auto CreateMinTripCount = [&]() -> const SCEV * {
- // Create or get max(MinProfitableTripCount, UF * VF) and return it.
+ auto GetMinTripCount = [&]() -> const SCEV * {
+ // Compute max(MinProfitableTripCount, UF * VF) and return it.
const SCEV *VFxUF =
SE.getElementCount(TripCountTy, (VF * UF), SCEV::FlagNUW);
- const SCEV *MinProfitableTripCountSCEV =
- SE.getElementCount(TripCountTy, MinProfitableTripCount, SCEV::FlagNUW);
- const SCEV *Max = SE.getUMaxExpr(MinProfitableTripCountSCEV, VFxUF);
- if (!VF.isScalable())
- return Max;
-
if (UF * VF.getKnownMinValue() >=
MinProfitableTripCount.getKnownMinValue()) {
// TODO: SCEV should be able to simplify test.
return VFxUF;
}
-
- return Max;
+ const SCEV *MinProfitableTripCountSCEV =
+ SE.getElementCount(TripCountTy, MinProfitableTripCount, SCEV::FlagNUW);
+ return SE.getUMaxExpr(MinProfitableTripCountSCEV, VFxUF);
};
VPBasicBlock *EntryVPBB = Plan.getEntry();
VPBuilder Builder(EntryVPBB);
VPValue *TripCountCheck = Plan.getFalse();
- const SCEV *Step = CreateMinTripCount();
- if (!TailFolded) {
+ const SCEV *Step = GetMinTripCount();
+ if (TailFolded) {
+ if (CheckNeededWithTailFolding) {
+ // vscale is not necessarily a power-of-2, which means we cannot guarantee
+ // an overflow to zero when updating induction variables and so an
+ // additional overflow check is required before entering the vector loop.
+
+ // Get the maximum unsigned value for the type.
+ VPValue *MaxUIntTripCount = Plan.getOrAddLiveIn(ConstantInt::get(
+ TripCountTy, cast<IntegerType>(TripCountTy)->getMask()));
+ VPValue *DistanceToMax = Builder.createNaryOp(
+ Instruction::Sub, {MaxUIntTripCount, TripCountVPV},
+ DebugLoc::getUnknown());
+
+ // Don't execute the vector loop if (UMax - n) < (VF * UF).
+ // FIXME: Should only check VF * UF, but currently checks Step=max(VF*UF,
+ // minProfitableTripCount).
+ TripCountCheck = Builder.createICmp(ICmpInst::ICMP_ULT, DistanceToMax,
+ Builder.createExpandSCEV(Step), DL);
+ } else {
+ // TripCountCheck = false, folding tail implies positive vector trip
+ // count.
+ }
+ } else {
// TODO: Emit unconditional branch to vector preheader instead of
// conditional branch with known condition.
TripCount = SE.applyLoopGuards(TripCount, OrigLoop);
@@ -727,23 +744,6 @@ void VPlanTransforms::addMinimumIterationCheck(
TripCountCheck = Builder.createICmp(
CmpPred, TripCountVPV, MinTripCountVPV, DL, "min.iters.check");
} // else step known to be < trip count, use TripCountCheck preset to false.
- } else if (CheckNeededWithTailFolding) {
- // vscale is not necessarily a power-of-2, which means we cannot guarantee
- // an overflow to zero when updating induction variables and so an
- // additional overflow check is required before entering the vector loop.
-
- // Get the maximum unsigned value for the type.
- VPValue *MaxUIntTripCount = Plan.getOrAddLiveIn(ConstantInt::get(
- TripCountTy, cast<IntegerType>(TripCountTy)->getMask()));
- VPValue *DistanceToMax =
- Builder.createNaryOp(Instruction::Sub, {MaxUIntTripCount, TripCountVPV},
- DebugLoc::getUnknown());
-
- // Don't execute the vector loop if (UMax - n) < (VF * UF).
- // FIXME: Should only check VF * UF, but currently checks Step=max(VF*UF,
- // minProfitableTripCount).
- TripCountCheck = Builder.createICmp(ICmpInst::ICMP_ULT, DistanceToMax,
- Builder.createExpandSCEV(Step), DL);
}
VPInstruction *Term =
Builder.createNaryOp(VPInstruction::BranchOnCond, {TripCountCheck}, DL);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 28a85b707c5cd..13bea7d2294aa 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3507,12 +3507,11 @@ VPlanTransforms::expandSCEVs(VPlan &Plan, ScalarEvolution &SE) {
ExpSCEV->replaceAllUsesWith(Exp);
if (Plan.getTripCount() == ExpSCEV)
Plan.resetTripCount(Exp);
-
InsertPt = std::next(ExpSCEV->getIterator());
ExpSCEV->eraseFromParent();
}
assert(none_of(*Entry, IsaPred<VPExpandSCEVRecipe>) &&
- "VPExpandSCEVRecipes must be be at the beginning of the entry block, "
+ "VPExpandSCEVRecipes must be at the beginning of the entry block, "
"after any VPIRInstructions");
for (Instruction &I : *EntryBB) {
if (!Expander.isInsertedInstruction(&I) || isa<PHINode>(I))
>From 83272e80ebc289450921f9c11f2937137053d7b8 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 26 Aug 2025 13:09:52 +0100
Subject: [PATCH 6/7] !fixup address comments, thanks!
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f500059ea6601..98554310c74df 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -514,7 +514,8 @@ class InnerLoopVectorizer {
/// Creates a basic block for the scalar preheader. Both
   /// EpilogueVectorizerMainLoop and EpilogueVectorizerEpilogueLoop override
- /// the method to create blocks and checks needed for epilogue vectorization.
+ /// the method to create additional blocks and checks needed for epilogue
+ /// vectorization.
virtual BasicBlock *createVectorizedLoopSkeleton();
/// Fix the vectorized code, taking care of header phi's, and more.
@@ -697,7 +698,7 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
BFI, PSI, Check, Plan, EPI.MainLoopVF,
EPI.MainLoopVF, EPI.MainLoopUF) {}
/// Implements the interface for creating a vectorized skeleton using the
- /// *main loop* strategy (ie the first pass of vplan execution).
+ /// *main loop* strategy (i.e., the first pass of VPlan execution).
BasicBlock *createVectorizedLoopSkeleton() final;
protected:
@@ -729,7 +730,7 @@ class EpilogueVectorizerEpilogueLoop : public InnerLoopAndEpilogueVectorizer {
TripCount = EPI.TripCount;
}
/// Implements the interface for creating a vectorized skeleton using the
- /// *epilogue loop* strategy (ie the second pass of vplan execution).
+ /// *epilogue loop* strategy (i.e., the second pass of VPlan execution).
BasicBlock *createVectorizedLoopSkeleton() final;
protected:
@@ -2428,7 +2429,6 @@ static void addFullyUnrolledInstructionsToIgnore(
BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
// Create a new IR basic block for the scalar preheader.
createScalarPreheader("");
-
return LoopVectorPreHeader;
}
@@ -9431,7 +9431,7 @@ static bool processLoopInVPlanNativePath(
LVP.addMinimumIterationCheck(BestPlan, VF.Width, /*UF=*/1,
VF.MinProfitableTripCount);
- LVP.executePlan(VF.Width, 1, BestPlan, LB, DT, false);
+ LVP.executePlan(VF.Width, /*UF=*/1, BestPlan, LB, DT, false);
}
reportVectorization(ORE, L, VF, 1);
>From 113d991faca56be68ec378f5e8b130ef436bfd25 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 26 Aug 2025 15:06:42 +0100
Subject: [PATCH 7/7] !fixup rename variable, enforce order
---
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index da7baa263ff10..d32d2a9ad11f7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3509,7 +3509,6 @@ VPlanTransforms::expandSCEVs(VPlan &Plan, ScalarEvolution &SE) {
auto *Entry = cast<VPIRBasicBlock>(Plan.getEntry());
BasicBlock *EntryBB = Entry->getIRBasicBlock();
DenseMap<const SCEV *, Value *> ExpandedSCEVs;
- VPBasicBlock::iterator InsertPt;
for (VPRecipeBase &R : make_early_inc_range(*Entry)) {
if (isa<VPIRInstruction, VPIRPhi>(&R))
continue;
@@ -3524,16 +3523,21 @@ VPlanTransforms::expandSCEVs(VPlan &Plan, ScalarEvolution &SE) {
ExpSCEV->replaceAllUsesWith(Exp);
if (Plan.getTripCount() == ExpSCEV)
Plan.resetTripCount(Exp);
- InsertPt = std::next(ExpSCEV->getIterator());
ExpSCEV->eraseFromParent();
}
assert(none_of(*Entry, IsaPred<VPExpandSCEVRecipe>) &&
"VPExpandSCEVRecipes must be at the beginning of the entry block, "
"after any VPIRInstructions");
- for (Instruction &I : *EntryBB) {
- if (!Expander.isInsertedInstruction(&I) || isa<PHINode>(I))
+  // Add IR instructions that are in the entry basic block but not yet in the
+  // VPIRBasicBlock to the VPIRBasicBlock.
+ auto EI = Entry->begin();
+ for (Instruction &I : drop_end(*EntryBB)) {
+ if (EI != Entry->end() && isa<VPIRInstruction>(*EI) &&
+ &cast<VPIRInstruction>(&*EI)->getInstruction() == &I) {
+ EI++;
continue;
- VPIRInstruction::create(I)->insertBefore(*Entry, InsertPt);
+ }
+ VPIRInstruction::create(I)->insertBefore(*Entry, EI);
}
return ExpandedSCEVs;