[llvm] [LV] Avoid SCEVChecks when IV update doesn't overflow (PR #171605)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 11 02:28:06 PST 2025
https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/171605
>From fc6997e4cb411708752d4a1accd5169ed04a094b Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Wed, 10 Dec 2025 11:44:55 +0000
Subject: [PATCH] [LV] Avoid SCEVChecks when IV update doesn't overflow
We already check when IV update overflow checks are needed in different
places in LV: consolidate them into a single routine, and re-use it to
conditionally drop SCEVChecks in GeneratedRTChecks.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 144 +++---
.../AArch64/backedge-overflow.ll | 16 +-
.../AArch64/conditional-branches-cost.ll | 183 +++-----
.../AArch64/divs-with-scalable-vfs.ll | 84 +---
.../AArch64/epilog-vectorization-factors.ll | 11 +-
.../epilog-vectorization-widen-inductions.ll | 53 ++-
.../AArch64/induction-costs-sve.ll | 123 +----
.../LoopVectorize/AArch64/induction-costs.ll | 15 +-
.../AArch64/low_trip_count_predicates.ll | 237 +++++++---
.../LoopVectorize/AArch64/predicated-costs.ll | 61 +--
.../AArch64/simple_early_exit.ll | 19 +-
...-narrow-interleave-to-widen-memory-cost.ll | 55 +--
.../PowerPC/optimal-epilog-vectorization.ll | 22 +-
.../PowerPC/vplan-scalarivsext-crash.ll | 17 +-
.../RISCV/gather-scatter-cost.ll | 14 +-
.../RISCV/masked_gather_scatter.ll | 24 +-
.../RISCV/riscv-vector-reverse.ll | 60 +--
.../LoopVectorize/RISCV/strided-accesses.ll | 79 +---
.../LoopVectorize/X86/cost-model.ll | 33 +-
.../X86/divs-with-tail-folding.ll | 63 +--
.../X86/epilog-vectorization-inductions.ll | 12 +-
.../illegal-parallel-loop-uniform-write.ll | 16 +-
.../LoopVectorize/X86/interleave-cost.ll | 101 +---
.../LoopVectorize/X86/multi-exit-cost.ll | 16 +-
.../Transforms/LoopVectorize/X86/pr35432.ll | 23 +-
.../Transforms/LoopVectorize/X86/pr72969.ll | 22 +-
.../X86/replicating-load-store-costs.ll | 5 +-
.../X86/scev-checks-unprofitable.ll | 34 +-
.../X86/x86_fp80-vector-store.ll | 12 +-
.../LoopVectorize/branch-weights.ll | 16 +-
...log-vectorization-trunc-induction-steps.ll | 12 +-
.../first-order-recurrence-complex.ll | 22 +-
.../Transforms/LoopVectorize/induction.ll | 436 ++++--------------
...aved-accesses-requiring-scev-predicates.ll | 45 +-
.../LoopVectorize/load-deref-pred-align.ll | 19 +-
...o-fold-tail-by-masking-iv-external-uses.ll | 16 +-
.../Transforms/LoopVectorize/opaque-ptr.ll | 25 +-
.../optimal-epilog-vectorization.ll | 16 +-
llvm/test/Transforms/LoopVectorize/optsize.ll | 38 +-
.../LoopVectorize/pointer-induction.ll | 22 +-
.../pr30654-phiscev-sext-trunc.ll | 86 +---
llvm/test/Transforms/LoopVectorize/pr34681.ll | 28 +-
llvm/test/Transforms/LoopVectorize/pr37248.ll | 34 +-
llvm/test/Transforms/LoopVectorize/pr45259.ll | 13 +-
.../preserve-dbg-loc-and-loop-metadata.ll | 40 +-
.../reduction-minmax-users-and-predicated.ll | 9 +-
.../LoopVectorize/reverse_induction.ll | 30 +-
.../LoopVectorize/runtime-check-known-true.ll | 13 +-
.../runtime-check-small-clamped-bounds.ll | 30 +-
.../runtime-checks-difference.ll | 17 +-
.../scev-exit-phi-invalidation.ll | 43 +-
.../LoopVectorize/scev-predicate-reasoning.ll | 40 +-
.../LoopVectorize/single_early_exit.ll | 17 +-
...ive-path-inner-loop-with-runtime-checks.ll | 37 +-
.../X86/SROA-after-final-loop-unrolling-2.ll | 82 ++--
55 files changed, 829 insertions(+), 1911 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 15d0fa41bd902..adf149e641e8f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1766,6 +1766,64 @@ class LoopVectorizationCostModel {
};
} // end namespace llvm
+static std::optional<unsigned> getMaxVScale(const Function &F,
+ const TargetTransformInfo &TTI) {
+ if (std::optional<unsigned> MaxVScale = TTI.getMaxVScale())
+ return MaxVScale;
+
+ if (F.hasFnAttribute(Attribute::VScaleRange))
+ return F.getFnAttribute(Attribute::VScaleRange).getVScaleRangeMax();
+
+ return std::nullopt;
+}
+
+/// For the given VF and UF and maximum trip count computed for the loop, return
+/// whether the induction variable might overflow in the vectorized loop. If
+/// not, then we know a runtime overflow check always evaluates to false and can
+/// be removed.
+static bool
+isIndvarOverflowCheckKnownFalse(const LoopVectorizationCostModel *Cost,
+ ElementCount VF,
+ std::optional<unsigned> UF = std::nullopt) {
+ // Always be conservative if we don't know the exact unroll factor.
+ unsigned MaxUF = UF ? *UF : Cost->TTI.getMaxInterleaveFactor(VF);
+
+ IntegerType *IdxTy = Cost->Legal->getWidestInductionType();
+ APInt MaxUIntTripCount = IdxTy->getMask();
+
+ // We know the runtime overflow check is known false iff the (max) trip-count
+ // is known and (max) trip-count + (VF * UF) does not overflow in the type of
+ // the vector loop induction variable.
+ if (unsigned TC = Cost->PSE.getSmallConstantMaxTripCount()) {
+ uint64_t MaxVF = VF.getKnownMinValue();
+ if (VF.isScalable()) {
+ std::optional<unsigned> MaxVScale =
+ getMaxVScale(*Cost->TheFunction, Cost->TTI);
+ if (!MaxVScale)
+ return false;
+ MaxVF *= *MaxVScale;
+ }
+
+ return (MaxUIntTripCount - TC).ugt(MaxVF * MaxUF);
+ }
+
+ return false;
+}
+
+/// Checks whether an IndVar overflow check is needed using
+/// isIndvarOverflowCheckKnownFalse, with additional information about the
+/// tail-folding style.
+static bool isIndvarOverflowCheckNeeded(const LoopVectorizationCostModel &CM,
+ ElementCount VF, unsigned IC) {
+ // vscale is not necessarily a power-of-2, which means we cannot guarantee
+ // an overflow to zero when updating induction variables and so an
+ // additional overflow check is required before entering the vector loop.
+ return VF.isScalable() && !CM.TTI.isVScaleKnownToBeAPowerOfTwo() &&
+ !isIndvarOverflowCheckKnownFalse(&CM, VF, IC) &&
+ CM.getTailFoldingStyle() !=
+ TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck;
+}
+
namespace {
/// Helper struct to manage generating runtime checks for vectorization.
///
@@ -1801,17 +1859,17 @@ class GeneratedRTChecks {
PredicatedScalarEvolution &PSE;
- /// The kind of cost that we are calculating
- TTI::TargetCostKind CostKind;
+ /// The CostModel.
+ const LoopVectorizationCostModel &CM;
public:
GeneratedRTChecks(PredicatedScalarEvolution &PSE, DominatorTree *DT,
LoopInfo *LI, TargetTransformInfo *TTI,
- const DataLayout &DL, TTI::TargetCostKind CostKind)
+ const DataLayout &DL, LoopVectorizationCostModel &CM)
: DT(DT), LI(LI), TTI(TTI),
SCEVExp(*PSE.getSE(), DL, "scev.check", /*PreserveLCSSA=*/false),
MemCheckExp(*PSE.getSE(), DL, "scev.check", /*PreserveLCSSA=*/false),
- PSE(PSE), CostKind(CostKind) {}
+ PSE(PSE), CM(CM) {}
/// Generate runtime checks in SCEVCheckBlock and MemCheckBlock, so we can
/// accurately estimate the cost of the runtime checks. The blocks are
@@ -1833,11 +1891,18 @@ class GeneratedRTChecks {
BasicBlock *LoopHeader = L->getHeader();
BasicBlock *Preheader = L->getLoopPreheader();
+ // SCEVChecks are droppable when the UnionPred is always true, or when
+ // IndVar overflow checks are not needed, under the condition that we don't
+ // drop stride-versioning checks.
+ bool SCEVChecksAreDroppable =
+ UnionPred.isAlwaysTrue() || (!isIndvarOverflowCheckNeeded(CM, VF, IC) &&
+ LAI.getSymbolicStrides().empty());
+
// Use SplitBlock to create blocks for SCEV & memory runtime checks to
// ensure the blocks are properly added to LoopInfo & DominatorTree. Those
// may be used by SCEVExpander. The blocks will be un-linked from their
// predecessors and removed from LI & DT at the end of the function.
- if (!UnionPred.isAlwaysTrue()) {
+ if (!SCEVChecksAreDroppable) {
SCEVCheckBlock = SplitBlock(Preheader, Preheader->getTerminator(), DT, LI,
nullptr, "vector.scevcheck");
@@ -1935,7 +2000,7 @@ class GeneratedRTChecks {
for (Instruction &I : *SCEVCheckBlock) {
if (SCEVCheckBlock->getTerminator() == &I)
continue;
- InstructionCost C = TTI->getInstructionCost(&I, CostKind);
+ InstructionCost C = TTI->getInstructionCost(&I, CM.CostKind);
LLVM_DEBUG(dbgs() << " " << C << " for " << I << "\n");
RTCheckCost += C;
}
@@ -1944,7 +2009,7 @@ class GeneratedRTChecks {
for (Instruction &I : *MemCheckBlock) {
if (MemCheckBlock->getTerminator() == &I)
continue;
- InstructionCost C = TTI->getInstructionCost(&I, CostKind);
+ InstructionCost C = TTI->getInstructionCost(&I, CM.CostKind);
LLVM_DEBUG(dbgs() << " " << C << " for " << I << "\n");
MemCheckCost += C;
}
@@ -2222,49 +2287,6 @@ emitTransformedIndex(IRBuilderBase &B, Value *Index, Value *StartValue,
llvm_unreachable("invalid enum");
}
-static std::optional<unsigned> getMaxVScale(const Function &F,
- const TargetTransformInfo &TTI) {
- if (std::optional<unsigned> MaxVScale = TTI.getMaxVScale())
- return MaxVScale;
-
- if (F.hasFnAttribute(Attribute::VScaleRange))
- return F.getFnAttribute(Attribute::VScaleRange).getVScaleRangeMax();
-
- return std::nullopt;
-}
-
-/// For the given VF and UF and maximum trip count computed for the loop, return
-/// whether the induction variable might overflow in the vectorized loop. If not,
-/// then we know a runtime overflow check always evaluates to false and can be
-/// removed.
-static bool isIndvarOverflowCheckKnownFalse(
- const LoopVectorizationCostModel *Cost,
- ElementCount VF, std::optional<unsigned> UF = std::nullopt) {
- // Always be conservative if we don't know the exact unroll factor.
- unsigned MaxUF = UF ? *UF : Cost->TTI.getMaxInterleaveFactor(VF);
-
- IntegerType *IdxTy = Cost->Legal->getWidestInductionType();
- APInt MaxUIntTripCount = IdxTy->getMask();
-
- // We know the runtime overflow check is known false iff the (max) trip-count
- // is known and (max) trip-count + (VF * UF) does not overflow in the type of
- // the vector loop induction variable.
- if (unsigned TC = Cost->PSE.getSmallConstantMaxTripCount()) {
- uint64_t MaxVF = VF.getKnownMinValue();
- if (VF.isScalable()) {
- std::optional<unsigned> MaxVScale =
- getMaxVScale(*Cost->TheFunction, Cost->TTI);
- if (!MaxVScale)
- return false;
- MaxVF *= *MaxVScale;
- }
-
- return (MaxUIntTripCount - TC).ugt(MaxVF * MaxUF);
- }
-
- return false;
-}
-
// Return whether we allow using masked interleave-groups (for dealing with
// strided loads/stores that reside in predicated blocks, or for dealing
// with gaps).
@@ -2354,13 +2376,7 @@ Value *EpilogueVectorizerMainLoop::createIterationCountCheck(
// check is known to be true, or known to be false.
CheckMinIters = Builder.CreateICmp(P, Count, Step, "min.iters.check");
} // else step known to be < trip count, use CheckMinIters preset to false.
- } else if (VF.isScalable() && !TTI->isVScaleKnownToBeAPowerOfTwo() &&
- !isIndvarOverflowCheckKnownFalse(Cost, VF, UF) &&
- Style != TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck) {
- // vscale is not necessarily a power-of-2, which means we cannot guarantee
- // an overflow to zero when updating induction variables and so an
- // additional overflow check is required before entering the vector loop.
-
+ } else if (isIndvarOverflowCheckNeeded(*Cost, VF, UF)) {
// Get the maximum unsigned value for the type.
Value *MaxUIntTripCount =
ConstantInt::get(CountTy, cast<IntegerType>(CountTy)->getMask());
@@ -9122,14 +9138,6 @@ void LoopVectorizationPlanner::attachRuntimeChecks(
void LoopVectorizationPlanner::addMinimumIterationCheck(
VPlan &Plan, ElementCount VF, unsigned UF,
ElementCount MinProfitableTripCount) const {
- // vscale is not necessarily a power-of-2, which means we cannot guarantee
- // an overflow to zero when updating induction variables and so an
- // additional overflow check is required before entering the vector loop.
- bool IsIndvarOverflowCheckNeededForVF =
- VF.isScalable() && !TTI.isVScaleKnownToBeAPowerOfTwo() &&
- !isIndvarOverflowCheckKnownFalse(&CM, VF, UF) &&
- CM.getTailFoldingStyle() !=
- TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck;
const uint32_t *BranchWeigths =
hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())
? &MinItersBypassWeights[0]
@@ -9137,7 +9145,7 @@ void LoopVectorizationPlanner::addMinimumIterationCheck(
VPlanTransforms::addMinimumIterationCheck(
Plan, VF, UF, MinProfitableTripCount,
CM.requiresScalarEpilogue(VF.isVector()), CM.foldTailByMasking(),
- IsIndvarOverflowCheckNeededForVF, OrigLoop, BranchWeigths,
+ isIndvarOverflowCheckNeeded(CM, VF, UF), OrigLoop, BranchWeigths,
OrigLoop->getLoopPredecessor()->getTerminator()->getDebugLoc(),
*PSE.getSE());
}
@@ -9249,7 +9257,7 @@ static bool processLoopInVPlanNativePath(
VPlan &BestPlan = LVP.getPlanFor(VF.Width);
{
- GeneratedRTChecks Checks(PSE, DT, LI, TTI, F->getDataLayout(), CM.CostKind);
+ GeneratedRTChecks Checks(PSE, DT, LI, TTI, F->getDataLayout(), CM);
InnerLoopVectorizer LB(L, PSE, LI, DT, TTI, AC, VF.Width, /*UF=*/1, &CM,
Checks, BestPlan);
LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \""
@@ -10085,7 +10093,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
if (ORE->allowExtraAnalysis(LV_NAME))
LVP.emitInvalidCostRemarks(ORE);
- GeneratedRTChecks Checks(PSE, DT, LI, TTI, F->getDataLayout(), CM.CostKind);
+ GeneratedRTChecks Checks(PSE, DT, LI, TTI, F->getDataLayout(), CM);
if (LVP.hasPlanWithVF(VF.Width)) {
// Select the interleave count.
IC = LVP.selectInterleaveCount(LVP.getPlanFor(VF.Width), VF.Width, VF.Cost);
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll b/llvm/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll
index 4f7195934a790..24d2e54a6a0d3 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll
@@ -10,8 +10,8 @@
; CHECK-LABEL: test_sge
-; CHECK-LABEL: vector.scevcheck
-; CHECK-LABEL: vector.body
+; CHECK-NOT: vector.scevcheck
+; CHECK: vector.body
define void @test_sge(ptr noalias %A,
ptr noalias %B,
ptr noalias %C, i32 %N) {
@@ -48,8 +48,8 @@ for.end:
}
; CHECK-LABEL: test_uge
-; CHECK-LABEL: vector.scevcheck
-; CHECK-LABEL: vector.body
+; CHECK-NOT: vector.scevcheck
+; CHECK: vector.body
define void @test_uge(ptr noalias %A,
ptr noalias %B,
ptr noalias %C, i32 %N, i32 %Offset) {
@@ -88,8 +88,8 @@ for.end:
}
; CHECK-LABEL: test_ule
-; CHECK-LABEL: vector.scevcheck
-; CHECK-LABEL: vector.body
+; CHECK-NOT: vector.scevcheck
+; CHECK: vector.body
define void @test_ule(ptr noalias %A,
ptr noalias %B,
ptr noalias %C, i32 %N,
@@ -127,8 +127,8 @@ for.end:
}
; CHECK-LABEL: test_sle
-; CHECK-LABEL: vector.scevcheck
-; CHECK-LABEL: vector.body
+; CHECK-NOT: vector.scevcheck
+; CHECK: vector.body
define void @test_sle(ptr noalias %A,
ptr noalias %B,
ptr noalias %C, i32 %N,
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
index b549a06f08f8c..75ac8c1de36a1 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
@@ -658,38 +658,14 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; DEFAULT-NEXT: [[ENTRY:.*:]]
; DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8
-; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; DEFAULT: [[VECTOR_SCEVCHECK]]:
-; DEFAULT-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]])
-; DEFAULT-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
-; DEFAULT-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
-; DEFAULT-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[MUL_RESULT]]
-; DEFAULT-NEXT: [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[DST]]
-; DEFAULT-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]]
-; DEFAULT-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 4
-; DEFAULT-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]])
-; DEFAULT-NEXT: [[MUL_RESULT2:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0
-; DEFAULT-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1
-; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT2]]
-; DEFAULT-NEXT: [[TMP7:%.*]] = icmp ult ptr [[TMP6]], [[SCEVGEP]]
-; DEFAULT-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW3]]
-; DEFAULT-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[DST]], i64 8
-; DEFAULT-NEXT: [[MUL5:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]])
-; DEFAULT-NEXT: [[MUL_RESULT6:%.*]] = extractvalue { i64, i1 } [[MUL5]], 0
-; DEFAULT-NEXT: [[MUL_OVERFLOW7:%.*]] = extractvalue { i64, i1 } [[MUL5]], 1
-; DEFAULT-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[SCEVGEP4]], i64 [[MUL_RESULT6]]
-; DEFAULT-NEXT: [[TMP11:%.*]] = icmp ult ptr [[TMP10]], [[SCEVGEP4]]
-; DEFAULT-NEXT: [[TMP12:%.*]] = or i1 [[TMP11]], [[MUL_OVERFLOW7]]
-; DEFAULT-NEXT: [[TMP13:%.*]] = or i1 [[TMP4]], [[TMP8]]
-; DEFAULT-NEXT: [[TMP14:%.*]] = or i1 [[TMP13]], [[TMP12]]
-; DEFAULT-NEXT: br i1 [[TMP14]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; DEFAULT: [[VECTOR_PH]]:
; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 8
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]]
; DEFAULT: [[VECTOR_BODY]]:
-; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE27:.*]] ]
-; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE27]] ]
+; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE20:.*]] ]
+; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE20]] ]
; DEFAULT-NEXT: [[TMP15:%.*]] = load float, ptr [[SRC_1]], align 4
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <8 x float> poison, float [[TMP15]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <8 x float> [[BROADCAST_SPLATINSERT8]], <8 x float> poison, <8 x i32> zeroinitializer
@@ -723,8 +699,8 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE]]
; DEFAULT: [[PRED_STORE_CONTINUE]]:
; DEFAULT-NEXT: [[TMP31:%.*]] = extractelement <8 x i1> [[TMP22]], i32 1
-; DEFAULT-NEXT: br i1 [[TMP31]], label %[[PRED_STORE_IF14:.*]], label %[[PRED_STORE_CONTINUE15:.*]]
-; DEFAULT: [[PRED_STORE_IF14]]:
+; DEFAULT-NEXT: br i1 [[TMP31]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; DEFAULT: [[PRED_STORE_IF7]]:
; DEFAULT-NEXT: [[TMP32:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 1
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP32]], align 4
; DEFAULT-NEXT: [[TMP33:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 1
@@ -735,11 +711,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP36]], align 4
; DEFAULT-NEXT: [[TMP37:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 1
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP37]], align 4
-; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE15]]
-; DEFAULT: [[PRED_STORE_CONTINUE15]]:
+; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE8]]
+; DEFAULT: [[PRED_STORE_CONTINUE8]]:
; DEFAULT-NEXT: [[TMP38:%.*]] = extractelement <8 x i1> [[TMP22]], i32 2
-; DEFAULT-NEXT: br i1 [[TMP38]], label %[[PRED_STORE_IF16:.*]], label %[[PRED_STORE_CONTINUE17:.*]]
-; DEFAULT: [[PRED_STORE_IF16]]:
+; DEFAULT-NEXT: br i1 [[TMP38]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; DEFAULT: [[PRED_STORE_IF9]]:
; DEFAULT-NEXT: [[TMP39:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 2
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP39]], align 4
; DEFAULT-NEXT: [[TMP40:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 2
@@ -750,11 +726,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP43]], align 4
; DEFAULT-NEXT: [[TMP44:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 2
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP44]], align 4
-; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE17]]
-; DEFAULT: [[PRED_STORE_CONTINUE17]]:
+; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE10]]
+; DEFAULT: [[PRED_STORE_CONTINUE10]]:
; DEFAULT-NEXT: [[TMP45:%.*]] = extractelement <8 x i1> [[TMP22]], i32 3
-; DEFAULT-NEXT: br i1 [[TMP45]], label %[[PRED_STORE_IF18:.*]], label %[[PRED_STORE_CONTINUE19:.*]]
-; DEFAULT: [[PRED_STORE_IF18]]:
+; DEFAULT-NEXT: br i1 [[TMP45]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
+; DEFAULT: [[PRED_STORE_IF11]]:
; DEFAULT-NEXT: [[TMP46:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 3
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP46]], align 4
; DEFAULT-NEXT: [[TMP47:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 3
@@ -765,11 +741,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP50]], align 4
; DEFAULT-NEXT: [[TMP51:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 3
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP51]], align 4
-; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE19]]
-; DEFAULT: [[PRED_STORE_CONTINUE19]]:
+; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE12]]
+; DEFAULT: [[PRED_STORE_CONTINUE12]]:
; DEFAULT-NEXT: [[TMP52:%.*]] = extractelement <8 x i1> [[TMP22]], i32 4
-; DEFAULT-NEXT: br i1 [[TMP52]], label %[[PRED_STORE_IF20:.*]], label %[[PRED_STORE_CONTINUE21:.*]]
-; DEFAULT: [[PRED_STORE_IF20]]:
+; DEFAULT-NEXT: br i1 [[TMP52]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
+; DEFAULT: [[PRED_STORE_IF13]]:
; DEFAULT-NEXT: [[TMP53:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 4
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP53]], align 4
; DEFAULT-NEXT: [[TMP54:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 4
@@ -780,11 +756,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP57]], align 4
; DEFAULT-NEXT: [[TMP58:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 4
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP58]], align 4
-; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE21]]
-; DEFAULT: [[PRED_STORE_CONTINUE21]]:
+; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE14]]
+; DEFAULT: [[PRED_STORE_CONTINUE14]]:
; DEFAULT-NEXT: [[TMP59:%.*]] = extractelement <8 x i1> [[TMP22]], i32 5
-; DEFAULT-NEXT: br i1 [[TMP59]], label %[[PRED_STORE_IF22:.*]], label %[[PRED_STORE_CONTINUE23:.*]]
-; DEFAULT: [[PRED_STORE_IF22]]:
+; DEFAULT-NEXT: br i1 [[TMP59]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
+; DEFAULT: [[PRED_STORE_IF15]]:
; DEFAULT-NEXT: [[TMP60:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 5
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP60]], align 4
; DEFAULT-NEXT: [[TMP61:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 5
@@ -795,11 +771,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP64]], align 4
; DEFAULT-NEXT: [[TMP65:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 5
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP65]], align 4
-; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE23]]
-; DEFAULT: [[PRED_STORE_CONTINUE23]]:
+; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE16]]
+; DEFAULT: [[PRED_STORE_CONTINUE16]]:
; DEFAULT-NEXT: [[TMP66:%.*]] = extractelement <8 x i1> [[TMP22]], i32 6
-; DEFAULT-NEXT: br i1 [[TMP66]], label %[[PRED_STORE_IF24:.*]], label %[[PRED_STORE_CONTINUE25:.*]]
-; DEFAULT: [[PRED_STORE_IF24]]:
+; DEFAULT-NEXT: br i1 [[TMP66]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
+; DEFAULT: [[PRED_STORE_IF17]]:
; DEFAULT-NEXT: [[TMP67:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 6
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP67]], align 4
; DEFAULT-NEXT: [[TMP68:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 6
@@ -810,11 +786,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP71]], align 4
; DEFAULT-NEXT: [[TMP72:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 6
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP72]], align 4
-; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE25]]
-; DEFAULT: [[PRED_STORE_CONTINUE25]]:
+; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE18]]
+; DEFAULT: [[PRED_STORE_CONTINUE18]]:
; DEFAULT-NEXT: [[TMP73:%.*]] = extractelement <8 x i1> [[TMP22]], i32 7
-; DEFAULT-NEXT: br i1 [[TMP73]], label %[[PRED_STORE_IF26:.*]], label %[[PRED_STORE_CONTINUE27]]
-; DEFAULT: [[PRED_STORE_IF26]]:
+; DEFAULT-NEXT: br i1 [[TMP73]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20]]
+; DEFAULT: [[PRED_STORE_IF19]]:
; DEFAULT-NEXT: [[TMP74:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 7
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP74]], align 4
; DEFAULT-NEXT: [[TMP75:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 7
@@ -825,8 +801,8 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP78]], align 4
; DEFAULT-NEXT: [[TMP79:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 7
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP79]], align 4
-; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE27]]
-; DEFAULT: [[PRED_STORE_CONTINUE27]]:
+; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE20]]
+; DEFAULT: [[PRED_STORE_CONTINUE20]]:
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
; DEFAULT-NEXT: [[TMP80:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -840,31 +816,7 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; PRED-SAME: ptr noalias [[SRC_1:%.*]], ptr noalias [[SRC_2:%.*]], ptr noalias [[SRC_3:%.*]], ptr noalias [[SRC_4:%.*]], ptr noalias [[DST:%.*]], i64 [[N:%.*]]) #[[ATTR3:[0-9]+]] {
; PRED-NEXT: [[ENTRY:.*:]]
; PRED-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
-; PRED-NEXT: br label %[[VECTOR_SCEVCHECK:.*]]
-; PRED: [[VECTOR_SCEVCHECK]]:
-; PRED-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]])
-; PRED-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
-; PRED-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
-; PRED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[MUL_RESULT]]
-; PRED-NEXT: [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[DST]]
-; PRED-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]]
-; PRED-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 4
-; PRED-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]])
-; PRED-NEXT: [[MUL_RESULT2:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0
-; PRED-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1
-; PRED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT2]]
-; PRED-NEXT: [[TMP7:%.*]] = icmp ult ptr [[TMP6]], [[SCEVGEP]]
-; PRED-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW3]]
-; PRED-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[DST]], i64 8
-; PRED-NEXT: [[MUL5:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]])
-; PRED-NEXT: [[MUL_RESULT6:%.*]] = extractvalue { i64, i1 } [[MUL5]], 0
-; PRED-NEXT: [[MUL_OVERFLOW7:%.*]] = extractvalue { i64, i1 } [[MUL5]], 1
-; PRED-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[SCEVGEP4]], i64 [[MUL_RESULT6]]
-; PRED-NEXT: [[TMP11:%.*]] = icmp ult ptr [[TMP10]], [[SCEVGEP4]]
-; PRED-NEXT: [[TMP12:%.*]] = or i1 [[TMP11]], [[MUL_OVERFLOW7]]
-; PRED-NEXT: [[TMP13:%.*]] = or i1 [[TMP4]], [[TMP8]]
-; PRED-NEXT: [[TMP14:%.*]] = or i1 [[TMP13]], [[TMP12]]
-; PRED-NEXT: br i1 [[TMP14]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; PRED-NEXT: br label %[[VECTOR_PH:.*]]
; PRED: [[VECTOR_PH]]:
; PRED-NEXT: [[TMP15:%.*]] = sub i64 [[TMP0]], 8
; PRED-NEXT: [[TMP16:%.*]] = icmp ugt i64 [[TMP0]], 8
@@ -872,9 +824,9 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 0, i64 [[TMP0]])
; PRED-NEXT: br label %[[VECTOR_BODY:.*]]
; PRED: [[VECTOR_BODY]]:
-; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE27:.*]] ]
-; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[PRED_STORE_CONTINUE27]] ]
-; PRED-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE27]] ]
+; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE20:.*]] ]
+; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[PRED_STORE_CONTINUE20]] ]
+; PRED-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE20]] ]
; PRED-NEXT: [[TMP18:%.*]] = load float, ptr [[SRC_1]], align 4
; PRED-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <8 x float> poison, float [[TMP18]], i64 0
; PRED-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <8 x float> [[BROADCAST_SPLATINSERT8]], <8 x float> poison, <8 x i32> zeroinitializer
@@ -909,8 +861,8 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE]]
; PRED: [[PRED_STORE_CONTINUE]]:
; PRED-NEXT: [[TMP35:%.*]] = extractelement <8 x i1> [[TMP26]], i32 1
-; PRED-NEXT: br i1 [[TMP35]], label %[[PRED_STORE_IF14:.*]], label %[[PRED_STORE_CONTINUE15:.*]]
-; PRED: [[PRED_STORE_IF14]]:
+; PRED-NEXT: br i1 [[TMP35]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; PRED: [[PRED_STORE_IF7]]:
; PRED-NEXT: [[TMP36:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 1
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP36]], align 4
; PRED-NEXT: [[TMP37:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 1
@@ -921,11 +873,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP40]], align 4
; PRED-NEXT: [[TMP41:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 1
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP41]], align 4
-; PRED-NEXT: br label %[[PRED_STORE_CONTINUE15]]
-; PRED: [[PRED_STORE_CONTINUE15]]:
+; PRED-NEXT: br label %[[PRED_STORE_CONTINUE8]]
+; PRED: [[PRED_STORE_CONTINUE8]]:
; PRED-NEXT: [[TMP42:%.*]] = extractelement <8 x i1> [[TMP26]], i32 2
-; PRED-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF16:.*]], label %[[PRED_STORE_CONTINUE17:.*]]
-; PRED: [[PRED_STORE_IF16]]:
+; PRED-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; PRED: [[PRED_STORE_IF9]]:
; PRED-NEXT: [[TMP43:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 2
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP43]], align 4
; PRED-NEXT: [[TMP44:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 2
@@ -936,11 +888,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP47]], align 4
; PRED-NEXT: [[TMP48:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 2
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP48]], align 4
-; PRED-NEXT: br label %[[PRED_STORE_CONTINUE17]]
-; PRED: [[PRED_STORE_CONTINUE17]]:
+; PRED-NEXT: br label %[[PRED_STORE_CONTINUE10]]
+; PRED: [[PRED_STORE_CONTINUE10]]:
; PRED-NEXT: [[TMP49:%.*]] = extractelement <8 x i1> [[TMP26]], i32 3
-; PRED-NEXT: br i1 [[TMP49]], label %[[PRED_STORE_IF18:.*]], label %[[PRED_STORE_CONTINUE19:.*]]
-; PRED: [[PRED_STORE_IF18]]:
+; PRED-NEXT: br i1 [[TMP49]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
+; PRED: [[PRED_STORE_IF11]]:
; PRED-NEXT: [[TMP50:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 3
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP50]], align 4
; PRED-NEXT: [[TMP51:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 3
@@ -951,11 +903,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP54]], align 4
; PRED-NEXT: [[TMP55:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 3
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP55]], align 4
-; PRED-NEXT: br label %[[PRED_STORE_CONTINUE19]]
-; PRED: [[PRED_STORE_CONTINUE19]]:
+; PRED-NEXT: br label %[[PRED_STORE_CONTINUE12]]
+; PRED: [[PRED_STORE_CONTINUE12]]:
; PRED-NEXT: [[TMP56:%.*]] = extractelement <8 x i1> [[TMP26]], i32 4
-; PRED-NEXT: br i1 [[TMP56]], label %[[PRED_STORE_IF20:.*]], label %[[PRED_STORE_CONTINUE21:.*]]
-; PRED: [[PRED_STORE_IF20]]:
+; PRED-NEXT: br i1 [[TMP56]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
+; PRED: [[PRED_STORE_IF13]]:
; PRED-NEXT: [[TMP57:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 4
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP57]], align 4
; PRED-NEXT: [[TMP58:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 4
@@ -966,11 +918,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP61]], align 4
; PRED-NEXT: [[TMP62:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 4
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP62]], align 4
-; PRED-NEXT: br label %[[PRED_STORE_CONTINUE21]]
-; PRED: [[PRED_STORE_CONTINUE21]]:
+; PRED-NEXT: br label %[[PRED_STORE_CONTINUE14]]
+; PRED: [[PRED_STORE_CONTINUE14]]:
; PRED-NEXT: [[TMP63:%.*]] = extractelement <8 x i1> [[TMP26]], i32 5
-; PRED-NEXT: br i1 [[TMP63]], label %[[PRED_STORE_IF22:.*]], label %[[PRED_STORE_CONTINUE23:.*]]
-; PRED: [[PRED_STORE_IF22]]:
+; PRED-NEXT: br i1 [[TMP63]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
+; PRED: [[PRED_STORE_IF15]]:
; PRED-NEXT: [[TMP64:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 5
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP64]], align 4
; PRED-NEXT: [[TMP65:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 5
@@ -981,11 +933,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP68]], align 4
; PRED-NEXT: [[TMP69:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 5
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP69]], align 4
-; PRED-NEXT: br label %[[PRED_STORE_CONTINUE23]]
-; PRED: [[PRED_STORE_CONTINUE23]]:
+; PRED-NEXT: br label %[[PRED_STORE_CONTINUE16]]
+; PRED: [[PRED_STORE_CONTINUE16]]:
; PRED-NEXT: [[TMP70:%.*]] = extractelement <8 x i1> [[TMP26]], i32 6
-; PRED-NEXT: br i1 [[TMP70]], label %[[PRED_STORE_IF24:.*]], label %[[PRED_STORE_CONTINUE25:.*]]
-; PRED: [[PRED_STORE_IF24]]:
+; PRED-NEXT: br i1 [[TMP70]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
+; PRED: [[PRED_STORE_IF17]]:
; PRED-NEXT: [[TMP71:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 6
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP71]], align 4
; PRED-NEXT: [[TMP72:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 6
@@ -996,11 +948,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP75]], align 4
; PRED-NEXT: [[TMP76:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 6
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP76]], align 4
-; PRED-NEXT: br label %[[PRED_STORE_CONTINUE25]]
-; PRED: [[PRED_STORE_CONTINUE25]]:
+; PRED-NEXT: br label %[[PRED_STORE_CONTINUE18]]
+; PRED: [[PRED_STORE_CONTINUE18]]:
; PRED-NEXT: [[TMP77:%.*]] = extractelement <8 x i1> [[TMP26]], i32 7
-; PRED-NEXT: br i1 [[TMP77]], label %[[PRED_STORE_IF26:.*]], label %[[PRED_STORE_CONTINUE27]]
-; PRED: [[PRED_STORE_IF26]]:
+; PRED-NEXT: br i1 [[TMP77]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20]]
+; PRED: [[PRED_STORE_IF19]]:
; PRED-NEXT: [[TMP78:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 7
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP78]], align 4
; PRED-NEXT: [[TMP79:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 7
@@ -1011,8 +963,8 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP82]], align 4
; PRED-NEXT: [[TMP83:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 7
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP83]], align 4
-; PRED-NEXT: br label %[[PRED_STORE_CONTINUE27]]
-; PRED: [[PRED_STORE_CONTINUE27]]:
+; PRED-NEXT: br label %[[PRED_STORE_CONTINUE20]]
+; PRED: [[PRED_STORE_CONTINUE20]]:
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 [[INDEX]], i64 [[TMP17]])
; PRED-NEXT: [[TMP84:%.*]] = extractelement <8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
@@ -1020,8 +972,9 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
; PRED-NEXT: br i1 [[TMP85]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; PRED: [[MIDDLE_BLOCK]]:
-; PRED-NEXT: br [[EXIT:label %.*]]
-; PRED: [[SCALAR_PH]]:
+; PRED-NEXT: br label %[[EXIT:.*]]
+; PRED: [[EXIT]]:
+; PRED-NEXT: ret void
;
entry:
br label %loop.header
@@ -1124,7 +1077,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) {
; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; PRED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24
-; PRED-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; PRED-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; PRED: [[MIDDLE_BLOCK]]:
; PRED-NEXT: br label %[[EXIT:.*]]
; PRED: [[EXIT]]:
@@ -1313,7 +1266,7 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 {
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP11]])
; PRED-NEXT: [[TMP28:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
; PRED-NEXT: [[TMP29:%.*]] = xor i1 [[TMP28]], true
-; PRED-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; PRED-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; PRED: [[MIDDLE_BLOCK]]:
; PRED-NEXT: br [[EXIT:label %.*]]
; PRED: [[SCALAR_PH]]:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
index 72e813b62025f..15f910d7c96aa 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
@@ -9,16 +9,8 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) {
; CHECK-NEXT: [[CONV61:%.*]] = zext i32 [[X]] to i64
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP10]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 8)
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; CHECK: [[VECTOR_SCEVCHECK]]:
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[N]], -1
-; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
-; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[TMP3]], 4294967295
-; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
-; CHECK-NEXT: br i1 [[TMP7]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4
@@ -48,7 +40,7 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) {
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
@@ -97,14 +89,7 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
; CHECK-SAME: ptr [[DST:%.*]], i32 [[X:%.*]], i64 [[M:%.*]], i64 [[CONV6:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CONV61:%.*]] = zext i32 [[X]] to i64
-; CHECK-NEXT: br label %[[VECTOR_SCEVCHECK:.*]]
-; CHECK: [[VECTOR_SCEVCHECK]]:
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]]
-; CHECK-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2
@@ -148,28 +133,6 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
; CHECK-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
-; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
-; CHECK-NEXT: [[C:%.*]] = icmp ule i64 [[IV]], [[M]]
-; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
-; CHECK: [[THEN]]:
-; CHECK-NEXT: [[DIV18:%.*]] = sdiv i64 [[M]], [[CONV6]]
-; CHECK-NEXT: [[CONV20:%.*]] = trunc i64 [[DIV18]] to i32
-; CHECK-NEXT: [[MUL30:%.*]] = mul i64 [[DIV18]], [[CONV61]]
-; CHECK-NEXT: [[SUB31:%.*]] = sub i64 [[IV]], [[MUL30]]
-; CHECK-NEXT: [[CONV34:%.*]] = trunc i64 [[SUB31]] to i32
-; CHECK-NEXT: [[MUL35:%.*]] = mul i32 [[X]], [[CONV20]]
-; CHECK-NEXT: [[ADD36:%.*]] = add i32 [[MUL35]], [[CONV34]]
-; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD36]] to i64
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr double, ptr [[DST]], i64 [[IDXPROM]]
-; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP]], align 8
-; CHECK-NEXT: br label %[[LOOP_LATCH]]
-; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
@@ -212,13 +175,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
; CHECK-NEXT: [[MUL_1_I:%.*]] = mul i64 [[X]], [[X]]
; CHECK-NEXT: [[MUL_2_I:%.*]] = mul i64 [[MUL_1_I]], [[X]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
-; CHECK-NEXT: br label %[[VECTOR_SCEVCHECK:.*]]
-; CHECK: [[VECTOR_SCEVCHECK]]:
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[N]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[N]], 4294967295
-; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]]
-; CHECK-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2
@@ -262,32 +219,9 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
; CHECK-NEXT: [[TMP39:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
; CHECK-NEXT: [[TMP40:%.*]] = xor i1 [[TMP39]], true
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT4]]
-; CHECK-NEXT: br i1 [[TMP40]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP40]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
-; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[DIV_I:%.*]] = udiv i64 [[IV]], [[MUL_2_I]]
-; CHECK-NEXT: [[REM_I:%.*]] = urem i64 [[IV]], [[MUL_2_I]]
-; CHECK-NEXT: [[DIV_1_I:%.*]] = udiv i64 [[REM_I]], [[MUL_1_I]]
-; CHECK-NEXT: [[REM_1_I:%.*]] = urem i64 [[REM_I]], [[MUL_1_I]]
-; CHECK-NEXT: [[DIV_2_I:%.*]] = udiv i64 [[REM_1_I]], [[X]]
-; CHECK-NEXT: [[REM_2_I:%.*]] = urem i64 [[REM_1_I]], [[X]]
-; CHECK-NEXT: [[MUL_I:%.*]] = mul i64 [[X]], [[DIV_I]]
-; CHECK-NEXT: [[ADD_I:%.*]] = add i64 [[MUL_I]], [[DIV_1_I]]
-; CHECK-NEXT: [[MUL_1_I9:%.*]] = mul i64 [[ADD_I]], [[X]]
-; CHECK-NEXT: [[ADD_1_I:%.*]] = add i64 [[MUL_1_I9]], [[DIV_2_I]]
-; CHECK-NEXT: [[MUL_2_I11:%.*]] = mul i64 [[ADD_1_I]], [[X]]
-; CHECK-NEXT: [[ADD_2_I:%.*]] = add i64 [[MUL_2_I11]], [[REM_2_I]]
-; CHECK-NEXT: [[SEXT_I:%.*]] = shl i64 [[ADD_2_I]], 32
-; CHECK-NEXT: [[CONV6_I:%.*]] = ashr i64 [[SEXT_I]], 32
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[DST]], i64 [[CONV6_I]]
-; CHECK-NEXT: store i64 [[DIV_I]], ptr [[GEP]], align 4
-; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
-; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
@@ -326,9 +260,7 @@ exit:
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
-; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
-; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
-; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
-; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]}
+; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
;.
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll
index 549df337e6907..8f17f400fc2f2 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll
@@ -430,13 +430,6 @@ define void @trip_count_based_on_ptrtoint(i64 %x) "target-cpu"="apple-m1" {
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; CHECK: vector.scevcheck:
-; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[ADD]] to i2
-; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[X]] to i2
-; CHECK-NEXT: [[TMP5:%.*]] = sub i2 [[TMP3]], [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = zext i2 [[TMP5]] to i64
-; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP6]], 0
-; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK: vector.main.loop.iter.check:
; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
@@ -466,7 +459,7 @@ define void @trip_count_based_on_ptrtoint(i64 %x) "target-cpu"="apple-m1" {
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF11]]
; CHECK: vec.epilog.ph:
-; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
; CHECK-NEXT: [[N_MOD_VF1:%.*]] = urem i64 [[TMP2]], 4
; CHECK-NEXT: [[N_VEC2:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF1]]
; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[N_VEC2]], 4
@@ -484,7 +477,7 @@ define void @trip_count_based_on_ptrtoint(i64 %x) "target-cpu"="apple-m1" {
; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC2]]
; CHECK-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP14]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START]], [[VECTOR_SCEVCHECK]] ], [ [[PTR_START]], [[ITER_CHECK:%.*]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP14]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START]], [[ITER_CHECK:%.*]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
index 85726c161cc54..9c2e1ec3815e0 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
@@ -355,17 +355,62 @@ exit:
define void @test_widen_extended_induction(ptr %dst) {
; CHECK-LABEL: @test_widen_extended_induction(
-; CHECK-NEXT: entry:
+; CHECK-NEXT: iter.check:
+; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
+; CHECK: vector.main.loop.iter.check:
+; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i8> [ <i8 0, i8 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i8> [[VEC_IND]], splat (i8 2)
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[INDEX]] to i8
+; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [6 x i8], ptr [[DST:%.*]], i64 0, i64 [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 2
+; CHECK-NEXT: store <2 x i8> [[VEC_IND]], ptr [[TMP2]], align 1
+; CHECK-NEXT: store <2 x i8> [[STEP_ADD]], ptr [[TMP3]], align 1
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i8> [[STEP_ADD]], splat (i8 2)
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 10000
+; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], {{!llvm.loop ![0-9]+}}
+; CHECK: middle.block:
+; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
+; CHECK: vec.epilog.iter.check:
+; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]]
+; CHECK: vec.epilog.ph:
+; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i32 [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i8> [[BROADCAST_SPLATINSERT]], <2 x i8> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i8> [[BROADCAST_SPLAT]], <i8 0, i8 1>
+; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
+; CHECK: vec.epilog.vector.body:
+; CHECK-NEXT: [[INDEX1:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <2 x i8> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT4:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX1]] to i8
+; CHECK-NEXT: [[TMP5:%.*]] = zext i8 [[OFFSET_IDX]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [6 x i8], ptr [[DST]], i64 0, i64 [[TMP5]]
+; CHECK-NEXT: store <2 x i8> [[VEC_IND2]], ptr [[TMP6]], align 1
+; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i32 [[INDEX1]], 2
+; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <2 x i8> [[VEC_IND2]], splat (i8 2)
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT3]], 10000
+; CHECK-NEXT: br i1 [[TMP7]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
+; CHECK: vec.epilog.middle.block:
+; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK: vec.epilog.scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i8 [ 16, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
+; CHECK-NEXT: br label [[LOOP1:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL5]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP1]] ]
; CHECK-NEXT: [[IV_EXT:%.*]] = zext i8 [[IV]] to i64
-; CHECK-NEXT: [[ARRAYIDX1449:%.*]] = getelementptr inbounds [6 x i8], ptr [[DST:%.*]], i64 0, i64 [[IV_EXT]]
+; CHECK-NEXT: [[ARRAYIDX1449:%.*]] = getelementptr inbounds [6 x i8], ptr [[DST]], i64 0, i64 [[IV_EXT]]
; CHECK-NEXT: store i8 [[IV]], ptr [[ARRAYIDX1449]], align 1
; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1
; CHECK-NEXT: [[IV_NEXT_EXT:%.*]] = zext i8 [[IV_NEXT]] to i32
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT_EXT]], 10000
-; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP1]], {{!llvm.loop ![0-9]+}}
; CHECK: exit:
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
index 4b097ba2422e4..bafb65dab90a2 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
@@ -213,24 +213,7 @@ define void @iv_trunc(i32 %x, ptr %dst, i64 %N) #0 {
; DEFAULT-NEXT: [[MUL_X:%.*]] = add i32 [[X]], 1
; DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2
-; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; DEFAULT: [[VECTOR_SCEVCHECK]]:
-; DEFAULT-NEXT: [[TMP1:%.*]] = sub i32 -1, [[X]]
-; DEFAULT-NEXT: [[TMP2:%.*]] = icmp slt i32 [[MUL_X]], 0
-; DEFAULT-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 [[MUL_X]]
-; DEFAULT-NEXT: [[TMP4:%.*]] = trunc i64 [[N]] to i32
-; DEFAULT-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[TMP3]], i32 [[TMP4]])
-; DEFAULT-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
-; DEFAULT-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
-; DEFAULT-NEXT: [[TMP5:%.*]] = sub i32 0, [[MUL_RESULT]]
-; DEFAULT-NEXT: [[TMP6:%.*]] = icmp ugt i32 [[TMP5]], 0
-; DEFAULT-NEXT: [[TMP7:%.*]] = select i1 [[TMP2]], i1 [[TMP6]], i1 false
-; DEFAULT-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW]]
-; DEFAULT-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[N]], 4294967295
-; DEFAULT-NEXT: [[TMP10:%.*]] = icmp ne i32 [[MUL_X]], 0
-; DEFAULT-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]]
-; DEFAULT-NEXT: [[TMP12:%.*]] = or i1 [[TMP8]], [[TMP11]]
-; DEFAULT-NEXT: br i1 [[TMP12]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; DEFAULT: [[VECTOR_PH]]:
; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
@@ -254,7 +237,7 @@ define void @iv_trunc(i32 %x, ptr %dst, i64 %N) #0 {
; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; DEFAULT-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; DEFAULT: [[SCALAR_PH]]:
-; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
+; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; DEFAULT-NEXT: br label %[[FOR_BODY:.*]]
; DEFAULT: [[FOR_BODY]]:
; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
@@ -314,25 +297,7 @@ define void @trunc_ivs_and_store(i32 %x, ptr %dst, i64 %N) #0 {
; DEFAULT-NEXT: [[MUL:%.*]] = mul i32 [[X]], [[X]]
; DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2
-; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; DEFAULT: [[VECTOR_SCEVCHECK]]:
-; DEFAULT-NEXT: [[TMP1:%.*]] = mul i32 [[X]], [[X]]
-; DEFAULT-NEXT: [[TMP2:%.*]] = sub i32 0, [[TMP1]]
-; DEFAULT-NEXT: [[TMP3:%.*]] = icmp slt i32 [[MUL]], 0
-; DEFAULT-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 [[MUL]]
-; DEFAULT-NEXT: [[TMP5:%.*]] = trunc i64 [[N]] to i32
-; DEFAULT-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[TMP4]], i32 [[TMP5]])
-; DEFAULT-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
-; DEFAULT-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
-; DEFAULT-NEXT: [[TMP6:%.*]] = sub i32 0, [[MUL_RESULT]]
-; DEFAULT-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP6]], 0
-; DEFAULT-NEXT: [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP7]], i1 false
-; DEFAULT-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
-; DEFAULT-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[N]], 4294967295
-; DEFAULT-NEXT: [[TMP11:%.*]] = icmp ne i32 [[MUL]], 0
-; DEFAULT-NEXT: [[TMP12:%.*]] = and i1 [[TMP10]], [[TMP11]]
-; DEFAULT-NEXT: [[TMP13:%.*]] = or i1 [[TMP9]], [[TMP12]]
-; DEFAULT-NEXT: br i1 [[TMP13]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; DEFAULT: [[VECTOR_PH]]:
; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
@@ -359,12 +324,12 @@ define void @trunc_ivs_and_store(i32 %x, ptr %dst, i64 %N) #0 {
; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; DEFAULT-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; DEFAULT: [[SCALAR_PH]]:
-; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
-; DEFAULT-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[TMP14]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
+; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; DEFAULT-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[TMP14]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; DEFAULT-NEXT: br label %[[LOOP:.*]]
; DEFAULT: [[LOOP]]:
; DEFAULT-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ]
-; DEFAULT-NEXT: [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP]] ]
+; DEFAULT-NEXT: [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP]] ]
; DEFAULT-NEXT: [[IV_1_TRUNC:%.*]] = trunc i64 [[IV_1]] to i32
; DEFAULT-NEXT: [[IV_1_MUL:%.*]] = mul i32 [[MUL]], [[IV_1_TRUNC]]
; DEFAULT-NEXT: [[IV_2_NEXT]] = add i32 [[IV_2]], 1
@@ -425,24 +390,7 @@ define void @ivs_trunc_and_ext(i32 %x, ptr %dst, i64 %N) #0 {
; DEFAULT-NEXT: [[ADD:%.*]] = add i32 [[X]], 1
; DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2
-; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; DEFAULT: [[VECTOR_SCEVCHECK]]:
-; DEFAULT-NEXT: [[TMP1:%.*]] = sub i32 -1, [[X]]
-; DEFAULT-NEXT: [[TMP2:%.*]] = icmp slt i32 [[ADD]], 0
-; DEFAULT-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 [[ADD]]
-; DEFAULT-NEXT: [[TMP4:%.*]] = trunc i64 [[N]] to i32
-; DEFAULT-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[TMP3]], i32 [[TMP4]])
-; DEFAULT-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
-; DEFAULT-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
-; DEFAULT-NEXT: [[TMP5:%.*]] = sub i32 0, [[MUL_RESULT]]
-; DEFAULT-NEXT: [[TMP6:%.*]] = icmp ugt i32 [[TMP5]], 0
-; DEFAULT-NEXT: [[TMP7:%.*]] = select i1 [[TMP2]], i1 [[TMP6]], i1 false
-; DEFAULT-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW]]
-; DEFAULT-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[N]], 4294967295
-; DEFAULT-NEXT: [[TMP10:%.*]] = icmp ne i32 [[ADD]], 0
-; DEFAULT-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]]
-; DEFAULT-NEXT: [[TMP12:%.*]] = or i1 [[TMP8]], [[TMP11]]
-; DEFAULT-NEXT: br i1 [[TMP12]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; DEFAULT: [[VECTOR_PH]]:
; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
@@ -469,8 +417,8 @@ define void @ivs_trunc_and_ext(i32 %x, ptr %dst, i64 %N) #0 {
; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; DEFAULT-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; DEFAULT: [[SCALAR_PH]]:
-; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
-; DEFAULT-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[TMP13]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
+; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; DEFAULT-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[TMP13]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; DEFAULT-NEXT: br label %[[LOOP:.*]]
; DEFAULT: [[LOOP]]:
; DEFAULT-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ]
@@ -535,16 +483,7 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) {
; DEFAULT-NEXT: [[ENTRY:.*]]:
; DEFAULT-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1)
; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 2
-; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; DEFAULT: [[VECTOR_SCEVCHECK]]:
-; DEFAULT-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1)
-; DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[UMAX]], -1
-; DEFAULT-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
-; DEFAULT-NEXT: [[TMP2:%.*]] = add i32 1, [[TMP1]]
-; DEFAULT-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 1
-; DEFAULT-NEXT: [[TMP4:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; DEFAULT-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
-; DEFAULT-NEXT: br i1 [[TMP5]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; DEFAULT: [[VECTOR_PH]]:
; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 2
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]]
@@ -564,12 +503,12 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) {
; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]]
; DEFAULT-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; DEFAULT: [[SCALAR_PH]]:
-; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
-; DEFAULT-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
+; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; DEFAULT-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; DEFAULT-NEXT: br label %[[LOOP:.*]]
; DEFAULT: [[LOOP]]:
; DEFAULT-NEXT: [[IV_1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ]
-; DEFAULT-NEXT: [[IV_CONV:%.*]] = phi i64 [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[IV_EXT:%.*]], %[[LOOP]] ]
+; DEFAULT-NEXT: [[IV_CONV:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[IV_EXT:%.*]], %[[LOOP]] ]
; DEFAULT-NEXT: [[GEP:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[IV_CONV]], i32 2
; DEFAULT-NEXT: store i32 0, ptr [[GEP]], align 8
; DEFAULT-NEXT: [[IV_1_NEXT]] = add i32 [[IV_1]], 1
@@ -582,27 +521,18 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) {
; PRED-LABEL: define void @exit_cond_zext_iv(
; PRED-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
; PRED-NEXT: [[ENTRY:.*:]]
-; PRED-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1)
-; PRED-NEXT: br label %[[VECTOR_SCEVCHECK:.*]]
-; PRED: [[VECTOR_SCEVCHECK]]:
; PRED-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1)
-; PRED-NEXT: [[TMP0:%.*]] = add i64 [[UMAX]], -1
-; PRED-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
-; PRED-NEXT: [[TMP2:%.*]] = add i32 1, [[TMP1]]
-; PRED-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 1
-; PRED-NEXT: [[TMP4:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; PRED-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
-; PRED-NEXT: br i1 [[TMP5]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; PRED-NEXT: br label %[[VECTOR_PH:.*]]
; PRED: [[VECTOR_PH]]:
-; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX1]], 1
+; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], 1
; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 2
; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; PRED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[UMAX1]], 1
+; PRED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[UMAX]], 1
; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; PRED-NEXT: br label %[[VECTOR_BODY:.*]]
; PRED: [[VECTOR_BODY]]:
-; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE5:.*]] ]
+; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE4:.*]] ]
; PRED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i64 0
; PRED-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer
; PRED-NEXT: [[VEC_IV:%.*]] = add <2 x i64> [[BROADCAST_SPLAT3]], <i64 0, i64 1>
@@ -616,29 +546,18 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) {
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE]]
; PRED: [[PRED_STORE_CONTINUE]]:
; PRED-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP6]], i32 1
-; PRED-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5]]
-; PRED: [[PRED_STORE_IF4]]:
+; PRED-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4]]
+; PRED: [[PRED_STORE_IF3]]:
; PRED-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
; PRED-NEXT: [[TMP12:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[TMP11]], i32 2
; PRED-NEXT: store i32 0, ptr [[TMP12]], align 8
-; PRED-NEXT: br label %[[PRED_STORE_CONTINUE5]]
-; PRED: [[PRED_STORE_CONTINUE5]]:
+; PRED-NEXT: br label %[[PRED_STORE_CONTINUE4]]
+; PRED: [[PRED_STORE_CONTINUE4]]:
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
; PRED-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; PRED-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; PRED: [[MIDDLE_BLOCK]]:
; PRED-NEXT: br label %[[EXIT:.*]]
-; PRED: [[SCALAR_PH]]:
-; PRED-NEXT: br label %[[LOOP:.*]]
-; PRED: [[LOOP]]:
-; PRED-NEXT: [[IV_1:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ]
-; PRED-NEXT: [[IV_CONV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_EXT:%.*]], %[[LOOP]] ]
-; PRED-NEXT: [[GEP:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[IV_CONV]], i32 2
-; PRED-NEXT: store i32 0, ptr [[GEP]], align 8
-; PRED-NEXT: [[IV_1_NEXT]] = add i32 [[IV_1]], 1
-; PRED-NEXT: [[IV_EXT]] = zext i32 [[IV_1_NEXT]] to i64
-; PRED-NEXT: [[C:%.*]] = icmp ult i64 [[IV_EXT]], [[N]]
-; PRED-NEXT: br i1 [[C]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP5:![0-9]+]]
; PRED: [[EXIT]]:
; PRED-NEXT: ret void
;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll
index 7b42e565e127d..2840e4de10164 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll
@@ -356,16 +356,7 @@ define void @zext_iv_increment(ptr %dst, i64 %N) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1)
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 2
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; CHECK: vector.scevcheck:
-; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1)
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[UMAX]], -1
-; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP0]] to i32
-; CHECK-NEXT: [[TMP3:%.*]] = add i32 1, [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i32 [[TMP3]], 1
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
-; CHECK-NEXT: br i1 [[TMP6]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]]
@@ -385,8 +376,8 @@ define void @zext_iv_increment(ptr %dst, i64 %N) {
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll
index cf45f3a88f37e..d020d33e41585 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll
@@ -26,8 +26,7 @@ target triple = "aarch64-unknown-linux-gnu"
; DEBUG: LV: Clamping the MaxVF to maximum power of two not exceeding the constant trip count: 16
; DEBUG: LV: IC is 1
; DEBUG: LV: VF is 16
-; DEBUG: LV: Vectorization is not beneficial: expected trip count < minimum profitable VF (16 < 32)
-; DEBUG: LV: Too many memory checks needed.
+; DEBUG: Executing best plan with VF=16, UF=1
; DEBUG-LABEL: LV: Checking a loop in 'overflow_indvar_known_false'
; DEBUG: LV: Found trip count: 0
@@ -49,17 +48,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef
; CHECK-VS1-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
; CHECK-VS1-NEXT: [[TMP3:%.*]] = sub i64 20, [[TMP2]]
; CHECK-VS1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
-; CHECK-VS1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; CHECK-VS1: [[VECTOR_SCEVCHECK]]:
-; CHECK-VS1-NEXT: [[TMP6:%.*]] = add i32 [[TC]], 1
-; CHECK-VS1-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-; CHECK-VS1-NEXT: [[TMP8:%.*]] = sub i64 19, [[TMP7]]
-; CHECK-VS1-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP8]] to i32
-; CHECK-VS1-NEXT: [[TMP10:%.*]] = add i32 [[TMP6]], [[TMP9]]
-; CHECK-VS1-NEXT: [[TMP11:%.*]] = icmp ult i32 [[TMP10]], [[TMP6]]
-; CHECK-VS1-NEXT: [[TMP12:%.*]] = icmp ugt i64 [[TMP8]], 4294967295
-; CHECK-VS1-NEXT: [[TMP13:%.*]] = or i1 [[TMP11]], [[TMP12]]
-; CHECK-VS1-NEXT: br i1 [[TMP13]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
+; CHECK-VS1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
; CHECK-VS1: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
; CHECK-VS1-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VS1-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 4
@@ -112,7 +101,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef
; CHECK-VS1-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC3]]
; CHECK-VS1-NEXT: br i1 [[CMP_N10]], label %[[WHILE_END_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
; CHECK-VS1: [[VEC_EPILOG_SCALAR_PH]]:
-; CHECK-VS1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP39]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ], [ [[TMP0]], %[[ITER_CHECK]] ]
+; CHECK-VS1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP39]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP0]], %[[ITER_CHECK]] ]
; CHECK-VS1-NEXT: br label %[[WHILE_BODY:.*]]
; CHECK-VS1: [[WHILE_BODY]]:
; CHECK-VS1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[WHILE_BODY]] ]
@@ -142,17 +131,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef
; CHECK-VS2-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
; CHECK-VS2-NEXT: [[TMP3:%.*]] = sub i64 20, [[TMP2]]
; CHECK-VS2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
-; CHECK-VS2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; CHECK-VS2: [[VECTOR_SCEVCHECK]]:
-; CHECK-VS2-NEXT: [[TMP6:%.*]] = add i32 [[TC]], 1
-; CHECK-VS2-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-; CHECK-VS2-NEXT: [[TMP8:%.*]] = sub i64 19, [[TMP7]]
-; CHECK-VS2-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP8]] to i32
-; CHECK-VS2-NEXT: [[TMP10:%.*]] = add i32 [[TMP6]], [[TMP9]]
-; CHECK-VS2-NEXT: [[TMP11:%.*]] = icmp ult i32 [[TMP10]], [[TMP6]]
-; CHECK-VS2-NEXT: [[TMP12:%.*]] = icmp ugt i64 [[TMP8]], 4294967295
-; CHECK-VS2-NEXT: [[TMP13:%.*]] = or i1 [[TMP11]], [[TMP12]]
-; CHECK-VS2-NEXT: br i1 [[TMP13]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
+; CHECK-VS2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
; CHECK-VS2: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
; CHECK-VS2-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VS2-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 3
@@ -205,7 +184,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef
; CHECK-VS2-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC3]]
; CHECK-VS2-NEXT: br i1 [[CMP_N10]], label %[[WHILE_END_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
; CHECK-VS2: [[VEC_EPILOG_SCALAR_PH]]:
-; CHECK-VS2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP39]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ], [ [[TMP0]], %[[ITER_CHECK]] ]
+; CHECK-VS2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP39]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP0]], %[[ITER_CHECK]] ]
; CHECK-VS2-NEXT: br label %[[WHILE_BODY:.*]]
; CHECK-VS2: [[WHILE_BODY]]:
; CHECK-VS2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[WHILE_BODY]] ]
@@ -303,31 +282,139 @@ define void @too_many_runtime_checks(ptr nocapture noundef %p, ptr nocapture nou
; CHECK-SAME: ptr noundef captures(none) [[P:%.*]], ptr noundef captures(none) [[P1:%.*]], ptr noundef readonly captures(none) [[P2:%.*]], ptr noundef readonly captures(none) [[P3:%.*]], i32 noundef [[TC:%.*]], i16 noundef [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CMP20:%.*]] = icmp ult i32 [[TC]], 16
-; CHECK-NEXT: br i1 [[CMP20]], label %[[WHILE_PREHEADER:.*]], label %[[WHILE_END:.*]]
-; CHECK: [[WHILE_PREHEADER]]:
+; CHECK-NEXT: br i1 [[CMP20]], label %[[ITER_CHECK:.*]], label %[[WHILE_END:.*]]
+; CHECK: [[ITER_CHECK]]:
; CHECK-NEXT: [[CONV8:%.*]] = trunc i16 [[VAL]] to i8
; CHECK-NEXT: [[V:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 4
; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[TC]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TC]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = sub i64 17, [[TMP3]]
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP4]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
+; CHECK: [[VECTOR_MEMCHECK]]:
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP1]], 17
+; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TC]], 1
+; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
+; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP5]], [[TMP7]]
+; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP1]], 4
+; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP9]]
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP1]], 21
+; CHECK-NEXT: [[TMP11:%.*]] = sub i64 [[TMP10]], [[TMP7]]
+; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP11]]
+; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP1]]
+; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP8]]
+; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i8, ptr [[P3]], i64 [[TMP1]]
+; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr i8, ptr [[P3]], i64 [[TMP8]]
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[SCEVGEP]], [[SCEVGEP3]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SCEVGEP2]], [[SCEVGEP1]]
+; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT: [[BOUND08:%.*]] = icmp ult ptr [[SCEVGEP]], [[SCEVGEP5]]
+; CHECK-NEXT: [[BOUND19:%.*]] = icmp ult ptr [[SCEVGEP4]], [[SCEVGEP1]]
+; CHECK-NEXT: [[FOUND_CONFLICT10:%.*]] = and i1 [[BOUND08]], [[BOUND19]]
+; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT10]]
+; CHECK-NEXT: [[BOUND011:%.*]] = icmp ult ptr [[SCEVGEP]], [[SCEVGEP7]]
+; CHECK-NEXT: [[BOUND112:%.*]] = icmp ult ptr [[SCEVGEP6]], [[SCEVGEP1]]
+; CHECK-NEXT: [[FOUND_CONFLICT13:%.*]] = and i1 [[BOUND011]], [[BOUND112]]
+; CHECK-NEXT: [[CONFLICT_RDX14:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT13]]
+; CHECK-NEXT: [[BOUND015:%.*]] = icmp ult ptr [[SCEVGEP2]], [[SCEVGEP5]]
+; CHECK-NEXT: [[BOUND116:%.*]] = icmp ult ptr [[SCEVGEP4]], [[SCEVGEP3]]
+; CHECK-NEXT: [[FOUND_CONFLICT17:%.*]] = and i1 [[BOUND015]], [[BOUND116]]
+; CHECK-NEXT: [[CONFLICT_RDX18:%.*]] = or i1 [[CONFLICT_RDX14]], [[FOUND_CONFLICT17]]
+; CHECK-NEXT: [[BOUND019:%.*]] = icmp ult ptr [[SCEVGEP2]], [[SCEVGEP7]]
+; CHECK-NEXT: [[BOUND120:%.*]] = icmp ult ptr [[SCEVGEP6]], [[SCEVGEP3]]
+; CHECK-NEXT: [[FOUND_CONFLICT21:%.*]] = and i1 [[BOUND019]], [[BOUND120]]
+; CHECK-NEXT: [[CONFLICT_RDX22:%.*]] = or i1 [[CONFLICT_RDX18]], [[FOUND_CONFLICT21]]
+; CHECK-NEXT: br i1 [[CONFLICT_RDX22]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
+; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
+; CHECK-NEXT: [[MIN_ITERS_CHECK23:%.*]] = icmp ult i64 [[TMP4]], 16
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK23]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], 16
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[CONV8]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = add i64 [[TMP1]], [[INDEX]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[P2]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 1, !alias.scope [[META6:![0-9]+]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[P3]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[WIDE_LOAD24:%.*]] = load <16 x i8>, ptr [[TMP14]], align 1, !alias.scope [[META9:![0-9]+]]
+; CHECK-NEXT: [[TMP15:%.*]] = mul <16 x i8> [[WIDE_LOAD24]], [[WIDE_LOAD]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[P1]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[WIDE_LOAD25:%.*]] = load <16 x i8>, ptr [[TMP16]], align 1, !alias.scope [[META11:![0-9]+]], !noalias [[META13:![0-9]+]]
+; CHECK-NEXT: [[TMP17:%.*]] = add <16 x i8> [[TMP15]], [[WIDE_LOAD25]]
+; CHECK-NEXT: store <16 x i8> [[TMP17]], ptr [[TMP16]], align 1, !alias.scope [[META11]], !noalias [[META13]]
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[WIDE_LOAD26:%.*]] = load <16 x i8>, ptr [[TMP18]], align 1, !alias.scope [[META15:![0-9]+]], !noalias [[META16:![0-9]+]]
+; CHECK-NEXT: [[TMP19:%.*]] = add <16 x i8> [[WIDE_LOAD26]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: store <16 x i8> [[TMP19]], ptr [[TMP18]], align 1, !alias.scope [[META15]], !noalias [[META16]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[WHILE_END_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
+; CHECK: [[VEC_EPILOG_ITER_CHECK]]:
+; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[TMP1]], [[N_VEC]]
+; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4
+; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF18:![0-9]+]]
+; CHECK: [[VEC_EPILOG_PH]]:
+; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT: [[N_MOD_VF27:%.*]] = urem i64 [[TMP4]], 4
+; CHECK-NEXT: [[N_VEC28:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF27]]
+; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[TMP1]], [[N_VEC28]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT29:%.*]] = insertelement <4 x i8> poison, i8 [[CONV8]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT30:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT29]], <4 x i8> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
+; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX31:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT36:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[TMP1]], [[INDEX31]]
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[P2]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[WIDE_LOAD32:%.*]] = load <4 x i8>, ptr [[TMP22]], align 1, !alias.scope [[META6]]
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[P3]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[WIDE_LOAD33:%.*]] = load <4 x i8>, ptr [[TMP23]], align 1, !alias.scope [[META9]]
+; CHECK-NEXT: [[TMP24:%.*]] = mul <4 x i8> [[WIDE_LOAD33]], [[WIDE_LOAD32]]
+; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw i8, ptr [[P1]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[WIDE_LOAD34:%.*]] = load <4 x i8>, ptr [[TMP25]], align 1, !alias.scope [[META11]], !noalias [[META13]]
+; CHECK-NEXT: [[TMP26:%.*]] = add <4 x i8> [[TMP24]], [[WIDE_LOAD34]]
+; CHECK-NEXT: store <4 x i8> [[TMP26]], ptr [[TMP25]], align 1, !alias.scope [[META11]], !noalias [[META13]]
+; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[WIDE_LOAD35:%.*]] = load <4 x i8>, ptr [[TMP27]], align 1, !alias.scope [[META15]], !noalias [[META16]]
+; CHECK-NEXT: [[TMP28:%.*]] = add <4 x i8> [[WIDE_LOAD35]], [[BROADCAST_SPLAT30]]
+; CHECK-NEXT: store <4 x i8> [[TMP28]], ptr [[TMP27]], align 1, !alias.scope [[META15]], !noalias [[META16]]
+; CHECK-NEXT: [[INDEX_NEXT36]] = add nuw i64 [[INDEX31]], 4
+; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT36]], [[N_VEC28]]
+; CHECK-NEXT: br i1 [[TMP29]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
+; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N37:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC28]]
+; CHECK-NEXT: br i1 [[CMP_N37]], label %[[WHILE_END_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
+; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP21]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP1]], %[[VECTOR_MEMCHECK]] ], [ [[TMP1]], %[[ITER_CHECK]] ]
; CHECK-NEXT: br label %[[WHILE_BODY:.*]]
; CHECK: [[WHILE_BODY]]:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP1]], %[[WHILE_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[WHILE_BODY]] ]
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[P2]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP60:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
-; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[P3]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[WHILE_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i8, ptr [[P2]], i64 [[IV]]
+; CHECK-NEXT: [[TMP60:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[P3]], i64 [[IV]]
; CHECK-NEXT: [[TMP61:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
; CHECK-NEXT: [[MUL:%.*]] = mul i8 [[TMP61]], [[TMP60]]
-; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw i8, ptr [[P1]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw i8, ptr [[P1]], i64 [[IV]]
; CHECK-NEXT: [[TMP62:%.*]] = load i8, ptr [[ARRAYIDX5]], align 1
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP62]]
; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX5]], align 1
-; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 [[IV]]
; CHECK-NEXT: [[TMP63:%.*]] = load i8, ptr [[ARRAYIDX10]], align 1
; CHECK-NEXT: [[ADD12:%.*]] = add i8 [[TMP63]], [[CONV8]]
; CHECK-NEXT: store i8 [[ADD12]], ptr [[ARRAYIDX10]], align 1
-; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[TMP64:%.*]] = and i64 [[INDVARS_IV_NEXT]], 4294967295
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[TMP64]], 16
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[WHILE_END_LOOPEXIT:.*]], label %[[WHILE_BODY]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[WHILE_END_LOOPEXIT]], label %[[WHILE_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK: [[WHILE_END_LOOPEXIT]]:
; CHECK-NEXT: br label %[[WHILE_END]]
; CHECK: [[WHILE_END]]:
@@ -380,17 +467,7 @@ define void @overflow_indvar_known_false(ptr nocapture noundef %p, i32 noundef %
; CHECK-NEXT: [[TMP19:%.*]] = add i32 [[TC]], 1
; CHECK-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 1028, [[TMP20]]
-; CHECK-NEXT: br label %[[VECTOR_SCEVCHECK:.*]]
-; CHECK: [[VECTOR_SCEVCHECK]]:
-; CHECK-NEXT: [[TMP21:%.*]] = add i32 [[TC]], 1
-; CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64
-; CHECK-NEXT: [[TMP23:%.*]] = sub i64 1027, [[TMP22]]
-; CHECK-NEXT: [[TMP24:%.*]] = trunc i64 [[TMP23]] to i32
-; CHECK-NEXT: [[TMP25:%.*]] = add i32 [[TMP21]], [[TMP24]]
-; CHECK-NEXT: [[TMP26:%.*]] = icmp ult i32 [[TMP25]], [[TMP21]]
-; CHECK-NEXT: [[TMP27:%.*]] = icmp ugt i64 [[TMP23]], 4294967295
-; CHECK-NEXT: [[TMP28:%.*]] = or i1 [[TMP26]], [[TMP27]]
-; CHECK-NEXT: br i1 [[TMP28]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
@@ -410,21 +487,9 @@ define void @overflow_indvar_known_false(ptr nocapture noundef %p, i32 noundef %
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_NEXT]], i64 [[TMP1]])
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
; CHECK-NEXT: [[TMP31:%.*]] = xor i1 [[TMP30]], true
-; CHECK-NEXT: br i1 [[TMP31]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP31]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[WHILE_END_LOOPEXIT:.*]]
-; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: br label %[[WHILE_BODY:.*]]
-; CHECK: [[WHILE_BODY]]:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[WHILE_BODY]] ]
-; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP18:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
-; CHECK-NEXT: [[ADD:%.*]] = add i8 [[TMP18]], [[CONV]]
-; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX]], align 1
-; CHECK-NEXT: [[TMP29:%.*]] = and i64 [[INDVARS_IV_NEXT]], 4294967295
-; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[TMP29]], 1027
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[WHILE_END_LOOPEXIT]], label %[[WHILE_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: [[WHILE_END_LOOPEXIT]]:
; CHECK-NEXT: br label %[[WHILE_END]]
; CHECK: [[WHILE_END]]:
@@ -470,7 +535,7 @@ define i32 @tc4(ptr noundef readonly captures(none) %tmp) vscale_range(1,16) {
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[ARRAYIDX1]], align 4
; CHECK-NEXT: [[TMP3]] = add <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3]])
; CHECK-NEXT: br label %[[EXIT:.*]]
@@ -509,7 +574,7 @@ define i32 @tc4_from_profile(ptr noundef readonly captures(none) %tmp, i64 %N) v
; CHECK-NEXT: [[ADD]] = add i32 [[SUM_0179]], [[TMP0]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]], !prof [[PROF9:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]], !prof [[PROF23:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], %[[FOR_BODY]] ]
; CHECK-NEXT: ret i32 [[ADD_LCSSA]]
@@ -543,20 +608,48 @@ exit: ; preds = %for.body
; CHECK-VS1: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK-VS1: [[PROF3]] = !{!"branch_weights", i32 8, i32 8}
; CHECK-VS1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
-; CHECK-VS1: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
-; CHECK-VS1: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
-; CHECK-VS1: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]}
-; CHECK-VS1: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
-; CHECK-VS1: [[PROF9]] = !{!"branch_weights", i32 10, i32 30}
+; CHECK-VS1: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
+; CHECK-VS1: [[META6]] = !{[[META7:![0-9]+]]}
+; CHECK-VS1: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]]}
+; CHECK-VS1: [[META8]] = distinct !{[[META8]], !"LVerDomain"}
+; CHECK-VS1: [[META9]] = !{[[META10:![0-9]+]]}
+; CHECK-VS1: [[META10]] = distinct !{[[META10]], [[META8]]}
+; CHECK-VS1: [[META11]] = !{[[META12:![0-9]+]]}
+; CHECK-VS1: [[META12]] = distinct !{[[META12]], [[META8]]}
+; CHECK-VS1: [[META13]] = !{[[META14:![0-9]+]], [[META7]], [[META10]]}
+; CHECK-VS1: [[META14]] = distinct !{[[META14]], [[META8]]}
+; CHECK-VS1: [[META15]] = !{[[META14]]}
+; CHECK-VS1: [[META16]] = !{[[META7]], [[META10]]}
+; CHECK-VS1: [[LOOP17]] = distinct !{[[LOOP17]], [[META1]], [[META2]]}
+; CHECK-VS1: [[PROF18]] = !{!"branch_weights", i32 4, i32 12}
+; CHECK-VS1: [[LOOP19]] = distinct !{[[LOOP19]], [[META1]], [[META2]]}
+; CHECK-VS1: [[LOOP20]] = distinct !{[[LOOP20]], [[META1]]}
+; CHECK-VS1: [[LOOP21]] = distinct !{[[LOOP21]], [[META1]], [[META2]]}
+; CHECK-VS1: [[LOOP22]] = distinct !{[[LOOP22]], [[META1]], [[META2]]}
+; CHECK-VS1: [[PROF23]] = !{!"branch_weights", i32 10, i32 30}
;.
; CHECK-VS2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK-VS2: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK-VS2: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK-VS2: [[PROF3]] = !{!"branch_weights", i32 8, i32 8}
; CHECK-VS2: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
-; CHECK-VS2: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
-; CHECK-VS2: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
-; CHECK-VS2: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]}
-; CHECK-VS2: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
-; CHECK-VS2: [[PROF9]] = !{!"branch_weights", i32 10, i32 30}
+; CHECK-VS2: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
+; CHECK-VS2: [[META6]] = !{[[META7:![0-9]+]]}
+; CHECK-VS2: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]]}
+; CHECK-VS2: [[META8]] = distinct !{[[META8]], !"LVerDomain"}
+; CHECK-VS2: [[META9]] = !{[[META10:![0-9]+]]}
+; CHECK-VS2: [[META10]] = distinct !{[[META10]], [[META8]]}
+; CHECK-VS2: [[META11]] = !{[[META12:![0-9]+]]}
+; CHECK-VS2: [[META12]] = distinct !{[[META12]], [[META8]]}
+; CHECK-VS2: [[META13]] = !{[[META14:![0-9]+]], [[META7]], [[META10]]}
+; CHECK-VS2: [[META14]] = distinct !{[[META14]], [[META8]]}
+; CHECK-VS2: [[META15]] = !{[[META14]]}
+; CHECK-VS2: [[META16]] = !{[[META7]], [[META10]]}
+; CHECK-VS2: [[LOOP17]] = distinct !{[[LOOP17]], [[META1]], [[META2]]}
+; CHECK-VS2: [[PROF18]] = !{!"branch_weights", i32 4, i32 12}
+; CHECK-VS2: [[LOOP19]] = distinct !{[[LOOP19]], [[META1]], [[META2]]}
+; CHECK-VS2: [[LOOP20]] = distinct !{[[LOOP20]], [[META1]]}
+; CHECK-VS2: [[LOOP21]] = distinct !{[[LOOP21]], [[META1]], [[META2]]}
+; CHECK-VS2: [[LOOP22]] = distinct !{[[LOOP22]], [[META1]], [[META2]]}
+; CHECK-VS2: [[PROF23]] = !{!"branch_weights", i32 10, i32 30}
;.
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll
index 14398eebc6674..25d917ff849f1 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll
@@ -15,31 +15,7 @@ define void @test_predicated_load_cast_hint(ptr %dst.1, ptr %dst.2, ptr %src, i8
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[SMAX16]], -1
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 2
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 1
-; CHECK-NEXT: br label %[[VECTOR_SCEVCHECK:.*]]
-; CHECK: [[VECTOR_SCEVCHECK]]:
-; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N_SUB]], i32 4)
-; CHECK-NEXT: [[TMP3:%.*]] = add nsw i32 [[SMAX]], -1
-; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 2
-; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
-; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 4, i8 [[TMP5]])
-; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
-; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = add i8 4, [[MUL_RESULT]]
-; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i8 [[TMP6]], 4
-; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW]]
-; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt i32 [[TMP4]], 255
-; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = shl i64 [[OFF]], 3
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST_1]], i64 [[TMP11]]
-; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP4]] to i64
-; CHECK-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 512, i64 [[TMP12]])
-; CHECK-NEXT: [[MUL_RESULT2:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0
-; CHECK-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT2]]
-; CHECK-NEXT: [[TMP15:%.*]] = icmp ult ptr [[TMP14]], [[SCEVGEP]]
-; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW3]]
-; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP10]], [[TMP16]]
-; CHECK-NEXT: br i1 [[TMP17]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
+; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[DST_2]], i64 1
; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[SRC]], i64 1
@@ -64,14 +40,14 @@ define void @test_predicated_load_cast_hint(ptr %dst.1, ptr %dst.2, ptr %src, i8
; CHECK-NEXT: [[BOUND113:%.*]] = icmp ult ptr [[SCEVGEP6]], [[SCEVGEP5]]
; CHECK-NEXT: [[FOUND_CONFLICT14:%.*]] = and i1 [[BOUND012]], [[BOUND113]]
; CHECK-NEXT: [[CONFLICT_RDX15:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT14]]
-; CHECK-NEXT: br i1 [[CONFLICT_RDX15]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: br i1 [[CONFLICT_RDX15]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 [[TMP2]])
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE22:.*]] ]
-; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[PRED_STORE_CONTINUE22]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 4, i8 8, i8 12>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE22]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE17:.*]] ]
+; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[PRED_STORE_CONTINUE17]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 4, i8 8, i8 12>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE17]] ]
; CHECK-NEXT: [[TMP28:%.*]] = load i8, ptr [[SRC]], align 1, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[TMP28]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer
@@ -88,35 +64,35 @@ define void @test_predicated_load_cast_hint(ptr %dst.1, ptr %dst.2, ptr %src, i8
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 1
-; CHECK-NEXT: br i1 [[TMP32]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
-; CHECK: [[PRED_STORE_IF17]]:
+; CHECK-NEXT: br i1 [[TMP32]], label %[[PRED_STORE_IF12:.*]], label %[[PRED_STORE_CONTINUE13:.*]]
+; CHECK: [[PRED_STORE_IF12]]:
; CHECK-NEXT: [[TMP108:%.*]] = extractelement <4 x i64> [[TMP26]], i32 1
; CHECK-NEXT: [[TMP109:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP108]], i64 [[OFF]]
; CHECK-NEXT: [[TMP110:%.*]] = extractelement <4 x i64> [[TMP25]], i32 1
; CHECK-NEXT: [[TMP111:%.*]] = or i64 [[TMP110]], 1
; CHECK-NEXT: store i64 [[TMP111]], ptr [[TMP109]], align 8, !alias.scope [[META3]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE18]]
-; CHECK: [[PRED_STORE_CONTINUE18]]:
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE13]]
+; CHECK: [[PRED_STORE_CONTINUE13]]:
; CHECK-NEXT: [[TMP37:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 2
-; CHECK-NEXT: br i1 [[TMP37]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
-; CHECK: [[PRED_STORE_IF19]]:
+; CHECK-NEXT: br i1 [[TMP37]], label %[[PRED_STORE_IF14:.*]], label %[[PRED_STORE_CONTINUE15:.*]]
+; CHECK: [[PRED_STORE_IF14]]:
; CHECK-NEXT: [[TMP114:%.*]] = extractelement <4 x i64> [[TMP26]], i32 2
; CHECK-NEXT: [[TMP115:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP114]], i64 [[OFF]]
; CHECK-NEXT: [[TMP116:%.*]] = extractelement <4 x i64> [[TMP25]], i32 2
; CHECK-NEXT: [[TMP117:%.*]] = or i64 [[TMP116]], 1
; CHECK-NEXT: store i64 [[TMP117]], ptr [[TMP115]], align 8, !alias.scope [[META3]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE20]]
-; CHECK: [[PRED_STORE_CONTINUE20]]:
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE15]]
+; CHECK: [[PRED_STORE_CONTINUE15]]:
; CHECK-NEXT: [[TMP42:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 3
-; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22]]
-; CHECK: [[PRED_STORE_IF21]]:
+; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF16:.*]], label %[[PRED_STORE_CONTINUE17]]
+; CHECK: [[PRED_STORE_IF16]]:
; CHECK-NEXT: [[TMP120:%.*]] = extractelement <4 x i64> [[TMP26]], i32 3
; CHECK-NEXT: [[TMP121:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP120]], i64 [[OFF]]
; CHECK-NEXT: [[TMP122:%.*]] = extractelement <4 x i64> [[TMP25]], i32 3
; CHECK-NEXT: [[TMP123:%.*]] = or i64 [[TMP122]], 1
; CHECK-NEXT: store i64 [[TMP123]], ptr [[TMP121]], align 8, !alias.scope [[META3]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE22]]
-; CHECK: [[PRED_STORE_CONTINUE22]]:
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE17]]
+; CHECK: [[PRED_STORE_CONTINUE17]]:
; CHECK-NEXT: store i8 0, ptr [[DST_2]], align 1, !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX_NEXT]], i32 [[TMP2]])
@@ -127,10 +103,9 @@ define void @test_predicated_load_cast_hint(ptr %dst.1, ptr %dst.2, ptr %src, i8
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[SRC]], align 1
; CHECK-NEXT: [[L_EXT:%.*]] = zext i8 [[L]] to i64
; CHECK-NEXT: [[ADD:%.*]] = or i64 [[L_EXT]], 1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
index 66211bd0353d4..2ce0eed8dca5c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
@@ -386,17 +386,8 @@ define i32 @diff_exit_block_needs_scev_check(i32 %end) {
; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[END]] to i10
; CHECK-NEXT: [[TMP1:%.*]] = zext i10 [[TMP0]] to i64
; CHECK-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 1)
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 8
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; CHECK: vector.scevcheck:
-; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[END_CLAMPED]], i32 1)
-; CHECK-NEXT: [[TMP2:%.*]] = add nsw i32 [[UMAX]], -1
-; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
-; CHECK-NEXT: [[TMP4:%.*]] = add i8 1, [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i8 [[TMP4]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i32 [[TMP2]], 255
-; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
-; CHECK-NEXT: br i1 [[TMP7]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i64 [[UMAX1]], 4
+; CHECK-NEXT: br i1 [[TMP7]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]]
@@ -423,8 +414,8 @@ define i32 @diff_exit_block_needs_scev_check(i32 %end) {
; CHECK: vector.early.exit:
; CHECK-NEXT: br label [[FOUND:%.*]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[IND:%.*]] = phi i8 [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
@@ -538,5 +529,5 @@ attributes #1 = { "target-features"="+sve" vscale_range(1,16) }
; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]}
-; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]]}
+; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META2]], [[META1]]}
;.
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-cost.ll
index 6eb8242bf7975..eb2147a2b42a3 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-cost.ll
@@ -148,23 +148,7 @@ define void @test_interleave_store_one_constant(ptr noalias %src, ptr noalias %d
; CHECK-NEXT: [[ITER_CHECK:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; CHECK: [[VECTOR_SCEVCHECK]]:
-; CHECK-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]])
-; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0
-; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[MUL_RESULT]]
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[DST]]
-; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]]
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 8
-; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]])
-; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
-; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT3]]
-; CHECK-NEXT: [[TMP7:%.*]] = icmp ult ptr [[TMP6]], [[SCEVGEP]]
-; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW4]]
-; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP4]], [[TMP8]]
-; CHECK-NEXT: br i1 [[TMP9]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
; CHECK-NEXT: [[MIN_ITERS_CHECK5:%.*]] = icmp ult i64 [[TMP0]], 8
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK5]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
@@ -235,7 +219,7 @@ define void @test_interleave_store_one_constant(ptr noalias %src, ptr noalias %d
; CHECK-NEXT: [[CMP_N18:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC13]]
; CHECK-NEXT: br i1 [[CMP_N18]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC13]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC13]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
@@ -278,38 +262,7 @@ define void @single_fmul_used_by_each_member(ptr noalias %A, ptr noalias %B, ptr
; CHECK-NEXT: [[ITER_CHECK:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; CHECK: [[VECTOR_SCEVCHECK]]:
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B]], i64 8
-; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]])
-; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
-; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]]
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[SCEVGEP]]
-; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]]
-; CHECK-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]])
-; CHECK-NEXT: [[MUL_RESULT2:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0
-; CHECK-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[B]], i64 [[MUL_RESULT2]]
-; CHECK-NEXT: [[TMP7:%.*]] = icmp ult ptr [[TMP6]], [[B]]
-; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW3]]
-; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[C]], i64 8
-; CHECK-NEXT: [[MUL5:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]])
-; CHECK-NEXT: [[MUL_RESULT6:%.*]] = extractvalue { i64, i1 } [[MUL5]], 0
-; CHECK-NEXT: [[MUL_OVERFLOW7:%.*]] = extractvalue { i64, i1 } [[MUL5]], 1
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[SCEVGEP4]], i64 [[MUL_RESULT6]]
-; CHECK-NEXT: [[TMP11:%.*]] = icmp ult ptr [[TMP10]], [[SCEVGEP4]]
-; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP11]], [[MUL_OVERFLOW7]]
-; CHECK-NEXT: [[MUL8:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]])
-; CHECK-NEXT: [[MUL_RESULT9:%.*]] = extractvalue { i64, i1 } [[MUL8]], 0
-; CHECK-NEXT: [[MUL_OVERFLOW10:%.*]] = extractvalue { i64, i1 } [[MUL8]], 1
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[C]], i64 [[MUL_RESULT9]]
-; CHECK-NEXT: [[TMP15:%.*]] = icmp ult ptr [[TMP14]], [[C]]
-; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW10]]
-; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP4]], [[TMP8]]
-; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP17]], [[TMP12]]
-; CHECK-NEXT: [[TMP19:%.*]] = or i1 [[TMP18]], [[TMP16]]
-; CHECK-NEXT: br i1 [[TMP19]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
; CHECK-NEXT: [[MIN_ITERS_CHECK11:%.*]] = icmp ult i64 [[TMP0]], 8
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK11]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
@@ -390,7 +343,7 @@ define void @single_fmul_used_by_each_member(ptr noalias %A, ptr noalias %B, ptr
; CHECK-NEXT: [[CMP_N26:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC23]]
; CHECK-NEXT: br i1 [[CMP_N26]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC23]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC23]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll
index f1fbf1dd5d942..46461ecfeb5bc 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll
@@ -237,16 +237,7 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) {
; VF-TWO-CHECK-NEXT: [[ITER_CHECK:.*:]]
; VF-TWO-CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
; VF-TWO-CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 2
-; VF-TWO-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; VF-TWO-CHECK: [[VECTOR_SCEVCHECK]]:
-; VF-TWO-CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1
-; VF-TWO-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
-; VF-TWO-CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
-; VF-TWO-CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[TMP0]], [[TMP2]]
-; VF-TWO-CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], [[TMP0]]
-; VF-TWO-CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[TMP1]], 4294967295
-; VF-TWO-CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP4]], [[TMP6]]
-; VF-TWO-CHECK-NEXT: br i1 [[TMP7]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
+; VF-TWO-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
; VF-TWO-CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
; VF-TWO-CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 32
; VF-TWO-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
@@ -360,16 +351,7 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) {
; VF-FOUR-CHECK-NEXT: [[ITER_CHECK:.*:]]
; VF-FOUR-CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
; VF-FOUR-CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
-; VF-FOUR-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; VF-FOUR-CHECK: [[VECTOR_SCEVCHECK]]:
-; VF-FOUR-CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1
-; VF-FOUR-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
-; VF-FOUR-CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
-; VF-FOUR-CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[TMP0]], [[TMP2]]
-; VF-FOUR-CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], [[TMP0]]
-; VF-FOUR-CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[TMP1]], 4294967295
-; VF-FOUR-CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP4]], [[TMP6]]
-; VF-FOUR-CHECK-NEXT: br i1 [[TMP7]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
+; VF-FOUR-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
; VF-FOUR-CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
; VF-FOUR-CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 32
; VF-FOUR-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-scalarivsext-crash.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-scalarivsext-crash.ll
index 34b8deaa8de03..22516009d86e2 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-scalarivsext-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-scalarivsext-crash.ll
@@ -16,16 +16,7 @@ define void @test_iv_trunc_crash(ptr %a, ptr %b, i32 %n) {
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[SMAX1]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = add nuw i32 [[TMP2]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP3]], 8
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; CHECK: [[VECTOR_SCEVCHECK]]:
-; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[N]] to i64
-; CHECK-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], 1
-; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP5]], i64 0)
-; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[SMAX]] to i32
-; CHECK-NEXT: [[TMP7:%.*]] = icmp slt i32 [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[SMAX]], 4294967295
-; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
-; CHECK-NEXT: br i1 [[TMP9]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP3]], 8
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
@@ -49,12 +40,12 @@ define void @test_iv_trunc_crash(ptr %a, ptr %b, i32 %n) {
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi double [ [[TMP13]], %[[MIDDLE_BLOCK]] ], [ [[SUM_0]], %[[ENTRY]] ], [ [[SUM_0]], %[[VECTOR_SCEVCHECK]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi double [ [[TMP13]], %[[MIDDLE_BLOCK]] ], [ [[SUM_0]], %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: [[SUM_1:%.*]] = phi double [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[SUM_NEXT:%.*]], %[[LOOP_BODY:.*]] ]
-; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP_BODY]] ]
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP_BODY]] ]
; CHECK-NEXT: [[COND:%.*]] = icmp sgt i32 [[I]], [[N]]
; CHECK-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LOOP_BODY]]
; CHECK: [[LOOP_BODY]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll
index 36ebd422b5d7b..f5140766bc68d 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll
@@ -11,22 +11,13 @@ define void @predicated_uniform_load(ptr %src, i32 %n, ptr %dst, i1 %cond) {
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[SMAX2]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = add nuw i32 [[TMP2]], 1
; CHECK-NEXT: br label [[VECTOR_SCEVCHECK:%.*]]
-; CHECK: vector.scevcheck:
-; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[IBOX]] to i64
-; CHECK-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], 1
-; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP5]], i64 0)
-; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[SMAX]] to i32
-; CHECK-NEXT: [[TMP7:%.*]] = icmp slt i32 [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[SMAX]], 4294967295
-; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
-; CHECK-NEXT: br i1 [[TMP9]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[NBRBOXES:%.*]], i64 4
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[BOXES:%.*]], i64 4
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[NBRBOXES]], [[SCEVGEP1]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[BOXES]], [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
-; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i1> poison, i1 [[COND:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector <vscale x 4 x i1> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
@@ -48,10 +39,9 @@ define void @predicated_uniform_load(ptr %src, i32 %n, ptr %dst, i1 %cond) {
; CHECK: middle.block:
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT: br i1 [[COND]], label [[LOOP_THEN:%.*]], label [[LOOP_ELSE:%.*]]
; CHECK: loop.then:
; CHECK-NEXT: br label [[LOOP_LATCH]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll
index f49f9284a1e93..210e2074138a0 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll
@@ -17,25 +17,10 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
; RV32-LABEL: @foo4(
; RV32-NEXT: entry:
; RV32-NEXT: br label [[VECTOR_MEMCHECK:%.*]]
-; RV32: vector.scevcheck:
-; RV32-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 128, i32 624)
-; RV32-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
-; RV32-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
-; RV32-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 [[MUL_RESULT]]
-; RV32-NEXT: [[TMP1:%.*]] = icmp ult ptr [[TMP0]], [[A]]
-; RV32-NEXT: [[TMP2:%.*]] = or i1 [[TMP1]], [[MUL_OVERFLOW]]
-; RV32-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 256, i32 624)
-; RV32-NEXT: [[MUL_RESULT2:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
-; RV32-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
-; RV32-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 [[MUL_RESULT2]]
-; RV32-NEXT: [[TMP4:%.*]] = icmp ult ptr [[TMP3]], [[B]]
-; RV32-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW3]]
-; RV32-NEXT: [[TMP6:%.*]] = or i1 [[TMP2]], [[TMP5]]
-; RV32-NEXT: br i1 [[TMP6]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK1:%.*]]
; RV32: vector.memcheck:
; RV32-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[TRIGGER:%.*]], i32 39940
-; RV32-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i32 79880
-; RV32-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[B]], i32 159752
+; RV32-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 79880
+; RV32-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 159752
; RV32-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[TRIGGER]], [[SCEVGEP]]
; RV32-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP1]]
; RV32-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
@@ -43,7 +28,7 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
; RV32-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[B]], [[SCEVGEP]]
; RV32-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]]
; RV32-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]]
-; RV32-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; RV32-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; RV32: vector.ph:
; RV32-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; RV32-NEXT: [[TMP9:%.*]] = mul nuw nsw <vscale x 2 x i64> [[TMP7]], splat (i64 16)
@@ -74,10 +59,9 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
; RV32: middle.block:
; RV32-NEXT: br label [[FOR_END:%.*]]
; RV32: scalar.ph:
-; RV32-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[VECTOR_MEMCHECK1]] ]
; RV32-NEXT: br label [[FOR_BODY:%.*]]
; RV32: for.body:
-; RV32-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
+; RV32-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; RV32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDVARS_IV]]
; RV32-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; RV32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP21]], 100
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
index f7340fee47eb8..6a14592fb37bd 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
@@ -186,23 +186,14 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
; RV64-NEXT: br i1 [[CMP7]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_COND_CLEANUP:.*]]
; RV64: [[FOR_BODY_PREHEADER]]:
; RV64-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
-; RV64-NEXT: br label %[[VECTOR_SCEVCHECK:.*]]
-; RV64: [[VECTOR_SCEVCHECK]]:
-; RV64-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP0]], -1
-; RV64-NEXT: [[TMP4:%.*]] = add i32 [[N]], -1
-; RV64-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP3]] to i32
-; RV64-NEXT: [[TMP6:%.*]] = sub i32 [[TMP4]], [[TMP5]]
-; RV64-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP6]], [[TMP4]]
-; RV64-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP3]], 4294967295
-; RV64-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[TMP9]]
-; RV64-NEXT: br i1 [[TMP8]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
+; RV64-NEXT: br label %[[VECTOR_MEMCHECK:.*]]
; RV64: [[VECTOR_MEMCHECK]]:
; RV64-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
; RV64-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4
; RV64-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4
; RV64-NEXT: [[TMP14:%.*]] = sub i64 [[B1]], [[A2]]
; RV64-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP14]], [[TMP13]]
-; RV64-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; RV64-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; RV64: [[VECTOR_PH]]:
; RV64-NEXT: br label %[[VECTOR_BODY:.*]]
; RV64: [[VECTOR_BODY]]:
@@ -240,8 +231,6 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
; RV64: [[MIDDLE_BLOCK]]:
; RV64-NEXT: br label %[[FOR_COND_CLEANUP_LOOPEXIT:.*]]
; RV64: [[SCALAR_PH]]:
-; RV64-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ], [ [[TMP0]], %[[VECTOR_MEMCHECK]] ]
-; RV64-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[N]], %[[VECTOR_SCEVCHECK]] ], [ [[N]], %[[VECTOR_MEMCHECK]] ]
; RV64-NEXT: br label %[[FOR_BODY:.*]]
; RV64: [[FOR_COND_CLEANUP_LOOPEXIT]]:
; RV64-NEXT: br label %[[FOR_COND_CLEANUP]]
@@ -320,16 +309,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
; RV64-UF2-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; RV64-UF2-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 3
; RV64-UF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
-; RV64-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; RV64-UF2: [[VECTOR_SCEVCHECK]]:
-; RV64-UF2-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP0]], -1
-; RV64-UF2-NEXT: [[TMP4:%.*]] = add i32 [[N]], -1
-; RV64-UF2-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP3]] to i32
-; RV64-UF2-NEXT: [[TMP6:%.*]] = sub i32 [[TMP4]], [[TMP5]]
-; RV64-UF2-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP6]], [[TMP4]]
-; RV64-UF2-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP3]], 4294967295
-; RV64-UF2-NEXT: [[TMP10:%.*]] = or i1 [[TMP7]], [[TMP9]]
-; RV64-UF2-NEXT: br i1 [[TMP10]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
+; RV64-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; RV64-UF2: [[VECTOR_MEMCHECK]]:
; RV64-UF2-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
; RV64-UF2-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4
@@ -392,8 +372,8 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
; RV64-UF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[TMP20]]
; RV64-UF2-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; RV64-UF2: [[SCALAR_PH]]:
-; RV64-UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP48]], %[[MIDDLE_BLOCK]] ], [ [[TMP0]], %[[FOR_BODY_PREHEADER]] ], [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ], [ [[TMP0]], %[[VECTOR_MEMCHECK]] ]
-; RV64-UF2-NEXT: [[BC_RESUME_VAL8:%.*]] = phi i32 [ [[TMP21]], %[[MIDDLE_BLOCK]] ], [ [[N]], %[[FOR_BODY_PREHEADER]] ], [ [[N]], %[[VECTOR_SCEVCHECK]] ], [ [[N]], %[[VECTOR_MEMCHECK]] ]
+; RV64-UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP48]], %[[MIDDLE_BLOCK]] ], [ [[TMP0]], %[[FOR_BODY_PREHEADER]] ], [ [[TMP0]], %[[VECTOR_MEMCHECK]] ]
+; RV64-UF2-NEXT: [[BC_RESUME_VAL8:%.*]] = phi i32 [ [[TMP21]], %[[MIDDLE_BLOCK]] ], [ [[N]], %[[FOR_BODY_PREHEADER]] ], [ [[N]], %[[VECTOR_MEMCHECK]] ]
; RV64-UF2-NEXT: br label %[[FOR_BODY:.*]]
; RV64-UF2: [[FOR_COND_CLEANUP_LOOPEXIT]]:
; RV64-UF2-NEXT: br label %[[FOR_COND_CLEANUP]]
@@ -437,23 +417,14 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
; RV64-NEXT: br i1 [[CMP7]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_COND_CLEANUP:.*]]
; RV64: [[FOR_BODY_PREHEADER]]:
; RV64-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
-; RV64-NEXT: br label %[[VECTOR_SCEVCHECK:.*]]
-; RV64: [[VECTOR_SCEVCHECK]]:
-; RV64-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP0]], -1
-; RV64-NEXT: [[TMP4:%.*]] = add i32 [[N]], -1
-; RV64-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP3]] to i32
-; RV64-NEXT: [[TMP6:%.*]] = sub i32 [[TMP4]], [[TMP5]]
-; RV64-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP6]], [[TMP4]]
-; RV64-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP3]], 4294967295
-; RV64-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[TMP9]]
-; RV64-NEXT: br i1 [[TMP8]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
+; RV64-NEXT: br label %[[VECTOR_MEMCHECK:.*]]
; RV64: [[VECTOR_MEMCHECK]]:
; RV64-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
; RV64-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4
; RV64-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4
; RV64-NEXT: [[TMP14:%.*]] = sub i64 [[B1]], [[A2]]
; RV64-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP14]], [[TMP13]]
-; RV64-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; RV64-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; RV64: [[VECTOR_PH]]:
; RV64-NEXT: br label %[[VECTOR_BODY:.*]]
; RV64: [[VECTOR_BODY]]:
@@ -491,8 +462,6 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
; RV64: [[MIDDLE_BLOCK]]:
; RV64-NEXT: br label %[[FOR_COND_CLEANUP_LOOPEXIT:.*]]
; RV64: [[SCALAR_PH]]:
-; RV64-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ], [ [[TMP0]], %[[VECTOR_MEMCHECK]] ]
-; RV64-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[N]], %[[VECTOR_SCEVCHECK]] ], [ [[N]], %[[VECTOR_MEMCHECK]] ]
; RV64-NEXT: br label %[[FOR_BODY:.*]]
; RV64: [[FOR_COND_CLEANUP_LOOPEXIT]]:
; RV64-NEXT: br label %[[FOR_COND_CLEANUP]]
@@ -571,16 +540,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
; RV64-UF2-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; RV64-UF2-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 3
; RV64-UF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
-; RV64-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; RV64-UF2: [[VECTOR_SCEVCHECK]]:
-; RV64-UF2-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP0]], -1
-; RV64-UF2-NEXT: [[TMP4:%.*]] = add i32 [[N]], -1
-; RV64-UF2-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP3]] to i32
-; RV64-UF2-NEXT: [[TMP6:%.*]] = sub i32 [[TMP4]], [[TMP5]]
-; RV64-UF2-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP6]], [[TMP4]]
-; RV64-UF2-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP3]], 4294967295
-; RV64-UF2-NEXT: [[TMP10:%.*]] = or i1 [[TMP7]], [[TMP9]]
-; RV64-UF2-NEXT: br i1 [[TMP10]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
+; RV64-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; RV64-UF2: [[VECTOR_MEMCHECK]]:
; RV64-UF2-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
; RV64-UF2-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4
@@ -643,8 +603,8 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
; RV64-UF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[TMP20]]
; RV64-UF2-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; RV64-UF2: [[SCALAR_PH]]:
-; RV64-UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP48]], %[[MIDDLE_BLOCK]] ], [ [[TMP0]], %[[FOR_BODY_PREHEADER]] ], [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ], [ [[TMP0]], %[[VECTOR_MEMCHECK]] ]
-; RV64-UF2-NEXT: [[BC_RESUME_VAL8:%.*]] = phi i32 [ [[TMP21]], %[[MIDDLE_BLOCK]] ], [ [[N]], %[[FOR_BODY_PREHEADER]] ], [ [[N]], %[[VECTOR_SCEVCHECK]] ], [ [[N]], %[[VECTOR_MEMCHECK]] ]
+; RV64-UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP48]], %[[MIDDLE_BLOCK]] ], [ [[TMP0]], %[[FOR_BODY_PREHEADER]] ], [ [[TMP0]], %[[VECTOR_MEMCHECK]] ]
+; RV64-UF2-NEXT: [[BC_RESUME_VAL8:%.*]] = phi i32 [ [[TMP21]], %[[MIDDLE_BLOCK]] ], [ [[N]], %[[FOR_BODY_PREHEADER]] ], [ [[N]], %[[VECTOR_MEMCHECK]] ]
; RV64-UF2-NEXT: br label %[[FOR_BODY:.*]]
; RV64-UF2: [[FOR_COND_CLEANUP_LOOPEXIT]]:
; RV64-UF2-NEXT: br label %[[FOR_COND_CLEANUP]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
index 96e1ebb48b02c..a978075431e81 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
@@ -727,44 +727,15 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
; STRIDED-LABEL: @double_stride_int_scaled(
; STRIDED-NEXT: entry:
; STRIDED-NEXT: br label [[VECTOR_SCEVCHECK:%.*]]
-; STRIDED: vector.scevcheck:
-; STRIDED-NEXT: [[TMP24:%.*]] = shl i64 [[STRIDE:%.*]], 2
-; STRIDED-NEXT: [[TMP25:%.*]] = mul i64 [[STRIDE]], -4
-; STRIDED-NEXT: [[TMP26:%.*]] = icmp slt i64 [[TMP24]], 0
-; STRIDED-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i64 [[TMP25]], i64 [[TMP24]]
-; STRIDED-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[TMP27]], i64 1023)
-; STRIDED-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
-; STRIDED-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
-; STRIDED-NEXT: [[TMP28:%.*]] = sub i64 0, [[MUL_RESULT]]
-; STRIDED-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[P2:%.*]], i64 [[MUL_RESULT]]
-; STRIDED-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP28]]
-; STRIDED-NEXT: [[TMP31:%.*]] = icmp ult ptr [[TMP29]], [[P2]]
-; STRIDED-NEXT: [[TMP32:%.*]] = icmp ugt ptr [[TMP30]], [[P2]]
-; STRIDED-NEXT: [[TMP33:%.*]] = select i1 [[TMP26]], i1 [[TMP32]], i1 [[TMP31]]
-; STRIDED-NEXT: [[TMP13:%.*]] = or i1 [[TMP33]], [[MUL_OVERFLOW]]
-; STRIDED-NEXT: [[TMP34:%.*]] = icmp slt i64 [[TMP24]], 0
-; STRIDED-NEXT: [[TMP15:%.*]] = select i1 [[TMP34]], i64 [[TMP25]], i64 [[TMP24]]
-; STRIDED-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[TMP15]], i64 1023)
-; STRIDED-NEXT: [[MUL_RESULT2:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0
-; STRIDED-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1
-; STRIDED-NEXT: [[TMP16:%.*]] = sub i64 0, [[MUL_RESULT2]]
-; STRIDED-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[MUL_RESULT2]]
-; STRIDED-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP16]]
-; STRIDED-NEXT: [[TMP37:%.*]] = icmp ult ptr [[TMP35]], [[P]]
-; STRIDED-NEXT: [[TMP38:%.*]] = icmp ugt ptr [[TMP36]], [[P]]
-; STRIDED-NEXT: [[TMP39:%.*]] = select i1 [[TMP34]], i1 [[TMP38]], i1 [[TMP37]]
-; STRIDED-NEXT: [[TMP40:%.*]] = or i1 [[TMP39]], [[MUL_OVERFLOW3]]
-; STRIDED-NEXT: [[TMP23:%.*]] = or i1 [[TMP13]], [[TMP40]]
-; STRIDED-NEXT: br i1 [[TMP23]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK1:%.*]]
; STRIDED: vector.memcheck:
-; STRIDED-NEXT: [[TMP3:%.*]] = mul i64 [[STRIDE]], 4092
-; STRIDED-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP3]]
+; STRIDED-NEXT: [[TMP3:%.*]] = mul i64 [[STRIDE:%.*]], 4092
+; STRIDED-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P2:%.*]], i64 [[TMP3]]
; STRIDED-NEXT: [[TMP4:%.*]] = icmp ult ptr [[P2]], [[SCEVGEP]]
; STRIDED-NEXT: [[UMIN:%.*]] = select i1 [[TMP4]], ptr [[P2]], ptr [[SCEVGEP]]
; STRIDED-NEXT: [[TMP5:%.*]] = icmp ugt ptr [[P2]], [[SCEVGEP]]
; STRIDED-NEXT: [[UMAX:%.*]] = select i1 [[TMP5]], ptr [[P2]], ptr [[SCEVGEP]]
; STRIDED-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[UMAX]], i64 4
-; STRIDED-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP3]]
+; STRIDED-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP3]]
; STRIDED-NEXT: [[TMP6:%.*]] = icmp ult ptr [[P]], [[SCEVGEP2]]
; STRIDED-NEXT: [[UMIN3:%.*]] = select i1 [[TMP6]], ptr [[P]], ptr [[SCEVGEP2]]
; STRIDED-NEXT: [[TMP7:%.*]] = icmp ugt ptr [[P]], [[SCEVGEP2]]
@@ -773,7 +744,7 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
; STRIDED-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[UMIN]], [[SCEVGEP5]]
; STRIDED-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[UMIN3]], [[SCEVGEP1]]
; STRIDED-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
-; STRIDED-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; STRIDED-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; STRIDED: vector.ph:
; STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[STRIDE]], i64 0
; STRIDED-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
@@ -801,10 +772,9 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
; STRIDED: middle.block:
; STRIDED-NEXT: br label [[EXIT:%.*]]
; STRIDED: scalar.ph:
-; STRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK1]] ]
; STRIDED-NEXT: br label [[LOOP:%.*]]
; STRIDED: loop:
-; STRIDED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; STRIDED-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
; STRIDED-NEXT: [[OFFSET:%.*]] = mul nuw nsw i64 [[I]], [[STRIDE]]
; STRIDED-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET]]
; STRIDED-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4
@@ -821,47 +791,18 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
; STRIDED-UF2-NEXT: entry:
; STRIDED-UF2-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; STRIDED-UF2-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
-; STRIDED-UF2-NEXT: [[UMAX9:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 79)
+; STRIDED-UF2-NEXT: [[UMAX9:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 24)
; STRIDED-UF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[UMAX9]]
; STRIDED-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; STRIDED-UF2: vector.scevcheck:
-; STRIDED-UF2-NEXT: [[TMP2:%.*]] = shl i64 [[STRIDE:%.*]], 2
-; STRIDED-UF2-NEXT: [[TMP3:%.*]] = mul i64 [[STRIDE]], -4
-; STRIDED-UF2-NEXT: [[TMP4:%.*]] = icmp slt i64 [[TMP2]], 0
-; STRIDED-UF2-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 [[TMP2]]
-; STRIDED-UF2-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[TMP5]], i64 1023)
-; STRIDED-UF2-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
-; STRIDED-UF2-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
-; STRIDED-UF2-NEXT: [[TMP6:%.*]] = sub i64 0, [[MUL_RESULT]]
-; STRIDED-UF2-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[P2:%.*]], i64 [[MUL_RESULT]]
-; STRIDED-UF2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP6]]
-; STRIDED-UF2-NEXT: [[TMP9:%.*]] = icmp ult ptr [[TMP7]], [[P2]]
-; STRIDED-UF2-NEXT: [[TMP10:%.*]] = icmp ugt ptr [[TMP8]], [[P2]]
-; STRIDED-UF2-NEXT: [[TMP11:%.*]] = select i1 [[TMP4]], i1 [[TMP10]], i1 [[TMP9]]
-; STRIDED-UF2-NEXT: [[TMP12:%.*]] = or i1 [[TMP11]], [[MUL_OVERFLOW]]
-; STRIDED-UF2-NEXT: [[TMP13:%.*]] = icmp slt i64 [[TMP2]], 0
-; STRIDED-UF2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP3]], i64 [[TMP2]]
-; STRIDED-UF2-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[TMP14]], i64 1023)
-; STRIDED-UF2-NEXT: [[MUL_RESULT2:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0
-; STRIDED-UF2-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1
-; STRIDED-UF2-NEXT: [[TMP15:%.*]] = sub i64 0, [[MUL_RESULT2]]
-; STRIDED-UF2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[MUL_RESULT2]]
-; STRIDED-UF2-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP15]]
-; STRIDED-UF2-NEXT: [[TMP18:%.*]] = icmp ult ptr [[TMP16]], [[P]]
-; STRIDED-UF2-NEXT: [[TMP19:%.*]] = icmp ugt ptr [[TMP17]], [[P]]
-; STRIDED-UF2-NEXT: [[TMP20:%.*]] = select i1 [[TMP13]], i1 [[TMP19]], i1 [[TMP18]]
-; STRIDED-UF2-NEXT: [[TMP21:%.*]] = or i1 [[TMP20]], [[MUL_OVERFLOW3]]
-; STRIDED-UF2-NEXT: [[TMP22:%.*]] = or i1 [[TMP12]], [[TMP21]]
-; STRIDED-UF2-NEXT: br i1 [[TMP22]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
; STRIDED-UF2: vector.memcheck:
-; STRIDED-UF2-NEXT: [[TMP23:%.*]] = mul i64 [[STRIDE]], 4092
-; STRIDED-UF2-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP23]]
+; STRIDED-UF2-NEXT: [[TMP23:%.*]] = mul i64 [[STRIDE:%.*]], 4092
+; STRIDED-UF2-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P2:%.*]], i64 [[TMP23]]
; STRIDED-UF2-NEXT: [[TMP24:%.*]] = icmp ult ptr [[P2]], [[SCEVGEP]]
; STRIDED-UF2-NEXT: [[UMIN:%.*]] = select i1 [[TMP24]], ptr [[P2]], ptr [[SCEVGEP]]
; STRIDED-UF2-NEXT: [[TMP25:%.*]] = icmp ugt ptr [[P2]], [[SCEVGEP]]
; STRIDED-UF2-NEXT: [[UMAX:%.*]] = select i1 [[TMP25]], ptr [[P2]], ptr [[SCEVGEP]]
; STRIDED-UF2-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[UMAX]], i64 4
-; STRIDED-UF2-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP23]]
+; STRIDED-UF2-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP23]]
; STRIDED-UF2-NEXT: [[TMP26:%.*]] = icmp ult ptr [[P]], [[SCEVGEP5]]
; STRIDED-UF2-NEXT: [[UMIN6:%.*]] = select i1 [[TMP26]], ptr [[P]], ptr [[SCEVGEP5]]
; STRIDED-UF2-NEXT: [[TMP27:%.*]] = icmp ugt ptr [[P]], [[SCEVGEP5]]
@@ -909,7 +850,7 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
; STRIDED-UF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; STRIDED-UF2-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; STRIDED-UF2: scalar.ph:
-; STRIDED-UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; STRIDED-UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
; STRIDED-UF2-NEXT: br label [[LOOP:%.*]]
; STRIDED-UF2: loop:
; STRIDED-UF2-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
index 801f910c5e13d..9e0f6bb20feb6 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
@@ -334,24 +334,8 @@ define void @multi_exit(ptr %dst, ptr %src.1, ptr %src.2, i64 %A, i64 %B) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = freeze i64 [[TMP0]]
; CHECK-NEXT: [[UMIN10:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 [[A:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = add nuw i64 [[UMIN10]], 1
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP2]], 24
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; CHECK: vector.scevcheck:
-; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[B]], i64 1)
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[UMAX]], -1
-; CHECK-NEXT: [[TMP4:%.*]] = freeze i64 [[TMP3]]
-; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP4]], i64 [[A]])
-; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[UMIN]] to i32
-; CHECK-NEXT: [[TMP6:%.*]] = add i32 1, [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 1
-; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMIN]], 4294967295
-; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = trunc i64 [[UMIN]] to i32
-; CHECK-NEXT: [[TMP11:%.*]] = icmp slt i32 [[TMP10]], 0
-; CHECK-NEXT: [[TMP12:%.*]] = icmp ugt i64 [[UMIN]], 4294967295
-; CHECK-NEXT: [[TMP13:%.*]] = or i1 [[TMP11]], [[TMP12]]
-; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP9]], [[TMP13]]
-; CHECK-NEXT: br i1 [[TMP14]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK-NEXT: [[TMP14:%.*]] = icmp ule i64 [[TMP2]], 12
+; CHECK-NEXT: br i1 [[TMP14]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 1
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[SRC_2:%.*]], i64 8
@@ -398,8 +382,8 @@ define void @multi_exit(ptr %dst, ptr %src.1, ptr %src.2, i64 %A, i64 %B) #0 {
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL11:%.*]] = phi i32 [ [[TMP22]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL11:%.*]] = phi i32 [ [[TMP22]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: br label [[LOOP1:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV_1_WIDE:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_1_NEXT_WIDE:%.*]], [[LOOP_LATCH:%.*]] ]
@@ -822,10 +806,7 @@ define i32 @g(i64 %n) {
; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[N:%.*]] to i32
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; CHECK: vector.scevcheck:
-; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[N]], 4294967295
-; CHECK-NEXT: br i1 [[TMP2]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK: vector.main.loop.iter.check:
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP1]], 16
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -904,8 +885,8 @@ define i32 @g(i64 %n) {
; CHECK-NEXT: [[CMP_N16:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC8]]
; CHECK-NEXT: br i1 [[CMP_N16]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL17:%.*]] = phi i32 [ [[N_VEC8]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX18:%.*]] = phi i32 [ [[TMP27]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP20]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL17:%.*]] = phi i32 [ [[N_VEC8]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX18:%.*]] = phi i32 [ [[TMP27]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP20]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL17]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll
index 62fda4e0c2098..613bd0ab290a1 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll
@@ -6,14 +6,7 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) {
; CHECK-SAME: ptr [[DST:%.*]], i32 [[X:%.*]], i64 [[M:%.*]], i64 [[CONV6:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CONV61:%.*]] = zext i32 [[X]] to i64
-; CHECK-NEXT: br label %[[VECTOR_SCEVCHECK:.*]]
-; CHECK: [[VECTOR_SCEVCHECK]]:
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]]
-; CHECK-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], 3
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 4
@@ -43,23 +36,6 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) {
; CHECK-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
-; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[DIV18:%.*]] = sdiv i64 [[M]], [[CONV6]]
-; CHECK-NEXT: [[CONV20:%.*]] = trunc i64 [[DIV18]] to i32
-; CHECK-NEXT: [[MUL30:%.*]] = mul i64 [[DIV18]], [[CONV61]]
-; CHECK-NEXT: [[SUB31:%.*]] = sub i64 [[IV]], [[MUL30]]
-; CHECK-NEXT: [[CONV34:%.*]] = trunc i64 [[SUB31]] to i32
-; CHECK-NEXT: [[MUL35:%.*]] = mul i32 [[X]], [[CONV20]]
-; CHECK-NEXT: [[ADD36:%.*]] = add i32 [[MUL35]], [[CONV34]]
-; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD36]] to i64
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr double, ptr [[DST]], i64 [[IDXPROM]]
-; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP]], align 8
-; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
@@ -92,14 +68,7 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
; CHECK-SAME: ptr [[DST:%.*]], i32 [[X:%.*]], i64 [[M:%.*]], i64 [[CONV6:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CONV61:%.*]] = zext i32 [[X]] to i64
-; CHECK-NEXT: br label %[[VECTOR_SCEVCHECK:.*]]
-; CHECK: [[VECTOR_SCEVCHECK]]:
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]]
-; CHECK-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], 3
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 4
@@ -153,31 +122,9 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[TMP5]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP30]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP30]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
-; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
-; CHECK-NEXT: [[C:%.*]] = icmp ule i64 [[IV]], [[M]]
-; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
-; CHECK: [[THEN]]:
-; CHECK-NEXT: [[DIV18:%.*]] = sdiv i64 [[M]], [[CONV6]]
-; CHECK-NEXT: [[CONV20:%.*]] = trunc i64 [[DIV18]] to i32
-; CHECK-NEXT: [[MUL30:%.*]] = mul i64 [[DIV18]], [[CONV61]]
-; CHECK-NEXT: [[SUB31:%.*]] = sub i64 [[IV]], [[MUL30]]
-; CHECK-NEXT: [[CONV34:%.*]] = trunc i64 [[SUB31]] to i32
-; CHECK-NEXT: [[MUL35:%.*]] = mul i32 [[X]], [[CONV20]]
-; CHECK-NEXT: [[ADD36:%.*]] = add i32 [[MUL35]], [[CONV34]]
-; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD36]] to i64
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr double, ptr [[DST]], i64 [[IDXPROM]]
-; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP]], align 8
-; CHECK-NEXT: br label %[[LOOP_LATCH]]
-; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
@@ -277,7 +224,5 @@ exit:
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
-; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
-; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
-; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
;.
diff --git a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
index a1b92e0658bd3..d5a6f3389176e 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
@@ -12,15 +12,7 @@ define void @test_pr59459(i64 %iv.start, ptr %arr) {
; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; CHECK: vector.scevcheck:
-; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[IV_START]] to i32
-; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP4]], i32 92)
-; CHECK-NEXT: [[TMP5:%.*]] = sub i32 [[SMAX]], [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP4]], -1
-; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], [[TMP5]]
-; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], [[TMP6]]
-; CHECK-NEXT: br i1 [[TMP8]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK: vector.main.loop.iter.check:
; CHECK-NEXT: [[MIN_ITERS_CHECK2:%.*]] = icmp ult i64 [[TMP3]], 16
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK2]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -87,7 +79,7 @@ define void @test_pr59459(i64 %iv.start, ptr %arr) {
; CHECK-NEXT: [[CMP_N16:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC4]]
; CHECK-NEXT: br i1 [[CMP_N16]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL7:%.*]] = phi i64 [ [[IND_END5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END6]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[IV_START]], [[VECTOR_SCEVCHECK]] ], [ [[IV_START]], [[ITER_CHECK:%.*]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL7:%.*]] = phi i64 [ [[IND_END5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END6]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[IV_START]], [[ITER_CHECK:%.*]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL7]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll b/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll
index 08d39ea038586..62f30de45fa60 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll
@@ -25,8 +25,6 @@ define void @foo(ptr nocapture %a, ptr nocapture %b, i32 %k, i32 %m) #0 {
; CHECK-NEXT: [[CMP27:%.*]] = icmp sgt i32 [[M:%.*]], 0
; CHECK-NEXT: br i1 [[CMP27]], label [[FOR_BODY3_LR_PH_US_PREHEADER:%.*]], label [[FOR_END15:%.*]]
; CHECK: for.body3.lr.ph.us.preheader:
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[M]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[K:%.*]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[M]] to i64
; CHECK-NEXT: br label [[FOR_BODY3_LR_PH_US:%.*]]
; CHECK: for.end.us:
@@ -53,17 +51,11 @@ define void @foo(ptr nocapture %a, ptr nocapture %b, i32 %k, i32 %m) #0 {
; CHECK-NEXT: br i1 [[EXITCOND32]], label [[FOR_END_US:%.*]], label [[FOR_BODY3_US]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: for.body3.lr.ph.us:
; CHECK-NEXT: [[INDVARS_IV33]] = phi i64 [ [[INDVARS_IV_NEXT34]], [[FOR_END_US]] ], [ 0, [[FOR_BODY3_LR_PH_US_PREHEADER]] ]
-; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP1]], [[INDVARS_IV33]]
-; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP6]] to i32
; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[INDVARS_IV33]] to i32
-; CHECK-NEXT: [[ADD_US]] = add i32 [[TMP8]], [[K]]
+; CHECK-NEXT: [[ADD_US]] = add i32 [[TMP8]], [[K:%.*]]
; CHECK-NEXT: [[ARRAYIDX7_US]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV33]]
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH]], label [[VECTOR_SCEVCHECK:%.*]]
-; CHECK: vector.scevcheck:
-; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP7]], [[TMP0]]
-; CHECK-NEXT: [[TMP10:%.*]] = icmp slt i32 [[TMP9]], [[TMP7]]
-; CHECK-NEXT: br i1 [[TMP10]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
@@ -80,12 +72,12 @@ define void @foo(ptr nocapture %a, ptr nocapture %b, i32 %k, i32 %m) #0 {
; CHECK-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access [[META0]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[OFFSET_IDX]], 4
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_US]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY3_LR_PH_US]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY3_LR_PH_US]] ]
; CHECK-NEXT: br label [[FOR_BODY3_US]]
; CHECK: for.end15.loopexit:
; CHECK-NEXT: br label [[FOR_END15]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll
index 5eeebf2009a62..20c4b76f0feb6 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll
@@ -82,17 +82,8 @@ define void @geps_feeding_interleave_groups_with_reuse(ptr %arg, i64 %arg1, ptr
; CHECK-SAME: ptr [[ARG:%.*]], i64 [[ARG1:%.*]], ptr [[ARG2:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[ARG1]], 1
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 18
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; CHECK: [[VECTOR_SCEVCHECK]]:
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[ARG]], i64 16
-; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[ARG1]])
-; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
-; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]]
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[SCEVGEP]]
-; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]]
-; CHECK-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
; CHECK-NEXT: [[TMP20:%.*]] = shl i64 [[ARG1]], 4
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[TMP20]], 16
@@ -143,7 +134,7 @@ define void @geps_feeding_interleave_groups_with_reuse(ptr %arg, i64 %arg1, ptr
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
@@ -242,88 +233,8 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N)
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[N]], 3
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP1]], 56
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; CHECK: [[VECTOR_SCEVCHECK]]:
-; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[N]], 3
-; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
-; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
-; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
-; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[MUL_RESULT]]
-; CHECK-NEXT: [[TMP41:%.*]] = icmp ult ptr [[TMP32]], [[A]]
-; CHECK-NEXT: [[TMP44:%.*]] = or i1 [[TMP41]], [[MUL_OVERFLOW]]
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 4
-; CHECK-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
-; CHECK-NEXT: [[MUL_RESULT2:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0
-; CHECK-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT2]]
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ult ptr [[TMP4]], [[SCEVGEP]]
-; CHECK-NEXT: [[TMP57:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW3]]
-; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 8
-; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
-; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
-; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[MUL_RESULT3]]
-; CHECK-NEXT: [[TMP9:%.*]] = icmp ult ptr [[TMP8]], [[SCEVGEP1]]
-; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW4]]
-; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[A]], i64 12
-; CHECK-NEXT: [[MUL6:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
-; CHECK-NEXT: [[MUL_RESULT7:%.*]] = extractvalue { i64, i1 } [[MUL6]], 0
-; CHECK-NEXT: [[MUL_OVERFLOW8:%.*]] = extractvalue { i64, i1 } [[MUL6]], 1
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[SCEVGEP5]], i64 [[MUL_RESULT7]]
-; CHECK-NEXT: [[TMP13:%.*]] = icmp ult ptr [[TMP12]], [[SCEVGEP5]]
-; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP13]], [[MUL_OVERFLOW8]]
-; CHECK-NEXT: [[SCEVGEP9:%.*]] = getelementptr i8, ptr [[A]], i64 16
-; CHECK-NEXT: [[MUL10:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
-; CHECK-NEXT: [[MUL_RESULT11:%.*]] = extractvalue { i64, i1 } [[MUL10]], 0
-; CHECK-NEXT: [[MUL_OVERFLOW12:%.*]] = extractvalue { i64, i1 } [[MUL10]], 1
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[SCEVGEP9]], i64 [[MUL_RESULT11]]
-; CHECK-NEXT: [[TMP17:%.*]] = icmp ult ptr [[TMP16]], [[SCEVGEP9]]
-; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW12]]
-; CHECK-NEXT: [[SCEVGEP13:%.*]] = getelementptr i8, ptr [[A]], i64 20
-; CHECK-NEXT: [[MUL14:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
-; CHECK-NEXT: [[MUL_RESULT15:%.*]] = extractvalue { i64, i1 } [[MUL14]], 0
-; CHECK-NEXT: [[MUL_OVERFLOW16:%.*]] = extractvalue { i64, i1 } [[MUL14]], 1
-; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[SCEVGEP13]], i64 [[MUL_RESULT15]]
-; CHECK-NEXT: [[TMP21:%.*]] = icmp ult ptr [[TMP20]], [[SCEVGEP13]]
-; CHECK-NEXT: [[TMP22:%.*]] = or i1 [[TMP21]], [[MUL_OVERFLOW16]]
-; CHECK-NEXT: [[SCEVGEP17:%.*]] = getelementptr i8, ptr [[A]], i64 24
-; CHECK-NEXT: [[MUL18:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
-; CHECK-NEXT: [[MUL_RESULT19:%.*]] = extractvalue { i64, i1 } [[MUL18]], 0
-; CHECK-NEXT: [[MUL_OVERFLOW20:%.*]] = extractvalue { i64, i1 } [[MUL18]], 1
-; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[SCEVGEP17]], i64 [[MUL_RESULT19]]
-; CHECK-NEXT: [[TMP25:%.*]] = icmp ult ptr [[TMP24]], [[SCEVGEP17]]
-; CHECK-NEXT: [[TMP26:%.*]] = or i1 [[TMP25]], [[MUL_OVERFLOW20]]
-; CHECK-NEXT: [[SCEVGEP21:%.*]] = getelementptr i8, ptr [[A]], i64 28
-; CHECK-NEXT: [[MUL22:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
-; CHECK-NEXT: [[MUL_RESULT23:%.*]] = extractvalue { i64, i1 } [[MUL22]], 0
-; CHECK-NEXT: [[MUL_OVERFLOW24:%.*]] = extractvalue { i64, i1 } [[MUL22]], 1
-; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[SCEVGEP21]], i64 [[MUL_RESULT23]]
-; CHECK-NEXT: [[TMP29:%.*]] = icmp ult ptr [[TMP28]], [[SCEVGEP21]]
-; CHECK-NEXT: [[TMP30:%.*]] = or i1 [[TMP29]], [[MUL_OVERFLOW24]]
-; CHECK-NEXT: [[SCEVGEP31:%.*]] = getelementptr i8, ptr [[B]], i64 4
-; CHECK-NEXT: [[MUL29:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[TMP2]])
-; CHECK-NEXT: [[MUL_RESULT30:%.*]] = extractvalue { i64, i1 } [[MUL29]], 0
-; CHECK-NEXT: [[MUL_OVERFLOW31:%.*]] = extractvalue { i64, i1 } [[MUL29]], 1
-; CHECK-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr [[SCEVGEP31]], i64 [[MUL_RESULT30]]
-; CHECK-NEXT: [[TMP69:%.*]] = icmp ult ptr [[TMP68]], [[SCEVGEP31]]
-; CHECK-NEXT: [[TMP70:%.*]] = or i1 [[TMP69]], [[MUL_OVERFLOW31]]
-; CHECK-NEXT: [[MUL25:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
-; CHECK-NEXT: [[MUL_RESULT26:%.*]] = extractvalue { i64, i1 } [[MUL25]], 0
-; CHECK-NEXT: [[MUL_OVERFLOW27:%.*]] = extractvalue { i64, i1 } [[MUL25]], 1
-; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i8, ptr [[B]], i64 [[MUL_RESULT26]]
-; CHECK-NEXT: [[TMP33:%.*]] = icmp ult ptr [[TMP71]], [[B]]
-; CHECK-NEXT: [[TMP34:%.*]] = or i1 [[TMP33]], [[MUL_OVERFLOW27]]
-; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP44]], [[TMP57]]
-; CHECK-NEXT: [[TMP35:%.*]] = or i1 [[TMP6]], [[TMP10]]
-; CHECK-NEXT: [[TMP36:%.*]] = or i1 [[TMP35]], [[TMP14]]
-; CHECK-NEXT: [[TMP37:%.*]] = or i1 [[TMP36]], [[TMP18]]
-; CHECK-NEXT: [[TMP38:%.*]] = or i1 [[TMP37]], [[TMP22]]
-; CHECK-NEXT: [[TMP39:%.*]] = or i1 [[TMP38]], [[TMP26]]
-; CHECK-NEXT: [[TMP40:%.*]] = or i1 [[TMP39]], [[TMP30]]
-; CHECK-NEXT: [[TMP72:%.*]] = or i1 [[TMP40]], [[TMP70]]
-; CHECK-NEXT: [[TMP73:%.*]] = or i1 [[TMP72]], [[TMP34]]
-; CHECK-NEXT: br i1 [[TMP73]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP1]], 8
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
; CHECK-NEXT: [[TMP42:%.*]] = lshr i64 [[N]], 3
; CHECK-NEXT: [[TMP43:%.*]] = shl i64 [[TMP42]], 5
@@ -376,7 +287,7 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N)
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT_7:%.*]], %[[LOOP]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll
index 2809a77b36f1a..b45cda704cf87 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll
@@ -10,15 +10,7 @@ define i64 @test_value_in_exit_compare_chain_used_outside(ptr %src, i64 %x, i64
; CHECK-NEXT: [[UMIN2:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 [[X]])
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[UMIN2]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP2]], 8
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; CHECK: [[VECTOR_SCEVCHECK]]:
-; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[N]], -1
-; CHECK-NEXT: [[TMP4:%.*]] = freeze i64 [[TMP3]]
-; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP4]], i64 [[X]])
-; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[UMIN]] to i1
-; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[UMIN]], 1
-; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
-; CHECK-NEXT: br i1 [[TMP7]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
@@ -42,8 +34,8 @@ define i64 @test_value_in_exit_compare_chain_used_outside(ptr %src, i64 %x, i64
; CHECK-NEXT: [[TMP31:%.*]] = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> [[TMP29]])
; CHECK-NEXT: br label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i8 [ [[TMP31]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i8 [ [[TMP31]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
@@ -97,5 +89,5 @@ exit.2:
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
-; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
;.
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll
index 9217c905945ac..70f4c2cd8ed65 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll
@@ -38,23 +38,8 @@ define i32 @main(ptr %ptr) {
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1
; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], [[UMIN1]]
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP4]], 24
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; CHECK: vector.scevcheck:
-; CHECK-NEXT: [[TMP5:%.*]] = add i8 [[CONV3]], -1
-; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
-; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[TMP6]])
-; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP6]], [[UMIN]]
-; CHECK-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
-; CHECK-NEXT: [[TMP9:%.*]] = sub i8 [[TMP5]], [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i8 [[TMP9]], [[TMP5]]
-; CHECK-NEXT: [[TMP12:%.*]] = icmp ugt i32 [[TMP7]], 255
-; CHECK-NEXT: [[TMP13:%.*]] = or i1 [[TMP10]], [[TMP12]]
-; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[DOTPROMOTED]], 1
-; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], [[TMP7]]
-; CHECK-NEXT: [[TMP16:%.*]] = icmp slt i32 [[TMP15]], [[TMP14]]
-; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP13]], [[TMP16]]
-; CHECK-NEXT: br i1 [[TMP17]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: [[TMP17:%.*]] = icmp ult i32 [[TMP4]], 8
+; CHECK-NEXT: br i1 [[TMP17]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP4]], 8
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP4]], [[N_MOD_VF]]
@@ -77,8 +62,8 @@ define i32 @main(ptr %ptr) {
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP4]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DOTPROMOTED]], [[FOR_BODY8_LR_PH]] ], [ [[DOTPROMOTED]], [[VECTOR_SCEVCHECK]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i8 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[CONV3]], [[FOR_BODY8_LR_PH]] ], [ [[CONV3]], [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DOTPROMOTED]], [[FOR_BODY8_LR_PH]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i8 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[CONV3]], [[FOR_BODY8_LR_PH]] ]
; CHECK-NEXT: br label [[FOR_BODY8:%.*]]
; CHECK: for.body8:
; CHECK-NEXT: [[INC5:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY8]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll b/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll
index 368842634c374..23eba1cd75dd7 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll
@@ -35,19 +35,7 @@ define void @test(ptr %p) {
; VEC-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3
; VEC-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1
; VEC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP4]], 4
-; VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; VEC: vector.scevcheck:
-; VEC-NEXT: [[TMP5:%.*]] = add i64 [[P1]], 16
-; VEC-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP5]], i64 add (i64 ptrtoint (ptr @h to i64), i64 1))
-; VEC-NEXT: [[TMP6:%.*]] = add i64 [[UMAX]], -9
-; VEC-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], [[P1]]
-; VEC-NEXT: [[TMP8:%.*]] = lshr i64 [[TMP7]], 3
-; VEC-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP8]] to i16
-; VEC-NEXT: [[TMP11:%.*]] = add i16 2, [[TMP10]]
-; VEC-NEXT: [[TMP12:%.*]] = icmp ult i16 [[TMP11]], 2
-; VEC-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[TMP8]], 65535
-; VEC-NEXT: [[TMP14:%.*]] = or i1 [[TMP12]], [[TMP13]]
-; VEC-NEXT: br i1 [[TMP14]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VEC: vector.ph:
; VEC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], 4
; VEC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF]]
@@ -84,9 +72,9 @@ define void @test(ptr %p) {
; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]]
; VEC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; VEC: scalar.ph:
-; VEC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ], [ 1, [[VECTOR_SCEVCHECK]] ]
-; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ], [ 1, [[VECTOR_SCEVCHECK]] ]
-; VEC-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; VEC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
+; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ]
+; VEC-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; VEC-NEXT: br label [[FOR_BODY:%.*]]
; VEC: for.body:
; VEC-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IDX:%.*]], [[FOR_BODY]] ]
@@ -126,5 +114,5 @@ exit:
; VEC: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; VEC: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; VEC: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
-; VEC: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
+; VEC: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
;.
diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll
index 3813560d9300a..89f0b04a53f85 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll
@@ -9,10 +9,7 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 {
; I64-NEXT: [[ITER_CHECK:.*:]]
; I64-NEXT: [[TMP4:%.*]] = add i32 [[N]], 1
; I64-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP4]], 4
-; I64-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; I64: [[VECTOR_SCEVCHECK]]:
-; I64-NEXT: [[TMP1:%.*]] = icmp slt i32 [[N]], 0
-; I64-NEXT: br i1 [[TMP1]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
+; I64-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
; I64: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
; I64-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ule i32 [[TMP4]], 16
; I64-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/scev-checks-unprofitable.ll b/llvm/test/Transforms/LoopVectorize/X86/scev-checks-unprofitable.ll
index 7e6b5e932b6c6..93a17f20b468c 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/scev-checks-unprofitable.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/scev-checks-unprofitable.ll
@@ -8,7 +8,7 @@ target triple = "x86_64-unknown-linux-gnu"
; expression when vectorizing loop.2
define void @value_defined_in_loop1_used_for_trip_counts(i32 %start, i1 %c, ptr %dst) {
; CHECK-LABEL: define void @value_defined_in_loop1_used_for_trip_counts(
-; CHECK-SAME: i32 [[START:%.*]], i1 [[C:%.*]], ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: i32 [[OFFSET_IDX:%.*]], i1 [[C:%.*]], ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[C]], i32 0, i32 7
; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[SELECT]] to i64
@@ -33,10 +33,31 @@ define void @value_defined_in_loop1_used_for_trip_counts(i32 %start, i1 %c, ptr
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT_1_LOOPEXIT1:.*]]
; CHECK: [[LOOP_2_PREHEADER]]:
+; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[IV_1]], 1
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 8
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH2:.*]]
+; CHECK: [[VECTOR_PH2]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 8
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
+; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], [[DOTCAST]]
+; CHECK-NEXT: br label %[[VECTOR_BODY3:.*]]
+; CHECK: [[VECTOR_BODY3]]:
+; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[OFFSET_IDX]], 1
+; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP4]]
+; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr [[TMP5]], align 2
+; CHECK-NEXT: br label %[[MIDDLE_BLOCK4:.*]]
+; CHECK: [[MIDDLE_BLOCK4]]:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_1_LOOPEXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK4]] ], [ 0, %[[LOOP_2_PREHEADER]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK4]] ], [ [[OFFSET_IDX]], %[[LOOP_2_PREHEADER]] ]
; CHECK-NEXT: br label %[[LOOP_2:.*]]
; CHECK: [[LOOP_2]]:
-; CHECK-NEXT: [[IV_2:%.*]] = phi i64 [ [[IV_2_NEXT:%.*]], %[[LOOP_2]] ], [ 0, %[[LOOP_2_PREHEADER]] ]
-; CHECK-NEXT: [[IV_3:%.*]] = phi i32 [ [[IV_3_NEXT:%.*]], %[[LOOP_2]] ], [ [[START]], %[[LOOP_2_PREHEADER]] ]
+; CHECK-NEXT: [[IV_2:%.*]] = phi i64 [ [[IV_2_NEXT:%.*]], %[[LOOP_2]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
+; CHECK-NEXT: [[IV_3:%.*]] = phi i32 [ [[IV_3_NEXT:%.*]], %[[LOOP_2]] ], [ [[BC_RESUME_VAL5]], %[[SCALAR_PH]] ]
; CHECK-NEXT: [[IV_3_NEXT]] = add i32 [[IV_3]], 1
; CHECK-NEXT: [[IV_2_NEXT]] = add i64 [[IV_2]], 1
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[IV_3]], 1
@@ -44,7 +65,7 @@ define void @value_defined_in_loop1_used_for_trip_counts(i32 %start, i1 %c, ptr
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[ZEXT8]]
; CHECK-NEXT: store i16 0, ptr [[GEP_DST]], align 2
; CHECK-NEXT: [[EC_2:%.*]] = icmp ult i64 [[IV_2]], [[IV_1_LCSSA]]
-; CHECK-NEXT: br i1 [[EC_2]], label %[[LOOP_2]], label %[[EXIT_1_LOOPEXIT:.*]]
+; CHECK-NEXT: br i1 [[EC_2]], label %[[LOOP_2]], label %[[EXIT_1_LOOPEXIT]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[EXIT_1_LOOPEXIT]]:
; CHECK-NEXT: br label %[[EXIT_1:.*]]
; CHECK: [[EXIT_1_LOOPEXIT1]]:
@@ -87,3 +108,8 @@ loop.3:
exit.1:
ret void
}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[META2]] = !{!"llvm.loop.isvectorized", i32 1}
+;.
diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll b/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll
index d29719de79ffd..c9e94697f5006 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll
@@ -44,8 +44,9 @@ define void @example() {
; FORCED-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; FORCED-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; FORCED: [[MIDDLE_BLOCK]]:
-; FORCED-NEXT: br [[EXIT:label %.*]]
-; FORCED: [[SCALAR_PH:.*:]]
+; FORCED-NEXT: br label %[[EXIT:.*]]
+; FORCED: [[EXIT]]:
+; FORCED-NEXT: ret void
;
entry:
br label %loop
@@ -85,12 +86,7 @@ define void @test_replicating_store_x86_fp80_cost(i32 %n, ptr %dst) #0 {
; FORCED-NEXT: [[ENTRY:.*:]]
; FORCED-NEXT: [[TMP0:%.*]] = add i32 [[N]], 2
; FORCED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2
-; FORCED-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; FORCED: [[VECTOR_SCEVCHECK]]:
-; FORCED-NEXT: [[TMP1:%.*]] = zext i32 [[N]] to i64
-; FORCED-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
-; FORCED-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[TMP2]], 4294967295
-; FORCED-NEXT: br i1 [[TMP3]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; FORCED-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; FORCED: [[VECTOR_PH]]:
; FORCED-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
; FORCED-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
diff --git a/llvm/test/Transforms/LoopVectorize/branch-weights.ll b/llvm/test/Transforms/LoopVectorize/branch-weights.ll
index 4445141549069..42c4d3a1ca4fe 100644
--- a/llvm/test/Transforms/LoopVectorize/branch-weights.ll
+++ b/llvm/test/Transforms/LoopVectorize/branch-weights.ll
@@ -12,11 +12,7 @@ define void @f0(i8 %n, i32 %len, ptr %p) !prof !0 {
; MAINVF4IC1_EPI4: br i1 [[CMP_ENTRY]], label %[[ITER_CHECK:.*]], label %[[EXIT:.*]], !prof [[PROF1:![0-9]+]]
; MAINVF4IC1_EPI4: [[ITER_CHECK]]:
; MAINVF4IC1_EPI4: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0:%.*]], 4
-; MAINVF4IC1_EPI4: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]], !prof [[PROF2:![0-9]+]]
-; MAINVF4IC1_EPI4: [[VECTOR_SCEVCHECK]]:
-; MAINVF4IC1_EPI4: [[TMP2:%.*]] = icmp slt i8 [[TMP1:%.*]], 0
-; MAINVF4IC1_EPI4: [[TMP3:%.*]] = icmp ugt i32 [[LEN]], 255
-; MAINVF4IC1_EPI4: br i1 [[TMP4:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF2]]
+; MAINVF4IC1_EPI4: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF2:![0-9]+]]
; MAINVF4IC1_EPI4: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
; MAINVF4IC1_EPI4: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 4
; MAINVF4IC1_EPI4: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF2]]
@@ -55,11 +51,7 @@ define void @f0(i8 %n, i32 %len, ptr %p) !prof !0 {
; MAINVF4IC2_EPI4: br i1 [[CMP_ENTRY]], label %[[ITER_CHECK:.*]], label %[[EXIT:.*]], !prof [[PROF1:![0-9]+]]
; MAINVF4IC2_EPI4: [[ITER_CHECK]]:
; MAINVF4IC2_EPI4: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0:%.*]], 4
-; MAINVF4IC2_EPI4: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]], !prof [[PROF2:![0-9]+]]
-; MAINVF4IC2_EPI4: [[VECTOR_SCEVCHECK]]:
-; MAINVF4IC2_EPI4: [[TMP2:%.*]] = icmp slt i8 [[TMP1:%.*]], 0
-; MAINVF4IC2_EPI4: [[TMP3:%.*]] = icmp ugt i32 [[LEN]], 255
-; MAINVF4IC2_EPI4: br i1 [[TMP4:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF2]]
+; MAINVF4IC2_EPI4: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF2:![0-9]+]]
; MAINVF4IC2_EPI4: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
; MAINVF4IC2_EPI4: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 8
; MAINVF4IC2_EPI4: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF2]]
@@ -130,7 +122,7 @@ exit:
; MAINVF4IC1_EPI4: [[LOOP11]] = distinct !{[[LOOP11]], [[META5]], [[META6]], [[META12:![0-9]+]]}
; MAINVF4IC1_EPI4: [[META12]] = !{!"llvm.loop.estimated_trip_count", i32 0}
; MAINVF4IC1_EPI4: [[PROF13]] = !{!"branch_weights", i32 2, i32 1}
-; MAINVF4IC1_EPI4: [[LOOP14]] = distinct !{[[LOOP14]], [[META5]], [[META15:![0-9]+]]}
+; MAINVF4IC1_EPI4: [[LOOP14]] = distinct !{[[LOOP14]], [[META6]], [[META5]], [[META15:![0-9]+]]}
; MAINVF4IC1_EPI4: [[META15]] = !{!"llvm.loop.estimated_trip_count", i32 3}
;.
; MAINVF4IC2_EPI4: [[PROF0]] = !{!"function_entry_count", i64 13}
@@ -148,6 +140,6 @@ exit:
; MAINVF4IC2_EPI4: [[META12]] = !{!"llvm.loop.estimated_trip_count", i32 0}
; MAINVF4IC2_EPI4: [[PROF13]] = !{!"branch_weights", i32 1, i32 3}
; MAINVF4IC2_EPI4: [[PROF14]] = !{!"branch_weights", i32 2, i32 1}
-; MAINVF4IC2_EPI4: [[LOOP15]] = distinct !{[[LOOP15]], [[META5]], [[META16:![0-9]+]]}
+; MAINVF4IC2_EPI4: [[LOOP15]] = distinct !{[[LOOP15]], [[META6]], [[META5]], [[META16:![0-9]+]]}
; MAINVF4IC2_EPI4: [[META16]] = !{!"llvm.loop.estimated_trip_count", i32 3}
;.
diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-trunc-induction-steps.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-trunc-induction-steps.ll
index f79deac2a45b0..7660613d2a4b7 100644
--- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-trunc-induction-steps.ll
+++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-trunc-induction-steps.ll
@@ -9,13 +9,7 @@ define void @trunc_iv_steps_with_epilogue(ptr %A, i64 %N) {
; CHECK-NEXT: iter.check:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; CHECK: vector.scevcheck:
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[N]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[N]], 4294967295
-; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]]
-; CHECK-NEXT: br i1 [[TMP4]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK: vector.main.loop.iter.check:
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -58,7 +52,7 @@ define void @trunc_iv_steps_with_epilogue(ptr %A, i64 %N) {
; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]]
; CHECK-NEXT: br i1 [[CMP_N4]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV_I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
@@ -96,5 +90,5 @@ exit:
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[PROF3]] = !{!"branch_weights", i32 4, i32 0}
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
-; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
+; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
;.
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll
index b0350cd884af1..97ca273c5bc56 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll
@@ -783,12 +783,7 @@ define void @sink_dominance(ptr %ptr, i32 %N) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[UMAX1:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1)
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[UMAX1]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; CHECK: vector.scevcheck:
-; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1)
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 0
-; CHECK-NEXT: br i1 [[TMP1]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[UMAX1]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[UMAX1]], [[N_MOD_VF]]
@@ -812,8 +807,8 @@ define void @sink_dominance(ptr %ptr, i32 %N) {
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[UMAX1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_NEXT:%.*]], [[LOOP]] ]
@@ -862,12 +857,7 @@ define void @sink_dominance_2(ptr %ptr, i32 %N) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[UMAX1:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1)
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[UMAX1]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; CHECK: vector.scevcheck:
-; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1)
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 0
-; CHECK-NEXT: br i1 [[TMP1]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[UMAX1]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[UMAX1]], [[N_MOD_VF]]
@@ -893,8 +883,8 @@ define void @sink_dominance_2(ptr %ptr, i32 %N) {
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[UMAX1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_NEXT:%.*]], [[LOOP]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
index b6fb378a042fd..f56c9ae82e793 100644
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -3278,20 +3278,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
; CHECK: loop.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; CHECK: vector.scevcheck:
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
-; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[T]], [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[T]]
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[LEN]], 255
-; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[LEN]] to i8
-; CHECK-NEXT: [[TMP7:%.*]] = add i8 [[T]], [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP7]], [[T]]
-; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt i32 [[LEN]], 255
-; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP5]], [[TMP10]]
-; CHECK-NEXT: br i1 [[TMP11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
@@ -3317,9 +3304,9 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ], [ [[EXT]], [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
@@ -3345,23 +3332,12 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
; IND: loop.preheader:
; IND-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2
-; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; IND: vector.scevcheck:
-; IND-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
-; IND-NEXT: [[TMP2:%.*]] = xor i8 [[T]], -1
-; IND-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[TMP1]]
-; IND-NEXT: [[TMP4:%.*]] = trunc i32 [[LEN]] to i8
-; IND-NEXT: [[TMP5:%.*]] = add i8 [[T]], [[TMP4]]
-; IND-NEXT: [[TMP6:%.*]] = icmp slt i8 [[TMP5]], [[T]]
-; IND-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255
-; IND-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
-; IND-NEXT: [[TMP9:%.*]] = or i1 [[TMP3]], [[TMP8]]
-; IND-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IND: vector.ph:
-; IND-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], 510
+; IND-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], -2
; IND-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8
; IND-NEXT: [[IND_END:%.*]] = add i8 [[T]], [[DOTCAST]]
-; IND-NEXT: [[IND_END2:%.*]] = add nuw nsw i32 [[N_VEC]], [[EXT]]
+; IND-NEXT: [[TMP2:%.*]] = add i32 [[N_VEC]], [[EXT]]
; IND-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EXT]], i64 0
; IND-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; IND-NEXT: [[INDUCTION:%.*]] = add nuw nsw <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
@@ -3382,9 +3358,9 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
; IND-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; IND-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; IND: scalar.ph:
-; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ]
-; IND-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
-; IND-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ], [ [[EXT]], [[VECTOR_SCEVCHECK]] ]
+; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ]
+; IND-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ]
+; IND-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ]
; IND-NEXT: br label [[LOOP:%.*]]
; IND: loop:
; IND-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
@@ -3411,23 +3387,12 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
; UNROLL: loop.preheader:
; UNROLL-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 4
-; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; UNROLL: vector.scevcheck:
-; UNROLL-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
-; UNROLL-NEXT: [[TMP2:%.*]] = xor i8 [[T]], -1
-; UNROLL-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[TMP1]]
-; UNROLL-NEXT: [[TMP4:%.*]] = trunc i32 [[LEN]] to i8
-; UNROLL-NEXT: [[TMP5:%.*]] = add i8 [[T]], [[TMP4]]
-; UNROLL-NEXT: [[TMP6:%.*]] = icmp slt i8 [[TMP5]], [[T]]
-; UNROLL-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255
-; UNROLL-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
-; UNROLL-NEXT: [[TMP9:%.*]] = or i1 [[TMP3]], [[TMP8]]
-; UNROLL-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL: vector.ph:
-; UNROLL-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], 508
+; UNROLL-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], -4
; UNROLL-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8
; UNROLL-NEXT: [[IND_END:%.*]] = add i8 [[T]], [[DOTCAST]]
-; UNROLL-NEXT: [[IND_END2:%.*]] = add nuw nsw i32 [[N_VEC]], [[EXT]]
+; UNROLL-NEXT: [[TMP2:%.*]] = add i32 [[N_VEC]], [[EXT]]
; UNROLL-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EXT]], i64 0
; UNROLL-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; UNROLL-NEXT: [[INDUCTION:%.*]] = add nuw nsw <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
@@ -3451,9 +3416,9 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; UNROLL: scalar.ph:
-; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ]
-; UNROLL-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
-; UNROLL-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ], [ [[EXT]], [[VECTOR_SCEVCHECK]] ]
+; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ]
+; UNROLL-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ]
+; UNROLL-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ]
; UNROLL-NEXT: br label [[LOOP:%.*]]
; UNROLL: loop:
; UNROLL-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
@@ -3481,20 +3446,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
; UNROLL-NO-IC: loop.preheader:
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 4
-; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; UNROLL-NO-IC: vector.scevcheck:
-; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
-; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i8 [[T]], [[TMP1]]
-; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[T]]
-; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[LEN]], 255
-; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
-; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = trunc i32 [[LEN]] to i8
-; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i8 [[T]], [[TMP6]]
-; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP7]], [[T]]
-; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = icmp ugt i32 [[LEN]], 255
-; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
-; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = or i1 [[TMP5]], [[TMP10]]
-; UNROLL-NO-IC-NEXT: br i1 [[TMP11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 4
; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
@@ -3523,9 +3475,9 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-IC: scalar.ph:
-; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ]
-; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
-; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ], [ [[EXT]], [[VECTOR_SCEVCHECK]] ]
+; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ]
+; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ]
+; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ]
; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]]
; UNROLL-NO-IC: loop:
; UNROLL-NO-IC-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
@@ -3551,23 +3503,12 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
; INTERLEAVE: loop.preheader:
; INTERLEAVE-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 8
-; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; INTERLEAVE: vector.scevcheck:
-; INTERLEAVE-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
-; INTERLEAVE-NEXT: [[TMP2:%.*]] = xor i8 [[T]], -1
-; INTERLEAVE-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[TMP1]]
-; INTERLEAVE-NEXT: [[TMP4:%.*]] = trunc i32 [[LEN]] to i8
-; INTERLEAVE-NEXT: [[TMP5:%.*]] = add i8 [[T]], [[TMP4]]
-; INTERLEAVE-NEXT: [[TMP6:%.*]] = icmp slt i8 [[TMP5]], [[T]]
-; INTERLEAVE-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255
-; INTERLEAVE-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
-; INTERLEAVE-NEXT: [[TMP9:%.*]] = or i1 [[TMP3]], [[TMP8]]
-; INTERLEAVE-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; INTERLEAVE: vector.ph:
-; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], 504
+; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], -8
; INTERLEAVE-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8
; INTERLEAVE-NEXT: [[IND_END:%.*]] = add i8 [[T]], [[DOTCAST]]
-; INTERLEAVE-NEXT: [[IND_END2:%.*]] = add nuw nsw i32 [[N_VEC]], [[EXT]]
+; INTERLEAVE-NEXT: [[TMP2:%.*]] = add i32 [[N_VEC]], [[EXT]]
; INTERLEAVE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[EXT]], i64 0
; INTERLEAVE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; INTERLEAVE-NEXT: [[INDUCTION:%.*]] = add nuw nsw <4 x i32> [[DOTSPLAT]], <i32 0, i32 1, i32 2, i32 3>
@@ -3591,9 +3532,9 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; INTERLEAVE: scalar.ph:
-; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ]
-; INTERLEAVE-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
-; INTERLEAVE-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ], [ [[EXT]], [[VECTOR_SCEVCHECK]] ]
+; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ]
+; INTERLEAVE-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ]
+; INTERLEAVE-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ]
; INTERLEAVE-NEXT: br label [[LOOP:%.*]]
; INTERLEAVE: loop:
; INTERLEAVE-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
@@ -3653,20 +3594,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
; CHECK: loop.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; CHECK: vector.scevcheck:
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
-; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[T]], [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[T]]
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[LEN]], 255
-; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[LEN]] to i8
-; CHECK-NEXT: [[TMP7:%.*]] = add i8 [[T]], [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP7]], [[T]]
-; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt i32 [[LEN]], 255
-; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP5]], [[TMP10]]
-; CHECK-NEXT: br i1 [[TMP11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
@@ -3693,9 +3621,9 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ], [ [[EXT_MUL]], [[VECTOR_SCEVCHECK]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
@@ -3723,24 +3651,13 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
; IND: loop.preheader:
; IND-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2
-; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; IND: vector.scevcheck:
-; IND-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
-; IND-NEXT: [[TMP2:%.*]] = xor i8 [[T]], -1
-; IND-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[TMP1]]
-; IND-NEXT: [[TMP4:%.*]] = trunc i32 [[LEN]] to i8
-; IND-NEXT: [[TMP5:%.*]] = add i8 [[T]], [[TMP4]]
-; IND-NEXT: [[TMP6:%.*]] = icmp slt i8 [[TMP5]], [[T]]
-; IND-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255
-; IND-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
-; IND-NEXT: [[TMP9:%.*]] = or i1 [[TMP3]], [[TMP8]]
-; IND-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IND: vector.ph:
-; IND-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], 510
+; IND-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], -2
; IND-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8
; IND-NEXT: [[IND_END:%.*]] = add i8 [[T]], [[DOTCAST]]
-; IND-NEXT: [[EXT_MUL5:%.*]] = add nuw nsw i32 [[N_VEC]], [[EXT]]
-; IND-NEXT: [[IND_END1:%.*]] = shl nuw nsw i32 [[EXT_MUL5]], 2
+; IND-NEXT: [[EXT_MUL4:%.*]] = add i32 [[N_VEC]], [[EXT]]
+; IND-NEXT: [[TMP2:%.*]] = shl i32 [[EXT_MUL4]], 2
; IND-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EXT_MUL]], i64 0
; IND-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; IND-NEXT: [[INDUCTION:%.*]] = add nuw nsw <2 x i32> [[DOTSPLAT]], <i32 0, i32 4>
@@ -3761,9 +3678,9 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
; IND-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; IND-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; IND: scalar.ph:
-; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ]
-; IND-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ], [ [[EXT_MUL]], [[VECTOR_SCEVCHECK]] ]
-; IND-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ]
+; IND-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ]
+; IND-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ]
; IND-NEXT: br label [[LOOP:%.*]]
; IND: loop:
; IND-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
@@ -3792,24 +3709,13 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
; UNROLL: loop.preheader:
; UNROLL-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 4
-; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; UNROLL: vector.scevcheck:
-; UNROLL-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
-; UNROLL-NEXT: [[TMP2:%.*]] = xor i8 [[T]], -1
-; UNROLL-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[TMP1]]
-; UNROLL-NEXT: [[TMP4:%.*]] = trunc i32 [[LEN]] to i8
-; UNROLL-NEXT: [[TMP5:%.*]] = add i8 [[T]], [[TMP4]]
-; UNROLL-NEXT: [[TMP6:%.*]] = icmp slt i8 [[TMP5]], [[T]]
-; UNROLL-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255
-; UNROLL-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
-; UNROLL-NEXT: [[TMP9:%.*]] = or i1 [[TMP3]], [[TMP8]]
-; UNROLL-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL: vector.ph:
-; UNROLL-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], 508
+; UNROLL-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], -4
; UNROLL-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8
; UNROLL-NEXT: [[IND_END:%.*]] = add i8 [[T]], [[DOTCAST]]
-; UNROLL-NEXT: [[EXT_MUL5:%.*]] = add nuw nsw i32 [[N_VEC]], [[EXT]]
-; UNROLL-NEXT: [[IND_END1:%.*]] = shl nuw nsw i32 [[EXT_MUL5]], 2
+; UNROLL-NEXT: [[EXT_MUL4:%.*]] = add i32 [[N_VEC]], [[EXT]]
+; UNROLL-NEXT: [[TMP2:%.*]] = shl i32 [[EXT_MUL4]], 2
; UNROLL-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EXT_MUL]], i64 0
; UNROLL-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; UNROLL-NEXT: [[INDUCTION:%.*]] = add nuw nsw <2 x i32> [[DOTSPLAT]], <i32 0, i32 4>
@@ -3833,9 +3739,9 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; UNROLL: scalar.ph:
-; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ]
-; UNROLL-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ], [ [[EXT_MUL]], [[VECTOR_SCEVCHECK]] ]
-; UNROLL-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ]
+; UNROLL-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ]
+; UNROLL-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ]
; UNROLL-NEXT: br label [[LOOP:%.*]]
; UNROLL: loop:
; UNROLL-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
@@ -3865,20 +3771,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
; UNROLL-NO-IC: loop.preheader:
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 4
-; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; UNROLL-NO-IC: vector.scevcheck:
-; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
-; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i8 [[T]], [[TMP1]]
-; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[T]]
-; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[LEN]], 255
-; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
-; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = trunc i32 [[LEN]] to i8
-; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i8 [[T]], [[TMP6]]
-; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP7]], [[T]]
-; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = icmp ugt i32 [[LEN]], 255
-; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
-; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = or i1 [[TMP5]], [[TMP10]]
-; UNROLL-NO-IC-NEXT: br i1 [[TMP11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 4
; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
@@ -3908,9 +3801,9 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-IC: scalar.ph:
-; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ]
-; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ], [ [[EXT_MUL]], [[VECTOR_SCEVCHECK]] ]
-; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ]
+; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ]
+; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ]
; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]]
; UNROLL-NO-IC: loop:
; UNROLL-NO-IC-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
@@ -3938,24 +3831,13 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
; INTERLEAVE: loop.preheader:
; INTERLEAVE-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 8
-; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; INTERLEAVE: vector.scevcheck:
-; INTERLEAVE-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
-; INTERLEAVE-NEXT: [[TMP2:%.*]] = xor i8 [[T]], -1
-; INTERLEAVE-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[TMP1]]
-; INTERLEAVE-NEXT: [[TMP4:%.*]] = trunc i32 [[LEN]] to i8
-; INTERLEAVE-NEXT: [[TMP5:%.*]] = add i8 [[T]], [[TMP4]]
-; INTERLEAVE-NEXT: [[TMP6:%.*]] = icmp slt i8 [[TMP5]], [[T]]
-; INTERLEAVE-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255
-; INTERLEAVE-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
-; INTERLEAVE-NEXT: [[TMP9:%.*]] = or i1 [[TMP3]], [[TMP8]]
-; INTERLEAVE-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; INTERLEAVE: vector.ph:
-; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], 504
+; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], -8
; INTERLEAVE-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8
; INTERLEAVE-NEXT: [[IND_END:%.*]] = add i8 [[T]], [[DOTCAST]]
-; INTERLEAVE-NEXT: [[EXT_MUL5:%.*]] = add nuw nsw i32 [[N_VEC]], [[EXT]]
-; INTERLEAVE-NEXT: [[IND_END1:%.*]] = shl nuw nsw i32 [[EXT_MUL5]], 2
+; INTERLEAVE-NEXT: [[EXT_MUL4:%.*]] = add i32 [[N_VEC]], [[EXT]]
+; INTERLEAVE-NEXT: [[TMP2:%.*]] = shl i32 [[EXT_MUL4]], 2
; INTERLEAVE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[EXT_MUL]], i64 0
; INTERLEAVE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; INTERLEAVE-NEXT: [[INDUCTION:%.*]] = add nuw nsw <4 x i32> [[DOTSPLAT]], <i32 0, i32 4, i32 8, i32 12>
@@ -3979,9 +3861,9 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; INTERLEAVE: scalar.ph:
-; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ]
-; INTERLEAVE-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ], [ [[EXT_MUL]], [[VECTOR_SCEVCHECK]] ]
-; INTERLEAVE-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ]
+; INTERLEAVE-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ]
+; INTERLEAVE-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ]
; INTERLEAVE-NEXT: br label [[LOOP:%.*]]
; INTERLEAVE: loop:
; INTERLEAVE-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
@@ -4230,14 +4112,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
; CHECK-LABEL: @trunciv(
; CHECK-NEXT: for.body.preheader:
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 2
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; CHECK: vector.scevcheck:
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[K]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]]
-; CHECK-NEXT: br i1 [[TMP4]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[K]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[K]], [[N_MOD_VF]]
@@ -4256,7 +4131,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
@@ -4272,12 +4147,9 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
; IND-LABEL: @trunciv(
; IND-NEXT: for.body.preheader:
; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 2
-; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; IND: vector.scevcheck:
-; IND-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[K]], 2147483649
-; IND-NEXT: br i1 [[DOTNOT]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH]]
+; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IND: vector.ph:
-; IND-NEXT: [[N_VEC:%.*]] = and i64 [[K]], 4294967294
+; IND-NEXT: [[N_VEC:%.*]] = and i64 [[K]], -2
; IND-NEXT: br label [[VECTOR_BODY:%.*]]
; IND: vector.body:
; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -4294,7 +4166,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]]
; IND-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; IND: scalar.ph:
-; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ]
; IND-NEXT: br label [[FOR_BODY:%.*]]
; IND: for.body:
; IND-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
@@ -4312,12 +4184,9 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
; UNROLL-LABEL: @trunciv(
; UNROLL-NEXT: for.body.preheader:
; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 4
-; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; UNROLL: vector.scevcheck:
-; UNROLL-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[K]], 2147483649
-; UNROLL-NEXT: br i1 [[DOTNOT]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH]]
+; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL: vector.ph:
-; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[K]], 4294967292
+; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[K]], -4
; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL: vector.body:
; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -4337,7 +4206,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]]
; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; UNROLL: scalar.ph:
-; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ]
; UNROLL-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL: for.body:
; UNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
@@ -4355,14 +4224,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
; UNROLL-NO-IC-LABEL: @trunciv(
; UNROLL-NO-IC-NEXT: for.body.preheader:
; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 4
-; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; UNROLL-NO-IC: vector.scevcheck:
-; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[K]], -1
-; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
-; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
-; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]]
-; UNROLL-NO-IC-NEXT: br i1 [[TMP4]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[K]], 4
; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[K]], [[N_MOD_VF]]
@@ -4384,7 +4246,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-IC: scalar.ph:
-; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ]
; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL-NO-IC: for.body:
; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
@@ -4400,12 +4262,9 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
; INTERLEAVE-LABEL: @trunciv(
; INTERLEAVE-NEXT: for.body.preheader:
; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 8
-; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; INTERLEAVE: vector.scevcheck:
-; INTERLEAVE-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[K]], 2147483649
-; INTERLEAVE-NEXT: br i1 [[DOTNOT]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH]]
+; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; INTERLEAVE: vector.ph:
-; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i64 [[K]], 4294967288
+; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i64 [[K]], -8
; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
; INTERLEAVE: vector.body:
; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -4425,7 +4284,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]]
; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; INTERLEAVE: scalar.ph:
-; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ]
; INTERLEAVE-NEXT: br label [[FOR_BODY:%.*]]
; INTERLEAVE: for.body:
; INTERLEAVE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
@@ -5869,35 +5728,12 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
; CHECK-LABEL: @test_optimized_cast_induction_feeding_first_order_recurrence(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; CHECK: vector.scevcheck:
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[STEP:%.*]] to i8
-; CHECK-NEXT: [[TMP2:%.*]] = sub i8 0, [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i8 [[TMP1]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 [[TMP1]]
-; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP0]] to i8
-; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP4]], i8 [[TMP5]])
-; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
-; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = sub i8 0, [[MUL_RESULT]]
-; CHECK-NEXT: [[TMP7:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i8 [[TMP6]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP3]], i1 [[TMP8]], i1 [[TMP7]]
-; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
-; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], 255
-; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i8 [[TMP1]], 0
-; CHECK-NEXT: [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]]
-; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP10]], [[TMP13]]
-; CHECK-NEXT: [[TMP15:%.*]] = sext i8 [[TMP1]] to i32
-; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[STEP]], [[TMP15]]
-; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP14]], [[IDENT_CHECK]]
-; CHECK-NEXT: br i1 [[TMP16]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32
-; CHECK-NEXT: [[IND_END:%.*]] = mul i32 [[DOTCAST]], [[STEP]]
+; CHECK-NEXT: [[IND_END:%.*]] = mul i32 [[DOTCAST]], [[STEP:%.*]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP19:%.*]] = mul nsw <2 x i32> <i32 0, i32 1>, [[DOTSPLAT]]
@@ -5922,9 +5758,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ]
@@ -5944,32 +5780,11 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
; IND-LABEL: @test_optimized_cast_induction_feeding_first_order_recurrence(
; IND-NEXT: entry:
; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2
-; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; IND: vector.scevcheck:
-; IND-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
-; IND-NEXT: [[TMP1:%.*]] = trunc i32 [[STEP:%.*]] to i8
-; IND-NEXT: [[TMP2:%.*]] = icmp slt i8 [[TMP1]], 0
-; IND-NEXT: [[TMP3:%.*]] = call i8 @llvm.abs.i8(i8 [[TMP1]], i1 false)
-; IND-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP0]] to i8
-; IND-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP3]], i8 [[TMP4]])
-; IND-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
-; IND-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
-; IND-NEXT: [[TMP5:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
-; IND-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128
-; IND-NEXT: [[TMP7:%.*]] = select i1 [[TMP2]], i1 [[TMP6]], i1 [[TMP5]]
-; IND-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW]]
-; IND-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP0]], 255
-; IND-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP1]], 0
-; IND-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]]
-; IND-NEXT: [[TMP12:%.*]] = or i1 [[TMP8]], [[TMP11]]
-; IND-NEXT: [[TMP13:%.*]] = add i32 [[STEP]], -128
-; IND-NEXT: [[IDENT_CHECK:%.*]] = icmp ult i32 [[TMP13]], -256
-; IND-NEXT: [[TMP14:%.*]] = or i1 [[TMP12]], [[IDENT_CHECK]]
-; IND-NEXT: br i1 [[TMP14]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IND: vector.ph:
; IND-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -2
; IND-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32
-; IND-NEXT: [[IND_END:%.*]] = mul i32 [[STEP]], [[DOTCAST]]
+; IND-NEXT: [[IND_END:%.*]] = mul i32 [[STEP:%.*]], [[DOTCAST]]
; IND-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0
; IND-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; IND-NEXT: [[TMP15:%.*]] = mul nuw nsw <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
@@ -5993,9 +5808,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; IND-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; IND: scalar.ph:
-; IND-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
-; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
-; IND-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; IND-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; IND-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; IND-NEXT: br label [[LOOP:%.*]]
; IND: loop:
; IND-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ]
@@ -6015,31 +5830,10 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
; UNROLL-LABEL: @test_optimized_cast_induction_feeding_first_order_recurrence(
; UNROLL-NEXT: entry:
; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
-; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; UNROLL: vector.scevcheck:
-; UNROLL-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
-; UNROLL-NEXT: [[TMP1:%.*]] = trunc i32 [[STEP:%.*]] to i8
-; UNROLL-NEXT: [[TMP2:%.*]] = icmp slt i8 [[TMP1]], 0
-; UNROLL-NEXT: [[TMP3:%.*]] = call i8 @llvm.abs.i8(i8 [[TMP1]], i1 false)
-; UNROLL-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP0]] to i8
-; UNROLL-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP3]], i8 [[TMP4]])
-; UNROLL-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
-; UNROLL-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
-; UNROLL-NEXT: [[TMP5:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
-; UNROLL-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128
-; UNROLL-NEXT: [[TMP7:%.*]] = select i1 [[TMP2]], i1 [[TMP6]], i1 [[TMP5]]
-; UNROLL-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW]]
-; UNROLL-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP0]], 255
-; UNROLL-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP1]], 0
-; UNROLL-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]]
-; UNROLL-NEXT: [[TMP12:%.*]] = or i1 [[TMP8]], [[TMP11]]
-; UNROLL-NEXT: [[TMP13:%.*]] = add i32 [[STEP]], -128
-; UNROLL-NEXT: [[IDENT_CHECK:%.*]] = icmp ult i32 [[TMP13]], -256
-; UNROLL-NEXT: [[TMP14:%.*]] = or i1 [[TMP12]], [[IDENT_CHECK]]
-; UNROLL-NEXT: br i1 [[TMP14]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL: vector.ph:
; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -4
-; UNROLL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0
+; UNROLL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP:%.*]], i64 0
; UNROLL-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; UNROLL-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32
; UNROLL-NEXT: [[IND_END:%.*]] = mul i32 [[STEP]], [[DOTCAST]]
@@ -6066,9 +5860,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; UNROLL: scalar.ph:
-; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
-; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
-; UNROLL-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; UNROLL-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NEXT: br label [[LOOP:%.*]]
; UNROLL: loop:
; UNROLL-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ]
@@ -6088,34 +5882,11 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
; UNROLL-NO-IC-LABEL: @test_optimized_cast_induction_feeding_first_order_recurrence(
; UNROLL-NO-IC-NEXT: entry:
; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
-; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; UNROLL-NO-IC: vector.scevcheck:
-; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
-; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i32 [[STEP:%.*]] to i8
-; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = sub i8 0, [[TMP1]]
-; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = icmp slt i8 [[TMP1]], 0
-; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 [[TMP1]]
-; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP0]] to i8
-; UNROLL-NO-IC-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP4]], i8 [[TMP5]])
-; UNROLL-NO-IC-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
-; UNROLL-NO-IC-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
-; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = sub i8 0, [[MUL_RESULT]]
-; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
-; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = icmp sgt i8 [[TMP6]], 0
-; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = select i1 [[TMP3]], i1 [[TMP8]], i1 [[TMP7]]
-; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
-; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], 255
-; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = icmp ne i8 [[TMP1]], 0
-; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]]
-; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = or i1 [[TMP10]], [[TMP13]]
-; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = sext i8 [[TMP1]] to i32
-; UNROLL-NO-IC-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[STEP]], [[TMP15]]
-; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = or i1 [[TMP14]], [[IDENT_CHECK]]
-; UNROLL-NO-IC-NEXT: br i1 [[TMP16]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0
+; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP:%.*]], i64 0
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; UNROLL-NO-IC-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32
; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = mul i32 [[DOTCAST]], [[STEP]]
@@ -6143,9 +5914,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-IC: scalar.ph:
-; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
-; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
-; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]]
; UNROLL-NO-IC: loop:
; UNROLL-NO-IC-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ]
@@ -6165,31 +5936,10 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
; INTERLEAVE-LABEL: @test_optimized_cast_induction_feeding_first_order_recurrence(
; INTERLEAVE-NEXT: entry:
; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8
-; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; INTERLEAVE: vector.scevcheck:
-; INTERLEAVE-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
-; INTERLEAVE-NEXT: [[TMP1:%.*]] = trunc i32 [[STEP:%.*]] to i8
-; INTERLEAVE-NEXT: [[TMP2:%.*]] = icmp slt i8 [[TMP1]], 0
-; INTERLEAVE-NEXT: [[TMP3:%.*]] = call i8 @llvm.abs.i8(i8 [[TMP1]], i1 false)
-; INTERLEAVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP0]] to i8
-; INTERLEAVE-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP3]], i8 [[TMP4]])
-; INTERLEAVE-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
-; INTERLEAVE-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
-; INTERLEAVE-NEXT: [[TMP5:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
-; INTERLEAVE-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128
-; INTERLEAVE-NEXT: [[TMP7:%.*]] = select i1 [[TMP2]], i1 [[TMP6]], i1 [[TMP5]]
-; INTERLEAVE-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW]]
-; INTERLEAVE-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP0]], 255
-; INTERLEAVE-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP1]], 0
-; INTERLEAVE-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]]
-; INTERLEAVE-NEXT: [[TMP12:%.*]] = or i1 [[TMP8]], [[TMP11]]
-; INTERLEAVE-NEXT: [[TMP13:%.*]] = add i32 [[STEP]], -128
-; INTERLEAVE-NEXT: [[IDENT_CHECK:%.*]] = icmp ult i32 [[TMP13]], -256
-; INTERLEAVE-NEXT: [[TMP14:%.*]] = or i1 [[TMP12]], [[IDENT_CHECK]]
-; INTERLEAVE-NEXT: br i1 [[TMP14]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; INTERLEAVE: vector.ph:
; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -8
-; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[STEP]], i64 0
+; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[STEP:%.*]], i64 0
; INTERLEAVE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; INTERLEAVE-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32
; INTERLEAVE-NEXT: [[IND_END:%.*]] = mul i32 [[STEP]], [[DOTCAST]]
@@ -6216,9 +5966,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; INTERLEAVE: scalar.ph:
-; INTERLEAVE-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
-; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
-; INTERLEAVE-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; INTERLEAVE-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; INTERLEAVE-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; INTERLEAVE-NEXT: br label [[LOOP:%.*]]
; INTERLEAVE: loop:
; INTERLEAVE-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-requiring-scev-predicates.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-requiring-scev-predicates.ll
index fdbc35b5e7a43..6a0f6f8cf094c 100644
--- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-requiring-scev-predicates.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-requiring-scev-predicates.ll
@@ -30,19 +30,7 @@ define void @wrap_around_scev_check(ptr noalias %a, ptr noalias %b, i8 %x, i8 %y
; CHECK: [[LOOP_PREHEADER]]:
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i8 [[Y]] to i64
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[WIDE_TRIP_COUNT]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; CHECK: [[VECTOR_SCEVCHECK]]:
-; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i8
-; CHECK-NEXT: [[MUL1:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 2, i8 [[TMP1]])
-; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL1]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i8, i1 } [[MUL1]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[X]], [[MUL_RESULT]]
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[X]]
-; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
-; CHECK-NEXT: [[TMP17:%.*]] = icmp ugt i64 [[TMP0]], 255
-; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP5]], [[TMP17]]
-; CHECK-NEXT: br i1 [[TMP18]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
@@ -70,12 +58,12 @@ define void @wrap_around_scev_check(ptr noalias %a, ptr noalias %b, i8 %x, i8 %y
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i8 [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ [[X]], %[[LOOP_PREHEADER]] ], [ [[X]], %[[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i8 [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ [[X]], %[[LOOP_PREHEADER]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[INDEX_011:%.*]] = phi i8 [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[INDEX_011:%.*]] = phi i8 [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[IDXPROM:%.*]] = zext i8 [[INDEX_011]] to i64
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IDXPROM]]
; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
@@ -122,12 +110,8 @@ exit:
define void @wrap_predicate_for_interleave_group_wraps_for_known_trip_count(ptr noalias %x, ptr noalias %out) {
; CHECK-LABEL: define void @wrap_predicate_for_interleave_group_wraps_for_known_trip_count(
; CHECK-SAME: ptr noalias [[X:%.*]], ptr noalias [[OUT:%.*]]) {
-; CHECK-NEXT: [[START:.*]]:
-; CHECK-NEXT: br label %[[VECTOR_SCEVCHECK:.*]]
-; CHECK: [[VECTOR_SCEVCHECK]]:
-; CHECK-NEXT: [[MUL:%.*]] = call { i4, i1 } @llvm.umul.with.overflow.i4(i4 5, i4 -1)
-; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i4, i1 } [[MUL]], 1
-; CHECK-NEXT: br i1 [[MUL_OVERFLOW]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: [[START:.*:]]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
@@ -143,12 +127,11 @@ define void @wrap_predicate_for_interleave_group_wraps_for_known_trip_count(ptr
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 12
; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br label %[[SCALAR_PH]]
+; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 12, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 12, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[IV_MUL5:%.*]] = mul nuw nsw i64 [[IV]], 5
; CHECK-NEXT: [[IV_MUL5_MASKED:%.*]] = and i64 [[IV_MUL5]], 15
@@ -187,15 +170,7 @@ define void @wrap_predicate_for_interleave_group_unknown_trip_count(ptr noalias
; CHECK-SAME: ptr noalias [[X:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: [[START:.*]]:
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[N]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; CHECK: [[VECTOR_SCEVCHECK]]:
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
-; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP0]] to i4
-; CHECK-NEXT: [[MUL:%.*]] = call { i4, i1 } @llvm.umul.with.overflow.i4(i4 3, i4 [[TMP9]])
-; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i4, i1 } [[MUL]], 1
-; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[TMP0]], 15
-; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[MUL_OVERFLOW]], [[TMP1]]
-; CHECK-NEXT: br i1 [[TMP10]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
@@ -217,7 +192,7 @@ define void @wrap_predicate_for_interleave_group_unknown_trip_count(ptr noalias
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[START]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[START]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
index 8d3d0ff7a6406..d66e7edc69b9b 100644
--- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
+++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
@@ -166,16 +166,7 @@ define i32 @loop_requires_scev_predicate(ptr %dest, i32 %end) {
; CHECK-NEXT: [[TMP1:%.*]] = zext i10 [[TMP0]] to i64
; CHECK-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 1)
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 2
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; CHECK: vector.scevcheck:
-; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[END_CLAMPED]], i32 1)
-; CHECK-NEXT: [[TMP2:%.*]] = add nsw i32 [[UMAX]], -1
-; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
-; CHECK-NEXT: [[TMP4:%.*]] = add i8 1, [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i8 [[TMP4]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i32 [[TMP2]], 255
-; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
-; CHECK-NEXT: br i1 [[TMP7]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]]
@@ -201,7 +192,7 @@ define i32 @loop_requires_scev_predicate(ptr %dest, i32 %end) {
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1
; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4]]
-; CHECK: pred.store.if3:
+; CHECK: pred.store.if2:
; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 [[TMP20]]
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1
@@ -209,7 +200,7 @@ define i32 @loop_requires_scev_predicate(ptr %dest, i32 %end) {
; CHECK-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], [[TMP23]]
; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP21]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
-; CHECK: pred.store.continue4:
+; CHECK: pred.store.continue3:
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -217,8 +208,8 @@ define i32 @loop_requires_scev_predicate(ptr %dest, i32 %end) {
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[IND:%.*]] = phi i8 [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll b/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll
index 404ef096b49b3..511a1aca681dc 100644
--- a/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll
+++ b/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll
@@ -16,15 +16,7 @@ define i32 @test(ptr %arr, i64 %n) {
; CHECK: preheader:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; CHECK: vector.scevcheck:
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[N]], -2
-; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP1]] to i8
-; CHECK-NEXT: [[TMP8:%.*]] = add i8 2, [[TMP7]]
-; CHECK-NEXT: [[TMP9:%.*]] = icmp ult i8 [[TMP8]], 2
-; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[TMP1]], 255
-; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP9]], [[TMP10]]
-; CHECK-NEXT: br i1 [[TMP12]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
@@ -46,8 +38,8 @@ define i32 @test(ptr %arr, i64 %n) {
; CHECK-NEXT: [[IND_ESCAPE:%.*]] = sub i64 [[IND_END]], 1
; CHECK-NEXT: br i1 [[CMP_N]], label [[LOAD_VAL:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[PREHEADER]] ], [ 1, [[VECTOR_SCEVCHECK]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i8 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ 1, [[PREHEADER]] ], [ 1, [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[PREHEADER]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i8 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ 1, [[PREHEADER]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[CONV:%.*]] = phi i64 [ [[CONV2:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
@@ -107,5 +99,5 @@ done:
; CHECK: [[META1]] = !{!"llvm.loop.unroll.disable"}
; CHECK: [[META2]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META3]] = !{!"llvm.loop.unroll.runtime.disable"}
-; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META3]], [[META2]]}
;.
diff --git a/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll b/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll
index 496285a276923..4a7c4ba37568e 100644
--- a/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll
+++ b/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll
@@ -4,28 +4,19 @@
define void @test_ptr_iv_no_inbounds(ptr %p1.start, ptr %p2.start, ptr %p1.end) {
; CHECK-LABEL: @test_ptr_iv_no_inbounds(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[P1_START7:%.*]] = ptrtoint ptr [[P1_START:%.*]] to i64
-; CHECK-NEXT: [[P1_END6:%.*]] = ptrtoint ptr [[P1_END:%.*]] to i64
-; CHECK-NEXT: [[P1_START4:%.*]] = ptrtoint ptr [[P1_START]] to i64
-; CHECK-NEXT: [[P1_END3:%.*]] = ptrtoint ptr [[P1_END]] to i64
+; CHECK-NEXT: [[P1_START4:%.*]] = ptrtoint ptr [[P1_START:%.*]] to i64
+; CHECK-NEXT: [[P1_END3:%.*]] = ptrtoint ptr [[P1_END:%.*]] to i64
; CHECK-NEXT: [[P1_START2:%.*]] = ptrtoint ptr [[P1_START]] to i64
; CHECK-NEXT: [[P1_END1:%.*]] = ptrtoint ptr [[P1_END]] to i64
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[P1_END6]], -4
-; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[P1_START7]]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[P1_END3]], -4
+; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[P1_START4]]
; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 2
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; CHECK: vector.scevcheck:
-; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[P1_END1]] to i2
-; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[P1_START2]] to i2
-; CHECK-NEXT: [[TMP6:%.*]] = sub i2 [[TMP4]], [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = zext i2 [[TMP6]] to i64
-; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP7]], 0
-; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
-; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[P1_END3]], -4
-; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP8]], [[P1_START4]]
+; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[P1_END1]], -4
+; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP4]], [[P1_START2]]
; CHECK-NEXT: [[TMP10:%.*]] = lshr i64 [[TMP9]], 2
; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 2
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], 4
@@ -60,8 +51,8 @@ define void @test_ptr_iv_no_inbounds(ptr %p1.start, ptr %p2.start, ptr %p1.end)
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[P1_START]], [[ENTRY:%.*]] ], [ [[P1_START]], [[VECTOR_SCEVCHECK]] ], [ [[P1_START]], [[VECTOR_MEMCHECK]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL9:%.*]] = phi ptr [ [[IND_END8]], [[MIDDLE_BLOCK]] ], [ [[P2_START]], [[ENTRY]] ], [ [[P2_START]], [[VECTOR_SCEVCHECK]] ], [ [[P2_START]], [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[P1_START]], [[ENTRY:%.*]] ], [ [[P1_START]], [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL9:%.*]] = phi ptr [ [[IND_END8]], [[MIDDLE_BLOCK]] ], [ [[P2_START]], [[ENTRY]] ], [ [[P2_START]], [[VECTOR_SCEVCHECK]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[P1:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[P1_NEXT:%.*]], [[LOOP]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
index 1a1c05187590e..b4b384400a37b 100644
--- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
@@ -150,15 +150,7 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n)
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP0]] to i64
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; CHECK: [[VECTOR_SCEVCHECK]]:
-; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1
-; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
-; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[TMP0]], [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], [[TMP0]]
-; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[TMP1]], 4294967295
-; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP4]], [[TMP6]]
-; CHECK-NEXT: br i1 [[TMP7]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
@@ -217,8 +209,8 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n)
; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC3]]
; CHECK-NEXT: br i1 [[CMP_N6]], label %[[FOR_END_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ITER_CHECK]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL9:%.*]] = phi i32 [ [[IND_END]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL9:%.*]] = phi i32 [ [[IND_END]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
@@ -913,7 +905,7 @@ exit: ; preds = %loop
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]}
-; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]]}
+; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META2]], [[META1]]}
; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]}
; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
diff --git a/llvm/test/Transforms/LoopVectorize/optsize.ll b/llvm/test/Transforms/LoopVectorize/optsize.ll
index 08903351aa2f0..a621c0827467a 100644
--- a/llvm/test/Transforms/LoopVectorize/optsize.ll
+++ b/llvm/test/Transforms/LoopVectorize/optsize.ll
@@ -416,11 +416,7 @@ define void @pr43371_pgso(i16 %val) !prof !14 {
; NPGSO-LABEL: define void @pr43371_pgso(
; NPGSO-SAME: i16 [[VAL:%.*]]) !prof [[PROF14]] {
; NPGSO-NEXT: [[ENTRY:.*:]]
-; NPGSO-NEXT: br label %[[VECTOR_SCEVCHECK:.*]]
-; NPGSO: [[VECTOR_SCEVCHECK]]:
-; NPGSO-NEXT: [[TMP0:%.*]] = add i16 [[VAL]], 755
-; NPGSO-NEXT: [[TMP4:%.*]] = icmp ult i16 [[TMP0]], [[VAL]]
-; NPGSO-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; NPGSO-NEXT: br label %[[VECTOR_PH:.*]]
; NPGSO: [[VECTOR_PH]]:
; NPGSO-NEXT: br label %[[VECTOR_BODY:.*]]
; NPGSO: [[VECTOR_BODY]]:
@@ -435,19 +431,8 @@ define void @pr43371_pgso(i16 %val) !prof !14 {
; NPGSO-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; NPGSO: [[MIDDLE_BLOCK]]:
; NPGSO-NEXT: br label %[[FOR_COND_CLEANUP28:.*]]
-; NPGSO: [[SCALAR_PH]]:
-; NPGSO-NEXT: br label %[[FOR_BODY29:.*]]
; NPGSO: [[FOR_COND_CLEANUP28]]:
; NPGSO-NEXT: unreachable
-; NPGSO: [[FOR_BODY29]]:
-; NPGSO-NEXT: [[I24_0170:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[INC37:%.*]], %[[FOR_BODY29]] ]
-; NPGSO-NEXT: [[ADD33:%.*]] = add i16 [[VAL]], [[I24_0170]]
-; NPGSO-NEXT: [[IDXPROM34:%.*]] = zext i16 [[ADD33]] to i32
-; NPGSO-NEXT: [[ARRAYIDX35:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[IDXPROM34]]
-; NPGSO-NEXT: store i16 0, ptr [[ARRAYIDX35]], align 1
-; NPGSO-NEXT: [[INC37]] = add i16 [[I24_0170]], 1
-; NPGSO-NEXT: [[CMP26:%.*]] = icmp ult i16 [[INC37]], 756
-; NPGSO-NEXT: br i1 [[CMP26]], label %[[FOR_BODY29]], label %[[FOR_COND_CLEANUP28]], !llvm.loop [[LOOP21:![0-9]+]]
;
; We do not want to generate SCEV predicates when optimising for size, because
; that will lead to extra code generation such as the SCEV overflow runtime
@@ -545,7 +530,7 @@ define i32 @pr45526() optsize {
; NPGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; NPGSO-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; NPGSO-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 512
-; NPGSO-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
+; NPGSO-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; NPGSO: [[MIDDLE_BLOCK]]:
; NPGSO-NEXT: [[TMP4:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP0]], i1 false)
; NPGSO-NEXT: [[TMP5:%.*]] = sub i64 [[TMP4]], 1
@@ -643,7 +628,7 @@ define i32 @pr45526_pgso() !prof !14 {
; NPGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; NPGSO-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4)
; NPGSO-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 508
-; NPGSO-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
+; NPGSO-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; NPGSO: [[MIDDLE_BLOCK]]:
; NPGSO-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
; NPGSO-NEXT: br label %[[SCALAR_PH:.*]]
@@ -654,7 +639,7 @@ define i32 @pr45526_pgso() !prof !14 {
; NPGSO-NEXT: [[FOR:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[SCALAR_PH]] ], [ [[PIVPLUS1]], %[[LOOP]] ]
; NPGSO-NEXT: [[PIVPLUS1]] = add nuw nsw i32 [[PIV]], 1
; NPGSO-NEXT: [[COND:%.*]] = icmp ult i32 [[PIV]], 510
-; NPGSO-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP24:![0-9]+]]
+; NPGSO-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP23:![0-9]+]]
; NPGSO: [[EXIT]]:
; NPGSO-NEXT: [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], %[[LOOP]] ]
; NPGSO-NEXT: ret i32 [[FOR_LCSSA]]
@@ -786,7 +771,7 @@ define void @stride1(ptr noalias %B, i32 %BStride) optsize {
; NPGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; NPGSO-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
; NPGSO-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1026
-; NPGSO-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
+; NPGSO-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; NPGSO: [[MIDDLE_BLOCK]]:
; NPGSO-NEXT: br label %[[FOR_END:.*]]
; NPGSO: [[FOR_END]]:
@@ -891,7 +876,7 @@ define void @stride1_pgso(ptr noalias %B, i32 %BStride) !prof !14 {
; NPGSO-NEXT: store <2 x i16> splat (i16 42), ptr [[TMP1]], align 4
; NPGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[TMP0]], 2
; NPGSO-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
-; NPGSO-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
+; NPGSO-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
; NPGSO: [[MIDDLE_BLOCK]]:
; NPGSO-NEXT: br label %[[SCALAR_PH]]
; NPGSO: [[SCALAR_PH]]:
@@ -904,7 +889,7 @@ define void @stride1_pgso(ptr noalias %B, i32 %BStride) !prof !14 {
; NPGSO-NEXT: store i16 42, ptr [[GEPOFB]], align 4
; NPGSO-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
; NPGSO-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 1025
-; NPGSO-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
+; NPGSO-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; NPGSO: [[FOR_END]]:
; NPGSO-NEXT: ret void
;
@@ -1112,11 +1097,10 @@ exit:
; NPGSO: [[LOOP18]] = distinct !{[[LOOP18]], [[META17]], [[META16]]}
; NPGSO: [[LOOP19]] = distinct !{[[LOOP19]], [[META16]], [[META17]]}
; NPGSO: [[LOOP20]] = distinct !{[[LOOP20]], [[META16]], [[META17]]}
-; NPGSO: [[LOOP21]] = distinct !{[[LOOP21]], [[META16]]}
+; NPGSO: [[LOOP21]] = distinct !{[[LOOP21]], [[META16]], [[META17]]}
; NPGSO: [[LOOP22]] = distinct !{[[LOOP22]], [[META16]], [[META17]]}
-; NPGSO: [[LOOP23]] = distinct !{[[LOOP23]], [[META16]], [[META17]]}
-; NPGSO: [[LOOP24]] = distinct !{[[LOOP24]], [[META17]], [[META16]]}
+; NPGSO: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META16]]}
+; NPGSO: [[LOOP24]] = distinct !{[[LOOP24]], [[META16]], [[META17]]}
; NPGSO: [[LOOP25]] = distinct !{[[LOOP25]], [[META16]], [[META17]]}
-; NPGSO: [[LOOP26]] = distinct !{[[LOOP26]], [[META16]], [[META17]]}
-; NPGSO: [[LOOP27]] = distinct !{[[LOOP27]], [[META16]]}
+; NPGSO: [[LOOP26]] = distinct !{[[LOOP26]], [[META16]]}
;.
diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
index d96134e8adf1d..ccae1b4842217 100644
--- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
@@ -290,12 +290,7 @@ define void @outside_lattice(ptr noalias %p, ptr noalias %q, i32 %n) {
; DEFAULT-NEXT: [[TMP0:%.*]] = zext i32 [[N:%.*]] to i64
; DEFAULT-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP0]], i64 1)
; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 4
-; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; DEFAULT: vector.scevcheck:
-; DEFAULT-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1)
-; DEFAULT-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1
-; DEFAULT-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
-; DEFAULT-NEXT: br i1 [[TMP2]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; DEFAULT: vector.ph:
; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 4
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]]
@@ -322,8 +317,8 @@ define void @outside_lattice(ptr noalias %p, ptr noalias %q, i32 %n) {
; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]]
; DEFAULT-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; DEFAULT: scalar.ph:
-; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[P]], [[ENTRY:%.*]] ], [ [[P]], [[VECTOR_SCEVCHECK]] ]
-; DEFAULT-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[P]], [[ENTRY:%.*]] ]
+; DEFAULT-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; DEFAULT-NEXT: br label [[FOR_BODY:%.*]]
; DEFAULT: for.body:
; DEFAULT-NEXT: [[IV_PTR:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_PTR_NEXT:%.*]], [[FOR_BODY]] ]
@@ -344,12 +339,7 @@ define void @outside_lattice(ptr noalias %p, ptr noalias %q, i32 %n) {
; STRIDED-NEXT: [[TMP0:%.*]] = zext i32 [[N:%.*]] to i64
; STRIDED-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP0]], i64 1)
; STRIDED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 4
-; STRIDED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; STRIDED: vector.scevcheck:
-; STRIDED-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1)
-; STRIDED-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1
-; STRIDED-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
-; STRIDED-NEXT: br i1 [[TMP2]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; STRIDED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; STRIDED: vector.ph:
; STRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 4
; STRIDED-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]]
@@ -376,8 +366,8 @@ define void @outside_lattice(ptr noalias %p, ptr noalias %q, i32 %n) {
; STRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]]
; STRIDED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; STRIDED: scalar.ph:
-; STRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[P]], [[ENTRY:%.*]] ], [ [[P]], [[VECTOR_SCEVCHECK]] ]
-; STRIDED-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; STRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[P]], [[ENTRY:%.*]] ]
+; STRIDED-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; STRIDED-NEXT: br label [[FOR_BODY:%.*]]
; STRIDED: for.body:
; STRIDED-NEXT: [[IV_PTR:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_PTR_NEXT:%.*]], [[FOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll b/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll
index 623a9435edec1..1cd1b104ef651 100644
--- a/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll
@@ -40,35 +40,12 @@ define void @doit1(i32 %n, i32 %step) local_unnamed_addr {
; CHECK: for.body.preheader:
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; CHECK: vector.scevcheck:
-; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[STEP:%.*]] to i8
-; CHECK-NEXT: [[TMP2:%.*]] = sub i8 0, [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i8 [[TMP1]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 [[TMP1]]
-; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP0]] to i8
-; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP4]], i8 [[TMP5]])
-; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
-; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = sub i8 0, [[MUL_RESULT]]
-; CHECK-NEXT: [[TMP7:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i8 [[TMP6]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP3]], i1 [[TMP8]], i1 [[TMP7]]
-; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
-; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], 255
-; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i8 [[TMP1]], 0
-; CHECK-NEXT: [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]]
-; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP10]], [[TMP13]]
-; CHECK-NEXT: [[TMP15:%.*]] = sext i8 [[TMP1]] to i32
-; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[STEP]], [[TMP15]]
-; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP14]], [[IDENT_CHECK]]
-; CHECK-NEXT: br i1 [[TMP16]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32
-; CHECK-NEXT: [[IND_END:%.*]] = mul i32 [[DOTCAST]], [[STEP]]
+; CHECK-NEXT: [[IND_END:%.*]] = mul i32 [[DOTCAST]], [[STEP:%.*]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[STEP]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP19:%.*]] = mul nsw <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]]
@@ -90,8 +67,8 @@ define void @doit1(i32 %n, i32 %step) local_unnamed_addr {
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
@@ -162,34 +139,12 @@ define void @doit2(i32 %n, i32 %step) local_unnamed_addr {
; CHECK: for.body.preheader:
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; CHECK: vector.scevcheck:
-; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[STEP:%.*]] to i8
-; CHECK-NEXT: [[TMP2:%.*]] = sub i8 0, [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i8 [[TMP1]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 [[TMP1]]
-; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP0]] to i8
-; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP4]], i8 [[TMP5]])
-; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
-; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = sub i8 0, [[MUL_RESULT]]
-; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i8 [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP7]], i1 false
-; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
-; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[TMP0]], 255
-; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i8 [[TMP1]], 0
-; CHECK-NEXT: [[TMP12:%.*]] = and i1 [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = or i1 [[TMP9]], [[TMP12]]
-; CHECK-NEXT: [[TMP14:%.*]] = sext i8 [[TMP1]] to i32
-; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[STEP]], [[TMP14]]
-; CHECK-NEXT: [[TMP15:%.*]] = or i1 [[TMP13]], [[IDENT_CHECK]]
-; CHECK-NEXT: br i1 [[TMP15]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32
-; CHECK-NEXT: [[IND_END:%.*]] = mul i32 [[DOTCAST]], [[STEP]]
+; CHECK-NEXT: [[IND_END:%.*]] = mul i32 [[DOTCAST]], [[STEP:%.*]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[STEP]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP18:%.*]] = mul nsw <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]]
@@ -211,8 +166,8 @@ define void @doit2(i32 %n, i32 %step) local_unnamed_addr {
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
@@ -358,26 +313,7 @@ define void @doit4(i32 %n, i8 signext %cstep) local_unnamed_addr {
; CHECK: for.body.preheader:
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; CHECK: vector.scevcheck:
-; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = sub i8 0, [[CSTEP]]
-; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i8 [[CSTEP]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i8 [[TMP1]], i8 [[CSTEP]]
-; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP0]] to i8
-; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP3]], i8 [[TMP4]])
-; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
-; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
-; CHECK-NEXT: [[TMP5:%.*]] = sub i8 0, [[MUL_RESULT]]
-; CHECK-NEXT: [[TMP6:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i8 [[TMP5]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP2]], i1 [[TMP7]], i1 [[TMP6]]
-; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
-; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[TMP0]], 255
-; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i8 [[CSTEP]], 0
-; CHECK-NEXT: [[TMP12:%.*]] = and i1 [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = or i1 [[TMP9]], [[TMP12]]
-; CHECK-NEXT: br i1 [[TMP13]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
@@ -404,8 +340,8 @@ define void @doit4(i32 %n, i8 signext %cstep) local_unnamed_addr {
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/pr34681.ll b/llvm/test/Transforms/LoopVectorize/pr34681.ll
index a04a4e9eea6fe..4e6809102d685 100644
--- a/llvm/test/Transforms/LoopVectorize/pr34681.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr34681.ll
@@ -31,22 +31,7 @@ define i32 @foo1(i32 %N, ptr nocapture readnone %A, ptr nocapture readonly %B, i
; CHECK-NEXT: br i1 [[CMP8]], [[FOR_END:label %.*]], label %[[FOR_BODY_LR_PH:.*]]
; CHECK: [[FOR_BODY_LR_PH]]:
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; CHECK: [[VECTOR_SCEVCHECK]]:
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = sub i32 0, [[N]]
-; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[N]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 [[N]]
-; CHECK-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[TMP3]], i32 [[TMP0]])
-; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
-; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
-; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[J]], [[MUL_RESULT]]
-; CHECK-NEXT: [[TMP5:%.*]] = sub i32 [[J]], [[MUL_RESULT]]
-; CHECK-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP4]], [[J]]
-; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[J]]
-; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP2]], i1 [[TMP7]], i1 [[TMP6]]
-; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
-; CHECK-NEXT: br i1 [[TMP9]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
@@ -142,16 +127,7 @@ define i32 @foo2(i16 zeroext %N, ptr nocapture readnone %A, ptr nocapture readon
; CHECK-NEXT: br i1 [[CMP11]], [[FOR_END:label %.*]], label %[[FOR_BODY_LR_PH:.*]]
; CHECK: [[FOR_BODY_LR_PH]]:
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[CONV]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; CHECK: [[VECTOR_SCEVCHECK]]:
-; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[CONV]], -1
-; CHECK-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[CONV]], i32 [[TMP0]])
-; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
-; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
-; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[J]], [[MUL_RESULT]]
-; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP1]], [[J]]
-; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]]
-; CHECK-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[CONV]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[CONV]], [[N_MOD_VF]]
diff --git a/llvm/test/Transforms/LoopVectorize/pr37248.ll b/llvm/test/Transforms/LoopVectorize/pr37248.ll
index 33b3d263e634a..338090b450071 100644
--- a/llvm/test/Transforms/LoopVectorize/pr37248.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr37248.ll
@@ -20,17 +20,7 @@ define void @f1(ptr noalias %b, i1 %c, i32 %start) {
; CHECK-NEXT: [[SMIN1:%.*]] = call i32 @llvm.smin.i32(i32 [[START]], i32 1)
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN1]]
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; CHECK: [[VECTOR_SCEVCHECK]]:
-; CHECK-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[START]], i32 1)
-; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[START]], [[SMIN]]
-; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[START]] to i16
-; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP2]] to i16
-; CHECK-NEXT: [[TMP5:%.*]] = sub i16 [[TMP3]], [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt i16 [[TMP5]], [[TMP3]]
-; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i32 [[TMP2]], 65535
-; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP6]], [[TMP8]]
-; CHECK-NEXT: br i1 [[TMP9]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
@@ -40,7 +30,7 @@ define void @f1(ptr noalias %b, i1 %c, i32 %start) {
; CHECK-NEXT: [[TMP11:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE3:.*]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[START]], [[INDEX]]
; CHECK-NEXT: [[TMP12:%.*]] = trunc i32 [[OFFSET_IDX]] to i16
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0
@@ -50,11 +40,11 @@ define void @f1(ptr noalias %b, i1 %c, i32 %start) {
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1
-; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3]]
-; CHECK: [[PRED_STORE_IF2]]:
+; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
+; CHECK: [[PRED_STORE_IF1]]:
; CHECK-NEXT: store i32 10, ptr [[B]], align 1
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE3]]
-; CHECK: [[PRED_STORE_CONTINUE3]]:
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
+; CHECK: [[PRED_STORE_CONTINUE2]]:
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x i16], ptr @a, i16 0, i16 [[TMP12]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i64 0
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP16]], i64 -1
@@ -98,17 +88,7 @@ define void @f2(ptr noalias %b, i1 %c, i32 %start) {
; CHECK-NEXT: [[SMIN1:%.*]] = call i32 @llvm.smin.i32(i32 [[START]], i32 1)
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN1]]
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; CHECK: [[VECTOR_SCEVCHECK]]:
-; CHECK-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[START]], i32 1)
-; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[START]], [[SMIN]]
-; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[START]] to i16
-; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP2]] to i16
-; CHECK-NEXT: [[TMP5:%.*]] = sub i16 [[TMP3]], [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt i16 [[TMP5]], [[TMP3]]
-; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i32 [[TMP2]], 65535
-; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP6]], [[TMP8]]
-; CHECK-NEXT: br i1 [[TMP9]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
diff --git a/llvm/test/Transforms/LoopVectorize/pr45259.ll b/llvm/test/Transforms/LoopVectorize/pr45259.ll
index 7a048a9a607ba..d58cfe8fe9ea6 100644
--- a/llvm/test/Transforms/LoopVectorize/pr45259.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr45259.ll
@@ -20,16 +20,7 @@ define i8 @widget(ptr %arr, i8 %t9) {
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[T1_0_LCSSA2]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP3]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; CHECK: vector.scevcheck:
-; CHECK-NEXT: [[TMP4:%.*]] = sub i64 -1, [[ARR1]]
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], [[T1_0_LCSSA2]]
-; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i8
-; CHECK-NEXT: [[TMP7:%.*]] = add i8 1, [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP7]], 1
-; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP5]], 255
-; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
-; CHECK-NEXT: br i1 [[TMP10]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP3]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP3]], [[N_MOD_VF]]
@@ -54,7 +45,7 @@ define i8 @widget(ptr %arr, i8 %t9) {
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll
index 239ad5b29cec5..70e6699087a08 100644
--- a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll
+++ b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll
@@ -366,14 +366,7 @@ define void @scalar_cast_dbg(ptr nocapture %a, i32 %start, i64 %k) {
; CHECK-SAME: ptr captures(none) [[A:%.*]], i32 [[START:%.*]], i64 [[K:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; CHECK: [[VECTOR_SCEVCHECK]]:
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[K]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]]
-; CHECK-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[K]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[K]], [[N_MOD_VF]]
@@ -392,7 +385,7 @@ define void @scalar_cast_dbg(ptr nocapture %a, i32 %start, i64 %k) {
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
@@ -409,33 +402,26 @@ define void @scalar_cast_dbg(ptr nocapture %a, i32 %start, i64 %k) {
; DEBUGLOC-SAME: ptr captures(none) [[A:%.*]], i32 [[START:%.*]], i64 [[K:%.*]]) !dbg [[DBG67:![0-9]+]] {
; DEBUGLOC-NEXT: [[ENTRY:.*]]:
; DEBUGLOC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K]], 4, !dbg [[DBG74:![0-9]+]]
-; DEBUGLOC-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]], !dbg [[DBG74]]
-; DEBUGLOC: [[VECTOR_SCEVCHECK]]:
-; DEBUGLOC-NEXT: [[TMP0:%.*]] = add i64 [[K]], -1, !dbg [[DBG74]]
-; DEBUGLOC-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32, !dbg [[DBG74]]
-; DEBUGLOC-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0, !dbg [[DBG74]]
-; DEBUGLOC-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[TMP0]], 4294967295, !dbg [[DBG74]]
-; DEBUGLOC-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]], !dbg [[DBG74]]
-; DEBUGLOC-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]], !dbg [[DBG75:![0-9]+]]
+; DEBUGLOC-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]], !dbg [[DBG74]]
; DEBUGLOC: [[VECTOR_PH]]:
; DEBUGLOC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[K]], 4
; DEBUGLOC-NEXT: [[N_VEC:%.*]] = sub i64 [[K]], [[N_MOD_VF]]
-; DEBUGLOC-NEXT: br label %[[VECTOR_BODY:.*]], !dbg [[DBG75]]
+; DEBUGLOC-NEXT: br label %[[VECTOR_BODY:.*]], !dbg [[DBG74]]
; DEBUGLOC: [[VECTOR_BODY]]:
-; DEBUGLOC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ], !dbg [[DBG75]]
+; DEBUGLOC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ], !dbg [[DBG75:![0-9]+]]
; DEBUGLOC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ], !dbg [[DBG76:![0-9]+]]
-; DEBUGLOC-NEXT: [[TMP5:%.*]] = trunc i64 [[INDEX]] to i32, !dbg [[DBG76]]
-; DEBUGLOC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP5]], !dbg [[DBG77:![0-9]+]]
-; DEBUGLOC-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP6]], align 4, !dbg [[DBG78:![0-9]+]]
+; DEBUGLOC-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i32, !dbg [[DBG76]]
+; DEBUGLOC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP0]], !dbg [[DBG77:![0-9]+]]
+; DEBUGLOC-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP1]], align 4, !dbg [[DBG78:![0-9]+]]
; DEBUGLOC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4, !dbg [[DBG75]]
; DEBUGLOC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4), !dbg [[DBG76]]
-; DEBUGLOC-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]], !dbg [[DBG79:![0-9]+]]
-; DEBUGLOC-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !dbg [[DBG79]], !llvm.loop [[LOOP80:![0-9]+]]
+; DEBUGLOC-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]], !dbg [[DBG79:![0-9]+]]
+; DEBUGLOC-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !dbg [[DBG79]], !llvm.loop [[LOOP80:![0-9]+]]
; DEBUGLOC: [[MIDDLE_BLOCK]]:
; DEBUGLOC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]], !dbg [[DBG79]]
; DEBUGLOC-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]], !dbg [[DBG79]]
; DEBUGLOC: [[SCALAR_PH]]:
-; DEBUGLOC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], !dbg [[DBG75]]
+; DEBUGLOC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], !dbg [[DBG75]]
; DEBUGLOC-NEXT: br label %[[LOOP:.*]], !dbg [[DBG74]]
; DEBUGLOC: [[LOOP]]:
; DEBUGLOC-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ], !dbg [[DBG75]]
@@ -595,7 +581,7 @@ exit:
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META2]], [[META1]]}
; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]}
-; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]]}
+; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META2]], [[META1]]}
; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]}
; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]]}
;.
@@ -679,7 +665,7 @@ exit:
; DEBUGLOC: [[LOOP80]] = distinct !{[[LOOP80]], [[META26]], [[META27]]}
; DEBUGLOC: [[DBG81]] = !DILocation(line: 36, column: 1, scope: [[DBG67]])
; DEBUGLOC: [[DBG82]] = !DILocation(line: 37, column: 1, scope: [[DBG67]])
-; DEBUGLOC: [[LOOP83]] = distinct !{[[LOOP83]], [[META26]]}
+; DEBUGLOC: [[LOOP83]] = distinct !{[[LOOP83]], [[META27]], [[META26]]}
; DEBUGLOC: [[DBG84]] = !DILocation(line: 39, column: 1, scope: [[DBG67]])
; DEBUGLOC: [[DBG85]] = distinct !DISubprogram(name: "widen_intrinsic_dbg", linkageName: "widen_intrinsic_dbg", scope: null, file: [[META1]], line: 40, type: [[META6]], scopeLine: 40, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META86:![0-9]+]])
; DEBUGLOC: [[META86]] = !{[[META87]], [[META88]], [[META89]], [[META90]], [[META91]], [[META92]], [[META93]]}
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll b/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll
index e4322cfcc00ac..5302bf5abb03b 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll
@@ -525,10 +525,7 @@ define i32 @smax_reduction_multiple_incoming(ptr %src, i32 %n, i1 %cond) {
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], 1
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[IV_PH]]
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; CHECK: [[VECTOR_SCEVCHECK]]:
-; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[N]], [[IV_PH]]
-; CHECK-NEXT: br i1 [[TMP2]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
@@ -551,8 +548,8 @@ define i32 @smax_reduction_multiple_incoming(ptr %src, i32 %n, i1 %cond) {
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP3]], %[[MIDDLE_BLOCK]] ], [ [[IV_PH]], %[[LOOP_HEADER_PREHEADER]] ], [ [[IV_PH]], %[[VECTOR_SCEVCHECK]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ [[MAX_PH]], %[[LOOP_HEADER_PREHEADER]] ], [ [[MAX_PH]], %[[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP3]], %[[MIDDLE_BLOCK]] ], [ [[IV_PH]], %[[LOOP_HEADER_PREHEADER]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ [[MAX_PH]], %[[LOOP_HEADER_PREHEADER]] ]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP_HEADER]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll
index d3e291e4f3ed2..4056563eea62d 100644
--- a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll
@@ -118,12 +118,7 @@ define i32 @reverse_induction_i16(i16 %startval, ptr %ptr) {
; CHECK-LABEL: define i32 @reverse_induction_i16(
; CHECK-SAME: i16 [[STARTVAL:%.*]], ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: br label %[[VECTOR_SCEVCHECK:.*]]
-; CHECK: [[VECTOR_SCEVCHECK]]:
-; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[STARTVAL]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = sub i16 [[TMP0]], 1023
-; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i16 [[TMP1]], [[TMP0]]
-; CHECK-NEXT: br i1 [[TMP2]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
@@ -151,22 +146,8 @@ define i32 @reverse_induction_i16(i16 %startval, ptr %ptr) {
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP15]], [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
; CHECK-NEXT: br label %[[LOOPEND:.*]]
-; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: br label %[[FOR_BODY:.*]]
-; CHECK: [[FOR_BODY]]:
-; CHECK-NEXT: [[ADD_I7:%.*]] = phi i16 [ [[STARTVAL]], %[[SCALAR_PH]] ], [ [[ADD_I:%.*]], %[[FOR_BODY]] ]
-; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[INC4:%.*]], %[[FOR_BODY]] ]
-; CHECK-NEXT: [[REDUX5:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[INC_REDUX:%.*]], %[[FOR_BODY]] ]
-; CHECK-NEXT: [[ADD_I]] = add i16 [[ADD_I7]], -1
-; CHECK-NEXT: [[KIND__I:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i16 [[ADD_I]]
-; CHECK-NEXT: [[TMP_I1:%.*]] = load i32, ptr [[KIND__I]], align 4
-; CHECK-NEXT: [[INC_REDUX]] = add i32 [[TMP_I1]], [[REDUX5]]
-; CHECK-NEXT: [[INC4]] = add i32 [[I_06]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INC4]], 1024
-; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[LOOPEND]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[LOOPEND]]:
-; CHECK-NEXT: [[INC_REDUX_LCSSA:%.*]] = phi i32 [ [[INC_REDUX]], %[[FOR_BODY]] ], [ [[TMP17]], %[[MIDDLE_BLOCK]] ]
-; CHECK-NEXT: ret i32 [[INC_REDUX_LCSSA]]
+; CHECK-NEXT: ret i32 [[TMP17]]
;
entry:
br label %for.body
@@ -232,7 +213,7 @@ define void @reverse_forward_induction_i64_i8() {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[STEP_ADD]], splat (i8 4)
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
-; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[WHILE_END:.*]]
; CHECK: [[WHILE_END]]:
@@ -285,7 +266,7 @@ define void @reverse_forward_induction_i64_i8_signed() {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[STEP_ADD]], splat (i8 4)
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
-; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[WHILE_END:.*]]
; CHECK: [[WHILE_END]]:
@@ -315,7 +296,6 @@ while.end:
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
-; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
+; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
-; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]}
;.
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-known-true.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-known-true.ll
index 5a1844ac450e7..cb32a69ab4ae0 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-check-known-true.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-check-known-true.ll
@@ -17,14 +17,7 @@ define void @test_runtime_check_known_false_after_construction(ptr %start.1, ptr
; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3
; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP4]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; CHECK: [[VECTOR_SCEVCHECK]]:
-; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[END1]] to i3
-; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[START_1_INT]] to i3
-; CHECK-NEXT: [[TMP7:%.*]] = sub i3 [[TMP5]], [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = zext i3 [[TMP7]] to i64
-; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP8]], 0
-; CHECK-NEXT: br i1 [[IDENT_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF]]
@@ -54,8 +47,8 @@ define void @test_runtime_check_known_false_after_construction(ptr %start.1, ptr
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ [[START_1]], %[[ENTRY]] ], [ [[START_1]], %[[VECTOR_SCEVCHECK]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi ptr [ [[TMP12]], %[[MIDDLE_BLOCK]] ], [ [[START_2_DIFF]], %[[ENTRY]] ], [ [[START_2_DIFF]], %[[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ [[START_1]], %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi ptr [ [[TMP12]], %[[MIDDLE_BLOCK]] ], [ [[START_2_DIFF]], %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[PTR_IV_1:%.*]] = phi ptr [ [[PTR_IV_1_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll
index 1035642dd78e0..95c5e88f0fcfa 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll
@@ -17,10 +17,6 @@ define void @load_clamped_index(ptr %A, ptr %B, i32 %N) {
; CHECK-NEXT: [[B1:%.*]] = ptrtoint ptr [[B:%.*]] to i64
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N:%.*]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; CHECK: vector.scevcheck:
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[TMP0]], 3
-; CHECK-NEXT: br i1 [[TMP1]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], 16
@@ -44,7 +40,7 @@ define void @load_clamped_index(ptr %A, ptr %B, i32 %N) {
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
@@ -87,10 +83,6 @@ define void @store_clamped_index(ptr %A, ptr %B, i32 %N) {
; CHECK-NEXT: [[A1:%.*]] = ptrtoint ptr [[A:%.*]] to i64
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N:%.*]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; CHECK: vector.scevcheck:
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[TMP0]], 3
-; CHECK-NEXT: br i1 [[TMP1]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[A1]], [[B2]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], 16
@@ -114,7 +106,7 @@ define void @store_clamped_index(ptr %A, ptr %B, i32 %N) {
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
@@ -159,14 +151,6 @@ define void @load_clamped_index_offset_1(ptr %A, ptr %B, i32 %N) {
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; CHECK: vector.scevcheck:
-; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[N]], -2
-; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i2
-; CHECK-NEXT: [[TMP3:%.*]] = add i2 1, [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i2 [[TMP3]], 1
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP1]], 3
-; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
-; CHECK-NEXT: br i1 [[TMP6]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[B1]], [[A2]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP7]], 16
@@ -192,7 +176,7 @@ define void @load_clamped_index_offset_1(ptr %A, ptr %B, i32 %N) {
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ], [ 1, [[VECTOR_SCEVCHECK]] ], [ 1, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ], [ 1, [[VECTOR_SCEVCHECK]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
@@ -350,11 +334,7 @@ define void @clamped_index_equal_dependence(ptr %A, ptr %B, i32 %N) {
; CHECK-LABEL: @clamped_index_equal_dependence(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N:%.*]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; CHECK: vector.scevcheck:
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[TMP0]], 3
-; CHECK-NEXT: br i1 [[TMP1]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
@@ -373,7 +353,7 @@ define void @clamped_index_equal_dependence(ptr %A, ptr %B, i32 %N) {
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll b/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll
index 648ebc7e6c3a5..5abb621f579bf 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll
@@ -450,22 +450,7 @@ define void @remove_diff_checks_via_guards(i32 %x, i32 %y, ptr %A) {
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[SMAX2]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = add nuw i32 [[TMP2]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP3]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], [[SCALAR_PH:label %.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; CHECK: [[VECTOR_SCEVCHECK]]:
-; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[X]] to i64
-; CHECK-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], 1
-; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP5]], i64 0)
-; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[SMAX]] to i32
-; CHECK-NEXT: [[TMP7:%.*]] = icmp slt i32 [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[SMAX]], 4294967295
-; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = trunc i64 [[SMAX]] to i32
-; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[OFFSET]], [[TMP10]]
-; CHECK-NEXT: [[TMP12:%.*]] = icmp slt i32 [[TMP11]], [[OFFSET]]
-; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[SMAX]], 4294967295
-; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP15:%.*]] = or i1 [[TMP9]], [[TMP14]]
-; CHECK-NEXT: br i1 [[TMP15]], [[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], [[SCALAR_PH:label %.*]], label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
; CHECK-NEXT: [[TMP16:%.*]] = sext i32 [[OFFSET]] to i64
; CHECK-NEXT: [[TMP17:%.*]] = shl nsw i64 [[TMP16]], 2
diff --git a/llvm/test/Transforms/LoopVectorize/scev-exit-phi-invalidation.ll b/llvm/test/Transforms/LoopVectorize/scev-exit-phi-invalidation.ll
index 479d859a9287c..527128d613624 100644
--- a/llvm/test/Transforms/LoopVectorize/scev-exit-phi-invalidation.ll
+++ b/llvm/test/Transforms/LoopVectorize/scev-exit-phi-invalidation.ll
@@ -21,38 +21,29 @@ define void @test_pr63368(i1 %c, ptr %A) {
; CHECK: middle.block:
; CHECK-NEXT: br label [[EXIT_1:%.*]]
; CHECK: exit.1:
-; CHECK-NEXT: [[SMAX1:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 -1)
-; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SMAX1]], 2
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; CHECK: vector.scevcheck:
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 -1)
-; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SMAX]], 1
-; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i8
-; CHECK-NEXT: [[TMP5:%.*]] = add i8 1, [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = icmp slt i8 [[TMP5]], 1
-; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP3]], 255
-; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
-; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_PH2:%.*]]
-; CHECK: vector.ph2:
+; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SMAX]], 2
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH1:%.*]]
+; CHECK: vector.ph1:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[N_VEC]] to i8
-; CHECK-NEXT: br label [[VECTOR_BODY3:%.*]]
-; CHECK: vector.body3:
-; CHECK-NEXT: [[INDEX4:%.*]] = phi i32 [ 0, [[VECTOR_PH2]] ], [ [[INDEX_NEXT5:%.*]], [[VECTOR_BODY3]] ]
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX4]] to i8
-; CHECK-NEXT: [[TMP10:%.*]] = add i8 [[OFFSET_IDX]], 1
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[A]], i8 [[TMP10]]
-; CHECK-NEXT: store <4 x i8> zeroinitializer, ptr [[TMP11]], align 1
-; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i32 [[INDEX4]], 4
-; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT5]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK6:%.*]], label [[VECTOR_BODY3]], !llvm.loop [[LOOP3:![0-9]+]]
-; CHECK: middle.block6:
+; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[N_VEC]] to i8
+; CHECK-NEXT: br label [[VECTOR_BODY2:%.*]]
+; CHECK: vector.body2:
+; CHECK-NEXT: [[INDEX3:%.*]] = phi i32 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT4:%.*]], [[VECTOR_BODY2]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX3]] to i8
+; CHECK-NEXT: [[TMP4:%.*]] = add i8 [[OFFSET_IDX]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[A]], i8 [[TMP4]]
+; CHECK-NEXT: store <4 x i8> zeroinitializer, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[INDEX_NEXT4]] = add nuw i32 [[INDEX3]], 4
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT4]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK5:%.*]], label [[VECTOR_BODY2]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: middle.block5:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_2:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[TMP9]], [[MIDDLE_BLOCK6]] ], [ 0, [[EXIT_1]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[TMP3]], [[MIDDLE_BLOCK5]] ], [ 0, [[EXIT_1]] ]
; CHECK-NEXT: br label [[LOOP_2:%.*]]
; CHECK: loop.2:
; CHECK-NEXT: [[IV_2:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP_2]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll
index 163faa2254700..39be47a179e98 100644
--- a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll
+++ b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll
@@ -6,19 +6,7 @@ define void @step_direction_unknown(i32 %arg, ptr %dst) {
; CHECK-SAME: i32 [[ARG:%.*]], ptr [[DST:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[ARG]], 1
-; CHECK-NEXT: br label %[[VECTOR_SCEVCHECK:.*]]
-; CHECK: [[VECTOR_SCEVCHECK]]:
-; CHECK-NEXT: [[TMP0:%.*]] = sub i32 -1, [[ARG]]
-; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[ADD]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 [[ADD]]
-; CHECK-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[TMP2]], i32 1023)
-; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
-; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
-; CHECK-NEXT: [[TMP3:%.*]] = sub i32 0, [[MUL_RESULT]]
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[TMP3]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP1]], i1 [[TMP4]], i1 false
-; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW]]
-; CHECK-NEXT: br i1 [[TMP6]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[ADD]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
@@ -45,8 +33,9 @@ define void @step_direction_unknown(i32 %arg, ptr %dst) {
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br [[EXIT:label %.*]]
-; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
;
entry:
%add = add i32 %arg, 1
@@ -93,7 +82,7 @@ define void @integer_induction_wraps_scev_predicate_known(i32 %x, ptr %call, ptr
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP0]], 4
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 992
-; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK: [[SCALAR_PH]]:
@@ -124,7 +113,6 @@ define void @implied_wrap_predicate(ptr %A, ptr %B, ptr %C) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[A3:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT: [[C2:%.*]] = ptrtoint ptr [[C]] to i64
-; CHECK-NEXT: [[A1:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[A3]], 16
; CHECK-NEXT: [[UMAX4:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP0]], i64 add (i64 ptrtoint (ptr @h to i64), i64 1))
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[UMAX4]], -9
@@ -132,19 +120,7 @@ define void @implied_wrap_predicate(ptr %A, ptr %B, ptr %C) {
; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3
; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP4]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; CHECK: [[VECTOR_SCEVCHECK]]:
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[A1]], 16
-; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP5]], i64 add (i64 ptrtoint (ptr @h to i64), i64 1))
-; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[UMAX]], -9
-; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], [[A1]]
-; CHECK-NEXT: [[TMP8:%.*]] = lshr i64 [[TMP7]], 3
-; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP8]] to i16
-; CHECK-NEXT: [[TMP10:%.*]] = add i16 2, [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i16 [[TMP10]], 2
-; CHECK-NEXT: [[TMP12:%.*]] = icmp ugt i64 [[TMP8]], 65535
-; CHECK-NEXT: [[TMP13:%.*]] = or i1 [[TMP11]], [[TMP12]]
-; CHECK-NEXT: br i1 [[TMP13]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
; CHECK-NEXT: [[TMP14:%.*]] = sub i64 [[C2]], [[A3]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP14]], 32
@@ -165,7 +141,7 @@ define void @implied_wrap_predicate(ptr %A, ptr %B, ptr %C) {
; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr [[TMP18]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
@@ -226,7 +202,7 @@ define void @no_signed_wrap_iv_via_btc(ptr %dst, i32 %N) mustprogress {
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP5]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[OUTER_LOOPEXIT]], label %[[SCALAR_PH]]
diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll
index ae03f2426a800..be71ea898a935 100644
--- a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll
+++ b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll
@@ -143,16 +143,7 @@ define i32 @diff_exit_block_needs_scev_check(i32 %end) {
; CHECK-NEXT: [[TMP1:%.*]] = zext i10 [[TMP0]] to i64
; CHECK-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 1)
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; CHECK: vector.scevcheck:
-; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[END_CLAMPED]], i32 1)
-; CHECK-NEXT: [[TMP2:%.*]] = add nsw i32 [[UMAX]], -1
-; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
-; CHECK-NEXT: [[TMP4:%.*]] = add i8 1, [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i8 [[TMP4]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i32 [[TMP2]], 255
-; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
-; CHECK-NEXT: br i1 [[TMP7]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]]
@@ -179,8 +170,8 @@ define i32 @diff_exit_block_needs_scev_check(i32 %end) {
; CHECK: vector.early.exit:
; CHECK-NEXT: br label [[FOUND:%.*]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[IND:%.*]] = phi i8 [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
@@ -634,7 +625,7 @@ exit:
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
-; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
+; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]}
; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META2]], [[META1]]}
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-native-path-inner-loop-with-runtime-checks.ll b/llvm/test/Transforms/LoopVectorize/vplan-native-path-inner-loop-with-runtime-checks.ll
index 22cf860c8b58c..a434c86fc54dd 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-native-path-inner-loop-with-runtime-checks.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-native-path-inner-loop-with-runtime-checks.ll
@@ -7,15 +7,9 @@ define void @expand(ptr %src, ptr %dst, i64 %0) {
; CHECK-LABEL: define void @expand(
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[TMP0:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 1
-; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP1]], i64 1000)
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[SMAX]], -1
-; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], [[TMP0]]
+; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[SRC]], i64 8
; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[TMP0]], 4
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 8
-; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP5]]
-; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[SRC]], i64 8
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP0]], 1
; CHECK-NEXT: [[SMAX6:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP6]], i64 1000)
; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[SMAX6]], 4
@@ -26,22 +20,7 @@ define void @expand(ptr %src, ptr %dst, i64 %0) {
; CHECK: [[OUTER_HEADER]]:
; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[OUTER_IV_NEXT:%.*]], %[[OUTER_LATCH:.*]] ]
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP8]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; CHECK: [[VECTOR_SCEVCHECK]]:
-; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[TMP3]])
-; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
-; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]]
-; CHECK-NEXT: [[TMP11:%.*]] = icmp ult ptr [[TMP10]], [[SCEVGEP]]
-; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP11]], [[MUL_OVERFLOW]]
-; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[TMP3]])
-; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
-; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[MUL_RESULT3]]
-; CHECK-NEXT: [[TMP15:%.*]] = icmp ult ptr [[TMP14]], [[SCEVGEP1]]
-; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW4]]
-; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP12]], [[TMP16]]
-; CHECK-NEXT: br i1 [[TMP17]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP7]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SCEVGEP]], [[SCEVGEP5]]
@@ -50,7 +29,7 @@ define void @expand(ptr %src, ptr %dst, i64 %0) {
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP8]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP8]], [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP19:%.*]] = load double, ptr [[SRC]], align 8, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
+; CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[SRC]], align 8, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP0]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
@@ -68,10 +47,10 @@ define void @expand(ptr %src, ptr %dst, i64 %0) {
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP23]]
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP25]]
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP27]]
-; CHECK-NEXT: store double [[TMP19]], ptr [[TMP31]], align 8, !alias.scope [[META3]]
-; CHECK-NEXT: store double [[TMP19]], ptr [[TMP26]], align 8, !alias.scope [[META3]]
-; CHECK-NEXT: store double [[TMP19]], ptr [[TMP33]], align 8, !alias.scope [[META3]]
-; CHECK-NEXT: store double [[TMP19]], ptr [[TMP28]], align 8, !alias.scope [[META3]]
+; CHECK-NEXT: store double [[TMP5]], ptr [[TMP31]], align 8, !alias.scope [[META3]]
+; CHECK-NEXT: store double [[TMP5]], ptr [[TMP26]], align 8, !alias.scope [[META3]]
+; CHECK-NEXT: store double [[TMP5]], ptr [[TMP33]], align 8, !alias.scope [[META3]]
+; CHECK-NEXT: store double [[TMP5]], ptr [[TMP28]], align 8, !alias.scope [[META3]]
; CHECK-NEXT: [[TMP29:%.*]] = or disjoint <4 x i64> [[TMP20]], splat (i64 1)
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i64> [[TMP29]], i32 0
; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x i64> [[TMP29]], i32 1
@@ -93,7 +72,7 @@ define void @expand(ptr %src, ptr %dst, i64 %0) {
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP8]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[OUTER_LATCH]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP18]], %[[MIDDLE_BLOCK]] ], [ [[TMP0]], %[[OUTER_HEADER]] ], [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ], [ [[TMP0]], %[[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP18]], %[[MIDDLE_BLOCK]] ], [ [[TMP0]], %[[OUTER_HEADER]] ], [ [[TMP0]], %[[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: br label %[[INNER:.*]]
; CHECK: [[INNER]]:
; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INNER_IV_NEXT:%.*]], %[[INNER]] ]
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll b/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll
index f42101ffe89aa..d2113c72ba17a 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll
@@ -23,35 +23,61 @@ define dso_local void @foo(i32 noundef %arg, ptr noundef nonnull align 4 derefer
; CHECK-LABEL: define dso_local void @foo(
; CHECK-SAME: i32 noundef [[ARG:%.*]], ptr noundef nonnull writeonly align 4 captures(none) dereferenceable(8) [[ARG1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[BB:.*]]:
+; CHECK-NEXT: [[I3:%.*]] = alloca [[T0:%.*]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[I3]]) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT: store i64 180388626456, ptr [[I3]], align 8
; CHECK-NEXT: [[I9:%.*]] = sdiv i32 [[ARG]], 128
; CHECK-NEXT: [[I10:%.*]] = shl nsw i32 [[I9]], 7
; CHECK-NEXT: [[ARG_OFF:%.*]] = add i32 [[ARG]], 127
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[ARG_OFF]], 255
-; CHECK-NEXT: br i1 [[TMP0]], label %[[BB12:.*]], label %[[BB13:.*]]
-; CHECK: [[BB12_LOOPEXIT:.*]]:
-; CHECK-NEXT: [[I3_SROA_8_0_INSERT_EXT:%.*]] = zext i32 [[I21_3:%.*]] to i64
-; CHECK-NEXT: [[I3_SROA_8_0_INSERT_SHIFT:%.*]] = shl nuw i64 [[I3_SROA_8_0_INSERT_EXT]], 32
-; CHECK-NEXT: [[I3_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[I21_2:%.*]] to i64
-; CHECK-NEXT: [[I3_SROA_0_0_INSERT_INSERT:%.*]] = or disjoint i64 [[I3_SROA_8_0_INSERT_SHIFT]], [[I3_SROA_0_0_INSERT_EXT]]
+; CHECK-NEXT: br i1 [[TMP0]], label %[[BB12:.*]], label %[[BB13_PREHEADER:.*]]
+; CHECK: [[BB13_PREHEADER]]:
+; CHECK-NEXT: [[ARG_OFF10:%.*]] = add i32 [[ARG]], 127
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[ARG_OFF10]], 255
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[BB13:.*]], label %[[VECTOR_BODY_PREHEADER:.*]]
+; CHECK: [[VECTOR_BODY_PREHEADER]]:
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[I3]], i64 -12
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[I3]], i64 -28
+; CHECK-NEXT: [[DOTPROMOTED:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4, !tbaa [[INT_TBAA5:![0-9]+]]
+; CHECK-NEXT: [[DOTPROMOTED14:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !tbaa [[INT_TBAA5]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[REVERSE915:%.*]] = phi <4 x i32> [ [[REVERSE9:%.*]], %[[VECTOR_BODY]] ], [ [[DOTPROMOTED14]], %[[VECTOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[WIDE_LOAD13:%.*]] = phi <4 x i32> [ [[REVERSE8:%.*]], %[[VECTOR_BODY]] ], [ [[DOTPROMOTED]], %[[VECTOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ], [ 0, %[[VECTOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ], [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[VEC_IND]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[REVERSE8]] = mul nsw <4 x i32> [[WIDE_LOAD13]], [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[STEP_ADD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[REVERSE9]] = mul nsw <4 x i32> [[REVERSE915]], [[TMP3]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8)
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[I10]]
+; CHECK-NEXT: br i1 [[TMP4]], label %[[BB12_LOOPEXIT_LOOPEXIT12:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK: [[BB12_LOOPEXIT_LOOPEXIT12]]:
+; CHECK-NEXT: store <4 x i32> [[REVERSE8]], ptr [[TMP6]], align 4, !tbaa [[INT_TBAA5]]
+; CHECK-NEXT: store <4 x i32> [[REVERSE9]], ptr [[TMP1]], align 4, !tbaa [[INT_TBAA5]]
+; CHECK-NEXT: br label %[[BB12_LOOPEXIT:.*]]
+; CHECK: [[BB12_LOOPEXIT]]:
+; CHECK-NEXT: [[DOTPRE:%.*]] = load i64, ptr [[I3]], align 8, !tbaa [[CHAR_TBAA13:![0-9]+]]
; CHECK-NEXT: br label %[[BB12]]
; CHECK: [[BB12]]:
-; CHECK-NEXT: [[TMP1:%.*]] = phi i64 [ [[I3_SROA_0_0_INSERT_INSERT]], %[[BB12_LOOPEXIT]] ], [ 180388626456, %[[BB]] ]
-; CHECK-NEXT: store i64 [[TMP1]], ptr [[ARG1]], align 4, !tbaa [[CHAR_TBAA5:![0-9]+]]
+; CHECK-NEXT: [[TMP5:%.*]] = phi i64 [ [[DOTPRE]], %[[BB12_LOOPEXIT]] ], [ 180388626456, %[[BB]] ]
+; CHECK-NEXT: store i64 [[TMP5]], ptr [[ARG1]], align 4, !tbaa [[CHAR_TBAA13]]
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[I3]]) #[[ATTR2]]
; CHECK-NEXT: ret void
; CHECK: [[BB13]]:
-; CHECK-NEXT: [[I3_SROA_8_0:%.*]] = phi i32 [ [[I21_3]], %[[BB13]] ], [ 42, %[[BB]] ]
-; CHECK-NEXT: [[I3_SROA_0_0:%.*]] = phi i32 [ [[I21_2]], %[[BB13]] ], [ 24, %[[BB]] ]
-; CHECK-NEXT: [[I4_05:%.*]] = phi i32 [ [[I24_3:%.*]], %[[BB13]] ], [ 0, %[[BB]] ]
-; CHECK-NEXT: [[I21:%.*]] = mul nsw i32 [[I3_SROA_0_0]], [[I4_05]]
-; CHECK-NEXT: [[I24:%.*]] = or disjoint i32 [[I4_05]], 1
-; CHECK-NEXT: [[I21_1:%.*]] = mul nsw i32 [[I3_SROA_8_0]], [[I24]]
-; CHECK-NEXT: [[I24_1:%.*]] = or disjoint i32 [[I4_05]], 2
-; CHECK-NEXT: [[I21_2]] = mul nsw i32 [[I21]], [[I24_1]]
-; CHECK-NEXT: [[I24_2:%.*]] = or disjoint i32 [[I4_05]], 3
-; CHECK-NEXT: [[I21_3]] = mul nsw i32 [[I21_1]], [[I24_2]]
-; CHECK-NEXT: [[I24_3]] = add nuw nsw i32 [[I4_05]], 4
+; CHECK-NEXT: [[I24_2:%.*]] = phi i32 [ [[I24_3:%.*]], %[[BB13]] ], [ 0, %[[BB13_PREHEADER]] ]
+; CHECK-NEXT: [[I15:%.*]] = and i32 [[I24_2]], 1
+; CHECK-NEXT: [[I16:%.*]] = zext nneg i32 [[I15]] to i64
+; CHECK-NEXT: [[I5_I_I:%.*]] = getelementptr inbounds nuw i32, ptr [[I3]], i64 [[I16]]
+; CHECK-NEXT: [[I21_1:%.*]] = load i32, ptr [[I5_I_I]], align 4, !tbaa [[INT_TBAA5]]
+; CHECK-NEXT: [[I21_3:%.*]] = mul nsw i32 [[I21_1]], [[I24_2]]
+; CHECK-NEXT: store i32 [[I21_3]], ptr [[I5_I_I]], align 4, !tbaa [[INT_TBAA5]]
+; CHECK-NEXT: [[I24_3]] = add nuw nsw i32 [[I24_2]], 1
; CHECK-NEXT: [[I11_NOT_3:%.*]] = icmp eq i32 [[I24_3]], [[I10]]
-; CHECK-NEXT: br i1 [[I11_NOT_3]], label %[[BB12_LOOPEXIT]], label %[[BB13]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT: br i1 [[I11_NOT_3]], label %[[BB12_LOOPEXIT]], label %[[BB13]], !llvm.loop [[LOOP14:![0-9]+]]
;
bb:
%i = alloca i32, align 4
@@ -168,10 +194,14 @@ attributes #3 = { nounwind }
!15 = !{!16, !16, i64 0}
!16 = !{!"long", !7, i64 0}
;.
-; CHECK: [[CHAR_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
-; CHECK: [[META6]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0}
-; CHECK: [[META7]] = !{!"Simple C++ TBAA"}
-; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META9:![0-9]+]], [[META10:![0-9]+]]}
-; CHECK: [[META9]] = !{!"llvm.loop.mustprogress"}
-; CHECK: [[META10]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[INT_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
+; CHECK: [[META6]] = !{!"int", [[META7:![0-9]+]], i64 0}
+; CHECK: [[META7]] = !{!"omnipotent char", [[META8:![0-9]+]], i64 0}
+; CHECK: [[META8]] = !{!"Simple C++ TBAA"}
+; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META10:![0-9]+]], [[META11:![0-9]+]], [[META12:![0-9]+]]}
+; CHECK: [[META10]] = !{!"llvm.loop.mustprogress"}
+; CHECK: [[META11]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META12]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[CHAR_TBAA13]] = !{[[META7]], [[META7]], i64 0}
+; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META10]], [[META12]], [[META11]]}
;.
More information about the llvm-commits
mailing list