[llvm] 40304d8 - Reapply "[VPlan] Remove manual region removal when simplifying for VF and UF. (#181252)" (#188589)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 26 03:14:17 PDT 2026
Author: Florian Hahn
Date: 2026-03-26T10:14:10Z
New Revision: 40304d8fefc8872dc8dd2c53f67c7303e40ea727
URL: https://github.com/llvm/llvm-project/commit/40304d8fefc8872dc8dd2c53f67c7303e40ea727
DIFF: https://github.com/llvm/llvm-project/commit/40304d8fefc8872dc8dd2c53f67c7303e40ea727.diff
LOG: Reapply "[VPlan] Remove manual region removal when simplifying for VF and UF. (#181252)" (#188589)
This reverts commit e30f9c19464bcf1bf1e9f69b63884fb78ad2d05d.
Re-land, now that the reported crash causing the revert has been fixed
as part of 77fb84889 (#187504).
Original message:
Replace the manual region dissolution code in
simplifyBranchConditionForVFAndUF with the general
removeBranchOnConst. simplifyBranchConditionForVFAndUF now just creates
a (BranchOnCond true) or updates BranchOnTwoConds.
The loop then gets automatically removed by running removeBranchOnConst.
This removes a bunch of special logic to handle header phi replacements
and CFG updates. With the new code, there's no restriction on what kind
of header phi recipes the loop contains.
Note that VPEVLBasedIVRecipe needs to be marked as readnone. This is
technically unrelated, but I could not find an independent test that
would be impacted.
The code to deal with epilogue resume values now needs updating, because
we may simplify a reduction directly to the start value.
PR: https://github.com/llvm/llvm-project/pull/181252
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
llvm/lib/Transforms/Vectorize/VPlanTransforms.h
llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll
llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-replicate-extends.ll
llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll
llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll
llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-ordered-reduction.ll
llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll
llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
llvm/test/Transforms/LoopVectorize/find-last.ll
llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll
llvm/test/Transforms/LoopVectorize/predicated-multiple-exits.ll
llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll
llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll
llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll
llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
llvm/test/Transforms/PhaseOrdering/ARM/arm_var_q31.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 9c34781bb7447..b929e93ac15d6 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7421,6 +7421,10 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
VPlanTransforms::expandBranchOnTwoConds(BestVPlan);
// Convert loops with variable-length stepping after regions are dissolved.
VPlanTransforms::convertToVariableLengthStep(BestVPlan);
+ // Remove dead back-edges for single-iteration loops with BranchOnCond(true).
+ // Only process loop latches to avoid removing edges from the middle block,
+ // which may be needed for epilogue vectorization.
+ VPlanTransforms::removeBranchOnConst(BestVPlan, /*OnlyLatches=*/true);
VPlanTransforms::materializeBackedgeTakenCount(BestVPlan, VectorPH);
VPlanTransforms::materializeVectorTripCount(
BestVPlan, VectorPH, CM.foldTailByMasking(),
@@ -7428,6 +7432,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
VPlanTransforms::materializeFactors(BestVPlan, VectorPH, BestVF);
VPlanTransforms::cse(BestVPlan);
VPlanTransforms::simplifyRecipes(BestVPlan);
+ VPlanTransforms::simplifyKnownEVL(BestVPlan, BestVF, PSE);
// 0. Generate SCEV-dependent code in the entry, including TripCount, before
// making any changes to the CFG.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index a5d0077364e6b..8f937b2f8bc1f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -128,6 +128,7 @@ bool VPRecipeBase::mayReadFromMemory() const {
return cast<VPWidenIntrinsicRecipe>(this)->mayReadFromMemory();
case VPBranchOnMaskSC:
case VPDerivedIVSC:
+ case VPCurrentIterationPHISC:
case VPFirstOrderRecurrencePHISC:
case VPReductionPHISC:
case VPPredInstPHISC:
@@ -165,6 +166,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
return cast<VPExpressionRecipe>(this)->mayHaveSideEffects();
case VPActiveLaneMaskPHISC:
case VPDerivedIVSC:
+ case VPCurrentIterationPHISC:
case VPFirstOrderRecurrencePHISC:
case VPReductionPHISC:
case VPPredInstPHISC:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index cfc973afc7fd4..6ad48e766af69 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1580,12 +1580,6 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
return;
}
- if (auto *Phi = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(Def)) {
- if (Phi->getOperand(0) == Phi->getOperand(1))
- Phi->replaceAllUsesWith(Phi->getOperand(0));
- return;
- }
-
// Simplify MaskedCond with no block mask to its single operand.
if (match(Def, m_VPInstruction<VPInstruction::MaskedCond>()) &&
!cast<VPInstruction>(Def)->isMasked())
@@ -1634,9 +1628,15 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
return;
}
- if (isa<VPPhi, VPWidenPHIRecipe>(Def)) {
- if (Def->getNumOperands() == 1)
+ if (isa<VPPhi, VPWidenPHIRecipe, VPHeaderPHIRecipe>(Def)) {
+ if (Def->getNumOperands() == 1) {
Def->replaceAllUsesWith(Def->getOperand(0));
+ return;
+ }
+ if (auto *Phi = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(Def)) {
+ if (all_equal(Phi->incoming_values()))
+ Phi->replaceAllUsesWith(Phi->getOperand(0));
+ }
return;
}
@@ -2208,72 +2208,16 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
return false;
}
- // The vector loop region only executes once. If possible, completely remove
- // the region, otherwise replace the terminator controlling the latch with
- // (BranchOnCond true).
- // TODO: VPWidenIntOrFpInductionRecipe is only partially supported; add
- // support for other non-canonical widen induction recipes (e.g.,
- // VPWidenPointerInductionRecipe).
- // TODO: fold branch-on-constant after dissolving region.
- auto *Header = cast<VPBasicBlock>(VectorRegion->getEntry());
- if (all_of(Header->phis(), [](VPRecipeBase &Phi) {
- if (auto *R = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi))
- return R->isCanonical();
- return isa<VPCanonicalIVPHIRecipe, VPCurrentIterationPHIRecipe,
- VPFirstOrderRecurrencePHIRecipe, VPPhi>(&Phi);
- })) {
- for (VPRecipeBase &HeaderR : make_early_inc_range(Header->phis())) {
- if (auto *R = dyn_cast<VPWidenIntOrFpInductionRecipe>(&HeaderR)) {
- VPBuilder Builder(Plan.getVectorPreheader());
- VPValue *StepV = Builder.createNaryOp(VPInstruction::StepVector, {},
- R->getScalarType());
- HeaderR.getVPSingleValue()->replaceAllUsesWith(StepV);
- HeaderR.eraseFromParent();
- continue;
- }
- auto *Phi = cast<VPPhiAccessors>(&HeaderR);
- HeaderR.getVPSingleValue()->replaceAllUsesWith(Phi->getIncomingValue(0));
- HeaderR.eraseFromParent();
- }
-
- VPBlockBase *Preheader = VectorRegion->getSinglePredecessor();
- SmallVector<VPBlockBase *> Exits = to_vector(VectorRegion->getSuccessors());
- VPBlockUtils::disconnectBlocks(Preheader, VectorRegion);
- for (VPBlockBase *Exit : Exits)
- VPBlockUtils::disconnectBlocks(VectorRegion, Exit);
-
- for (VPBlockBase *B : vp_depth_first_shallow(VectorRegion->getEntry()))
- B->setParent(nullptr);
-
- VPBlockUtils::connectBlocks(Preheader, Header);
-
- for (VPBlockBase *Exit : Exits)
- VPBlockUtils::connectBlocks(ExitingVPBB, Exit);
-
- // Replace terminating branch-on-two-conds with branch-on-cond to early
- // exit.
- if (Exits.size() != 1) {
- assert(match(Term, m_BranchOnTwoConds()) && Exits.size() == 2 &&
- "BranchOnTwoConds needs 2 remaining exits");
- VPBuilder(Term).createNaryOp(VPInstruction::BranchOnCond,
- Term->getOperand(0));
- }
- VPlanTransforms::simplifyRecipes(Plan);
- } else {
- // The vector region contains header phis for which we cannot remove the
- // loop region yet.
-
- // For BranchOnTwoConds, set the latch exit condition to true directly.
- if (match(Term, m_BranchOnTwoConds())) {
- Term->setOperand(1, Plan.getTrue());
- return true;
- }
-
- auto *BOC = new VPInstruction(VPInstruction::BranchOnCond, {Plan.getTrue()},
- {}, {}, Term->getDebugLoc());
- ExitingVPBB->appendRecipe(BOC);
+ // The vector loop region only executes once. Convert terminator of the
+ // exiting block to exit in the first iteration.
+ if (match(Term, m_BranchOnTwoConds())) {
+ Term->setOperand(1, Plan.getTrue());
+ return true;
}
+ auto *BOC = new VPInstruction(VPInstruction::BranchOnCond, Plan.getTrue(), {},
+ {}, Term->getDebugLoc());
+ ExitingVPBB->appendRecipe(BOC);
Term->eraseFromParent();
return true;
@@ -2281,8 +2225,8 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
/// From the definition of llvm.experimental.get.vector.length,
/// VPInstruction::ExplicitVectorLength(%AVL) = %AVL when %AVL <= VF.
-static bool simplifyKnownEVL(VPlan &Plan, ElementCount VF,
- PredicatedScalarEvolution &PSE) {
+bool VPlanTransforms::simplifyKnownEVL(VPlan &Plan, ElementCount VF,
+ PredicatedScalarEvolution &PSE) {
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_deep(Plan.getEntry()))) {
for (VPRecipeBase &R : *VPBB) {
@@ -2325,7 +2269,6 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
bool MadeChange = tryToReplaceALMWithWideALM(Plan, BestVF, BestUF);
MadeChange |= simplifyBranchConditionForVFAndUF(Plan, BestVF, BestUF, PSE);
MadeChange |= optimizeVectorInductionWidthForTCAndVFUF(Plan, BestVF, BestUF);
- MadeChange |= simplifyKnownEVL(Plan, BestVF, PSE);
if (MadeChange) {
Plan.setVF(BestVF);
@@ -2882,7 +2825,11 @@ void VPlanTransforms::truncateToMinimalBitwidths(
}
}
-void VPlanTransforms::removeBranchOnConst(VPlan &Plan) {
+void VPlanTransforms::removeBranchOnConst(VPlan &Plan, bool OnlyLatches) {
+ std::optional<VPDominatorTree> VPDT;
+ if (OnlyLatches)
+ VPDT.emplace(Plan);
+
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(Plan.getEntry()))) {
VPValue *Cond;
@@ -2890,6 +2837,9 @@ void VPlanTransforms::removeBranchOnConst(VPlan &Plan) {
if (VPBB->empty() || !match(&VPBB->back(), m_BranchOnCond(m_VPValue(Cond))))
continue;
+ if (OnlyLatches && !VPBlockUtils::isLatch(VPBB, *VPDT))
+ continue;
+
assert(VPBB->getNumSuccessors() == 2 &&
"Two successors expected for BranchOnCond");
unsigned RemovedIdx;
@@ -2929,7 +2879,7 @@ void VPlanTransforms::optimize(VPlan &Plan) {
RUN_VPLAN_PASS(removeRedundantExpandSCEVRecipes, Plan);
RUN_VPLAN_PASS(reassociateHeaderMask, Plan);
RUN_VPLAN_PASS(simplifyRecipes, Plan);
- RUN_VPLAN_PASS(removeBranchOnConst, Plan);
+ RUN_VPLAN_PASS(removeBranchOnConst, Plan, /*OnlyLatches=*/false);
RUN_VPLAN_PASS(removeDeadRecipes, Plan);
RUN_VPLAN_PASS(createAndOptimizeReplicateRegions, Plan);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 21a1db22126a0..cef1b9ac8168e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -242,6 +242,11 @@ struct VPlanTransforms {
unsigned BestUF,
PredicatedScalarEvolution &PSE);
+ /// Try to simplify VPInstruction::ExplicitVectorLength recipes when the AVL
+ /// is known to be <= VF, replacing them with the AVL directly.
+ static bool simplifyKnownEVL(VPlan &Plan, ElementCount VF,
+ PredicatedScalarEvolution &PSE);
+
/// Apply VPlan-to-VPlan optimizations to \p Plan, including induction recipe
/// optimizations, dead recipe removal, replicate region optimizations and
/// block merging.
@@ -363,8 +368,9 @@ struct VPlanTransforms {
static void simplifyRecipes(VPlan &Plan);
/// Remove BranchOnCond recipes with true or false conditions together with
- /// removing dead edges to their successors.
- static void removeBranchOnConst(VPlan &Plan);
+ /// removing dead edges to their successors. If \p OnlyLatches is true, only
+ /// process loop latches.
+ static void removeBranchOnConst(VPlan &Plan, bool OnlyLatches = false);
/// Perform common-subexpression-elimination on \p Plan.
static void cse(VPlan &Plan);
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
index e054c916de6e0..d54e8582676d6 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
@@ -8,28 +8,17 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 8)
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP1]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[TMP8]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw <vscale x 8 x i64> [[VEC_IND]], splat (i64 3)
+; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw <vscale x 8 x i64> [[TMP8]], splat (i64 3)
; CHECK-NEXT: [[TMP11:%.*]] = lshr <vscale x 8 x i64> [[BROADCAST_SPLAT]], [[TMP10]]
; CHECK-NEXT: [[TMP14:%.*]] = trunc <vscale x 8 x i64> [[TMP11]] to <vscale x 8 x i8>
-; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP14]], ptr align 1 [[NEXT_GEP]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK]])
-; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
-; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 8)
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
-; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP14]], ptr align 1 [[DST]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK_ENTRY]])
+; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup:
@@ -68,28 +57,17 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SHR]] to i64
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP1]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[TMP8]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw <vscale x 8 x i64> [[VEC_IND]], splat (i64 3)
+; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw <vscale x 8 x i64> [[TMP8]], splat (i64 3)
; CHECK-NEXT: [[TMP11:%.*]] = lshr <vscale x 8 x i64> [[BROADCAST_SPLAT]], [[TMP10]]
; CHECK-NEXT: [[TMP14:%.*]] = trunc <vscale x 8 x i64> [[TMP11]] to <vscale x 8 x i8>
-; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP14]], ptr align 1 [[NEXT_GEP]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK]])
-; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
-; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 [[WIDE_TRIP_COUNT]])
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
-; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP14]], ptr align 1 [[DST]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK_ENTRY]])
+; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup.loopexit:
@@ -124,9 +102,3 @@ for.body: ; preds = %for.body.preheader,
for.cond.cleanup: ; preds = %for.body
ret void
}
-;.
-; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
-; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
-; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
-; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
-;.
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
index 38047541db5d9..e43b6cff0af8c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
@@ -13,11 +13,9 @@ define double @test_reduction_costs() {
; COMMON: [[VECTOR_PH]]:
; COMMON-NEXT: br label %[[VECTOR_BODY:.*]]
; COMMON: [[VECTOR_BODY]]:
-; COMMON-NEXT: [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP0:%.*]], %[[VECTOR_BODY]] ]
-; COMMON-NEXT: [[VEC_PHI1:%.*]] = phi double [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP1:%.*]], %[[VECTOR_BODY]] ]
-; COMMON-NEXT: [[TMP0]] = call double @llvm.vector.reduce.fadd.v2f64(double [[VEC_PHI]], <2 x double> splat (double 3.000000e+00))
-; COMMON-NEXT: [[TMP1]] = call double @llvm.vector.reduce.fadd.v2f64(double [[VEC_PHI1]], <2 x double> splat (double 9.000000e+00))
-; COMMON-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; COMMON-NEXT: [[TMP0:%.*]] = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> splat (double 3.000000e+00))
+; COMMON-NEXT: [[TMP1:%.*]] = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> splat (double 9.000000e+00))
+; COMMON-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; COMMON: [[MIDDLE_BLOCK]]:
; COMMON-NEXT: br label %[[EXIT:.*]]
; COMMON: [[EXIT]]:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll
index 99db3ee324c27..945012311aa89 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll
@@ -464,13 +464,9 @@ define i32 @tc4(ptr noundef readonly captures(none) %tmp) vscale_range(1,16) {
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP]], i64 [[INDEX]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[ARRAYIDX1]], align 4
-; CHECK-NEXT: [[TMP3]] = add <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> zeroinitializer, [[WIDE_LOAD]]
+; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3]])
; CHECK-NEXT: br label %[[EXIT:.*]]
@@ -546,7 +542,6 @@ exit: ; preds = %for.body
; CHECK-VS1: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
; CHECK-VS1: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
; CHECK-VS1: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]}
-; CHECK-VS1: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
; CHECK-VS1: [[PROF9]] = !{!"branch_weights", i32 10, i32 30}
;.
; CHECK-VS2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
@@ -557,6 +552,5 @@ exit: ; preds = %for.body
; CHECK-VS2: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
; CHECK-VS2: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
; CHECK-VS2: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]}
-; CHECK-VS2: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
; CHECK-VS2: [[PROF9]] = !{!"branch_weights", i32 10, i32 30}
;.
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
index 5b6979d6b1198..7c26d0028d0d4 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
@@ -196,165 +196,124 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n)
; DEFAULT-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT5]], <16 x i8> poison, <16 x i32> zeroinitializer
; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]]
; DEFAULT: [[VECTOR_BODY]]:
-; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE35:.*]] ]
-; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <16 x i8> [ <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE35]] ]
-; DEFAULT-NEXT: [[VEC_IND1:%.*]] = phi <16 x i8> [ <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT36:%.*]], %[[PRED_STORE_CONTINUE35]] ]
-; DEFAULT-NEXT: [[TMP0:%.*]] = icmp ule <16 x i8> [[VEC_IND]], splat (i8 14)
-; DEFAULT-NEXT: [[TMP1:%.*]] = mul <16 x i8> [[BROADCAST_SPLAT]], [[VEC_IND1]]
-; DEFAULT-NEXT: [[TMP2:%.*]] = lshr <16 x i8> [[VEC_IND1]], splat (i8 1)
-; DEFAULT-NEXT: [[TMP3:%.*]] = mul <16 x i8> [[TMP2]], [[BROADCAST_SPLAT4]]
+; DEFAULT-NEXT: [[TMP1:%.*]] = mul <16 x i8> [[BROADCAST_SPLAT]], <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>
+; DEFAULT-NEXT: [[TMP3:%.*]] = mul <16 x i8> <i8 0, i8 0, i8 1, i8 1, i8 2, i8 2, i8 3, i8 3, i8 4, i8 4, i8 5, i8 5, i8 6, i8 6, i8 7, i8 7>, [[BROADCAST_SPLAT4]]
; DEFAULT-NEXT: [[TMP4:%.*]] = add <16 x i8> [[TMP3]], [[TMP1]]
-; DEFAULT-NEXT: [[TMP5:%.*]] = lshr <16 x i8> [[VEC_IND1]], splat (i8 2)
-; DEFAULT-NEXT: [[TMP6:%.*]] = mul <16 x i8> [[TMP5]], [[BROADCAST_SPLAT6]]
+; DEFAULT-NEXT: [[TMP6:%.*]] = mul <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 1, i8 1, i8 1, i8 1, i8 2, i8 2, i8 2, i8 2, i8 3, i8 3, i8 3, i8 3>, [[BROADCAST_SPLAT6]]
; DEFAULT-NEXT: [[TMP7:%.*]] = add <16 x i8> [[TMP4]], [[TMP6]]
-; DEFAULT-NEXT: [[TMP8:%.*]] = extractelement <16 x i1> [[TMP0]], i32 0
-; DEFAULT-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; DEFAULT: [[PRED_STORE_IF]]:
-; DEFAULT-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0
-; DEFAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP9]]
+; DEFAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 0
; DEFAULT-NEXT: [[TMP11:%.*]] = extractelement <16 x i8> [[TMP7]], i32 0
; DEFAULT-NEXT: store i8 [[TMP11]], ptr [[TMP10]], align 1
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE]]
; DEFAULT: [[PRED_STORE_CONTINUE]]:
-; DEFAULT-NEXT: [[TMP12:%.*]] = extractelement <16 x i1> [[TMP0]], i32 1
-; DEFAULT-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]]
+; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]]
; DEFAULT: [[PRED_STORE_IF6]]:
-; DEFAULT-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 1
-; DEFAULT-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP13]]
+; DEFAULT-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 1
; DEFAULT-NEXT: [[TMP15:%.*]] = extractelement <16 x i8> [[TMP7]], i32 1
; DEFAULT-NEXT: store i8 [[TMP15]], ptr [[TMP14]], align 1
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE7]]
; DEFAULT: [[PRED_STORE_CONTINUE7]]:
-; DEFAULT-NEXT: [[TMP16:%.*]] = extractelement <16 x i1> [[TMP0]], i32 2
-; DEFAULT-NEXT: br i1 [[TMP16]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]]
+; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]]
; DEFAULT: [[PRED_STORE_IF8]]:
-; DEFAULT-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 2
-; DEFAULT-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP17]]
+; DEFAULT-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 2
; DEFAULT-NEXT: [[TMP19:%.*]] = extractelement <16 x i8> [[TMP7]], i32 2
; DEFAULT-NEXT: store i8 [[TMP19]], ptr [[TMP18]], align 1
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE9]]
; DEFAULT: [[PRED_STORE_CONTINUE9]]:
-; DEFAULT-NEXT: [[TMP20:%.*]] = extractelement <16 x i1> [[TMP0]], i32 3
-; DEFAULT-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF10:.*]], label %[[PRED_STORE_CONTINUE11:.*]]
+; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF10:.*]], label %[[PRED_STORE_CONTINUE11:.*]]
; DEFAULT: [[PRED_STORE_IF10]]:
-; DEFAULT-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 3
-; DEFAULT-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP21]]
+; DEFAULT-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 3
; DEFAULT-NEXT: [[TMP23:%.*]] = extractelement <16 x i8> [[TMP7]], i32 3
; DEFAULT-NEXT: store i8 [[TMP23]], ptr [[TMP22]], align 1
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE11]]
; DEFAULT: [[PRED_STORE_CONTINUE11]]:
-; DEFAULT-NEXT: [[TMP24:%.*]] = extractelement <16 x i1> [[TMP0]], i32 4
-; DEFAULT-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF12:.*]], label %[[PRED_STORE_CONTINUE13:.*]]
+; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF12:.*]], label %[[PRED_STORE_CONTINUE13:.*]]
; DEFAULT: [[PRED_STORE_IF12]]:
-; DEFAULT-NEXT: [[TMP25:%.*]] = add i64 [[INDEX]], 4
-; DEFAULT-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP25]]
+; DEFAULT-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 4
; DEFAULT-NEXT: [[TMP27:%.*]] = extractelement <16 x i8> [[TMP7]], i32 4
; DEFAULT-NEXT: store i8 [[TMP27]], ptr [[TMP26]], align 1
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE13]]
; DEFAULT: [[PRED_STORE_CONTINUE13]]:
-; DEFAULT-NEXT: [[TMP28:%.*]] = extractelement <16 x i1> [[TMP0]], i32 5
-; DEFAULT-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF14:.*]], label %[[PRED_STORE_CONTINUE15:.*]]
+; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF14:.*]], label %[[PRED_STORE_CONTINUE15:.*]]
; DEFAULT: [[PRED_STORE_IF14]]:
-; DEFAULT-NEXT: [[TMP29:%.*]] = add i64 [[INDEX]], 5
-; DEFAULT-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP29]]
+; DEFAULT-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 5
; DEFAULT-NEXT: [[TMP31:%.*]] = extractelement <16 x i8> [[TMP7]], i32 5
; DEFAULT-NEXT: store i8 [[TMP31]], ptr [[TMP30]], align 1
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE15]]
; DEFAULT: [[PRED_STORE_CONTINUE15]]:
-; DEFAULT-NEXT: [[TMP32:%.*]] = extractelement <16 x i1> [[TMP0]], i32 6
-; DEFAULT-NEXT: br i1 [[TMP32]], label %[[PRED_STORE_IF16:.*]], label %[[PRED_STORE_CONTINUE17:.*]]
+; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF16:.*]], label %[[PRED_STORE_CONTINUE17:.*]]
; DEFAULT: [[PRED_STORE_IF16]]:
-; DEFAULT-NEXT: [[TMP33:%.*]] = add i64 [[INDEX]], 6
-; DEFAULT-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP33]]
+; DEFAULT-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 6
; DEFAULT-NEXT: [[TMP35:%.*]] = extractelement <16 x i8> [[TMP7]], i32 6
; DEFAULT-NEXT: store i8 [[TMP35]], ptr [[TMP34]], align 1
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE17]]
; DEFAULT: [[PRED_STORE_CONTINUE17]]:
-; DEFAULT-NEXT: [[TMP36:%.*]] = extractelement <16 x i1> [[TMP0]], i32 7
-; DEFAULT-NEXT: br i1 [[TMP36]], label %[[PRED_STORE_IF18:.*]], label %[[PRED_STORE_CONTINUE19:.*]]
+; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF18:.*]], label %[[PRED_STORE_CONTINUE19:.*]]
; DEFAULT: [[PRED_STORE_IF18]]:
-; DEFAULT-NEXT: [[TMP37:%.*]] = add i64 [[INDEX]], 7
-; DEFAULT-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP37]]
+; DEFAULT-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 7
; DEFAULT-NEXT: [[TMP39:%.*]] = extractelement <16 x i8> [[TMP7]], i32 7
; DEFAULT-NEXT: store i8 [[TMP39]], ptr [[TMP38]], align 1
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE19]]
; DEFAULT: [[PRED_STORE_CONTINUE19]]:
-; DEFAULT-NEXT: [[TMP40:%.*]] = extractelement <16 x i1> [[TMP0]], i32 8
-; DEFAULT-NEXT: br i1 [[TMP40]], label %[[PRED_STORE_IF20:.*]], label %[[PRED_STORE_CONTINUE21:.*]]
+; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF20:.*]], label %[[PRED_STORE_CONTINUE21:.*]]
; DEFAULT: [[PRED_STORE_IF20]]:
-; DEFAULT-NEXT: [[TMP41:%.*]] = add i64 [[INDEX]], 8
-; DEFAULT-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP41]]
+; DEFAULT-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
; DEFAULT-NEXT: [[TMP43:%.*]] = extractelement <16 x i8> [[TMP7]], i32 8
; DEFAULT-NEXT: store i8 [[TMP43]], ptr [[TMP42]], align 1
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE21]]
; DEFAULT: [[PRED_STORE_CONTINUE21]]:
-; DEFAULT-NEXT: [[TMP44:%.*]] = extractelement <16 x i1> [[TMP0]], i32 9
-; DEFAULT-NEXT: br i1 [[TMP44]], label %[[PRED_STORE_IF22:.*]], label %[[PRED_STORE_CONTINUE23:.*]]
+; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF22:.*]], label %[[PRED_STORE_CONTINUE23:.*]]
; DEFAULT: [[PRED_STORE_IF22]]:
-; DEFAULT-NEXT: [[TMP45:%.*]] = add i64 [[INDEX]], 9
-; DEFAULT-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP45]]
+; DEFAULT-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 9
; DEFAULT-NEXT: [[TMP47:%.*]] = extractelement <16 x i8> [[TMP7]], i32 9
; DEFAULT-NEXT: store i8 [[TMP47]], ptr [[TMP46]], align 1
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE23]]
; DEFAULT: [[PRED_STORE_CONTINUE23]]:
-; DEFAULT-NEXT: [[TMP48:%.*]] = extractelement <16 x i1> [[TMP0]], i32 10
-; DEFAULT-NEXT: br i1 [[TMP48]], label %[[PRED_STORE_IF24:.*]], label %[[PRED_STORE_CONTINUE25:.*]]
+; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF24:.*]], label %[[PRED_STORE_CONTINUE25:.*]]
; DEFAULT: [[PRED_STORE_IF24]]:
-; DEFAULT-NEXT: [[TMP49:%.*]] = add i64 [[INDEX]], 10
-; DEFAULT-NEXT: [[TMP50:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP49]]
+; DEFAULT-NEXT: [[TMP50:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 10
; DEFAULT-NEXT: [[TMP51:%.*]] = extractelement <16 x i8> [[TMP7]], i32 10
; DEFAULT-NEXT: store i8 [[TMP51]], ptr [[TMP50]], align 1
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE25]]
; DEFAULT: [[PRED_STORE_CONTINUE25]]:
-; DEFAULT-NEXT: [[TMP52:%.*]] = extractelement <16 x i1> [[TMP0]], i32 11
-; DEFAULT-NEXT: br i1 [[TMP52]], label %[[PRED_STORE_IF26:.*]], label %[[PRED_STORE_CONTINUE27:.*]]
+; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF26:.*]], label %[[PRED_STORE_CONTINUE27:.*]]
; DEFAULT: [[PRED_STORE_IF26]]:
-; DEFAULT-NEXT: [[TMP53:%.*]] = add i64 [[INDEX]], 11
-; DEFAULT-NEXT: [[TMP54:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP53]]
+; DEFAULT-NEXT: [[TMP54:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 11
; DEFAULT-NEXT: [[TMP55:%.*]] = extractelement <16 x i8> [[TMP7]], i32 11
; DEFAULT-NEXT: store i8 [[TMP55]], ptr [[TMP54]], align 1
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE27]]
; DEFAULT: [[PRED_STORE_CONTINUE27]]:
-; DEFAULT-NEXT: [[TMP56:%.*]] = extractelement <16 x i1> [[TMP0]], i32 12
-; DEFAULT-NEXT: br i1 [[TMP56]], label %[[PRED_STORE_IF28:.*]], label %[[PRED_STORE_CONTINUE29:.*]]
+; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF28:.*]], label %[[PRED_STORE_CONTINUE29:.*]]
; DEFAULT: [[PRED_STORE_IF28]]:
-; DEFAULT-NEXT: [[TMP57:%.*]] = add i64 [[INDEX]], 12
-; DEFAULT-NEXT: [[TMP58:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP57]]
+; DEFAULT-NEXT: [[TMP58:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 12
; DEFAULT-NEXT: [[TMP59:%.*]] = extractelement <16 x i8> [[TMP7]], i32 12
; DEFAULT-NEXT: store i8 [[TMP59]], ptr [[TMP58]], align 1
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE29]]
; DEFAULT: [[PRED_STORE_CONTINUE29]]:
-; DEFAULT-NEXT: [[TMP60:%.*]] = extractelement <16 x i1> [[TMP0]], i32 13
-; DEFAULT-NEXT: br i1 [[TMP60]], label %[[PRED_STORE_IF30:.*]], label %[[PRED_STORE_CONTINUE31:.*]]
+; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF30:.*]], label %[[PRED_STORE_CONTINUE31:.*]]
; DEFAULT: [[PRED_STORE_IF30]]:
-; DEFAULT-NEXT: [[TMP61:%.*]] = add i64 [[INDEX]], 13
-; DEFAULT-NEXT: [[TMP62:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP61]]
+; DEFAULT-NEXT: [[TMP62:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 13
; DEFAULT-NEXT: [[TMP63:%.*]] = extractelement <16 x i8> [[TMP7]], i32 13
; DEFAULT-NEXT: store i8 [[TMP63]], ptr [[TMP62]], align 1
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE31]]
; DEFAULT: [[PRED_STORE_CONTINUE31]]:
-; DEFAULT-NEXT: [[TMP64:%.*]] = extractelement <16 x i1> [[TMP0]], i32 14
-; DEFAULT-NEXT: br i1 [[TMP64]], label %[[PRED_STORE_IF32:.*]], label %[[PRED_STORE_CONTINUE33:.*]]
+; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF32:.*]], label %[[PRED_STORE_CONTINUE33:.*]]
; DEFAULT: [[PRED_STORE_IF32]]:
-; DEFAULT-NEXT: [[TMP65:%.*]] = add i64 [[INDEX]], 14
-; DEFAULT-NEXT: [[TMP66:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP65]]
+; DEFAULT-NEXT: [[TMP66:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 14
; DEFAULT-NEXT: [[TMP67:%.*]] = extractelement <16 x i8> [[TMP7]], i32 14
; DEFAULT-NEXT: store i8 [[TMP67]], ptr [[TMP66]], align 1
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE33]]
; DEFAULT: [[PRED_STORE_CONTINUE33]]:
-; DEFAULT-NEXT: [[TMP68:%.*]] = extractelement <16 x i1> [[TMP0]], i32 15
-; DEFAULT-NEXT: br i1 [[TMP68]], label %[[PRED_STORE_IF34:.*]], label %[[PRED_STORE_CONTINUE35]]
+; DEFAULT-NEXT: br i1 false, label %[[PRED_STORE_IF34:.*]], label %[[PRED_STORE_CONTINUE35:.*]]
; DEFAULT: [[PRED_STORE_IF34]]:
-; DEFAULT-NEXT: [[TMP69:%.*]] = add i64 [[INDEX]], 15
-; DEFAULT-NEXT: [[TMP70:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP69]]
+; DEFAULT-NEXT: [[TMP70:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 15
; DEFAULT-NEXT: [[TMP71:%.*]] = extractelement <16 x i8> [[TMP7]], i32 15
; DEFAULT-NEXT: store i8 [[TMP71]], ptr [[TMP70]], align 1
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE35]]
; DEFAULT: [[PRED_STORE_CONTINUE35]]:
-; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <16 x i8> [[VEC_IND]], splat (i8 16)
-; DEFAULT-NEXT: [[VEC_IND_NEXT36]] = add <16 x i8> [[VEC_IND1]], splat (i8 16)
-; DEFAULT-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; DEFAULT-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; DEFAULT: [[MIDDLE_BLOCK]]:
; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]]
; DEFAULT: [[FOR_COND_CLEANUP]]:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-replicate-extends.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-replicate-extends.ll
index 720442afe16fd..2684e33cf051c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-replicate-extends.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-replicate-extends.ll
@@ -15,9 +15,8 @@ define double @test(i32 %0) {
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP2]] = or <2 x i64> [[VEC_PHI]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i64> zeroinitializer, [[BROADCAST_SPLAT]]
+; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP2]])
; CHECK-NEXT: br label %[[EXIT:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll
index 2c82cae589036..a810ff80b1b20 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll
@@ -587,11 +587,7 @@ define double @test_scalarization_cost_for_load_of_address(ptr %src.0, ptr %src.
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi double [ 3.000000e+00, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr [[T:%.*]], ptr [[SRC_0]], i64 [[INDEX]]
-; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <6 x double>, ptr [[GEP_0]], align 8
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <6 x double>, ptr [[SRC_0]], align 8
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <6 x double> [[WIDE_VEC]], <6 x double> poison, <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <6 x double> [[WIDE_VEC]], <6 x double> poison, <2 x i32> <i32 1, i32 4>
; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <6 x double> [[WIDE_VEC]], <6 x double> poison, <2 x i32> <i32 2, i32 5>
@@ -600,12 +596,10 @@ define double @test_scalarization_cost_for_load_of_address(ptr %src.0, ptr %src.
; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[STRIDED_VEC2]], splat (double 3.000000e+00)
; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], [[TMP5]]
-; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[INDEX]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[GEP_SRC]], align 8
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[SRC_1]], align 8
; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP7]], [[WIDE_LOAD]]
-; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr [[T_2:%.*]], ptr [[SRC_2]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr [[T_2]], ptr [[SRC_2]], i64 [[TMP1]]
-; CHECK-NEXT: [[GEP_72:%.*]] = getelementptr i8, ptr [[GEP_SRC_2]], i64 72
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr [[T_2:%.*]], ptr [[SRC_2]], i64 1
+; CHECK-NEXT: [[GEP_72:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 72
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP11]], i64 72
; CHECK-NEXT: [[L_P_2:%.*]] = load ptr, ptr [[GEP_72]], align 8
; CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP13]], align 8
@@ -614,9 +608,8 @@ define double @test_scalarization_cost_for_load_of_address(ptr %src.0, ptr %src.
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x double> poison, double [[LV]], i32 0
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x double> [[TMP18]], double [[TMP17]], i32 1
; CHECK-NEXT: [[TMP20:%.*]] = fmul <2 x double> [[TMP9]], [[TMP19]]
-; CHECK-NEXT: [[TMP21]] = call double @llvm.vector.reduce.fadd.v2f64(double [[VEC_PHI]], <2 x double> [[TMP20]])
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
+; CHECK-NEXT: [[TMP21:%.*]] = call double @llvm.vector.reduce.fadd.v2f64(double 3.000000e+00, <2 x double> [[TMP20]])
+; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
index 0f18b29b25d6d..e1f4ec4945c42 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
@@ -552,11 +552,7 @@ define double @test_scalarization_cost_for_load_of_address(ptr %src.0, ptr %src.
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi double [ 3.000000e+00, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr [[T:%.*]], ptr [[SRC_0]], i64 [[INDEX]]
-; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <6 x double>, ptr [[GEP_0]], align 8
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <6 x double>, ptr [[SRC_0]], align 8
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <6 x double> [[WIDE_VEC]], <6 x double> poison, <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <6 x double> [[WIDE_VEC]], <6 x double> poison, <2 x i32> <i32 1, i32 4>
; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <6 x double> [[WIDE_VEC]], <6 x double> poison, <2 x i32> <i32 2, i32 5>
@@ -565,23 +561,20 @@ define double @test_scalarization_cost_for_load_of_address(ptr %src.0, ptr %src.
; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[STRIDED_VEC2]], splat (double 3.000000e+00)
; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], [[TMP5]]
-; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[INDEX]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[GEP_SRC]], align 8
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[SRC_1]], align 8
; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP7]], [[WIDE_LOAD]]
-; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr [[T_2:%.*]], ptr [[SRC_2]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr [[T_2]], ptr [[SRC_2]], i64 [[TMP1]]
-; CHECK-NEXT: [[GEP_72:%.*]] = getelementptr i8, ptr [[GEP_SRC_2]], i64 72
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP11]], i64 72
-; CHECK-NEXT: [[L_P_2:%.*]] = load ptr, ptr [[GEP_72]], align 8
-; CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP13]], align 8
-; CHECK-NEXT: [[LV:%.*]] = load double, ptr [[L_P_2]], align 8
-; CHECK-NEXT: [[TMP17:%.*]] = load double, ptr [[TMP15]], align 8
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr [[T_2:%.*]], ptr [[SRC_2]], i64 1
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 72
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP11]], i64 72
+; CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP8]], align 8
+; CHECK-NEXT: [[LV:%.*]] = load double, ptr [[TMP13]], align 8
+; CHECK-NEXT: [[TMP17:%.*]] = load double, ptr [[TMP10]], align 8
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x double> poison, double [[LV]], i32 0
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x double> [[TMP18]], double [[TMP17]], i32 1
; CHECK-NEXT: [[TMP20:%.*]] = fmul <2 x double> [[TMP9]], [[TMP19]]
-; CHECK-NEXT: [[TMP21]] = call double @llvm.vector.reduce.fadd.v2f64(double [[VEC_PHI]], <2 x double> [[TMP20]])
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
+; CHECK-NEXT: [[TMP21:%.*]] = call double @llvm.vector.reduce.fadd.v2f64(double 3.000000e+00, <2 x double> [[TMP20]])
+; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
index f39d80b109d85..fb42f2c7fc925 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
@@ -344,13 +344,9 @@ define i8 @mul_non_pow_2_low_trip_count(ptr noalias %a) {
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i8> [ <i8 2, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1
-; CHECK-NEXT: [[TMP1]] = mul <8 x i8> [[WIDE_LOAD]], [[VEC_PHI]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0:%.*]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = mul <8 x i8> [[WIDE_LOAD]], <i8 2, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP3:%.*]] = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> [[TMP1]])
; CHECK-NEXT: br label [[SCALAR_PH:%.*]]
@@ -359,7 +355,7 @@ define i8 @mul_non_pow_2_low_trip_count(ptr noalias %a) {
; CHECK: for.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 8, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[RDX:%.*]] = phi i8 [ [[TMP3]], [[SCALAR_PH]] ], [ [[MUL:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[IV]]
; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[GEP]], align 1
; CHECK-NEXT: [[MUL]] = mul i8 [[TMP5]], [[RDX]]
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll
index f1dda3d5a2f91..a80d5dacff583 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll
@@ -41,13 +41,9 @@ define i32 @test_remove_iv(i32 %start) #0 {
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP2]], %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[AVL:%.*]] = phi i32 [ 6, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 4, i1 true)
-; CHECK-NEXT: [[TMP4:%.*]] = xor <vscale x 4 x i32> [[VEC_PHI]], splat (i32 3)
-; CHECK-NEXT: [[TMP5]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP3]])
-; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP3]]
-; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP3:%.*]] = xor <vscale x 4 x i32> [[TMP2]], splat (i32 3)
+; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP2]], i32 6)
+; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> [[TMP5]])
; CHECK-NEXT: br label %[[EXIT:.*]]
@@ -71,8 +67,3 @@ exit:
attributes #0 = { vscale_range(2,2) }
-;.
-; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
-; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
-; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
-;.
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
index 99efdf03f2912..ff9a491b25197 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
@@ -603,19 +603,19 @@ define i64 @live_in_known_1_via_scev() {
; CHECK-NEXT: [[P:%.*]] = phi i32 [ 1, %[[ENTRY]] ]
; CHECK-NEXT: [[N:%.*]] = add nuw nsw i32 [[SEL]], 6
; CHECK-NEXT: [[P_EXT:%.*]] = zext nneg i32 [[P]] to i64
-; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
-; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ <i64 3, i64 1, i64 1, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_PHI]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_BODY]] ], [ [[INDEX_NEXT:%.*]], %[[EXIT]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ <i64 3, i64 1, i64 1, i64 1>, %[[VECTOR_BODY]] ], [ [[VEC_PHI]], %[[EXIT]] ]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8
-; CHECK-NEXT: br i1 [[TMP0]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP0]], label %[[MIDDLE_BLOCK:.*]], label %[[EXIT]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> [[VEC_PHI]])
-; CHECK-NEXT: br label %[[EXIT:.*]]
-; CHECK: [[EXIT]]:
+; CHECK-NEXT: br label %[[EXIT1:.*]]
+; CHECK: [[EXIT1]]:
; CHECK-NEXT: ret i64 [[TMP3]]
;
entry:
@@ -657,9 +657,8 @@ define i64 @cost_loop_invariant_recipes(i1 %x, i64 %y) {
; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[BROADCAST_SPLAT]], [[TMP1]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ splat (i64 1), %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP3]] = mul <2 x i64> [[TMP2]], [[VEC_PHI]]
-; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
+; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i64> [[TMP2]], splat (i64 1)
+; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> [[TMP3]])
; CHECK-NEXT: br label %[[EXIT:.*]]
@@ -697,12 +696,9 @@ define i32 @narrowed_reduction(ptr %a, i1 %cmp) #0 {
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = and <16 x i32> [[VEC_PHI]], splat (i32 1)
-; CHECK-NEXT: [[TMP1:%.*]] = or <16 x i32> [[TMP0]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP1:%.*]] = or <16 x i32> zeroinitializer, [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP2:%.*]] = trunc <16 x i32> [[TMP1]] to <16 x i1>
-; CHECK-NEXT: [[TMP3]] = zext <16 x i1> [[TMP2]] to <16 x i32>
-; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
+; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP2]])
; CHECK-NEXT: [[TMP5:%.*]] = zext i1 [[TMP4]] to i32
diff --git a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-ordered-reduction.ll b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-ordered-reduction.ll
index 2e946693af294..d7d321e84ddff 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-ordered-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-ordered-reduction.ll
@@ -273,22 +273,18 @@ define float @ordered_reduction_epilogue_dead_main_loop(ptr %p, i64 %n) "prefer-
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[BOUND]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr float, ptr [[P]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[TMP0]], i64 16
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[TMP0]], i64 32
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[TMP0]], i64 48
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x float>, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[P]], i64 16
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[P]], i64 32
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[P]], i64 48
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x float>, ptr [[P]], align 4
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x float>, ptr [[TMP1]], align 4
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x float>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x float>, ptr [[TMP3]], align 4
-; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[VEC_PHI]], <16 x float> [[WIDE_LOAD]])
+; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[WIDE_LOAD]])
; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP4]], <16 x float> [[WIDE_LOAD1]])
; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP5]], <16 x float> [[WIDE_LOAD2]])
-; CHECK-NEXT: [[TMP7]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP6]], <16 x float> [[WIDE_LOAD3]])
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64
-; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP6]], <16 x float> [[WIDE_LOAD3]])
+; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[BOUND]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
@@ -302,13 +298,10 @@ define float @ordered_reduction_epilogue_dead_main_loop(ptr %p, i64 %n) "prefer-
; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[BOUND]], [[N_MOD_VF4]]
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[VEC_EPILOG_PH]] ], [ [[TMP9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[P]], i64 [[INDEX6]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[P]], i64 [[VEC_EPILOG_RESUME_VAL]]
; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <16 x float>, ptr [[TMP8]], align 4
-; CHECK-NEXT: [[TMP9]] = call float @llvm.vector.reduce.fadd.v16f32(float [[VEC_PHI7]], <16 x float> [[WIDE_LOAD8]])
-; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 16
-; CHECK-NEXT: br i1 true, label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK-NEXT: [[TMP9:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[BC_MERGE_RDX]], <16 x float> [[WIDE_LOAD8]])
+; CHECK-NEXT: br label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]]
; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[BOUND]], [[N_VEC5]]
; CHECK-NEXT: br i1 [[CMP_N10]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
@@ -364,22 +357,18 @@ define float @ordered_reduction_nonzero_start_dead_main_vector_loop(ptr %p, i64
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[BOUND]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 4.200000e+01, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr float, ptr [[P]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[TMP0]], i64 16
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[TMP0]], i64 32
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[TMP0]], i64 48
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x float>, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[P]], i64 16
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[P]], i64 32
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[P]], i64 48
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x float>, ptr [[P]], align 4
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x float>, ptr [[TMP1]], align 4
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x float>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x float>, ptr [[TMP3]], align 4
-; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[VEC_PHI]], <16 x float> [[WIDE_LOAD]])
+; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float 4.200000e+01, <16 x float> [[WIDE_LOAD]])
; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP4]], <16 x float> [[WIDE_LOAD1]])
; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP5]], <16 x float> [[WIDE_LOAD2]])
-; CHECK-NEXT: [[TMP7]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP6]], <16 x float> [[WIDE_LOAD3]])
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64
-; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP6]], <16 x float> [[WIDE_LOAD3]])
+; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[BOUND]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
@@ -393,13 +382,10 @@ define float @ordered_reduction_nonzero_start_dead_main_vector_loop(ptr %p, i64
; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[BOUND]], [[N_MOD_VF4]]
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[VEC_EPILOG_PH]] ], [ [[TMP9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[P]], i64 [[INDEX6]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[P]], i64 [[VEC_EPILOG_RESUME_VAL]]
; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <16 x float>, ptr [[TMP8]], align 4
-; CHECK-NEXT: [[TMP9]] = call float @llvm.vector.reduce.fadd.v16f32(float [[VEC_PHI7]], <16 x float> [[WIDE_LOAD8]])
-; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 16
-; CHECK-NEXT: br i1 true, label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
+; CHECK-NEXT: [[TMP9:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[BC_MERGE_RDX]], <16 x float> [[WIDE_LOAD8]])
+; CHECK-NEXT: br label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]]
; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[BOUND]], [[N_VEC5]]
; CHECK-NEXT: br i1 [[CMP_N10]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
@@ -454,35 +440,29 @@ define { float, float } @two_ordered_reductions(ptr %p, ptr %q, i64 %n) "prefer-
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[BOUND]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP6:%.*]] = phi float [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP11:%.*]] = phi float [ 1.000000e+00, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr float, ptr [[P]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[TMP0]], i64 16
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[TMP0]], i64 32
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[TMP0]], i64 48
-; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x float>, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[P]], i64 16
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[P]], i64 32
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[P]], i64 48
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x float>, ptr [[P]], align 4
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x float>, ptr [[TMP1]], align 4
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x float>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x float>, ptr [[TMP3]], align 4
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, ptr [[Q]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr float, ptr [[TMP4]], i64 16
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, ptr [[TMP4]], i64 32
-; CHECK-NEXT: [[TMP20:%.*]] = getelementptr float, ptr [[TMP4]], i64 48
-; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x float>, ptr [[TMP4]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr float, ptr [[Q]], i64 16
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, ptr [[Q]], i64 32
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr float, ptr [[Q]], i64 48
+; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x float>, ptr [[Q]], align 4
; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <16 x float>, ptr [[TMP5]], align 4
; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <16 x float>, ptr [[TMP10]], align 4
; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <16 x float>, ptr [[TMP20]], align 4
-; CHECK-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP6]], <16 x float> [[WIDE_LOAD1]])
+; CHECK-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[WIDE_LOAD1]])
; CHECK-NEXT: [[TMP8:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP7]], <16 x float> [[WIDE_LOAD2]])
; CHECK-NEXT: [[TMP9:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP8]], <16 x float> [[WIDE_LOAD3]])
-; CHECK-NEXT: [[TMP18]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP9]], <16 x float> [[WIDE_LOAD5]])
-; CHECK-NEXT: [[TMP12:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP11]], <16 x float> [[WIDE_LOAD6]])
+; CHECK-NEXT: [[TMP18:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP9]], <16 x float> [[WIDE_LOAD5]])
+; CHECK-NEXT: [[TMP12:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float 1.000000e+00, <16 x float> [[WIDE_LOAD6]])
; CHECK-NEXT: [[TMP13:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP12]], <16 x float> [[WIDE_LOAD7]])
; CHECK-NEXT: [[TMP21:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP13]], <16 x float> [[WIDE_LOAD9]])
-; CHECK-NEXT: [[TMP19]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP21]], <16 x float> [[WIDE_LOAD8]])
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64
-; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
+; CHECK-NEXT: [[TMP19:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP21]], <16 x float> [[WIDE_LOAD8]])
+; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[BOUND]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
@@ -497,17 +477,13 @@ define { float, float } @two_ordered_reductions(ptr %p, ptr %q, i64 %n) "prefer-
; CHECK-NEXT: [[N_VEC10:%.*]] = sub i64 [[BOUND]], [[N_MOD_VF9]]
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
-; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL1]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT17:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[BC_MERGE_RDX1]], %[[VEC_EPILOG_PH]] ], [ [[TMP16:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX8:%.*]] = phi float [ [[BC_MERGE_RDX9]], %[[VEC_EPILOG_PH]] ], [ [[TMP17:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr float, ptr [[P]], i64 [[VEC_EPILOG_RESUME_VAL]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr float, ptr [[P]], i64 [[VEC_EPILOG_RESUME_VAL1]]
; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <16 x float>, ptr [[TMP14]], align 4
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr float, ptr [[Q]], i64 [[VEC_EPILOG_RESUME_VAL]]
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr float, ptr [[Q]], i64 [[VEC_EPILOG_RESUME_VAL1]]
; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <16 x float>, ptr [[TMP15]], align 4
-; CHECK-NEXT: [[TMP16]] = call float @llvm.vector.reduce.fadd.v16f32(float [[BC_MERGE_RDX]], <16 x float> [[WIDE_LOAD11]])
-; CHECK-NEXT: [[TMP17]] = call float @llvm.vector.reduce.fadd.v16f32(float [[BC_MERGE_RDX8]], <16 x float> [[WIDE_LOAD12]])
-; CHECK-NEXT: [[INDEX_NEXT17]] = add nuw i64 [[VEC_EPILOG_RESUME_VAL]], 16
-; CHECK-NEXT: br i1 true, label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
+; CHECK-NEXT: [[TMP16:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[BC_MERGE_RDX1]], <16 x float> [[WIDE_LOAD11]])
+; CHECK-NEXT: [[TMP17:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[BC_MERGE_RDX9]], <16 x float> [[WIDE_LOAD12]])
+; CHECK-NEXT: br label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]]
; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N13:%.*]] = icmp eq i64 [[BOUND]], [[N_VEC10]]
; CHECK-NEXT: br i1 [[CMP_N13]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll
index 8f347ebf87016..40075357a38c9 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll
@@ -2455,44 +2455,22 @@ define i32 @test_non_unit_stride_five(i64 %len, ptr %test_base) {
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP112:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP113:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP114:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP115:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 5
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 5
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 10
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 15
-; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 20
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 25
-; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 30
-; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 35
-; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 40
-; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 45
-; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 50
-; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 55
-; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 60
-; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 65
-; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 70
-; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 75
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]]
-; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
-; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
-; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP5]]
-; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP6]]
-; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP7]]
-; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP8]]
-; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP9]]
-; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP10]]
-; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP11]]
-; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP12]]
-; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP13]]
-; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP14]]
-; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP15]]
-; CHECK-NEXT: [[TMP32:%.*]] = load i1, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 5
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 10
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 15
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 20
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 25
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 30
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 35
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 40
+; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 45
+; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 50
+; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 55
+; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 60
+; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 65
+; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 70
+; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 75
+; CHECK-NEXT: [[TMP32:%.*]] = load i1, ptr [[TEST_BASE]], align 1
; CHECK-NEXT: [[TMP33:%.*]] = load i1, ptr [[TMP17]], align 1
; CHECK-NEXT: [[TMP34:%.*]] = load i1, ptr [[TMP18]], align 1
; CHECK-NEXT: [[TMP35:%.*]] = load i1, ptr [[TMP19]], align 1
@@ -2524,23 +2502,22 @@ define i32 @test_non_unit_stride_five(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
-; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP1]]
-; CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP2]]
-; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP3]]
-; CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP5]]
-; CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP6]]
-; CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP7]]
-; CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP8]]
-; CHECK-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP9]]
-; CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP10]]
-; CHECK-NEXT: [[TMP75:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP11]]
-; CHECK-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP12]]
-; CHECK-NEXT: [[TMP77:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP13]]
-; CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP14]]
-; CHECK-NEXT: [[TMP79:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP15]]
-; CHECK-NEXT: [[TMP80:%.*]] = load i32, ptr [[TMP64]], align 4
+; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 5
+; CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 10
+; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 15
+; CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 20
+; CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 25
+; CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 30
+; CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 35
+; CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 40
+; CHECK-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 45
+; CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 50
+; CHECK-NEXT: [[TMP75:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 55
+; CHECK-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 60
+; CHECK-NEXT: [[TMP77:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 65
+; CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 70
+; CHECK-NEXT: [[TMP79:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 75
+; CHECK-NEXT: [[TMP80:%.*]] = load i32, ptr [[ALLOCA]], align 4
; CHECK-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP65]], align 4
; CHECK-NEXT: [[TMP82:%.*]] = load i32, ptr [[TMP66]], align 4
; CHECK-NEXT: [[TMP83:%.*]] = load i32, ptr [[TMP67]], align 4
@@ -2576,16 +2553,12 @@ define i32 @test_non_unit_stride_five(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[PREDPHI4:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[TMP95]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[PREDPHI5:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[TMP103]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[PREDPHI6:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[TMP111]], <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP112]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]]
-; CHECK-NEXT: [[TMP113]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI4]]
-; CHECK-NEXT: [[TMP114]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI5]]
-; CHECK-NEXT: [[TMP115]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI6]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
+; CHECK-NEXT: [[TMP112:%.*]] = add <4 x i32> zeroinitializer, [[PREDPHI]]
+; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
-; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP113]], [[TMP112]]
-; CHECK-NEXT: [[BIN_RDX7:%.*]] = add <4 x i32> [[TMP114]], [[BIN_RDX]]
-; CHECK-NEXT: [[BIN_RDX8:%.*]] = add <4 x i32> [[TMP115]], [[BIN_RDX7]]
+; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[PREDPHI4]], [[TMP112]]
+; CHECK-NEXT: [[BIN_RDX7:%.*]] = add <4 x i32> [[PREDPHI5]], [[BIN_RDX]]
+; CHECK-NEXT: [[BIN_RDX8:%.*]] = add <4 x i32> [[PREDPHI6]], [[BIN_RDX7]]
; CHECK-NEXT: [[TMP117:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX8]])
; CHECK-NEXT: br label [[SCALAR_PH:%.*]]
; CHECK: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
index 79e01790d2d94..213e2f47805a2 100644
--- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
@@ -776,10 +776,9 @@ define i16 @test_no_op_or_reduction_single_vector_iteration(i64 %N) {
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[CLAMPED]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i16> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[VEC_PHI]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
+; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[TMP0:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[VEC_PHI]])
+; CHECK-NEXT: [[TMP0:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> zeroinitializer)
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[CLAMPED]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
; CHECK: [[VEC_EPILOG_ITER_CHECK]]:
@@ -792,10 +791,9 @@ define i16 @test_no_op_or_reduction_single_vector_iteration(i64 %N) {
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> zeroinitializer, i16 [[BC_MERGE_RDX]], i32 0
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
-; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i16> [ [[TMP1]], %[[VEC_EPILOG_PH]] ], [ [[VEC_PHI4]], %[[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: br i1 true, label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
+; CHECK-NEXT: br label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]]
; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[VEC_PHI4]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP1]])
; CHECK-NEXT: [[CMP_N5:%.*]] = icmp eq i64 [[CLAMPED]], [[N_VEC3]]
; CHECK-NEXT: br i1 [[CMP_N5]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
@@ -848,13 +846,9 @@ define i16 @test_or_reduction_with_induction_single_vector_iteration(i64 %N) {
; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[N_VEC]] to i32
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i16> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP1:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1]] = or <4 x i16> [[VEC_PHI]], [[VEC_IND]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
-; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
+; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[TMP7:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP1]])
+; CHECK-NEXT: [[TMP7:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> <i16 0, i16 1, i16 2, i16 3>)
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[CLAMPED]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
; CHECK: [[VEC_EPILOG_ITER_CHECK]]:
@@ -873,11 +867,8 @@ define i16 @test_or_reduction_with_induction_single_vector_iteration(i64 %N) {
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i16> [[BROADCAST_SPLAT]], <i16 0, i16 1, i16 2, i16 3>
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
-; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i16> [ [[TMP3]], %[[VEC_EPILOG_PH]] ], [ [[TMP5:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND5:%.*]] = phi <4 x i16> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT6:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP5]] = or <4 x i16> [[VEC_PHI4]], [[VEC_IND5]]
-; CHECK-NEXT: [[VEC_IND_NEXT6]] = add <4 x i16> [[VEC_IND5]], splat (i16 4)
-; CHECK-NEXT: br i1 true, label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
+; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i16> [[TMP3]], [[INDUCTION]]
+; CHECK-NEXT: br label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]]
; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP6:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP5]])
; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[CLAMPED]], [[N_VEC3]]
@@ -947,11 +938,9 @@ define i32 @anyof_reduction_in_dissolved_epilogue(i32 %val, i1 %c) {
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP11]] = or <4 x i1> [[VEC_PHI]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]]
+; CHECK-NEXT: br label %[[MIDDLE_BLOCK]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP11]])
+; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[BROADCAST_SPLAT]])
; CHECK-NEXT: [[TMP4:%.*]] = freeze i1 [[TMP3]]
; CHECK-NEXT: [[RDX_SELECT]] = select i1 [[TMP4]], i32 [[OUTER_IV]], i32 0
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
@@ -972,9 +961,8 @@ define i32 @anyof_reduction_in_dissolved_epilogue(i32 %val, i1 %c) {
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT3]], <4 x i1> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
-; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i1> [ [[BROADCAST_SPLAT4]], %[[VEC_EPILOG_PH]] ], [ [[TMP8:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP8]] = or <4 x i1> [[VEC_PHI5]], [[BROADCAST_SPLAT6]]
-; CHECK-NEXT: br i1 true, label %[[VEC_EPILOG_MIDDLE_BLOCK]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
+; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i1> [[BROADCAST_SPLAT4]], [[BROADCAST_SPLAT6]]
+; CHECK-NEXT: br label %[[VEC_EPILOG_MIDDLE_BLOCK]]
; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]])
; CHECK-NEXT: [[TMP10:%.*]] = freeze i1 [[TMP9]]
@@ -1075,14 +1063,13 @@ define i64 @reduction_with_ptr_iv_inttoptr_exit_cond(ptr %base, ptr %src) {
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
-; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ [[TMP18]], %[[VEC_EPILOG_PH]] ], [ [[TMP22:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP19:%.*]] = add <4 x i64> [[VEC_PHI3]], splat (i64 1)
+; CHECK-NEXT: [[TMP19:%.*]] = add <4 x i64> [[TMP18]], splat (i64 1)
; CHECK-NEXT: [[TMP20:%.*]] = load i8, ptr [[SRC]], align 1
; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i8> poison, i8 [[TMP20]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT4]], <4 x i8> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP21:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT5]] to <4 x i64>
-; CHECK-NEXT: [[TMP22]] = add <4 x i64> [[TMP19]], [[TMP21]]
-; CHECK-NEXT: br i1 true, label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]]
+; CHECK-NEXT: [[TMP22:%.*]] = add <4 x i64> [[TMP19]], [[TMP21]]
+; CHECK-NEXT: br label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]]
; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP23:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP22]])
; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC2]]
diff --git a/llvm/test/Transforms/LoopVectorize/find-last.ll b/llvm/test/Transforms/LoopVectorize/find-last.ll
index 44adff384373d..d777f500945a7 100644
--- a/llvm/test/Transforms/LoopVectorize/find-last.ll
+++ b/llvm/test/Transforms/LoopVectorize/find-last.ll
@@ -280,13 +280,10 @@ define ptr @loop_inv_select_condition_issue_185682(i32 %a, i32 %b) {
; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP1]])
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x ptr> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP4]] = select i1 [[TMP2]], <4 x i1> [[TMP0]], <4 x i1> [[TMP3]]
-; CHECK-NEXT: [[TMP5]] = select i1 [[TMP2]], <4 x ptr> zeroinitializer, <4 x ptr> [[VEC_PHI]]
-; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], <4 x i1> [[TMP0]], <4 x i1> zeroinitializer
+; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
-; CHECK-NEXT: [[TMP6:%.*]] = call ptr @llvm.experimental.vector.extract.last.active.v4p0(<4 x ptr> [[TMP5]], <4 x i1> [[TMP4]], ptr null)
+; CHECK-NEXT: [[TMP6:%.*]] = call ptr @llvm.experimental.vector.extract.last.active.v4p0(<4 x ptr> zeroinitializer, <4 x i1> [[TMP3]], ptr null)
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: ret ptr [[TMP6]]
diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
index dd3ad4d01b465..49ca79bfa2b04 100644
--- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
@@ -194,182 +194,129 @@ define i16 @select_decreasing_induction_icmp_table_i16(i16 noundef %val) {
; IC4VF4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
; IC4VF4-NEXT: br label %[[VECTOR_BODY:.*]]
; IC4VF4: [[VECTOR_BODY]]:
-; IC4VF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE44:.*]] ]
-; IC4VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 12, i16 11, i16 10, i16 9>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
-; IC4VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP108:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
-; IC4VF4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP109:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
-; IC4VF4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP110:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
-; IC4VF4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP111:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
-; IC4VF4-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 -4)
-; IC4VF4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i16> [[STEP_ADD]], splat (i16 -4)
-; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i16> [[STEP_ADD_2]], splat (i16 -4)
-; IC4VF4-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
-; IC4VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]]
-; IC4VF4-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
-; IC4VF4-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer
-; IC4VF4-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 0, i32 1, i32 2, i32 3>
-; IC4VF4-NEXT: [[VEC_IV8:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 4, i32 5, i32 6, i32 7>
-; IC4VF4-NEXT: [[VEC_IV11:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 8, i32 9, i32 10, i32 11>
-; IC4VF4-NEXT: [[VEC_IV14:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 12, i32 13, i32 14, i32 15>
-; IC4VF4-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], splat (i32 11)
-; IC4VF4-NEXT: [[TMP1:%.*]] = icmp ule <4 x i32> [[VEC_IV8]], splat (i32 11)
-; IC4VF4-NEXT: [[TMP2:%.*]] = icmp ule <4 x i32> [[VEC_IV11]], splat (i32 11)
-; IC4VF4-NEXT: [[TMP3:%.*]] = icmp ule <4 x i32> [[VEC_IV14]], splat (i32 11)
-; IC4VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
-; IC4VF4-NEXT: br i1 [[TMP4]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
+; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; IC4VF4: [[PRED_LOAD_IF]]:
-; IC4VF4-NEXT: [[TMP5:%.*]] = add i16 [[OFFSET_IDX]], 0
-; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP5]]
+; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 12
; IC4VF4-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP6]], align 1
; IC4VF4-NEXT: [[TMP8:%.*]] = insertelement <4 x i16> poison, i16 [[TMP7]], i32 0
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE]]
; IC4VF4: [[PRED_LOAD_CONTINUE]]:
; IC4VF4-NEXT: [[TMP9:%.*]] = phi <4 x i16> [ poison, %[[VECTOR_BODY]] ], [ [[TMP8]], %[[PRED_LOAD_IF]] ]
-; IC4VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
-; IC4VF4-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF15:.*]], label %[[PRED_LOAD_CONTINUE16:.*]]
+; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF15:.*]], label %[[PRED_LOAD_CONTINUE16:.*]]
; IC4VF4: [[PRED_LOAD_IF15]]:
-; IC4VF4-NEXT: [[TMP11:%.*]] = add i16 [[OFFSET_IDX]], -1
-; IC4VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP11]]
+; IC4VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 11
; IC4VF4-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 1
; IC4VF4-NEXT: [[TMP14:%.*]] = insertelement <4 x i16> [[TMP9]], i16 [[TMP13]], i32 1
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE16]]
; IC4VF4: [[PRED_LOAD_CONTINUE16]]:
; IC4VF4-NEXT: [[TMP15:%.*]] = phi <4 x i16> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF15]] ]
-; IC4VF4-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
-; IC4VF4-NEXT: br i1 [[TMP16]], label %[[PRED_LOAD_IF17:.*]], label %[[PRED_LOAD_CONTINUE18:.*]]
+; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF17:.*]], label %[[PRED_LOAD_CONTINUE18:.*]]
; IC4VF4: [[PRED_LOAD_IF17]]:
-; IC4VF4-NEXT: [[TMP17:%.*]] = add i16 [[OFFSET_IDX]], -2
-; IC4VF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP17]]
+; IC4VF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 10
; IC4VF4-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP18]], align 1
; IC4VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i16> [[TMP15]], i16 [[TMP19]], i32 2
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE18]]
; IC4VF4: [[PRED_LOAD_CONTINUE18]]:
; IC4VF4-NEXT: [[TMP21:%.*]] = phi <4 x i16> [ [[TMP15]], %[[PRED_LOAD_CONTINUE16]] ], [ [[TMP20]], %[[PRED_LOAD_IF17]] ]
-; IC4VF4-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
-; IC4VF4-NEXT: br i1 [[TMP22]], label %[[PRED_LOAD_IF19:.*]], label %[[PRED_LOAD_CONTINUE20:.*]]
+; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF19:.*]], label %[[PRED_LOAD_CONTINUE20:.*]]
; IC4VF4: [[PRED_LOAD_IF19]]:
-; IC4VF4-NEXT: [[TMP23:%.*]] = add i16 [[OFFSET_IDX]], -3
-; IC4VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP23]]
+; IC4VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 9
; IC4VF4-NEXT: [[TMP25:%.*]] = load i16, ptr [[TMP24]], align 1
; IC4VF4-NEXT: [[TMP26:%.*]] = insertelement <4 x i16> [[TMP21]], i16 [[TMP25]], i32 3
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE20]]
; IC4VF4: [[PRED_LOAD_CONTINUE20]]:
; IC4VF4-NEXT: [[TMP27:%.*]] = phi <4 x i16> [ [[TMP21]], %[[PRED_LOAD_CONTINUE18]] ], [ [[TMP26]], %[[PRED_LOAD_IF19]] ]
-; IC4VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
-; IC4VF4-NEXT: br i1 [[TMP28]], label %[[PRED_LOAD_IF21:.*]], label %[[PRED_LOAD_CONTINUE22:.*]]
+; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF21:.*]], label %[[PRED_LOAD_CONTINUE22:.*]]
; IC4VF4: [[PRED_LOAD_IF21]]:
-; IC4VF4-NEXT: [[TMP29:%.*]] = add i16 [[OFFSET_IDX]], -4
-; IC4VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP29]]
+; IC4VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 8
; IC4VF4-NEXT: [[TMP31:%.*]] = load i16, ptr [[TMP30]], align 1
; IC4VF4-NEXT: [[TMP32:%.*]] = insertelement <4 x i16> poison, i16 [[TMP31]], i32 0
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE22]]
; IC4VF4: [[PRED_LOAD_CONTINUE22]]:
; IC4VF4-NEXT: [[TMP33:%.*]] = phi <4 x i16> [ poison, %[[PRED_LOAD_CONTINUE20]] ], [ [[TMP32]], %[[PRED_LOAD_IF21]] ]
-; IC4VF4-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
-; IC4VF4-NEXT: br i1 [[TMP34]], label %[[PRED_LOAD_IF23:.*]], label %[[PRED_LOAD_CONTINUE24:.*]]
+; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF23:.*]], label %[[PRED_LOAD_CONTINUE24:.*]]
; IC4VF4: [[PRED_LOAD_IF23]]:
-; IC4VF4-NEXT: [[TMP35:%.*]] = add i16 [[OFFSET_IDX]], -5
-; IC4VF4-NEXT: [[TMP36:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP35]]
+; IC4VF4-NEXT: [[TMP36:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 7
; IC4VF4-NEXT: [[TMP37:%.*]] = load i16, ptr [[TMP36]], align 1
; IC4VF4-NEXT: [[TMP38:%.*]] = insertelement <4 x i16> [[TMP33]], i16 [[TMP37]], i32 1
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE24]]
; IC4VF4: [[PRED_LOAD_CONTINUE24]]:
; IC4VF4-NEXT: [[TMP39:%.*]] = phi <4 x i16> [ [[TMP33]], %[[PRED_LOAD_CONTINUE22]] ], [ [[TMP38]], %[[PRED_LOAD_IF23]] ]
-; IC4VF4-NEXT: [[TMP40:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
-; IC4VF4-NEXT: br i1 [[TMP40]], label %[[PRED_LOAD_IF25:.*]], label %[[PRED_LOAD_CONTINUE26:.*]]
+; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF25:.*]], label %[[PRED_LOAD_CONTINUE26:.*]]
; IC4VF4: [[PRED_LOAD_IF25]]:
-; IC4VF4-NEXT: [[TMP41:%.*]] = add i16 [[OFFSET_IDX]], -6
-; IC4VF4-NEXT: [[TMP42:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP41]]
+; IC4VF4-NEXT: [[TMP42:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 6
; IC4VF4-NEXT: [[TMP43:%.*]] = load i16, ptr [[TMP42]], align 1
; IC4VF4-NEXT: [[TMP44:%.*]] = insertelement <4 x i16> [[TMP39]], i16 [[TMP43]], i32 2
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE26]]
; IC4VF4: [[PRED_LOAD_CONTINUE26]]:
; IC4VF4-NEXT: [[TMP45:%.*]] = phi <4 x i16> [ [[TMP39]], %[[PRED_LOAD_CONTINUE24]] ], [ [[TMP44]], %[[PRED_LOAD_IF25]] ]
-; IC4VF4-NEXT: [[TMP46:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
-; IC4VF4-NEXT: br i1 [[TMP46]], label %[[PRED_LOAD_IF27:.*]], label %[[PRED_LOAD_CONTINUE28:.*]]
+; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF27:.*]], label %[[PRED_LOAD_CONTINUE28:.*]]
; IC4VF4: [[PRED_LOAD_IF27]]:
-; IC4VF4-NEXT: [[TMP47:%.*]] = add i16 [[OFFSET_IDX]], -7
-; IC4VF4-NEXT: [[TMP48:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP47]]
+; IC4VF4-NEXT: [[TMP48:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 5
; IC4VF4-NEXT: [[TMP49:%.*]] = load i16, ptr [[TMP48]], align 1
; IC4VF4-NEXT: [[TMP50:%.*]] = insertelement <4 x i16> [[TMP45]], i16 [[TMP49]], i32 3
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE28]]
; IC4VF4: [[PRED_LOAD_CONTINUE28]]:
; IC4VF4-NEXT: [[TMP51:%.*]] = phi <4 x i16> [ [[TMP45]], %[[PRED_LOAD_CONTINUE26]] ], [ [[TMP50]], %[[PRED_LOAD_IF27]] ]
-; IC4VF4-NEXT: [[TMP52:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
-; IC4VF4-NEXT: br i1 [[TMP52]], label %[[PRED_LOAD_IF29:.*]], label %[[PRED_LOAD_CONTINUE30:.*]]
+; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF29:.*]], label %[[PRED_LOAD_CONTINUE30:.*]]
; IC4VF4: [[PRED_LOAD_IF29]]:
-; IC4VF4-NEXT: [[TMP53:%.*]] = add i16 [[OFFSET_IDX]], -8
-; IC4VF4-NEXT: [[TMP54:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP53]]
+; IC4VF4-NEXT: [[TMP54:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 4
; IC4VF4-NEXT: [[TMP55:%.*]] = load i16, ptr [[TMP54]], align 1
; IC4VF4-NEXT: [[TMP56:%.*]] = insertelement <4 x i16> poison, i16 [[TMP55]], i32 0
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE30]]
; IC4VF4: [[PRED_LOAD_CONTINUE30]]:
; IC4VF4-NEXT: [[TMP57:%.*]] = phi <4 x i16> [ poison, %[[PRED_LOAD_CONTINUE28]] ], [ [[TMP56]], %[[PRED_LOAD_IF29]] ]
-; IC4VF4-NEXT: [[TMP58:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
-; IC4VF4-NEXT: br i1 [[TMP58]], label %[[PRED_LOAD_IF31:.*]], label %[[PRED_LOAD_CONTINUE32:.*]]
+; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF31:.*]], label %[[PRED_LOAD_CONTINUE32:.*]]
; IC4VF4: [[PRED_LOAD_IF31]]:
-; IC4VF4-NEXT: [[TMP59:%.*]] = add i16 [[OFFSET_IDX]], -9
-; IC4VF4-NEXT: [[TMP60:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP59]]
+; IC4VF4-NEXT: [[TMP60:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 3
; IC4VF4-NEXT: [[TMP61:%.*]] = load i16, ptr [[TMP60]], align 1
; IC4VF4-NEXT: [[TMP62:%.*]] = insertelement <4 x i16> [[TMP57]], i16 [[TMP61]], i32 1
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE32]]
; IC4VF4: [[PRED_LOAD_CONTINUE32]]:
; IC4VF4-NEXT: [[TMP63:%.*]] = phi <4 x i16> [ [[TMP57]], %[[PRED_LOAD_CONTINUE30]] ], [ [[TMP62]], %[[PRED_LOAD_IF31]] ]
-; IC4VF4-NEXT: [[TMP64:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
-; IC4VF4-NEXT: br i1 [[TMP64]], label %[[PRED_LOAD_IF33:.*]], label %[[PRED_LOAD_CONTINUE34:.*]]
+; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF33:.*]], label %[[PRED_LOAD_CONTINUE34:.*]]
; IC4VF4: [[PRED_LOAD_IF33]]:
-; IC4VF4-NEXT: [[TMP65:%.*]] = add i16 [[OFFSET_IDX]], -10
-; IC4VF4-NEXT: [[TMP66:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP65]]
+; IC4VF4-NEXT: [[TMP66:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 2
; IC4VF4-NEXT: [[TMP67:%.*]] = load i16, ptr [[TMP66]], align 1
; IC4VF4-NEXT: [[TMP68:%.*]] = insertelement <4 x i16> [[TMP63]], i16 [[TMP67]], i32 2
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE34]]
; IC4VF4: [[PRED_LOAD_CONTINUE34]]:
; IC4VF4-NEXT: [[TMP69:%.*]] = phi <4 x i16> [ [[TMP63]], %[[PRED_LOAD_CONTINUE32]] ], [ [[TMP68]], %[[PRED_LOAD_IF33]] ]
-; IC4VF4-NEXT: [[TMP70:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
-; IC4VF4-NEXT: br i1 [[TMP70]], label %[[PRED_LOAD_IF35:.*]], label %[[PRED_LOAD_CONTINUE36:.*]]
+; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF35:.*]], label %[[PRED_LOAD_CONTINUE36:.*]]
; IC4VF4: [[PRED_LOAD_IF35]]:
-; IC4VF4-NEXT: [[TMP71:%.*]] = add i16 [[OFFSET_IDX]], -11
-; IC4VF4-NEXT: [[TMP72:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP71]]
+; IC4VF4-NEXT: [[TMP72:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 1
; IC4VF4-NEXT: [[TMP73:%.*]] = load i16, ptr [[TMP72]], align 1
; IC4VF4-NEXT: [[TMP74:%.*]] = insertelement <4 x i16> [[TMP69]], i16 [[TMP73]], i32 3
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE36]]
; IC4VF4: [[PRED_LOAD_CONTINUE36]]:
; IC4VF4-NEXT: [[TMP75:%.*]] = phi <4 x i16> [ [[TMP69]], %[[PRED_LOAD_CONTINUE34]] ], [ [[TMP74]], %[[PRED_LOAD_IF35]] ]
-; IC4VF4-NEXT: [[TMP76:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
-; IC4VF4-NEXT: br i1 [[TMP76]], label %[[PRED_LOAD_IF37:.*]], label %[[PRED_LOAD_CONTINUE38:.*]]
+; IC4VF4-NEXT: br i1 false, label %[[PRED_LOAD_IF37:.*]], label %[[PRED_LOAD_CONTINUE38:.*]]
; IC4VF4: [[PRED_LOAD_IF37]]:
-; IC4VF4-NEXT: [[TMP77:%.*]] = add i16 [[OFFSET_IDX]], -12
-; IC4VF4-NEXT: [[TMP78:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP77]]
+; IC4VF4-NEXT: [[TMP78:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 0
; IC4VF4-NEXT: [[TMP79:%.*]] = load i16, ptr [[TMP78]], align 1
; IC4VF4-NEXT: [[TMP80:%.*]] = insertelement <4 x i16> poison, i16 [[TMP79]], i32 0
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE38]]
; IC4VF4: [[PRED_LOAD_CONTINUE38]]:
; IC4VF4-NEXT: [[TMP81:%.*]] = phi <4 x i16> [ poison, %[[PRED_LOAD_CONTINUE36]] ], [ [[TMP80]], %[[PRED_LOAD_IF37]] ]
-; IC4VF4-NEXT: [[TMP82:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
-; IC4VF4-NEXT: br i1 [[TMP82]], label %[[PRED_LOAD_IF39:.*]], label %[[PRED_LOAD_CONTINUE40:.*]]
+; IC4VF4-NEXT: br i1 false, label %[[PRED_LOAD_IF39:.*]], label %[[PRED_LOAD_CONTINUE40:.*]]
; IC4VF4: [[PRED_LOAD_IF39]]:
-; IC4VF4-NEXT: [[TMP83:%.*]] = add i16 [[OFFSET_IDX]], -13
-; IC4VF4-NEXT: [[TMP84:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP83]]
+; IC4VF4-NEXT: [[TMP84:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 -1
; IC4VF4-NEXT: [[TMP85:%.*]] = load i16, ptr [[TMP84]], align 1
; IC4VF4-NEXT: [[TMP86:%.*]] = insertelement <4 x i16> [[TMP81]], i16 [[TMP85]], i32 1
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE40]]
; IC4VF4: [[PRED_LOAD_CONTINUE40]]:
; IC4VF4-NEXT: [[TMP87:%.*]] = phi <4 x i16> [ [[TMP81]], %[[PRED_LOAD_CONTINUE38]] ], [ [[TMP86]], %[[PRED_LOAD_IF39]] ]
-; IC4VF4-NEXT: [[TMP88:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
-; IC4VF4-NEXT: br i1 [[TMP88]], label %[[PRED_LOAD_IF41:.*]], label %[[PRED_LOAD_CONTINUE42:.*]]
+; IC4VF4-NEXT: br i1 false, label %[[PRED_LOAD_IF41:.*]], label %[[PRED_LOAD_CONTINUE42:.*]]
; IC4VF4: [[PRED_LOAD_IF41]]:
-; IC4VF4-NEXT: [[TMP89:%.*]] = add i16 [[OFFSET_IDX]], -14
-; IC4VF4-NEXT: [[TMP90:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP89]]
+; IC4VF4-NEXT: [[TMP90:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 -2
; IC4VF4-NEXT: [[TMP91:%.*]] = load i16, ptr [[TMP90]], align 1
; IC4VF4-NEXT: [[TMP92:%.*]] = insertelement <4 x i16> [[TMP87]], i16 [[TMP91]], i32 2
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE42]]
; IC4VF4: [[PRED_LOAD_CONTINUE42]]:
; IC4VF4-NEXT: [[TMP93:%.*]] = phi <4 x i16> [ [[TMP87]], %[[PRED_LOAD_CONTINUE40]] ], [ [[TMP92]], %[[PRED_LOAD_IF41]] ]
-; IC4VF4-NEXT: [[TMP94:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
-; IC4VF4-NEXT: br i1 [[TMP94]], label %[[PRED_LOAD_IF43:.*]], label %[[PRED_LOAD_CONTINUE44]]
+; IC4VF4-NEXT: br i1 false, label %[[PRED_LOAD_IF43:.*]], label %[[PRED_LOAD_CONTINUE44:.*]]
; IC4VF4: [[PRED_LOAD_IF43]]:
-; IC4VF4-NEXT: [[TMP95:%.*]] = add i16 [[OFFSET_IDX]], -15
-; IC4VF4-NEXT: [[TMP96:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP95]]
+; IC4VF4-NEXT: [[TMP96:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 -3
; IC4VF4-NEXT: [[TMP97:%.*]] = load i16, ptr [[TMP96]], align 1
; IC4VF4-NEXT: [[TMP98:%.*]] = insertelement <4 x i16> [[TMP93]], i16 [[TMP97]], i32 3
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE44]]
@@ -379,22 +326,16 @@ define i16 @select_decreasing_induction_icmp_table_i16(i16 noundef %val) {
; IC4VF4-NEXT: [[TMP101:%.*]] = icmp ugt <4 x i16> [[TMP51]], [[BROADCAST_SPLAT]]
; IC4VF4-NEXT: [[TMP102:%.*]] = icmp ugt <4 x i16> [[TMP75]], [[BROADCAST_SPLAT]]
; IC4VF4-NEXT: [[TMP103:%.*]] = icmp ugt <4 x i16> [[TMP99]], [[BROADCAST_SPLAT]]
-; IC4VF4-NEXT: [[TMP104:%.*]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -1)
-; IC4VF4-NEXT: [[TMP105:%.*]] = add nsw <4 x i16> [[STEP_ADD]], splat (i16 -1)
-; IC4VF4-NEXT: [[TMP106:%.*]] = add nsw <4 x i16> [[STEP_ADD_2]], splat (i16 -1)
-; IC4VF4-NEXT: [[TMP107:%.*]] = add nsw <4 x i16> [[STEP_ADD_3]], splat (i16 -1)
-; IC4VF4-NEXT: [[TMP108]] = select <4 x i1> [[TMP100]], <4 x i16> [[TMP104]], <4 x i16> [[VEC_PHI]]
-; IC4VF4-NEXT: [[TMP109]] = select <4 x i1> [[TMP101]], <4 x i16> [[TMP105]], <4 x i16> [[VEC_PHI1]]
-; IC4VF4-NEXT: [[TMP110]] = select <4 x i1> [[TMP102]], <4 x i16> [[TMP106]], <4 x i16> [[VEC_PHI2]]
-; IC4VF4-NEXT: [[TMP111]] = select <4 x i1> [[TMP103]], <4 x i16> [[TMP107]], <4 x i16> [[VEC_PHI3]]
-; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
-; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i16> [[STEP_ADD_3]], splat (i16 -4)
-; IC4VF4-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; IC4VF4-NEXT: [[TMP76:%.*]] = select <4 x i1> [[TMP100]], <4 x i16> <i16 11, i16 10, i16 9, i16 8>, <4 x i16> splat (i16 32767)
+; IC4VF4-NEXT: [[TMP77:%.*]] = select <4 x i1> [[TMP101]], <4 x i16> <i16 7, i16 6, i16 5, i16 4>, <4 x i16> splat (i16 32767)
+; IC4VF4-NEXT: [[TMP70:%.*]] = select <4 x i1> [[TMP102]], <4 x i16> <i16 3, i16 2, i16 1, i16 0>, <4 x i16> splat (i16 32767)
+; IC4VF4-NEXT: [[TMP71:%.*]] = select <4 x i1> [[TMP103]], <4 x i16> <i16 -1, i16 -2, i16 -3, i16 -4>, <4 x i16> splat (i16 32767)
+; IC4VF4-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; IC4VF4: [[MIDDLE_BLOCK]]:
-; IC4VF4-NEXT: [[TMP112:%.*]] = select <4 x i1> [[TMP0]], <4 x i16> [[TMP108]], <4 x i16> [[VEC_PHI]]
-; IC4VF4-NEXT: [[TMP113:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[TMP109]], <4 x i16> [[VEC_PHI1]]
-; IC4VF4-NEXT: [[TMP114:%.*]] = select <4 x i1> [[TMP2]], <4 x i16> [[TMP110]], <4 x i16> [[VEC_PHI2]]
-; IC4VF4-NEXT: [[TMP115:%.*]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP111]], <4 x i16> [[VEC_PHI3]]
+; IC4VF4-NEXT: [[TMP112:%.*]] = select <4 x i1> splat (i1 true), <4 x i16> [[TMP76]], <4 x i16> splat (i16 32767)
+; IC4VF4-NEXT: [[TMP113:%.*]] = select <4 x i1> splat (i1 true), <4 x i16> [[TMP77]], <4 x i16> splat (i16 32767)
+; IC4VF4-NEXT: [[TMP114:%.*]] = select <4 x i1> splat (i1 true), <4 x i16> [[TMP70]], <4 x i16> splat (i16 32767)
+; IC4VF4-NEXT: [[TMP115:%.*]] = select <4 x i1> zeroinitializer, <4 x i16> [[TMP71]], <4 x i16> splat (i16 32767)
; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP112]], <4 x i16> [[TMP113]])
; IC4VF4-NEXT: [[RDX_MINMAX45:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX]], <4 x i16> [[TMP114]])
; IC4VF4-NEXT: [[RDX_MINMAX46:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX45]], <4 x i16> [[TMP115]])
@@ -519,182 +460,129 @@ define i16 @select_decreasing_induction_icmp_table_half(half noundef %val) {
; IC4VF4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x half> [[BROADCAST_SPLATINSERT]], <4 x half> poison, <4 x i32> zeroinitializer
; IC4VF4-NEXT: br label %[[VECTOR_BODY:.*]]
; IC4VF4: [[VECTOR_BODY]]:
-; IC4VF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE44:.*]] ]
-; IC4VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 12, i16 11, i16 10, i16 9>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
-; IC4VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP108:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
-; IC4VF4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP109:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
-; IC4VF4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP110:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
-; IC4VF4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP111:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
-; IC4VF4-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 -4)
-; IC4VF4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i16> [[STEP_ADD]], splat (i16 -4)
-; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i16> [[STEP_ADD_2]], splat (i16 -4)
-; IC4VF4-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
-; IC4VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]]
-; IC4VF4-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
-; IC4VF4-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer
-; IC4VF4-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 0, i32 1, i32 2, i32 3>
-; IC4VF4-NEXT: [[VEC_IV8:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 4, i32 5, i32 6, i32 7>
-; IC4VF4-NEXT: [[VEC_IV11:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 8, i32 9, i32 10, i32 11>
-; IC4VF4-NEXT: [[VEC_IV14:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 12, i32 13, i32 14, i32 15>
-; IC4VF4-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], splat (i32 11)
-; IC4VF4-NEXT: [[TMP1:%.*]] = icmp ule <4 x i32> [[VEC_IV8]], splat (i32 11)
-; IC4VF4-NEXT: [[TMP2:%.*]] = icmp ule <4 x i32> [[VEC_IV11]], splat (i32 11)
-; IC4VF4-NEXT: [[TMP3:%.*]] = icmp ule <4 x i32> [[VEC_IV14]], splat (i32 11)
-; IC4VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
-; IC4VF4-NEXT: br i1 [[TMP4]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
+; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; IC4VF4: [[PRED_LOAD_IF]]:
-; IC4VF4-NEXT: [[TMP5:%.*]] = add i16 [[OFFSET_IDX]], 0
-; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP5]]
+; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 12
; IC4VF4-NEXT: [[TMP7:%.*]] = load half, ptr [[TMP6]], align 1
; IC4VF4-NEXT: [[TMP8:%.*]] = insertelement <4 x half> poison, half [[TMP7]], i32 0
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE]]
; IC4VF4: [[PRED_LOAD_CONTINUE]]:
; IC4VF4-NEXT: [[TMP9:%.*]] = phi <4 x half> [ poison, %[[VECTOR_BODY]] ], [ [[TMP8]], %[[PRED_LOAD_IF]] ]
-; IC4VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
-; IC4VF4-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF15:.*]], label %[[PRED_LOAD_CONTINUE16:.*]]
+; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF15:.*]], label %[[PRED_LOAD_CONTINUE16:.*]]
; IC4VF4: [[PRED_LOAD_IF15]]:
-; IC4VF4-NEXT: [[TMP11:%.*]] = add i16 [[OFFSET_IDX]], -1
-; IC4VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP11]]
+; IC4VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 11
; IC4VF4-NEXT: [[TMP13:%.*]] = load half, ptr [[TMP12]], align 1
; IC4VF4-NEXT: [[TMP14:%.*]] = insertelement <4 x half> [[TMP9]], half [[TMP13]], i32 1
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE16]]
; IC4VF4: [[PRED_LOAD_CONTINUE16]]:
; IC4VF4-NEXT: [[TMP15:%.*]] = phi <4 x half> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF15]] ]
-; IC4VF4-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
-; IC4VF4-NEXT: br i1 [[TMP16]], label %[[PRED_LOAD_IF17:.*]], label %[[PRED_LOAD_CONTINUE18:.*]]
+; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF17:.*]], label %[[PRED_LOAD_CONTINUE18:.*]]
; IC4VF4: [[PRED_LOAD_IF17]]:
-; IC4VF4-NEXT: [[TMP17:%.*]] = add i16 [[OFFSET_IDX]], -2
-; IC4VF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP17]]
+; IC4VF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 10
; IC4VF4-NEXT: [[TMP19:%.*]] = load half, ptr [[TMP18]], align 1
; IC4VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x half> [[TMP15]], half [[TMP19]], i32 2
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE18]]
; IC4VF4: [[PRED_LOAD_CONTINUE18]]:
; IC4VF4-NEXT: [[TMP21:%.*]] = phi <4 x half> [ [[TMP15]], %[[PRED_LOAD_CONTINUE16]] ], [ [[TMP20]], %[[PRED_LOAD_IF17]] ]
-; IC4VF4-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
-; IC4VF4-NEXT: br i1 [[TMP22]], label %[[PRED_LOAD_IF19:.*]], label %[[PRED_LOAD_CONTINUE20:.*]]
+; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF19:.*]], label %[[PRED_LOAD_CONTINUE20:.*]]
; IC4VF4: [[PRED_LOAD_IF19]]:
-; IC4VF4-NEXT: [[TMP23:%.*]] = add i16 [[OFFSET_IDX]], -3
-; IC4VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP23]]
+; IC4VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 9
; IC4VF4-NEXT: [[TMP25:%.*]] = load half, ptr [[TMP24]], align 1
; IC4VF4-NEXT: [[TMP26:%.*]] = insertelement <4 x half> [[TMP21]], half [[TMP25]], i32 3
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE20]]
; IC4VF4: [[PRED_LOAD_CONTINUE20]]:
; IC4VF4-NEXT: [[TMP27:%.*]] = phi <4 x half> [ [[TMP21]], %[[PRED_LOAD_CONTINUE18]] ], [ [[TMP26]], %[[PRED_LOAD_IF19]] ]
-; IC4VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
-; IC4VF4-NEXT: br i1 [[TMP28]], label %[[PRED_LOAD_IF21:.*]], label %[[PRED_LOAD_CONTINUE22:.*]]
+; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF21:.*]], label %[[PRED_LOAD_CONTINUE22:.*]]
; IC4VF4: [[PRED_LOAD_IF21]]:
-; IC4VF4-NEXT: [[TMP29:%.*]] = add i16 [[OFFSET_IDX]], -4
-; IC4VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP29]]
+; IC4VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 8
; IC4VF4-NEXT: [[TMP31:%.*]] = load half, ptr [[TMP30]], align 1
; IC4VF4-NEXT: [[TMP32:%.*]] = insertelement <4 x half> poison, half [[TMP31]], i32 0
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE22]]
; IC4VF4: [[PRED_LOAD_CONTINUE22]]:
; IC4VF4-NEXT: [[TMP33:%.*]] = phi <4 x half> [ poison, %[[PRED_LOAD_CONTINUE20]] ], [ [[TMP32]], %[[PRED_LOAD_IF21]] ]
-; IC4VF4-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
-; IC4VF4-NEXT: br i1 [[TMP34]], label %[[PRED_LOAD_IF23:.*]], label %[[PRED_LOAD_CONTINUE24:.*]]
+; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF23:.*]], label %[[PRED_LOAD_CONTINUE24:.*]]
; IC4VF4: [[PRED_LOAD_IF23]]:
-; IC4VF4-NEXT: [[TMP35:%.*]] = add i16 [[OFFSET_IDX]], -5
-; IC4VF4-NEXT: [[TMP36:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP35]]
+; IC4VF4-NEXT: [[TMP36:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 7
; IC4VF4-NEXT: [[TMP37:%.*]] = load half, ptr [[TMP36]], align 1
; IC4VF4-NEXT: [[TMP38:%.*]] = insertelement <4 x half> [[TMP33]], half [[TMP37]], i32 1
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE24]]
; IC4VF4: [[PRED_LOAD_CONTINUE24]]:
; IC4VF4-NEXT: [[TMP39:%.*]] = phi <4 x half> [ [[TMP33]], %[[PRED_LOAD_CONTINUE22]] ], [ [[TMP38]], %[[PRED_LOAD_IF23]] ]
-; IC4VF4-NEXT: [[TMP40:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
-; IC4VF4-NEXT: br i1 [[TMP40]], label %[[PRED_LOAD_IF25:.*]], label %[[PRED_LOAD_CONTINUE26:.*]]
+; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF25:.*]], label %[[PRED_LOAD_CONTINUE26:.*]]
; IC4VF4: [[PRED_LOAD_IF25]]:
-; IC4VF4-NEXT: [[TMP41:%.*]] = add i16 [[OFFSET_IDX]], -6
-; IC4VF4-NEXT: [[TMP42:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP41]]
+; IC4VF4-NEXT: [[TMP42:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 6
; IC4VF4-NEXT: [[TMP43:%.*]] = load half, ptr [[TMP42]], align 1
; IC4VF4-NEXT: [[TMP44:%.*]] = insertelement <4 x half> [[TMP39]], half [[TMP43]], i32 2
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE26]]
; IC4VF4: [[PRED_LOAD_CONTINUE26]]:
; IC4VF4-NEXT: [[TMP45:%.*]] = phi <4 x half> [ [[TMP39]], %[[PRED_LOAD_CONTINUE24]] ], [ [[TMP44]], %[[PRED_LOAD_IF25]] ]
-; IC4VF4-NEXT: [[TMP46:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
-; IC4VF4-NEXT: br i1 [[TMP46]], label %[[PRED_LOAD_IF27:.*]], label %[[PRED_LOAD_CONTINUE28:.*]]
+; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF27:.*]], label %[[PRED_LOAD_CONTINUE28:.*]]
; IC4VF4: [[PRED_LOAD_IF27]]:
-; IC4VF4-NEXT: [[TMP47:%.*]] = add i16 [[OFFSET_IDX]], -7
-; IC4VF4-NEXT: [[TMP48:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP47]]
+; IC4VF4-NEXT: [[TMP48:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 5
; IC4VF4-NEXT: [[TMP49:%.*]] = load half, ptr [[TMP48]], align 1
; IC4VF4-NEXT: [[TMP50:%.*]] = insertelement <4 x half> [[TMP45]], half [[TMP49]], i32 3
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE28]]
; IC4VF4: [[PRED_LOAD_CONTINUE28]]:
; IC4VF4-NEXT: [[TMP51:%.*]] = phi <4 x half> [ [[TMP45]], %[[PRED_LOAD_CONTINUE26]] ], [ [[TMP50]], %[[PRED_LOAD_IF27]] ]
-; IC4VF4-NEXT: [[TMP52:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
-; IC4VF4-NEXT: br i1 [[TMP52]], label %[[PRED_LOAD_IF29:.*]], label %[[PRED_LOAD_CONTINUE30:.*]]
+; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF29:.*]], label %[[PRED_LOAD_CONTINUE30:.*]]
; IC4VF4: [[PRED_LOAD_IF29]]:
-; IC4VF4-NEXT: [[TMP53:%.*]] = add i16 [[OFFSET_IDX]], -8
-; IC4VF4-NEXT: [[TMP54:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP53]]
+; IC4VF4-NEXT: [[TMP54:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 4
; IC4VF4-NEXT: [[TMP55:%.*]] = load half, ptr [[TMP54]], align 1
; IC4VF4-NEXT: [[TMP56:%.*]] = insertelement <4 x half> poison, half [[TMP55]], i32 0
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE30]]
; IC4VF4: [[PRED_LOAD_CONTINUE30]]:
; IC4VF4-NEXT: [[TMP57:%.*]] = phi <4 x half> [ poison, %[[PRED_LOAD_CONTINUE28]] ], [ [[TMP56]], %[[PRED_LOAD_IF29]] ]
-; IC4VF4-NEXT: [[TMP58:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
-; IC4VF4-NEXT: br i1 [[TMP58]], label %[[PRED_LOAD_IF31:.*]], label %[[PRED_LOAD_CONTINUE32:.*]]
+; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF31:.*]], label %[[PRED_LOAD_CONTINUE32:.*]]
; IC4VF4: [[PRED_LOAD_IF31]]:
-; IC4VF4-NEXT: [[TMP59:%.*]] = add i16 [[OFFSET_IDX]], -9
-; IC4VF4-NEXT: [[TMP60:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP59]]
+; IC4VF4-NEXT: [[TMP60:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 3
; IC4VF4-NEXT: [[TMP61:%.*]] = load half, ptr [[TMP60]], align 1
; IC4VF4-NEXT: [[TMP62:%.*]] = insertelement <4 x half> [[TMP57]], half [[TMP61]], i32 1
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE32]]
; IC4VF4: [[PRED_LOAD_CONTINUE32]]:
; IC4VF4-NEXT: [[TMP63:%.*]] = phi <4 x half> [ [[TMP57]], %[[PRED_LOAD_CONTINUE30]] ], [ [[TMP62]], %[[PRED_LOAD_IF31]] ]
-; IC4VF4-NEXT: [[TMP64:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
-; IC4VF4-NEXT: br i1 [[TMP64]], label %[[PRED_LOAD_IF33:.*]], label %[[PRED_LOAD_CONTINUE34:.*]]
+; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF33:.*]], label %[[PRED_LOAD_CONTINUE34:.*]]
; IC4VF4: [[PRED_LOAD_IF33]]:
-; IC4VF4-NEXT: [[TMP65:%.*]] = add i16 [[OFFSET_IDX]], -10
-; IC4VF4-NEXT: [[TMP66:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP65]]
+; IC4VF4-NEXT: [[TMP66:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 2
; IC4VF4-NEXT: [[TMP67:%.*]] = load half, ptr [[TMP66]], align 1
; IC4VF4-NEXT: [[TMP68:%.*]] = insertelement <4 x half> [[TMP63]], half [[TMP67]], i32 2
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE34]]
; IC4VF4: [[PRED_LOAD_CONTINUE34]]:
; IC4VF4-NEXT: [[TMP69:%.*]] = phi <4 x half> [ [[TMP63]], %[[PRED_LOAD_CONTINUE32]] ], [ [[TMP68]], %[[PRED_LOAD_IF33]] ]
-; IC4VF4-NEXT: [[TMP70:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
-; IC4VF4-NEXT: br i1 [[TMP70]], label %[[PRED_LOAD_IF35:.*]], label %[[PRED_LOAD_CONTINUE36:.*]]
+; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF35:.*]], label %[[PRED_LOAD_CONTINUE36:.*]]
; IC4VF4: [[PRED_LOAD_IF35]]:
-; IC4VF4-NEXT: [[TMP71:%.*]] = add i16 [[OFFSET_IDX]], -11
-; IC4VF4-NEXT: [[TMP72:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP71]]
+; IC4VF4-NEXT: [[TMP72:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 1
; IC4VF4-NEXT: [[TMP73:%.*]] = load half, ptr [[TMP72]], align 1
; IC4VF4-NEXT: [[TMP74:%.*]] = insertelement <4 x half> [[TMP69]], half [[TMP73]], i32 3
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE36]]
; IC4VF4: [[PRED_LOAD_CONTINUE36]]:
; IC4VF4-NEXT: [[TMP75:%.*]] = phi <4 x half> [ [[TMP69]], %[[PRED_LOAD_CONTINUE34]] ], [ [[TMP74]], %[[PRED_LOAD_IF35]] ]
-; IC4VF4-NEXT: [[TMP76:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
-; IC4VF4-NEXT: br i1 [[TMP76]], label %[[PRED_LOAD_IF37:.*]], label %[[PRED_LOAD_CONTINUE38:.*]]
+; IC4VF4-NEXT: br i1 false, label %[[PRED_LOAD_IF37:.*]], label %[[PRED_LOAD_CONTINUE38:.*]]
; IC4VF4: [[PRED_LOAD_IF37]]:
-; IC4VF4-NEXT: [[TMP77:%.*]] = add i16 [[OFFSET_IDX]], -12
-; IC4VF4-NEXT: [[TMP78:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP77]]
+; IC4VF4-NEXT: [[TMP78:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 0
; IC4VF4-NEXT: [[TMP79:%.*]] = load half, ptr [[TMP78]], align 1
; IC4VF4-NEXT: [[TMP80:%.*]] = insertelement <4 x half> poison, half [[TMP79]], i32 0
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE38]]
; IC4VF4: [[PRED_LOAD_CONTINUE38]]:
; IC4VF4-NEXT: [[TMP81:%.*]] = phi <4 x half> [ poison, %[[PRED_LOAD_CONTINUE36]] ], [ [[TMP80]], %[[PRED_LOAD_IF37]] ]
-; IC4VF4-NEXT: [[TMP82:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
-; IC4VF4-NEXT: br i1 [[TMP82]], label %[[PRED_LOAD_IF39:.*]], label %[[PRED_LOAD_CONTINUE40:.*]]
+; IC4VF4-NEXT: br i1 false, label %[[PRED_LOAD_IF39:.*]], label %[[PRED_LOAD_CONTINUE40:.*]]
; IC4VF4: [[PRED_LOAD_IF39]]:
-; IC4VF4-NEXT: [[TMP83:%.*]] = add i16 [[OFFSET_IDX]], -13
-; IC4VF4-NEXT: [[TMP84:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP83]]
+; IC4VF4-NEXT: [[TMP84:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 -1
; IC4VF4-NEXT: [[TMP85:%.*]] = load half, ptr [[TMP84]], align 1
; IC4VF4-NEXT: [[TMP86:%.*]] = insertelement <4 x half> [[TMP81]], half [[TMP85]], i32 1
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE40]]
; IC4VF4: [[PRED_LOAD_CONTINUE40]]:
; IC4VF4-NEXT: [[TMP87:%.*]] = phi <4 x half> [ [[TMP81]], %[[PRED_LOAD_CONTINUE38]] ], [ [[TMP86]], %[[PRED_LOAD_IF39]] ]
-; IC4VF4-NEXT: [[TMP88:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
-; IC4VF4-NEXT: br i1 [[TMP88]], label %[[PRED_LOAD_IF41:.*]], label %[[PRED_LOAD_CONTINUE42:.*]]
+; IC4VF4-NEXT: br i1 false, label %[[PRED_LOAD_IF41:.*]], label %[[PRED_LOAD_CONTINUE42:.*]]
; IC4VF4: [[PRED_LOAD_IF41]]:
-; IC4VF4-NEXT: [[TMP89:%.*]] = add i16 [[OFFSET_IDX]], -14
-; IC4VF4-NEXT: [[TMP90:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP89]]
+; IC4VF4-NEXT: [[TMP90:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 -2
; IC4VF4-NEXT: [[TMP91:%.*]] = load half, ptr [[TMP90]], align 1
; IC4VF4-NEXT: [[TMP92:%.*]] = insertelement <4 x half> [[TMP87]], half [[TMP91]], i32 2
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE42]]
; IC4VF4: [[PRED_LOAD_CONTINUE42]]:
; IC4VF4-NEXT: [[TMP93:%.*]] = phi <4 x half> [ [[TMP87]], %[[PRED_LOAD_CONTINUE40]] ], [ [[TMP92]], %[[PRED_LOAD_IF41]] ]
-; IC4VF4-NEXT: [[TMP94:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
-; IC4VF4-NEXT: br i1 [[TMP94]], label %[[PRED_LOAD_IF43:.*]], label %[[PRED_LOAD_CONTINUE44]]
+; IC4VF4-NEXT: br i1 false, label %[[PRED_LOAD_IF43:.*]], label %[[PRED_LOAD_CONTINUE44:.*]]
; IC4VF4: [[PRED_LOAD_IF43]]:
-; IC4VF4-NEXT: [[TMP95:%.*]] = add i16 [[OFFSET_IDX]], -15
-; IC4VF4-NEXT: [[TMP96:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP95]]
+; IC4VF4-NEXT: [[TMP96:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 -3
; IC4VF4-NEXT: [[TMP97:%.*]] = load half, ptr [[TMP96]], align 1
; IC4VF4-NEXT: [[TMP98:%.*]] = insertelement <4 x half> [[TMP93]], half [[TMP97]], i32 3
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE44]]
@@ -704,22 +592,16 @@ define i16 @select_decreasing_induction_icmp_table_half(half noundef %val) {
; IC4VF4-NEXT: [[TMP101:%.*]] = fcmp ugt <4 x half> [[TMP51]], [[BROADCAST_SPLAT]]
; IC4VF4-NEXT: [[TMP102:%.*]] = fcmp ugt <4 x half> [[TMP75]], [[BROADCAST_SPLAT]]
; IC4VF4-NEXT: [[TMP103:%.*]] = fcmp ugt <4 x half> [[TMP99]], [[BROADCAST_SPLAT]]
-; IC4VF4-NEXT: [[TMP104:%.*]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -1)
-; IC4VF4-NEXT: [[TMP105:%.*]] = add nsw <4 x i16> [[STEP_ADD]], splat (i16 -1)
-; IC4VF4-NEXT: [[TMP106:%.*]] = add nsw <4 x i16> [[STEP_ADD_2]], splat (i16 -1)
-; IC4VF4-NEXT: [[TMP107:%.*]] = add nsw <4 x i16> [[STEP_ADD_3]], splat (i16 -1)
-; IC4VF4-NEXT: [[TMP108]] = select <4 x i1> [[TMP100]], <4 x i16> [[TMP104]], <4 x i16> [[VEC_PHI]]
-; IC4VF4-NEXT: [[TMP109]] = select <4 x i1> [[TMP101]], <4 x i16> [[TMP105]], <4 x i16> [[VEC_PHI1]]
-; IC4VF4-NEXT: [[TMP110]] = select <4 x i1> [[TMP102]], <4 x i16> [[TMP106]], <4 x i16> [[VEC_PHI2]]
-; IC4VF4-NEXT: [[TMP111]] = select <4 x i1> [[TMP103]], <4 x i16> [[TMP107]], <4 x i16> [[VEC_PHI3]]
-; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
-; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i16> [[STEP_ADD_3]], splat (i16 -4)
-; IC4VF4-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; IC4VF4-NEXT: [[TMP76:%.*]] = select <4 x i1> [[TMP100]], <4 x i16> <i16 11, i16 10, i16 9, i16 8>, <4 x i16> splat (i16 32767)
+; IC4VF4-NEXT: [[TMP77:%.*]] = select <4 x i1> [[TMP101]], <4 x i16> <i16 7, i16 6, i16 5, i16 4>, <4 x i16> splat (i16 32767)
+; IC4VF4-NEXT: [[TMP70:%.*]] = select <4 x i1> [[TMP102]], <4 x i16> <i16 3, i16 2, i16 1, i16 0>, <4 x i16> splat (i16 32767)
+; IC4VF4-NEXT: [[TMP71:%.*]] = select <4 x i1> [[TMP103]], <4 x i16> <i16 -1, i16 -2, i16 -3, i16 -4>, <4 x i16> splat (i16 32767)
+; IC4VF4-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; IC4VF4: [[MIDDLE_BLOCK]]:
-; IC4VF4-NEXT: [[TMP112:%.*]] = select <4 x i1> [[TMP0]], <4 x i16> [[TMP108]], <4 x i16> [[VEC_PHI]]
-; IC4VF4-NEXT: [[TMP113:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[TMP109]], <4 x i16> [[VEC_PHI1]]
-; IC4VF4-NEXT: [[TMP114:%.*]] = select <4 x i1> [[TMP2]], <4 x i16> [[TMP110]], <4 x i16> [[VEC_PHI2]]
-; IC4VF4-NEXT: [[TMP115:%.*]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP111]], <4 x i16> [[VEC_PHI3]]
+; IC4VF4-NEXT: [[TMP112:%.*]] = select <4 x i1> splat (i1 true), <4 x i16> [[TMP76]], <4 x i16> splat (i16 32767)
+; IC4VF4-NEXT: [[TMP113:%.*]] = select <4 x i1> splat (i1 true), <4 x i16> [[TMP77]], <4 x i16> splat (i16 32767)
+; IC4VF4-NEXT: [[TMP114:%.*]] = select <4 x i1> splat (i1 true), <4 x i16> [[TMP70]], <4 x i16> splat (i16 32767)
+; IC4VF4-NEXT: [[TMP115:%.*]] = select <4 x i1> zeroinitializer, <4 x i16> [[TMP71]], <4 x i16> splat (i16 32767)
; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP112]], <4 x i16> [[TMP113]])
; IC4VF4-NEXT: [[RDX_MINMAX45:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX]], <4 x i16> [[TMP114]])
; IC4VF4-NEXT: [[RDX_MINMAX46:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX45]], <4 x i16> [[TMP115]])
diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll
index 5e88072517b37..e5ffbe22d2285 100644
--- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll
+++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll
@@ -17,58 +17,43 @@ define void @ptr_depends_on_sdiv(ptr noalias %dst, i16 noundef %off) {
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE4:.*]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 9, i16 10>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE4]] ]
-; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 9, [[DOTCAST]]
-; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <2 x i16> [[VEC_IND]], splat (i16 10)
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
-; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_SDIV_IF:.*]], label %[[PRED_SDIV_CONTINUE:.*]]
+; CHECK-NEXT: br i1 false, label %[[PRED_SDIV_IF:.*]], label %[[PRED_SDIV_CONTINUE:.*]]
; CHECK: [[PRED_SDIV_IF]]:
; CHECK-NEXT: [[TMP2:%.*]] = sdiv i16 24316, [[OFF]]
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i32 0
; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE]]
; CHECK: [[PRED_SDIV_CONTINUE]]:
; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x i16> [ poison, %[[VECTOR_BODY]] ], [ [[TMP3]], %[[PRED_SDIV_IF]] ]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
-; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_SDIV_IF1:.*]], label %[[PRED_SDIV_CONTINUE2:.*]]
+; CHECK-NEXT: br i1 true, label %[[PRED_SDIV_IF1:.*]], label %[[PRED_SDIV_CONTINUE2:.*]]
; CHECK: [[PRED_SDIV_IF1]]:
; CHECK-NEXT: [[TMP18:%.*]] = sdiv i16 24316, [[OFF]]
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i16> [[TMP4]], i16 [[TMP18]], i32 1
; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE2]]
; CHECK: [[PRED_SDIV_CONTINUE2]]:
; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i16> [ [[TMP4]], %[[PRED_SDIV_CONTINUE]] ], [ [[TMP7]], %[[PRED_SDIV_IF1]] ]
-; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i16> [[VEC_IND]], splat (i16 16383)
; CHECK-NEXT: [[TMP22:%.*]] = shl <2 x i16> [[TMP8]], splat (i16 14)
-; CHECK-NEXT: [[TMP23:%.*]] = sub <2 x i16> [[TMP21]], [[TMP22]]
-; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
-; CHECK-NEXT: br i1 [[TMP19]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK-NEXT: [[TMP23:%.*]] = sub <2 x i16> <i16 16392, i16 16393>, [[TMP22]]
+; CHECK-NEXT: br i1 false, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i16> [[TMP23]], i32 0
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP13]]
; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP24]], align 1
-; CHECK-NEXT: [[TMP10:%.*]] = add i16 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP10]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 9
; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP11]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
-; CHECK-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4]]
+; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
; CHECK: [[PRED_STORE_IF3]]:
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i16> [[TMP23]], i32 1
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP25]]
; CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP20]], align 1
-; CHECK-NEXT: [[TMP16:%.*]] = add i16 [[OFFSET_IDX]], 1
-; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP16]]
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 10
; CHECK-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]]
; CHECK: [[PRED_STORE_CONTINUE4]]:
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
-; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br [[EXIT:label %.*]]
-; CHECK: [[SCALAR_PH:.*:]]
+; CHECK-NEXT: br label %[[EXIT:.*]]
;
entry:
br label %loop.header
@@ -111,40 +96,28 @@ define void @ptr_depends_on_possibly_poison_value(ptr noalias %dst, i16 %off) {
; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i16> [[TMP0]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 9, i16 10>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ]
-; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 9, [[DOTCAST]]
-; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i16> [[VEC_IND]], splat (i16 10)
-; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i16> [[VEC_IND]], [[TMP1]]
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
-; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i16> <i16 9, i16 10>, [[TMP1]]
+; CHECK-NEXT: br i1 false, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i16> [[TMP3]], i32 0
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP5]]
; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP14]], align 1
-; CHECK-NEXT: [[TMP7:%.*]] = add i16 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP7]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 9
; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP8]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
-; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
+; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
; CHECK: [[PRED_STORE_IF1]]:
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i16> [[TMP3]], i32 1
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP15]]
; CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP16]], align 1
-; CHECK-NEXT: [[TMP11:%.*]] = add i16 [[OFFSET_IDX]], 1
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP11]]
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 10
; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_CONTINUE2]]:
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
-; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br [[EXIT:label %.*]]
-; CHECK: [[SCALAR_PH:.*:]]
+; CHECK-NEXT: br label %[[EXIT:.*]]
;
entry:
br label %loop.header
@@ -183,38 +156,26 @@ define void @ptr_doesnt_depend_on_poison_or_ub(ptr noalias %dst, i16 noundef %of
; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[TMP0]], [[OFF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 9, i16 10>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ]
-; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 9, [[DOTCAST]]
-; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i16> [[VEC_IND]], splat (i16 10)
-; CHECK-NEXT: [[TMP3:%.*]] = add i16 [[OFFSET_IDX]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = add i16 9, [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr @src, i16 [[TMP3]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 1
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
-; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK-NEXT: br i1 false, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP7:%.*]] = add i16 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP7]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 9
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0
; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP8]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
-; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
+; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
; CHECK: [[PRED_STORE_IF1]]:
-; CHECK-NEXT: [[TMP11:%.*]] = add i16 [[OFFSET_IDX]], 1
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP11]]
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 10
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 1
; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_CONTINUE2]]:
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
-; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br [[EXIT:label %.*]]
-; CHECK: [[SCALAR_PH:.*:]]
+; CHECK-NEXT: br label %[[EXIT:.*]]
;
entry:
br label %loop.header
@@ -252,45 +213,33 @@ define void @ptr_depends_on_possibly_poison_value_from_load(ptr noalias %dst) {
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 9, i16 10>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ]
-; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 9, [[DOTCAST]]
; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr @src, align 1
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[TMP0]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i16> [[VEC_IND]], splat (i16 10)
; CHECK-NEXT: [[TMP2:%.*]] = sub <2 x i16> splat (i16 1), [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i16> [[TMP2]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i16> [[VEC_IND]], [[TMP3]]
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
-; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i16> <i16 9, i16 10>, [[TMP3]]
+; CHECK-NEXT: br i1 false, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i16> [[TMP4]], i32 0
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP6]]
; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP15]], align 1
-; CHECK-NEXT: [[TMP8:%.*]] = add i16 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP8]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 9
; CHECK-NEXT: store i64 [[TMP10]], ptr [[TMP9]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
-; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
+; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
; CHECK: [[PRED_STORE_IF1]]:
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i16> [[TMP4]], i32 1
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP16]]
; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP17]], align 1
-; CHECK-NEXT: [[TMP12:%.*]] = add i16 [[OFFSET_IDX]], 1
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP12]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 10
; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP13]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_CONTINUE2]]:
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
-; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br [[EXIT:label %.*]]
-; CHECK: [[SCALAR_PH:.*:]]
+; CHECK-NEXT: br label %[[EXIT:.*]]
;
entry:
br label %loop.header
@@ -329,41 +278,29 @@ define void @ptr_depends_on_noundef_load(ptr noalias %dst) {
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 9, i16 10>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ]
-; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 9, [[DOTCAST]]
; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr @src, align 1, !noundef [[META10:![0-9]+]]
-; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i16> [[VEC_IND]], splat (i16 10)
; CHECK-NEXT: [[TMP2:%.*]] = sub i16 1, [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = add i16 [[TMP2]], [[TMP0]]
-; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[OFFSET_IDX]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = add i16 9, [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr @src, i16 [[TMP4]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP5]], align 1
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
-; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK-NEXT: br i1 false, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP8:%.*]] = add i16 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP8]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 9
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0
; CHECK-NEXT: store i64 [[TMP10]], ptr [[TMP9]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
-; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
+; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
; CHECK: [[PRED_STORE_IF1]]:
-; CHECK-NEXT: [[TMP12:%.*]] = add i16 [[OFFSET_IDX]], 1
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP12]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 10
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 1
; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP13]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_CONTINUE2]]:
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
-; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br [[EXIT:label %.*]]
-; CHECK: [[SCALAR_PH:.*:]]
+; CHECK-NEXT: br label %[[EXIT:.*]]
;
entry:
br label %loop.header
diff --git a/llvm/test/Transforms/LoopVectorize/predicated-multiple-exits.ll b/llvm/test/Transforms/LoopVectorize/predicated-multiple-exits.ll
index 9a11b0d42667b..51bd6339375b9 100644
--- a/llvm/test/Transforms/LoopVectorize/predicated-multiple-exits.ll
+++ b/llvm/test/Transforms/LoopVectorize/predicated-multiple-exits.ll
@@ -729,20 +729,22 @@ define i32 @diamond_exit_poison_from_speculated_branch() {
; CHECK-NEXT: br i1 [[TMP1]], label %[[VECTOR_EARLY_EXIT_CHECK:.*]], label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[LOOP_END:.*]]
+; CHECK: [[LOOP_END]]:
+; CHECK-NEXT: br label %[[LOOP_END1:.*]]
; CHECK: [[VECTOR_EARLY_EXIT_CHECK]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> splat (i1 true), i1 false)
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> zeroinitializer, i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[VECTOR_EARLY_EXIT_1:.*]]
; CHECK: [[VECTOR_EARLY_EXIT_1]]:
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> <i32 10, i32 11, i32 1, i32 2>, i64 [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br label %[[LOOP_END]]
+; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: br label %[[UNREACHABLE_EXIT:.*]]
; CHECK: [[UNREACHABLE_EXIT]]:
; CHECK-NEXT: call void @llvm.trap()
; CHECK-NEXT: unreachable
-; CHECK: [[LOOP_END]]:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i32 [ [[TMP3]], %[[VECTOR_EARLY_EXIT_1]] ], [ -1, %[[MIDDLE_BLOCK]] ]
+; CHECK: [[LOOP_END1]]:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i32 [ [[TMP3]], %[[VECTOR_EARLY_EXIT_1]] ], [ -1, %[[LOOP_END]] ]
; CHECK-NEXT: ret i32 [[RETVAL]]
;
entry:
@@ -797,20 +799,22 @@ define i32 @diamond_exit_poison_cond_second() {
; CHECK-NEXT: br i1 [[TMP1]], label %[[VECTOR_EARLY_EXIT_CHECK:.*]], label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[LOOP_END:.*]]
+; CHECK: [[LOOP_END]]:
+; CHECK-NEXT: br label %[[LOOP_END1:.*]]
; CHECK: [[VECTOR_EARLY_EXIT_CHECK]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> <i1 true, i1 false, i1 false, i1 false>, i1 false)
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> zeroinitializer, i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[VECTOR_EARLY_EXIT_1:.*]]
; CHECK: [[VECTOR_EARLY_EXIT_1]]:
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> <i32 10, i32 11, i32 12, i32 13>, i64 [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br label %[[LOOP_END]]
+; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: br label %[[UNREACHABLE_EXIT:.*]]
; CHECK: [[UNREACHABLE_EXIT]]:
; CHECK-NEXT: call void @llvm.trap()
; CHECK-NEXT: unreachable
-; CHECK: [[LOOP_END]]:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i32 [ [[TMP3]], %[[VECTOR_EARLY_EXIT_1]] ], [ -1, %[[MIDDLE_BLOCK]] ]
+; CHECK: [[LOOP_END1]]:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i32 [ [[TMP3]], %[[VECTOR_EARLY_EXIT_1]] ], [ -1, %[[LOOP_END]] ]
; CHECK-NEXT: ret i32 [[RETVAL]]
;
entry:
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll b/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll
index c9cc8060ff498..033d32a9b8bc7 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll
@@ -48,57 +48,43 @@ define i32 @chained_smax(i32 %x, ptr %src) {
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6:.*]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP26:%.*]], %[[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 1)
-; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[BROADCAST_SPLAT]], <4 x i32> [[VEC_PHI]])
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
-; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[BROADCAST_SPLAT]], <4 x i32> zeroinitializer)
+; CHECK-NEXT: br i1 true, label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; CHECK: [[PRED_LOAD_IF]]:
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
; CHECK: [[PRED_LOAD_CONTINUE]]:
; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP6]], %[[PRED_LOAD_IF]] ]
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
-; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2:.*]]
+; CHECK-NEXT: br i1 true, label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2:.*]]
; CHECK: [[PRED_LOAD_IF1]]:
-; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[TMP9]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 1
; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP11]], i32 1
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
; CHECK: [[PRED_LOAD_CONTINUE2]]:
; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i32> [ [[TMP7]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF1]] ]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
-; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]]
+; CHECK-NEXT: br i1 false, label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]]
; CHECK: [[PRED_LOAD_IF3]]:
-; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[TMP15]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 2
; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP17]], i32 2
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE4]]
; CHECK: [[PRED_LOAD_CONTINUE4]]:
; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP13]], %[[PRED_LOAD_CONTINUE2]] ], [ [[TMP18]], %[[PRED_LOAD_IF3]] ]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
-; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6]]
+; CHECK-NEXT: br i1 false, label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6:.*]]
; CHECK: [[PRED_LOAD_IF5]]:
-; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT: [[TMP22:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[TMP21]]
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 3
; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP23]], i32 3
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE6]]
; CHECK: [[PRED_LOAD_CONTINUE6]]:
-; CHECK-NEXT: [[TMP25:%.*]] = phi <4 x i32> [ [[TMP19]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], %[[PRED_LOAD_IF5]] ]
-; CHECK-NEXT: [[TMP26]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP25]], <4 x i32> [[TMP1]])
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
-; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP20:%.*]] = phi <4 x i32> [ [[TMP19]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], %[[PRED_LOAD_IF5]] ]
+; CHECK-NEXT: [[TMP21:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP20]], <4 x i32> [[TMP1]])
+; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[TMP27:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP26]], <4 x i32> [[VEC_PHI]]
+; CHECK-NEXT: [[TMP27:%.*]] = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i32> [[TMP21]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP28:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP27]])
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll
index 97d57a0cf83a0..3b7a0d348fbfb 100644
--- a/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll
+++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll
@@ -26,6 +26,8 @@ define noundef i32 @f(i32 noundef %g) {
; VF4IC2-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; VF4IC2-NEXT: br i1 [[TMP7]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
; VF4IC2: [[MIDDLE_BLOCK]]:
+; VF4IC2-NEXT: br label %[[MIDDLE_BLOCK1:.*]]
+; VF4IC2: [[MIDDLE_BLOCK1]]:
; VF4IC2-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
; VF4IC2-NEXT: br label %[[RETURN:.*]]
; VF4IC2: [[VECTOR_EARLY_EXIT]]:
@@ -38,7 +40,7 @@ define noundef i32 @f(i32 noundef %g) {
; VF4IC2-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32
; VF4IC2-NEXT: br label %[[RETURN]]
; VF4IC2: [[RETURN]]:
-; VF4IC2-NEXT: [[RES:%.*]] = phi i32 [ [[TMP8]], %[[MIDDLE_BLOCK]] ], [ [[TMP15]], %[[VECTOR_EARLY_EXIT]] ]
+; VF4IC2-NEXT: [[RES:%.*]] = phi i32 [ [[TMP8]], %[[MIDDLE_BLOCK1]] ], [ [[TMP15]], %[[VECTOR_EARLY_EXIT]] ]
; VF4IC2-NEXT: ret i32 [[RES]]
;
; VF8IC1-LABEL: define noundef i32 @f(
@@ -56,6 +58,8 @@ define noundef i32 @f(i32 noundef %g) {
; VF8IC1-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP8]])
; VF8IC1-NEXT: br i1 [[TMP3]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
; VF8IC1: [[MIDDLE_BLOCK]]:
+; VF8IC1-NEXT: br label %[[MIDDLE_BLOCK1:.*]]
+; VF8IC1: [[MIDDLE_BLOCK1]]:
; VF8IC1-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP1]], i32 7
; VF8IC1-NEXT: br label %[[RETURN:.*]]
; VF8IC1: [[VECTOR_EARLY_EXIT]]:
@@ -63,7 +67,7 @@ define noundef i32 @f(i32 noundef %g) {
; VF8IC1-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; VF8IC1-NEXT: br label %[[RETURN]]
; VF8IC1: [[RETURN]]:
-; VF8IC1-NEXT: [[RES:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ [[TMP6]], %[[VECTOR_EARLY_EXIT]] ]
+; VF8IC1-NEXT: [[RES:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK1]] ], [ [[TMP6]], %[[VECTOR_EARLY_EXIT]] ]
; VF8IC1-NEXT: ret i32 [[RES]]
;
entry:
diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll
index b93215035cebf..db4623f7138fb 100644
--- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll
+++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll
@@ -52,11 +52,13 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosyn
; VF8UF2-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]])
; VF8UF2-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
; VF8UF2: [[MIDDLE_BLOCK]]:
+; VF8UF2-NEXT: br label %[[MIDDLE_SPLIT:.*]]
+; VF8UF2: [[MIDDLE_SPLIT]]:
; VF8UF2-NEXT: br label %[[EXIT:.*]]
; VF8UF2: [[VECTOR_EARLY_EXIT]]:
; VF8UF2-NEXT: br label %[[EXIT]]
; VF8UF2: [[EXIT]]:
-; VF8UF2-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[VECTOR_EARLY_EXIT]] ], [ 1, %[[MIDDLE_BLOCK]] ]
+; VF8UF2-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[VECTOR_EARLY_EXIT]] ], [ 1, %[[MIDDLE_SPLIT]] ]
; VF8UF2-NEXT: ret i8 [[RES]]
;
; VF16UF1-LABEL: define i8 @test_early_exit_max_tc_less_than_16(
@@ -72,11 +74,13 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosyn
; VF16UF1-NEXT: [[TMP2:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP1]])
; VF16UF1-NEXT: br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
; VF16UF1: [[MIDDLE_BLOCK]]:
+; VF16UF1-NEXT: br label %[[MIDDLE_SPLIT:.*]]
+; VF16UF1: [[MIDDLE_SPLIT]]:
; VF16UF1-NEXT: br label %[[EXIT:.*]]
; VF16UF1: [[VECTOR_EARLY_EXIT]]:
; VF16UF1-NEXT: br label %[[EXIT]]
; VF16UF1: [[EXIT]]:
-; VF16UF1-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[VECTOR_EARLY_EXIT]] ], [ 1, %[[MIDDLE_BLOCK]] ]
+; VF16UF1-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[VECTOR_EARLY_EXIT]] ], [ 1, %[[MIDDLE_SPLIT]] ]
; VF16UF1-NEXT: ret i8 [[RES]]
;
entry:
@@ -146,6 +150,8 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer
; VF8UF2-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]])
; VF8UF2-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
; VF8UF2: [[MIDDLE_BLOCK]]:
+; VF8UF2-NEXT: br label %[[MIDDLE_SPLIT:.*]]
+; VF8UF2: [[MIDDLE_SPLIT]]:
; VF8UF2-NEXT: br label %[[EXIT:.*]]
; VF8UF2: [[VECTOR_EARLY_EXIT]]:
; VF8UF2-NEXT: [[TMP5:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP2]], i1 false)
@@ -156,7 +162,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer
; VF8UF2-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 [[TMP7]]
; VF8UF2-NEXT: br label %[[EXIT]]
; VF8UF2: [[EXIT]]:
-; VF8UF2-NEXT: [[RES:%.*]] = phi i64 [ [[TMP11]], %[[VECTOR_EARLY_EXIT]] ], [ 1, %[[MIDDLE_BLOCK]] ]
+; VF8UF2-NEXT: [[RES:%.*]] = phi i64 [ [[TMP11]], %[[VECTOR_EARLY_EXIT]] ], [ 1, %[[MIDDLE_SPLIT]] ]
; VF8UF2-NEXT: ret i64 [[RES]]
;
; VF16UF1-LABEL: define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(
@@ -172,12 +178,14 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer
; VF16UF1-NEXT: [[TMP2:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP1]])
; VF16UF1-NEXT: br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
; VF16UF1: [[MIDDLE_BLOCK]]:
+; VF16UF1-NEXT: br label %[[MIDDLE_SPLIT:.*]]
+; VF16UF1: [[MIDDLE_SPLIT]]:
; VF16UF1-NEXT: br label %[[EXIT:.*]]
; VF16UF1: [[VECTOR_EARLY_EXIT]]:
; VF16UF1-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v16i1(<16 x i1> [[TMP3]], i1 false)
; VF16UF1-NEXT: br label %[[EXIT]]
; VF16UF1: [[EXIT]]:
-; VF16UF1-NEXT: [[RES:%.*]] = phi i64 [ [[FIRST_ACTIVE_LANE]], %[[VECTOR_EARLY_EXIT]] ], [ 1, %[[MIDDLE_BLOCK]] ]
+; VF16UF1-NEXT: [[RES:%.*]] = phi i64 [ [[FIRST_ACTIVE_LANE]], %[[VECTOR_EARLY_EXIT]] ], [ 1, %[[MIDDLE_SPLIT]] ]
; VF16UF1-NEXT: ret i64 [[RES]]
;
entry:
@@ -258,12 +266,14 @@ define i8 @test_early_exit_max_vector_tc_eq_16(ptr dereferenceable(17) %A) nosyn
; VF8UF2-NEXT: br i1 [[TMP5]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
; VF8UF2: [[MIDDLE_BLOCK]]:
; VF8UF2-NEXT: br label %[[SCALAR_PH:.*]]
+; VF8UF2: [[SCALAR_PH]]:
+; VF8UF2-NEXT: br label %[[SCALAR_PH_SPLIT:.*]]
; VF8UF2: [[VECTOR_EARLY_EXIT]]:
; VF8UF2-NEXT: br label %[[EXIT:.*]]
-; VF8UF2: [[SCALAR_PH]]:
+; VF8UF2: [[SCALAR_PH_SPLIT]]:
; VF8UF2-NEXT: br label %[[LOOP_HEADER:.*]]
; VF8UF2: [[LOOP_HEADER]]:
-; VF8UF2-NEXT: [[IV:%.*]] = phi i64 [ 16, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; VF8UF2-NEXT: [[IV:%.*]] = phi i64 [ 16, %[[SCALAR_PH_SPLIT]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; VF8UF2-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
; VF8UF2-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
; VF8UF2-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0
@@ -290,12 +300,14 @@ define i8 @test_early_exit_max_vector_tc_eq_16(ptr dereferenceable(17) %A) nosyn
; VF16UF1-NEXT: br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
; VF16UF1: [[MIDDLE_BLOCK]]:
; VF16UF1-NEXT: br label %[[SCALAR_PH:.*]]
+; VF16UF1: [[SCALAR_PH]]:
+; VF16UF1-NEXT: br label %[[SCALAR_PH_SPLIT:.*]]
; VF16UF1: [[VECTOR_EARLY_EXIT]]:
; VF16UF1-NEXT: br label %[[EXIT:.*]]
-; VF16UF1: [[SCALAR_PH]]:
+; VF16UF1: [[SCALAR_PH_SPLIT]]:
; VF16UF1-NEXT: br label %[[LOOP_HEADER:.*]]
; VF16UF1: [[LOOP_HEADER]]:
-; VF16UF1-NEXT: [[IV:%.*]] = phi i64 [ 16, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; VF16UF1-NEXT: [[IV:%.*]] = phi i64 [ 16, %[[SCALAR_PH_SPLIT]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; VF16UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
; VF16UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
; VF16UF1-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0
@@ -368,28 +380,22 @@ define i1 @test_early_exit_max_tc_less_than_16_non_canonical_iv(ptr dereferencea
; VF8UF2: [[VECTOR_PH]]:
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF2: [[VECTOR_BODY]]:
-; VF8UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
-; VF8UF2-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY_INTERIM]] ]
-; VF8UF2-NEXT: [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
-; VF8UF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 2, [[INDEX]]
-; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET_IDX]]
+; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 2
; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1
; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1
; VF8UF2-NEXT: [[TMP4:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
; VF8UF2-NEXT: [[TMP5:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD1]], zeroinitializer
-; VF8UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; VF8UF2-NEXT: [[TMP6:%.*]] = freeze <8 x i1> [[TMP4]]
; VF8UF2-NEXT: [[TMP7:%.*]] = freeze <8 x i1> [[TMP5]]
; VF8UF2-NEXT: [[TMP8:%.*]] = or <8 x i1> [[TMP6]], [[TMP7]]
; VF8UF2-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP8]])
-; VF8UF2-NEXT: [[VEC_IND_NEXT]] = add nsw <8 x i64> [[STEP_ADD]], splat (i64 8)
-; VF8UF2-NEXT: br i1 [[TMP9]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM]]
+; VF8UF2-NEXT: br i1 [[TMP9]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM:.*]]
; VF8UF2: [[VECTOR_BODY_INTERIM]]:
-; VF8UF2-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; VF8UF2-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; VF8UF2: [[MIDDLE_BLOCK]]:
; VF8UF2-NEXT: [[TMP10:%.*]] = zext <8 x i8> [[WIDE_LOAD1]] to <8 x i64>
-; VF8UF2-NEXT: [[TMP3:%.*]] = icmp eq <8 x i64> [[TMP10]], [[STEP_ADD]]
+; VF8UF2-NEXT: [[TMP3:%.*]] = icmp eq <8 x i64> [[TMP10]], <i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17>
; VF8UF2-NEXT: [[TMP12:%.*]] = extractelement <8 x i1> [[TMP3]], i32 7
; VF8UF2-NEXT: br label %[[EXIT:.*]]
; VF8UF2: [[VECTOR_EARLY_EXIT]]:
@@ -405,22 +411,17 @@ define i1 @test_early_exit_max_tc_less_than_16_non_canonical_iv(ptr dereferencea
; VF16UF1: [[VECTOR_PH]]:
; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]]
; VF16UF1: [[VECTOR_BODY]]:
-; VF16UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
-; VF16UF1-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY_INTERIM]] ]
-; VF16UF1-NEXT: [[OFFSET_IDX:%.*]] = add i64 2, [[INDEX]]
-; VF16UF1-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET_IDX]]
+; VF16UF1-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 2
; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1
; VF16UF1-NEXT: [[TMP3:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], zeroinitializer
-; VF16UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; VF16UF1-NEXT: [[TMP4:%.*]] = freeze <16 x i1> [[TMP3]]
; VF16UF1-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP4]])
-; VF16UF1-NEXT: [[VEC_IND_NEXT]] = add nsw <16 x i64> [[VEC_IND]], splat (i64 16)
-; VF16UF1-NEXT: br i1 [[TMP5]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM]]
+; VF16UF1-NEXT: br i1 [[TMP5]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM:.*]]
; VF16UF1: [[VECTOR_BODY_INTERIM]]:
-; VF16UF1-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; VF16UF1-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; VF16UF1: [[MIDDLE_BLOCK]]:
; VF16UF1-NEXT: [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i64>
-; VF16UF1-NEXT: [[TMP2:%.*]] = icmp eq <16 x i64> [[TMP6]], [[VEC_IND]]
+; VF16UF1-NEXT: [[TMP2:%.*]] = icmp eq <16 x i64> [[TMP6]], <i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17>
; VF16UF1-NEXT: [[TMP8:%.*]] = extractelement <16 x i1> [[TMP2]], i32 15
; VF16UF1-NEXT: br label %[[EXIT:.*]]
; VF16UF1: [[VECTOR_EARLY_EXIT]]:
diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
index 5da6fc3179043..02846aba50f72 100644
--- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
+++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
@@ -1211,6 +1211,272 @@ loop:
exit:
ret void
}
+
+; Test that a first-order recurrence with a single vector iteration (where the
+; vector loop backedge is removed) does not crash.
+define void @first_order_recurrence_single_vector_iteration(ptr noalias %pkt, ptr noalias %dst) {
+; VF8UF1-LABEL: define void @first_order_recurrence_single_vector_iteration(
+; VF8UF1-SAME: ptr noalias [[PKT:%.*]], ptr noalias [[DST:%.*]]) {
+; VF8UF1-NEXT: [[ENTRY:.*:]]
+; VF8UF1-NEXT: br label %[[VECTOR_PH:.*]]
+; VF8UF1: [[VECTOR_PH]]:
+; VF8UF1-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF8UF1: [[VECTOR_BODY]]:
+; VF8UF1-NEXT: [[TMP0:%.*]] = load i8, ptr [[PKT]], align 1
+; VF8UF1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i8> poison, i8 [[TMP0]], i64 0
+; VF8UF1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i8> [[BROADCAST_SPLATINSERT]], <8 x i8> poison, <8 x i32> zeroinitializer
+; VF8UF1-NEXT: [[TMP1:%.*]] = shufflevector <8 x i8> zeroinitializer, <8 x i8> [[BROADCAST_SPLAT]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
+; VF8UF1-NEXT: [[TMP2:%.*]] = extractelement <8 x i8> [[TMP1]], i32 7
+; VF8UF1-NEXT: store i8 [[TMP2]], ptr [[DST]], align 1
+; VF8UF1-NEXT: br label %[[MIDDLE_BLOCK:.*]]
+; VF8UF1: [[MIDDLE_BLOCK]]:
+; VF8UF1-NEXT: br label %[[EXIT:.*]]
+; VF8UF1: [[EXIT]]:
+; VF8UF1-NEXT: ret void
+;
+; VF8UF2-LABEL: define void @first_order_recurrence_single_vector_iteration(
+; VF8UF2-SAME: ptr noalias [[PKT:%.*]], ptr noalias [[DST:%.*]]) {
+; VF8UF2-NEXT: [[ENTRY:.*:]]
+; VF8UF2-NEXT: br label %[[VECTOR_PH:.*]]
+; VF8UF2: [[VECTOR_PH]]:
+; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF8UF2: [[VECTOR_BODY]]:
+; VF8UF2-NEXT: [[TMP0:%.*]] = load i8, ptr [[PKT]], align 1
+; VF8UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i8> poison, i8 [[TMP0]], i64 0
+; VF8UF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i8> [[BROADCAST_SPLATINSERT]], <8 x i8> poison, <8 x i32> zeroinitializer
+; VF8UF2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i8> zeroinitializer, <8 x i8> [[BROADCAST_SPLAT]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
+; VF8UF2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i8> [[BROADCAST_SPLAT]], <8 x i8> [[BROADCAST_SPLAT]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
+; VF8UF2-NEXT: br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; VF8UF2: [[PRED_STORE_IF]]:
+; VF8UF2-NEXT: [[TMP3:%.*]] = extractelement <8 x i8> [[TMP1]], i32 0
+; VF8UF2-NEXT: store i8 [[TMP3]], ptr [[DST]], align 1
+; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE]]
+; VF8UF2: [[PRED_STORE_CONTINUE]]:
+; VF8UF2-NEXT: br i1 true, label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; VF8UF2: [[PRED_STORE_IF1]]:
+; VF8UF2-NEXT: [[TMP4:%.*]] = extractelement <8 x i8> [[TMP1]], i32 1
+; VF8UF2-NEXT: store i8 [[TMP4]], ptr [[DST]], align 1
+; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE2]]
+; VF8UF2: [[PRED_STORE_CONTINUE2]]:
+; VF8UF2-NEXT: br i1 true, label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; VF8UF2: [[PRED_STORE_IF3]]:
+; VF8UF2-NEXT: [[TMP5:%.*]] = extractelement <8 x i8> [[TMP1]], i32 2
+; VF8UF2-NEXT: store i8 [[TMP5]], ptr [[DST]], align 1
+; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE4]]
+; VF8UF2: [[PRED_STORE_CONTINUE4]]:
+; VF8UF2-NEXT: br i1 true, label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
+; VF8UF2: [[PRED_STORE_IF5]]:
+; VF8UF2-NEXT: [[TMP6:%.*]] = extractelement <8 x i8> [[TMP1]], i32 3
+; VF8UF2-NEXT: store i8 [[TMP6]], ptr [[DST]], align 1
+; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE6]]
+; VF8UF2: [[PRED_STORE_CONTINUE6]]:
+; VF8UF2-NEXT: br i1 true, label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; VF8UF2: [[PRED_STORE_IF7]]:
+; VF8UF2-NEXT: [[TMP7:%.*]] = extractelement <8 x i8> [[TMP1]], i32 4
+; VF8UF2-NEXT: store i8 [[TMP7]], ptr [[DST]], align 1
+; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE8]]
+; VF8UF2: [[PRED_STORE_CONTINUE8]]:
+; VF8UF2-NEXT: br i1 true, label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; VF8UF2: [[PRED_STORE_IF9]]:
+; VF8UF2-NEXT: [[TMP8:%.*]] = extractelement <8 x i8> [[TMP1]], i32 5
+; VF8UF2-NEXT: store i8 [[TMP8]], ptr [[DST]], align 1
+; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE10]]
+; VF8UF2: [[PRED_STORE_CONTINUE10]]:
+; VF8UF2-NEXT: br i1 true, label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
+; VF8UF2: [[PRED_STORE_IF11]]:
+; VF8UF2-NEXT: [[TMP9:%.*]] = extractelement <8 x i8> [[TMP1]], i32 6
+; VF8UF2-NEXT: store i8 [[TMP9]], ptr [[DST]], align 1
+; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE12]]
+; VF8UF2: [[PRED_STORE_CONTINUE12]]:
+; VF8UF2-NEXT: br i1 true, label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
+; VF8UF2: [[PRED_STORE_IF13]]:
+; VF8UF2-NEXT: [[TMP10:%.*]] = extractelement <8 x i8> [[TMP1]], i32 7
+; VF8UF2-NEXT: store i8 [[TMP10]], ptr [[DST]], align 1
+; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE14]]
+; VF8UF2: [[PRED_STORE_CONTINUE14]]:
+; VF8UF2-NEXT: br i1 false, label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
+; VF8UF2: [[PRED_STORE_IF15]]:
+; VF8UF2-NEXT: [[TMP11:%.*]] = extractelement <8 x i8> [[TMP2]], i32 0
+; VF8UF2-NEXT: store i8 [[TMP11]], ptr [[DST]], align 1
+; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE16]]
+; VF8UF2: [[PRED_STORE_CONTINUE16]]:
+; VF8UF2-NEXT: br i1 false, label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
+; VF8UF2: [[PRED_STORE_IF17]]:
+; VF8UF2-NEXT: [[TMP12:%.*]] = extractelement <8 x i8> [[TMP2]], i32 1
+; VF8UF2-NEXT: store i8 [[TMP12]], ptr [[DST]], align 1
+; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE18]]
+; VF8UF2: [[PRED_STORE_CONTINUE18]]:
+; VF8UF2-NEXT: br i1 false, label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
+; VF8UF2: [[PRED_STORE_IF19]]:
+; VF8UF2-NEXT: [[TMP13:%.*]] = extractelement <8 x i8> [[TMP2]], i32 2
+; VF8UF2-NEXT: store i8 [[TMP13]], ptr [[DST]], align 1
+; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE20]]
+; VF8UF2: [[PRED_STORE_CONTINUE20]]:
+; VF8UF2-NEXT: br i1 false, label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
+; VF8UF2: [[PRED_STORE_IF21]]:
+; VF8UF2-NEXT: [[TMP14:%.*]] = extractelement <8 x i8> [[TMP2]], i32 3
+; VF8UF2-NEXT: store i8 [[TMP14]], ptr [[DST]], align 1
+; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE22]]
+; VF8UF2: [[PRED_STORE_CONTINUE22]]:
+; VF8UF2-NEXT: br i1 false, label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
+; VF8UF2: [[PRED_STORE_IF23]]:
+; VF8UF2-NEXT: [[TMP15:%.*]] = extractelement <8 x i8> [[TMP2]], i32 4
+; VF8UF2-NEXT: store i8 [[TMP15]], ptr [[DST]], align 1
+; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE24]]
+; VF8UF2: [[PRED_STORE_CONTINUE24]]:
+; VF8UF2-NEXT: br i1 false, label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
+; VF8UF2: [[PRED_STORE_IF25]]:
+; VF8UF2-NEXT: [[TMP16:%.*]] = extractelement <8 x i8> [[TMP2]], i32 5
+; VF8UF2-NEXT: store i8 [[TMP16]], ptr [[DST]], align 1
+; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE26]]
+; VF8UF2: [[PRED_STORE_CONTINUE26]]:
+; VF8UF2-NEXT: br i1 false, label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
+; VF8UF2: [[PRED_STORE_IF27]]:
+; VF8UF2-NEXT: [[TMP17:%.*]] = extractelement <8 x i8> [[TMP2]], i32 6
+; VF8UF2-NEXT: store i8 [[TMP17]], ptr [[DST]], align 1
+; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE28]]
+; VF8UF2: [[PRED_STORE_CONTINUE28]]:
+; VF8UF2-NEXT: br i1 false, label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30:.*]]
+; VF8UF2: [[PRED_STORE_IF29]]:
+; VF8UF2-NEXT: [[TMP18:%.*]] = extractelement <8 x i8> [[TMP2]], i32 7
+; VF8UF2-NEXT: store i8 [[TMP18]], ptr [[DST]], align 1
+; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE30]]
+; VF8UF2: [[PRED_STORE_CONTINUE30]]:
+; VF8UF2-NEXT: br label %[[MIDDLE_BLOCK:.*]]
+; VF8UF2: [[MIDDLE_BLOCK]]:
+; VF8UF2-NEXT: br label %[[EXIT:.*]]
+; VF8UF2: [[EXIT]]:
+; VF8UF2-NEXT: ret void
+;
+; VF16UF1-LABEL: define void @first_order_recurrence_single_vector_iteration(
+; VF16UF1-SAME: ptr noalias [[PKT:%.*]], ptr noalias [[DST:%.*]]) {
+; VF16UF1-NEXT: [[ENTRY:.*:]]
+; VF16UF1-NEXT: br label %[[VECTOR_PH:.*]]
+; VF16UF1: [[VECTOR_PH]]:
+; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF16UF1: [[VECTOR_BODY]]:
+; VF16UF1-NEXT: [[TMP0:%.*]] = load i8, ptr [[PKT]], align 1
+; VF16UF1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i64 0
+; VF16UF1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
+; VF16UF1-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> zeroinitializer, <16 x i8> [[BROADCAST_SPLAT]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
+; VF16UF1-NEXT: br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; VF16UF1: [[PRED_STORE_IF]]:
+; VF16UF1-NEXT: [[TMP2:%.*]] = extractelement <16 x i8> [[TMP1]], i32 0
+; VF16UF1-NEXT: store i8 [[TMP2]], ptr [[DST]], align 1
+; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE]]
+; VF16UF1: [[PRED_STORE_CONTINUE]]:
+; VF16UF1-NEXT: br i1 true, label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; VF16UF1: [[PRED_STORE_IF1]]:
+; VF16UF1-NEXT: [[TMP3:%.*]] = extractelement <16 x i8> [[TMP1]], i32 1
+; VF16UF1-NEXT: store i8 [[TMP3]], ptr [[DST]], align 1
+; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE2]]
+; VF16UF1: [[PRED_STORE_CONTINUE2]]:
+; VF16UF1-NEXT: br i1 true, label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; VF16UF1: [[PRED_STORE_IF3]]:
+; VF16UF1-NEXT: [[TMP4:%.*]] = extractelement <16 x i8> [[TMP1]], i32 2
+; VF16UF1-NEXT: store i8 [[TMP4]], ptr [[DST]], align 1
+; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE4]]
+; VF16UF1: [[PRED_STORE_CONTINUE4]]:
+; VF16UF1-NEXT: br i1 true, label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
+; VF16UF1: [[PRED_STORE_IF5]]:
+; VF16UF1-NEXT: [[TMP5:%.*]] = extractelement <16 x i8> [[TMP1]], i32 3
+; VF16UF1-NEXT: store i8 [[TMP5]], ptr [[DST]], align 1
+; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE6]]
+; VF16UF1: [[PRED_STORE_CONTINUE6]]:
+; VF16UF1-NEXT: br i1 true, label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; VF16UF1: [[PRED_STORE_IF7]]:
+; VF16UF1-NEXT: [[TMP6:%.*]] = extractelement <16 x i8> [[TMP1]], i32 4
+; VF16UF1-NEXT: store i8 [[TMP6]], ptr [[DST]], align 1
+; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE8]]
+; VF16UF1: [[PRED_STORE_CONTINUE8]]:
+; VF16UF1-NEXT: br i1 true, label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; VF16UF1: [[PRED_STORE_IF9]]:
+; VF16UF1-NEXT: [[TMP7:%.*]] = extractelement <16 x i8> [[TMP1]], i32 5
+; VF16UF1-NEXT: store i8 [[TMP7]], ptr [[DST]], align 1
+; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE10]]
+; VF16UF1: [[PRED_STORE_CONTINUE10]]:
+; VF16UF1-NEXT: br i1 true, label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
+; VF16UF1: [[PRED_STORE_IF11]]:
+; VF16UF1-NEXT: [[TMP8:%.*]] = extractelement <16 x i8> [[TMP1]], i32 6
+; VF16UF1-NEXT: store i8 [[TMP8]], ptr [[DST]], align 1
+; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE12]]
+; VF16UF1: [[PRED_STORE_CONTINUE12]]:
+; VF16UF1-NEXT: br i1 true, label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
+; VF16UF1: [[PRED_STORE_IF13]]:
+; VF16UF1-NEXT: [[TMP9:%.*]] = extractelement <16 x i8> [[TMP1]], i32 7
+; VF16UF1-NEXT: store i8 [[TMP9]], ptr [[DST]], align 1
+; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE14]]
+; VF16UF1: [[PRED_STORE_CONTINUE14]]:
+; VF16UF1-NEXT: br i1 false, label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
+; VF16UF1: [[PRED_STORE_IF15]]:
+; VF16UF1-NEXT: [[TMP10:%.*]] = extractelement <16 x i8> [[TMP1]], i32 8
+; VF16UF1-NEXT: store i8 [[TMP10]], ptr [[DST]], align 1
+; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE16]]
+; VF16UF1: [[PRED_STORE_CONTINUE16]]:
+; VF16UF1-NEXT: br i1 false, label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
+; VF16UF1: [[PRED_STORE_IF17]]:
+; VF16UF1-NEXT: [[TMP11:%.*]] = extractelement <16 x i8> [[TMP1]], i32 9
+; VF16UF1-NEXT: store i8 [[TMP11]], ptr [[DST]], align 1
+; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE18]]
+; VF16UF1: [[PRED_STORE_CONTINUE18]]:
+; VF16UF1-NEXT: br i1 false, label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
+; VF16UF1: [[PRED_STORE_IF19]]:
+; VF16UF1-NEXT: [[TMP12:%.*]] = extractelement <16 x i8> [[TMP1]], i32 10
+; VF16UF1-NEXT: store i8 [[TMP12]], ptr [[DST]], align 1
+; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE20]]
+; VF16UF1: [[PRED_STORE_CONTINUE20]]:
+; VF16UF1-NEXT: br i1 false, label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
+; VF16UF1: [[PRED_STORE_IF21]]:
+; VF16UF1-NEXT: [[TMP13:%.*]] = extractelement <16 x i8> [[TMP1]], i32 11
+; VF16UF1-NEXT: store i8 [[TMP13]], ptr [[DST]], align 1
+; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE22]]
+; VF16UF1: [[PRED_STORE_CONTINUE22]]:
+; VF16UF1-NEXT: br i1 false, label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
+; VF16UF1: [[PRED_STORE_IF23]]:
+; VF16UF1-NEXT: [[TMP14:%.*]] = extractelement <16 x i8> [[TMP1]], i32 12
+; VF16UF1-NEXT: store i8 [[TMP14]], ptr [[DST]], align 1
+; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE24]]
+; VF16UF1: [[PRED_STORE_CONTINUE24]]:
+; VF16UF1-NEXT: br i1 false, label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
+; VF16UF1: [[PRED_STORE_IF25]]:
+; VF16UF1-NEXT: [[TMP15:%.*]] = extractelement <16 x i8> [[TMP1]], i32 13
+; VF16UF1-NEXT: store i8 [[TMP15]], ptr [[DST]], align 1
+; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE26]]
+; VF16UF1: [[PRED_STORE_CONTINUE26]]:
+; VF16UF1-NEXT: br i1 false, label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
+; VF16UF1: [[PRED_STORE_IF27]]:
+; VF16UF1-NEXT: [[TMP16:%.*]] = extractelement <16 x i8> [[TMP1]], i32 14
+; VF16UF1-NEXT: store i8 [[TMP16]], ptr [[DST]], align 1
+; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE28]]
+; VF16UF1: [[PRED_STORE_CONTINUE28]]:
+; VF16UF1-NEXT: br i1 false, label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30:.*]]
+; VF16UF1: [[PRED_STORE_IF29]]:
+; VF16UF1-NEXT: [[TMP17:%.*]] = extractelement <16 x i8> [[TMP1]], i32 15
+; VF16UF1-NEXT: store i8 [[TMP17]], ptr [[DST]], align 1
+; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE30]]
+; VF16UF1: [[PRED_STORE_CONTINUE30]]:
+; VF16UF1-NEXT: br label %[[MIDDLE_BLOCK:.*]]
+; VF16UF1: [[MIDDLE_BLOCK]]:
+; VF16UF1-NEXT: br label %[[EXIT:.*]]
+; VF16UF1: [[EXIT]]:
+; VF16UF1-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %recur = phi i8 [ 0, %entry ], [ %load, %loop ]
+ %load = load i8, ptr %pkt, align 1
+ store i8 %recur, ptr %dst, align 1
+ %iv.next = add i64 %iv, 1
+ %cmp = icmp eq i64 %iv, 7
+ br i1 %cmp, label %exit, label %loop
+
+exit:
+ ret void
+}
+;.
;.
; VF8UF1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; VF8UF1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
diff --git a/llvm/test/Transforms/PhaseOrdering/ARM/arm_var_q31.ll b/llvm/test/Transforms/PhaseOrdering/ARM/arm_var_q31.ll
index b7b57176a7a2f..7ca64bc228eb9 100644
--- a/llvm/test/Transforms/PhaseOrdering/ARM/arm_var_q31.ll
+++ b/llvm/test/Transforms/PhaseOrdering/ARM/arm_var_q31.ll
@@ -74,7 +74,7 @@ define void @arm_var_q31(ptr noundef %pSrc, i32 noundef %blockSize, ptr noundef
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[SHR]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY_PREHEADER67]]
-; CHECK: while.body.preheader67:
+; CHECK: while.body.preheader63:
; CHECK-NEXT: [[SUMOFSQUARES_043_PH:%.*]] = phi i64 [ 0, [[WHILE_BODY_PREHEADER]] ], [ [[TMP25]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[PSRC_ADDR_042_PH:%.*]] = phi ptr [ [[PSRC]], [[WHILE_BODY_PREHEADER]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[BLKCNT_041_PH:%.*]] = phi i32 [ [[SHR]], [[WHILE_BODY_PREHEADER]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ]
More information about the llvm-commits
mailing list