[llvm] 40304d8 - Reapply "[VPlan] Remove manual region removal when simplifying for VF and UF. (#181252)" (#188589)

via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 26 03:14:17 PDT 2026


Author: Florian Hahn
Date: 2026-03-26T10:14:10Z
New Revision: 40304d8fefc8872dc8dd2c53f67c7303e40ea727

URL: https://github.com/llvm/llvm-project/commit/40304d8fefc8872dc8dd2c53f67c7303e40ea727
DIFF: https://github.com/llvm/llvm-project/commit/40304d8fefc8872dc8dd2c53f67c7303e40ea727.diff

LOG: Reapply "[VPlan] Remove manual region removal when simplifying for VF and UF. (#181252)" (#188589)

This reverts commit e30f9c19464bcf1bf1e9f69b63884fb78ad2d05d.

Re-land, now that the reported crash causing the revert has been fixed
as part of 77fb84889 (#187504).

Original message:

Replace manual region dissolution code in
simplifyBranchConditionForVFAndUF with using general
removeBranchOnConst. simplifyBranchConditionForVFAndUF now just creates
a (BranchOnCond true) or updates BranchOnTwoConds.

The loop then gets automatically removed by running removeBranchOnConst.

This removes a bunch of special logic to handle header phi replacements
and CFG updates. With the new code, there's no restriction on what kind
of header phi recipes the loop contains.

Note that VPEVLBasedIVRecipe needs to be marked as readnone. This is
technically unrelated, but I could not find an independent test that
would be impacted.

The code to deal with epilogue resume values now needs updating, because
we may simplify a reduction directly to the start value.

PR: https://github.com/llvm/llvm-project/pull/181252

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
    llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
    llvm/lib/Transforms/Vectorize/VPlanTransforms.h
    llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
    llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
    llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll
    llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
    llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-replicate-extends.ll
    llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll
    llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
    llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
    llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll
    llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
    llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-ordered-reduction.ll
    llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll
    llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
    llvm/test/Transforms/LoopVectorize/find-last.ll
    llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
    llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll
    llvm/test/Transforms/LoopVectorize/predicated-multiple-exits.ll
    llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll
    llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll
    llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll
    llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
    llvm/test/Transforms/PhaseOrdering/ARM/arm_var_q31.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 9c34781bb7447..b929e93ac15d6 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7421,6 +7421,10 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
   VPlanTransforms::expandBranchOnTwoConds(BestVPlan);
   // Convert loops with variable-length stepping after regions are dissolved.
   VPlanTransforms::convertToVariableLengthStep(BestVPlan);
+  // Remove dead back-edges for single-iteration loops with BranchOnCond(true).
+  // Only process loop latches to avoid removing edges from the middle block,
+  // which may be needed for epilogue vectorization.
+  VPlanTransforms::removeBranchOnConst(BestVPlan, /*OnlyLatches=*/true);
   VPlanTransforms::materializeBackedgeTakenCount(BestVPlan, VectorPH);
   VPlanTransforms::materializeVectorTripCount(
       BestVPlan, VectorPH, CM.foldTailByMasking(),
@@ -7428,6 +7432,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
   VPlanTransforms::materializeFactors(BestVPlan, VectorPH, BestVF);
   VPlanTransforms::cse(BestVPlan);
   VPlanTransforms::simplifyRecipes(BestVPlan);
+  VPlanTransforms::simplifyKnownEVL(BestVPlan, BestVF, PSE);
 
   // 0. Generate SCEV-dependent code in the entry, including TripCount, before
   // making any changes to the CFG.

diff  --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index a5d0077364e6b..8f937b2f8bc1f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -128,6 +128,7 @@ bool VPRecipeBase::mayReadFromMemory() const {
     return cast<VPWidenIntrinsicRecipe>(this)->mayReadFromMemory();
   case VPBranchOnMaskSC:
   case VPDerivedIVSC:
+  case VPCurrentIterationPHISC:
   case VPFirstOrderRecurrencePHISC:
   case VPReductionPHISC:
   case VPPredInstPHISC:
@@ -165,6 +166,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
     return cast<VPExpressionRecipe>(this)->mayHaveSideEffects();
   case VPActiveLaneMaskPHISC:
   case VPDerivedIVSC:
+  case VPCurrentIterationPHISC:
   case VPFirstOrderRecurrencePHISC:
   case VPReductionPHISC:
   case VPPredInstPHISC:

diff  --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index cfc973afc7fd4..6ad48e766af69 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1580,12 +1580,6 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
     return;
   }
 
-  if (auto *Phi = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(Def)) {
-    if (Phi->getOperand(0) == Phi->getOperand(1))
-      Phi->replaceAllUsesWith(Phi->getOperand(0));
-    return;
-  }
-
   // Simplify MaskedCond with no block mask to its single operand.
   if (match(Def, m_VPInstruction<VPInstruction::MaskedCond>()) &&
       !cast<VPInstruction>(Def)->isMasked())
@@ -1634,9 +1628,15 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
     return;
   }
 
-  if (isa<VPPhi, VPWidenPHIRecipe>(Def)) {
-    if (Def->getNumOperands() == 1)
+  if (isa<VPPhi, VPWidenPHIRecipe, VPHeaderPHIRecipe>(Def)) {
+    if (Def->getNumOperands() == 1) {
       Def->replaceAllUsesWith(Def->getOperand(0));
+      return;
+    }
+    if (auto *Phi = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(Def)) {
+      if (all_equal(Phi->incoming_values()))
+        Phi->replaceAllUsesWith(Phi->getOperand(0));
+    }
     return;
   }
 
@@ -2208,72 +2208,16 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
     return false;
   }
 
-  // The vector loop region only executes once. If possible, completely remove
-  // the region, otherwise replace the terminator controlling the latch with
-  // (BranchOnCond true).
-  // TODO: VPWidenIntOrFpInductionRecipe is only partially supported; add
-  // support for other non-canonical widen induction recipes (e.g.,
-  // VPWidenPointerInductionRecipe).
-  // TODO: fold branch-on-constant after dissolving region.
-  auto *Header = cast<VPBasicBlock>(VectorRegion->getEntry());
-  if (all_of(Header->phis(), [](VPRecipeBase &Phi) {
-        if (auto *R = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi))
-          return R->isCanonical();
-        return isa<VPCanonicalIVPHIRecipe, VPCurrentIterationPHIRecipe,
-                   VPFirstOrderRecurrencePHIRecipe, VPPhi>(&Phi);
-      })) {
-    for (VPRecipeBase &HeaderR : make_early_inc_range(Header->phis())) {
-      if (auto *R = dyn_cast<VPWidenIntOrFpInductionRecipe>(&HeaderR)) {
-        VPBuilder Builder(Plan.getVectorPreheader());
-        VPValue *StepV = Builder.createNaryOp(VPInstruction::StepVector, {},
-                                              R->getScalarType());
-        HeaderR.getVPSingleValue()->replaceAllUsesWith(StepV);
-        HeaderR.eraseFromParent();
-        continue;
-      }
-      auto *Phi = cast<VPPhiAccessors>(&HeaderR);
-      HeaderR.getVPSingleValue()->replaceAllUsesWith(Phi->getIncomingValue(0));
-      HeaderR.eraseFromParent();
-    }
-
-    VPBlockBase *Preheader = VectorRegion->getSinglePredecessor();
-    SmallVector<VPBlockBase *> Exits = to_vector(VectorRegion->getSuccessors());
-    VPBlockUtils::disconnectBlocks(Preheader, VectorRegion);
-    for (VPBlockBase *Exit : Exits)
-      VPBlockUtils::disconnectBlocks(VectorRegion, Exit);
-
-    for (VPBlockBase *B : vp_depth_first_shallow(VectorRegion->getEntry()))
-      B->setParent(nullptr);
-
-    VPBlockUtils::connectBlocks(Preheader, Header);
-
-    for (VPBlockBase *Exit : Exits)
-      VPBlockUtils::connectBlocks(ExitingVPBB, Exit);
-
-    // Replace terminating branch-on-two-conds with branch-on-cond to early
-    // exit.
-    if (Exits.size() != 1) {
-      assert(match(Term, m_BranchOnTwoConds()) && Exits.size() == 2 &&
-             "BranchOnTwoConds needs 2 remaining exits");
-      VPBuilder(Term).createNaryOp(VPInstruction::BranchOnCond,
-                                   Term->getOperand(0));
-    }
-    VPlanTransforms::simplifyRecipes(Plan);
-  } else {
-    // The vector region contains header phis for which we cannot remove the
-    // loop region yet.
-
-    // For BranchOnTwoConds, set the latch exit condition to true directly.
-    if (match(Term, m_BranchOnTwoConds())) {
-      Term->setOperand(1, Plan.getTrue());
-      return true;
-    }
-
-    auto *BOC = new VPInstruction(VPInstruction::BranchOnCond, {Plan.getTrue()},
-                                  {}, {}, Term->getDebugLoc());
-    ExitingVPBB->appendRecipe(BOC);
+  // The vector loop region only executes once. Convert terminator of the
+  // exiting block to exit in the first iteration.
+  if (match(Term, m_BranchOnTwoConds())) {
+    Term->setOperand(1, Plan.getTrue());
+    return true;
   }
 
+  auto *BOC = new VPInstruction(VPInstruction::BranchOnCond, Plan.getTrue(), {},
+                                {}, Term->getDebugLoc());
+  ExitingVPBB->appendRecipe(BOC);
   Term->eraseFromParent();
 
   return true;
@@ -2281,8 +2225,8 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
 
 /// From the definition of llvm.experimental.get.vector.length,
 /// VPInstruction::ExplicitVectorLength(%AVL) = %AVL when %AVL <= VF.
-static bool simplifyKnownEVL(VPlan &Plan, ElementCount VF,
-                             PredicatedScalarEvolution &PSE) {
+bool VPlanTransforms::simplifyKnownEVL(VPlan &Plan, ElementCount VF,
+                                       PredicatedScalarEvolution &PSE) {
   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
            vp_depth_first_deep(Plan.getEntry()))) {
     for (VPRecipeBase &R : *VPBB) {
@@ -2325,7 +2269,6 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
   bool MadeChange = tryToReplaceALMWithWideALM(Plan, BestVF, BestUF);
   MadeChange |= simplifyBranchConditionForVFAndUF(Plan, BestVF, BestUF, PSE);
   MadeChange |= optimizeVectorInductionWidthForTCAndVFUF(Plan, BestVF, BestUF);
-  MadeChange |= simplifyKnownEVL(Plan, BestVF, PSE);
 
   if (MadeChange) {
     Plan.setVF(BestVF);
@@ -2882,7 +2825,11 @@ void VPlanTransforms::truncateToMinimalBitwidths(
   }
 }
 
-void VPlanTransforms::removeBranchOnConst(VPlan &Plan) {
+void VPlanTransforms::removeBranchOnConst(VPlan &Plan, bool OnlyLatches) {
+  std::optional<VPDominatorTree> VPDT;
+  if (OnlyLatches)
+    VPDT.emplace(Plan);
+
   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
            vp_depth_first_shallow(Plan.getEntry()))) {
     VPValue *Cond;
@@ -2890,6 +2837,9 @@ void VPlanTransforms::removeBranchOnConst(VPlan &Plan) {
     if (VPBB->empty() || !match(&VPBB->back(), m_BranchOnCond(m_VPValue(Cond))))
       continue;
 
+    if (OnlyLatches && !VPBlockUtils::isLatch(VPBB, *VPDT))
+      continue;
+
     assert(VPBB->getNumSuccessors() == 2 &&
            "Two successors expected for BranchOnCond");
     unsigned RemovedIdx;
@@ -2929,7 +2879,7 @@ void VPlanTransforms::optimize(VPlan &Plan) {
   RUN_VPLAN_PASS(removeRedundantExpandSCEVRecipes, Plan);
   RUN_VPLAN_PASS(reassociateHeaderMask, Plan);
   RUN_VPLAN_PASS(simplifyRecipes, Plan);
-  RUN_VPLAN_PASS(removeBranchOnConst, Plan);
+  RUN_VPLAN_PASS(removeBranchOnConst, Plan, /*OnlyLatches=*/false);
   RUN_VPLAN_PASS(removeDeadRecipes, Plan);
 
   RUN_VPLAN_PASS(createAndOptimizeReplicateRegions, Plan);

diff  --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 21a1db22126a0..cef1b9ac8168e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -242,6 +242,11 @@ struct VPlanTransforms {
                                  unsigned BestUF,
                                  PredicatedScalarEvolution &PSE);
 
+  /// Try to simplify VPInstruction::ExplicitVectorLength recipes when the AVL
+  /// is known to be <= VF, replacing them with the AVL directly.
+  static bool simplifyKnownEVL(VPlan &Plan, ElementCount VF,
+                               PredicatedScalarEvolution &PSE);
+
   /// Apply VPlan-to-VPlan optimizations to \p Plan, including induction recipe
   /// optimizations, dead recipe removal, replicate region optimizations and
   /// block merging.
@@ -363,8 +368,9 @@ struct VPlanTransforms {
   static void simplifyRecipes(VPlan &Plan);
 
   /// Remove BranchOnCond recipes with true or false conditions together with
-  /// removing dead edges to their successors.
-  static void removeBranchOnConst(VPlan &Plan);
+  /// removing dead edges to their successors. If \p OnlyLatches is true, only
+  /// process loop latches.
+  static void removeBranchOnConst(VPlan &Plan, bool OnlyLatches = false);
 
   /// Perform common-subexpression-elimination on \p Plan.
   static void cse(VPlan &Plan);

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
index e054c916de6e0..d54e8582676d6 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
@@ -8,28 +8,17 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 8)
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP1]], i64 0
-; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[TMP8]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP10:%.*]] = shl nuw nsw <vscale x 8 x i64> [[VEC_IND]], splat (i64 3)
+; CHECK-NEXT:    [[TMP10:%.*]] = shl nuw nsw <vscale x 8 x i64> [[TMP8]], splat (i64 3)
 ; CHECK-NEXT:    [[TMP11:%.*]] = lshr <vscale x 8 x i64> [[BROADCAST_SPLAT]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = trunc <vscale x 8 x i64> [[TMP11]] to <vscale x 8 x i8>
-; CHECK-NEXT:    call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP14]], ptr align 1 [[NEXT_GEP]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK]])
-; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
-; CHECK-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 8)
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
-; CHECK-NEXT:    br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP14]], ptr align 1 [[DST]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK_ENTRY]])
+; CHECK-NEXT:    br label [[MIDDLE_BLOCK:%.*]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.cond.cleanup:
@@ -68,28 +57,17 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
 ; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SHR]] to i64
 ; CHECK-NEXT:    br label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP1]], i64 0
-; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[TMP8]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP10:%.*]] = shl nuw nsw <vscale x 8 x i64> [[VEC_IND]], splat (i64 3)
+; CHECK-NEXT:    [[TMP10:%.*]] = shl nuw nsw <vscale x 8 x i64> [[TMP8]], splat (i64 3)
 ; CHECK-NEXT:    [[TMP11:%.*]] = lshr <vscale x 8 x i64> [[BROADCAST_SPLAT]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = trunc <vscale x 8 x i64> [[TMP11]] to <vscale x 8 x i8>
-; CHECK-NEXT:    call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP14]], ptr align 1 [[NEXT_GEP]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK]])
-; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
-; CHECK-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 [[WIDE_TRIP_COUNT]])
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
-; CHECK-NEXT:    br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP14]], ptr align 1 [[DST]], <vscale x 8 x i1> [[ACTIVE_LANE_MASK_ENTRY]])
+; CHECK-NEXT:    br label [[MIDDLE_BLOCK:%.*]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.cond.cleanup.loopexit:
@@ -124,9 +102,3 @@ for.body:                                         ; preds = %for.body.preheader,
 for.cond.cleanup:                                 ; preds = %for.body
   ret void
 }
-;.
-; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
-; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
-; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
-; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
-;.

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
index 38047541db5d9..e43b6cff0af8c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
@@ -13,11 +13,9 @@ define double @test_reduction_costs() {
 ; COMMON:       [[VECTOR_PH]]:
 ; COMMON-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; COMMON:       [[VECTOR_BODY]]:
-; COMMON-NEXT:    [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP0:%.*]], %[[VECTOR_BODY]] ]
-; COMMON-NEXT:    [[VEC_PHI1:%.*]] = phi double [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP1:%.*]], %[[VECTOR_BODY]] ]
-; COMMON-NEXT:    [[TMP0]] = call double @llvm.vector.reduce.fadd.v2f64(double [[VEC_PHI]], <2 x double> splat (double 3.000000e+00))
-; COMMON-NEXT:    [[TMP1]] = call double @llvm.vector.reduce.fadd.v2f64(double [[VEC_PHI1]], <2 x double> splat (double 9.000000e+00))
-; COMMON-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; COMMON-NEXT:    [[TMP0:%.*]] = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> splat (double 3.000000e+00))
+; COMMON-NEXT:    [[TMP1:%.*]] = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> splat (double 9.000000e+00))
+; COMMON-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; COMMON:       [[MIDDLE_BLOCK]]:
 ; COMMON-NEXT:    br label %[[EXIT:.*]]
 ; COMMON:       [[EXIT]]:

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll
index 99db3ee324c27..945012311aa89 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll
@@ -464,13 +464,9 @@ define i32 @tc4(ptr noundef readonly captures(none) %tmp) vscale_range(1,16) {
 ; CHECK:       [[VECTOR_PH]]:
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP]], i64 [[INDEX]]
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[ARRAYIDX1]], align 4
-; CHECK-NEXT:    [[TMP3]] = add <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> zeroinitializer, [[WIDE_LOAD]]
+; CHECK-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3]])
 ; CHECK-NEXT:    br label %[[EXIT:.*]]
@@ -546,7 +542,6 @@ exit:                                 ; preds = %for.body
 ; CHECK-VS1: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
 ; CHECK-VS1: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
 ; CHECK-VS1: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]}
-; CHECK-VS1: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
 ; CHECK-VS1: [[PROF9]] = !{!"branch_weights", i32 10, i32 30}
 ;.
 ; CHECK-VS2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
@@ -557,6 +552,5 @@ exit:                                 ; preds = %for.body
 ; CHECK-VS2: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
 ; CHECK-VS2: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
 ; CHECK-VS2: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]}
-; CHECK-VS2: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
 ; CHECK-VS2: [[PROF9]] = !{!"branch_weights", i32 10, i32 30}
 ;.

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
index 5b6979d6b1198..7c26d0028d0d4 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
@@ -196,165 +196,124 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n)
 ; DEFAULT-NEXT:    [[BROADCAST_SPLAT6:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT5]], <16 x i8> poison, <16 x i32> zeroinitializer
 ; DEFAULT-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; DEFAULT:       [[VECTOR_BODY]]:
-; DEFAULT-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE35:.*]] ]
-; DEFAULT-NEXT:    [[VEC_IND:%.*]] = phi <16 x i8> [ <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE35]] ]
-; DEFAULT-NEXT:    [[VEC_IND1:%.*]] = phi <16 x i8> [ <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT36:%.*]], %[[PRED_STORE_CONTINUE35]] ]
-; DEFAULT-NEXT:    [[TMP0:%.*]] = icmp ule <16 x i8> [[VEC_IND]], splat (i8 14)
-; DEFAULT-NEXT:    [[TMP1:%.*]] = mul <16 x i8> [[BROADCAST_SPLAT]], [[VEC_IND1]]
-; DEFAULT-NEXT:    [[TMP2:%.*]] = lshr <16 x i8> [[VEC_IND1]], splat (i8 1)
-; DEFAULT-NEXT:    [[TMP3:%.*]] = mul <16 x i8> [[TMP2]], [[BROADCAST_SPLAT4]]
+; DEFAULT-NEXT:    [[TMP1:%.*]] = mul <16 x i8> [[BROADCAST_SPLAT]], <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>
+; DEFAULT-NEXT:    [[TMP3:%.*]] = mul <16 x i8> <i8 0, i8 0, i8 1, i8 1, i8 2, i8 2, i8 3, i8 3, i8 4, i8 4, i8 5, i8 5, i8 6, i8 6, i8 7, i8 7>, [[BROADCAST_SPLAT4]]
 ; DEFAULT-NEXT:    [[TMP4:%.*]] = add <16 x i8> [[TMP3]], [[TMP1]]
-; DEFAULT-NEXT:    [[TMP5:%.*]] = lshr <16 x i8> [[VEC_IND1]], splat (i8 2)
-; DEFAULT-NEXT:    [[TMP6:%.*]] = mul <16 x i8> [[TMP5]], [[BROADCAST_SPLAT6]]
+; DEFAULT-NEXT:    [[TMP6:%.*]] = mul <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 1, i8 1, i8 1, i8 1, i8 2, i8 2, i8 2, i8 2, i8 3, i8 3, i8 3, i8 3>, [[BROADCAST_SPLAT6]]
 ; DEFAULT-NEXT:    [[TMP7:%.*]] = add <16 x i8> [[TMP4]], [[TMP6]]
-; DEFAULT-NEXT:    [[TMP8:%.*]] = extractelement <16 x i1> [[TMP0]], i32 0
-; DEFAULT-NEXT:    br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; DEFAULT-NEXT:    br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; DEFAULT:       [[PRED_STORE_IF]]:
-; DEFAULT-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 0
-; DEFAULT-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP9]]
+; DEFAULT-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 0
 ; DEFAULT-NEXT:    [[TMP11:%.*]] = extractelement <16 x i8> [[TMP7]], i32 0
 ; DEFAULT-NEXT:    store i8 [[TMP11]], ptr [[TMP10]], align 1
 ; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; DEFAULT:       [[PRED_STORE_CONTINUE]]:
-; DEFAULT-NEXT:    [[TMP12:%.*]] = extractelement <16 x i1> [[TMP0]], i32 1
-; DEFAULT-NEXT:    br i1 [[TMP12]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]]
+; DEFAULT-NEXT:    br i1 true, label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]]
 ; DEFAULT:       [[PRED_STORE_IF6]]:
-; DEFAULT-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 1
-; DEFAULT-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP13]]
+; DEFAULT-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 1
 ; DEFAULT-NEXT:    [[TMP15:%.*]] = extractelement <16 x i8> [[TMP7]], i32 1
 ; DEFAULT-NEXT:    store i8 [[TMP15]], ptr [[TMP14]], align 1
 ; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE7]]
 ; DEFAULT:       [[PRED_STORE_CONTINUE7]]:
-; DEFAULT-NEXT:    [[TMP16:%.*]] = extractelement <16 x i1> [[TMP0]], i32 2
-; DEFAULT-NEXT:    br i1 [[TMP16]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]]
+; DEFAULT-NEXT:    br i1 true, label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]]
 ; DEFAULT:       [[PRED_STORE_IF8]]:
-; DEFAULT-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX]], 2
-; DEFAULT-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP17]]
+; DEFAULT-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 2
 ; DEFAULT-NEXT:    [[TMP19:%.*]] = extractelement <16 x i8> [[TMP7]], i32 2
 ; DEFAULT-NEXT:    store i8 [[TMP19]], ptr [[TMP18]], align 1
 ; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE9]]
 ; DEFAULT:       [[PRED_STORE_CONTINUE9]]:
-; DEFAULT-NEXT:    [[TMP20:%.*]] = extractelement <16 x i1> [[TMP0]], i32 3
-; DEFAULT-NEXT:    br i1 [[TMP20]], label %[[PRED_STORE_IF10:.*]], label %[[PRED_STORE_CONTINUE11:.*]]
+; DEFAULT-NEXT:    br i1 true, label %[[PRED_STORE_IF10:.*]], label %[[PRED_STORE_CONTINUE11:.*]]
 ; DEFAULT:       [[PRED_STORE_IF10]]:
-; DEFAULT-NEXT:    [[TMP21:%.*]] = add i64 [[INDEX]], 3
-; DEFAULT-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP21]]
+; DEFAULT-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 3
 ; DEFAULT-NEXT:    [[TMP23:%.*]] = extractelement <16 x i8> [[TMP7]], i32 3
 ; DEFAULT-NEXT:    store i8 [[TMP23]], ptr [[TMP22]], align 1
 ; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE11]]
 ; DEFAULT:       [[PRED_STORE_CONTINUE11]]:
-; DEFAULT-NEXT:    [[TMP24:%.*]] = extractelement <16 x i1> [[TMP0]], i32 4
-; DEFAULT-NEXT:    br i1 [[TMP24]], label %[[PRED_STORE_IF12:.*]], label %[[PRED_STORE_CONTINUE13:.*]]
+; DEFAULT-NEXT:    br i1 true, label %[[PRED_STORE_IF12:.*]], label %[[PRED_STORE_CONTINUE13:.*]]
 ; DEFAULT:       [[PRED_STORE_IF12]]:
-; DEFAULT-NEXT:    [[TMP25:%.*]] = add i64 [[INDEX]], 4
-; DEFAULT-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP25]]
+; DEFAULT-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 4
 ; DEFAULT-NEXT:    [[TMP27:%.*]] = extractelement <16 x i8> [[TMP7]], i32 4
 ; DEFAULT-NEXT:    store i8 [[TMP27]], ptr [[TMP26]], align 1
 ; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE13]]
 ; DEFAULT:       [[PRED_STORE_CONTINUE13]]:
-; DEFAULT-NEXT:    [[TMP28:%.*]] = extractelement <16 x i1> [[TMP0]], i32 5
-; DEFAULT-NEXT:    br i1 [[TMP28]], label %[[PRED_STORE_IF14:.*]], label %[[PRED_STORE_CONTINUE15:.*]]
+; DEFAULT-NEXT:    br i1 true, label %[[PRED_STORE_IF14:.*]], label %[[PRED_STORE_CONTINUE15:.*]]
 ; DEFAULT:       [[PRED_STORE_IF14]]:
-; DEFAULT-NEXT:    [[TMP29:%.*]] = add i64 [[INDEX]], 5
-; DEFAULT-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP29]]
+; DEFAULT-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 5
 ; DEFAULT-NEXT:    [[TMP31:%.*]] = extractelement <16 x i8> [[TMP7]], i32 5
 ; DEFAULT-NEXT:    store i8 [[TMP31]], ptr [[TMP30]], align 1
 ; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE15]]
 ; DEFAULT:       [[PRED_STORE_CONTINUE15]]:
-; DEFAULT-NEXT:    [[TMP32:%.*]] = extractelement <16 x i1> [[TMP0]], i32 6
-; DEFAULT-NEXT:    br i1 [[TMP32]], label %[[PRED_STORE_IF16:.*]], label %[[PRED_STORE_CONTINUE17:.*]]
+; DEFAULT-NEXT:    br i1 true, label %[[PRED_STORE_IF16:.*]], label %[[PRED_STORE_CONTINUE17:.*]]
 ; DEFAULT:       [[PRED_STORE_IF16]]:
-; DEFAULT-NEXT:    [[TMP33:%.*]] = add i64 [[INDEX]], 6
-; DEFAULT-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP33]]
+; DEFAULT-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 6
 ; DEFAULT-NEXT:    [[TMP35:%.*]] = extractelement <16 x i8> [[TMP7]], i32 6
 ; DEFAULT-NEXT:    store i8 [[TMP35]], ptr [[TMP34]], align 1
 ; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE17]]
 ; DEFAULT:       [[PRED_STORE_CONTINUE17]]:
-; DEFAULT-NEXT:    [[TMP36:%.*]] = extractelement <16 x i1> [[TMP0]], i32 7
-; DEFAULT-NEXT:    br i1 [[TMP36]], label %[[PRED_STORE_IF18:.*]], label %[[PRED_STORE_CONTINUE19:.*]]
+; DEFAULT-NEXT:    br i1 true, label %[[PRED_STORE_IF18:.*]], label %[[PRED_STORE_CONTINUE19:.*]]
 ; DEFAULT:       [[PRED_STORE_IF18]]:
-; DEFAULT-NEXT:    [[TMP37:%.*]] = add i64 [[INDEX]], 7
-; DEFAULT-NEXT:    [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP37]]
+; DEFAULT-NEXT:    [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 7
 ; DEFAULT-NEXT:    [[TMP39:%.*]] = extractelement <16 x i8> [[TMP7]], i32 7
 ; DEFAULT-NEXT:    store i8 [[TMP39]], ptr [[TMP38]], align 1
 ; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE19]]
 ; DEFAULT:       [[PRED_STORE_CONTINUE19]]:
-; DEFAULT-NEXT:    [[TMP40:%.*]] = extractelement <16 x i1> [[TMP0]], i32 8
-; DEFAULT-NEXT:    br i1 [[TMP40]], label %[[PRED_STORE_IF20:.*]], label %[[PRED_STORE_CONTINUE21:.*]]
+; DEFAULT-NEXT:    br i1 true, label %[[PRED_STORE_IF20:.*]], label %[[PRED_STORE_CONTINUE21:.*]]
 ; DEFAULT:       [[PRED_STORE_IF20]]:
-; DEFAULT-NEXT:    [[TMP41:%.*]] = add i64 [[INDEX]], 8
-; DEFAULT-NEXT:    [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP41]]
+; DEFAULT-NEXT:    [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
 ; DEFAULT-NEXT:    [[TMP43:%.*]] = extractelement <16 x i8> [[TMP7]], i32 8
 ; DEFAULT-NEXT:    store i8 [[TMP43]], ptr [[TMP42]], align 1
 ; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE21]]
 ; DEFAULT:       [[PRED_STORE_CONTINUE21]]:
-; DEFAULT-NEXT:    [[TMP44:%.*]] = extractelement <16 x i1> [[TMP0]], i32 9
-; DEFAULT-NEXT:    br i1 [[TMP44]], label %[[PRED_STORE_IF22:.*]], label %[[PRED_STORE_CONTINUE23:.*]]
+; DEFAULT-NEXT:    br i1 true, label %[[PRED_STORE_IF22:.*]], label %[[PRED_STORE_CONTINUE23:.*]]
 ; DEFAULT:       [[PRED_STORE_IF22]]:
-; DEFAULT-NEXT:    [[TMP45:%.*]] = add i64 [[INDEX]], 9
-; DEFAULT-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP45]]
+; DEFAULT-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 9
 ; DEFAULT-NEXT:    [[TMP47:%.*]] = extractelement <16 x i8> [[TMP7]], i32 9
 ; DEFAULT-NEXT:    store i8 [[TMP47]], ptr [[TMP46]], align 1
 ; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE23]]
 ; DEFAULT:       [[PRED_STORE_CONTINUE23]]:
-; DEFAULT-NEXT:    [[TMP48:%.*]] = extractelement <16 x i1> [[TMP0]], i32 10
-; DEFAULT-NEXT:    br i1 [[TMP48]], label %[[PRED_STORE_IF24:.*]], label %[[PRED_STORE_CONTINUE25:.*]]
+; DEFAULT-NEXT:    br i1 true, label %[[PRED_STORE_IF24:.*]], label %[[PRED_STORE_CONTINUE25:.*]]
 ; DEFAULT:       [[PRED_STORE_IF24]]:
-; DEFAULT-NEXT:    [[TMP49:%.*]] = add i64 [[INDEX]], 10
-; DEFAULT-NEXT:    [[TMP50:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP49]]
+; DEFAULT-NEXT:    [[TMP50:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 10
 ; DEFAULT-NEXT:    [[TMP51:%.*]] = extractelement <16 x i8> [[TMP7]], i32 10
 ; DEFAULT-NEXT:    store i8 [[TMP51]], ptr [[TMP50]], align 1
 ; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE25]]
 ; DEFAULT:       [[PRED_STORE_CONTINUE25]]:
-; DEFAULT-NEXT:    [[TMP52:%.*]] = extractelement <16 x i1> [[TMP0]], i32 11
-; DEFAULT-NEXT:    br i1 [[TMP52]], label %[[PRED_STORE_IF26:.*]], label %[[PRED_STORE_CONTINUE27:.*]]
+; DEFAULT-NEXT:    br i1 true, label %[[PRED_STORE_IF26:.*]], label %[[PRED_STORE_CONTINUE27:.*]]
 ; DEFAULT:       [[PRED_STORE_IF26]]:
-; DEFAULT-NEXT:    [[TMP53:%.*]] = add i64 [[INDEX]], 11
-; DEFAULT-NEXT:    [[TMP54:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP53]]
+; DEFAULT-NEXT:    [[TMP54:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 11
 ; DEFAULT-NEXT:    [[TMP55:%.*]] = extractelement <16 x i8> [[TMP7]], i32 11
 ; DEFAULT-NEXT:    store i8 [[TMP55]], ptr [[TMP54]], align 1
 ; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE27]]
 ; DEFAULT:       [[PRED_STORE_CONTINUE27]]:
-; DEFAULT-NEXT:    [[TMP56:%.*]] = extractelement <16 x i1> [[TMP0]], i32 12
-; DEFAULT-NEXT:    br i1 [[TMP56]], label %[[PRED_STORE_IF28:.*]], label %[[PRED_STORE_CONTINUE29:.*]]
+; DEFAULT-NEXT:    br i1 true, label %[[PRED_STORE_IF28:.*]], label %[[PRED_STORE_CONTINUE29:.*]]
 ; DEFAULT:       [[PRED_STORE_IF28]]:
-; DEFAULT-NEXT:    [[TMP57:%.*]] = add i64 [[INDEX]], 12
-; DEFAULT-NEXT:    [[TMP58:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP57]]
+; DEFAULT-NEXT:    [[TMP58:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 12
 ; DEFAULT-NEXT:    [[TMP59:%.*]] = extractelement <16 x i8> [[TMP7]], i32 12
 ; DEFAULT-NEXT:    store i8 [[TMP59]], ptr [[TMP58]], align 1
 ; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE29]]
 ; DEFAULT:       [[PRED_STORE_CONTINUE29]]:
-; DEFAULT-NEXT:    [[TMP60:%.*]] = extractelement <16 x i1> [[TMP0]], i32 13
-; DEFAULT-NEXT:    br i1 [[TMP60]], label %[[PRED_STORE_IF30:.*]], label %[[PRED_STORE_CONTINUE31:.*]]
+; DEFAULT-NEXT:    br i1 true, label %[[PRED_STORE_IF30:.*]], label %[[PRED_STORE_CONTINUE31:.*]]
 ; DEFAULT:       [[PRED_STORE_IF30]]:
-; DEFAULT-NEXT:    [[TMP61:%.*]] = add i64 [[INDEX]], 13
-; DEFAULT-NEXT:    [[TMP62:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP61]]
+; DEFAULT-NEXT:    [[TMP62:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 13
 ; DEFAULT-NEXT:    [[TMP63:%.*]] = extractelement <16 x i8> [[TMP7]], i32 13
 ; DEFAULT-NEXT:    store i8 [[TMP63]], ptr [[TMP62]], align 1
 ; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE31]]
 ; DEFAULT:       [[PRED_STORE_CONTINUE31]]:
-; DEFAULT-NEXT:    [[TMP64:%.*]] = extractelement <16 x i1> [[TMP0]], i32 14
-; DEFAULT-NEXT:    br i1 [[TMP64]], label %[[PRED_STORE_IF32:.*]], label %[[PRED_STORE_CONTINUE33:.*]]
+; DEFAULT-NEXT:    br i1 true, label %[[PRED_STORE_IF32:.*]], label %[[PRED_STORE_CONTINUE33:.*]]
 ; DEFAULT:       [[PRED_STORE_IF32]]:
-; DEFAULT-NEXT:    [[TMP65:%.*]] = add i64 [[INDEX]], 14
-; DEFAULT-NEXT:    [[TMP66:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP65]]
+; DEFAULT-NEXT:    [[TMP66:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 14
 ; DEFAULT-NEXT:    [[TMP67:%.*]] = extractelement <16 x i8> [[TMP7]], i32 14
 ; DEFAULT-NEXT:    store i8 [[TMP67]], ptr [[TMP66]], align 1
 ; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE33]]
 ; DEFAULT:       [[PRED_STORE_CONTINUE33]]:
-; DEFAULT-NEXT:    [[TMP68:%.*]] = extractelement <16 x i1> [[TMP0]], i32 15
-; DEFAULT-NEXT:    br i1 [[TMP68]], label %[[PRED_STORE_IF34:.*]], label %[[PRED_STORE_CONTINUE35]]
+; DEFAULT-NEXT:    br i1 false, label %[[PRED_STORE_IF34:.*]], label %[[PRED_STORE_CONTINUE35:.*]]
 ; DEFAULT:       [[PRED_STORE_IF34]]:
-; DEFAULT-NEXT:    [[TMP69:%.*]] = add i64 [[INDEX]], 15
-; DEFAULT-NEXT:    [[TMP70:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP69]]
+; DEFAULT-NEXT:    [[TMP70:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 15
 ; DEFAULT-NEXT:    [[TMP71:%.*]] = extractelement <16 x i8> [[TMP7]], i32 15
 ; DEFAULT-NEXT:    store i8 [[TMP71]], ptr [[TMP70]], align 1
 ; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE35]]
 ; DEFAULT:       [[PRED_STORE_CONTINUE35]]:
-; DEFAULT-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; DEFAULT-NEXT:    [[VEC_IND_NEXT]] = add <16 x i8> [[VEC_IND]], splat (i8 16)
-; DEFAULT-NEXT:    [[VEC_IND_NEXT36]] = add <16 x i8> [[VEC_IND1]], splat (i8 16)
-; DEFAULT-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; DEFAULT-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; DEFAULT:       [[MIDDLE_BLOCK]]:
 ; DEFAULT-NEXT:    br label %[[FOR_COND_CLEANUP:.*]]
 ; DEFAULT:       [[FOR_COND_CLEANUP]]:

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-replicate-extends.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-replicate-extends.ll
index 720442afe16fd..2684e33cf051c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-replicate-extends.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-replicate-extends.ll
@@ -15,9 +15,8 @@ define double @test(i32 %0) {
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP2]] = or <2 x i64> [[VEC_PHI]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    [[TMP2:%.*]] = or <2 x i64> zeroinitializer, [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP2]])
 ; CHECK-NEXT:    br label %[[EXIT:.*]]

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll
index 2c82cae589036..a810ff80b1b20 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll
@@ -587,11 +587,7 @@ define double @test_scalarization_cost_for_load_of_address(ptr %src.0, ptr %src.
 ; CHECK:       [[VECTOR_PH]]:
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi double [ 3.000000e+00, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT:    [[GEP_0:%.*]] = getelementptr [[T:%.*]], ptr [[SRC_0]], i64 [[INDEX]]
-; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <6 x double>, ptr [[GEP_0]], align 8
+; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <6 x double>, ptr [[SRC_0]], align 8
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <6 x double> [[WIDE_VEC]], <6 x double> poison, <2 x i32> <i32 0, i32 3>
 ; CHECK-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <6 x double> [[WIDE_VEC]], <6 x double> poison, <2 x i32> <i32 1, i32 4>
 ; CHECK-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <6 x double> [[WIDE_VEC]], <6 x double> poison, <2 x i32> <i32 2, i32 5>
@@ -600,12 +596,10 @@ define double @test_scalarization_cost_for_load_of_address(ptr %src.0, ptr %src.
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[STRIDED_VEC2]], splat (double 3.000000e+00)
 ; CHECK-NEXT:    [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], [[TMP5]]
-; CHECK-NEXT:    [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[INDEX]]
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[GEP_SRC]], align 8
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[SRC_1]], align 8
 ; CHECK-NEXT:    [[TMP9:%.*]] = fmul <2 x double> [[TMP7]], [[WIDE_LOAD]]
-; CHECK-NEXT:    [[GEP_SRC_2:%.*]] = getelementptr [[T_2:%.*]], ptr [[SRC_2]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr [[T_2]], ptr [[SRC_2]], i64 [[TMP1]]
-; CHECK-NEXT:    [[GEP_72:%.*]] = getelementptr i8, ptr [[GEP_SRC_2]], i64 72
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr [[T_2:%.*]], ptr [[SRC_2]], i64 1
+; CHECK-NEXT:    [[GEP_72:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 72
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[TMP11]], i64 72
 ; CHECK-NEXT:    [[L_P_2:%.*]] = load ptr, ptr [[GEP_72]], align 8
 ; CHECK-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[TMP13]], align 8
@@ -614,9 +608,8 @@ define double @test_scalarization_cost_for_load_of_address(ptr %src.0, ptr %src.
 ; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <2 x double> poison, double [[LV]], i32 0
 ; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <2 x double> [[TMP18]], double [[TMP17]], i32 1
 ; CHECK-NEXT:    [[TMP20:%.*]] = fmul <2 x double> [[TMP9]], [[TMP19]]
-; CHECK-NEXT:    [[TMP21]] = call double @llvm.vector.reduce.fadd.v2f64(double [[VEC_PHI]], <2 x double> [[TMP20]])
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
+; CHECK-NEXT:    [[TMP21:%.*]] = call double @llvm.vector.reduce.fadd.v2f64(double 3.000000e+00, <2 x double> [[TMP20]])
+; CHECK-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    br label %[[EXIT:.*]]
 ; CHECK:       [[EXIT]]:

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
index 0f18b29b25d6d..e1f4ec4945c42 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
@@ -552,11 +552,7 @@ define double @test_scalarization_cost_for_load_of_address(ptr %src.0, ptr %src.
 ; CHECK:       [[VECTOR_PH]]:
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi double [ 3.000000e+00, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT:    [[GEP_0:%.*]] = getelementptr [[T:%.*]], ptr [[SRC_0]], i64 [[INDEX]]
-; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <6 x double>, ptr [[GEP_0]], align 8
+; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <6 x double>, ptr [[SRC_0]], align 8
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <6 x double> [[WIDE_VEC]], <6 x double> poison, <2 x i32> <i32 0, i32 3>
 ; CHECK-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <6 x double> [[WIDE_VEC]], <6 x double> poison, <2 x i32> <i32 1, i32 4>
 ; CHECK-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <6 x double> [[WIDE_VEC]], <6 x double> poison, <2 x i32> <i32 2, i32 5>
@@ -565,23 +561,20 @@ define double @test_scalarization_cost_for_load_of_address(ptr %src.0, ptr %src.
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[STRIDED_VEC2]], splat (double 3.000000e+00)
 ; CHECK-NEXT:    [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], [[TMP5]]
-; CHECK-NEXT:    [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[INDEX]]
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[GEP_SRC]], align 8
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[SRC_1]], align 8
 ; CHECK-NEXT:    [[TMP9:%.*]] = fmul <2 x double> [[TMP7]], [[WIDE_LOAD]]
-; CHECK-NEXT:    [[GEP_SRC_2:%.*]] = getelementptr [[T_2:%.*]], ptr [[SRC_2]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr [[T_2]], ptr [[SRC_2]], i64 [[TMP1]]
-; CHECK-NEXT:    [[GEP_72:%.*]] = getelementptr i8, ptr [[GEP_SRC_2]], i64 72
-; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[TMP11]], i64 72
-; CHECK-NEXT:    [[L_P_2:%.*]] = load ptr, ptr [[GEP_72]], align 8
-; CHECK-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[TMP13]], align 8
-; CHECK-NEXT:    [[LV:%.*]] = load double, ptr [[L_P_2]], align 8
-; CHECK-NEXT:    [[TMP17:%.*]] = load double, ptr [[TMP15]], align 8
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr [[T_2:%.*]], ptr [[SRC_2]], i64 1
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 72
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP11]], i64 72
+; CHECK-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8
+; CHECK-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[TMP8]], align 8
+; CHECK-NEXT:    [[LV:%.*]] = load double, ptr [[TMP13]], align 8
+; CHECK-NEXT:    [[TMP17:%.*]] = load double, ptr [[TMP10]], align 8
 ; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <2 x double> poison, double [[LV]], i32 0
 ; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <2 x double> [[TMP18]], double [[TMP17]], i32 1
 ; CHECK-NEXT:    [[TMP20:%.*]] = fmul <2 x double> [[TMP9]], [[TMP19]]
-; CHECK-NEXT:    [[TMP21]] = call double @llvm.vector.reduce.fadd.v2f64(double [[VEC_PHI]], <2 x double> [[TMP20]])
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
+; CHECK-NEXT:    [[TMP21:%.*]] = call double @llvm.vector.reduce.fadd.v2f64(double 3.000000e+00, <2 x double> [[TMP20]])
+; CHECK-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    br label %[[EXIT:.*]]
 ; CHECK:       [[EXIT]]:

diff  --git a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
index f39d80b109d85..fb42f2c7fc925 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
@@ -344,13 +344,9 @@ define i8 @mul_non_pow_2_low_trip_count(ptr noalias %a) {
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <8 x i8> [ <i8 2, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1
-; CHECK-NEXT:    [[TMP1]] = mul <8 x i8> [[WIDE_LOAD]], [[VEC_PHI]]
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT:    br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0:%.*]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = mul <8 x i8> [[WIDE_LOAD]], <i8 2, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    br label [[MIDDLE_BLOCK:%.*]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> [[TMP1]])
 ; CHECK-NEXT:    br label [[SCALAR_PH:%.*]]
@@ -359,7 +355,7 @@ define i8 @mul_non_pow_2_low_trip_count(ptr noalias %a) {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 8, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[RDX:%.*]] = phi i8 [ [[TMP3]], [[SCALAR_PH]] ], [ [[MUL:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[IV]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = load i8, ptr [[GEP]], align 1
 ; CHECK-NEXT:    [[MUL]] = mul i8 [[TMP5]], [[RDX]]
 ; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1

diff  --git a/llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll
index f1dda3d5a2f91..a80d5dacff583 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll
@@ -41,13 +41,9 @@ define i32 @test_remove_iv(i32 %start) #0 {
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP2]], %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[AVL:%.*]] = phi i32 [ 6, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 4, i1 true)
-; CHECK-NEXT:    [[TMP4:%.*]] = xor <vscale x 4 x i32> [[VEC_PHI]], splat (i32 3)
-; CHECK-NEXT:    [[TMP5]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP3]])
-; CHECK-NEXT:    [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP3]]
-; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    [[TMP3:%.*]] = xor <vscale x 4 x i32> [[TMP2]], splat (i32 3)
+; CHECK-NEXT:    [[TMP5:%.*]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP2]], i32 6)
+; CHECK-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> [[TMP5]])
 ; CHECK-NEXT:    br label %[[EXIT:.*]]
@@ -71,8 +67,3 @@ exit:
 
 attributes #0 = { vscale_range(2,2) }
 
-;.
-; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
-; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
-; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
-;.

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
index 99efdf03f2912..ff9a491b25197 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
@@ -603,19 +603,19 @@ define i64 @live_in_known_1_via_scev() {
 ; CHECK-NEXT:    [[P:%.*]] = phi i32 [ 1, %[[ENTRY]] ]
 ; CHECK-NEXT:    [[N:%.*]] = add nuw nsw i32 [[SEL]], 6
 ; CHECK-NEXT:    [[P_EXT:%.*]] = zext nneg i32 [[P]] to i64
-; CHECK-NEXT:    br label %[[VECTOR_PH:.*]]
-; CHECK:       [[VECTOR_PH]]:
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i64> [ <i64 3, i64 1, i64 1, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_PHI]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_BODY]] ], [ [[INDEX_NEXT:%.*]], %[[EXIT]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i64> [ <i64 3, i64 1, i64 1, i64 1>, %[[VECTOR_BODY]] ], [ [[VEC_PHI]], %[[EXIT]] ]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8
-; CHECK-NEXT:    br i1 [[TMP0]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP0]], label %[[MIDDLE_BLOCK:.*]], label %[[EXIT]], !llvm.loop [[LOOP22:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> [[VEC_PHI]])
-; CHECK-NEXT:    br label %[[EXIT:.*]]
-; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    br label %[[EXIT1:.*]]
+; CHECK:       [[EXIT1]]:
 ; CHECK-NEXT:    ret i64 [[TMP3]]
 ;
 entry:
@@ -657,9 +657,8 @@ define i64 @cost_loop_invariant_recipes(i1 %x, i64 %y) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[BROADCAST_SPLAT]], [[TMP1]]
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i64> [ splat (i64 1), %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP3]] = mul <2 x i64> [[TMP2]], [[VEC_PHI]]
-; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
+; CHECK-NEXT:    [[TMP3:%.*]] = mul <2 x i64> [[TMP2]], splat (i64 1)
+; CHECK-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> [[TMP3]])
 ; CHECK-NEXT:    br label %[[EXIT:.*]]
@@ -697,12 +696,9 @@ define i32 @narrowed_reduction(ptr %a, i1 %cmp) #0 {
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = and <16 x i32> [[VEC_PHI]], splat (i32 1)
-; CHECK-NEXT:    [[TMP1:%.*]] = or <16 x i32> [[TMP0]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or <16 x i32> zeroinitializer, [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = trunc <16 x i32> [[TMP1]] to <16 x i1>
-; CHECK-NEXT:    [[TMP3]] = zext <16 x i1> [[TMP2]] to <16 x i32>
-; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
+; CHECK-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP2]])
 ; CHECK-NEXT:    [[TMP5:%.*]] = zext i1 [[TMP4]] to i32

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-ordered-reduction.ll b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-ordered-reduction.ll
index 2e946693af294..d7d321e84ddff 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-ordered-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-ordered-reduction.ll
@@ -273,22 +273,18 @@ define float @ordered_reduction_epilogue_dead_main_loop(ptr %p, i64 %n) "prefer-
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[BOUND]], [[N_MOD_VF]]
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr float, ptr [[P]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, ptr [[TMP0]], i64 16
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr float, ptr [[TMP0]], i64 32
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr float, ptr [[TMP0]], i64 48
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x float>, ptr [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, ptr [[P]], i64 16
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr float, ptr [[P]], i64 32
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr float, ptr [[P]], i64 48
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x float>, ptr [[P]], align 4
 ; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <16 x float>, ptr [[TMP1]], align 4
 ; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <16 x float>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <16 x float>, ptr [[TMP3]], align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[VEC_PHI]], <16 x float> [[WIDE_LOAD]])
+; CHECK-NEXT:    [[TMP4:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[WIDE_LOAD]])
 ; CHECK-NEXT:    [[TMP5:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP4]], <16 x float> [[WIDE_LOAD1]])
 ; CHECK-NEXT:    [[TMP6:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP5]], <16 x float> [[WIDE_LOAD2]])
-; CHECK-NEXT:    [[TMP7]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP6]], <16 x float> [[WIDE_LOAD3]])
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64
-; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-NEXT:    [[TMP7:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP6]], <16 x float> [[WIDE_LOAD3]])
+; CHECK-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[BOUND]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
@@ -302,13 +298,10 @@ define float @ordered_reduction_epilogue_dead_main_loop(ptr %p, i64 %n) "prefer-
 ; CHECK-NEXT:    [[N_VEC5:%.*]] = sub i64 [[BOUND]], [[N_MOD_VF4]]
 ; CHECK-NEXT:    br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
 ; CHECK:       [[VEC_EPILOG_VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI7:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[VEC_EPILOG_PH]] ], [ [[TMP9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr float, ptr [[P]], i64 [[INDEX6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr float, ptr [[P]], i64 [[VEC_EPILOG_RESUME_VAL]]
 ; CHECK-NEXT:    [[WIDE_LOAD8:%.*]] = load <16 x float>, ptr [[TMP8]], align 4
-; CHECK-NEXT:    [[TMP9]] = call float @llvm.vector.reduce.fadd.v16f32(float [[VEC_PHI7]], <16 x float> [[WIDE_LOAD8]])
-; CHECK-NEXT:    [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 16
-; CHECK-NEXT:    br i1 true, label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK-NEXT:    [[TMP9:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[BC_MERGE_RDX]], <16 x float> [[WIDE_LOAD8]])
+; CHECK-NEXT:    br label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]]
 ; CHECK:       [[VEC_EPILOG_MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    [[CMP_N10:%.*]] = icmp eq i64 [[BOUND]], [[N_VEC5]]
 ; CHECK-NEXT:    br i1 [[CMP_N10]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
@@ -364,22 +357,18 @@ define float @ordered_reduction_nonzero_start_dead_main_vector_loop(ptr %p, i64
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[BOUND]], [[N_MOD_VF]]
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi float [ 4.200000e+01, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr float, ptr [[P]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, ptr [[TMP0]], i64 16
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr float, ptr [[TMP0]], i64 32
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr float, ptr [[TMP0]], i64 48
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x float>, ptr [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, ptr [[P]], i64 16
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr float, ptr [[P]], i64 32
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr float, ptr [[P]], i64 48
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x float>, ptr [[P]], align 4
 ; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <16 x float>, ptr [[TMP1]], align 4
 ; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <16 x float>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <16 x float>, ptr [[TMP3]], align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[VEC_PHI]], <16 x float> [[WIDE_LOAD]])
+; CHECK-NEXT:    [[TMP4:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float 4.200000e+01, <16 x float> [[WIDE_LOAD]])
 ; CHECK-NEXT:    [[TMP5:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP4]], <16 x float> [[WIDE_LOAD1]])
 ; CHECK-NEXT:    [[TMP6:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP5]], <16 x float> [[WIDE_LOAD2]])
-; CHECK-NEXT:    [[TMP7]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP6]], <16 x float> [[WIDE_LOAD3]])
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64
-; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK-NEXT:    [[TMP7:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP6]], <16 x float> [[WIDE_LOAD3]])
+; CHECK-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[BOUND]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
@@ -393,13 +382,10 @@ define float @ordered_reduction_nonzero_start_dead_main_vector_loop(ptr %p, i64
 ; CHECK-NEXT:    [[N_VEC5:%.*]] = sub i64 [[BOUND]], [[N_MOD_VF4]]
 ; CHECK-NEXT:    br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
 ; CHECK:       [[VEC_EPILOG_VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI7:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[VEC_EPILOG_PH]] ], [ [[TMP9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr float, ptr [[P]], i64 [[INDEX6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr float, ptr [[P]], i64 [[VEC_EPILOG_RESUME_VAL]]
 ; CHECK-NEXT:    [[WIDE_LOAD8:%.*]] = load <16 x float>, ptr [[TMP8]], align 4
-; CHECK-NEXT:    [[TMP9]] = call float @llvm.vector.reduce.fadd.v16f32(float [[VEC_PHI7]], <16 x float> [[WIDE_LOAD8]])
-; CHECK-NEXT:    [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 16
-; CHECK-NEXT:    br i1 true, label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
+; CHECK-NEXT:    [[TMP9:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[BC_MERGE_RDX]], <16 x float> [[WIDE_LOAD8]])
+; CHECK-NEXT:    br label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]]
 ; CHECK:       [[VEC_EPILOG_MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    [[CMP_N10:%.*]] = icmp eq i64 [[BOUND]], [[N_VEC5]]
 ; CHECK-NEXT:    br i1 [[CMP_N10]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
@@ -454,35 +440,29 @@ define { float, float } @two_ordered_reductions(ptr %p, ptr %q, i64 %n) "prefer-
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[BOUND]], [[N_MOD_VF]]
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP6:%.*]] = phi float [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP11:%.*]] = phi float [ 1.000000e+00, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr float, ptr [[P]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, ptr [[TMP0]], i64 16
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr float, ptr [[TMP0]], i64 32
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr float, ptr [[TMP0]], i64 48
-; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <16 x float>, ptr [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, ptr [[P]], i64 16
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr float, ptr [[P]], i64 32
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr float, ptr [[P]], i64 48
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <16 x float>, ptr [[P]], align 4
 ; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <16 x float>, ptr [[TMP1]], align 4
 ; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <16 x float>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[WIDE_LOAD5:%.*]] = load <16 x float>, ptr [[TMP3]], align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr float, ptr [[Q]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr float, ptr [[TMP4]], i64 16
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr float, ptr [[TMP4]], i64 32
-; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr float, ptr [[TMP4]], i64 48
-; CHECK-NEXT:    [[WIDE_LOAD6:%.*]] = load <16 x float>, ptr [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr float, ptr [[Q]], i64 16
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr float, ptr [[Q]], i64 32
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr float, ptr [[Q]], i64 48
+; CHECK-NEXT:    [[WIDE_LOAD6:%.*]] = load <16 x float>, ptr [[Q]], align 4
 ; CHECK-NEXT:    [[WIDE_LOAD7:%.*]] = load <16 x float>, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[WIDE_LOAD9:%.*]] = load <16 x float>, ptr [[TMP10]], align 4
 ; CHECK-NEXT:    [[WIDE_LOAD8:%.*]] = load <16 x float>, ptr [[TMP20]], align 4
-; CHECK-NEXT:    [[TMP7:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP6]], <16 x float> [[WIDE_LOAD1]])
+; CHECK-NEXT:    [[TMP7:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[WIDE_LOAD1]])
 ; CHECK-NEXT:    [[TMP8:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP7]], <16 x float> [[WIDE_LOAD2]])
 ; CHECK-NEXT:    [[TMP9:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP8]], <16 x float> [[WIDE_LOAD3]])
-; CHECK-NEXT:    [[TMP18]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP9]], <16 x float> [[WIDE_LOAD5]])
-; CHECK-NEXT:    [[TMP12:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP11]], <16 x float> [[WIDE_LOAD6]])
+; CHECK-NEXT:    [[TMP18:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP9]], <16 x float> [[WIDE_LOAD5]])
+; CHECK-NEXT:    [[TMP12:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float 1.000000e+00, <16 x float> [[WIDE_LOAD6]])
 ; CHECK-NEXT:    [[TMP13:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP12]], <16 x float> [[WIDE_LOAD7]])
 ; CHECK-NEXT:    [[TMP21:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP13]], <16 x float> [[WIDE_LOAD9]])
-; CHECK-NEXT:    [[TMP19]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP21]], <16 x float> [[WIDE_LOAD8]])
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64
-; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
+; CHECK-NEXT:    [[TMP19:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP21]], <16 x float> [[WIDE_LOAD8]])
+; CHECK-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[BOUND]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
@@ -497,17 +477,13 @@ define { float, float } @two_ordered_reductions(ptr %p, ptr %q, i64 %n) "prefer-
 ; CHECK-NEXT:    [[N_VEC10:%.*]] = sub i64 [[BOUND]], [[N_MOD_VF9]]
 ; CHECK-NEXT:    br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
 ; CHECK:       [[VEC_EPILOG_VECTOR_BODY]]:
-; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL1]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT17:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[BC_MERGE_RDX1]], %[[VEC_EPILOG_PH]] ], [ [[TMP16:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT:    [[BC_MERGE_RDX8:%.*]] = phi float [ [[BC_MERGE_RDX9]], %[[VEC_EPILOG_PH]] ], [ [[TMP17:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr float, ptr [[P]], i64 [[VEC_EPILOG_RESUME_VAL]]
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr float, ptr [[P]], i64 [[VEC_EPILOG_RESUME_VAL1]]
 ; CHECK-NEXT:    [[WIDE_LOAD11:%.*]] = load <16 x float>, ptr [[TMP14]], align 4
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr float, ptr [[Q]], i64 [[VEC_EPILOG_RESUME_VAL]]
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr float, ptr [[Q]], i64 [[VEC_EPILOG_RESUME_VAL1]]
 ; CHECK-NEXT:    [[WIDE_LOAD12:%.*]] = load <16 x float>, ptr [[TMP15]], align 4
-; CHECK-NEXT:    [[TMP16]] = call float @llvm.vector.reduce.fadd.v16f32(float [[BC_MERGE_RDX]], <16 x float> [[WIDE_LOAD11]])
-; CHECK-NEXT:    [[TMP17]] = call float @llvm.vector.reduce.fadd.v16f32(float [[BC_MERGE_RDX8]], <16 x float> [[WIDE_LOAD12]])
-; CHECK-NEXT:    [[INDEX_NEXT17]] = add nuw i64 [[VEC_EPILOG_RESUME_VAL]], 16
-; CHECK-NEXT:    br i1 true, label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
+; CHECK-NEXT:    [[TMP16:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[BC_MERGE_RDX1]], <16 x float> [[WIDE_LOAD11]])
+; CHECK-NEXT:    [[TMP17:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[BC_MERGE_RDX9]], <16 x float> [[WIDE_LOAD12]])
+; CHECK-NEXT:    br label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]]
 ; CHECK:       [[VEC_EPILOG_MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    [[CMP_N13:%.*]] = icmp eq i64 [[BOUND]], [[N_VEC10]]
 ; CHECK-NEXT:    br i1 [[CMP_N13]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll
index 8f347ebf87016..40075357a38c9 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll
@@ -2455,44 +2455,22 @@ define i32 @test_non_unit_stride_five(i64 %len, ptr %test_base) {
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP112:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP113:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP114:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP115:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 5
-; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 5
-; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 10
-; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 15
-; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 20
-; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 25
-; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 30
-; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 35
-; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 40
-; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 45
-; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 50
-; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 55
-; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 60
-; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 65
-; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 70
-; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 75
-; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]]
-; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
-; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
-; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP4]]
-; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP5]]
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP6]]
-; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP7]]
-; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP8]]
-; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP9]]
-; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP10]]
-; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP12]]
-; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP13]]
-; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP14]]
-; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP15]]
-; CHECK-NEXT:    [[TMP32:%.*]] = load i1, ptr [[TMP16]], align 1
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 5
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 10
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 15
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 20
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 25
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 30
+; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 35
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 40
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 45
+; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 50
+; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 55
+; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 60
+; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 65
+; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 70
+; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 75
+; CHECK-NEXT:    [[TMP32:%.*]] = load i1, ptr [[TEST_BASE]], align 1
 ; CHECK-NEXT:    [[TMP33:%.*]] = load i1, ptr [[TMP17]], align 1
 ; CHECK-NEXT:    [[TMP34:%.*]] = load i1, ptr [[TMP18]], align 1
 ; CHECK-NEXT:    [[TMP35:%.*]] = load i1, ptr [[TMP19]], align 1
@@ -2524,23 +2502,22 @@ define i32 @test_non_unit_stride_five(i64 %len, ptr %test_base) {
 ; CHECK-NEXT:    [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
 ; CHECK-NEXT:    [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
 ; CHECK-NEXT:    [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
-; CHECK-NEXT:    [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT:    [[TMP65:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP1]]
-; CHECK-NEXT:    [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP2]]
-; CHECK-NEXT:    [[TMP67:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP3]]
-; CHECK-NEXT:    [[TMP68:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP4]]
-; CHECK-NEXT:    [[TMP69:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP5]]
-; CHECK-NEXT:    [[TMP70:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP6]]
-; CHECK-NEXT:    [[TMP71:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP7]]
-; CHECK-NEXT:    [[TMP72:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP8]]
-; CHECK-NEXT:    [[TMP73:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP9]]
-; CHECK-NEXT:    [[TMP74:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP10]]
-; CHECK-NEXT:    [[TMP75:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP76:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP12]]
-; CHECK-NEXT:    [[TMP77:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP13]]
-; CHECK-NEXT:    [[TMP78:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP14]]
-; CHECK-NEXT:    [[TMP79:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP15]]
-; CHECK-NEXT:    [[TMP80:%.*]] = load i32, ptr [[TMP64]], align 4
+; CHECK-NEXT:    [[TMP65:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 5
+; CHECK-NEXT:    [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 10
+; CHECK-NEXT:    [[TMP67:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 15
+; CHECK-NEXT:    [[TMP68:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 20
+; CHECK-NEXT:    [[TMP69:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 25
+; CHECK-NEXT:    [[TMP70:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 30
+; CHECK-NEXT:    [[TMP71:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 35
+; CHECK-NEXT:    [[TMP72:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 40
+; CHECK-NEXT:    [[TMP73:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 45
+; CHECK-NEXT:    [[TMP74:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 50
+; CHECK-NEXT:    [[TMP75:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 55
+; CHECK-NEXT:    [[TMP76:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 60
+; CHECK-NEXT:    [[TMP77:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 65
+; CHECK-NEXT:    [[TMP78:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 70
+; CHECK-NEXT:    [[TMP79:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 75
+; CHECK-NEXT:    [[TMP80:%.*]] = load i32, ptr [[ALLOCA]], align 4
 ; CHECK-NEXT:    [[TMP81:%.*]] = load i32, ptr [[TMP65]], align 4
 ; CHECK-NEXT:    [[TMP82:%.*]] = load i32, ptr [[TMP66]], align 4
 ; CHECK-NEXT:    [[TMP83:%.*]] = load i32, ptr [[TMP67]], align 4
@@ -2576,16 +2553,12 @@ define i32 @test_non_unit_stride_five(i64 %len, ptr %test_base) {
 ; CHECK-NEXT:    [[PREDPHI4:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[TMP95]], <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[PREDPHI5:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[TMP103]], <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[PREDPHI6:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[TMP111]], <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP112]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]]
-; CHECK-NEXT:    [[TMP113]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI4]]
-; CHECK-NEXT:    [[TMP114]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI5]]
-; CHECK-NEXT:    [[TMP115]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI6]]
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; CHECK-NEXT:    br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
+; CHECK-NEXT:    [[TMP112:%.*]] = add <4 x i32> zeroinitializer, [[PREDPHI]]
+; CHECK-NEXT:    br label [[MIDDLE_BLOCK:%.*]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP113]], [[TMP112]]
-; CHECK-NEXT:    [[BIN_RDX7:%.*]] = add <4 x i32> [[TMP114]], [[BIN_RDX]]
-; CHECK-NEXT:    [[BIN_RDX8:%.*]] = add <4 x i32> [[TMP115]], [[BIN_RDX7]]
+; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[PREDPHI4]], [[TMP112]]
+; CHECK-NEXT:    [[BIN_RDX7:%.*]] = add <4 x i32> [[PREDPHI5]], [[BIN_RDX]]
+; CHECK-NEXT:    [[BIN_RDX8:%.*]] = add <4 x i32> [[PREDPHI6]], [[BIN_RDX7]]
 ; CHECK-NEXT:    [[TMP117:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX8]])
 ; CHECK-NEXT:    br label [[SCALAR_PH:%.*]]
 ; CHECK:       scalar.ph:

diff  --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
index 79e01790d2d94..213e2f47805a2 100644
--- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
@@ -776,10 +776,9 @@ define i16 @test_no_op_or_reduction_single_vector_iteration(i64 %N) {
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[CLAMPED]], [[N_MOD_VF]]
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i16> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[VEC_PHI]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
+; CHECK-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[VEC_PHI]])
+; CHECK-NEXT:    [[TMP0:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> zeroinitializer)
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[CLAMPED]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
 ; CHECK:       [[VEC_EPILOG_ITER_CHECK]]:
@@ -792,10 +791,9 @@ define i16 @test_no_op_or_reduction_single_vector_iteration(i64 %N) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i16> zeroinitializer, i16 [[BC_MERGE_RDX]], i32 0
 ; CHECK-NEXT:    br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
 ; CHECK:       [[VEC_EPILOG_VECTOR_BODY]]:
-; CHECK-NEXT:    [[VEC_PHI4:%.*]] = phi <4 x i16> [ [[TMP1]], %[[VEC_EPILOG_PH]] ], [ [[VEC_PHI4]], %[[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT:    br i1 true, label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
+; CHECK-NEXT:    br label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]]
 ; CHECK:       [[VEC_EPILOG_MIDDLE_BLOCK]]:
-; CHECK-NEXT:    [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[VEC_PHI4]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP1]])
 ; CHECK-NEXT:    [[CMP_N5:%.*]] = icmp eq i64 [[CLAMPED]], [[N_VEC3]]
 ; CHECK-NEXT:    br i1 [[CMP_N5]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
 ; CHECK:       [[VEC_EPILOG_SCALAR_PH]]:
@@ -848,13 +846,9 @@ define i16 @test_or_reduction_with_induction_single_vector_iteration(i64 %N) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = trunc i64 [[N_VEC]] to i32
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i16> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP1:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP1]] = or <4 x i16> [[VEC_PHI]], [[VEC_IND]]
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
-; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
+; CHECK-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
-; CHECK-NEXT:    [[TMP7:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP1]])
+; CHECK-NEXT:    [[TMP7:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> <i16 0, i16 1, i16 2, i16 3>)
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[CLAMPED]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
 ; CHECK:       [[VEC_EPILOG_ITER_CHECK]]:
@@ -873,11 +867,8 @@ define i16 @test_or_reduction_with_induction_single_vector_iteration(i64 %N) {
 ; CHECK-NEXT:    [[INDUCTION:%.*]] = add <4 x i16> [[BROADCAST_SPLAT]], <i16 0, i16 1, i16 2, i16 3>
 ; CHECK-NEXT:    br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
 ; CHECK:       [[VEC_EPILOG_VECTOR_BODY]]:
-; CHECK-NEXT:    [[VEC_PHI4:%.*]] = phi <4 x i16> [ [[TMP3]], %[[VEC_EPILOG_PH]] ], [ [[TMP5:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_IND5:%.*]] = phi <4 x i16> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT6:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP5]] = or <4 x i16> [[VEC_PHI4]], [[VEC_IND5]]
-; CHECK-NEXT:    [[VEC_IND_NEXT6]] = add <4 x i16> [[VEC_IND5]], splat (i16 4)
-; CHECK-NEXT:    br i1 true, label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
+; CHECK-NEXT:    [[TMP5:%.*]] = or <4 x i16> [[TMP3]], [[INDUCTION]]
+; CHECK-NEXT:    br label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]]
 ; CHECK:       [[VEC_EPILOG_MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP5]])
 ; CHECK-NEXT:    [[CMP_N4:%.*]] = icmp eq i64 [[CLAMPED]], [[N_VEC3]]
@@ -947,11 +938,9 @@ define i32 @anyof_reduction_in_dissolved_epilogue(i32 %val, i1 %c) {
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP11]] = or <4 x i1> [[VEC_PHI]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]]
+; CHECK-NEXT:    br label %[[MIDDLE_BLOCK]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
-; CHECK-NEXT:    [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP11]])
+; CHECK-NEXT:    [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[BROADCAST_SPLAT]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = freeze i1 [[TMP3]]
 ; CHECK-NEXT:    [[RDX_SELECT]] = select i1 [[TMP4]], i32 [[OUTER_IV]], i32 0
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
@@ -972,9 +961,8 @@ define i32 @anyof_reduction_in_dissolved_epilogue(i32 %val, i1 %c) {
 ; CHECK-NEXT:    [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT3]], <4 x i1> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
 ; CHECK:       [[VEC_EPILOG_VECTOR_BODY]]:
-; CHECK-NEXT:    [[VEC_PHI5:%.*]] = phi <4 x i1> [ [[BROADCAST_SPLAT4]], %[[VEC_EPILOG_PH]] ], [ [[TMP8:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP8]] = or <4 x i1> [[VEC_PHI5]], [[BROADCAST_SPLAT6]]
-; CHECK-NEXT:    br i1 true, label %[[VEC_EPILOG_MIDDLE_BLOCK]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
+; CHECK-NEXT:    [[TMP8:%.*]] = or <4 x i1> [[BROADCAST_SPLAT4]], [[BROADCAST_SPLAT6]]
+; CHECK-NEXT:    br label %[[VEC_EPILOG_MIDDLE_BLOCK]]
 ; CHECK:       [[VEC_EPILOG_MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = freeze i1 [[TMP9]]
@@ -1075,14 +1063,13 @@ define i64 @reduction_with_ptr_iv_inttoptr_exit_cond(ptr %base, ptr %src) {
 ; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <4 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0
 ; CHECK-NEXT:    br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
 ; CHECK:       [[VEC_EPILOG_VECTOR_BODY]]:
-; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i64> [ [[TMP18]], %[[VEC_EPILOG_PH]] ], [ [[TMP22:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP19:%.*]] = add <4 x i64> [[VEC_PHI3]], splat (i64 1)
+; CHECK-NEXT:    [[TMP19:%.*]] = add <4 x i64> [[TMP18]], splat (i64 1)
 ; CHECK-NEXT:    [[TMP20:%.*]] = load i8, ptr [[SRC]], align 1
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i8> poison, i8 [[TMP20]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT4]], <4 x i8> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP21:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT5]] to <4 x i64>
-; CHECK-NEXT:    [[TMP22]] = add <4 x i64> [[TMP19]], [[TMP21]]
-; CHECK-NEXT:    br i1 true, label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]]
+; CHECK-NEXT:    [[TMP22:%.*]] = add <4 x i64> [[TMP19]], [[TMP21]]
+; CHECK-NEXT:    br label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]]
 ; CHECK:       [[VEC_EPILOG_MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    [[TMP23:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP22]])
 ; CHECK-NEXT:    [[CMP_N6:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC2]]

diff  --git a/llvm/test/Transforms/LoopVectorize/find-last.ll b/llvm/test/Transforms/LoopVectorize/find-last.ll
index 44adff384373d..d777f500945a7 100644
--- a/llvm/test/Transforms/LoopVectorize/find-last.ll
+++ b/llvm/test/Transforms/LoopVectorize/find-last.ll
@@ -280,13 +280,10 @@ define ptr @loop_inv_select_condition_issue_185682(i32 %a, i32 %b) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP1]])
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x ptr> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP4]] = select i1 [[TMP2]], <4 x i1> [[TMP0]], <4 x i1> [[TMP3]]
-; CHECK-NEXT:    [[TMP5]] = select i1 [[TMP2]], <4 x ptr> zeroinitializer, <4 x ptr> [[VEC_PHI]]
-; CHECK-NEXT:    br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], <4 x i1> [[TMP0]], <4 x i1> zeroinitializer
+; CHECK-NEXT:    br label [[MIDDLE_BLOCK:%.*]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[TMP6:%.*]] = call ptr @llvm.experimental.vector.extract.last.active.v4p0(<4 x ptr> [[TMP5]], <4 x i1> [[TMP4]], ptr null)
+; CHECK-NEXT:    [[TMP6:%.*]] = call ptr @llvm.experimental.vector.extract.last.active.v4p0(<4 x ptr> zeroinitializer, <4 x i1> [[TMP3]], ptr null)
 ; CHECK-NEXT:    br label [[EXIT:%.*]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret ptr [[TMP6]]

diff  --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
index dd3ad4d01b465..49ca79bfa2b04 100644
--- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
@@ -194,182 +194,129 @@ define i16 @select_decreasing_induction_icmp_table_i16(i16 noundef %val) {
 ; IC4VF4-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
 ; IC4VF4-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; IC4VF4:       [[VECTOR_BODY]]:
-; IC4VF4-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE44:.*]] ]
-; IC4VF4-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 12, i16 11, i16 10, i16 9>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
-; IC4VF4-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP108:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
-; IC4VF4-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP109:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
-; IC4VF4-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP110:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
-; IC4VF4-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP111:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
-; IC4VF4-NEXT:    [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 -4)
-; IC4VF4-NEXT:    [[STEP_ADD_2:%.*]] = add <4 x i16> [[STEP_ADD]], splat (i16 -4)
-; IC4VF4-NEXT:    [[STEP_ADD_3:%.*]] = add <4 x i16> [[STEP_ADD_2]], splat (i16 -4)
-; IC4VF4-NEXT:    [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
-; IC4VF4-NEXT:    [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]]
-; IC4VF4-NEXT:    [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
-; IC4VF4-NEXT:    [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer
-; IC4VF4-NEXT:    [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 0, i32 1, i32 2, i32 3>
-; IC4VF4-NEXT:    [[VEC_IV8:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 4, i32 5, i32 6, i32 7>
-; IC4VF4-NEXT:    [[VEC_IV11:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 8, i32 9, i32 10, i32 11>
-; IC4VF4-NEXT:    [[VEC_IV14:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 12, i32 13, i32 14, i32 15>
-; IC4VF4-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], splat (i32 11)
-; IC4VF4-NEXT:    [[TMP1:%.*]] = icmp ule <4 x i32> [[VEC_IV8]], splat (i32 11)
-; IC4VF4-NEXT:    [[TMP2:%.*]] = icmp ule <4 x i32> [[VEC_IV11]], splat (i32 11)
-; IC4VF4-NEXT:    [[TMP3:%.*]] = icmp ule <4 x i32> [[VEC_IV14]], splat (i32 11)
-; IC4VF4-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
-; IC4VF4-NEXT:    br i1 [[TMP4]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
+; IC4VF4-NEXT:    br i1 true, label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
 ; IC4VF4:       [[PRED_LOAD_IF]]:
-; IC4VF4-NEXT:    [[TMP5:%.*]] = add i16 [[OFFSET_IDX]], 0
-; IC4VF4-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP5]]
+; IC4VF4-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 12
 ; IC4VF4-NEXT:    [[TMP7:%.*]] = load i16, ptr [[TMP6]], align 1
 ; IC4VF4-NEXT:    [[TMP8:%.*]] = insertelement <4 x i16> poison, i16 [[TMP7]], i32 0
 ; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE]]
 ; IC4VF4:       [[PRED_LOAD_CONTINUE]]:
 ; IC4VF4-NEXT:    [[TMP9:%.*]] = phi <4 x i16> [ poison, %[[VECTOR_BODY]] ], [ [[TMP8]], %[[PRED_LOAD_IF]] ]
-; IC4VF4-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
-; IC4VF4-NEXT:    br i1 [[TMP10]], label %[[PRED_LOAD_IF15:.*]], label %[[PRED_LOAD_CONTINUE16:.*]]
+; IC4VF4-NEXT:    br i1 true, label %[[PRED_LOAD_IF15:.*]], label %[[PRED_LOAD_CONTINUE16:.*]]
 ; IC4VF4:       [[PRED_LOAD_IF15]]:
-; IC4VF4-NEXT:    [[TMP11:%.*]] = add i16 [[OFFSET_IDX]], -1
-; IC4VF4-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP11]]
+; IC4VF4-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 11
 ; IC4VF4-NEXT:    [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 1
 ; IC4VF4-NEXT:    [[TMP14:%.*]] = insertelement <4 x i16> [[TMP9]], i16 [[TMP13]], i32 1
 ; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE16]]
 ; IC4VF4:       [[PRED_LOAD_CONTINUE16]]:
 ; IC4VF4-NEXT:    [[TMP15:%.*]] = phi <4 x i16> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF15]] ]
-; IC4VF4-NEXT:    [[TMP16:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
-; IC4VF4-NEXT:    br i1 [[TMP16]], label %[[PRED_LOAD_IF17:.*]], label %[[PRED_LOAD_CONTINUE18:.*]]
+; IC4VF4-NEXT:    br i1 true, label %[[PRED_LOAD_IF17:.*]], label %[[PRED_LOAD_CONTINUE18:.*]]
 ; IC4VF4:       [[PRED_LOAD_IF17]]:
-; IC4VF4-NEXT:    [[TMP17:%.*]] = add i16 [[OFFSET_IDX]], -2
-; IC4VF4-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP17]]
+; IC4VF4-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 10
 ; IC4VF4-NEXT:    [[TMP19:%.*]] = load i16, ptr [[TMP18]], align 1
 ; IC4VF4-NEXT:    [[TMP20:%.*]] = insertelement <4 x i16> [[TMP15]], i16 [[TMP19]], i32 2
 ; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE18]]
 ; IC4VF4:       [[PRED_LOAD_CONTINUE18]]:
 ; IC4VF4-NEXT:    [[TMP21:%.*]] = phi <4 x i16> [ [[TMP15]], %[[PRED_LOAD_CONTINUE16]] ], [ [[TMP20]], %[[PRED_LOAD_IF17]] ]
-; IC4VF4-NEXT:    [[TMP22:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
-; IC4VF4-NEXT:    br i1 [[TMP22]], label %[[PRED_LOAD_IF19:.*]], label %[[PRED_LOAD_CONTINUE20:.*]]
+; IC4VF4-NEXT:    br i1 true, label %[[PRED_LOAD_IF19:.*]], label %[[PRED_LOAD_CONTINUE20:.*]]
 ; IC4VF4:       [[PRED_LOAD_IF19]]:
-; IC4VF4-NEXT:    [[TMP23:%.*]] = add i16 [[OFFSET_IDX]], -3
-; IC4VF4-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP23]]
+; IC4VF4-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 9
 ; IC4VF4-NEXT:    [[TMP25:%.*]] = load i16, ptr [[TMP24]], align 1
 ; IC4VF4-NEXT:    [[TMP26:%.*]] = insertelement <4 x i16> [[TMP21]], i16 [[TMP25]], i32 3
 ; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE20]]
 ; IC4VF4:       [[PRED_LOAD_CONTINUE20]]:
 ; IC4VF4-NEXT:    [[TMP27:%.*]] = phi <4 x i16> [ [[TMP21]], %[[PRED_LOAD_CONTINUE18]] ], [ [[TMP26]], %[[PRED_LOAD_IF19]] ]
-; IC4VF4-NEXT:    [[TMP28:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
-; IC4VF4-NEXT:    br i1 [[TMP28]], label %[[PRED_LOAD_IF21:.*]], label %[[PRED_LOAD_CONTINUE22:.*]]
+; IC4VF4-NEXT:    br i1 true, label %[[PRED_LOAD_IF21:.*]], label %[[PRED_LOAD_CONTINUE22:.*]]
 ; IC4VF4:       [[PRED_LOAD_IF21]]:
-; IC4VF4-NEXT:    [[TMP29:%.*]] = add i16 [[OFFSET_IDX]], -4
-; IC4VF4-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP29]]
+; IC4VF4-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 8
 ; IC4VF4-NEXT:    [[TMP31:%.*]] = load i16, ptr [[TMP30]], align 1
 ; IC4VF4-NEXT:    [[TMP32:%.*]] = insertelement <4 x i16> poison, i16 [[TMP31]], i32 0
 ; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE22]]
 ; IC4VF4:       [[PRED_LOAD_CONTINUE22]]:
 ; IC4VF4-NEXT:    [[TMP33:%.*]] = phi <4 x i16> [ poison, %[[PRED_LOAD_CONTINUE20]] ], [ [[TMP32]], %[[PRED_LOAD_IF21]] ]
-; IC4VF4-NEXT:    [[TMP34:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
-; IC4VF4-NEXT:    br i1 [[TMP34]], label %[[PRED_LOAD_IF23:.*]], label %[[PRED_LOAD_CONTINUE24:.*]]
+; IC4VF4-NEXT:    br i1 true, label %[[PRED_LOAD_IF23:.*]], label %[[PRED_LOAD_CONTINUE24:.*]]
 ; IC4VF4:       [[PRED_LOAD_IF23]]:
-; IC4VF4-NEXT:    [[TMP35:%.*]] = add i16 [[OFFSET_IDX]], -5
-; IC4VF4-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP35]]
+; IC4VF4-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 7
 ; IC4VF4-NEXT:    [[TMP37:%.*]] = load i16, ptr [[TMP36]], align 1
 ; IC4VF4-NEXT:    [[TMP38:%.*]] = insertelement <4 x i16> [[TMP33]], i16 [[TMP37]], i32 1
 ; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE24]]
 ; IC4VF4:       [[PRED_LOAD_CONTINUE24]]:
 ; IC4VF4-NEXT:    [[TMP39:%.*]] = phi <4 x i16> [ [[TMP33]], %[[PRED_LOAD_CONTINUE22]] ], [ [[TMP38]], %[[PRED_LOAD_IF23]] ]
-; IC4VF4-NEXT:    [[TMP40:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
-; IC4VF4-NEXT:    br i1 [[TMP40]], label %[[PRED_LOAD_IF25:.*]], label %[[PRED_LOAD_CONTINUE26:.*]]
+; IC4VF4-NEXT:    br i1 true, label %[[PRED_LOAD_IF25:.*]], label %[[PRED_LOAD_CONTINUE26:.*]]
 ; IC4VF4:       [[PRED_LOAD_IF25]]:
-; IC4VF4-NEXT:    [[TMP41:%.*]] = add i16 [[OFFSET_IDX]], -6
-; IC4VF4-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP41]]
+; IC4VF4-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 6
 ; IC4VF4-NEXT:    [[TMP43:%.*]] = load i16, ptr [[TMP42]], align 1
 ; IC4VF4-NEXT:    [[TMP44:%.*]] = insertelement <4 x i16> [[TMP39]], i16 [[TMP43]], i32 2
 ; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE26]]
 ; IC4VF4:       [[PRED_LOAD_CONTINUE26]]:
 ; IC4VF4-NEXT:    [[TMP45:%.*]] = phi <4 x i16> [ [[TMP39]], %[[PRED_LOAD_CONTINUE24]] ], [ [[TMP44]], %[[PRED_LOAD_IF25]] ]
-; IC4VF4-NEXT:    [[TMP46:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
-; IC4VF4-NEXT:    br i1 [[TMP46]], label %[[PRED_LOAD_IF27:.*]], label %[[PRED_LOAD_CONTINUE28:.*]]
+; IC4VF4-NEXT:    br i1 true, label %[[PRED_LOAD_IF27:.*]], label %[[PRED_LOAD_CONTINUE28:.*]]
 ; IC4VF4:       [[PRED_LOAD_IF27]]:
-; IC4VF4-NEXT:    [[TMP47:%.*]] = add i16 [[OFFSET_IDX]], -7
-; IC4VF4-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP47]]
+; IC4VF4-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 5
 ; IC4VF4-NEXT:    [[TMP49:%.*]] = load i16, ptr [[TMP48]], align 1
 ; IC4VF4-NEXT:    [[TMP50:%.*]] = insertelement <4 x i16> [[TMP45]], i16 [[TMP49]], i32 3
 ; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE28]]
 ; IC4VF4:       [[PRED_LOAD_CONTINUE28]]:
 ; IC4VF4-NEXT:    [[TMP51:%.*]] = phi <4 x i16> [ [[TMP45]], %[[PRED_LOAD_CONTINUE26]] ], [ [[TMP50]], %[[PRED_LOAD_IF27]] ]
-; IC4VF4-NEXT:    [[TMP52:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
-; IC4VF4-NEXT:    br i1 [[TMP52]], label %[[PRED_LOAD_IF29:.*]], label %[[PRED_LOAD_CONTINUE30:.*]]
+; IC4VF4-NEXT:    br i1 true, label %[[PRED_LOAD_IF29:.*]], label %[[PRED_LOAD_CONTINUE30:.*]]
 ; IC4VF4:       [[PRED_LOAD_IF29]]:
-; IC4VF4-NEXT:    [[TMP53:%.*]] = add i16 [[OFFSET_IDX]], -8
-; IC4VF4-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP53]]
+; IC4VF4-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 4
 ; IC4VF4-NEXT:    [[TMP55:%.*]] = load i16, ptr [[TMP54]], align 1
 ; IC4VF4-NEXT:    [[TMP56:%.*]] = insertelement <4 x i16> poison, i16 [[TMP55]], i32 0
 ; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE30]]
 ; IC4VF4:       [[PRED_LOAD_CONTINUE30]]:
 ; IC4VF4-NEXT:    [[TMP57:%.*]] = phi <4 x i16> [ poison, %[[PRED_LOAD_CONTINUE28]] ], [ [[TMP56]], %[[PRED_LOAD_IF29]] ]
-; IC4VF4-NEXT:    [[TMP58:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
-; IC4VF4-NEXT:    br i1 [[TMP58]], label %[[PRED_LOAD_IF31:.*]], label %[[PRED_LOAD_CONTINUE32:.*]]
+; IC4VF4-NEXT:    br i1 true, label %[[PRED_LOAD_IF31:.*]], label %[[PRED_LOAD_CONTINUE32:.*]]
 ; IC4VF4:       [[PRED_LOAD_IF31]]:
-; IC4VF4-NEXT:    [[TMP59:%.*]] = add i16 [[OFFSET_IDX]], -9
-; IC4VF4-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP59]]
+; IC4VF4-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 3
 ; IC4VF4-NEXT:    [[TMP61:%.*]] = load i16, ptr [[TMP60]], align 1
 ; IC4VF4-NEXT:    [[TMP62:%.*]] = insertelement <4 x i16> [[TMP57]], i16 [[TMP61]], i32 1
 ; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE32]]
 ; IC4VF4:       [[PRED_LOAD_CONTINUE32]]:
 ; IC4VF4-NEXT:    [[TMP63:%.*]] = phi <4 x i16> [ [[TMP57]], %[[PRED_LOAD_CONTINUE30]] ], [ [[TMP62]], %[[PRED_LOAD_IF31]] ]
-; IC4VF4-NEXT:    [[TMP64:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
-; IC4VF4-NEXT:    br i1 [[TMP64]], label %[[PRED_LOAD_IF33:.*]], label %[[PRED_LOAD_CONTINUE34:.*]]
+; IC4VF4-NEXT:    br i1 true, label %[[PRED_LOAD_IF33:.*]], label %[[PRED_LOAD_CONTINUE34:.*]]
 ; IC4VF4:       [[PRED_LOAD_IF33]]:
-; IC4VF4-NEXT:    [[TMP65:%.*]] = add i16 [[OFFSET_IDX]], -10
-; IC4VF4-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP65]]
+; IC4VF4-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 2
 ; IC4VF4-NEXT:    [[TMP67:%.*]] = load i16, ptr [[TMP66]], align 1
 ; IC4VF4-NEXT:    [[TMP68:%.*]] = insertelement <4 x i16> [[TMP63]], i16 [[TMP67]], i32 2
 ; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE34]]
 ; IC4VF4:       [[PRED_LOAD_CONTINUE34]]:
 ; IC4VF4-NEXT:    [[TMP69:%.*]] = phi <4 x i16> [ [[TMP63]], %[[PRED_LOAD_CONTINUE32]] ], [ [[TMP68]], %[[PRED_LOAD_IF33]] ]
-; IC4VF4-NEXT:    [[TMP70:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
-; IC4VF4-NEXT:    br i1 [[TMP70]], label %[[PRED_LOAD_IF35:.*]], label %[[PRED_LOAD_CONTINUE36:.*]]
+; IC4VF4-NEXT:    br i1 true, label %[[PRED_LOAD_IF35:.*]], label %[[PRED_LOAD_CONTINUE36:.*]]
 ; IC4VF4:       [[PRED_LOAD_IF35]]:
-; IC4VF4-NEXT:    [[TMP71:%.*]] = add i16 [[OFFSET_IDX]], -11
-; IC4VF4-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP71]]
+; IC4VF4-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 1
 ; IC4VF4-NEXT:    [[TMP73:%.*]] = load i16, ptr [[TMP72]], align 1
 ; IC4VF4-NEXT:    [[TMP74:%.*]] = insertelement <4 x i16> [[TMP69]], i16 [[TMP73]], i32 3
 ; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE36]]
 ; IC4VF4:       [[PRED_LOAD_CONTINUE36]]:
 ; IC4VF4-NEXT:    [[TMP75:%.*]] = phi <4 x i16> [ [[TMP69]], %[[PRED_LOAD_CONTINUE34]] ], [ [[TMP74]], %[[PRED_LOAD_IF35]] ]
-; IC4VF4-NEXT:    [[TMP76:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
-; IC4VF4-NEXT:    br i1 [[TMP76]], label %[[PRED_LOAD_IF37:.*]], label %[[PRED_LOAD_CONTINUE38:.*]]
+; IC4VF4-NEXT:    br i1 false, label %[[PRED_LOAD_IF37:.*]], label %[[PRED_LOAD_CONTINUE38:.*]]
 ; IC4VF4:       [[PRED_LOAD_IF37]]:
-; IC4VF4-NEXT:    [[TMP77:%.*]] = add i16 [[OFFSET_IDX]], -12
-; IC4VF4-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP77]]
+; IC4VF4-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 0
 ; IC4VF4-NEXT:    [[TMP79:%.*]] = load i16, ptr [[TMP78]], align 1
 ; IC4VF4-NEXT:    [[TMP80:%.*]] = insertelement <4 x i16> poison, i16 [[TMP79]], i32 0
 ; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE38]]
 ; IC4VF4:       [[PRED_LOAD_CONTINUE38]]:
 ; IC4VF4-NEXT:    [[TMP81:%.*]] = phi <4 x i16> [ poison, %[[PRED_LOAD_CONTINUE36]] ], [ [[TMP80]], %[[PRED_LOAD_IF37]] ]
-; IC4VF4-NEXT:    [[TMP82:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
-; IC4VF4-NEXT:    br i1 [[TMP82]], label %[[PRED_LOAD_IF39:.*]], label %[[PRED_LOAD_CONTINUE40:.*]]
+; IC4VF4-NEXT:    br i1 false, label %[[PRED_LOAD_IF39:.*]], label %[[PRED_LOAD_CONTINUE40:.*]]
 ; IC4VF4:       [[PRED_LOAD_IF39]]:
-; IC4VF4-NEXT:    [[TMP83:%.*]] = add i16 [[OFFSET_IDX]], -13
-; IC4VF4-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP83]]
+; IC4VF4-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 -1
 ; IC4VF4-NEXT:    [[TMP85:%.*]] = load i16, ptr [[TMP84]], align 1
 ; IC4VF4-NEXT:    [[TMP86:%.*]] = insertelement <4 x i16> [[TMP81]], i16 [[TMP85]], i32 1
 ; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE40]]
 ; IC4VF4:       [[PRED_LOAD_CONTINUE40]]:
 ; IC4VF4-NEXT:    [[TMP87:%.*]] = phi <4 x i16> [ [[TMP81]], %[[PRED_LOAD_CONTINUE38]] ], [ [[TMP86]], %[[PRED_LOAD_IF39]] ]
-; IC4VF4-NEXT:    [[TMP88:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
-; IC4VF4-NEXT:    br i1 [[TMP88]], label %[[PRED_LOAD_IF41:.*]], label %[[PRED_LOAD_CONTINUE42:.*]]
+; IC4VF4-NEXT:    br i1 false, label %[[PRED_LOAD_IF41:.*]], label %[[PRED_LOAD_CONTINUE42:.*]]
 ; IC4VF4:       [[PRED_LOAD_IF41]]:
-; IC4VF4-NEXT:    [[TMP89:%.*]] = add i16 [[OFFSET_IDX]], -14
-; IC4VF4-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP89]]
+; IC4VF4-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 -2
 ; IC4VF4-NEXT:    [[TMP91:%.*]] = load i16, ptr [[TMP90]], align 1
 ; IC4VF4-NEXT:    [[TMP92:%.*]] = insertelement <4 x i16> [[TMP87]], i16 [[TMP91]], i32 2
 ; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE42]]
 ; IC4VF4:       [[PRED_LOAD_CONTINUE42]]:
 ; IC4VF4-NEXT:    [[TMP93:%.*]] = phi <4 x i16> [ [[TMP87]], %[[PRED_LOAD_CONTINUE40]] ], [ [[TMP92]], %[[PRED_LOAD_IF41]] ]
-; IC4VF4-NEXT:    [[TMP94:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
-; IC4VF4-NEXT:    br i1 [[TMP94]], label %[[PRED_LOAD_IF43:.*]], label %[[PRED_LOAD_CONTINUE44]]
+; IC4VF4-NEXT:    br i1 false, label %[[PRED_LOAD_IF43:.*]], label %[[PRED_LOAD_CONTINUE44:.*]]
 ; IC4VF4:       [[PRED_LOAD_IF43]]:
-; IC4VF4-NEXT:    [[TMP95:%.*]] = add i16 [[OFFSET_IDX]], -15
-; IC4VF4-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP95]]
+; IC4VF4-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 -3
 ; IC4VF4-NEXT:    [[TMP97:%.*]] = load i16, ptr [[TMP96]], align 1
 ; IC4VF4-NEXT:    [[TMP98:%.*]] = insertelement <4 x i16> [[TMP93]], i16 [[TMP97]], i32 3
 ; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE44]]
@@ -379,22 +326,16 @@ define i16 @select_decreasing_induction_icmp_table_i16(i16 noundef %val) {
 ; IC4VF4-NEXT:    [[TMP101:%.*]] = icmp ugt <4 x i16> [[TMP51]], [[BROADCAST_SPLAT]]
 ; IC4VF4-NEXT:    [[TMP102:%.*]] = icmp ugt <4 x i16> [[TMP75]], [[BROADCAST_SPLAT]]
 ; IC4VF4-NEXT:    [[TMP103:%.*]] = icmp ugt <4 x i16> [[TMP99]], [[BROADCAST_SPLAT]]
-; IC4VF4-NEXT:    [[TMP104:%.*]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -1)
-; IC4VF4-NEXT:    [[TMP105:%.*]] = add nsw <4 x i16> [[STEP_ADD]], splat (i16 -1)
-; IC4VF4-NEXT:    [[TMP106:%.*]] = add nsw <4 x i16> [[STEP_ADD_2]], splat (i16 -1)
-; IC4VF4-NEXT:    [[TMP107:%.*]] = add nsw <4 x i16> [[STEP_ADD_3]], splat (i16 -1)
-; IC4VF4-NEXT:    [[TMP108]] = select <4 x i1> [[TMP100]], <4 x i16> [[TMP104]], <4 x i16> [[VEC_PHI]]
-; IC4VF4-NEXT:    [[TMP109]] = select <4 x i1> [[TMP101]], <4 x i16> [[TMP105]], <4 x i16> [[VEC_PHI1]]
-; IC4VF4-NEXT:    [[TMP110]] = select <4 x i1> [[TMP102]], <4 x i16> [[TMP106]], <4 x i16> [[VEC_PHI2]]
-; IC4VF4-NEXT:    [[TMP111]] = select <4 x i1> [[TMP103]], <4 x i16> [[TMP107]], <4 x i16> [[VEC_PHI3]]
-; IC4VF4-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
-; IC4VF4-NEXT:    [[VEC_IND_NEXT]] = add nsw <4 x i16> [[STEP_ADD_3]], splat (i16 -4)
-; IC4VF4-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; IC4VF4-NEXT:    [[TMP76:%.*]] = select <4 x i1> [[TMP100]], <4 x i16> <i16 11, i16 10, i16 9, i16 8>, <4 x i16> splat (i16 32767)
+; IC4VF4-NEXT:    [[TMP77:%.*]] = select <4 x i1> [[TMP101]], <4 x i16> <i16 7, i16 6, i16 5, i16 4>, <4 x i16> splat (i16 32767)
+; IC4VF4-NEXT:    [[TMP70:%.*]] = select <4 x i1> [[TMP102]], <4 x i16> <i16 3, i16 2, i16 1, i16 0>, <4 x i16> splat (i16 32767)
+; IC4VF4-NEXT:    [[TMP71:%.*]] = select <4 x i1> [[TMP103]], <4 x i16> <i16 -1, i16 -2, i16 -3, i16 -4>, <4 x i16> splat (i16 32767)
+; IC4VF4-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; IC4VF4:       [[MIDDLE_BLOCK]]:
-; IC4VF4-NEXT:    [[TMP112:%.*]] = select <4 x i1> [[TMP0]], <4 x i16> [[TMP108]], <4 x i16> [[VEC_PHI]]
-; IC4VF4-NEXT:    [[TMP113:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[TMP109]], <4 x i16> [[VEC_PHI1]]
-; IC4VF4-NEXT:    [[TMP114:%.*]] = select <4 x i1> [[TMP2]], <4 x i16> [[TMP110]], <4 x i16> [[VEC_PHI2]]
-; IC4VF4-NEXT:    [[TMP115:%.*]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP111]], <4 x i16> [[VEC_PHI3]]
+; IC4VF4-NEXT:    [[TMP112:%.*]] = select <4 x i1> splat (i1 true), <4 x i16> [[TMP76]], <4 x i16> splat (i16 32767)
+; IC4VF4-NEXT:    [[TMP113:%.*]] = select <4 x i1> splat (i1 true), <4 x i16> [[TMP77]], <4 x i16> splat (i16 32767)
+; IC4VF4-NEXT:    [[TMP114:%.*]] = select <4 x i1> splat (i1 true), <4 x i16> [[TMP70]], <4 x i16> splat (i16 32767)
+; IC4VF4-NEXT:    [[TMP115:%.*]] = select <4 x i1> zeroinitializer, <4 x i16> [[TMP71]], <4 x i16> splat (i16 32767)
 ; IC4VF4-NEXT:    [[RDX_MINMAX:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP112]], <4 x i16> [[TMP113]])
 ; IC4VF4-NEXT:    [[RDX_MINMAX45:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX]], <4 x i16> [[TMP114]])
 ; IC4VF4-NEXT:    [[RDX_MINMAX46:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX45]], <4 x i16> [[TMP115]])
@@ -519,182 +460,129 @@ define i16 @select_decreasing_induction_icmp_table_half(half noundef %val) {
 ; IC4VF4-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x half> [[BROADCAST_SPLATINSERT]], <4 x half> poison, <4 x i32> zeroinitializer
 ; IC4VF4-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; IC4VF4:       [[VECTOR_BODY]]:
-; IC4VF4-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE44:.*]] ]
-; IC4VF4-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 12, i16 11, i16 10, i16 9>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
-; IC4VF4-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP108:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
-; IC4VF4-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP109:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
-; IC4VF4-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP110:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
-; IC4VF4-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP111:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
-; IC4VF4-NEXT:    [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 -4)
-; IC4VF4-NEXT:    [[STEP_ADD_2:%.*]] = add <4 x i16> [[STEP_ADD]], splat (i16 -4)
-; IC4VF4-NEXT:    [[STEP_ADD_3:%.*]] = add <4 x i16> [[STEP_ADD_2]], splat (i16 -4)
-; IC4VF4-NEXT:    [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
-; IC4VF4-NEXT:    [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]]
-; IC4VF4-NEXT:    [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
-; IC4VF4-NEXT:    [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer
-; IC4VF4-NEXT:    [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 0, i32 1, i32 2, i32 3>
-; IC4VF4-NEXT:    [[VEC_IV8:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 4, i32 5, i32 6, i32 7>
-; IC4VF4-NEXT:    [[VEC_IV11:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 8, i32 9, i32 10, i32 11>
-; IC4VF4-NEXT:    [[VEC_IV14:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 12, i32 13, i32 14, i32 15>
-; IC4VF4-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], splat (i32 11)
-; IC4VF4-NEXT:    [[TMP1:%.*]] = icmp ule <4 x i32> [[VEC_IV8]], splat (i32 11)
-; IC4VF4-NEXT:    [[TMP2:%.*]] = icmp ule <4 x i32> [[VEC_IV11]], splat (i32 11)
-; IC4VF4-NEXT:    [[TMP3:%.*]] = icmp ule <4 x i32> [[VEC_IV14]], splat (i32 11)
-; IC4VF4-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
-; IC4VF4-NEXT:    br i1 [[TMP4]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
+; IC4VF4-NEXT:    br i1 true, label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
 ; IC4VF4:       [[PRED_LOAD_IF]]:
-; IC4VF4-NEXT:    [[TMP5:%.*]] = add i16 [[OFFSET_IDX]], 0
-; IC4VF4-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP5]]
+; IC4VF4-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 12
 ; IC4VF4-NEXT:    [[TMP7:%.*]] = load half, ptr [[TMP6]], align 1
 ; IC4VF4-NEXT:    [[TMP8:%.*]] = insertelement <4 x half> poison, half [[TMP7]], i32 0
 ; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE]]
 ; IC4VF4:       [[PRED_LOAD_CONTINUE]]:
 ; IC4VF4-NEXT:    [[TMP9:%.*]] = phi <4 x half> [ poison, %[[VECTOR_BODY]] ], [ [[TMP8]], %[[PRED_LOAD_IF]] ]
-; IC4VF4-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
-; IC4VF4-NEXT:    br i1 [[TMP10]], label %[[PRED_LOAD_IF15:.*]], label %[[PRED_LOAD_CONTINUE16:.*]]
+; IC4VF4-NEXT:    br i1 true, label %[[PRED_LOAD_IF15:.*]], label %[[PRED_LOAD_CONTINUE16:.*]]
 ; IC4VF4:       [[PRED_LOAD_IF15]]:
-; IC4VF4-NEXT:    [[TMP11:%.*]] = add i16 [[OFFSET_IDX]], -1
-; IC4VF4-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP11]]
+; IC4VF4-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 11
 ; IC4VF4-NEXT:    [[TMP13:%.*]] = load half, ptr [[TMP12]], align 1
 ; IC4VF4-NEXT:    [[TMP14:%.*]] = insertelement <4 x half> [[TMP9]], half [[TMP13]], i32 1
 ; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE16]]
 ; IC4VF4:       [[PRED_LOAD_CONTINUE16]]:
 ; IC4VF4-NEXT:    [[TMP15:%.*]] = phi <4 x half> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF15]] ]
-; IC4VF4-NEXT:    [[TMP16:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
-; IC4VF4-NEXT:    br i1 [[TMP16]], label %[[PRED_LOAD_IF17:.*]], label %[[PRED_LOAD_CONTINUE18:.*]]
+; IC4VF4-NEXT:    br i1 true, label %[[PRED_LOAD_IF17:.*]], label %[[PRED_LOAD_CONTINUE18:.*]]
 ; IC4VF4:       [[PRED_LOAD_IF17]]:
-; IC4VF4-NEXT:    [[TMP17:%.*]] = add i16 [[OFFSET_IDX]], -2
-; IC4VF4-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP17]]
+; IC4VF4-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 10
 ; IC4VF4-NEXT:    [[TMP19:%.*]] = load half, ptr [[TMP18]], align 1
 ; IC4VF4-NEXT:    [[TMP20:%.*]] = insertelement <4 x half> [[TMP15]], half [[TMP19]], i32 2
 ; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE18]]
 ; IC4VF4:       [[PRED_LOAD_CONTINUE18]]:
 ; IC4VF4-NEXT:    [[TMP21:%.*]] = phi <4 x half> [ [[TMP15]], %[[PRED_LOAD_CONTINUE16]] ], [ [[TMP20]], %[[PRED_LOAD_IF17]] ]
-; IC4VF4-NEXT:    [[TMP22:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
-; IC4VF4-NEXT:    br i1 [[TMP22]], label %[[PRED_LOAD_IF19:.*]], label %[[PRED_LOAD_CONTINUE20:.*]]
+; IC4VF4-NEXT:    br i1 true, label %[[PRED_LOAD_IF19:.*]], label %[[PRED_LOAD_CONTINUE20:.*]]
 ; IC4VF4:       [[PRED_LOAD_IF19]]:
-; IC4VF4-NEXT:    [[TMP23:%.*]] = add i16 [[OFFSET_IDX]], -3
-; IC4VF4-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP23]]
+; IC4VF4-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 9
 ; IC4VF4-NEXT:    [[TMP25:%.*]] = load half, ptr [[TMP24]], align 1
 ; IC4VF4-NEXT:    [[TMP26:%.*]] = insertelement <4 x half> [[TMP21]], half [[TMP25]], i32 3
 ; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE20]]
 ; IC4VF4:       [[PRED_LOAD_CONTINUE20]]:
 ; IC4VF4-NEXT:    [[TMP27:%.*]] = phi <4 x half> [ [[TMP21]], %[[PRED_LOAD_CONTINUE18]] ], [ [[TMP26]], %[[PRED_LOAD_IF19]] ]
-; IC4VF4-NEXT:    [[TMP28:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
-; IC4VF4-NEXT:    br i1 [[TMP28]], label %[[PRED_LOAD_IF21:.*]], label %[[PRED_LOAD_CONTINUE22:.*]]
+; IC4VF4-NEXT:    br i1 true, label %[[PRED_LOAD_IF21:.*]], label %[[PRED_LOAD_CONTINUE22:.*]]
 ; IC4VF4:       [[PRED_LOAD_IF21]]:
-; IC4VF4-NEXT:    [[TMP29:%.*]] = add i16 [[OFFSET_IDX]], -4
-; IC4VF4-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP29]]
+; IC4VF4-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 8
 ; IC4VF4-NEXT:    [[TMP31:%.*]] = load half, ptr [[TMP30]], align 1
 ; IC4VF4-NEXT:    [[TMP32:%.*]] = insertelement <4 x half> poison, half [[TMP31]], i32 0
 ; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE22]]
 ; IC4VF4:       [[PRED_LOAD_CONTINUE22]]:
 ; IC4VF4-NEXT:    [[TMP33:%.*]] = phi <4 x half> [ poison, %[[PRED_LOAD_CONTINUE20]] ], [ [[TMP32]], %[[PRED_LOAD_IF21]] ]
-; IC4VF4-NEXT:    [[TMP34:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
-; IC4VF4-NEXT:    br i1 [[TMP34]], label %[[PRED_LOAD_IF23:.*]], label %[[PRED_LOAD_CONTINUE24:.*]]
+; IC4VF4-NEXT:    br i1 true, label %[[PRED_LOAD_IF23:.*]], label %[[PRED_LOAD_CONTINUE24:.*]]
 ; IC4VF4:       [[PRED_LOAD_IF23]]:
-; IC4VF4-NEXT:    [[TMP35:%.*]] = add i16 [[OFFSET_IDX]], -5
-; IC4VF4-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP35]]
+; IC4VF4-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 7
 ; IC4VF4-NEXT:    [[TMP37:%.*]] = load half, ptr [[TMP36]], align 1
 ; IC4VF4-NEXT:    [[TMP38:%.*]] = insertelement <4 x half> [[TMP33]], half [[TMP37]], i32 1
 ; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE24]]
 ; IC4VF4:       [[PRED_LOAD_CONTINUE24]]:
 ; IC4VF4-NEXT:    [[TMP39:%.*]] = phi <4 x half> [ [[TMP33]], %[[PRED_LOAD_CONTINUE22]] ], [ [[TMP38]], %[[PRED_LOAD_IF23]] ]
-; IC4VF4-NEXT:    [[TMP40:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
-; IC4VF4-NEXT:    br i1 [[TMP40]], label %[[PRED_LOAD_IF25:.*]], label %[[PRED_LOAD_CONTINUE26:.*]]
+; IC4VF4-NEXT:    br i1 true, label %[[PRED_LOAD_IF25:.*]], label %[[PRED_LOAD_CONTINUE26:.*]]
 ; IC4VF4:       [[PRED_LOAD_IF25]]:
-; IC4VF4-NEXT:    [[TMP41:%.*]] = add i16 [[OFFSET_IDX]], -6
-; IC4VF4-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP41]]
+; IC4VF4-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 6
 ; IC4VF4-NEXT:    [[TMP43:%.*]] = load half, ptr [[TMP42]], align 1
 ; IC4VF4-NEXT:    [[TMP44:%.*]] = insertelement <4 x half> [[TMP39]], half [[TMP43]], i32 2
 ; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE26]]
 ; IC4VF4:       [[PRED_LOAD_CONTINUE26]]:
 ; IC4VF4-NEXT:    [[TMP45:%.*]] = phi <4 x half> [ [[TMP39]], %[[PRED_LOAD_CONTINUE24]] ], [ [[TMP44]], %[[PRED_LOAD_IF25]] ]
-; IC4VF4-NEXT:    [[TMP46:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
-; IC4VF4-NEXT:    br i1 [[TMP46]], label %[[PRED_LOAD_IF27:.*]], label %[[PRED_LOAD_CONTINUE28:.*]]
+; IC4VF4-NEXT:    br i1 true, label %[[PRED_LOAD_IF27:.*]], label %[[PRED_LOAD_CONTINUE28:.*]]
 ; IC4VF4:       [[PRED_LOAD_IF27]]:
-; IC4VF4-NEXT:    [[TMP47:%.*]] = add i16 [[OFFSET_IDX]], -7
-; IC4VF4-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP47]]
+; IC4VF4-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 5
 ; IC4VF4-NEXT:    [[TMP49:%.*]] = load half, ptr [[TMP48]], align 1
 ; IC4VF4-NEXT:    [[TMP50:%.*]] = insertelement <4 x half> [[TMP45]], half [[TMP49]], i32 3
 ; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE28]]
 ; IC4VF4:       [[PRED_LOAD_CONTINUE28]]:
 ; IC4VF4-NEXT:    [[TMP51:%.*]] = phi <4 x half> [ [[TMP45]], %[[PRED_LOAD_CONTINUE26]] ], [ [[TMP50]], %[[PRED_LOAD_IF27]] ]
-; IC4VF4-NEXT:    [[TMP52:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
-; IC4VF4-NEXT:    br i1 [[TMP52]], label %[[PRED_LOAD_IF29:.*]], label %[[PRED_LOAD_CONTINUE30:.*]]
+; IC4VF4-NEXT:    br i1 true, label %[[PRED_LOAD_IF29:.*]], label %[[PRED_LOAD_CONTINUE30:.*]]
 ; IC4VF4:       [[PRED_LOAD_IF29]]:
-; IC4VF4-NEXT:    [[TMP53:%.*]] = add i16 [[OFFSET_IDX]], -8
-; IC4VF4-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP53]]
+; IC4VF4-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 4
 ; IC4VF4-NEXT:    [[TMP55:%.*]] = load half, ptr [[TMP54]], align 1
 ; IC4VF4-NEXT:    [[TMP56:%.*]] = insertelement <4 x half> poison, half [[TMP55]], i32 0
 ; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE30]]
 ; IC4VF4:       [[PRED_LOAD_CONTINUE30]]:
 ; IC4VF4-NEXT:    [[TMP57:%.*]] = phi <4 x half> [ poison, %[[PRED_LOAD_CONTINUE28]] ], [ [[TMP56]], %[[PRED_LOAD_IF29]] ]
-; IC4VF4-NEXT:    [[TMP58:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
-; IC4VF4-NEXT:    br i1 [[TMP58]], label %[[PRED_LOAD_IF31:.*]], label %[[PRED_LOAD_CONTINUE32:.*]]
+; IC4VF4-NEXT:    br i1 true, label %[[PRED_LOAD_IF31:.*]], label %[[PRED_LOAD_CONTINUE32:.*]]
 ; IC4VF4:       [[PRED_LOAD_IF31]]:
-; IC4VF4-NEXT:    [[TMP59:%.*]] = add i16 [[OFFSET_IDX]], -9
-; IC4VF4-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP59]]
+; IC4VF4-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 3
 ; IC4VF4-NEXT:    [[TMP61:%.*]] = load half, ptr [[TMP60]], align 1
 ; IC4VF4-NEXT:    [[TMP62:%.*]] = insertelement <4 x half> [[TMP57]], half [[TMP61]], i32 1
 ; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE32]]
 ; IC4VF4:       [[PRED_LOAD_CONTINUE32]]:
 ; IC4VF4-NEXT:    [[TMP63:%.*]] = phi <4 x half> [ [[TMP57]], %[[PRED_LOAD_CONTINUE30]] ], [ [[TMP62]], %[[PRED_LOAD_IF31]] ]
-; IC4VF4-NEXT:    [[TMP64:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
-; IC4VF4-NEXT:    br i1 [[TMP64]], label %[[PRED_LOAD_IF33:.*]], label %[[PRED_LOAD_CONTINUE34:.*]]
+; IC4VF4-NEXT:    br i1 true, label %[[PRED_LOAD_IF33:.*]], label %[[PRED_LOAD_CONTINUE34:.*]]
 ; IC4VF4:       [[PRED_LOAD_IF33]]:
-; IC4VF4-NEXT:    [[TMP65:%.*]] = add i16 [[OFFSET_IDX]], -10
-; IC4VF4-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP65]]
+; IC4VF4-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 2
 ; IC4VF4-NEXT:    [[TMP67:%.*]] = load half, ptr [[TMP66]], align 1
 ; IC4VF4-NEXT:    [[TMP68:%.*]] = insertelement <4 x half> [[TMP63]], half [[TMP67]], i32 2
 ; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE34]]
 ; IC4VF4:       [[PRED_LOAD_CONTINUE34]]:
 ; IC4VF4-NEXT:    [[TMP69:%.*]] = phi <4 x half> [ [[TMP63]], %[[PRED_LOAD_CONTINUE32]] ], [ [[TMP68]], %[[PRED_LOAD_IF33]] ]
-; IC4VF4-NEXT:    [[TMP70:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
-; IC4VF4-NEXT:    br i1 [[TMP70]], label %[[PRED_LOAD_IF35:.*]], label %[[PRED_LOAD_CONTINUE36:.*]]
+; IC4VF4-NEXT:    br i1 true, label %[[PRED_LOAD_IF35:.*]], label %[[PRED_LOAD_CONTINUE36:.*]]
 ; IC4VF4:       [[PRED_LOAD_IF35]]:
-; IC4VF4-NEXT:    [[TMP71:%.*]] = add i16 [[OFFSET_IDX]], -11
-; IC4VF4-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP71]]
+; IC4VF4-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 1
 ; IC4VF4-NEXT:    [[TMP73:%.*]] = load half, ptr [[TMP72]], align 1
 ; IC4VF4-NEXT:    [[TMP74:%.*]] = insertelement <4 x half> [[TMP69]], half [[TMP73]], i32 3
 ; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE36]]
 ; IC4VF4:       [[PRED_LOAD_CONTINUE36]]:
 ; IC4VF4-NEXT:    [[TMP75:%.*]] = phi <4 x half> [ [[TMP69]], %[[PRED_LOAD_CONTINUE34]] ], [ [[TMP74]], %[[PRED_LOAD_IF35]] ]
-; IC4VF4-NEXT:    [[TMP76:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
-; IC4VF4-NEXT:    br i1 [[TMP76]], label %[[PRED_LOAD_IF37:.*]], label %[[PRED_LOAD_CONTINUE38:.*]]
+; IC4VF4-NEXT:    br i1 false, label %[[PRED_LOAD_IF37:.*]], label %[[PRED_LOAD_CONTINUE38:.*]]
 ; IC4VF4:       [[PRED_LOAD_IF37]]:
-; IC4VF4-NEXT:    [[TMP77:%.*]] = add i16 [[OFFSET_IDX]], -12
-; IC4VF4-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP77]]
+; IC4VF4-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 0
 ; IC4VF4-NEXT:    [[TMP79:%.*]] = load half, ptr [[TMP78]], align 1
 ; IC4VF4-NEXT:    [[TMP80:%.*]] = insertelement <4 x half> poison, half [[TMP79]], i32 0
 ; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE38]]
 ; IC4VF4:       [[PRED_LOAD_CONTINUE38]]:
 ; IC4VF4-NEXT:    [[TMP81:%.*]] = phi <4 x half> [ poison, %[[PRED_LOAD_CONTINUE36]] ], [ [[TMP80]], %[[PRED_LOAD_IF37]] ]
-; IC4VF4-NEXT:    [[TMP82:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
-; IC4VF4-NEXT:    br i1 [[TMP82]], label %[[PRED_LOAD_IF39:.*]], label %[[PRED_LOAD_CONTINUE40:.*]]
+; IC4VF4-NEXT:    br i1 false, label %[[PRED_LOAD_IF39:.*]], label %[[PRED_LOAD_CONTINUE40:.*]]
 ; IC4VF4:       [[PRED_LOAD_IF39]]:
-; IC4VF4-NEXT:    [[TMP83:%.*]] = add i16 [[OFFSET_IDX]], -13
-; IC4VF4-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP83]]
+; IC4VF4-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 -1
 ; IC4VF4-NEXT:    [[TMP85:%.*]] = load half, ptr [[TMP84]], align 1
 ; IC4VF4-NEXT:    [[TMP86:%.*]] = insertelement <4 x half> [[TMP81]], half [[TMP85]], i32 1
 ; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE40]]
 ; IC4VF4:       [[PRED_LOAD_CONTINUE40]]:
 ; IC4VF4-NEXT:    [[TMP87:%.*]] = phi <4 x half> [ [[TMP81]], %[[PRED_LOAD_CONTINUE38]] ], [ [[TMP86]], %[[PRED_LOAD_IF39]] ]
-; IC4VF4-NEXT:    [[TMP88:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
-; IC4VF4-NEXT:    br i1 [[TMP88]], label %[[PRED_LOAD_IF41:.*]], label %[[PRED_LOAD_CONTINUE42:.*]]
+; IC4VF4-NEXT:    br i1 false, label %[[PRED_LOAD_IF41:.*]], label %[[PRED_LOAD_CONTINUE42:.*]]
 ; IC4VF4:       [[PRED_LOAD_IF41]]:
-; IC4VF4-NEXT:    [[TMP89:%.*]] = add i16 [[OFFSET_IDX]], -14
-; IC4VF4-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP89]]
+; IC4VF4-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 -2
 ; IC4VF4-NEXT:    [[TMP91:%.*]] = load half, ptr [[TMP90]], align 1
 ; IC4VF4-NEXT:    [[TMP92:%.*]] = insertelement <4 x half> [[TMP87]], half [[TMP91]], i32 2
 ; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE42]]
 ; IC4VF4:       [[PRED_LOAD_CONTINUE42]]:
 ; IC4VF4-NEXT:    [[TMP93:%.*]] = phi <4 x half> [ [[TMP87]], %[[PRED_LOAD_CONTINUE40]] ], [ [[TMP92]], %[[PRED_LOAD_IF41]] ]
-; IC4VF4-NEXT:    [[TMP94:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
-; IC4VF4-NEXT:    br i1 [[TMP94]], label %[[PRED_LOAD_IF43:.*]], label %[[PRED_LOAD_CONTINUE44]]
+; IC4VF4-NEXT:    br i1 false, label %[[PRED_LOAD_IF43:.*]], label %[[PRED_LOAD_CONTINUE44:.*]]
 ; IC4VF4:       [[PRED_LOAD_IF43]]:
-; IC4VF4-NEXT:    [[TMP95:%.*]] = add i16 [[OFFSET_IDX]], -15
-; IC4VF4-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP95]]
+; IC4VF4-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 -3
 ; IC4VF4-NEXT:    [[TMP97:%.*]] = load half, ptr [[TMP96]], align 1
 ; IC4VF4-NEXT:    [[TMP98:%.*]] = insertelement <4 x half> [[TMP93]], half [[TMP97]], i32 3
 ; IC4VF4-NEXT:    br label %[[PRED_LOAD_CONTINUE44]]
@@ -704,22 +592,16 @@ define i16 @select_decreasing_induction_icmp_table_half(half noundef %val) {
 ; IC4VF4-NEXT:    [[TMP101:%.*]] = fcmp ugt <4 x half> [[TMP51]], [[BROADCAST_SPLAT]]
 ; IC4VF4-NEXT:    [[TMP102:%.*]] = fcmp ugt <4 x half> [[TMP75]], [[BROADCAST_SPLAT]]
 ; IC4VF4-NEXT:    [[TMP103:%.*]] = fcmp ugt <4 x half> [[TMP99]], [[BROADCAST_SPLAT]]
-; IC4VF4-NEXT:    [[TMP104:%.*]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -1)
-; IC4VF4-NEXT:    [[TMP105:%.*]] = add nsw <4 x i16> [[STEP_ADD]], splat (i16 -1)
-; IC4VF4-NEXT:    [[TMP106:%.*]] = add nsw <4 x i16> [[STEP_ADD_2]], splat (i16 -1)
-; IC4VF4-NEXT:    [[TMP107:%.*]] = add nsw <4 x i16> [[STEP_ADD_3]], splat (i16 -1)
-; IC4VF4-NEXT:    [[TMP108]] = select <4 x i1> [[TMP100]], <4 x i16> [[TMP104]], <4 x i16> [[VEC_PHI]]
-; IC4VF4-NEXT:    [[TMP109]] = select <4 x i1> [[TMP101]], <4 x i16> [[TMP105]], <4 x i16> [[VEC_PHI1]]
-; IC4VF4-NEXT:    [[TMP110]] = select <4 x i1> [[TMP102]], <4 x i16> [[TMP106]], <4 x i16> [[VEC_PHI2]]
-; IC4VF4-NEXT:    [[TMP111]] = select <4 x i1> [[TMP103]], <4 x i16> [[TMP107]], <4 x i16> [[VEC_PHI3]]
-; IC4VF4-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
-; IC4VF4-NEXT:    [[VEC_IND_NEXT]] = add nsw <4 x i16> [[STEP_ADD_3]], splat (i16 -4)
-; IC4VF4-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; IC4VF4-NEXT:    [[TMP76:%.*]] = select <4 x i1> [[TMP100]], <4 x i16> <i16 11, i16 10, i16 9, i16 8>, <4 x i16> splat (i16 32767)
+; IC4VF4-NEXT:    [[TMP77:%.*]] = select <4 x i1> [[TMP101]], <4 x i16> <i16 7, i16 6, i16 5, i16 4>, <4 x i16> splat (i16 32767)
+; IC4VF4-NEXT:    [[TMP70:%.*]] = select <4 x i1> [[TMP102]], <4 x i16> <i16 3, i16 2, i16 1, i16 0>, <4 x i16> splat (i16 32767)
+; IC4VF4-NEXT:    [[TMP71:%.*]] = select <4 x i1> [[TMP103]], <4 x i16> <i16 -1, i16 -2, i16 -3, i16 -4>, <4 x i16> splat (i16 32767)
+; IC4VF4-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; IC4VF4:       [[MIDDLE_BLOCK]]:
-; IC4VF4-NEXT:    [[TMP112:%.*]] = select <4 x i1> [[TMP0]], <4 x i16> [[TMP108]], <4 x i16> [[VEC_PHI]]
-; IC4VF4-NEXT:    [[TMP113:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[TMP109]], <4 x i16> [[VEC_PHI1]]
-; IC4VF4-NEXT:    [[TMP114:%.*]] = select <4 x i1> [[TMP2]], <4 x i16> [[TMP110]], <4 x i16> [[VEC_PHI2]]
-; IC4VF4-NEXT:    [[TMP115:%.*]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP111]], <4 x i16> [[VEC_PHI3]]
+; IC4VF4-NEXT:    [[TMP112:%.*]] = select <4 x i1> splat (i1 true), <4 x i16> [[TMP76]], <4 x i16> splat (i16 32767)
+; IC4VF4-NEXT:    [[TMP113:%.*]] = select <4 x i1> splat (i1 true), <4 x i16> [[TMP77]], <4 x i16> splat (i16 32767)
+; IC4VF4-NEXT:    [[TMP114:%.*]] = select <4 x i1> splat (i1 true), <4 x i16> [[TMP70]], <4 x i16> splat (i16 32767)
+; IC4VF4-NEXT:    [[TMP115:%.*]] = select <4 x i1> zeroinitializer, <4 x i16> [[TMP71]], <4 x i16> splat (i16 32767)
 ; IC4VF4-NEXT:    [[RDX_MINMAX:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP112]], <4 x i16> [[TMP113]])
 ; IC4VF4-NEXT:    [[RDX_MINMAX45:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX]], <4 x i16> [[TMP114]])
 ; IC4VF4-NEXT:    [[RDX_MINMAX46:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX45]], <4 x i16> [[TMP115]])

diff  --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll
index 5e88072517b37..e5ffbe22d2285 100644
--- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll
+++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll
@@ -17,58 +17,43 @@ define void @ptr_depends_on_sdiv(ptr noalias %dst, i16 noundef %off) {
 ; CHECK:       [[VECTOR_PH]]:
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE4:.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 9, i16 10>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE4]] ]
-; CHECK-NEXT:    [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
-; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i16 9, [[DOTCAST]]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <2 x i16> [[VEC_IND]], splat (i16 10)
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
-; CHECK-NEXT:    br i1 [[TMP6]], label %[[PRED_SDIV_IF:.*]], label %[[PRED_SDIV_CONTINUE:.*]]
+; CHECK-NEXT:    br i1 false, label %[[PRED_SDIV_IF:.*]], label %[[PRED_SDIV_CONTINUE:.*]]
 ; CHECK:       [[PRED_SDIV_IF]]:
 ; CHECK-NEXT:    [[TMP2:%.*]] = sdiv i16 24316, [[OFF]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i32 0
 ; CHECK-NEXT:    br label %[[PRED_SDIV_CONTINUE]]
 ; CHECK:       [[PRED_SDIV_CONTINUE]]:
 ; CHECK-NEXT:    [[TMP4:%.*]] = phi <2 x i16> [ poison, %[[VECTOR_BODY]] ], [ [[TMP3]], %[[PRED_SDIV_IF]] ]
-; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
-; CHECK-NEXT:    br i1 [[TMP14]], label %[[PRED_SDIV_IF1:.*]], label %[[PRED_SDIV_CONTINUE2:.*]]
+; CHECK-NEXT:    br i1 true, label %[[PRED_SDIV_IF1:.*]], label %[[PRED_SDIV_CONTINUE2:.*]]
 ; CHECK:       [[PRED_SDIV_IF1]]:
 ; CHECK-NEXT:    [[TMP18:%.*]] = sdiv i16 24316, [[OFF]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x i16> [[TMP4]], i16 [[TMP18]], i32 1
 ; CHECK-NEXT:    br label %[[PRED_SDIV_CONTINUE2]]
 ; CHECK:       [[PRED_SDIV_CONTINUE2]]:
 ; CHECK-NEXT:    [[TMP8:%.*]] = phi <2 x i16> [ [[TMP4]], %[[PRED_SDIV_CONTINUE]] ], [ [[TMP7]], %[[PRED_SDIV_IF1]] ]
-; CHECK-NEXT:    [[TMP21:%.*]] = add <2 x i16> [[VEC_IND]], splat (i16 16383)
 ; CHECK-NEXT:    [[TMP22:%.*]] = shl <2 x i16> [[TMP8]], splat (i16 14)
-; CHECK-NEXT:    [[TMP23:%.*]] = sub <2 x i16> [[TMP21]], [[TMP22]]
-; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
-; CHECK-NEXT:    br i1 [[TMP19]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK-NEXT:    [[TMP23:%.*]] = sub <2 x i16> <i16 16392, i16 16393>, [[TMP22]]
+; CHECK-NEXT:    br i1 false, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
 ; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x i16> [[TMP23]], i32 0
 ; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP13]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i64, ptr [[TMP24]], align 1
-; CHECK-NEXT:    [[TMP10:%.*]] = add i16 [[OFFSET_IDX]], 0
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP10]]
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 9
 ; CHECK-NEXT:    store i64 [[TMP9]], ptr [[TMP11]], align 1
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; CHECK:       [[PRED_STORE_CONTINUE]]:
-; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
-; CHECK-NEXT:    br i1 [[TMP12]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4]]
+; CHECK-NEXT:    br i1 true, label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
 ; CHECK:       [[PRED_STORE_IF3]]:
 ; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <2 x i16> [[TMP23]], i32 1
 ; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP25]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = load i64, ptr [[TMP20]], align 1
-; CHECK-NEXT:    [[TMP16:%.*]] = add i16 [[OFFSET_IDX]], 1
-; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP16]]
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 10
 ; CHECK-NEXT:    store i64 [[TMP15]], ptr [[TMP17]], align 1
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE4]]
 ; CHECK:       [[PRED_STORE_CONTINUE4]]:
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
-; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
-; CHECK-NEXT:    br [[EXIT:label %.*]]
-; CHECK:       [[SCALAR_PH:.*:]]
+; CHECK-NEXT:    br label %[[EXIT:.*]]
 ;
 entry:
   br label %loop.header
@@ -111,40 +96,28 @@ define void @ptr_depends_on_possibly_poison_value(ptr noalias %dst, i16 %off) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i16> [[TMP0]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 9, i16 10>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ]
-; CHECK-NEXT:    [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
-; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i16 9, [[DOTCAST]]
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i16> [[VEC_IND]], splat (i16 10)
-; CHECK-NEXT:    [[TMP3:%.*]] = add <2 x i16> [[VEC_IND]], [[TMP1]]
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
-; CHECK-NEXT:    br i1 [[TMP6]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = add <2 x i16> <i16 9, i16 10>, [[TMP1]]
+; CHECK-NEXT:    br i1 false, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i16> [[TMP3]], i32 0
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP5]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i64, ptr [[TMP14]], align 1
-; CHECK-NEXT:    [[TMP7:%.*]] = add i16 [[OFFSET_IDX]], 0
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 9
 ; CHECK-NEXT:    store i64 [[TMP9]], ptr [[TMP8]], align 1
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; CHECK:       [[PRED_STORE_CONTINUE]]:
-; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
-; CHECK-NEXT:    br i1 [[TMP10]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
+; CHECK-NEXT:    br i1 true, label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
 ; CHECK:       [[PRED_STORE_IF1]]:
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x i16> [[TMP3]], i32 1
 ; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP15]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = load i64, ptr [[TMP16]], align 1
-; CHECK-NEXT:    [[TMP11:%.*]] = add i16 [[OFFSET_IDX]], 1
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP11]]
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 10
 ; CHECK-NEXT:    store i64 [[TMP13]], ptr [[TMP12]], align 1
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
 ; CHECK:       [[PRED_STORE_CONTINUE2]]:
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
-; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
-; CHECK-NEXT:    br [[EXIT:label %.*]]
-; CHECK:       [[SCALAR_PH:.*:]]
+; CHECK-NEXT:    br label %[[EXIT:.*]]
 ;
 entry:
   br label %loop.header
@@ -183,38 +156,26 @@ define void @ptr_doesnt_depend_on_poison_or_ub(ptr noalias %dst, i16 noundef %of
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i16 [[TMP0]], [[OFF]]
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 9, i16 10>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ]
-; CHECK-NEXT:    [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
-; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i16 9, [[DOTCAST]]
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i16> [[VEC_IND]], splat (i16 10)
-; CHECK-NEXT:    [[TMP3:%.*]] = add i16 [[OFFSET_IDX]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = add i16 9, [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i64, ptr @src, i16 [[TMP3]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 1
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
-; CHECK-NEXT:    br i1 [[TMP6]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK-NEXT:    br i1 false, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
-; CHECK-NEXT:    [[TMP7:%.*]] = add i16 [[OFFSET_IDX]], 0
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 9
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0
 ; CHECK-NEXT:    store i64 [[TMP9]], ptr [[TMP8]], align 1
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; CHECK:       [[PRED_STORE_CONTINUE]]:
-; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
-; CHECK-NEXT:    br i1 [[TMP10]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
+; CHECK-NEXT:    br i1 true, label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
 ; CHECK:       [[PRED_STORE_IF1]]:
-; CHECK-NEXT:    [[TMP11:%.*]] = add i16 [[OFFSET_IDX]], 1
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP11]]
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 10
 ; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 1
 ; CHECK-NEXT:    store i64 [[TMP13]], ptr [[TMP12]], align 1
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
 ; CHECK:       [[PRED_STORE_CONTINUE2]]:
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
-; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
-; CHECK-NEXT:    br [[EXIT:label %.*]]
-; CHECK:       [[SCALAR_PH:.*:]]
+; CHECK-NEXT:    br label %[[EXIT:.*]]
 ;
 entry:
   br label %loop.header
@@ -252,45 +213,33 @@ define void @ptr_depends_on_possibly_poison_value_from_load(ptr noalias %dst) {
 ; CHECK:       [[VECTOR_PH]]:
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 9, i16 10>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ]
-; CHECK-NEXT:    [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
-; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i16 9, [[DOTCAST]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr @src, align 1
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[TMP0]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i16> [[VEC_IND]], splat (i16 10)
 ; CHECK-NEXT:    [[TMP2:%.*]] = sub <2 x i16> splat (i16 1), [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = add <2 x i16> [[TMP2]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP4:%.*]] = add <2 x i16> [[VEC_IND]], [[TMP3]]
-; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
-; CHECK-NEXT:    br i1 [[TMP7]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK-NEXT:    [[TMP4:%.*]] = add <2 x i16> <i16 9, i16 10>, [[TMP3]]
+; CHECK-NEXT:    br i1 false, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i16> [[TMP4]], i32 0
 ; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP6]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = load i64, ptr [[TMP15]], align 1
-; CHECK-NEXT:    [[TMP8:%.*]] = add i16 [[OFFSET_IDX]], 0
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP8]]
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 9
 ; CHECK-NEXT:    store i64 [[TMP10]], ptr [[TMP9]], align 1
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; CHECK:       [[PRED_STORE_CONTINUE]]:
-; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
-; CHECK-NEXT:    br i1 [[TMP11]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
+; CHECK-NEXT:    br i1 true, label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
 ; CHECK:       [[PRED_STORE_IF1]]:
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x i16> [[TMP4]], i32 1
 ; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP16]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = load i64, ptr [[TMP17]], align 1
-; CHECK-NEXT:    [[TMP12:%.*]] = add i16 [[OFFSET_IDX]], 1
-; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP12]]
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 10
 ; CHECK-NEXT:    store i64 [[TMP14]], ptr [[TMP13]], align 1
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
 ; CHECK:       [[PRED_STORE_CONTINUE2]]:
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
-; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
-; CHECK-NEXT:    br [[EXIT:label %.*]]
-; CHECK:       [[SCALAR_PH:.*:]]
+; CHECK-NEXT:    br label %[[EXIT:.*]]
 ;
 entry:
   br label %loop.header
@@ -329,41 +278,29 @@ define void @ptr_depends_on_noundef_load(ptr noalias %dst) {
 ; CHECK:       [[VECTOR_PH]]:
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 9, i16 10>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ]
-; CHECK-NEXT:    [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
-; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i16 9, [[DOTCAST]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr @src, align 1, !noundef [[META10:![0-9]+]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i16> [[VEC_IND]], splat (i16 10)
 ; CHECK-NEXT:    [[TMP2:%.*]] = sub i16 1, [[TMP0]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = add i16 [[TMP2]], [[TMP0]]
-; CHECK-NEXT:    [[TMP4:%.*]] = add i16 [[OFFSET_IDX]], [[TMP3]]
+; CHECK-NEXT:    [[TMP4:%.*]] = add i16 9, [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i64, ptr @src, i16 [[TMP4]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP5]], align 1
-; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
-; CHECK-NEXT:    br i1 [[TMP7]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK-NEXT:    br i1 false, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
-; CHECK-NEXT:    [[TMP8:%.*]] = add i16 [[OFFSET_IDX]], 0
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP8]]
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 9
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0
 ; CHECK-NEXT:    store i64 [[TMP10]], ptr [[TMP9]], align 1
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; CHECK:       [[PRED_STORE_CONTINUE]]:
-; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
-; CHECK-NEXT:    br i1 [[TMP11]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
+; CHECK-NEXT:    br i1 true, label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
 ; CHECK:       [[PRED_STORE_IF1]]:
-; CHECK-NEXT:    [[TMP12:%.*]] = add i16 [[OFFSET_IDX]], 1
-; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP12]]
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 10
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 1
 ; CHECK-NEXT:    store i64 [[TMP14]], ptr [[TMP13]], align 1
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
 ; CHECK:       [[PRED_STORE_CONTINUE2]]:
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
-; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
-; CHECK-NEXT:    br [[EXIT:label %.*]]
-; CHECK:       [[SCALAR_PH:.*:]]
+; CHECK-NEXT:    br label %[[EXIT:.*]]
 ;
 entry:
   br label %loop.header

diff  --git a/llvm/test/Transforms/LoopVectorize/predicated-multiple-exits.ll b/llvm/test/Transforms/LoopVectorize/predicated-multiple-exits.ll
index 9a11b0d42667b..51bd6339375b9 100644
--- a/llvm/test/Transforms/LoopVectorize/predicated-multiple-exits.ll
+++ b/llvm/test/Transforms/LoopVectorize/predicated-multiple-exits.ll
@@ -729,20 +729,22 @@ define i32 @diamond_exit_poison_from_speculated_branch() {
 ; CHECK-NEXT:    br i1 [[TMP1]], label %[[VECTOR_EARLY_EXIT_CHECK:.*]], label %[[MIDDLE_BLOCK:.*]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    br label %[[LOOP_END:.*]]
+; CHECK:       [[LOOP_END]]:
+; CHECK-NEXT:    br label %[[LOOP_END1:.*]]
 ; CHECK:       [[VECTOR_EARLY_EXIT_CHECK]]:
 ; CHECK-NEXT:    [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> splat (i1 true), i1 false)
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i1> zeroinitializer, i64 [[FIRST_ACTIVE_LANE]]
 ; CHECK-NEXT:    br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[VECTOR_EARLY_EXIT_1:.*]]
 ; CHECK:       [[VECTOR_EARLY_EXIT_1]]:
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> <i32 10, i32 11, i32 1, i32 2>, i64 [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT:    br label %[[LOOP_END]]
+; CHECK-NEXT:    br label %[[LOOP_END1]]
 ; CHECK:       [[VECTOR_EARLY_EXIT_0]]:
 ; CHECK-NEXT:    br label %[[UNREACHABLE_EXIT:.*]]
 ; CHECK:       [[UNREACHABLE_EXIT]]:
 ; CHECK-NEXT:    call void @llvm.trap()
 ; CHECK-NEXT:    unreachable
-; CHECK:       [[LOOP_END]]:
-; CHECK-NEXT:    [[RETVAL:%.*]] = phi i32 [ [[TMP3]], %[[VECTOR_EARLY_EXIT_1]] ], [ -1, %[[MIDDLE_BLOCK]] ]
+; CHECK:       [[LOOP_END1]]:
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi i32 [ [[TMP3]], %[[VECTOR_EARLY_EXIT_1]] ], [ -1, %[[LOOP_END]] ]
 ; CHECK-NEXT:    ret i32 [[RETVAL]]
 ;
 entry:
@@ -797,20 +799,22 @@ define i32 @diamond_exit_poison_cond_second() {
 ; CHECK-NEXT:    br i1 [[TMP1]], label %[[VECTOR_EARLY_EXIT_CHECK:.*]], label %[[MIDDLE_BLOCK:.*]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    br label %[[LOOP_END:.*]]
+; CHECK:       [[LOOP_END]]:
+; CHECK-NEXT:    br label %[[LOOP_END1:.*]]
 ; CHECK:       [[VECTOR_EARLY_EXIT_CHECK]]:
 ; CHECK-NEXT:    [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> <i1 true, i1 false, i1 false, i1 false>, i1 false)
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i1> zeroinitializer, i64 [[FIRST_ACTIVE_LANE]]
 ; CHECK-NEXT:    br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[VECTOR_EARLY_EXIT_1:.*]]
 ; CHECK:       [[VECTOR_EARLY_EXIT_1]]:
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> <i32 10, i32 11, i32 12, i32 13>, i64 [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT:    br label %[[LOOP_END]]
+; CHECK-NEXT:    br label %[[LOOP_END1]]
 ; CHECK:       [[VECTOR_EARLY_EXIT_0]]:
 ; CHECK-NEXT:    br label %[[UNREACHABLE_EXIT:.*]]
 ; CHECK:       [[UNREACHABLE_EXIT]]:
 ; CHECK-NEXT:    call void @llvm.trap()
 ; CHECK-NEXT:    unreachable
-; CHECK:       [[LOOP_END]]:
-; CHECK-NEXT:    [[RETVAL:%.*]] = phi i32 [ [[TMP3]], %[[VECTOR_EARLY_EXIT_1]] ], [ -1, %[[MIDDLE_BLOCK]] ]
+; CHECK:       [[LOOP_END1]]:
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi i32 [ [[TMP3]], %[[VECTOR_EARLY_EXIT_1]] ], [ -1, %[[LOOP_END]] ]
 ; CHECK-NEXT:    ret i32 [[RETVAL]]
 ;
 entry:

diff  --git a/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll b/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll
index c9cc8060ff498..033d32a9b8bc7 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll
@@ -48,57 +48,43 @@ define i32 @chained_smax(i32 %x, ptr %src) {
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6:.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP26:%.*]], %[[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 1)
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[BROADCAST_SPLAT]], <4 x i32> [[VEC_PHI]])
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
-; CHECK-NEXT:    br i1 [[TMP2]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[BROADCAST_SPLAT]], <4 x i32> zeroinitializer)
+; CHECK-NEXT:    br i1 true, label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
 ; CHECK:       [[PRED_LOAD_IF]]:
-; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i32 0
 ; CHECK-NEXT:    br label %[[PRED_LOAD_CONTINUE]]
 ; CHECK:       [[PRED_LOAD_CONTINUE]]:
 ; CHECK-NEXT:    [[TMP7:%.*]] = phi <4 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP6]], %[[PRED_LOAD_IF]] ]
-; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
-; CHECK-NEXT:    br i1 [[TMP8]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2:.*]]
+; CHECK-NEXT:    br i1 true, label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2:.*]]
 ; CHECK:       [[PRED_LOAD_IF1]]:
-; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[TMP9]]
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 1
 ; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
 ; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP11]], i32 1
 ; CHECK-NEXT:    br label %[[PRED_LOAD_CONTINUE2]]
 ; CHECK:       [[PRED_LOAD_CONTINUE2]]:
 ; CHECK-NEXT:    [[TMP13:%.*]] = phi <4 x i32> [ [[TMP7]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF1]] ]
-; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
-; CHECK-NEXT:    br i1 [[TMP14]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]]
+; CHECK-NEXT:    br i1 false, label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]]
 ; CHECK:       [[PRED_LOAD_IF3]]:
-; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[TMP15]]
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 2
 ; CHECK-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
 ; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP17]], i32 2
 ; CHECK-NEXT:    br label %[[PRED_LOAD_CONTINUE4]]
 ; CHECK:       [[PRED_LOAD_CONTINUE4]]:
 ; CHECK-NEXT:    [[TMP19:%.*]] = phi <4 x i32> [ [[TMP13]], %[[PRED_LOAD_CONTINUE2]] ], [ [[TMP18]], %[[PRED_LOAD_IF3]] ]
-; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
-; CHECK-NEXT:    br i1 [[TMP20]], label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6]]
+; CHECK-NEXT:    br i1 false, label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6:.*]]
 ; CHECK:       [[PRED_LOAD_IF5]]:
-; CHECK-NEXT:    [[TMP21:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 3
 ; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
 ; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP23]], i32 3
 ; CHECK-NEXT:    br label %[[PRED_LOAD_CONTINUE6]]
 ; CHECK:       [[PRED_LOAD_CONTINUE6]]:
-; CHECK-NEXT:    [[TMP25:%.*]] = phi <4 x i32> [ [[TMP19]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], %[[PRED_LOAD_IF5]] ]
-; CHECK-NEXT:    [[TMP26]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP25]], <4 x i32> [[TMP1]])
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
-; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    [[TMP20:%.*]] = phi <4 x i32> [ [[TMP19]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], %[[PRED_LOAD_IF5]] ]
+; CHECK-NEXT:    [[TMP21:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP20]], <4 x i32> [[TMP1]])
+; CHECK-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
-; CHECK-NEXT:    [[TMP27:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP26]], <4 x i32> [[VEC_PHI]]
+; CHECK-NEXT:    [[TMP27:%.*]] = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i32> [[TMP21]], <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP28:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP27]])
 ; CHECK-NEXT:    br label %[[EXIT:.*]]
 ; CHECK:       [[EXIT]]:

diff  --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll
index 97d57a0cf83a0..3b7a0d348fbfb 100644
--- a/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll
+++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll
@@ -26,6 +26,8 @@ define noundef i32 @f(i32 noundef %g) {
 ; VF4IC2-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
 ; VF4IC2-NEXT:    br i1 [[TMP7]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
 ; VF4IC2:       [[MIDDLE_BLOCK]]:
+; VF4IC2-NEXT:    br label %[[MIDDLE_BLOCK1:.*]]
+; VF4IC2:       [[MIDDLE_BLOCK1]]:
 ; VF4IC2-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
 ; VF4IC2-NEXT:    br label %[[RETURN:.*]]
 ; VF4IC2:       [[VECTOR_EARLY_EXIT]]:
@@ -38,7 +40,7 @@ define noundef i32 @f(i32 noundef %g) {
 ; VF4IC2-NEXT:    [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32
 ; VF4IC2-NEXT:    br label %[[RETURN]]
 ; VF4IC2:       [[RETURN]]:
-; VF4IC2-NEXT:    [[RES:%.*]] = phi i32 [ [[TMP8]], %[[MIDDLE_BLOCK]] ], [ [[TMP15]], %[[VECTOR_EARLY_EXIT]] ]
+; VF4IC2-NEXT:    [[RES:%.*]] = phi i32 [ [[TMP8]], %[[MIDDLE_BLOCK1]] ], [ [[TMP15]], %[[VECTOR_EARLY_EXIT]] ]
 ; VF4IC2-NEXT:    ret i32 [[RES]]
 ;
 ; VF8IC1-LABEL: define noundef i32 @f(
@@ -56,6 +58,8 @@ define noundef i32 @f(i32 noundef %g) {
 ; VF8IC1-NEXT:    [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP8]])
 ; VF8IC1-NEXT:    br i1 [[TMP3]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
 ; VF8IC1:       [[MIDDLE_BLOCK]]:
+; VF8IC1-NEXT:    br label %[[MIDDLE_BLOCK1:.*]]
+; VF8IC1:       [[MIDDLE_BLOCK1]]:
 ; VF8IC1-NEXT:    [[TMP4:%.*]] = extractelement <8 x i32> [[TMP1]], i32 7
 ; VF8IC1-NEXT:    br label %[[RETURN:.*]]
 ; VF8IC1:       [[VECTOR_EARLY_EXIT]]:
@@ -63,7 +67,7 @@ define noundef i32 @f(i32 noundef %g) {
 ; VF8IC1-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
 ; VF8IC1-NEXT:    br label %[[RETURN]]
 ; VF8IC1:       [[RETURN]]:
-; VF8IC1-NEXT:    [[RES:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ [[TMP6]], %[[VECTOR_EARLY_EXIT]] ]
+; VF8IC1-NEXT:    [[RES:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK1]] ], [ [[TMP6]], %[[VECTOR_EARLY_EXIT]] ]
 ; VF8IC1-NEXT:    ret i32 [[RES]]
 ;
 entry:

diff  --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll
index b93215035cebf..db4623f7138fb 100644
--- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll
+++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll
@@ -52,11 +52,13 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosyn
 ; VF8UF2-NEXT:    [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]])
 ; VF8UF2-NEXT:    br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
 ; VF8UF2:       [[MIDDLE_BLOCK]]:
+; VF8UF2-NEXT:    br label %[[MIDDLE_SPLIT:.*]]
+; VF8UF2:       [[MIDDLE_SPLIT]]:
 ; VF8UF2-NEXT:    br label %[[EXIT:.*]]
 ; VF8UF2:       [[VECTOR_EARLY_EXIT]]:
 ; VF8UF2-NEXT:    br label %[[EXIT]]
 ; VF8UF2:       [[EXIT]]:
-; VF8UF2-NEXT:    [[RES:%.*]] = phi i8 [ 0, %[[VECTOR_EARLY_EXIT]] ], [ 1, %[[MIDDLE_BLOCK]] ]
+; VF8UF2-NEXT:    [[RES:%.*]] = phi i8 [ 0, %[[VECTOR_EARLY_EXIT]] ], [ 1, %[[MIDDLE_SPLIT]] ]
 ; VF8UF2-NEXT:    ret i8 [[RES]]
 ;
 ; VF16UF1-LABEL: define i8 @test_early_exit_max_tc_less_than_16(
@@ -72,11 +74,13 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosyn
 ; VF16UF1-NEXT:    [[TMP2:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP1]])
 ; VF16UF1-NEXT:    br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
 ; VF16UF1:       [[MIDDLE_BLOCK]]:
+; VF16UF1-NEXT:    br label %[[MIDDLE_SPLIT:.*]]
+; VF16UF1:       [[MIDDLE_SPLIT]]:
 ; VF16UF1-NEXT:    br label %[[EXIT:.*]]
 ; VF16UF1:       [[VECTOR_EARLY_EXIT]]:
 ; VF16UF1-NEXT:    br label %[[EXIT]]
 ; VF16UF1:       [[EXIT]]:
-; VF16UF1-NEXT:    [[RES:%.*]] = phi i8 [ 0, %[[VECTOR_EARLY_EXIT]] ], [ 1, %[[MIDDLE_BLOCK]] ]
+; VF16UF1-NEXT:    [[RES:%.*]] = phi i8 [ 0, %[[VECTOR_EARLY_EXIT]] ], [ 1, %[[MIDDLE_SPLIT]] ]
 ; VF16UF1-NEXT:    ret i8 [[RES]]
 ;
 entry:
@@ -146,6 +150,8 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer
 ; VF8UF2-NEXT:    [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]])
 ; VF8UF2-NEXT:    br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
 ; VF8UF2:       [[MIDDLE_BLOCK]]:
+; VF8UF2-NEXT:    br label %[[MIDDLE_SPLIT:.*]]
+; VF8UF2:       [[MIDDLE_SPLIT]]:
 ; VF8UF2-NEXT:    br label %[[EXIT:.*]]
 ; VF8UF2:       [[VECTOR_EARLY_EXIT]]:
 ; VF8UF2-NEXT:    [[TMP5:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP2]], i1 false)
@@ -156,7 +162,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer
 ; VF8UF2-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 [[TMP7]]
 ; VF8UF2-NEXT:    br label %[[EXIT]]
 ; VF8UF2:       [[EXIT]]:
-; VF8UF2-NEXT:    [[RES:%.*]] = phi i64 [ [[TMP11]], %[[VECTOR_EARLY_EXIT]] ], [ 1, %[[MIDDLE_BLOCK]] ]
+; VF8UF2-NEXT:    [[RES:%.*]] = phi i64 [ [[TMP11]], %[[VECTOR_EARLY_EXIT]] ], [ 1, %[[MIDDLE_SPLIT]] ]
 ; VF8UF2-NEXT:    ret i64 [[RES]]
 ;
 ; VF16UF1-LABEL: define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(
@@ -172,12 +178,14 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer
 ; VF16UF1-NEXT:    [[TMP2:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP1]])
 ; VF16UF1-NEXT:    br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
 ; VF16UF1:       [[MIDDLE_BLOCK]]:
+; VF16UF1-NEXT:    br label %[[MIDDLE_SPLIT:.*]]
+; VF16UF1:       [[MIDDLE_SPLIT]]:
 ; VF16UF1-NEXT:    br label %[[EXIT:.*]]
 ; VF16UF1:       [[VECTOR_EARLY_EXIT]]:
 ; VF16UF1-NEXT:    [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v16i1(<16 x i1> [[TMP3]], i1 false)
 ; VF16UF1-NEXT:    br label %[[EXIT]]
 ; VF16UF1:       [[EXIT]]:
-; VF16UF1-NEXT:    [[RES:%.*]] = phi i64 [ [[FIRST_ACTIVE_LANE]], %[[VECTOR_EARLY_EXIT]] ], [ 1, %[[MIDDLE_BLOCK]] ]
+; VF16UF1-NEXT:    [[RES:%.*]] = phi i64 [ [[FIRST_ACTIVE_LANE]], %[[VECTOR_EARLY_EXIT]] ], [ 1, %[[MIDDLE_SPLIT]] ]
 ; VF16UF1-NEXT:    ret i64 [[RES]]
 ;
 entry:
@@ -258,12 +266,14 @@ define i8 @test_early_exit_max_vector_tc_eq_16(ptr dereferenceable(17) %A) nosyn
 ; VF8UF2-NEXT:    br i1 [[TMP5]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
 ; VF8UF2:       [[MIDDLE_BLOCK]]:
 ; VF8UF2-NEXT:    br label %[[SCALAR_PH:.*]]
+; VF8UF2:       [[SCALAR_PH]]:
+; VF8UF2-NEXT:    br label %[[SCALAR_PH_SPLIT:.*]]
 ; VF8UF2:       [[VECTOR_EARLY_EXIT]]:
 ; VF8UF2-NEXT:    br label %[[EXIT:.*]]
-; VF8UF2:       [[SCALAR_PH]]:
+; VF8UF2:       [[SCALAR_PH_SPLIT]]:
 ; VF8UF2-NEXT:    br label %[[LOOP_HEADER:.*]]
 ; VF8UF2:       [[LOOP_HEADER]]:
-; VF8UF2-NEXT:    [[IV:%.*]] = phi i64 [ 16, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; VF8UF2-NEXT:    [[IV:%.*]] = phi i64 [ 16, %[[SCALAR_PH_SPLIT]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
 ; VF8UF2-NEXT:    [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
 ; VF8UF2-NEXT:    [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
 ; VF8UF2-NEXT:    [[C:%.*]] = icmp eq i8 [[L]], 0
@@ -290,12 +300,14 @@ define i8 @test_early_exit_max_vector_tc_eq_16(ptr dereferenceable(17) %A) nosyn
 ; VF16UF1-NEXT:    br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
 ; VF16UF1:       [[MIDDLE_BLOCK]]:
 ; VF16UF1-NEXT:    br label %[[SCALAR_PH:.*]]
+; VF16UF1:       [[SCALAR_PH]]:
+; VF16UF1-NEXT:    br label %[[SCALAR_PH_SPLIT:.*]]
 ; VF16UF1:       [[VECTOR_EARLY_EXIT]]:
 ; VF16UF1-NEXT:    br label %[[EXIT:.*]]
-; VF16UF1:       [[SCALAR_PH]]:
+; VF16UF1:       [[SCALAR_PH_SPLIT]]:
 ; VF16UF1-NEXT:    br label %[[LOOP_HEADER:.*]]
 ; VF16UF1:       [[LOOP_HEADER]]:
-; VF16UF1-NEXT:    [[IV:%.*]] = phi i64 [ 16, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; VF16UF1-NEXT:    [[IV:%.*]] = phi i64 [ 16, %[[SCALAR_PH_SPLIT]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
 ; VF16UF1-NEXT:    [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
 ; VF16UF1-NEXT:    [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
 ; VF16UF1-NEXT:    [[C:%.*]] = icmp eq i8 [[L]], 0
@@ -368,28 +380,22 @@ define i1 @test_early_exit_max_tc_less_than_16_non_canonical_iv(ptr dereferencea
 ; VF8UF2:       [[VECTOR_PH]]:
 ; VF8UF2-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; VF8UF2:       [[VECTOR_BODY]]:
-; VF8UF2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
-; VF8UF2-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY_INTERIM]] ]
-; VF8UF2-NEXT:    [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
-; VF8UF2-NEXT:    [[OFFSET_IDX:%.*]] = add i64 2, [[INDEX]]
-; VF8UF2-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET_IDX]]
+; VF8UF2-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 2
 ; VF8UF2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
 ; VF8UF2-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1
 ; VF8UF2-NEXT:    [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1
 ; VF8UF2-NEXT:    [[TMP4:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
 ; VF8UF2-NEXT:    [[TMP5:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD1]], zeroinitializer
-; VF8UF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; VF8UF2-NEXT:    [[TMP6:%.*]] = freeze <8 x i1> [[TMP4]]
 ; VF8UF2-NEXT:    [[TMP7:%.*]] = freeze <8 x i1> [[TMP5]]
 ; VF8UF2-NEXT:    [[TMP8:%.*]] = or <8 x i1> [[TMP6]], [[TMP7]]
 ; VF8UF2-NEXT:    [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP8]])
-; VF8UF2-NEXT:    [[VEC_IND_NEXT]] = add nsw <8 x i64> [[STEP_ADD]], splat (i64 8)
-; VF8UF2-NEXT:    br i1 [[TMP9]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM]]
+; VF8UF2-NEXT:    br i1 [[TMP9]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM:.*]]
 ; VF8UF2:       [[VECTOR_BODY_INTERIM]]:
-; VF8UF2-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; VF8UF2-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; VF8UF2:       [[MIDDLE_BLOCK]]:
 ; VF8UF2-NEXT:    [[TMP10:%.*]] = zext <8 x i8> [[WIDE_LOAD1]] to <8 x i64>
-; VF8UF2-NEXT:    [[TMP3:%.*]] = icmp eq <8 x i64> [[TMP10]], [[STEP_ADD]]
+; VF8UF2-NEXT:    [[TMP3:%.*]] = icmp eq <8 x i64> [[TMP10]], <i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17>
 ; VF8UF2-NEXT:    [[TMP12:%.*]] = extractelement <8 x i1> [[TMP3]], i32 7
 ; VF8UF2-NEXT:    br label %[[EXIT:.*]]
 ; VF8UF2:       [[VECTOR_EARLY_EXIT]]:
@@ -405,22 +411,17 @@ define i1 @test_early_exit_max_tc_less_than_16_non_canonical_iv(ptr dereferencea
 ; VF16UF1:       [[VECTOR_PH]]:
 ; VF16UF1-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; VF16UF1:       [[VECTOR_BODY]]:
-; VF16UF1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
-; VF16UF1-NEXT:    [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY_INTERIM]] ]
-; VF16UF1-NEXT:    [[OFFSET_IDX:%.*]] = add i64 2, [[INDEX]]
-; VF16UF1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET_IDX]]
+; VF16UF1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 2
 ; VF16UF1-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1
 ; VF16UF1-NEXT:    [[TMP3:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], zeroinitializer
-; VF16UF1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; VF16UF1-NEXT:    [[TMP4:%.*]] = freeze <16 x i1> [[TMP3]]
 ; VF16UF1-NEXT:    [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP4]])
-; VF16UF1-NEXT:    [[VEC_IND_NEXT]] = add nsw <16 x i64> [[VEC_IND]], splat (i64 16)
-; VF16UF1-NEXT:    br i1 [[TMP5]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM]]
+; VF16UF1-NEXT:    br i1 [[TMP5]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM:.*]]
 ; VF16UF1:       [[VECTOR_BODY_INTERIM]]:
-; VF16UF1-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; VF16UF1-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; VF16UF1:       [[MIDDLE_BLOCK]]:
 ; VF16UF1-NEXT:    [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i64>
-; VF16UF1-NEXT:    [[TMP2:%.*]] = icmp eq <16 x i64> [[TMP6]], [[VEC_IND]]
+; VF16UF1-NEXT:    [[TMP2:%.*]] = icmp eq <16 x i64> [[TMP6]], <i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17>
 ; VF16UF1-NEXT:    [[TMP8:%.*]] = extractelement <16 x i1> [[TMP2]], i32 15
 ; VF16UF1-NEXT:    br label %[[EXIT:.*]]
 ; VF16UF1:       [[VECTOR_EARLY_EXIT]]:

diff  --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
index 5da6fc3179043..02846aba50f72 100644
--- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
+++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
@@ -1211,6 +1211,272 @@ loop:
 exit:
   ret void
 }
+
+; Test that a first-order recurrence with a single vector iteration (where the
+; vector loop backedge is removed) does not crash.
+define void @first_order_recurrence_single_vector_iteration(ptr noalias %pkt, ptr noalias %dst) {
+; VF8UF1-LABEL: define void @first_order_recurrence_single_vector_iteration(
+; VF8UF1-SAME: ptr noalias [[PKT:%.*]], ptr noalias [[DST:%.*]]) {
+; VF8UF1-NEXT:  [[ENTRY:.*:]]
+; VF8UF1-NEXT:    br label %[[VECTOR_PH:.*]]
+; VF8UF1:       [[VECTOR_PH]]:
+; VF8UF1-NEXT:    br label %[[VECTOR_BODY:.*]]
+; VF8UF1:       [[VECTOR_BODY]]:
+; VF8UF1-NEXT:    [[TMP0:%.*]] = load i8, ptr [[PKT]], align 1
+; VF8UF1-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i8> poison, i8 [[TMP0]], i64 0
+; VF8UF1-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i8> [[BROADCAST_SPLATINSERT]], <8 x i8> poison, <8 x i32> zeroinitializer
+; VF8UF1-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i8> zeroinitializer, <8 x i8> [[BROADCAST_SPLAT]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
+; VF8UF1-NEXT:    [[TMP2:%.*]] = extractelement <8 x i8> [[TMP1]], i32 7
+; VF8UF1-NEXT:    store i8 [[TMP2]], ptr [[DST]], align 1
+; VF8UF1-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
+; VF8UF1:       [[MIDDLE_BLOCK]]:
+; VF8UF1-NEXT:    br label %[[EXIT:.*]]
+; VF8UF1:       [[EXIT]]:
+; VF8UF1-NEXT:    ret void
+;
+; VF8UF2-LABEL: define void @first_order_recurrence_single_vector_iteration(
+; VF8UF2-SAME: ptr noalias [[PKT:%.*]], ptr noalias [[DST:%.*]]) {
+; VF8UF2-NEXT:  [[ENTRY:.*:]]
+; VF8UF2-NEXT:    br label %[[VECTOR_PH:.*]]
+; VF8UF2:       [[VECTOR_PH]]:
+; VF8UF2-NEXT:    br label %[[VECTOR_BODY:.*]]
+; VF8UF2:       [[VECTOR_BODY]]:
+; VF8UF2-NEXT:    [[TMP0:%.*]] = load i8, ptr [[PKT]], align 1
+; VF8UF2-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i8> poison, i8 [[TMP0]], i64 0
+; VF8UF2-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i8> [[BROADCAST_SPLATINSERT]], <8 x i8> poison, <8 x i32> zeroinitializer
+; VF8UF2-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i8> zeroinitializer, <8 x i8> [[BROADCAST_SPLAT]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
+; VF8UF2-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i8> [[BROADCAST_SPLAT]], <8 x i8> [[BROADCAST_SPLAT]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
+; VF8UF2-NEXT:    br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; VF8UF2:       [[PRED_STORE_IF]]:
+; VF8UF2-NEXT:    [[TMP3:%.*]] = extractelement <8 x i8> [[TMP1]], i32 0
+; VF8UF2-NEXT:    store i8 [[TMP3]], ptr [[DST]], align 1
+; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE]]
+; VF8UF2:       [[PRED_STORE_CONTINUE]]:
+; VF8UF2-NEXT:    br i1 true, label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; VF8UF2:       [[PRED_STORE_IF1]]:
+; VF8UF2-NEXT:    [[TMP4:%.*]] = extractelement <8 x i8> [[TMP1]], i32 1
+; VF8UF2-NEXT:    store i8 [[TMP4]], ptr [[DST]], align 1
+; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
+; VF8UF2:       [[PRED_STORE_CONTINUE2]]:
+; VF8UF2-NEXT:    br i1 true, label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; VF8UF2:       [[PRED_STORE_IF3]]:
+; VF8UF2-NEXT:    [[TMP5:%.*]] = extractelement <8 x i8> [[TMP1]], i32 2
+; VF8UF2-NEXT:    store i8 [[TMP5]], ptr [[DST]], align 1
+; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE4]]
+; VF8UF2:       [[PRED_STORE_CONTINUE4]]:
+; VF8UF2-NEXT:    br i1 true, label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
+; VF8UF2:       [[PRED_STORE_IF5]]:
+; VF8UF2-NEXT:    [[TMP6:%.*]] = extractelement <8 x i8> [[TMP1]], i32 3
+; VF8UF2-NEXT:    store i8 [[TMP6]], ptr [[DST]], align 1
+; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
+; VF8UF2:       [[PRED_STORE_CONTINUE6]]:
+; VF8UF2-NEXT:    br i1 true, label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; VF8UF2:       [[PRED_STORE_IF7]]:
+; VF8UF2-NEXT:    [[TMP7:%.*]] = extractelement <8 x i8> [[TMP1]], i32 4
+; VF8UF2-NEXT:    store i8 [[TMP7]], ptr [[DST]], align 1
+; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE8]]
+; VF8UF2:       [[PRED_STORE_CONTINUE8]]:
+; VF8UF2-NEXT:    br i1 true, label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; VF8UF2:       [[PRED_STORE_IF9]]:
+; VF8UF2-NEXT:    [[TMP8:%.*]] = extractelement <8 x i8> [[TMP1]], i32 5
+; VF8UF2-NEXT:    store i8 [[TMP8]], ptr [[DST]], align 1
+; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE10]]
+; VF8UF2:       [[PRED_STORE_CONTINUE10]]:
+; VF8UF2-NEXT:    br i1 true, label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
+; VF8UF2:       [[PRED_STORE_IF11]]:
+; VF8UF2-NEXT:    [[TMP9:%.*]] = extractelement <8 x i8> [[TMP1]], i32 6
+; VF8UF2-NEXT:    store i8 [[TMP9]], ptr [[DST]], align 1
+; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE12]]
+; VF8UF2:       [[PRED_STORE_CONTINUE12]]:
+; VF8UF2-NEXT:    br i1 true, label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
+; VF8UF2:       [[PRED_STORE_IF13]]:
+; VF8UF2-NEXT:    [[TMP10:%.*]] = extractelement <8 x i8> [[TMP1]], i32 7
+; VF8UF2-NEXT:    store i8 [[TMP10]], ptr [[DST]], align 1
+; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
+; VF8UF2:       [[PRED_STORE_CONTINUE14]]:
+; VF8UF2-NEXT:    br i1 false, label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
+; VF8UF2:       [[PRED_STORE_IF15]]:
+; VF8UF2-NEXT:    [[TMP11:%.*]] = extractelement <8 x i8> [[TMP2]], i32 0
+; VF8UF2-NEXT:    store i8 [[TMP11]], ptr [[DST]], align 1
+; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE16]]
+; VF8UF2:       [[PRED_STORE_CONTINUE16]]:
+; VF8UF2-NEXT:    br i1 false, label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
+; VF8UF2:       [[PRED_STORE_IF17]]:
+; VF8UF2-NEXT:    [[TMP12:%.*]] = extractelement <8 x i8> [[TMP2]], i32 1
+; VF8UF2-NEXT:    store i8 [[TMP12]], ptr [[DST]], align 1
+; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE18]]
+; VF8UF2:       [[PRED_STORE_CONTINUE18]]:
+; VF8UF2-NEXT:    br i1 false, label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
+; VF8UF2:       [[PRED_STORE_IF19]]:
+; VF8UF2-NEXT:    [[TMP13:%.*]] = extractelement <8 x i8> [[TMP2]], i32 2
+; VF8UF2-NEXT:    store i8 [[TMP13]], ptr [[DST]], align 1
+; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE20]]
+; VF8UF2:       [[PRED_STORE_CONTINUE20]]:
+; VF8UF2-NEXT:    br i1 false, label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
+; VF8UF2:       [[PRED_STORE_IF21]]:
+; VF8UF2-NEXT:    [[TMP14:%.*]] = extractelement <8 x i8> [[TMP2]], i32 3
+; VF8UF2-NEXT:    store i8 [[TMP14]], ptr [[DST]], align 1
+; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE22]]
+; VF8UF2:       [[PRED_STORE_CONTINUE22]]:
+; VF8UF2-NEXT:    br i1 false, label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
+; VF8UF2:       [[PRED_STORE_IF23]]:
+; VF8UF2-NEXT:    [[TMP15:%.*]] = extractelement <8 x i8> [[TMP2]], i32 4
+; VF8UF2-NEXT:    store i8 [[TMP15]], ptr [[DST]], align 1
+; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE24]]
+; VF8UF2:       [[PRED_STORE_CONTINUE24]]:
+; VF8UF2-NEXT:    br i1 false, label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
+; VF8UF2:       [[PRED_STORE_IF25]]:
+; VF8UF2-NEXT:    [[TMP16:%.*]] = extractelement <8 x i8> [[TMP2]], i32 5
+; VF8UF2-NEXT:    store i8 [[TMP16]], ptr [[DST]], align 1
+; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE26]]
+; VF8UF2:       [[PRED_STORE_CONTINUE26]]:
+; VF8UF2-NEXT:    br i1 false, label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
+; VF8UF2:       [[PRED_STORE_IF27]]:
+; VF8UF2-NEXT:    [[TMP17:%.*]] = extractelement <8 x i8> [[TMP2]], i32 6
+; VF8UF2-NEXT:    store i8 [[TMP17]], ptr [[DST]], align 1
+; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE28]]
+; VF8UF2:       [[PRED_STORE_CONTINUE28]]:
+; VF8UF2-NEXT:    br i1 false, label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30:.*]]
+; VF8UF2:       [[PRED_STORE_IF29]]:
+; VF8UF2-NEXT:    [[TMP18:%.*]] = extractelement <8 x i8> [[TMP2]], i32 7
+; VF8UF2-NEXT:    store i8 [[TMP18]], ptr [[DST]], align 1
+; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE30]]
+; VF8UF2:       [[PRED_STORE_CONTINUE30]]:
+; VF8UF2-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
+; VF8UF2:       [[MIDDLE_BLOCK]]:
+; VF8UF2-NEXT:    br label %[[EXIT:.*]]
+; VF8UF2:       [[EXIT]]:
+; VF8UF2-NEXT:    ret void
+;
+; VF16UF1-LABEL: define void @first_order_recurrence_single_vector_iteration(
+; VF16UF1-SAME: ptr noalias [[PKT:%.*]], ptr noalias [[DST:%.*]]) {
+; VF16UF1-NEXT:  [[ENTRY:.*:]]
+; VF16UF1-NEXT:    br label %[[VECTOR_PH:.*]]
+; VF16UF1:       [[VECTOR_PH]]:
+; VF16UF1-NEXT:    br label %[[VECTOR_BODY:.*]]
+; VF16UF1:       [[VECTOR_BODY]]:
+; VF16UF1-NEXT:    [[TMP0:%.*]] = load i8, ptr [[PKT]], align 1
+; VF16UF1-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i64 0
+; VF16UF1-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
+; VF16UF1-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> zeroinitializer, <16 x i8> [[BROADCAST_SPLAT]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
+; VF16UF1-NEXT:    br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; VF16UF1:       [[PRED_STORE_IF]]:
+; VF16UF1-NEXT:    [[TMP2:%.*]] = extractelement <16 x i8> [[TMP1]], i32 0
+; VF16UF1-NEXT:    store i8 [[TMP2]], ptr [[DST]], align 1
+; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE]]
+; VF16UF1:       [[PRED_STORE_CONTINUE]]:
+; VF16UF1-NEXT:    br i1 true, label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; VF16UF1:       [[PRED_STORE_IF1]]:
+; VF16UF1-NEXT:    [[TMP3:%.*]] = extractelement <16 x i8> [[TMP1]], i32 1
+; VF16UF1-NEXT:    store i8 [[TMP3]], ptr [[DST]], align 1
+; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
+; VF16UF1:       [[PRED_STORE_CONTINUE2]]:
+; VF16UF1-NEXT:    br i1 true, label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; VF16UF1:       [[PRED_STORE_IF3]]:
+; VF16UF1-NEXT:    [[TMP4:%.*]] = extractelement <16 x i8> [[TMP1]], i32 2
+; VF16UF1-NEXT:    store i8 [[TMP4]], ptr [[DST]], align 1
+; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE4]]
+; VF16UF1:       [[PRED_STORE_CONTINUE4]]:
+; VF16UF1-NEXT:    br i1 true, label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
+; VF16UF1:       [[PRED_STORE_IF5]]:
+; VF16UF1-NEXT:    [[TMP5:%.*]] = extractelement <16 x i8> [[TMP1]], i32 3
+; VF16UF1-NEXT:    store i8 [[TMP5]], ptr [[DST]], align 1
+; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
+; VF16UF1:       [[PRED_STORE_CONTINUE6]]:
+; VF16UF1-NEXT:    br i1 true, label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; VF16UF1:       [[PRED_STORE_IF7]]:
+; VF16UF1-NEXT:    [[TMP6:%.*]] = extractelement <16 x i8> [[TMP1]], i32 4
+; VF16UF1-NEXT:    store i8 [[TMP6]], ptr [[DST]], align 1
+; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE8]]
+; VF16UF1:       [[PRED_STORE_CONTINUE8]]:
+; VF16UF1-NEXT:    br i1 true, label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; VF16UF1:       [[PRED_STORE_IF9]]:
+; VF16UF1-NEXT:    [[TMP7:%.*]] = extractelement <16 x i8> [[TMP1]], i32 5
+; VF16UF1-NEXT:    store i8 [[TMP7]], ptr [[DST]], align 1
+; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE10]]
+; VF16UF1:       [[PRED_STORE_CONTINUE10]]:
+; VF16UF1-NEXT:    br i1 true, label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
+; VF16UF1:       [[PRED_STORE_IF11]]:
+; VF16UF1-NEXT:    [[TMP8:%.*]] = extractelement <16 x i8> [[TMP1]], i32 6
+; VF16UF1-NEXT:    store i8 [[TMP8]], ptr [[DST]], align 1
+; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE12]]
+; VF16UF1:       [[PRED_STORE_CONTINUE12]]:
+; VF16UF1-NEXT:    br i1 true, label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
+; VF16UF1:       [[PRED_STORE_IF13]]:
+; VF16UF1-NEXT:    [[TMP9:%.*]] = extractelement <16 x i8> [[TMP1]], i32 7
+; VF16UF1-NEXT:    store i8 [[TMP9]], ptr [[DST]], align 1
+; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
+; VF16UF1:       [[PRED_STORE_CONTINUE14]]:
+; VF16UF1-NEXT:    br i1 false, label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
+; VF16UF1:       [[PRED_STORE_IF15]]:
+; VF16UF1-NEXT:    [[TMP10:%.*]] = extractelement <16 x i8> [[TMP1]], i32 8
+; VF16UF1-NEXT:    store i8 [[TMP10]], ptr [[DST]], align 1
+; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE16]]
+; VF16UF1:       [[PRED_STORE_CONTINUE16]]:
+; VF16UF1-NEXT:    br i1 false, label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
+; VF16UF1:       [[PRED_STORE_IF17]]:
+; VF16UF1-NEXT:    [[TMP11:%.*]] = extractelement <16 x i8> [[TMP1]], i32 9
+; VF16UF1-NEXT:    store i8 [[TMP11]], ptr [[DST]], align 1
+; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE18]]
+; VF16UF1:       [[PRED_STORE_CONTINUE18]]:
+; VF16UF1-NEXT:    br i1 false, label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
+; VF16UF1:       [[PRED_STORE_IF19]]:
+; VF16UF1-NEXT:    [[TMP12:%.*]] = extractelement <16 x i8> [[TMP1]], i32 10
+; VF16UF1-NEXT:    store i8 [[TMP12]], ptr [[DST]], align 1
+; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE20]]
+; VF16UF1:       [[PRED_STORE_CONTINUE20]]:
+; VF16UF1-NEXT:    br i1 false, label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
+; VF16UF1:       [[PRED_STORE_IF21]]:
+; VF16UF1-NEXT:    [[TMP13:%.*]] = extractelement <16 x i8> [[TMP1]], i32 11
+; VF16UF1-NEXT:    store i8 [[TMP13]], ptr [[DST]], align 1
+; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE22]]
+; VF16UF1:       [[PRED_STORE_CONTINUE22]]:
+; VF16UF1-NEXT:    br i1 false, label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
+; VF16UF1:       [[PRED_STORE_IF23]]:
+; VF16UF1-NEXT:    [[TMP14:%.*]] = extractelement <16 x i8> [[TMP1]], i32 12
+; VF16UF1-NEXT:    store i8 [[TMP14]], ptr [[DST]], align 1
+; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE24]]
+; VF16UF1:       [[PRED_STORE_CONTINUE24]]:
+; VF16UF1-NEXT:    br i1 false, label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
+; VF16UF1:       [[PRED_STORE_IF25]]:
+; VF16UF1-NEXT:    [[TMP15:%.*]] = extractelement <16 x i8> [[TMP1]], i32 13
+; VF16UF1-NEXT:    store i8 [[TMP15]], ptr [[DST]], align 1
+; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE26]]
+; VF16UF1:       [[PRED_STORE_CONTINUE26]]:
+; VF16UF1-NEXT:    br i1 false, label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
+; VF16UF1:       [[PRED_STORE_IF27]]:
+; VF16UF1-NEXT:    [[TMP16:%.*]] = extractelement <16 x i8> [[TMP1]], i32 14
+; VF16UF1-NEXT:    store i8 [[TMP16]], ptr [[DST]], align 1
+; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE28]]
+; VF16UF1:       [[PRED_STORE_CONTINUE28]]:
+; VF16UF1-NEXT:    br i1 false, label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30:.*]]
+; VF16UF1:       [[PRED_STORE_IF29]]:
+; VF16UF1-NEXT:    [[TMP17:%.*]] = extractelement <16 x i8> [[TMP1]], i32 15
+; VF16UF1-NEXT:    store i8 [[TMP17]], ptr [[DST]], align 1
+; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE30]]
+; VF16UF1:       [[PRED_STORE_CONTINUE30]]:
+; VF16UF1-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
+; VF16UF1:       [[MIDDLE_BLOCK]]:
+; VF16UF1-NEXT:    br label %[[EXIT:.*]]
+; VF16UF1:       [[EXIT]]:
+; VF16UF1-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %recur = phi i8 [ 0, %entry ], [ %load, %loop ]
+  %load = load i8, ptr %pkt, align 1
+  store i8 %recur, ptr %dst, align 1
+  %iv.next = add i64 %iv, 1
+  %cmp = icmp eq i64 %iv, 7
+  br i1 %cmp, label %exit, label %loop
+
+exit:
+  ret void
+}
+;.
 ;.
 ; VF8UF1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
 ; VF8UF1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}

diff  --git a/llvm/test/Transforms/PhaseOrdering/ARM/arm_var_q31.ll b/llvm/test/Transforms/PhaseOrdering/ARM/arm_var_q31.ll
index b7b57176a7a2f..7ca64bc228eb9 100644
--- a/llvm/test/Transforms/PhaseOrdering/ARM/arm_var_q31.ll
+++ b/llvm/test/Transforms/PhaseOrdering/ARM/arm_var_q31.ll
@@ -74,7 +74,7 @@ define void @arm_var_q31(ptr noundef %pSrc, i32 noundef %blockSize, ptr noundef
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[SHR]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY_PREHEADER67]]
-; CHECK:       while.body.preheader67:
+; CHECK:       while.body.preheader63:
 ; CHECK-NEXT:    [[SUMOFSQUARES_043_PH:%.*]] = phi i64 [ 0, [[WHILE_BODY_PREHEADER]] ], [ [[TMP25]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    [[PSRC_ADDR_042_PH:%.*]] = phi ptr [ [[PSRC]], [[WHILE_BODY_PREHEADER]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    [[BLKCNT_041_PH:%.*]] = phi i32 [ [[SHR]], [[WHILE_BODY_PREHEADER]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ]


        


More information about the llvm-commits mailing list