[llvm] [VPlan] Explicitly unroll replicate-regions without live-outs by VF. (PR #170212)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 10 06:20:19 PDT 2026


https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/170212

>From a19bec2cccff9b95f4d080f9967456f7c6ebe222 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 1 Dec 2025 22:21:48 +0000
Subject: [PATCH 1/2] [VPlan] Explicitly unroll replicate-regions without
 live-outs by VF.

This patch adds a new unrollReplicateRegions transform to
unroll replicate-regions by VF, dissolving them. The transform creates
VF copies of the replicate-region's content, connects them and converts
recipes to single-scalar variants for the corresponding lanes.

The initial version skips regions with live-outs (VPPredInstPHIRecipe);
support for those will be added in follow-up patches.

Depends on https://github.com/llvm/llvm-project/pull/170053
---
 .../Transforms/Vectorize/LoopVectorize.cpp    |   2 +
 .../Transforms/Vectorize/VPlanTransforms.cpp  |   2 +-
 .../Transforms/Vectorize/VPlanTransforms.h    |   6 +
 llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 211 +++++++++++++++---
 .../LoopVectorize/AArch64/blend-costs.ll      |   3 +-
 .../AArch64/conditional-branches-cost.ll      | 147 ++++--------
 .../AArch64/fold-tail-low-trip-count.ll       |   8 -
 .../AArch64/force-target-instruction-cost.ll  |   6 +-
 .../AArch64/induction-costs-sve.ll            |   3 +-
 .../AArch64/masked-call-scalarize.ll          |   6 +-
 .../LoopVectorize/AArch64/optsize_minsize.ll  |   3 +-
 .../LoopVectorize/AArch64/predicated-costs.ll |   3 +-
 .../transform-narrow-interleave-fold-tail.ll  |   3 +-
 ...-interleave-to-widen-memory-multi-block.ll |   8 +-
 ...eave-to-widen-memory-remove-loop-region.ll |   9 +-
 .../SystemZ/force-target-instruction-cost.ll  |   5 +-
 .../VPlan/interleave-and-scalarize-only.ll    |   3 +-
 .../VPlan/vplan-predicate-switch.ll           | 108 ++++-----
 .../VPlan/vplan-print-after-all.ll            |   2 +
 .../X86/consecutive-ptr-uniforms.ll           |   7 +-
 .../LoopVectorize/X86/constant-fold.ll        |   9 +-
 .../X86/cost-conditional-branches.ll          |  14 +-
 .../LoopVectorize/X86/gather_scatter.ll       |  10 -
 .../LoopVectorize/X86/induction-costs.ll      |   3 +-
 .../X86/pr109581-unused-blend.ll              |   6 +-
 .../X86/pr55096-scalarize-add.ll              |   3 +-
 .../LoopVectorize/X86/small-size.ll           |   2 +-
 .../x86-interleaved-accesses-masked-group.ll  | 120 ++++------
 ...86-interleaved-store-accesses-with-gaps.ll |  12 +-
 llvm/test/Transforms/LoopVectorize/as_cast.ll |   2 +-
 .../LoopVectorize/cast-induction.ll           |   7 +-
 .../LoopVectorize/consecutive-ptr-uniforms.ll |   8 +-
 .../constant-fold-commutative-and.ll          |   3 +-
 .../LoopVectorize/constantfolder.ll           |   5 +-
 .../Transforms/LoopVectorize/cse-casts.ll     |   3 +-
 .../test/Transforms/LoopVectorize/debugloc.ll |   6 +-
 .../dont-fold-tail-for-divisible-TC.ll        |   3 +-
 .../find-last-iv-sinkable-load.ll             |   4 +-
 .../LoopVectorize/first-order-recurrence.ll   |   6 +-
 .../LoopVectorize/float-induction.ll          |   9 +-
 ...nd-sink-mem-ops-with-invariant-pointers.ll |   3 +-
 ...predicated-loads-with-predicated-stores.ll |  14 --
 .../LoopVectorize/if-pred-stores.ll           |  16 +-
 .../interleaved-accesses-pred-stores.ll       |  11 +-
 .../LoopVectorize/load-deref-pred-align.ll    |  20 +-
 ...eref-pred-poison-ub-ops-feeding-pointer.ll |  17 +-
 .../Transforms/LoopVectorize/loop-form.ll     |   4 +-
 .../LoopVectorize/memdep-fold-tail.ll         |   9 +-
 .../LoopVectorize/pointer-induction.ll        |   7 +-
 llvm/test/Transforms/LoopVectorize/pr37248.ll |  13 +-
 .../pr45679-fold-tail-by-masking.ll           |  22 +-
 .../LoopVectorize/predicate-switch.ll         |   6 -
 .../LoopVectorize/select-cmp-multiuse.ll      |   6 +-
 .../LoopVectorize/struct-return-replicate.ll  |  20 +-
 .../Transforms/LoopVectorize/struct-return.ll |  10 +-
 .../tail-folding-alloca-in-loop.ll            |  17 +-
 .../LoopVectorize/tail-folding-div.ll         |   3 +-
 .../tail-folding-masked-mem-opts.ll           |  11 +-
 ...folding-optimize-vector-induction-width.ll |  27 +--
 .../LoopVectorize/tail-folding-switch.ll      |   3 +-
 .../tail-folding-vectorization-factor-1.ll    |   5 +-
 .../trip-count-expansion-may-introduce-ub.ll  |   8 +-
 .../use-scalar-epilogue-if-tp-fails.ll        |   8 -
 .../vector-loop-backedge-elimination.ll       |  15 +-
 64 files changed, 469 insertions(+), 586 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index bb4eef5a41c09..6059949e22ef2 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7422,6 +7422,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
   RUN_VPLAN_PASS(VPlanTransforms::materializePacksAndUnpacks, BestVPlan);
   RUN_VPLAN_PASS(VPlanTransforms::materializeBroadcasts, BestVPlan);
   RUN_VPLAN_PASS(VPlanTransforms::replicateByVF, BestVPlan, BestVF);
+  RUN_VPLAN_PASS(VPlanTransforms::unrollReplicateRegions, BestVPlan, BestVF);
+  RUN_VPLAN_PASS(VPlanTransforms::mergeBlocksIntoPredecessors, BestVPlan);
   bool HasBranchWeights =
       hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator());
   if (HasBranchWeights) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 33e5b25bd9322..46969db9fae8d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -537,7 +537,7 @@ static void addReplicateRegions(VPlan &Plan) {
 
 /// Remove redundant VPBasicBlocks by merging them into their predecessor if
 /// the predecessor has a single successor.
-static bool mergeBlocksIntoPredecessors(VPlan &Plan) {
+bool VPlanTransforms::mergeBlocksIntoPredecessors(VPlan &Plan) {
   SmallVector<VPBasicBlock *> WorkList;
   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
            vp_depth_first_deep(Plan.getEntry()))) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 2956659e5df8b..da38d4ed1fab1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -233,6 +233,10 @@ struct VPlanTransforms {
   /// replicate regions, thereby dissolving the latter.
   static void replicateByVF(VPlan &Plan, ElementCount VF);
 
+  /// Replace replicate regions by explicitly replicating the regions' contents
+  /// \p VF times, each copy processing a single lane.
+  static void unrollReplicateRegions(VPlan &Plan, ElementCount VF);
+
   /// Optimize \p Plan based on \p BestVF and \p BestUF. This may restrict the
   /// resulting plan to \p BestVF and \p BestUF.
   static void optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
@@ -244,6 +248,8 @@ struct VPlanTransforms {
   /// block merging.
   LLVM_ABI_FOR_TEST static void optimize(VPlan &Plan);
 
+  static bool mergeBlocksIntoPredecessors(VPlan &Plan);
+
   /// Wrap predicated VPReplicateRecipes with a mask operand in an if-then
   /// region block and remove the mask operand. Optimize the created regions by
   /// iteratively sinking scalar operands into the region, followed by merging
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 2d961808d3bcd..20e99b8b3377d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -544,6 +544,38 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF) {
   VPlanTransforms::removeDeadRecipes(Plan);
 }
 
+/// Add a start index to \p Steps for \p Lane by combining any existing start
+/// index with the lane offset. \p Builder is used to insert new recipes before
+/// \p Steps.
+static void addStartIndexForLane(VPScalarIVStepsRecipe *Steps, unsigned Lane,
+                                 VPlan &Plan, VPBuilder &Builder) {
+  VPTypeAnalysis TypeInfo(Plan);
+  Type *BaseIVTy = TypeInfo.inferScalarType(Steps->getOperand(0));
+
+  VPValue *StartIndex = Steps->getStartIndex();
+  VPValue *LaneOffset;
+  unsigned AddOp;
+  VPIRFlags Flags;
+  if (BaseIVTy->isFloatingPointTy()) {
+    int SignedLane = static_cast<int>(Lane);
+    if (!StartIndex && Steps->getInductionOpcode() == Instruction::FSub)
+      SignedLane = -SignedLane;
+    LaneOffset = Plan.getOrAddLiveIn(ConstantFP::get(BaseIVTy, SignedLane));
+    AddOp = Steps->getInductionOpcode();
+    Flags = VPIRFlags(FastMathFlags());
+  } else {
+    unsigned BaseIVBits = BaseIVTy->getScalarSizeInBits();
+    LaneOffset = Plan.getConstantInt(
+        APInt(BaseIVBits, Lane, /*isSigned*/ false, /*implicitTrunc*/ true));
+    AddOp = Instruction::Add;
+    Flags = VPIRFlags(VPIRFlags::WrapFlagsTy(false, false));
+  }
+
+  if (StartIndex)
+    LaneOffset = Builder.createNaryOp(AddOp, {StartIndex, LaneOffset}, Flags);
+  Steps->setStartIndex(LaneOffset);
+}
+
 /// Create a single-scalar clone of \p DefR (must be a VPReplicateRecipe,
 /// VPInstruction or VPScalarIVStepsRecipe) for lane \p Lane. Use \p
 /// Def2LaneDefs to look up scalar definitions for operands of \DefR.
@@ -618,36 +650,8 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
       // Skip lane 0: an absent start index is implicitly zero.
       unsigned KnownLane = Lane.getKnownLane();
       if (KnownLane != 0) {
-        VPTypeAnalysis TypeInfo(Plan);
-        Type *BaseIVTy = TypeInfo.inferScalarType(DefR->getOperand(0));
-
-        VPValue *StartIndex = Steps->getStartIndex();
-        VPValue *LaneOffset;
-        unsigned AddOp;
-        VPIRFlags Flags;
-        if (BaseIVTy->isFloatingPointTy()) {
-          int SignedLane = static_cast<int>(KnownLane);
-          if (!StartIndex && Steps->getInductionOpcode() == Instruction::FSub)
-            SignedLane = -SignedLane;
-          LaneOffset =
-              Plan.getOrAddLiveIn(ConstantFP::get(BaseIVTy, SignedLane));
-          AddOp = Steps->getInductionOpcode();
-          Flags = VPIRFlags(FastMathFlags());
-        } else {
-          unsigned BaseIVBits = BaseIVTy->getScalarSizeInBits();
-          LaneOffset = Plan.getConstantInt(APInt(BaseIVBits, KnownLane,
-                                                 /*isSigned*/ false,
-                                                 /*implicitTrunc*/ true));
-          AddOp = Instruction::Add;
-          Flags = VPIRFlags(VPIRFlags::WrapFlagsTy(false, false));
-        }
-
-        if (StartIndex) {
-          VPBuilder LaneBuilder(DefR);
-          LaneOffset =
-              LaneBuilder.createNaryOp(AddOp, {StartIndex, LaneOffset}, Flags);
-        }
-        Steps->setStartIndex(LaneOffset);
+        VPBuilder LaneBuilder(DefR);
+        addStartIndexForLane(Steps, KnownLane, Plan, LaneBuilder);
       }
     }
   }
@@ -729,3 +733,150 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
   for (auto *R : reverse(ToRemove))
     R->eraseFromParent();
 }
+
+/// Convert recipes in region blocks to operate on a single lane 0. Lane 0
+/// uses the original blocks, and the recipes are adjusted:
+/// VPReplicateRecipes are converted to single-scalar ones, branch-on-mask is
+/// converted into BranchOnCond and extracts are created as needed.
+static void convertRecipesInRegionBlocksToSingleScalar(
+    VPlan &Plan, Type *IdxTy, ElementCount VF,
+    ArrayRef<VPBlockBase *> RegionBlocks) {
+  for (VPBlockBase *VPB : RegionBlocks) {
+    for (VPRecipeBase &NewR : make_early_inc_range(*cast<VPBasicBlock>(VPB))) {
+      VPBuilder Builder(&NewR);
+      for (const auto &[I, Op] : enumerate(NewR.operands())) {
+        // Skip operands that don't need extraction: scalar VF (no vectors),
+        // values defined in the same block (already scalar), or values that
+        // are already single scalars.
+        auto *DefR = Op->getDefiningRecipe();
+        if (VF.isScalar() || (DefR && DefR->getParent() == VPB) ||
+            vputils::isSingleScalar(Op))
+          continue;
+
+        // Extract the lane from values defined outside the region.
+        VPValue *Idx = Plan.getConstantInt(IdxTy, 0);
+        VPValue *Extract = Builder.createNaryOp(Instruction::ExtractElement,
+                                                {Op, Idx}, NewR.getDebugLoc());
+        NewR.setOperand(I, Extract);
+      }
+
+      if (auto *RepR = dyn_cast<VPReplicateRecipe>(&NewR)) {
+        auto *New =
+            new VPReplicateRecipe(RepR->getUnderlyingInstr(), RepR->operands(),
+                                  /* IsSingleScalar=*/true, /*Mask=*/nullptr,
+                                  *RepR, *RepR, RepR->getDebugLoc());
+        New->insertBefore(RepR);
+        RepR->replaceAllUsesWith(New);
+        RepR->eraseFromParent();
+      } else if (auto *BranchOnMask = dyn_cast<VPBranchOnMaskRecipe>(&NewR)) {
+        Builder.createNaryOp(VPInstruction::BranchOnCond,
+                             {BranchOnMask->getOperand(0)},
+                             BranchOnMask->getDebugLoc());
+        BranchOnMask->eraseFromParent();
+      }
+    }
+  }
+}
+
+/// Process recipes in a single lane's blocks, updating them for lane-specific
+/// operations.
+static void processLaneForReplicateRegion(
+    VPlan &Plan, Type *IdxTy, unsigned Lane,
+    ArrayRef<VPBlockBase *> RegionBlocks,
+    DenseMap<VPBlockBase *, VPBlockBase *> &Old2NewBlocks) {
+  DenseMap<VPValue *, VPValue *> Old2NewVPValues;
+  for (VPBlockBase *OldVPB : RegionBlocks) {
+    auto *OldBB = cast<VPBasicBlock>(OldVPB);
+    auto *NewBB = cast<VPBasicBlock>(Old2NewBlocks.lookup(OldVPB));
+    for (const auto &[OldR, NewR] : zip(*OldBB, *NewBB)) {
+      for (const auto &[OldV, NewV] :
+           zip(OldR.definedValues(), NewR.definedValues()))
+        Old2NewVPValues[OldV] = NewV;
+    }
+
+    // Update lane operands and remap operands to use copies for current lane.
+    for (VPRecipeBase &NewR : make_early_inc_range(*NewBB)) {
+      if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(&NewR)) {
+        VPBuilder Builder(Steps);
+        addStartIndexForLane(Steps, Lane, Plan, Builder);
+      } else if (match(&NewR, m_ExtractElement(m_VPValue(), m_ZeroInt()))) {
+        NewR.setOperand(1, Plan.getConstantInt(IdxTy, Lane));
+      }
+
+      // Remap operands to use lane-specific values.
+      for (const auto &[I, Op] : enumerate(NewR.operands())) {
+        // Use cloned value if operand was defined in the region.
+        if (auto *New = Old2NewVPValues.lookup(Op))
+          NewR.setOperand(I, New);
+      }
+    }
+  }
+}
+
+void VPlanTransforms::unrollReplicateRegions(VPlan &Plan, ElementCount VF) {
+  // Collect all replicate regions in the plan before modifying the CFG.
+  SmallVector<VPRegionBlock *> ReplicateRegions;
+  for (VPRegionBlock *Region : VPBlockUtils::blocksOnly<VPRegionBlock>(
+           vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
+    if (Region->isReplicator())
+      ReplicateRegions.push_back(Region);
+  }
+
+  Type *IdxTy = IntegerType::get(Plan.getContext(), 32);
+  for (VPRegionBlock *Region : ReplicateRegions) {
+    assert(!VF.isScalable() && "cannot replicate across scalable VFs");
+
+    // Skip regions with live-outs as packing scalar results back into vectors
+    // is not yet implemented.
+    VPBlockBase *Exiting = Region->getExiting();
+    if (any_of(*cast<VPBasicBlock>(Exiting), IsaPred<VPPredInstPHIRecipe>))
+      continue;
+
+    // Disconnect and dissolve the region.
+    VPBlockBase *Pred = Region->getSinglePredecessor();
+    assert(Pred && "Replicate region must have a single predecessor");
+    SmallVector<VPBlockBase *> Successors(Region->successors());
+    VPBlockUtils::disconnectBlocks(Pred, Region);
+    for (VPBlockBase *Succ : Successors)
+      VPBlockUtils::disconnectBlocks(Region, Succ);
+
+    VPBlockBase *Entry = Region->getEntry();
+    SmallVector<VPBlockBase *> RegionBlocks(vp_depth_first_shallow(Entry));
+    VPRegionBlock *ParentRegion = Region->getParent();
+    for (VPBlockBase *Block : RegionBlocks)
+      Block->setParent(ParentRegion);
+    VPBlockUtils::connectBlocks(Pred, Entry);
+
+    // Process lane 0: convert original blocks to single-scalar.
+    convertRecipesInRegionBlocksToSingleScalar(Plan, IdxTy, VF, RegionBlocks);
+    SmallVector<std::pair<VPBlockBase *, VPBlockBase *>> LaneClones;
+    LaneClones.push_back({Entry, Exiting});
+
+    // Clone blocks for remaining lanes.
+    for (unsigned Lane = 1; Lane < VF.getKnownMinValue(); ++Lane) {
+      DenseMap<VPBlockBase *, VPBlockBase *> Old2NewBlocks;
+      for (VPBlockBase *OrigBlock : RegionBlocks) {
+        VPBlockBase *ClonedBlock = OrigBlock->clone();
+        Old2NewBlocks[OrigBlock] = ClonedBlock;
+        ClonedBlock->setParent(ParentRegion);
+      }
+      for (VPBlockBase *OrigBlock : RegionBlocks) {
+        if (OrigBlock == Exiting)
+          continue;
+        for (VPBlockBase *OrigSucc : OrigBlock->successors())
+          VPBlockUtils::connectBlocks(Old2NewBlocks[OrigBlock],
+                                      Old2NewBlocks[OrigSucc]);
+      }
+      processLaneForReplicateRegion(Plan, IdxTy, Lane, RegionBlocks,
+                                    Old2NewBlocks);
+      LaneClones.push_back({Old2NewBlocks[Entry], Old2NewBlocks[Exiting]});
+    }
+
+    // Connect lanes sequentially and connect last lane to successors.
+    for (unsigned Lane = 1; Lane < VF.getKnownMinValue(); ++Lane)
+      VPBlockUtils::connectBlocks(LaneClones[Lane - 1].second,
+                                  LaneClones[Lane].first);
+    for (VPBlockBase *Succ : Successors)
+      VPBlockUtils::connectBlocks(LaneClones.back().second, Succ);
+  }
+}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
index 898685e7ddbd1..2364dbe36cdc1 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
@@ -225,8 +225,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <16 x i1> [[TMP7]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
-; CHECK-NEXT:    [[TMP72:%.*]] = add i32 [[IV]], 0
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP72]]
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[IV]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 0
 ; CHECK-NEXT:    store i8 [[TMP10]], ptr [[TMP9]], align 1
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
index e5efddaac9d1a..cec944ee831da 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
@@ -296,8 +296,7 @@ define void @latch_branch_cost(ptr %dst) {
 ; PRED-NEXT:    [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
 ; PRED-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; PRED:       [[PRED_STORE_IF]]:
-; PRED-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
-; PRED-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP2]]
+; PRED-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
 ; PRED-NEXT:    store i8 0, ptr [[TMP3]], align 1
 ; PRED-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; PRED:       [[PRED_STORE_CONTINUE]]:
@@ -753,14 +752,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; DEFAULT:       [[PRED_STORE_IF]]:
 ; DEFAULT-NEXT:    [[TMP25:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 0
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP25]], align 4
-; DEFAULT-NEXT:    [[TMP26:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 0
-; DEFAULT-NEXT:    [[TMP27:%.*]] = getelementptr i8, ptr [[TMP26]], i64 4
+; DEFAULT-NEXT:    [[TMP27:%.*]] = getelementptr i8, ptr [[TMP25]], i64 4
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP27]], align 4
-; DEFAULT-NEXT:    [[TMP28:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 0
-; DEFAULT-NEXT:    [[TMP29:%.*]] = getelementptr i8, ptr [[TMP28]], i64 8
+; DEFAULT-NEXT:    [[TMP29:%.*]] = getelementptr i8, ptr [[TMP25]], i64 8
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP29]], align 4
-; DEFAULT-NEXT:    [[TMP30:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 0
-; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP30]], align 4
+; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP25]], align 4
 ; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; DEFAULT:       [[PRED_STORE_CONTINUE]]:
 ; DEFAULT-NEXT:    [[TMP31:%.*]] = extractelement <8 x i1> [[TMP22]], i32 1
@@ -768,14 +764,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; DEFAULT:       [[PRED_STORE_IF14]]:
 ; DEFAULT-NEXT:    [[TMP32:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 1
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP32]], align 4
-; DEFAULT-NEXT:    [[TMP33:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 1
-; DEFAULT-NEXT:    [[TMP34:%.*]] = getelementptr i8, ptr [[TMP33]], i64 4
+; DEFAULT-NEXT:    [[TMP34:%.*]] = getelementptr i8, ptr [[TMP32]], i64 4
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP34]], align 4
-; DEFAULT-NEXT:    [[TMP35:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 1
-; DEFAULT-NEXT:    [[TMP36:%.*]] = getelementptr i8, ptr [[TMP35]], i64 8
+; DEFAULT-NEXT:    [[TMP36:%.*]] = getelementptr i8, ptr [[TMP32]], i64 8
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP36]], align 4
-; DEFAULT-NEXT:    [[TMP37:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 1
-; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP37]], align 4
+; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP32]], align 4
 ; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE15]]
 ; DEFAULT:       [[PRED_STORE_CONTINUE15]]:
 ; DEFAULT-NEXT:    [[TMP38:%.*]] = extractelement <8 x i1> [[TMP22]], i32 2
@@ -783,14 +776,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; DEFAULT:       [[PRED_STORE_IF16]]:
 ; DEFAULT-NEXT:    [[TMP39:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 2
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP39]], align 4
-; DEFAULT-NEXT:    [[TMP40:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 2
-; DEFAULT-NEXT:    [[TMP41:%.*]] = getelementptr i8, ptr [[TMP40]], i64 4
+; DEFAULT-NEXT:    [[TMP41:%.*]] = getelementptr i8, ptr [[TMP39]], i64 4
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP41]], align 4
-; DEFAULT-NEXT:    [[TMP42:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 2
-; DEFAULT-NEXT:    [[TMP43:%.*]] = getelementptr i8, ptr [[TMP42]], i64 8
+; DEFAULT-NEXT:    [[TMP43:%.*]] = getelementptr i8, ptr [[TMP39]], i64 8
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP43]], align 4
-; DEFAULT-NEXT:    [[TMP44:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 2
-; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP44]], align 4
+; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP39]], align 4
 ; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE17]]
 ; DEFAULT:       [[PRED_STORE_CONTINUE17]]:
 ; DEFAULT-NEXT:    [[TMP45:%.*]] = extractelement <8 x i1> [[TMP22]], i32 3
@@ -798,14 +788,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; DEFAULT:       [[PRED_STORE_IF18]]:
 ; DEFAULT-NEXT:    [[TMP46:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 3
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP46]], align 4
-; DEFAULT-NEXT:    [[TMP47:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 3
-; DEFAULT-NEXT:    [[TMP48:%.*]] = getelementptr i8, ptr [[TMP47]], i64 4
+; DEFAULT-NEXT:    [[TMP48:%.*]] = getelementptr i8, ptr [[TMP46]], i64 4
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP48]], align 4
-; DEFAULT-NEXT:    [[TMP49:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 3
-; DEFAULT-NEXT:    [[TMP50:%.*]] = getelementptr i8, ptr [[TMP49]], i64 8
+; DEFAULT-NEXT:    [[TMP50:%.*]] = getelementptr i8, ptr [[TMP46]], i64 8
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP50]], align 4
-; DEFAULT-NEXT:    [[TMP51:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 3
-; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP51]], align 4
+; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP46]], align 4
 ; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE19]]
 ; DEFAULT:       [[PRED_STORE_CONTINUE19]]:
 ; DEFAULT-NEXT:    [[TMP52:%.*]] = extractelement <8 x i1> [[TMP22]], i32 4
@@ -813,14 +800,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; DEFAULT:       [[PRED_STORE_IF20]]:
 ; DEFAULT-NEXT:    [[TMP53:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 4
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP53]], align 4
-; DEFAULT-NEXT:    [[TMP54:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 4
-; DEFAULT-NEXT:    [[TMP55:%.*]] = getelementptr i8, ptr [[TMP54]], i64 4
+; DEFAULT-NEXT:    [[TMP55:%.*]] = getelementptr i8, ptr [[TMP53]], i64 4
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP55]], align 4
-; DEFAULT-NEXT:    [[TMP56:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 4
-; DEFAULT-NEXT:    [[TMP57:%.*]] = getelementptr i8, ptr [[TMP56]], i64 8
+; DEFAULT-NEXT:    [[TMP57:%.*]] = getelementptr i8, ptr [[TMP53]], i64 8
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP57]], align 4
-; DEFAULT-NEXT:    [[TMP58:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 4
-; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP58]], align 4
+; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP53]], align 4
 ; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE21]]
 ; DEFAULT:       [[PRED_STORE_CONTINUE21]]:
 ; DEFAULT-NEXT:    [[TMP59:%.*]] = extractelement <8 x i1> [[TMP22]], i32 5
@@ -828,14 +812,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; DEFAULT:       [[PRED_STORE_IF22]]:
 ; DEFAULT-NEXT:    [[TMP60:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 5
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP60]], align 4
-; DEFAULT-NEXT:    [[TMP61:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 5
-; DEFAULT-NEXT:    [[TMP62:%.*]] = getelementptr i8, ptr [[TMP61]], i64 4
+; DEFAULT-NEXT:    [[TMP62:%.*]] = getelementptr i8, ptr [[TMP60]], i64 4
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP62]], align 4
-; DEFAULT-NEXT:    [[TMP63:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 5
-; DEFAULT-NEXT:    [[TMP64:%.*]] = getelementptr i8, ptr [[TMP63]], i64 8
+; DEFAULT-NEXT:    [[TMP64:%.*]] = getelementptr i8, ptr [[TMP60]], i64 8
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP64]], align 4
-; DEFAULT-NEXT:    [[TMP65:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 5
-; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP65]], align 4
+; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP60]], align 4
 ; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE23]]
 ; DEFAULT:       [[PRED_STORE_CONTINUE23]]:
 ; DEFAULT-NEXT:    [[TMP66:%.*]] = extractelement <8 x i1> [[TMP22]], i32 6
@@ -843,14 +824,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; DEFAULT:       [[PRED_STORE_IF24]]:
 ; DEFAULT-NEXT:    [[TMP67:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 6
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP67]], align 4
-; DEFAULT-NEXT:    [[TMP68:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 6
-; DEFAULT-NEXT:    [[TMP69:%.*]] = getelementptr i8, ptr [[TMP68]], i64 4
+; DEFAULT-NEXT:    [[TMP69:%.*]] = getelementptr i8, ptr [[TMP67]], i64 4
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP69]], align 4
-; DEFAULT-NEXT:    [[TMP70:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 6
-; DEFAULT-NEXT:    [[TMP71:%.*]] = getelementptr i8, ptr [[TMP70]], i64 8
+; DEFAULT-NEXT:    [[TMP71:%.*]] = getelementptr i8, ptr [[TMP67]], i64 8
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP71]], align 4
-; DEFAULT-NEXT:    [[TMP72:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 6
-; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP72]], align 4
+; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP67]], align 4
 ; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE25]]
 ; DEFAULT:       [[PRED_STORE_CONTINUE25]]:
 ; DEFAULT-NEXT:    [[TMP73:%.*]] = extractelement <8 x i1> [[TMP22]], i32 7
@@ -858,14 +836,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; DEFAULT:       [[PRED_STORE_IF26]]:
 ; DEFAULT-NEXT:    [[TMP74:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 7
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP74]], align 4
-; DEFAULT-NEXT:    [[TMP75:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 7
-; DEFAULT-NEXT:    [[TMP76:%.*]] = getelementptr i8, ptr [[TMP75]], i64 4
+; DEFAULT-NEXT:    [[TMP76:%.*]] = getelementptr i8, ptr [[TMP74]], i64 4
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP76]], align 4
-; DEFAULT-NEXT:    [[TMP77:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 7
-; DEFAULT-NEXT:    [[TMP78:%.*]] = getelementptr i8, ptr [[TMP77]], i64 8
+; DEFAULT-NEXT:    [[TMP78:%.*]] = getelementptr i8, ptr [[TMP74]], i64 8
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP78]], align 4
-; DEFAULT-NEXT:    [[TMP79:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 7
-; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP79]], align 4
+; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP74]], align 4
 ; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE27]]
 ; DEFAULT:       [[PRED_STORE_CONTINUE27]]:
 ; DEFAULT-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
@@ -936,14 +911,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; PRED:       [[PRED_STORE_IF]]:
 ; PRED-NEXT:    [[TMP29:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 0
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP29]], align 4
-; PRED-NEXT:    [[TMP30:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 0
-; PRED-NEXT:    [[TMP31:%.*]] = getelementptr i8, ptr [[TMP30]], i64 4
+; PRED-NEXT:    [[TMP31:%.*]] = getelementptr i8, ptr [[TMP29]], i64 4
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP31]], align 4
-; PRED-NEXT:    [[TMP32:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 0
-; PRED-NEXT:    [[TMP33:%.*]] = getelementptr i8, ptr [[TMP32]], i64 8
+; PRED-NEXT:    [[TMP33:%.*]] = getelementptr i8, ptr [[TMP29]], i64 8
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP33]], align 4
-; PRED-NEXT:    [[TMP34:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 0
-; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP34]], align 4
+; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP29]], align 4
 ; PRED-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; PRED:       [[PRED_STORE_CONTINUE]]:
 ; PRED-NEXT:    [[TMP35:%.*]] = extractelement <8 x i1> [[TMP26]], i32 1
@@ -951,14 +923,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; PRED:       [[PRED_STORE_IF14]]:
 ; PRED-NEXT:    [[TMP36:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 1
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP36]], align 4
-; PRED-NEXT:    [[TMP37:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 1
-; PRED-NEXT:    [[TMP38:%.*]] = getelementptr i8, ptr [[TMP37]], i64 4
+; PRED-NEXT:    [[TMP38:%.*]] = getelementptr i8, ptr [[TMP36]], i64 4
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP38]], align 4
-; PRED-NEXT:    [[TMP39:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 1
-; PRED-NEXT:    [[TMP40:%.*]] = getelementptr i8, ptr [[TMP39]], i64 8
+; PRED-NEXT:    [[TMP40:%.*]] = getelementptr i8, ptr [[TMP36]], i64 8
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP40]], align 4
-; PRED-NEXT:    [[TMP41:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 1
-; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP41]], align 4
+; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP36]], align 4
 ; PRED-NEXT:    br label %[[PRED_STORE_CONTINUE15]]
 ; PRED:       [[PRED_STORE_CONTINUE15]]:
 ; PRED-NEXT:    [[TMP42:%.*]] = extractelement <8 x i1> [[TMP26]], i32 2
@@ -966,14 +935,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; PRED:       [[PRED_STORE_IF16]]:
 ; PRED-NEXT:    [[TMP43:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 2
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP43]], align 4
-; PRED-NEXT:    [[TMP44:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 2
-; PRED-NEXT:    [[TMP45:%.*]] = getelementptr i8, ptr [[TMP44]], i64 4
+; PRED-NEXT:    [[TMP45:%.*]] = getelementptr i8, ptr [[TMP43]], i64 4
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP45]], align 4
-; PRED-NEXT:    [[TMP46:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 2
-; PRED-NEXT:    [[TMP47:%.*]] = getelementptr i8, ptr [[TMP46]], i64 8
+; PRED-NEXT:    [[TMP47:%.*]] = getelementptr i8, ptr [[TMP43]], i64 8
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP47]], align 4
-; PRED-NEXT:    [[TMP48:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 2
-; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP48]], align 4
+; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP43]], align 4
 ; PRED-NEXT:    br label %[[PRED_STORE_CONTINUE17]]
 ; PRED:       [[PRED_STORE_CONTINUE17]]:
 ; PRED-NEXT:    [[TMP49:%.*]] = extractelement <8 x i1> [[TMP26]], i32 3
@@ -981,14 +947,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; PRED:       [[PRED_STORE_IF18]]:
 ; PRED-NEXT:    [[TMP50:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 3
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP50]], align 4
-; PRED-NEXT:    [[TMP51:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 3
-; PRED-NEXT:    [[TMP52:%.*]] = getelementptr i8, ptr [[TMP51]], i64 4
+; PRED-NEXT:    [[TMP52:%.*]] = getelementptr i8, ptr [[TMP50]], i64 4
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP52]], align 4
-; PRED-NEXT:    [[TMP53:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 3
-; PRED-NEXT:    [[TMP54:%.*]] = getelementptr i8, ptr [[TMP53]], i64 8
+; PRED-NEXT:    [[TMP54:%.*]] = getelementptr i8, ptr [[TMP50]], i64 8
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP54]], align 4
-; PRED-NEXT:    [[TMP55:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 3
-; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP55]], align 4
+; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP50]], align 4
 ; PRED-NEXT:    br label %[[PRED_STORE_CONTINUE19]]
 ; PRED:       [[PRED_STORE_CONTINUE19]]:
 ; PRED-NEXT:    [[TMP56:%.*]] = extractelement <8 x i1> [[TMP26]], i32 4
@@ -996,14 +959,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; PRED:       [[PRED_STORE_IF20]]:
 ; PRED-NEXT:    [[TMP57:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 4
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP57]], align 4
-; PRED-NEXT:    [[TMP58:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 4
-; PRED-NEXT:    [[TMP59:%.*]] = getelementptr i8, ptr [[TMP58]], i64 4
+; PRED-NEXT:    [[TMP59:%.*]] = getelementptr i8, ptr [[TMP57]], i64 4
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP59]], align 4
-; PRED-NEXT:    [[TMP60:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 4
-; PRED-NEXT:    [[TMP61:%.*]] = getelementptr i8, ptr [[TMP60]], i64 8
+; PRED-NEXT:    [[TMP61:%.*]] = getelementptr i8, ptr [[TMP57]], i64 8
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP61]], align 4
-; PRED-NEXT:    [[TMP62:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 4
-; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP62]], align 4
+; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP57]], align 4
 ; PRED-NEXT:    br label %[[PRED_STORE_CONTINUE21]]
 ; PRED:       [[PRED_STORE_CONTINUE21]]:
 ; PRED-NEXT:    [[TMP63:%.*]] = extractelement <8 x i1> [[TMP26]], i32 5
@@ -1011,14 +971,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; PRED:       [[PRED_STORE_IF22]]:
 ; PRED-NEXT:    [[TMP64:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 5
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP64]], align 4
-; PRED-NEXT:    [[TMP65:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 5
-; PRED-NEXT:    [[TMP66:%.*]] = getelementptr i8, ptr [[TMP65]], i64 4
+; PRED-NEXT:    [[TMP66:%.*]] = getelementptr i8, ptr [[TMP64]], i64 4
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP66]], align 4
-; PRED-NEXT:    [[TMP67:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 5
-; PRED-NEXT:    [[TMP68:%.*]] = getelementptr i8, ptr [[TMP67]], i64 8
+; PRED-NEXT:    [[TMP68:%.*]] = getelementptr i8, ptr [[TMP64]], i64 8
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP68]], align 4
-; PRED-NEXT:    [[TMP69:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 5
-; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP69]], align 4
+; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP64]], align 4
 ; PRED-NEXT:    br label %[[PRED_STORE_CONTINUE23]]
 ; PRED:       [[PRED_STORE_CONTINUE23]]:
 ; PRED-NEXT:    [[TMP70:%.*]] = extractelement <8 x i1> [[TMP26]], i32 6
@@ -1026,14 +983,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; PRED:       [[PRED_STORE_IF24]]:
 ; PRED-NEXT:    [[TMP71:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 6
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP71]], align 4
-; PRED-NEXT:    [[TMP72:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 6
-; PRED-NEXT:    [[TMP73:%.*]] = getelementptr i8, ptr [[TMP72]], i64 4
+; PRED-NEXT:    [[TMP73:%.*]] = getelementptr i8, ptr [[TMP71]], i64 4
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP73]], align 4
-; PRED-NEXT:    [[TMP74:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 6
-; PRED-NEXT:    [[TMP75:%.*]] = getelementptr i8, ptr [[TMP74]], i64 8
+; PRED-NEXT:    [[TMP75:%.*]] = getelementptr i8, ptr [[TMP71]], i64 8
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP75]], align 4
-; PRED-NEXT:    [[TMP76:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 6
-; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP76]], align 4
+; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP71]], align 4
 ; PRED-NEXT:    br label %[[PRED_STORE_CONTINUE25]]
 ; PRED:       [[PRED_STORE_CONTINUE25]]:
 ; PRED-NEXT:    [[TMP77:%.*]] = extractelement <8 x i1> [[TMP26]], i32 7
@@ -1041,14 +995,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; PRED:       [[PRED_STORE_IF26]]:
 ; PRED-NEXT:    [[TMP78:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 7
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP78]], align 4
-; PRED-NEXT:    [[TMP79:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 7
-; PRED-NEXT:    [[TMP80:%.*]] = getelementptr i8, ptr [[TMP79]], i64 4
+; PRED-NEXT:    [[TMP80:%.*]] = getelementptr i8, ptr [[TMP78]], i64 4
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP80]], align 4
-; PRED-NEXT:    [[TMP81:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 7
-; PRED-NEXT:    [[TMP82:%.*]] = getelementptr i8, ptr [[TMP81]], i64 8
+; PRED-NEXT:    [[TMP82:%.*]] = getelementptr i8, ptr [[TMP78]], i64 8
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP82]], align 4
-; PRED-NEXT:    [[TMP83:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 7
-; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP83]], align 4
+; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP78]], align 4
 ; PRED-NEXT:    br label %[[PRED_STORE_CONTINUE27]]
 ; PRED:       [[PRED_STORE_CONTINUE27]]:
 ; PRED-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 8
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fold-tail-low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fold-tail-low-trip-count.ll
index 77ea5f0e36c86..f741a10fedfa3 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/fold-tail-low-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/fold-tail-low-trip-count.ll
@@ -19,10 +19,6 @@ define void @low_trip_count_small(i32 %x, ptr %dst) {
 ; CHECK-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[DST]], i64 1
 ; CHECK-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[DST]], i64 2
 ; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[DST]], i64 3
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[DST]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x ptr> [[TMP1]], ptr [[NEXT_GEP2]], i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x ptr> [[TMP2]], ptr [[NEXT_GEP3]], i32 2
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x ptr> [[TMP3]], ptr [[NEXT_GEP4]], i32 3
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ule <4 x i64> <i64 0, i64 1, i64 2, i64 3>, [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP6]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
@@ -96,10 +92,6 @@ define ptr @low_trip_count_small_with_live_out(i32 %x, ptr %dst) {
 ; CHECK-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[DST]], i64 1
 ; CHECK-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[DST]], i64 2
 ; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[DST]], i64 3
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x ptr> poison, ptr [[DST]], i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x ptr> [[TMP2]], ptr [[NEXT_GEP2]], i32 1
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x ptr> [[TMP3]], ptr [[NEXT_GEP3]], i32 2
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x ptr> [[TMP4]], ptr [[NEXT_GEP4]], i32 3
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp ule <4 x i64> <i64 0, i64 1, i64 2, i64 3>, [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP7]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
index 0a62ac9804524..6b2a213661212 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
@@ -607,10 +607,8 @@ define void @forced_scalar_instr(ptr %gep.dst) {
 ; COMMON-NEXT:    [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
 ; COMMON-NEXT:    br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; COMMON:       [[PRED_STORE_IF]]:
-; COMMON-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
-; COMMON-NEXT:    [[TMP4:%.*]] = add i32 [[TMP0]], 0
-; COMMON-NEXT:    [[TMP5:%.*]] = getelementptr i32, ptr [[GEP_DST]], i64 [[TMP3]]
-; COMMON-NEXT:    [[TMP6:%.*]] = or i32 [[TMP4]], 1
+; COMMON-NEXT:    [[TMP5:%.*]] = getelementptr i32, ptr [[GEP_DST]], i64 [[INDEX]]
+; COMMON-NEXT:    [[TMP6:%.*]] = or i32 [[TMP0]], 1
 ; COMMON-NEXT:    store i32 [[TMP6]], ptr [[TMP5]], align 4
 ; COMMON-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; COMMON:       [[PRED_STORE_CONTINUE]]:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
index e7ccde046d58e..42fb0970a1fb4 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
@@ -602,8 +602,7 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) {
 ; PRED-NEXT:    [[TMP7:%.*]] = extractelement <2 x i1> [[TMP6]], i32 0
 ; PRED-NEXT:    br i1 [[TMP7]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; PRED:       [[PRED_STORE_IF]]:
-; PRED-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 0
-; PRED-NEXT:    [[TMP9:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[TMP8]], i32 2
+; PRED-NEXT:    [[TMP9:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[INDEX]], i32 2
 ; PRED-NEXT:    store i32 0, ptr [[TMP9]], align 8
 ; PRED-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; PRED:       [[PRED_STORE_CONTINUE]]:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll
index 6dcb3c7dd7214..9462a791dc9de 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll
@@ -77,7 +77,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
 ; TFCOMMON-NEXT:    [[TMP14:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 1
 ; TFCOMMON-NEXT:    br i1 [[TMP14]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE6]]
 ; TFCOMMON:       pred.store.if1:
-; TFCOMMON-NEXT:    [[TMP19:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 0
+; TFCOMMON-NEXT:    [[TMP19:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 1
 ; TFCOMMON-NEXT:    store double [[TMP19]], ptr [[P]], align 8
 ; TFCOMMON-NEXT:    br label [[PRED_STORE_CONTINUE6]]
 ; TFCOMMON:       pred.store.continue2:
@@ -116,7 +116,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
 ; TFA_INTERLEAVE-NEXT:    [[TMP29:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 1
 ; TFA_INTERLEAVE-NEXT:    br i1 [[TMP29]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
 ; TFA_INTERLEAVE:       pred.store.if3:
-; TFA_INTERLEAVE-NEXT:    [[TMP22:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 0
+; TFA_INTERLEAVE-NEXT:    [[TMP22:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 1
 ; TFA_INTERLEAVE-NEXT:    store double [[TMP22]], ptr [[P]], align 8
 ; TFA_INTERLEAVE-NEXT:    br label [[PRED_STORE_CONTINUE5]]
 ; TFA_INTERLEAVE:       pred.store.continue4:
@@ -130,7 +130,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
 ; TFA_INTERLEAVE-NEXT:    [[TMP25:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK2]], i32 1
 ; TFA_INTERLEAVE-NEXT:    br i1 [[TMP25]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9]]
 ; TFA_INTERLEAVE:       pred.store.if7:
-; TFA_INTERLEAVE-NEXT:    [[TMP34:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 0
+; TFA_INTERLEAVE-NEXT:    [[TMP34:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 1
 ; TFA_INTERLEAVE-NEXT:    store double [[TMP34]], ptr [[P]], align 8
 ; TFA_INTERLEAVE-NEXT:    br label [[PRED_STORE_CONTINUE9]]
 ; TFA_INTERLEAVE:       pred.store.continue8:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
index 5b6979d6b1198..ae3c964fa148c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
@@ -210,8 +210,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n)
 ; DEFAULT-NEXT:    [[TMP8:%.*]] = extractelement <16 x i1> [[TMP0]], i32 0
 ; DEFAULT-NEXT:    br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; DEFAULT:       [[PRED_STORE_IF]]:
-; DEFAULT-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 0
-; DEFAULT-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP9]]
+; DEFAULT-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]]
 ; DEFAULT-NEXT:    [[TMP11:%.*]] = extractelement <16 x i8> [[TMP7]], i32 0
 ; DEFAULT-NEXT:    store i8 [[TMP11]], ptr [[TMP10]], align 1
 ; DEFAULT-NEXT:    br label %[[PRED_STORE_CONTINUE]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll
index a5ed5361b507a..ceb65b4b0a965 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll
@@ -288,8 +288,7 @@ define void @srem_sdiv_without_tail_folding(i32 %d.0, i32 %d.1, ptr %dst, i32 %e
 ; CHECK:       [[PRED_STORE_IF]]:
 ; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <4 x i64> [[TMP25]], i32 0
 ; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP27]]
-; CHECK-NEXT:    [[TMP29:%.*]] = add i32 [[INDEX]], 0
-; CHECK-NEXT:    store i32 [[TMP29]], ptr [[TMP28]], align 4
+; CHECK-NEXT:    store i32 [[INDEX]], ptr [[TMP28]], align 4
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; CHECK:       [[PRED_STORE_CONTINUE]]:
 ; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-fold-tail.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-fold-tail.ll
index 42103f72c903d..240ac2f1ceb2f 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-fold-tail.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-fold-tail.ll
@@ -25,8 +25,7 @@ define void @load_store_interleave_group(ptr noalias %data, i64 %n) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
-; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = shl nsw i64 [[TMP2]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = shl nsw i64 [[INDEX]], 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8
 ; CHECK-NEXT:    store i64 [[TMP5]], ptr [[TMP4]], align 8
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll
index 99c735f777b66..8c34e6815f843 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll
@@ -152,9 +152,7 @@ define void @load_store_interleave_group_block_var_cond(ptr noalias %data, ptr %
 ; VF2IC1-NEXT:    [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
 ; VF2IC1-NEXT:    br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; VF2IC1:       [[PRED_STORE_IF]]:
-; VF2IC1-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 0
-; VF2IC1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[MASKS]], i64 [[TMP5]]
-; VF2IC1-NEXT:    store i8 1, ptr [[TMP6]], align 1
+; VF2IC1-NEXT:    store i8 1, ptr [[TMP2]], align 1
 ; VF2IC1-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; VF2IC1:       [[PRED_STORE_CONTINUE]]:
 ; VF2IC1-NEXT:    [[TMP7:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
@@ -207,9 +205,7 @@ define void @load_store_interleave_group_block_var_cond(ptr noalias %data, ptr %
 ; VF2IC2-NEXT:    [[TMP11:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0
 ; VF2IC2-NEXT:    br i1 [[TMP11]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; VF2IC2:       [[PRED_STORE_IF]]:
-; VF2IC2-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 0
-; VF2IC2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[MASKS]], i64 [[TMP12]]
-; VF2IC2-NEXT:    store i8 1, ptr [[TMP13]], align 1
+; VF2IC2-NEXT:    store i8 1, ptr [[TMP7]], align 1
 ; VF2IC2-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; VF2IC2:       [[PRED_STORE_CONTINUE]]:
 ; VF2IC2-NEXT:    [[TMP14:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll
index 7d77f2f6b5b9c..73fd53011e956 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll
@@ -35,12 +35,9 @@ define void @load_store_interleave_group_tc_2(ptr noalias %data) {
 ; VF4:       [[VECTOR_BODY]]:
 ; VF4-NEXT:    br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; VF4:       [[PRED_STORE_IF]]:
-; VF4-NEXT:    [[TMP3:%.*]] = shl nsw i64 0, 1
-; VF4-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP3]]
-; VF4-NEXT:    [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8
-; VF4-NEXT:    store i64 [[TMP5]], ptr [[TMP4]], align 8
-; VF4-NEXT:    [[TMP6:%.*]] = or disjoint i64 [[TMP3]], 1
-; VF4-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP6]]
+; VF4-NEXT:    [[TMP0:%.*]] = load i64, ptr [[DATA]], align 8
+; VF4-NEXT:    store i64 [[TMP0]], ptr [[DATA]], align 8
+; VF4-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 1
 ; VF4-NEXT:    [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8
 ; VF4-NEXT:    store i64 [[TMP8]], ptr [[TMP7]], align 8
 ; VF4-NEXT:    br label %[[PRED_STORE_CONTINUE]]
diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll
index 78c71fd3beb89..fab2da95f7f1d 100644
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll
@@ -20,9 +20,8 @@ define void @test_scalar_steps_target_instruction_cost(ptr %dst) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
-; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP2]]
-; CHECK-NEXT:    store i64 [[TMP2]], ptr [[TMP3]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT:    store i64 [[OFFSET_IDX]], ptr [[TMP2]], align 8
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; CHECK:       [[PRED_STORE_CONTINUE]]:
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/VPlan/interleave-and-scalarize-only.ll
index b3050475d4bc4..0a17319c04d35 100644
--- a/llvm/test/Transforms/LoopVectorize/VPlan/interleave-and-scalarize-only.ll
+++ b/llvm/test/Transforms/LoopVectorize/VPlan/interleave-and-scalarize-only.ll
@@ -136,9 +136,8 @@ define void @test_scalarize_with_branch_cond(ptr %src, ptr %dst) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %pred.store.continue4 ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = trunc i64 [[INDEX]] to i1
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i1 false, [[TMP0]]
-; CHECK-NEXT:    [[INDUCTION:%.*]] = add i1 [[OFFSET_IDX]], false
 ; CHECK-NEXT:    [[INDUCTION3:%.*]] = add i1 [[OFFSET_IDX]], true
-; CHECK-NEXT:    br i1 [[INDUCTION]], label %pred.store.if, label %pred.store.continue
+; CHECK-NEXT:    br i1 [[OFFSET_IDX]], label %pred.store.if, label %pred.store.continue
 ; CHECK:       pred.store.if:
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr %src, i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/vplan-predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/VPlan/vplan-predicate-switch.ll
index 63b0da8101b9d..7dc5b131e4075 100644
--- a/llvm/test/Transforms/LoopVectorize/VPlan/vplan-predicate-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/VPlan/vplan-predicate-switch.ll
@@ -19,67 +19,69 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
 ; CHECK-NEXT: Successor(s): vector.body
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT:   EMIT-SCALAR vp<[[CAN_IV:%.+]]> = phi [ ir<0>, vector.ph ], [ vp<[[CAN_IV_NEXT:%.+]]>, default.2 ]
+; CHECK-NEXT:   EMIT-SCALAR vp<[[CAN_IV:%.+]]> = phi [ ir<0>, vector.ph ], [ vp<[[CAN_IV_NEXT:%.+]]>, pred.store.continue{{.*}} ]
 ; CHECK-NEXT:   vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, ir<2>, ir<1>
 ; CHECK-NEXT:   EMIT vp<[[PTR:%.+]]> = ptradd ir<%start>, vp<[[CAN_IV]]>
 ; CHECK-NEXT:   EMIT vp<[[PTR]]>.1 = ptradd ir<%start>, vp<[[STEPS]]>
-; CHECK-NEXT:   EMIT vp<[[PTR_VEC:%.+]]> = buildvector vp<[[PTR]]>, vp<[[PTR]]>.1
 ; CHECK-NEXT:   WIDEN ir<%l> = load vp<[[PTR]]>
 ; CHECK-NEXT:   EMIT vp<[[C1:%.+]]> = icmp eq ir<%l>, ir<-12>
 ; CHECK-NEXT:   EMIT vp<[[C2:%.+]]> = icmp eq ir<%l>, ir<13>
 ; CHECK-NEXT:   EMIT vp<[[OR_CASES:%.+]]> = or vp<[[C1]]>, vp<[[C2]]>
 ; CHECK-NEXT:   EMIT vp<[[DEFAULT_MASK:%.+]]> = not vp<[[OR_CASES]]>
-; CHECK-NEXT: Successor(s): pred.store
-; CHECK-EMPTY:
-; CHECK-NEXT: <xVFxUF> pred.store: {
-; CHECK-NEXT:   pred.store.entry:
-; CHECK-NEXT:     BRANCH-ON-MASK vp<[[C2]]>
-; CHECK-NEXT:   Successor(s): pred.store.if, pred.store.continue
-; CHECK-EMPTY:
-; CHECK-NEXT:   pred.store.if:
-; CHECK-NEXT:     REPLICATE store ir<0>, vp<[[PTR_VEC]]>
-; CHECK-NEXT:   Successor(s): pred.store.continue
-; CHECK-EMPTY:
-; CHECK-NEXT:   pred.store.continue:
-; CHECK-NEXT:   No successors
-; CHECK-NEXT: }
-; CHECK-NEXT: Successor(s): if.then.2.0
-; CHECK-EMPTY:
-; CHECK-NEXT: if.then.2.0:
-; CHECK-NEXT: Successor(s): pred.store
-; CHECK-EMPTY:
-; CHECK-NEXT: <xVFxUF> pred.store: {
-; CHECK-NEXT:   pred.store.entry:
-; CHECK-NEXT:     BRANCH-ON-MASK vp<[[C1]]>
-; CHECK-NEXT:   Successor(s): pred.store.if, pred.store.continue
-; CHECK-EMPTY:
-; CHECK-NEXT:     pred.store.if:
-; CHECK-NEXT:     REPLICATE store ir<42>, vp<[[PTR_VEC]]>
-; CHECK-NEXT:   Successor(s): pred.store.continue
-; CHECK-EMPTY:
-; CHECK-NEXT:   pred.store.continue:
-; CHECK-NEXT:   No successors
-; CHECK-NEXT: }
-; CHECK-NEXT: Successor(s): if.then.1.1
-; CHECK-EMPTY:
-; CHECK-NEXT: if.then.1.1:
-; CHECK-NEXT: Successor(s): pred.store
-; CHECK-EMPTY:
-; CHECK-NEXT: <xVFxUF> pred.store: {
-; CHECK-NEXT:   pred.store.entry:
-; CHECK-NEXT:     BRANCH-ON-MASK vp<[[DEFAULT_MASK]]>
-; CHECK-NEXT:   Successor(s): pred.store.if, pred.store.continue
-; CHECK-EMPTY:
-; CHECK-NEXT:   pred.store.if:
-; CHECK-NEXT:     REPLICATE store ir<2>, vp<[[PTR_VEC]]>
-; CHECK-NEXT:   Successor(s): pred.store.continue
-; CHECK-EMPTY:
-; CHECK-NEXT:   pred.store.continue:
-; CHECK-NEXT:   No successors
-; CHECK-NEXT: }
-; CHECK-NEXT: Successor(s): default.2
-; CHECK-EMPTY:
-; CHECK-NEXT: default.2:
+; CHECK-NEXT:   EMIT vp<[[C2_LANE0:%.+]]> = extractelement vp<[[C2]]>, ir<0>
+; CHECK-NEXT:   EMIT branch-on-cond vp<[[C2_LANE0]]>
+; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.if:
+; CHECK-NEXT:   CLONE store ir<0>, vp<[[PTR]]>
+; CHECK-NEXT: Successor(s): pred.store.continue
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.continue:
+; CHECK-NEXT:   EMIT vp<[[C2_LANE1:%.+]]> = extractelement vp<[[C2]]>, ir<1>
+; CHECK-NEXT:   EMIT branch-on-cond vp<[[C2_LANE1]]>
+; CHECK-NEXT: Successor(s): pred.store.if{{.*}}, pred.store.continue{{.*}}
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.if{{.*}}:
+; CHECK-NEXT:   CLONE store ir<0>, vp<[[PTR]]>.1
+; CHECK-NEXT: Successor(s): pred.store.continue{{.*}}
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.continue{{.*}}:
+; CHECK-NEXT:   EMIT vp<[[C1_LANE0:%.+]]> = extractelement vp<[[C1]]>, ir<0>
+; CHECK-NEXT:   EMIT branch-on-cond vp<[[C1_LANE0]]>
+; CHECK-NEXT: Successor(s): pred.store.if{{.*}}, pred.store.continue{{.*}}
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.if{{.*}}:
+; CHECK-NEXT:   CLONE store ir<42>, vp<[[PTR]]>
+; CHECK-NEXT: Successor(s): pred.store.continue{{.*}}
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.continue{{.*}}:
+; CHECK-NEXT:   EMIT vp<[[C1_LANE1:%.+]]> = extractelement vp<[[C1]]>, ir<1>
+; CHECK-NEXT:   EMIT branch-on-cond vp<[[C1_LANE1]]>
+; CHECK-NEXT: Successor(s): pred.store.if{{.*}}, pred.store.continue{{.*}}
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.if{{.*}}:
+; CHECK-NEXT:   CLONE store ir<42>, vp<[[PTR]]>.1
+; CHECK-NEXT: Successor(s): pred.store.continue{{.*}}
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.continue{{.*}}:
+; CHECK-NEXT:   EMIT vp<[[DEFAULT_MASK_LANE0:%.+]]> = extractelement vp<[[DEFAULT_MASK]]>, ir<0>
+; CHECK-NEXT:   EMIT branch-on-cond vp<[[DEFAULT_MASK_LANE0]]>
+; CHECK-NEXT: Successor(s): pred.store.if{{.*}}, pred.store.continue{{.*}}
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.if{{.*}}:
+; CHECK-NEXT:   CLONE store ir<2>, vp<[[PTR]]>
+; CHECK-NEXT: Successor(s): pred.store.continue{{.*}}
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.continue{{.*}}:
+; CHECK-NEXT:   EMIT vp<[[DEFAULT_MASK_LANE1:%.+]]> = extractelement vp<[[DEFAULT_MASK]]>, ir<1>
+; CHECK-NEXT:   EMIT branch-on-cond vp<[[DEFAULT_MASK_LANE1]]>
+; CHECK-NEXT: Successor(s): pred.store.if{{.*}}, pred.store.continue{{.*}}
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.if{{.*}}:
+; CHECK-NEXT:   CLONE store ir<2>, vp<[[PTR]]>.1
+; CHECK-NEXT: Successor(s): pred.store.continue{{.*}}
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.continue{{.*}}:
 ; CHECK-NEXT:   EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, ir<2>
 ; CHECK-NEXT:   EMIT vp<{{%.+}}> = icmp eq vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
 ; CHECK-NEXT:   EMIT branch-on-cond vp<{{%.+}}>
diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/vplan-print-after-all.ll b/llvm/test/Transforms/LoopVectorize/VPlan/vplan-print-after-all.ll
index cba0948478955..83ff8c53bef69 100644
--- a/llvm/test/Transforms/LoopVectorize/VPlan/vplan-print-after-all.ll
+++ b/llvm/test/Transforms/LoopVectorize/VPlan/vplan-print-after-all.ll
@@ -41,6 +41,8 @@
 ; CHECK: VPlan for loop in 'foo' after VPlanTransforms::materializePacksAndUnpacks
 ; CHECK: VPlan for loop in 'foo' after VPlanTransforms::materializeBroadcasts
 ; CHECK: VPlan for loop in 'foo' after VPlanTransforms::replicateByVF
+; CHECK: VPlan for loop in 'foo' after VPlanTransforms::unrollReplicateRegions
+; CHECK: VPlan for loop in 'foo' after VPlanTransforms::mergeBlocksIntoPredecessors
 ; CHECK: VPlan for loop in 'foo' after printFinalVPlan
 
 ; Also verify that VPlans are actually printed (we aren't interested in the
diff --git a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
index 34377454c0cc3..3576977c2f2ad 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
@@ -191,24 +191,23 @@ define void @PR40816() #1 {
 ; FORCE:       [[VECTOR_PH]]:
 ; FORCE-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; FORCE:       [[VECTOR_BODY]]:
-; FORCE-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
+; FORCE-NEXT:    [[TMP0:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
 ; FORCE-NEXT:    [[VEC_IND:%.*]] = phi <2 x i8> [ <i8 0, i8 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ]
 ; FORCE-NEXT:    [[TMP2:%.*]] = icmp ule <2 x i8> [[VEC_IND]], splat (i8 2)
 ; FORCE-NEXT:    [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
 ; FORCE-NEXT:    br i1 [[TMP3]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; FORCE:       [[PRED_STORE_IF]]:
-; FORCE-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; FORCE-NEXT:    store i32 [[TMP0]], ptr @b, align 1
 ; FORCE-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; FORCE:       [[PRED_STORE_CONTINUE]]:
 ; FORCE-NEXT:    [[TMP10:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
 ; FORCE-NEXT:    br i1 [[TMP10]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
 ; FORCE:       [[PRED_STORE_IF1]]:
-; FORCE-NEXT:    [[TMP1:%.*]] = add i32 [[INDEX]], 1
+; FORCE-NEXT:    [[TMP1:%.*]] = add i32 [[TMP0]], 1
 ; FORCE-NEXT:    store i32 [[TMP1]], ptr @b, align 1
 ; FORCE-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
 ; FORCE:       [[PRED_STORE_CONTINUE2]]:
-; FORCE-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
+; FORCE-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[TMP0]], 2
 ; FORCE-NEXT:    [[VEC_IND_NEXT]] = add <2 x i8> [[VEC_IND]], splat (i8 2)
 ; FORCE-NEXT:    [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], 4
 ; FORCE-NEXT:    br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll
index f65a9d7d45ed8..972a473e0171f 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll
@@ -63,8 +63,7 @@ define void @redundant_or_1(ptr %dst, i1 %c.0, i1 %c.1) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 0
-; CHECK-NEXT:    store i32 0, ptr [[TMP8]], align 4
+; CHECK-NEXT:    store i32 0, ptr [[DST:%.*]], align 4
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK:       pred.store.continue:
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
@@ -138,8 +137,7 @@ define void @redundant_or_2(ptr %dst, i1 %c.0, i1 %c.1) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 0
-; CHECK-NEXT:    store i32 0, ptr [[TMP7]], align 4
+; CHECK-NEXT:    store i32 0, ptr [[DST:%.*]], align 4
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK:       pred.store.continue:
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
@@ -214,8 +212,7 @@ define void @redundant_and_1(ptr %dst, i1 %c.0, i1 %c.1) {
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 0
-; CHECK-NEXT:    store i32 0, ptr [[TMP9]], align 4
+; CHECK-NEXT:    store i32 0, ptr [[DST:%.*]], align 4
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK:       pred.store.continue:
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP6]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll
index 40eee658f3421..00a6818918f05 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll
@@ -414,7 +414,7 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
 ; CHECK-NEXT:    [[TMP8:%.*]] = shl nsw i64 [[TMP4]], 2
 ; CHECK-NEXT:    [[TMP9:%.*]] = shl nsw i64 [[TMP5]], 2
 ; CHECK-NEXT:    [[TMP10:%.*]] = shl nsw i64 [[TMP6]], 2
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr nusw double, ptr [[A:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[TMP7]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr nusw double, ptr [[A]], i64 [[TMP8]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr nusw double, ptr [[A]], i64 [[TMP9]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr nusw double, ptr [[A]], i64 [[TMP10]]
@@ -433,10 +433,7 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
 ; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <4 x i1> [[TMP19]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP23]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
-; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP24:%.*]] = shl nsw i64 [[TMP3]], 2
-; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP24]]
-; CHECK-NEXT:    store double 0.000000e+00, ptr [[TMP25]], align 8
+; CHECK-NEXT:    store double 0.000000e+00, ptr [[TMP11]], align 8
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK:       pred.store.continue:
 ; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <4 x i1> [[TMP19]], i32 1
@@ -592,17 +589,14 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
 ; CHECK:       vec.epilog.vector.body:
 ; CHECK-NEXT:    [[INDEX40:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL1]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT51:%.*]], [[PRED_STORE_CONTINUE50:%.*]] ]
 ; CHECK-NEXT:    [[TMP87:%.*]] = shl nsw i64 [[INDEX40]], 2
-; CHECK-NEXT:    [[TMP89:%.*]] = getelementptr nusw double, ptr [[A]], i64 [[TMP87]]
+; CHECK-NEXT:    [[TMP89:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP87]]
 ; CHECK-NEXT:    [[WIDE_VEC41:%.*]] = load <16 x double>, ptr [[TMP89]], align 8
 ; CHECK-NEXT:    [[STRIDED_VEC42:%.*]] = shufflevector <16 x double> [[WIDE_VEC41]], <16 x double> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
 ; CHECK-NEXT:    [[TMP90:%.*]] = fcmp oeq <4 x double> [[STRIDED_VEC42]], zeroinitializer
 ; CHECK-NEXT:    [[TMP91:%.*]] = extractelement <4 x i1> [[TMP90]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP91]], label [[PRED_STORE_IF43:%.*]], label [[PRED_STORE_CONTINUE44:%.*]]
 ; CHECK:       pred.store.if43:
-; CHECK-NEXT:    [[TMP86:%.*]] = add i64 [[INDEX40]], 0
-; CHECK-NEXT:    [[TMP92:%.*]] = shl nsw i64 [[TMP86]], 2
-; CHECK-NEXT:    [[TMP93:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP92]]
-; CHECK-NEXT:    store double 0.000000e+00, ptr [[TMP93]], align 8
+; CHECK-NEXT:    store double 0.000000e+00, ptr [[TMP89]], align 8
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE44]]
 ; CHECK:       pred.store.continue44:
 ; CHECK-NEXT:    [[TMP94:%.*]] = extractelement <4 x i1> [[TMP90]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
index aff665dad85a7..de2b1d1eedb81 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
@@ -152,8 +152,6 @@ define void @foo2(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr n
 ; FVW2-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 16>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE3]] ]
 ; FVW2-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16
 ; FVW2-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16
-; FVW2-NEXT:    [[TMP18:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0
-; FVW2-NEXT:    [[TMP19:%.*]] = insertelement <2 x i64> [[TMP18]], i64 [[TMP1]], i32 1
 ; FVW2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[OFFSET_IDX]]
 ; FVW2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]]
 ; FVW2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
@@ -268,8 +266,6 @@ define void @foo3(ptr noalias %in, ptr noalias %out, ptr noalias %trigger) {
 ; FVW2-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 16>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ]
 ; FVW2-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16
 ; FVW2-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16
-; FVW2-NEXT:    [[TMP18:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0
-; FVW2-NEXT:    [[TMP19:%.*]] = insertelement <2 x i64> [[TMP18]], i64 [[TMP1]], i32 1
 ; FVW2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[OFFSET_IDX]]
 ; FVW2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]]
 ; FVW2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
@@ -371,8 +367,6 @@ define void @foo2_addrspace(ptr addrspace(1) noalias %in, ptr addrspace(1) noali
 ; FVW2-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 16>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE3]] ]
 ; FVW2-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16
 ; FVW2-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16
-; FVW2-NEXT:    [[TMP18:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0
-; FVW2-NEXT:    [[TMP19:%.*]] = insertelement <2 x i64> [[TMP18]], i64 [[TMP1]], i32 1
 ; FVW2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[OFFSET_IDX]]
 ; FVW2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]]
 ; FVW2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
@@ -473,8 +467,6 @@ define void @foo2_addrspace2(ptr addrspace(1) noalias %in, ptr addrspace(0) noal
 ; FVW2-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 16>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE3]] ]
 ; FVW2-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16
 ; FVW2-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16
-; FVW2-NEXT:    [[TMP18:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0
-; FVW2-NEXT:    [[TMP19:%.*]] = insertelement <2 x i64> [[TMP18]], i64 [[TMP1]], i32 1
 ; FVW2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[OFFSET_IDX]]
 ; FVW2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]]
 ; FVW2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
@@ -575,8 +567,6 @@ define void @foo2_addrspace3(ptr addrspace(0) noalias %in, ptr addrspace(1) noal
 ; FVW2-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 16>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE3]] ]
 ; FVW2-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16
 ; FVW2-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16
-; FVW2-NEXT:    [[TMP18:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0
-; FVW2-NEXT:    [[TMP19:%.*]] = insertelement <2 x i64> [[TMP18]], i64 [[TMP1]], i32 1
 ; FVW2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[OFFSET_IDX]]
 ; FVW2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]]
 ; FVW2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
index 79538f7f2c68c..5f81ca53583c0 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
@@ -584,8 +584,7 @@ define void @wide_iv_trunc(ptr %dst, i64 %N) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
-; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[TMP1]], 0
-; CHECK-NEXT:    store i32 [[TMP4]], ptr [[DST]], align 4
+; CHECK-NEXT:    store i32 [[TMP1]], ptr [[DST]], align 4
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK:       pred.store.continue:
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll b/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll
index cbf62865cdce5..36ad765993588 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll
@@ -28,17 +28,17 @@ define i32 @unused_blend_after_unrolling(ptr %p, i32 %a, i1 %c.1, i16 %x, i16 %y
 ; CHECK:       [[PRED_SDIV_IF]]:
 ; CHECK-NEXT:    br label %[[PRED_SDIV_CONTINUE]]
 ; CHECK:       [[PRED_SDIV_CONTINUE]]:
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP3]], label %[[PRED_SDIV_IF4:.*]], label %[[PRED_SDIV_CONTINUE5:.*]]
 ; CHECK:       [[PRED_SDIV_IF4]]:
 ; CHECK-NEXT:    br label %[[PRED_SDIV_CONTINUE5]]
 ; CHECK:       [[PRED_SDIV_CONTINUE5]]:
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP4]], label %[[PRED_SDIV_IF6:.*]], label %[[PRED_SDIV_CONTINUE7:.*]]
 ; CHECK:       [[PRED_SDIV_IF6]]:
 ; CHECK-NEXT:    br label %[[PRED_SDIV_CONTINUE7]]
 ; CHECK:       [[PRED_SDIV_CONTINUE7]]:
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP5]], label %[[PRED_SDIV_IF8:.*]], label %[[PRED_SDIV_CONTINUE9:.*]]
 ; CHECK:       [[PRED_SDIV_IF8]]:
 ; CHECK-NEXT:    br label %[[PRED_SDIV_CONTINUE9]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll b/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll
index fb5a5f9c068b9..7e08ed43a2432 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll
@@ -18,8 +18,7 @@ define void @test_pr55096(i64 %c, ptr %p) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
-; CHECK-NEXT:    [[TMP4:%.*]] = add i16 [[OFFSET_IDX]], 0
-; CHECK-NEXT:    [[TMP5:%.*]] = add i16 [[TMP4]], 2008
+; CHECK-NEXT:    [[TMP5:%.*]] = add i16 [[OFFSET_IDX]], 2008
 ; CHECK-NEXT:    [[TMP6:%.*]] = udiv i16 4943, [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[P]], i16 [[TMP6]]
 ; CHECK-NEXT:    store i16 0, ptr [[TMP7]], align 2
diff --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
index 0abea541158ac..eb9e864247303 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
@@ -182,7 +182,7 @@ define void @example2(i32 %n, i32 %x) optsize {
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE24]]
 ; CHECK:       pred.store.continue23:
 ; CHECK-NEXT:    [[TMP42:%.*]] = extractelement <4 x i1> [[TMP18]], i64 3
-; CHECK-NEXT:    br i1 [[TMP42]], label [[PRED_STORE_IF25:%.*]], label [[PRED_STORE_CONTINUE26]]
+; CHECK-NEXT:    br i1 [[TMP42]], label [[PRED_STORE_IF24:%.*]], label [[PRED_STORE_CONTINUE26]]
 ; CHECK:       pred.store.if24:
 ; CHECK-NEXT:    [[TMP43:%.*]] = add i64 [[OFFSET_IDX]], 3
 ; CHECK-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [4 x i8], ptr @b, i64 [[TMP43]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll
index b5d42a8f71430..d6b6541afb293 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll
@@ -1399,8 +1399,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE44]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue44:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP132:%.*]] = sub <8 x i8> zeroinitializer, [[TMP99]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP133:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP133]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP100]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if45:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP134:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP135:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP134]]
@@ -1408,8 +1407,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP136]], ptr [[TMP135]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE46]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue46:
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP137:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP137]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP104]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if47:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP138:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP139:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP138]]
@@ -1417,8 +1415,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP140]], ptr [[TMP139]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE48]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue48:
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP141:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP141]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP108]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if49:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP142:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP143:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP142]]
@@ -1426,8 +1423,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP144]], ptr [[TMP143]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE50]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue50:
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP145:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP145]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP112]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if51:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP146:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP147:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP146]]
@@ -1435,8 +1431,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP148]], ptr [[TMP147]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE52]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue52:
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP149:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP149]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP116]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if53:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP150:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP151:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP150]]
@@ -1444,8 +1439,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP152]], ptr [[TMP151]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE54]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue54:
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP153:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP153]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP120]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if55:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP154:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP155:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP154]]
@@ -1453,8 +1447,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP156]], ptr [[TMP155]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE56]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue56:
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP157:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP157]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP124]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if57:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP158:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP159:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP158]]
@@ -1462,8 +1455,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP160]], ptr [[TMP159]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE58]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue58:
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP161:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP161]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP128]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if59:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP162:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP163:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP162]]
@@ -1804,8 +1796,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE44]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue44:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP132:%.*]] = sub <8 x i8> zeroinitializer, [[TMP99]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP133:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP133]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP100]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if45:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP134:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP135:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP134]]
@@ -1813,8 +1804,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP136]], ptr [[TMP135]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE46]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue46:
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP137:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP137]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP104]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if47:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP138:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP139:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP138]]
@@ -1822,8 +1812,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP140]], ptr [[TMP139]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE48]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue48:
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP141:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP141]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP108]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if49:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP142:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP143:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP142]]
@@ -1831,8 +1820,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP144]], ptr [[TMP143]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE50]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue50:
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP145:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP145]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP112]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if51:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP146:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP147:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP146]]
@@ -1840,8 +1828,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP148]], ptr [[TMP147]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE52]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue52:
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP149:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP149]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP116]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if53:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP150:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP151:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP150]]
@@ -1849,8 +1836,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP152]], ptr [[TMP151]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE54]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue54:
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP153:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP153]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP120]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if55:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP154:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP155:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP154]]
@@ -1858,8 +1844,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP156]], ptr [[TMP155]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE56]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue56:
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP157:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP157]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP124]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if57:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP158:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP159:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP158]]
@@ -1867,8 +1852,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP160]], ptr [[TMP159]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE58]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue58:
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP161:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP161]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP128]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if59:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP162:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP163:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP162]]
@@ -2129,8 +2113,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE44]]
 ; ENABLED_MASKED_STRIDED:       pred.store.continue44:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP132:%.*]] = sub <8 x i8> zeroinitializer, [[TMP99]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP133:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0
-; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP133]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
+; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP100]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if45:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP134:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP135:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP134]]
@@ -2138,8 +2121,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP136]], ptr [[TMP135]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE46]]
 ; ENABLED_MASKED_STRIDED:       pred.store.continue46:
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP137:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1
-; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP137]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
+; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP104]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if47:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP138:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP139:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP138]]
@@ -2147,8 +2129,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP140]], ptr [[TMP139]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE48]]
 ; ENABLED_MASKED_STRIDED:       pred.store.continue48:
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP141:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2
-; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP141]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
+; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP108]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if49:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP142:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP143:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP142]]
@@ -2156,8 +2137,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP144]], ptr [[TMP143]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE50]]
 ; ENABLED_MASKED_STRIDED:       pred.store.continue50:
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP145:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3
-; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP145]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
+; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP112]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if51:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP146:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP147:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP146]]
@@ -2165,8 +2145,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP148]], ptr [[TMP147]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE52]]
 ; ENABLED_MASKED_STRIDED:       pred.store.continue52:
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP149:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4
-; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP149]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
+; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP116]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if53:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP150:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP151:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP150]]
@@ -2174,8 +2153,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP152]], ptr [[TMP151]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE54]]
 ; ENABLED_MASKED_STRIDED:       pred.store.continue54:
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP153:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5
-; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP153]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
+; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP120]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if55:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP154:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP155:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP154]]
@@ -2183,8 +2161,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP156]], ptr [[TMP155]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE56]]
 ; ENABLED_MASKED_STRIDED:       pred.store.continue56:
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP157:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6
-; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP157]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
+; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP124]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if57:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP158:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP159:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP158]]
@@ -2192,8 +2169,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP160]], ptr [[TMP159]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE58]]
 ; ENABLED_MASKED_STRIDED:       pred.store.continue58:
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP161:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7
-; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP161]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]]
+; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP128]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if59:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP162:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP163:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP162]]
@@ -2524,8 +2500,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE46]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue46:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP134:%.*]] = sub <8 x i8> zeroinitializer, [[TMP101]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP135:%.*]] = extractelement <8 x i1> [[TMP3]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP135]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP102]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if47:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP136:%.*]] = extractelement <8 x i32> [[TMP52]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP137:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP136]]
@@ -2533,8 +2508,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP138]], ptr [[TMP137]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE48]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue48:
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP139:%.*]] = extractelement <8 x i1> [[TMP3]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP139]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP106]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if49:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP140:%.*]] = extractelement <8 x i32> [[TMP52]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP141:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP140]]
@@ -2542,8 +2516,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP142]], ptr [[TMP141]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE50]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue50:
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP143:%.*]] = extractelement <8 x i1> [[TMP3]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP143]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP110]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if51:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP144:%.*]] = extractelement <8 x i32> [[TMP52]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP145:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP144]]
@@ -2551,8 +2524,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP146]], ptr [[TMP145]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE52]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue52:
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP147:%.*]] = extractelement <8 x i1> [[TMP3]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP147]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP114]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if53:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP148:%.*]] = extractelement <8 x i32> [[TMP52]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP149:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP148]]
@@ -2560,8 +2532,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP150]], ptr [[TMP149]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE54]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue54:
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP151:%.*]] = extractelement <8 x i1> [[TMP3]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP151]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP118]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if55:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP152:%.*]] = extractelement <8 x i32> [[TMP52]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP153:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP152]]
@@ -2569,8 +2540,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP154]], ptr [[TMP153]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE56]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue56:
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP155:%.*]] = extractelement <8 x i1> [[TMP3]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP155]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP122]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if57:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP156:%.*]] = extractelement <8 x i32> [[TMP52]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP157:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP156]]
@@ -2578,8 +2548,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP158]], ptr [[TMP157]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE58]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue58:
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP159:%.*]] = extractelement <8 x i1> [[TMP3]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP159]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP126]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if59:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP160:%.*]] = extractelement <8 x i32> [[TMP52]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP161:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP160]]
@@ -2587,8 +2556,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP162]], ptr [[TMP161]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE60]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue60:
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP163:%.*]] = extractelement <8 x i1> [[TMP3]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP163]], label [[PRED_STORE_IF61:%.*]], label [[PRED_STORE_CONTINUE62]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP130]], label [[PRED_STORE_IF61:%.*]], label [[PRED_STORE_CONTINUE62]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if61:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP164:%.*]] = extractelement <8 x i32> [[TMP52]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP165:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP164]]
@@ -2958,8 +2926,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE44]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue44:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP132:%.*]] = sub <8 x i8> zeroinitializer, [[TMP99]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP133:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP133]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP100]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if45:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP134:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP135:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP134]]
@@ -2967,8 +2934,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP136]], ptr [[TMP135]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE46]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue46:
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP137:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP137]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP104]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if47:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP138:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP139:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP138]]
@@ -2976,8 +2942,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP140]], ptr [[TMP139]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE48]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue48:
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP141:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP141]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP108]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if49:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP142:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP143:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP142]]
@@ -2985,8 +2950,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP144]], ptr [[TMP143]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE50]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue50:
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP145:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP145]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP112]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if51:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP146:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP147:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP146]]
@@ -2994,8 +2958,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP148]], ptr [[TMP147]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE52]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue52:
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP149:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP149]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP116]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if53:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP150:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP151:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP150]]
@@ -3003,8 +2966,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP152]], ptr [[TMP151]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE54]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue54:
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP153:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP153]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP120]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if55:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP154:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP155:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP154]]
@@ -3012,8 +2974,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP156]], ptr [[TMP155]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE56]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue56:
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP157:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP157]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP124]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if57:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP158:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP159:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP158]]
@@ -3021,8 +2982,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP160]], ptr [[TMP159]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE58]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue58:
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP161:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP161]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP128]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if59:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP162:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP163:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP162]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll
index 3b8d2c623d13c..32df8e5ac8717 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll
@@ -181,8 +181,7 @@ define dso_local void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [2 x i8], ptr [[Y:%.*]], i64 [[INDEX]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_LOAD7:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr align 2 [[TMP19]], <4 x i1> [[TMP0]], <4 x i16> poison)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP20:%.*]] = or disjoint <4 x i64> [[TMP2]], splat (i64 1)
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP21:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP21]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP3]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if8:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = extractelement <4 x i64> [[TMP20]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [2 x i8], ptr [[POINTS]], i64 [[TMP22]]
@@ -190,8 +189,7 @@ define dso_local void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr
 ; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP24]], ptr [[TMP23]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE9]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue9:
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP25:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP25]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if10:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP26:%.*]] = extractelement <4 x i64> [[TMP20]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [2 x i8], ptr [[POINTS]], i64 [[TMP26]]
@@ -199,8 +197,7 @@ define dso_local void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr
 ; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP28]], ptr [[TMP27]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE11]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue11:
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP29]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP11]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if12:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP30:%.*]] = extractelement <4 x i64> [[TMP20]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8], ptr [[POINTS]], i64 [[TMP30]]
@@ -208,8 +205,7 @@ define dso_local void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr
 ; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP32]], ptr [[TMP31]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE13]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue13:
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP33:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP33]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15]]
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP15]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if14:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = extractelement <4 x i64> [[TMP20]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [2 x i8], ptr [[POINTS]], i64 [[TMP34]]
diff --git a/llvm/test/Transforms/LoopVectorize/as_cast.ll b/llvm/test/Transforms/LoopVectorize/as_cast.ll
index 67aacefebd555..2437bcfde08f9 100644
--- a/llvm/test/Transforms/LoopVectorize/as_cast.ll
+++ b/llvm/test/Transforms/LoopVectorize/as_cast.ll
@@ -22,7 +22,7 @@ loop:
 ; CHECK: [[ID2:%.*]] = add i64 %{{.*}}, 1
 ; CHECK: [[AS2:%.*]] = addrspacecast ptr addrspace(1) %in to ptr
 ; CHECK: [[GEP2:%.*]] = getelementptr inbounds i64, ptr [[AS2]], i64 [[ID2]]
-; CHECK: store i64 [[ID2]], ptr %9, align 4
+; CHECK: store i64 [[ID2]], ptr [[GEP2]], align 4
 
   %cmp = icmp eq i64 %next, 7
   br i1 %cmp, label %exit, label %loop
diff --git a/llvm/test/Transforms/LoopVectorize/cast-induction.ll b/llvm/test/Transforms/LoopVectorize/cast-induction.ll
index 2764a61728fe3..e5069d97f3117 100644
--- a/llvm/test/Transforms/LoopVectorize/cast-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/cast-induction.ll
@@ -273,15 +273,14 @@ define void @cast_induction_tail_folding(ptr %A) {
 ; IC2-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; IC2:       [[VECTOR_BODY]]:
 ; IC2-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
-; IC2-NEXT:    [[INDEX0:%.*]] = add i32 [[INDEX]], 0
 ; IC2-NEXT:    [[INDEX1:%.*]] = add i32 [[INDEX]], 1
-; IC2-NEXT:    [[TMP2:%.*]] = icmp ule i32 [[INDEX0]], 2
+; IC2-NEXT:    [[TMP2:%.*]] = icmp ule i32 [[INDEX]], 2
 ; IC2-NEXT:    [[TMP3:%.*]] = icmp ule i32 [[INDEX1]], 2
 ; IC2-NEXT:    br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; IC2:       [[PRED_STORE_IF]]:
-; IC2-NEXT:    [[TMP4:%.*]] = sext i32 [[INDEX0]] to i64
+; IC2-NEXT:    [[TMP4:%.*]] = sext i32 [[INDEX]] to i64
 ; IC2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]]
-; IC2-NEXT:    store i32 [[INDEX0]], ptr [[TMP5]], align 4
+; IC2-NEXT:    store i32 [[INDEX]], ptr [[TMP5]], align 4
 ; IC2-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; IC2:       [[PRED_STORE_CONTINUE]]:
 ; IC2-NEXT:    br i1 [[TMP3]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
diff --git a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll
index 5ef3885cc8c69..fe561d7a53629 100644
--- a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll
+++ b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll
@@ -546,10 +546,6 @@ define void @predicated_store(ptr %p, i32 %x, i64 %n) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP1]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP12]], i32 0
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP3]], i32 0
-; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i32 0
-; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <4 x ptr> [[TMP13]], ptr [[TMP2]], i32 1
-; CHECK-NEXT:    [[TMP25:%.*]] = insertelement <4 x ptr> [[TMP19]], ptr [[TMP4]], i32 2
-; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <4 x ptr> [[TMP25]], ptr [[TMP6]], i32 3
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[TMP0]], align 8
 ; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP2]], align 8
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 8
@@ -631,10 +627,8 @@ define void @predicated_store(ptr %p, i32 %x, i64 %n) {
 ; INTER-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
 ; INTER-NEXT:    br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; INTER:       [[PRED_STORE_IF]]:
-; INTER-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 0
-; INTER-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP7]], i32 0
 ; INTER-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 0
-; INTER-NEXT:    store i32 [[TMP6]], ptr [[TMP5]], align 8
+; INTER-NEXT:    store i32 [[TMP6]], ptr [[TMP2]], align 8
 ; INTER-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; INTER:       [[PRED_STORE_CONTINUE]]:
 ; INTER-NEXT:    [[TMP8:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/constant-fold-commutative-and.ll b/llvm/test/Transforms/LoopVectorize/constant-fold-commutative-and.ll
index 97c54fa5a06c2..c3edba03885b1 100644
--- a/llvm/test/Transforms/LoopVectorize/constant-fold-commutative-and.ll
+++ b/llvm/test/Transforms/LoopVectorize/constant-fold-commutative-and.ll
@@ -30,8 +30,7 @@ define void @constant_fold_commutative_and(ptr %ptr.n, ptr noalias %p, i1 %cond)
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP6]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
-; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[P]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x i1> [[PREDPHI3]], i32 0
 ; CHECK-NEXT:    store i1 [[TMP9]], ptr [[TMP8]], align 1
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
diff --git a/llvm/test/Transforms/LoopVectorize/constantfolder.ll b/llvm/test/Transforms/LoopVectorize/constantfolder.ll
index 0cc8db02e5c6d..047a6f48837b8 100644
--- a/llvm/test/Transforms/LoopVectorize/constantfolder.ll
+++ b/llvm/test/Transforms/LoopVectorize/constantfolder.ll
@@ -401,9 +401,8 @@ define void @unused_live_ins_in_preheader(ptr %dst, ptr %src) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
-; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP2]]
-; CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i32, ptr [[DST]], i32 [[INDEX]]
+; CHECK-NEXT:    store i32 [[INDEX]], ptr [[TMP3]], align 8
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; CHECK:       [[PRED_STORE_CONTINUE]]:
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/cse-casts.ll b/llvm/test/Transforms/LoopVectorize/cse-casts.ll
index 1b5cb2dc88b0d..19abb4016b498 100644
--- a/llvm/test/Transforms/LoopVectorize/cse-casts.ll
+++ b/llvm/test/Transforms/LoopVectorize/cse-casts.ll
@@ -131,9 +131,8 @@ define void @preserve_flags_narrowing_extends_and_truncs(ptr noalias %A, ptr noa
 ; CHECK-NEXT:    [[TMP33:%.*]] = zext <4 x i8> [[TMP31]] to <4 x i64>
 ; CHECK-NEXT:    br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
-; CHECK-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 0
 ; CHECK-NEXT:    [[TMP35:%.*]] = extractelement <4 x i64> [[TMP32]], i32 0
-; CHECK-NEXT:    store i64 [[TMP35]], ptr [[TMP34]], align 4
+; CHECK-NEXT:    store i64 [[TMP35]], ptr [[C]], align 4
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; CHECK:       [[PRED_STORE_CONTINUE]]:
 ; CHECK-NEXT:    br i1 true, label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/debugloc.ll b/llvm/test/Transforms/LoopVectorize/debugloc.ll
index 4b364133dfefe..15295ad5f0bd0 100644
--- a/llvm/test/Transforms/LoopVectorize/debugloc.ll
+++ b/llvm/test/Transforms/LoopVectorize/debugloc.ll
@@ -66,8 +66,7 @@ define i32 @test_debug_loc_on_branch_in_loop(ptr noalias %src, ptr noalias %dst)
 ; CHECK-NEXT:   br i1 [[EXT]], label %pred.store.if, label %pred.store.continue, !dbg [[LOC3]]
 ; CHECK-EMPTY:
 ; CHECK-NEXT: pred.store.if:
-; CHECK-NEXT:   [[IDX:%.+]] = add i64 %index, 0
-; CHECK-NEXT:   [[GEP:%.+]] = getelementptr inbounds i32, ptr %dst, i64 [[IDX]]
+; CHECK-NEXT:   [[GEP:%.+]] = getelementptr inbounds i32, ptr %dst, i64 %index
 ; CHECK-NEXT:   store i32 0, ptr [[GEP]], align 4
 ; CHECK-NEXT:   br label %pred.store.continue, !dbg [[LOC3]]
 ; CHECK-EMPTY:
@@ -105,8 +104,7 @@ define i32 @test_different_debug_loc_on_replicate_recipe(ptr noalias %src, ptr n
 ; CHECK-NEXT:   br i1 [[EXT]], label %pred.store.if, label %pred.store.continue, !dbg [[LOC4]]
 ; CHECK-EMPTY:
 ; CHECK-NEXT: pred.store.if:
-; CHECK-NEXT:   [[IDX:%.+]] = add i64 %index, 0
-; CHECK-NEXT:   [[GEP:%.+]] = getelementptr inbounds i32, ptr %dst, i64 [[IDX]], !dbg [[LOC5:!.+]]
+; CHECK-NEXT:   [[GEP:%.+]] = getelementptr inbounds i32, ptr %dst, i64 %index, !dbg [[LOC5:!.+]]
 ; CHECK-NEXT:   store i32 0, ptr [[GEP]], align 4
 ; CHECK-NEXT:   br label %pred.store.continue, !dbg [[LOC4]]
 ; CHECK-EMPTY:
diff --git a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
index 156c2bdca7b0e..8f7ae20288575 100644
--- a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
+++ b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
@@ -164,8 +164,7 @@ define dso_local void @cannotProveAlignedTC(ptr noalias nocapture %A, i32 %p, i3
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
-; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
 ; CHECK-NEXT:    store i32 13, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK:       pred.store.continue:
diff --git a/llvm/test/Transforms/LoopVectorize/find-last-iv-sinkable-load.ll b/llvm/test/Transforms/LoopVectorize/find-last-iv-sinkable-load.ll
index 5d78d7b2e320d..ba8d05c9e2c01 100644
--- a/llvm/test/Transforms/LoopVectorize/find-last-iv-sinkable-load.ll
+++ b/llvm/test/Transforms/LoopVectorize/find-last-iv-sinkable-load.ll
@@ -635,9 +635,7 @@ define i64 @findlast_load_cond_store_other_block(ptr noalias %a, ptr %b, i64 %n)
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i1> [[TMP8]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP9]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
-; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP10]]
-; CHECK-NEXT:    store i64 0, ptr [[TMP11]], align 8
+; CHECK-NEXT:    store i64 0, ptr [[TMP3]], align 8
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; CHECK:       [[PRED_STORE_CONTINUE]]:
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP8]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
index 6b4727a1379f8..c2b3d048f9cc5 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
@@ -2941,8 +2941,7 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) {
 ; UNROLL-NO-IC-NEXT:    [[TMP48:%.*]] = extractelement <4 x i1> [[TMP10]], i32 0
 ; UNROLL-NO-IC-NEXT:    br i1 [[TMP48]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; UNROLL-NO-IC:       pred.store.if:
-; UNROLL-NO-IC-NEXT:    [[TMP49:%.*]] = add i32 [[INDEX]], 0
-; UNROLL-NO-IC-NEXT:    [[TMP50:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP49]]
+; UNROLL-NO-IC-NEXT:    [[TMP50:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[INDEX]]
 ; UNROLL-NO-IC-NEXT:    store i32 [[OFFSET_IDX]], ptr [[TMP50]], align 4
 ; UNROLL-NO-IC-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; UNROLL-NO-IC:       pred.store.continue:
@@ -3140,8 +3139,7 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) {
 ; SINK-AFTER-NEXT:    [[TMP25:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
 ; SINK-AFTER-NEXT:    br i1 [[TMP25]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; SINK-AFTER:       pred.store.if:
-; SINK-AFTER-NEXT:    [[TMP26:%.*]] = add i32 [[INDEX]], 0
-; SINK-AFTER-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP26]]
+; SINK-AFTER-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[INDEX]]
 ; SINK-AFTER-NEXT:    store i32 [[OFFSET_IDX]], ptr [[TMP27]], align 4
 ; SINK-AFTER-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; SINK-AFTER:       pred.store.continue:
diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll
index 4ed912b8054ca..2dd234bc8c3d0 100644
--- a/llvm/test/Transforms/LoopVectorize/float-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll
@@ -1322,8 +1322,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
 ; VEC4_INTERL1-NEXT:    [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0
 ; VEC4_INTERL1-NEXT:    br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; VEC4_INTERL1:       pred.store.if:
-; VEC4_INTERL1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [4 x i8], ptr [[A]], i64 [[INDEX]]
-; VEC4_INTERL1-NEXT:    store float [[DOTCAST2]], ptr [[TMP3]], align 4
+; VEC4_INTERL1-NEXT:    store float [[DOTCAST2]], ptr [[TMP0]], align 4
 ; VEC4_INTERL1-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; VEC4_INTERL1:       pred.store.continue:
 ; VEC4_INTERL1-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i64 1
@@ -1402,8 +1401,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
 ; VEC4_INTERL2-NEXT:    [[TMP5:%.*]] = extractelement <4 x i1> [[TMP3]], i64 0
 ; VEC4_INTERL2-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; VEC4_INTERL2:       pred.store.if:
-; VEC4_INTERL2-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [4 x i8], ptr [[A]], i64 [[INDEX]]
-; VEC4_INTERL2-NEXT:    store float [[DOTCAST2]], ptr [[TMP35]], align 4
+; VEC4_INTERL2-NEXT:    store float [[DOTCAST2]], ptr [[TMP1]], align 4
 ; VEC4_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; VEC4_INTERL2:       pred.store.continue:
 ; VEC4_INTERL2-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP3]], i64 1
@@ -1573,8 +1571,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i64 0
 ; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; VEC2_INTERL1_PRED_STORE:       pred.store.if:
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [4 x i8], ptr [[A]], i64 [[INDEX]]
-; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[DOTCAST2]], ptr [[TMP3]], align 4
+; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[DOTCAST2]], ptr [[TMP0]], align 4
 ; VEC2_INTERL1_PRED_STORE-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; VEC2_INTERL1_PRED_STORE:       pred.store.continue:
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP4:%.*]] = extractelement <2 x i1> [[TMP1]], i64 1
diff --git a/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll b/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll
index c420157218496..748dae79fa407 100644
--- a/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll
+++ b/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll
@@ -162,8 +162,7 @@ define void @dont_hoist_predicated_load(ptr %dst, ptr %invariant_ptr, ptr %cond_
 ; CHECK-NEXT:    br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4, !alias.scope [[META14:![0-9]+]]
-; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
 ; CHECK-NEXT:    store i32 [[TMP7]], ptr [[TMP9]], align 4, !alias.scope [[META16:![0-9]+]], !noalias [[META18:![0-9]+]]
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; CHECK:       [[PRED_STORE_CONTINUE]]:
diff --git a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll
index 7f05452b94741..4da584fccf6ba 100644
--- a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll
@@ -240,8 +240,6 @@ define void @test_noalias_store_via_runtime_checks(ptr %dst, ptr %dst.1, ptr %sr
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE20:.*]] ]
 ; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP5]], i32 1
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META22:![0-9]+]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp ugt <2 x i32> [[WIDE_LOAD]], splat (i32 11)
@@ -344,8 +342,6 @@ define void @test_memory_op_between_loads_alias(ptr %dst, ptr %src, ptr %cond, p
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE11:.*]] ]
 ; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP5]], i32 1
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META35:![0-9]+]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp ugt <2 x i32> [[WIDE_LOAD]], splat (i32 11)
@@ -458,8 +454,6 @@ define void @test_memory_op_between_loads_no_alias_via_rt_checks(ptr %dst, ptr %
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE20:.*]] ]
 ; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP5]], i32 1
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META45:![0-9]+]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp ugt <2 x i32> [[WIDE_LOAD]], splat (i32 11)
@@ -656,8 +650,6 @@ define void @test_stores_not_sunk_aliasing_load_between(ptr %dst, ptr %mid, ptr
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE15:.*]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
-; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <2 x i32> [[TMP18]], i32 [[TMP1]], i32 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META68:![0-9]+]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
@@ -769,8 +761,6 @@ define void @sink_multiple_store_groups_noalias_via_scev(ptr %dst, ptr %src) {
 ; CHECK-NEXT:    [[TMP34:%.*]] = fadd <2 x double> [[WIDE_LOAD]], splat (double 8.000000e+00)
 ; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr double, ptr [[DST]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]]
-; CHECK-NEXT:    [[TMP31:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP18]], i32 0
-; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <2 x ptr> [[TMP31]], ptr [[TMP21]], i32 1
 ; CHECK-NEXT:    [[TMP20:%.*]] = select <2 x i1> [[TMP10]], <2 x double> [[WIDE_LOAD]], <2 x double> [[TMP34]]
 ; CHECK-NEXT:    [[TMP32:%.*]] = extractelement <2 x double> [[TMP20]], i32 0
 ; CHECK-NEXT:    store double [[TMP32]], ptr [[TMP18]], align 8, !alias.scope [[META81:![0-9]+]], !noalias [[META78]]
@@ -851,8 +841,6 @@ define void @sink_multiple_store_groups_alias_via_scev(ptr %dst, ptr %src) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ]
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[TMP1]], i32 1
 ; CHECK-NEXT:    [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[OFFSET_IDX]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[GEP_FLAG:%.*]] = getelementptr i8, ptr [[GEP_SRC]], i64 152
@@ -1037,8 +1025,6 @@ define void @test_three_stores_with_different_predicates(ptr %dst, ptr %src, ptr
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE11:.*]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP23:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
-; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <2 x i32> [[TMP23]], i32 [[TMP1]], i32 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META92:![0-9]+]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
index 9a4f5ecd93a38..07a84d917bb24 100644
--- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
@@ -82,11 +82,9 @@ define i32 @test(ptr nocapture %f) #0 {
 ; VEC-NEXT:    [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
 ; VEC-NEXT:    br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; VEC:       pred.store.if:
-; VEC-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 0
-; VEC-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[F]], i64 [[TMP5]]
 ; VEC-NEXT:    [[TMP7:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0
 ; VEC-NEXT:    [[TMP8:%.*]] = add nsw i32 [[TMP7]], 20
-; VEC-NEXT:    store i32 [[TMP8]], ptr [[TMP6]], align 4
+; VEC-NEXT:    store i32 [[TMP8]], ptr [[TMP1]], align 4
 ; VEC-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; VEC:       pred.store.continue:
 ; VEC-NEXT:    [[TMP9:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
@@ -305,10 +303,8 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) {
 ; VEC-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 4
 ; VEC-NEXT:    br i1 [[COND_2:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE2]]
 ; VEC:       pred.store.if:
-; VEC-NEXT:    [[INDVARS_IV3:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VEC-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR]], i64 0, i64 [[INDVARS_IV3]]
 ; VEC-NEXT:    [[TMP11:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0
-; VEC-NEXT:    store i32 [[TMP11]], ptr [[ARRAYIDX16]], align 4
+; VEC-NEXT:    store i32 [[TMP11]], ptr [[TMP7]], align 4
 ; VEC-NEXT:    [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 1
 ; VEC-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR]], i64 0, i64 [[TMP12]]
 ; VEC-NEXT:    [[TMP14:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1
@@ -444,10 +440,8 @@ define void @minimal_bit_widths(ptr %p, i1 %c) {
 ; VEC-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP1]], align 1
 ; VEC-NEXT:    br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE2]]
 ; VEC:       pred.store.if:
-; VEC-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 0
-; VEC-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP8]]
 ; VEC-NEXT:    [[TMP4:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 0
-; VEC-NEXT:    store i8 [[TMP4]], ptr [[TMP3]], align 1
+; VEC-NEXT:    store i8 [[TMP4]], ptr [[TMP1]], align 1
 ; VEC-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 1
 ; VEC-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP5]]
 ; VEC-NEXT:    [[TMP7:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 1
@@ -553,10 +547,8 @@ define void @minimal_bit_widths_with_aliasing_store(i1 %c, ptr %ptr) {
 ; VEC-NEXT:    store <2 x i8> zeroinitializer, ptr [[TMP2]], align 1
 ; VEC-NEXT:    br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE2]]
 ; VEC:       pred.store.if:
-; VEC-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; VEC-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP0]]
 ; VEC-NEXT:    [[TMP5:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 0
-; VEC-NEXT:    store i8 [[TMP5]], ptr [[TMP4]], align 1
+; VEC-NEXT:    store i8 [[TMP5]], ptr [[TMP2]], align 1
 ; VEC-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 1
 ; VEC-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP7]]
 ; VEC-NEXT:    [[TMP9:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll
index 3bd712132637f..64c1580f18bd1 100644
--- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll
@@ -35,10 +35,8 @@ define void @interleaved_with_cond_store_0(ptr %p, i64 %x, i64 %n) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
-; CHECK-NEXT:    [[DOTSPLIT3:%.*]] = getelementptr inbounds [16 x i8], ptr [[P]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[DOTSPLIT3]], i64 8
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 0
-; CHECK-NEXT:    store i64 [[TMP4]], ptr [[TMP3]], align 8
+; CHECK-NEXT:    store i64 [[TMP4]], ptr [[TMP0]], align 8
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK:       pred.store.continue:
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[TMP1]], i64 1
@@ -134,9 +132,8 @@ define void @interleaved_with_cond_store_1(ptr %p, i64 %x, i64 %n) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [16 x i8], ptr [[P]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 0
-; CHECK-NEXT:    store i64 [[TMP7]], ptr [[TMP6]], align 8
+; CHECK-NEXT:    store i64 [[TMP7]], ptr [[TMP1]], align 8
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK:       pred.store.continue:
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i1> [[TMP4]], i64 1
@@ -242,10 +239,8 @@ define void @interleaved_with_cond_store_2(ptr %p, i64 %x, i64 %n) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
-; CHECK-NEXT:    [[DOTSPLIT3:%.*]] = getelementptr inbounds [16 x i8], ptr [[P]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[DOTSPLIT3]], i64 8
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 0
-; CHECK-NEXT:    store i64 [[TMP7]], ptr [[TMP6]], align 8
+; CHECK-NEXT:    store i64 [[TMP7]], ptr [[TMP3]], align 8
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK:       pred.store.continue:
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i1> [[TMP4]], i64 1
diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
index e20a6673a2b16..6ad9928a4a017 100644
--- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
+++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
@@ -191,8 +191,7 @@ define i32 @loop_requires_scev_predicate(ptr %dest, i32 %end) {
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP14]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
-; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[DEST:%.*]], i64 [[TMP8]]
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[DEST:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0
 ; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <2 x i32> [[WIDE_LOAD3]], i32 0
 ; CHECK-NEXT:    [[TMP18:%.*]] = add i32 [[TMP16]], [[TMP17]]
@@ -307,8 +306,7 @@ define void @test_rev_loops_deref_loads(ptr nocapture noundef writeonly %dest) {
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
-; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[OFFSET_IDX]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i32> [[REVERSE2]], i32 0
 ; CHECK-NEXT:    [[TMP12:%.*]] = shl nsw i32 [[TMP11]], 2
 ; CHECK-NEXT:    store i32 [[TMP12]], ptr [[TMP10]], align 4
@@ -393,11 +391,9 @@ define void @test_rev_loops_non_deref_loads(ptr nocapture noundef writeonly %des
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i1> [[TMP6]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
-; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i64> [[TMP0]], i32 0
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP8]]
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
-; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i64> [[TMP0]], i32 0
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = shl nsw i32 [[TMP10]], 2
 ; CHECK-NEXT:    store i32 [[TMP13]], ptr [[TMP12]], align 4
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
@@ -408,8 +404,7 @@ define void @test_rev_loops_non_deref_loads(ptr nocapture noundef writeonly %des
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
 ; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP15]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
-; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
-; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP18]]
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP15]]
 ; CHECK-NEXT:    [[TMP20:%.*]] = shl nsw i32 [[TMP17]], 2
 ; CHECK-NEXT:    store i32 [[TMP20]], ptr [[TMP19]], align 4
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE2]]
@@ -552,13 +547,10 @@ define void @test_rev_loops_strided_deref_loads(ptr nocapture noundef writeonly
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP9]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP8]], align 4
 ; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
-; CHECK-NEXT:    [[TMP23:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0
-; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <2 x i32> [[TMP23]], i32 [[TMP12]], i32 1
 ; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP13]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
-; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[OFFSET_IDX]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = shl nsw i32 [[TMP11]], 2
 ; CHECK-NEXT:    store i32 [[TMP15]], ptr [[TMP14]], align 4
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll
index 5e88072517b37..4275170b14a09 100644
--- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll
+++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll
@@ -47,8 +47,7 @@ define void @ptr_depends_on_sdiv(ptr noalias %dst, i16 noundef %off) {
 ; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x i16> [[TMP23]], i32 0
 ; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP13]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i64, ptr [[TMP24]], align 1
-; CHECK-NEXT:    [[TMP10:%.*]] = add i16 [[OFFSET_IDX]], 0
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP10]]
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[OFFSET_IDX]]
 ; CHECK-NEXT:    store i64 [[TMP9]], ptr [[TMP11]], align 1
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; CHECK:       [[PRED_STORE_CONTINUE]]:
@@ -123,8 +122,7 @@ define void @ptr_depends_on_possibly_poison_value(ptr noalias %dst, i16 %off) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i16> [[TMP3]], i32 0
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP5]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i64, ptr [[TMP14]], align 1
-; CHECK-NEXT:    [[TMP7:%.*]] = add i16 [[OFFSET_IDX]], 0
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[OFFSET_IDX]]
 ; CHECK-NEXT:    store i64 [[TMP9]], ptr [[TMP8]], align 1
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; CHECK:       [[PRED_STORE_CONTINUE]]:
@@ -194,8 +192,7 @@ define void @ptr_doesnt_depend_on_poison_or_ub(ptr noalias %dst, i16 noundef %of
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP6]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
-; CHECK-NEXT:    [[TMP7:%.*]] = add i16 [[OFFSET_IDX]], 0
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[OFFSET_IDX]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0
 ; CHECK-NEXT:    store i64 [[TMP9]], ptr [[TMP8]], align 1
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
@@ -269,8 +266,7 @@ define void @ptr_depends_on_possibly_poison_value_from_load(ptr noalias %dst) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i16> [[TMP4]], i32 0
 ; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP6]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = load i64, ptr [[TMP15]], align 1
-; CHECK-NEXT:    [[TMP8:%.*]] = add i16 [[OFFSET_IDX]], 0
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP8]]
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[OFFSET_IDX]]
 ; CHECK-NEXT:    store i64 [[TMP10]], ptr [[TMP9]], align 1
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; CHECK:       [[PRED_STORE_CONTINUE]]:
@@ -333,7 +329,7 @@ define void @ptr_depends_on_noundef_load(ptr noalias %dst) {
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 9, i16 10>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ]
 ; CHECK-NEXT:    [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i16 9, [[DOTCAST]]
-; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr @src, align 1, !noundef [[META10:![0-9]+]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr @src, align 1, !noundef [[META6:![0-9]+]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i16> [[VEC_IND]], splat (i16 10)
 ; CHECK-NEXT:    [[TMP2:%.*]] = sub i16 1, [[TMP0]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = add i16 [[TMP2]], [[TMP0]]
@@ -343,8 +339,7 @@ define void @ptr_depends_on_noundef_load(ptr noalias %dst) {
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP7]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
-; CHECK-NEXT:    [[TMP8:%.*]] = add i16 [[OFFSET_IDX]], 0
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP8]]
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[OFFSET_IDX]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0
 ; CHECK-NEXT:    store i64 [[TMP10]], ptr [[TMP9]], align 1
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
diff --git a/llvm/test/Transforms/LoopVectorize/loop-form.ll b/llvm/test/Transforms/LoopVectorize/loop-form.ll
index b46cd8ecea5b3..b43c35b873f8c 100644
--- a/llvm/test/Transforms/LoopVectorize/loop-form.ll
+++ b/llvm/test/Transforms/LoopVectorize/loop-form.ll
@@ -1061,9 +1061,7 @@ define void @scalar_predication(ptr %addr) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
-; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr float, ptr [[ADDR]], i64 [[TMP0]]
-; CHECK-NEXT:    store float 1.000000e+01, ptr [[TMP6]], align 4
+; CHECK-NEXT:    store float 1.000000e+01, ptr [[TMP1]], align 4
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK:       pred.store.continue:
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll b/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll
index 30ee4803de607..955780a318a47 100644
--- a/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll
+++ b/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll
@@ -34,8 +34,7 @@ define void @maxvf3() {
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
-; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[INDEX]]
 ; CHECK-NEXT:    store i8 69, ptr [[TMP3]], align 8
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK:       pred.store.continue:
@@ -48,16 +47,14 @@ define void @maxvf3() {
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE2]]
 ; CHECK:       pred.store.continue2:
 ; CHECK-NEXT:    [[TMP7:%.*]] = add nuw nsw <2 x i32> splat (i32 3), [[VEC_IND]]
-; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
-; CHECK-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
+; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
 ; CHECK:       pred.store.if3:
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x i32> [[TMP7]], i32 0
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[TMP9]]
 ; CHECK-NEXT:    store i8 7, ptr [[TMP10]], align 8
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE4]]
 ; CHECK:       pred.store.continue4:
-; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
-; CHECK-NEXT:    br i1 [[TMP11]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
+; CHECK-NEXT:    br i1 [[TMP4]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
 ; CHECK:       pred.store.if5:
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x i32> [[TMP7]], i32 1
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[TMP12]]
diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
index 736d8356d2219..60cb29dec260a 100644
--- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
@@ -32,10 +32,6 @@ define void @a(ptr readnone %b) {
 ; CHECK-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]]
 ; CHECK-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP14]]
 ; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr null, i64 [[TMP17]]
-; CHECK-NEXT:    [[TMP21:%.*]] = insertelement <4 x ptr> poison, ptr [[NEXT_GEP]], i32 0
-; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x ptr> [[TMP21]], ptr [[NEXT_GEP2]], i32 1
-; CHECK-NEXT:    [[TMP23:%.*]] = insertelement <4 x ptr> [[TMP22]], ptr [[NEXT_GEP3]], i32 2
-; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x ptr> [[TMP23]], ptr [[NEXT_GEP4]], i32 3
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 -3
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1
@@ -44,8 +40,7 @@ define void @a(ptr readnone %b) {
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1
-; CHECK-NEXT:    store i8 95, ptr [[TMP9]], align 1
+; CHECK-NEXT:    store i8 95, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK:       pred.store.continue:
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/pr37248.ll b/llvm/test/Transforms/LoopVectorize/pr37248.ll
index 98da110a44e8c..bf1ab0685ec55 100644
--- a/llvm/test/Transforms/LoopVectorize/pr37248.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr37248.ll
@@ -37,25 +37,24 @@ define void @f1(ptr noalias %b, i1 %c, i32 %start) {
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = sub i32 [[START]], [[N_VEC]]
-; CHECK-NEXT:    [[TMP11:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 0
+; CHECK-NEXT:    [[TMP12:%.*]] = xor i1 [[TMP11]], true
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE3:.*]] ]
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i32 [[START]], [[INDEX]]
-; CHECK-NEXT:    [[TMP12:%.*]] = trunc i32 [[OFFSET_IDX]] to i16
-; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0
-; CHECK-NEXT:    br i1 [[TMP13]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK-NEXT:    [[TMP13:%.*]] = trunc i32 [[OFFSET_IDX]] to i16
+; CHECK-NEXT:    br i1 [[TMP12]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
 ; CHECK-NEXT:    store i32 10, ptr [[B]], align 1
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; CHECK:       [[PRED_STORE_CONTINUE]]:
-; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1
-; CHECK-NEXT:    br i1 [[TMP14]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3]]
+; CHECK-NEXT:    br i1 [[TMP12]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3]]
 ; CHECK:       [[PRED_STORE_IF2]]:
 ; CHECK-NEXT:    store i32 10, ptr [[B]], align 1
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE3]]
 ; CHECK:       [[PRED_STORE_CONTINUE3]]:
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [2 x i16], ptr @a, i16 0, i16 [[TMP12]]
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [2 x i16], ptr @a, i16 0, i16 [[TMP13]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i64 -1
 ; CHECK-NEXT:    store <2 x i16> zeroinitializer, ptr [[TMP17]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
diff --git a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
index bfe996fccd0b2..c07134229322e 100644
--- a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
@@ -23,8 +23,7 @@ define void @pr45679(ptr %A) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
-; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
 ; CHECK-NEXT:    store i32 13, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK:       pred.store.continue:
@@ -75,8 +74,7 @@ define void @pr45679(ptr %A) {
 ; VF2UF2-NEXT:    [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
 ; VF2UF2-NEXT:    br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; VF2UF2:       pred.store.if:
-; VF2UF2-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
-; VF2UF2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP3]]
+; VF2UF2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
 ; VF2UF2-NEXT:    store i32 13, ptr [[TMP4]], align 1
 ; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; VF2UF2:       pred.store.continue:
@@ -120,17 +118,16 @@ define void @pr45679(ptr %A) {
 ; VF1UF4-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; VF1UF4:       vector.body:
 ; VF1UF4-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
-; VF1UF4-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; VF1UF4-NEXT:    [[TMP1:%.*]] = add i32 [[INDEX]], 1
 ; VF1UF4-NEXT:    [[TMP2:%.*]] = add i32 [[INDEX]], 2
 ; VF1UF4-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 3
-; VF1UF4-NEXT:    [[TMP4:%.*]] = icmp ule i32 [[TMP0]], 13
+; VF1UF4-NEXT:    [[TMP4:%.*]] = icmp ule i32 [[INDEX]], 13
 ; VF1UF4-NEXT:    [[TMP5:%.*]] = icmp ule i32 [[TMP1]], 13
 ; VF1UF4-NEXT:    [[TMP6:%.*]] = icmp ule i32 [[TMP2]], 13
 ; VF1UF4-NEXT:    [[TMP7:%.*]] = icmp ule i32 [[TMP3]], 13
 ; VF1UF4-NEXT:    br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; VF1UF4:       pred.store.if:
-; VF1UF4-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP0]]
+; VF1UF4-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
 ; VF1UF4-NEXT:    store i32 13, ptr [[TMP8]], align 1
 ; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; VF1UF4:       pred.store.continue:
@@ -188,8 +185,7 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
-; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
 ; CHECK-NEXT:    store i64 [[TMP4]], ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
@@ -244,8 +240,7 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) {
 ; VF2UF2-NEXT:    [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
 ; VF2UF2-NEXT:    br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; VF2UF2:       pred.store.if:
-; VF2UF2-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
-; VF2UF2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP3]]
+; VF2UF2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
 ; VF2UF2-NEXT:    [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8
 ; VF2UF2-NEXT:    store i64 [[TMP5]], ptr [[B:%.*]], align 8
 ; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE]]
@@ -293,17 +288,16 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) {
 ; VF1UF4-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; VF1UF4:       vector.body:
 ; VF1UF4-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
-; VF1UF4-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; VF1UF4-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
 ; VF1UF4-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
 ; VF1UF4-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; VF1UF4-NEXT:    [[TMP4:%.*]] = icmp ule i64 [[TMP0]], 13
+; VF1UF4-NEXT:    [[TMP4:%.*]] = icmp ule i64 [[INDEX]], 13
 ; VF1UF4-NEXT:    [[TMP5:%.*]] = icmp ule i64 [[TMP1]], 13
 ; VF1UF4-NEXT:    [[TMP6:%.*]] = icmp ule i64 [[TMP2]], 13
 ; VF1UF4-NEXT:    [[TMP7:%.*]] = icmp ule i64 [[TMP3]], 13
 ; VF1UF4-NEXT:    br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; VF1UF4:       pred.store.if:
-; VF1UF4-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
+; VF1UF4-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
 ; VF1UF4-NEXT:    [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8
 ; VF1UF4-NEXT:    store i64 [[TMP9]], ptr [[B:%.*]], align 8
 ; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE]]
diff --git a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
index 17370fcdd576d..36806e6ad125e 100644
--- a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
@@ -21,8 +21,6 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
 ; IC1-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 1
 ; IC1-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]]
 ; IC1-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP2]]
-; IC1-NEXT:    [[TMP12:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0
-; IC1-NEXT:    [[TMP16:%.*]] = insertelement <2 x ptr> [[TMP12]], ptr [[NEXT_GEP3]], i32 1
 ; IC1-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[NEXT_GEP]], align 1
 ; IC1-NEXT:    [[TMP7:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], splat (i8 -12)
 ; IC1-NEXT:    [[TMP4:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], splat (i8 13)
@@ -117,12 +115,8 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
 ; IC2-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 3
 ; IC2-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]]
 ; IC2-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP2]]
-; IC2-NEXT:    [[TMP23:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0
-; IC2-NEXT:    [[TMP24:%.*]] = insertelement <2 x ptr> [[TMP23]], ptr [[NEXT_GEP3]], i32 1
 ; IC2-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP3]]
 ; IC2-NEXT:    [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
-; IC2-NEXT:    [[TMP30:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP4]], i32 0
-; IC2-NEXT:    [[TMP31:%.*]] = insertelement <2 x ptr> [[TMP30]], ptr [[NEXT_GEP5]], i32 1
 ; IC2-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 2
 ; IC2-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[NEXT_GEP]], align 1
 ; IC2-NEXT:    [[WIDE_LOAD6:%.*]] = load <2 x i8>, ptr [[TMP6]], align 1
diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll
index fa03c62bb4927..ffb16d07f3ea8 100644
--- a/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll
+++ b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll
@@ -488,8 +488,7 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF4-IC1-NEXT:    [[TMP8:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
 ; CHECK-VF4-IC1-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK-VF4-IC1:       pred.store.if:
-; CHECK-VF4-IC1-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
-; CHECK-VF4-IC1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
+; CHECK-VF4-IC1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
 ; CHECK-VF4-IC1-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
 ; CHECK-VF4-IC1-NEXT:    [[TMP11:%.*]] = add nsw i32 [[TMP10]], 1
 ; CHECK-VF4-IC1-NEXT:    store i32 [[TMP11]], ptr [[TMP9]], align 4, !alias.scope [[META9]], !noalias [[META6]]
@@ -607,8 +606,7 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF4-IC2-NEXT:    [[TMP15:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0
 ; CHECK-VF4-IC2-NEXT:    br i1 [[TMP15]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK-VF4-IC2:       pred.store.if:
-; CHECK-VF4-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
-; CHECK-VF4-IC2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
+; CHECK-VF4-IC2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
 ; CHECK-VF4-IC2-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
 ; CHECK-VF4-IC2-NEXT:    [[TMP18:%.*]] = add nsw i32 [[TMP17]], 1
 ; CHECK-VF4-IC2-NEXT:    store i32 [[TMP18]], ptr [[TMP16]], align 4, !alias.scope [[META9]], !noalias [[META6]]
diff --git a/llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll b/llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll
index b9be77ff224ff..037a76aa81c25 100644
--- a/llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll
+++ b/llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll
@@ -533,10 +533,8 @@ define void @struct_return_2xf32_replicate_predicated(ptr %a) {
 ; VF4-NEXT:    [[TMP51:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
 ; VF4-NEXT:    br i1 [[TMP51]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; VF4:       [[PRED_STORE_IF]]:
-; VF4-NEXT:    [[TMP52:%.*]] = add i64 [[INDEX]], 0
-; VF4-NEXT:    [[TMP53:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP52]]
 ; VF4-NEXT:    [[TMP54:%.*]] = extractelement <4 x float> [[TMP50]], i32 0
-; VF4-NEXT:    store float [[TMP54]], ptr [[TMP53]], align 8
+; VF4-NEXT:    store float [[TMP54]], ptr [[TMP0]], align 8
 ; VF4-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; VF4:       [[PRED_STORE_CONTINUE]]:
 ; VF4-NEXT:    [[TMP55:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
@@ -590,12 +588,9 @@ define void @struct_return_2xf32_replicate_predicated(ptr %a) {
 ; VF2IC2:       [[PRED_STORE_IF]]:
 ; VF2IC2-NEXT:    [[TMP5:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 0
 ; VF2IC2-NEXT:    [[TMP6:%.*]] = tail call { float, float } @fn2(float [[TMP5]]) #[[ATTR3:[0-9]+]]
-; VF2IC2-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 0
 ; VF2IC2-NEXT:    [[TMP8:%.*]] = extractvalue { float, float } [[TMP6]], 0
-; VF2IC2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP7]]
-; VF2IC2-NEXT:    [[TMP10:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 0
-; VF2IC2-NEXT:    [[TMP11:%.*]] = fdiv float [[TMP8]], [[TMP10]]
-; VF2IC2-NEXT:    store float [[TMP11]], ptr [[TMP9]], align 8
+; VF2IC2-NEXT:    [[TMP11:%.*]] = fdiv float [[TMP8]], [[TMP5]]
+; VF2IC2-NEXT:    store float [[TMP11]], ptr [[TMP0]], align 8
 ; VF2IC2-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; VF2IC2:       [[PRED_STORE_CONTINUE]]:
 ; VF2IC2-NEXT:    [[TMP12:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
@@ -606,8 +601,7 @@ define void @struct_return_2xf32_replicate_predicated(ptr %a) {
 ; VF2IC2-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 1
 ; VF2IC2-NEXT:    [[TMP16:%.*]] = extractvalue { float, float } [[TMP14]], 0
 ; VF2IC2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP15]]
-; VF2IC2-NEXT:    [[TMP18:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 1
-; VF2IC2-NEXT:    [[TMP19:%.*]] = fdiv float [[TMP16]], [[TMP18]]
+; VF2IC2-NEXT:    [[TMP19:%.*]] = fdiv float [[TMP16]], [[TMP13]]
 ; VF2IC2-NEXT:    store float [[TMP19]], ptr [[TMP17]], align 8
 ; VF2IC2-NEXT:    br label %[[PRED_STORE_CONTINUE3]]
 ; VF2IC2:       [[PRED_STORE_CONTINUE3]]:
@@ -619,8 +613,7 @@ define void @struct_return_2xf32_replicate_predicated(ptr %a) {
 ; VF2IC2-NEXT:    [[TMP23:%.*]] = add i64 [[INDEX]], 2
 ; VF2IC2-NEXT:    [[TMP24:%.*]] = extractvalue { float, float } [[TMP22]], 0
 ; VF2IC2-NEXT:    [[TMP25:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP23]]
-; VF2IC2-NEXT:    [[TMP26:%.*]] = extractelement <2 x float> [[WIDE_LOAD1]], i32 0
-; VF2IC2-NEXT:    [[TMP27:%.*]] = fdiv float [[TMP24]], [[TMP26]]
+; VF2IC2-NEXT:    [[TMP27:%.*]] = fdiv float [[TMP24]], [[TMP21]]
 ; VF2IC2-NEXT:    store float [[TMP27]], ptr [[TMP25]], align 8
 ; VF2IC2-NEXT:    br label %[[PRED_STORE_CONTINUE5]]
 ; VF2IC2:       [[PRED_STORE_CONTINUE5]]:
@@ -632,8 +625,7 @@ define void @struct_return_2xf32_replicate_predicated(ptr %a) {
 ; VF2IC2-NEXT:    [[TMP31:%.*]] = add i64 [[INDEX]], 3
 ; VF2IC2-NEXT:    [[TMP32:%.*]] = extractvalue { float, float } [[TMP30]], 0
 ; VF2IC2-NEXT:    [[TMP33:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP31]]
-; VF2IC2-NEXT:    [[TMP34:%.*]] = extractelement <2 x float> [[WIDE_LOAD1]], i32 1
-; VF2IC2-NEXT:    [[TMP35:%.*]] = fdiv float [[TMP32]], [[TMP34]]
+; VF2IC2-NEXT:    [[TMP35:%.*]] = fdiv float [[TMP32]], [[TMP29]]
 ; VF2IC2-NEXT:    store float [[TMP35]], ptr [[TMP33]], align 8
 ; VF2IC2-NEXT:    br label %[[PRED_STORE_CONTINUE7]]
 ; VF2IC2:       [[PRED_STORE_CONTINUE7]]:
diff --git a/llvm/test/Transforms/LoopVectorize/struct-return.ll b/llvm/test/Transforms/LoopVectorize/struct-return.ll
index dd1117039512b..8049de1b792e1 100644
--- a/llvm/test/Transforms/LoopVectorize/struct-return.ll
+++ b/llvm/test/Transforms/LoopVectorize/struct-return.ll
@@ -281,11 +281,8 @@ define void @scalarized_predicated_struct_return(ptr %a) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = tail call { i64, i64 } @bar_i64(i64 [[TMP3]]) #[[ATTR2:[0-9]+]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { i64, i64 } [[TMP4]], 0
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0
-; CHECK-NEXT:    [[TMP7:%.*]] = udiv i64 [[TMP5]], [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
-; CHECK-NEXT:    store i64 [[TMP7]], ptr [[TMP9]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = udiv i64 [[TMP5]], [[TMP3]]
+; CHECK-NEXT:    store i64 [[TMP7]], ptr [[TMP0]], align 8
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; CHECK:       [[PRED_STORE_CONTINUE]]:
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
@@ -294,8 +291,7 @@ define void @scalarized_predicated_struct_return(ptr %a) {
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 1
 ; CHECK-NEXT:    [[TMP12:%.*]] = tail call { i64, i64 } @bar_i64(i64 [[TMP11]]) #[[ATTR2]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = extractvalue { i64, i64 } [[TMP12]], 0
-; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 1
-; CHECK-NEXT:    [[TMP15:%.*]] = udiv i64 [[TMP13]], [[TMP14]]
+; CHECK-NEXT:    [[TMP15:%.*]] = udiv i64 [[TMP13]], [[TMP11]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = add i64 [[INDEX]], 1
 ; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP16]]
 ; CHECK-NEXT:    store i64 [[TMP15]], ptr [[TMP17]], align 8
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll
index 9e523be618b44..464545bf58aa2 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll
@@ -13,15 +13,11 @@ define i32 @test(ptr %vf1, i64 %n) {
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 -56)
 ; CHECK-NEXT:    [[TMP18:%.*]] = alloca i8, i64 [[N]], align 16
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP18]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
-; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0
-; CHECK-NEXT:    store ptr [[TMP4]], ptr [[TMP3]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[INDEX]]
+; CHECK-NEXT:    store ptr [[TMP18]], ptr [[TMP3]], align 8
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; CHECK:       [[PRED_STORE_CONTINUE]]:
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
@@ -29,8 +25,7 @@ define i32 @test(ptr %vf1, i64 %n) {
 ; CHECK:       [[PRED_STORE_IF1]]:
 ; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 1
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0
-; CHECK-NEXT:    store ptr [[TMP8]], ptr [[TMP7]], align 8
+; CHECK-NEXT:    store ptr [[TMP18]], ptr [[TMP7]], align 8
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
 ; CHECK:       [[PRED_STORE_CONTINUE2]]:
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
@@ -38,8 +33,7 @@ define i32 @test(ptr %vf1, i64 %n) {
 ; CHECK:       [[PRED_STORE_IF3]]:
 ; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP10]]
-; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0
-; CHECK-NEXT:    store ptr [[TMP12]], ptr [[TMP11]], align 8
+; CHECK-NEXT:    store ptr [[TMP18]], ptr [[TMP11]], align 8
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE4]]
 ; CHECK:       [[PRED_STORE_CONTINUE4]]:
 ; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
@@ -47,8 +41,7 @@ define i32 @test(ptr %vf1, i64 %n) {
 ; CHECK:       [[PRED_STORE_IF5]]:
 ; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 3
 ; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP14]]
-; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0
-; CHECK-NEXT:    store ptr [[TMP16]], ptr [[TMP15]], align 8
+; CHECK-NEXT:    store ptr [[TMP18]], ptr [[TMP15]], align 8
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
 ; CHECK:       [[PRED_STORE_CONTINUE6]]:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-div.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-div.ll
index c9bc762fd8573..87f6751b1d013 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-div.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-div.ll
@@ -425,8 +425,7 @@ define void @test_sdiv_variant_dividend_induction(i64 %a, ptr noalias %c) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
-; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i64, ptr [[C]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i64, ptr [[C]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
 ; CHECK-NEXT:    store i64 [[TMP5]], ptr [[TMP4]], align 4
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-masked-mem-opts.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-masked-mem-opts.ll
index 4f0e7282025a1..55c1fb98dbe46 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-masked-mem-opts.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-masked-mem-opts.ll
@@ -57,10 +57,9 @@ define void @simple_memcpy(ptr noalias %dst, ptr noalias %src, i64 %n)  {
 ; CHECK-PREDICATE-NEXT:    [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
 ; CHECK-PREDICATE-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK-PREDICATE:       [[PRED_STORE_IF]]:
-; CHECK-PREDICATE-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
-; CHECK-PREDICATE-NEXT:    [[TMP3:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[TMP2]]
+; CHECK-PREDICATE-NEXT:    [[TMP3:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[INDEX]]
 ; CHECK-PREDICATE-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-; CHECK-PREDICATE-NEXT:    [[TMP5:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP2]]
+; CHECK-PREDICATE-NEXT:    [[TMP5:%.*]] = getelementptr i32, ptr [[DST]], i64 [[INDEX]]
 ; CHECK-PREDICATE-NEXT:    store i32 [[TMP4]], ptr [[TMP5]], align 4
 ; CHECK-PREDICATE-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; CHECK-PREDICATE:       [[PRED_STORE_CONTINUE]]:
@@ -126,8 +125,7 @@ define void @non_consecutive_copy(ptr noalias %dst, ptr noalias %src, i64 %n)  {
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP3]]
 ; CHECK-NEXT:    store i32 [[TMP5]], ptr [[TMP7]], align 4
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; CHECK:       [[PRED_STORE_CONTINUE]]:
@@ -137,8 +135,7 @@ define void @non_consecutive_copy(ptr noalias %dst, ptr noalias %src, i64 %n)  {
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[TMP9]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
-; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
-; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP12]]
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP9]]
 ; CHECK-NEXT:    store i32 [[TMP11]], ptr [[TMP13]], align 4
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE3]]
 ; CHECK:       [[PRED_STORE_CONTINUE3]]:
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll
index 00e04c7daee51..f3fe5cc7bd54d 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll
@@ -15,8 +15,7 @@ define void @canonical_small_tc_i8(ptr nocapture noundef writeonly %p) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
-; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDEX]]
 ; CHECK-NEXT:    store i16 1, ptr [[TMP3]], align 2
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; CHECK:       [[PRED_STORE_CONTINUE]]:
@@ -66,8 +65,7 @@ define void @canonical_upper_limit_i8(ptr nocapture noundef writeonly %p) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
-; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDEX]]
 ; CHECK-NEXT:    store i16 1, ptr [[TMP3]], align 2
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; CHECK:       [[PRED_STORE_CONTINUE]]:
@@ -117,8 +115,7 @@ define void @canonical_lower_limit_i16(ptr nocapture noundef writeonly %p) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
-; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDEX]]
 ; CHECK-NEXT:    store i16 1, ptr [[TMP3]], align 2
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; CHECK:       [[PRED_STORE_CONTINUE]]:
@@ -168,8 +165,7 @@ define void @canonical_upper_limit_i16(ptr nocapture noundef writeonly %p) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
-; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDEX]]
 ; CHECK-NEXT:    store i16 1, ptr [[TMP3]], align 2
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; CHECK:       [[PRED_STORE_CONTINUE]]:
@@ -219,8 +215,7 @@ define void @canonical_lower_limit_i32(ptr nocapture noundef writeonly %p) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
-; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDEX]]
 ; CHECK-NEXT:    store i16 1, ptr [[TMP3]], align 2
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; CHECK:       [[PRED_STORE_CONTINUE]]:
@@ -270,8 +265,7 @@ define void @canonical_upper_limit_i32(ptr nocapture noundef writeonly %p) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
-; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDEX]]
 ; CHECK-NEXT:    store i16 1, ptr [[TMP3]], align 2
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; CHECK:       [[PRED_STORE_CONTINUE]]:
@@ -321,8 +315,7 @@ define void @canonical_lower_limit_i64(ptr nocapture noundef writeonly %p) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
-; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDEX]]
 ; CHECK-NEXT:    store i16 1, ptr [[TMP3]], align 2
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; CHECK:       [[PRED_STORE_CONTINUE]]:
@@ -372,8 +365,7 @@ define void @canonical_upper_limit_i64(ptr nocapture noundef writeonly %p) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
-; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDEX]]
 ; CHECK-NEXT:    store i16 1, ptr [[TMP3]], align 2
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; CHECK:       [[PRED_STORE_CONTINUE]]:
@@ -423,8 +415,7 @@ define void @canonical_lower_limit_i128(ptr nocapture noundef writeonly %p) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
-; CHECK-NEXT:    [[TMP2:%.*]] = add i256 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i256 [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i256 [[INDEX]]
 ; CHECK-NEXT:    store i16 1, ptr [[TMP3]], align 2
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; CHECK:       [[PRED_STORE_CONTINUE]]:
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll
index b6f43aaa86e33..95e602fd5b392 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll
@@ -20,8 +20,7 @@ define void @tail_fold_switch(ptr %dst, i32 %0) {
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
-; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]]
 ; CHECK-NEXT:    store i32 0, ptr [[TMP6]], align 4
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; CHECK:       [[PRED_STORE_CONTINUE]]:
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll
index c6bcd708c0fce..57e0e9b63b8de 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll
@@ -17,17 +17,16 @@ define void @VF1-VPlanExe(ptr %dst) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp ule i64 [[TMP0]], 14
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ule i64 [[INDEX]], 14
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ule i64 [[TMP1]], 14
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp ule i64 [[TMP2]], 14
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp ule i64 [[TMP3]], 14
 ; CHECK-NEXT:    br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    store i32 0, ptr [[TMP8]], align 4
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK:       pred.store.continue:
diff --git a/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll b/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll
index 33ff1f7b767d3..06f6ce536e92c 100644
--- a/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll
+++ b/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll
@@ -156,9 +156,7 @@ define i64 @multi_exit_2_exit_count_with_udiv_by_value_in_block_executed_uncondi
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
-; CHECK-NEXT:    [[TMP20:%.*]] = add i64 [[IV]], 0
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP20]]
-; CHECK-NEXT:    store i32 1, ptr [[TMP9]], align 4
+; CHECK-NEXT:    store i32 1, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK:       pred.store.continue:
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1
@@ -267,9 +265,7 @@ define i64 @multi_exit_2_exit_count_with_udiv_by_constant_in_block_executed_unco
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
-; CHECK-NEXT:    [[TMP20:%.*]] = add i64 [[IV]], 0
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP20]]
-; CHECK-NEXT:    store i32 1, ptr [[TMP9]], align 4
+; CHECK-NEXT:    store i32 1, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK:       pred.store.continue:
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll b/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll
index e1ca138eb0282..3a0418292bd7e 100644
--- a/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll
+++ b/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll
@@ -114,10 +114,6 @@ define void @metadata(ptr nocapture readonly %ptr, i32 %size, ptr %pos) {
 ; FORCED-TF-NEXT:    [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP1]]
 ; FORCED-TF-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP2]]
 ; FORCED-TF-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP3]]
-; FORCED-TF-NEXT:    [[TMP4:%.*]] = insertelement <4 x ptr> poison, ptr [[NEXT_GEP]], i32 0
-; FORCED-TF-NEXT:    [[TMP5:%.*]] = insertelement <4 x ptr> [[TMP4]], ptr [[NEXT_GEP1]], i32 1
-; FORCED-TF-NEXT:    [[TMP6:%.*]] = insertelement <4 x ptr> [[TMP5]], ptr [[NEXT_GEP2]], i32 2
-; FORCED-TF-NEXT:    [[TMP7:%.*]] = insertelement <4 x ptr> [[TMP6]], ptr [[NEXT_GEP3]], i32 3
 ; FORCED-TF-NEXT:    [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
 ; FORCED-TF-NEXT:    [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; FORCED-TF-NEXT:    [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 0, i32 1, i32 2, i32 3>
@@ -185,10 +181,6 @@ define void @metadata(ptr nocapture readonly %ptr, i32 %size, ptr %pos) {
 ; CHECK-NEXT:    [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP1]]
 ; CHECK-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP2]]
 ; CHECK-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP3]]
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x ptr> poison, ptr [[NEXT_GEP]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x ptr> [[TMP4]], ptr [[NEXT_GEP1]], i32 1
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x ptr> [[TMP5]], ptr [[NEXT_GEP2]], i32 2
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x ptr> [[TMP6]], ptr [[NEXT_GEP3]], i32 3
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 0, i32 1, i32 2, i32 3>
diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
index 5da6fc3179043..cfeaaf88643d0 100644
--- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
+++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
@@ -629,10 +629,7 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
 ; VF8UF1-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0
 ; VF8UF1-NEXT:    br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; VF8UF1:       [[PRED_STORE_IF]]:
-; VF8UF1-NEXT:    [[TMP5:%.*]] = mul i64 0, [[STEP]]
-; VF8UF1-NEXT:    [[TMP6:%.*]] = add i64 0, [[TMP5]]
-; VF8UF1-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], [[STEP]]
-; VF8UF1-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]]
+; VF8UF1-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[STEP]]
 ; VF8UF1-NEXT:    store i8 0, ptr [[TMP8]], align 1
 ; VF8UF1-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; VF8UF1:       [[PRED_STORE_CONTINUE]]:
@@ -733,10 +730,7 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
 ; VF8UF2-NEXT:    [[TMP5:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0
 ; VF8UF2-NEXT:    br i1 [[TMP5]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; VF8UF2:       [[PRED_STORE_IF]]:
-; VF8UF2-NEXT:    [[TMP6:%.*]] = mul i64 0, [[STEP]]
-; VF8UF2-NEXT:    [[TMP7:%.*]] = add i64 0, [[TMP6]]
-; VF8UF2-NEXT:    [[TMP8:%.*]] = add i64 [[TMP7]], [[STEP]]
-; VF8UF2-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP8]]
+; VF8UF2-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[DST]], i64 [[STEP]]
 ; VF8UF2-NEXT:    store i8 0, ptr [[TMP9]], align 1
 ; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; VF8UF2:       [[PRED_STORE_CONTINUE]]:
@@ -916,10 +910,7 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
 ; VF16UF1-NEXT:    [[TMP4:%.*]] = extractelement <16 x i1> [[TMP3]], i32 0
 ; VF16UF1-NEXT:    br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; VF16UF1:       [[PRED_STORE_IF]]:
-; VF16UF1-NEXT:    [[TMP5:%.*]] = mul i64 0, [[STEP]]
-; VF16UF1-NEXT:    [[TMP6:%.*]] = add i64 0, [[TMP5]]
-; VF16UF1-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], [[STEP]]
-; VF16UF1-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]]
+; VF16UF1-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[STEP]]
 ; VF16UF1-NEXT:    store i8 0, ptr [[TMP8]], align 1
 ; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; VF16UF1:       [[PRED_STORE_CONTINUE]]:

>From c22bb78e053484acea47c5f5595518197437ccd4 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 10 Mar 2026 10:23:04 +0000
Subject: [PATCH 2/2] !fixup address latest comments, thanks

---
 llvm/lib/Transforms/Vectorize/VPlan.cpp       |  14 +--
 llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 107 +++++++++---------
 llvm/lib/Transforms/Vectorize/VPlanUtils.h    |   7 ++
 .../VPlan/interleave-and-scalarize-only.ll    |   3 +-
 .../LoopVectorize/cast-induction.ll           |   7 +-
 .../pr45679-fold-tail-by-masking.ll           |  10 +-
 .../tail-folding-vectorization-factor-1.ll    |   5 +-
 7 files changed, 79 insertions(+), 74 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 0ceeb570e8b1f..7dabaabdb81b7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -689,14 +689,8 @@ void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
 }
 #endif
 
-static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry);
-
-// Clone the CFG for all nodes reachable from \p Entry, this includes cloning
-// the blocks and their recipes. Operands of cloned recipes will NOT be updated.
-// Remapping of operands must be done separately. Returns a pair with the new
-// entry and exiting blocks of the cloned region. If \p Entry isn't part of a
-// region, return nullptr for the exiting block.
-static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) {
+std::pair<VPBlockBase *, VPBlockBase *>
+VPBlockUtils::cloneFrom(VPBlockBase *Entry) {
   DenseMap<VPBlockBase *, VPBlockBase *> Old2NewVPBlocks;
   VPBlockBase *Exiting = nullptr;
   bool InRegion = Entry->getParent();
@@ -747,7 +741,7 @@ static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) {
 }
 
 VPRegionBlock *VPRegionBlock::clone() {
-  const auto &[NewEntry, NewExiting] = cloneFrom(getEntry());
+  const auto &[NewEntry, NewExiting] = VPBlockUtils::cloneFrom(getEntry());
   VPlan &Plan = *getPlan();
   VPRegionBlock *NewRegion =
       isReplicator()
@@ -1186,7 +1180,7 @@ static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
 VPlan *VPlan::duplicate() {
   unsigned NumBlocksBeforeCloning = CreatedBlocks.size();
   // Clone blocks.
-  const auto &[NewEntry, __] = cloneFrom(Entry);
+  const auto &[NewEntry, __] = VPBlockUtils::cloneFrom(Entry);
 
   BasicBlock *ScalarHeaderIRBB = getScalarHeader()->getIRBasicBlock();
   VPIRBasicBlock *NewScalarHeader = nullptr;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 20e99b8b3377d..d9590b6243689 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -545,10 +545,9 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF) {
 }
 
 /// Add a start index to \p Steps for \p Lane by combining any existing start
-/// index with the lane offset. \p Builder is used to insert new recipes before
-/// \p Steps.
+/// index with the lane offset.
 static void addStartIndexForLane(VPScalarIVStepsRecipe *Steps, unsigned Lane,
-                                 VPlan &Plan, VPBuilder &Builder) {
+                                 VPlan &Plan) {
   VPTypeAnalysis TypeInfo(Plan);
   Type *BaseIVTy = TypeInfo.inferScalarType(Steps->getOperand(0));
 
@@ -571,8 +570,10 @@ static void addStartIndexForLane(VPScalarIVStepsRecipe *Steps, unsigned Lane,
     Flags = VPIRFlags(VPIRFlags::WrapFlagsTy(false, false));
   }
 
-  if (StartIndex)
+  if (StartIndex) {
+    VPBuilder Builder(Steps);
     LaneOffset = Builder.createNaryOp(AddOp, {StartIndex, LaneOffset}, Flags);
+  }
   Steps->setStartIndex(LaneOffset);
 }
 
@@ -650,8 +651,9 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
       // Skip lane 0: an absent start index is implicitly zero.
       unsigned KnownLane = Lane.getKnownLane();
       if (KnownLane != 0) {
-        VPBuilder LaneBuilder(DefR);
-        addStartIndexForLane(Steps, KnownLane, Plan, LaneBuilder);
+        New->insertBefore(DefR);
+        addStartIndexForLane(Steps, KnownLane, Plan);
+        return New;
       }
     }
   }
@@ -739,18 +741,18 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
 /// VPReplicateRecipes are converted to single-scalar ones, branch-on-mask is
 /// converted into BranchOnCond and extracts are created as needed.
 static void convertRecipesInRegionBlocksToSingleScalar(
-    VPlan &Plan, Type *IdxTy, ElementCount VF,
-    ArrayRef<VPBlockBase *> RegionBlocks) {
-  for (VPBlockBase *VPB : RegionBlocks) {
-    for (VPRecipeBase &NewR : make_early_inc_range(*cast<VPBasicBlock>(VPB))) {
+    VPlan &Plan, Type *IdxTy, ArrayRef<VPBasicBlock *> RegionBlocks) {
+  for (VPBasicBlock *VPB : RegionBlocks) {
+    for (VPRecipeBase &NewR : make_early_inc_range(*VPB)) {
       VPBuilder Builder(&NewR);
+      assert(!match(&NewR, m_ExtractElement(m_VPValue(), m_VPValue())) &&
+             "must not contain extracts before conversion");
       for (const auto &[I, Op] : enumerate(NewR.operands())) {
-        // Skip operands that don't need extraction: scalar VF (no vectors),
-        // values defined in the same block (already scalar), or values that
-        // are already single scalars.
+        // Skip operands that don't need extraction: values defined in the
+        // same block (already scalar), or values that are already single
+        // scalars.
         auto *DefR = Op->getDefiningRecipe();
-        if (VF.isScalar() || (DefR && DefR->getParent() == VPB) ||
-            vputils::isSingleScalar(Op))
+        if ((DefR && DefR->getParent() == VPB) || vputils::isSingleScalar(Op))
           continue;
 
         // Extract the lane from values defined outside the region.
@@ -780,25 +782,22 @@ static void convertRecipesInRegionBlocksToSingleScalar(
 
 /// Process recipes in a single lane's blocks, updating them for lane-specific
 /// operations.
-static void processLaneForReplicateRegion(
-    VPlan &Plan, Type *IdxTy, unsigned Lane,
-    ArrayRef<VPBlockBase *> RegionBlocks,
-    DenseMap<VPBlockBase *, VPBlockBase *> &Old2NewBlocks) {
+static void processLaneForReplicateRegion(VPlan &Plan, Type *IdxTy,
+                                          unsigned Lane,
+                                          ArrayRef<VPBasicBlock *> RegionBlocks,
+                                          ArrayRef<VPBasicBlock *> NewBlocks) {
   DenseMap<VPValue *, VPValue *> Old2NewVPValues;
-  for (VPBlockBase *OldVPB : RegionBlocks) {
-    auto *OldBB = cast<VPBasicBlock>(OldVPB);
-    auto *NewBB = cast<VPBasicBlock>(Old2NewBlocks.lookup(OldVPB));
-    for (const auto &[OldR, NewR] : zip(*OldBB, *NewBB)) {
+  for (const auto &[OldBB, NewBB] : zip_equal(RegionBlocks, NewBlocks)) {
+    for (const auto &[OldR, NewR] : zip_equal(*OldBB, *NewBB)) {
       for (const auto &[OldV, NewV] :
-           zip(OldR.definedValues(), NewR.definedValues()))
+           zip_equal(OldR.definedValues(), NewR.definedValues()))
         Old2NewVPValues[OldV] = NewV;
     }
 
     // Update lane operands and remap operands to use copies for current lane.
     for (VPRecipeBase &NewR : make_early_inc_range(*NewBB)) {
       if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(&NewR)) {
-        VPBuilder Builder(Steps);
-        addStartIndexForLane(Steps, Lane, Plan, Builder);
+        addStartIndexForLane(Steps, Lane, Plan);
       } else if (match(&NewR, m_ExtractElement(m_VPValue(), m_ZeroInt()))) {
         NewR.setOperand(1, Plan.getConstantInt(IdxTy, Lane));
       }
@@ -822,6 +821,9 @@ void VPlanTransforms::unrollReplicateRegions(VPlan &Plan, ElementCount VF) {
       ReplicateRegions.push_back(Region);
   }
 
+  if (VF.isScalar())
+    return;
+
   Type *IdxTy = IntegerType::get(Plan.getContext(), 32);
   for (VPRegionBlock *Region : ReplicateRegions) {
     assert(!VF.isScalable() && "cannot replicate across scalable VFs");
@@ -832,6 +834,8 @@ void VPlanTransforms::unrollReplicateRegions(VPlan &Plan, ElementCount VF) {
     if (any_of(*cast<VPBasicBlock>(Exiting), IsaPred<VPPredInstPHIRecipe>))
       continue;
 
+    VPBlockBase *Entry = Region->getEntry();
+
     // Disconnect and dissolve the region.
     VPBlockBase *Pred = Region->getSinglePredecessor();
     assert(Pred && "Replicate region must have a single predecessor");
@@ -840,43 +844,38 @@ void VPlanTransforms::unrollReplicateRegions(VPlan &Plan, ElementCount VF) {
     for (VPBlockBase *Succ : Successors)
       VPBlockUtils::disconnectBlocks(Region, Succ);
 
-    VPBlockBase *Entry = Region->getEntry();
-    SmallVector<VPBlockBase *> RegionBlocks(vp_depth_first_shallow(Entry));
+    SmallVector<SmallVector<VPBasicBlock *>> AllLaneBlocks(
+        VF.getKnownMinValue());
     VPRegionBlock *ParentRegion = Region->getParent();
-    for (VPBlockBase *Block : RegionBlocks)
-      Block->setParent(ParentRegion);
-    VPBlockUtils::connectBlocks(Pred, Entry);
+    for (VPBlockBase *VPB : vp_depth_first_shallow(Entry)) {
+      auto *BB = cast<VPBasicBlock>(VPB);
+      BB->setParent(ParentRegion);
+      AllLaneBlocks[0].push_back(BB);
+    }
 
     // Process lane 0: convert original blocks to single-scalar.
-    convertRecipesInRegionBlocksToSingleScalar(Plan, IdxTy, VF, RegionBlocks);
-    SmallVector<std::pair<VPBlockBase *, VPBlockBase *>> LaneClones;
-    LaneClones.push_back({Entry, Exiting});
+    convertRecipesInRegionBlocksToSingleScalar(Plan, IdxTy, AllLaneBlocks[0]);
 
-    // Clone blocks for remaining lanes.
+    // Clone converted blocks for remaining lanes and process each. All
+    // connections are deferred until after cloning, so cloneFrom doesn't
+    // traverse through connected blocks from previous lanes.
     for (unsigned Lane = 1; Lane < VF.getKnownMinValue(); ++Lane) {
-      DenseMap<VPBlockBase *, VPBlockBase *> Old2NewBlocks;
-      for (VPBlockBase *OrigBlock : RegionBlocks) {
-        VPBlockBase *ClonedBlock = OrigBlock->clone();
-        Old2NewBlocks[OrigBlock] = ClonedBlock;
-        ClonedBlock->setParent(ParentRegion);
+      VPBlockBase *NewEntry = VPBlockUtils::cloneFrom(Entry).first;
+      for (VPBlockBase *VPB : vp_depth_first_shallow(NewEntry)) {
+        auto *BB = cast<VPBasicBlock>(VPB);
+        BB->setParent(ParentRegion);
+        AllLaneBlocks[Lane].push_back(BB);
       }
-      for (VPBlockBase *OrigBlock : RegionBlocks) {
-        if (OrigBlock == Exiting)
-          continue;
-        for (VPBlockBase *OrigSucc : OrigBlock->successors())
-          VPBlockUtils::connectBlocks(Old2NewBlocks[OrigBlock],
-                                      Old2NewBlocks[OrigSucc]);
-      }
-      processLaneForReplicateRegion(Plan, IdxTy, Lane, RegionBlocks,
-                                    Old2NewBlocks);
-      LaneClones.push_back({Old2NewBlocks[Entry], Old2NewBlocks[Exiting]});
+      processLaneForReplicateRegion(Plan, IdxTy, Lane, AllLaneBlocks[0],
+                                    AllLaneBlocks[Lane]);
     }
 
-    // Connect lanes sequentially and connect last lane to successors.
+    // Connect Pred to lane 0, lanes sequentially, and last lane to successors.
+    VPBlockUtils::connectBlocks(Pred, Entry);
     for (unsigned Lane = 1; Lane < VF.getKnownMinValue(); ++Lane)
-      VPBlockUtils::connectBlocks(LaneClones[Lane - 1].second,
-                                  LaneClones[Lane].first);
+      VPBlockUtils::connectBlocks(AllLaneBlocks[Lane - 1].back(),
+                                  AllLaneBlocks[Lane].front());
     for (VPBlockBase *Succ : Successors)
-      VPBlockUtils::connectBlocks(LaneClones.back().second, Succ);
+      VPBlockUtils::connectBlocks(AllLaneBlocks.back().back(), Succ);
   }
 }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index a5692699d9d76..14bab4ca8f1c8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -263,6 +263,13 @@ class VPBlockUtils {
     Old->clearSuccessors();
   }
 
+  /// Clone the CFG for all nodes reachable from \p Entry, including cloning
+  /// the blocks and their recipes. Operands of cloned recipes will NOT be
+  /// updated. Remapping of operands must be done separately. Returns a pair
+  /// with the new entry and exiting blocks of the cloned region. If \p Entry
+  /// isn't part of a region, return nullptr for the exiting block.
+  static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry);
+
   /// Return an iterator range over \p Range which only includes \p BlockTy
   /// blocks. The accesses are casted to \p BlockTy.
   template <typename BlockTy, typename T>
diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/VPlan/interleave-and-scalarize-only.ll
index 0a17319c04d35..a641e7ef54de7 100644
--- a/llvm/test/Transforms/LoopVectorize/VPlan/interleave-and-scalarize-only.ll
+++ b/llvm/test/Transforms/LoopVectorize/VPlan/interleave-and-scalarize-only.ll
@@ -136,8 +136,9 @@ define void @test_scalarize_with_branch_cond(ptr %src, ptr %dst) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %pred.store.continue4 ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = trunc i64 [[INDEX]] to i1
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i1 false, [[TMP0]]
+; CHECK-NEXT:    [[INDUCTION2:%.*]] = add i1 [[OFFSET_IDX]], false
 ; CHECK-NEXT:    [[INDUCTION3:%.*]] = add i1 [[OFFSET_IDX]], true
-; CHECK-NEXT:    br i1 [[OFFSET_IDX]], label %pred.store.if, label %pred.store.continue
+; CHECK-NEXT:    br i1 [[INDUCTION2]], label %pred.store.if, label %pred.store.continue
 ; CHECK:       pred.store.if:
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr %src, i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/cast-induction.ll b/llvm/test/Transforms/LoopVectorize/cast-induction.ll
index e5069d97f3117..e93b84c2f5603 100644
--- a/llvm/test/Transforms/LoopVectorize/cast-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/cast-induction.ll
@@ -273,14 +273,15 @@ define void @cast_induction_tail_folding(ptr %A) {
 ; IC2-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; IC2:       [[VECTOR_BODY]]:
 ; IC2-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
+; IC2-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; IC2-NEXT:    [[INDEX1:%.*]] = add i32 [[INDEX]], 1
-; IC2-NEXT:    [[TMP2:%.*]] = icmp ule i32 [[INDEX]], 2
+; IC2-NEXT:    [[TMP2:%.*]] = icmp ule i32 [[TMP0]], 2
 ; IC2-NEXT:    [[TMP3:%.*]] = icmp ule i32 [[INDEX1]], 2
 ; IC2-NEXT:    br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; IC2:       [[PRED_STORE_IF]]:
-; IC2-NEXT:    [[TMP4:%.*]] = sext i32 [[INDEX]] to i64
+; IC2-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP0]] to i64
 ; IC2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]]
-; IC2-NEXT:    store i32 [[INDEX]], ptr [[TMP5]], align 4
+; IC2-NEXT:    store i32 [[TMP0]], ptr [[TMP5]], align 4
 ; IC2-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; IC2:       [[PRED_STORE_CONTINUE]]:
 ; IC2-NEXT:    br i1 [[TMP3]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
diff --git a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
index c07134229322e..610da05dbbab7 100644
--- a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
@@ -118,16 +118,17 @@ define void @pr45679(ptr %A) {
 ; VF1UF4-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; VF1UF4:       vector.body:
 ; VF1UF4-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
+; VF1UF4-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; VF1UF4-NEXT:    [[TMP1:%.*]] = add i32 [[INDEX]], 1
 ; VF1UF4-NEXT:    [[TMP2:%.*]] = add i32 [[INDEX]], 2
 ; VF1UF4-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 3
-; VF1UF4-NEXT:    [[TMP4:%.*]] = icmp ule i32 [[INDEX]], 13
+; VF1UF4-NEXT:    [[TMP4:%.*]] = icmp ule i32 [[TMP0]], 13
 ; VF1UF4-NEXT:    [[TMP5:%.*]] = icmp ule i32 [[TMP1]], 13
 ; VF1UF4-NEXT:    [[TMP6:%.*]] = icmp ule i32 [[TMP2]], 13
 ; VF1UF4-NEXT:    [[TMP7:%.*]] = icmp ule i32 [[TMP3]], 13
 ; VF1UF4-NEXT:    br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; VF1UF4:       pred.store.if:
-; VF1UF4-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
+; VF1UF4-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP0]]
 ; VF1UF4-NEXT:    store i32 13, ptr [[TMP8]], align 1
 ; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; VF1UF4:       pred.store.continue:
@@ -288,16 +289,17 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) {
 ; VF1UF4-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; VF1UF4:       vector.body:
 ; VF1UF4-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
+; VF1UF4-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; VF1UF4-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
 ; VF1UF4-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
 ; VF1UF4-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; VF1UF4-NEXT:    [[TMP4:%.*]] = icmp ule i64 [[INDEX]], 13
+; VF1UF4-NEXT:    [[TMP4:%.*]] = icmp ule i64 [[TMP0]], 13
 ; VF1UF4-NEXT:    [[TMP5:%.*]] = icmp ule i64 [[TMP1]], 13
 ; VF1UF4-NEXT:    [[TMP6:%.*]] = icmp ule i64 [[TMP2]], 13
 ; VF1UF4-NEXT:    [[TMP7:%.*]] = icmp ule i64 [[TMP3]], 13
 ; VF1UF4-NEXT:    br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; VF1UF4:       pred.store.if:
-; VF1UF4-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
+; VF1UF4-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
 ; VF1UF4-NEXT:    [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8
 ; VF1UF4-NEXT:    store i64 [[TMP9]], ptr [[B:%.*]], align 8
 ; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE]]
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll
index 57e0e9b63b8de..c6bcd708c0fce 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll
@@ -17,16 +17,17 @@ define void @VF1-VPlanExe(ptr %dst) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp ule i64 [[INDEX]], 14
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ule i64 [[TMP0]], 14
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ule i64 [[TMP1]], 14
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp ule i64 [[TMP2]], 14
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp ule i64 [[TMP3]], 14
 ; CHECK-NEXT:    br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT:    store i32 0, ptr [[TMP8]], align 4
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK:       pred.store.continue:



More information about the llvm-commits mailing list