[llvm-branch-commits] [llvm] e251f57 - Revert "[VPlan] Explicitly unroll replicate-regions without live-outs by VF. …"
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Mar 26 15:52:29 PDT 2026
Author: Florian Hahn
Date: 2026-03-26T22:52:25Z
New Revision: e251f57b911cdf395850c2f35dc474e9e68cab96
URL: https://github.com/llvm/llvm-project/commit/e251f57b911cdf395850c2f35dc474e9e68cab96
DIFF: https://github.com/llvm/llvm-project/commit/e251f57b911cdf395850c2f35dc474e9e68cab96.diff
LOG: Revert "[VPlan] Explicitly unroll replicate-regions without live-outs by VF. …"
This reverts commit cb1661b0460627bac4407fda129febefbf3e738a.
Added:
Modified:
llvm/lib/Transforms/Vectorize/VPlan.h
llvm/lib/Transforms/Vectorize/VPlanTransforms.h
llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
llvm/test/Transforms/LoopVectorize/AArch64/fold-tail-low-trip-count.ll
llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll
llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll
llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-fold-tail.ll
llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll
llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll
llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll
llvm/test/Transforms/LoopVectorize/VPlan/interleave-and-scalarize-only.ll
llvm/test/Transforms/LoopVectorize/VPlan/vplan-predicate-switch.ll
llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll
llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll
llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll
llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll
llvm/test/Transforms/LoopVectorize/X86/small-size.ll
llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll
llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll
llvm/test/Transforms/LoopVectorize/as_cast.ll
llvm/test/Transforms/LoopVectorize/cast-induction.ll
llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll
llvm/test/Transforms/LoopVectorize/constant-fold-commutative-and.ll
llvm/test/Transforms/LoopVectorize/constantfolder.ll
llvm/test/Transforms/LoopVectorize/cse-casts.ll
llvm/test/Transforms/LoopVectorize/debugloc.ll
llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
llvm/test/Transforms/LoopVectorize/find-last-iv-sinkable-load.ll
llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
llvm/test/Transforms/LoopVectorize/float-induction.ll
llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll
llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll
llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll
llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
llvm/test/Transforms/LoopVectorize/loop-form.ll
llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll
llvm/test/Transforms/LoopVectorize/pointer-induction.ll
llvm/test/Transforms/LoopVectorize/pr37248.ll
llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
llvm/test/Transforms/LoopVectorize/predicate-switch.ll
llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll
llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll
llvm/test/Transforms/LoopVectorize/struct-return.ll
llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll
llvm/test/Transforms/LoopVectorize/tail-folding-div.ll
llvm/test/Transforms/LoopVectorize/tail-folding-masked-mem-opts.ll
llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll
llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll
llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll
llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll
llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 92b7a2c3c6bac..9ed9d07151d7f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3198,7 +3198,7 @@ class LLVM_ABI_FOR_TEST VPReductionEVLRecipe : public VPReductionRecipe {
/// VPReplicateRecipe replicates a given instruction producing multiple scalar
/// copies of the original scalar type, one per lane, instead of producing a
/// single copy of widened type for all lanes. If the instruction is known to be
-/// a single scalar, only one copy will be generated.
+/// a single scalar, only one copy, per lane zero, will be generated.
class LLVM_ABI_FOR_TEST VPReplicateRecipe : public VPRecipeWithIRFlags,
public VPIRMetadata {
/// Indicator if only a single replica per lane is needed.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 09abfd5366aa6..45bb169b3bb70 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -230,11 +230,10 @@ struct VPlanTransforms {
/// Explicitly unroll \p Plan by \p UF.
static void unrollByUF(VPlan &Plan, unsigned UF);
- /// Replace replicating VPReplicateRecipe, VPScalarIVStepsRecipe and
- /// VPInstruction in \p Plan with \p VF single-scalar recipes. Replicate
- /// regions are dissolved by replicating their blocks and their recipes \p VF
- /// times.
- /// TODO: Also dissolve replicate regions with live outs.
+ /// Replace each replicating VPReplicateRecipe and VPInstruction outside of
+ /// any replicate region in \p Plan with \p VF single-scalar recipes.
+ /// TODO: Also replicate VPScalarIVSteps and VPReplicateRecipes inside
+ /// replicate regions, thereby dissolving the latter.
static void replicateByVF(VPlan &Plan, ElementCount VF);
/// Optimize \p Plan based on \p BestVF and \p BestUF. This may restrict the
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index c0404c92ad641..881f5c341b225 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -660,160 +660,6 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
return New;
}
-/// Convert recipes in region blocks to operate on a single lane 0.
-/// VPReplicateRecipes are converted to single-scalar ones, branch-on-mask is
-/// converted into BranchOnCond and extracts are created as needed.
-static void convertRecipesInRegionBlocksToSingleScalar(VPlan &Plan, Type *IdxTy,
- VPBlockBase *Entry) {
- VPValue *Idx0 = Plan.getZero(IdxTy);
- for (VPBlockBase *VPB : vp_depth_first_shallow(Entry)) {
- for (VPRecipeBase &OldR : make_early_inc_range(cast<VPBasicBlock>(*VPB))) {
- VPBuilder Builder(&OldR);
- assert(!match(&OldR, m_ExtractElement(m_VPValue(), m_VPValue())) &&
- "must not contain extracts before conversion");
- for (const auto &[I, Op] : enumerate(OldR.operands())) {
- // Skip operands that don't need extraction: values defined in the
- // same block (already scalar), or values that are already single
- // scalars.
- auto *DefR = Op->getDefiningRecipe();
- if ((DefR && DefR->getParent() == VPB) || vputils::isSingleScalar(Op))
- continue;
-
- // Extract lane zero from values defined outside the region.
- VPValue *Extract = Builder.createNaryOp(Instruction::ExtractElement,
- {Op, Idx0}, OldR.getDebugLoc());
- OldR.setOperand(I, Extract);
- }
-
- if (auto *RepR = dyn_cast<VPReplicateRecipe>(&OldR)) {
- auto *NewR =
- new VPReplicateRecipe(RepR->getUnderlyingInstr(), RepR->operands(),
- /* IsSingleScalar=*/true, /*Mask=*/nullptr,
- *RepR, *RepR, RepR->getDebugLoc());
- NewR->insertBefore(RepR);
- RepR->replaceAllUsesWith(NewR);
- RepR->eraseFromParent();
- } else if (auto *BranchOnMask = dyn_cast<VPBranchOnMaskRecipe>(&OldR)) {
- Builder.createNaryOp(VPInstruction::BranchOnCond,
- {BranchOnMask->getOperand(0)},
- BranchOnMask->getDebugLoc());
- BranchOnMask->eraseFromParent();
- } else {
- assert((isa<VPScalarIVStepsRecipe>(OldR) ||
- (isa<VPInstruction>(OldR) &&
- vputils::isSingleScalar(OldR.getVPSingleValue()))) &&
- "unexpected unhandled recipe");
- }
- }
- }
-}
-
-/// Update recipes in the cloned blocks rooted at \p NewEntry to match \p Lane,
-/// using the original blocks rooted at \p OldEntry as reference.
-static void processLaneForReplicateRegion(VPlan &Plan, Type *IdxTy,
- unsigned Lane, VPBasicBlock *OldEntry,
- VPBasicBlock *NewEntry) {
- DenseMap<VPValue *, VPValue *> Old2NewVPValues;
- VPValue *IdxLane = Plan.getConstantInt(IdxTy, Lane);
- for (const auto &[OldBB, NewBB] :
- zip_equal(vp_depth_first_shallow(OldEntry),
- vp_depth_first_shallow(NewEntry))) {
- for (auto &&[OldR, NewR] :
- zip_equal(*cast<VPBasicBlock>(OldBB), *cast<VPBasicBlock>(NewBB))) {
- for (const auto &[OldV, NewV] :
- zip_equal(OldR.definedValues(), NewR.definedValues()))
- Old2NewVPValues[OldV] = NewV;
-
- if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(&NewR))
- addLaneToStartIndex(Steps, Lane, Plan, Steps);
- else if (match(&NewR, m_ExtractElement(m_VPValue(), m_ZeroInt())))
- NewR.setOperand(1, IdxLane);
-
- // Remap operands to use lane-specific values.
- for (const auto &[I, OldOp] : enumerate(NewR.operands())) {
- // Use cloned value if operand was defined in the region.
- if (auto *NewOp = Old2NewVPValues.lookup(OldOp))
- NewR.setOperand(I, NewOp);
- }
- }
- }
-}
-
-/// Dissolve a single replicate region by replicating its blocks for each lane
-/// of \p VF. The region is disconnected, its blocks are reparented, cloned for
-/// each lane, and reconnected in sequence.
-static void dissolveReplicateRegion(VPRegionBlock *Region, ElementCount VF,
- VPlan &Plan, Type *IdxTy) {
- VPBlockBase *FirstLaneEntry = Region->getEntry();
- VPBlockBase *FirstLaneExiting = Region->getExiting();
-
- // Disconnect and dissolve the region.
- VPBlockBase *Predecessor = Region->getSinglePredecessor();
- assert(Predecessor && "Replicate region must have a single predecessor");
- VPBlockBase *Successor = Region->getSingleSuccessor();
- assert(Successor && "Replicate region must have a single successor");
- VPBlockUtils::disconnectBlocks(Predecessor, Region);
- VPBlockUtils::disconnectBlocks(Region, Successor);
-
- VPRegionBlock *ParentRegion = Region->getParent();
- for (VPBlockBase *VPB : vp_depth_first_shallow(FirstLaneEntry))
- VPB->setParent(ParentRegion);
-
- // Process the original blocks for lane 0: converting their recipes to
- // single-scalar.
- convertRecipesInRegionBlocksToSingleScalar(Plan, IdxTy, FirstLaneEntry);
-
- // Clone converted blocks for remaining lanes and process each in reverse
- // order, connecting each lane's Exiting block to the subsequent lane's entry.
- VPBlockBase *NextLaneEntry = Successor;
- unsigned NumLanes = VF.getFixedValue();
- for (int Lane = NumLanes - 1; Lane > 0; --Lane) {
- VPBlockBase *CurrentLaneEntry =
- VPBlockUtils::cloneFrom(FirstLaneEntry).first;
- VPBlockBase *CurrentLaneExiting;
- for (VPBlockBase *VPB : vp_depth_first_shallow(CurrentLaneEntry)) {
- VPB->setParent(ParentRegion);
- CurrentLaneExiting = VPB;
- }
- processLaneForReplicateRegion(Plan, IdxTy, Lane,
- cast<VPBasicBlock>(FirstLaneEntry),
- cast<VPBasicBlock>(CurrentLaneEntry));
- VPBlockUtils::connectBlocks(CurrentLaneExiting, NextLaneEntry);
- NextLaneEntry = CurrentLaneEntry;
- }
-
- // Connect Predecessor to FirstLaneEntry, and FirstLaneRegionExit to
- // NextLaneEntry which is the second lane region entry. The latter is
- // done last so that earlier clonings from FirstLaneEntry stop at
- // FirstLaneExiting.
- VPBlockUtils::connectBlocks(Predecessor, FirstLaneEntry);
- VPBlockUtils::connectBlocks(FirstLaneExiting, NextLaneEntry);
-}
-
-/// Collect and dissolve all replicate regions in the vector loop, replicating
-/// their blocks and recipes for each lane of \p VF.
-static void replicateReplicateRegionsByVF(VPlan &Plan, ElementCount VF,
- Type *IdxTy) {
- // Collect all replicate regions before modifying the CFG.
- SmallVector<VPRegionBlock *> ReplicateRegions;
- for (VPRegionBlock *Region : VPBlockUtils::blocksOnly<VPRegionBlock>(
- vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
- // Skip regions with live-outs as packing scalar results back into vectors
- // is not yet implemented.
- if (Region->isReplicator() && Region->getExitingBasicBlock()->empty())
- ReplicateRegions.push_back(Region);
- }
-
- assert((ReplicateRegions.empty() || !VF.isScalable()) &&
- "cannot replicate across scalable VFs");
-
- // Dissolve replicate regions by replicating their blocks for each lane.
- for (VPRegionBlock *Region : ReplicateRegions)
- dissolveReplicateRegion(Region, VF, Plan, IdxTy);
-
- VPlanTransforms::mergeBlocksIntoPredecessors(Plan);
-}
-
void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
if (Plan.hasScalarVFOnly())
return;
@@ -887,6 +733,4 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
}
for (auto *R : reverse(ToRemove))
R->eraseFromParent();
-
- replicateReplicateRegionsByVF(Plan, VF, IdxTy);
}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
index 2364dbe36cdc1..898685e7ddbd1 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
@@ -225,7 +225,8 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <16 x i1> [[TMP7]], i32 0
; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[IV]]
+; CHECK-NEXT: [[TMP72:%.*]] = add i32 [[IV]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP72]]
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 0
; CHECK-NEXT: store i8 [[TMP10]], ptr [[TMP9]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
index cec944ee831da..e5efddaac9d1a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
@@ -296,7 +296,8 @@ define void @latch_branch_cost(ptr %dst) {
; PRED-NEXT: [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
; PRED-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; PRED: [[PRED_STORE_IF]]:
-; PRED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
+; PRED-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
+; PRED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP2]]
; PRED-NEXT: store i8 0, ptr [[TMP3]], align 1
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE]]
; PRED: [[PRED_STORE_CONTINUE]]:
@@ -752,11 +753,14 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; DEFAULT: [[PRED_STORE_IF]]:
; DEFAULT-NEXT: [[TMP25:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 0
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP25]], align 4
-; DEFAULT-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[TMP25]], i64 4
+; DEFAULT-NEXT: [[TMP26:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 0
+; DEFAULT-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[TMP26]], i64 4
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP27]], align 4
-; DEFAULT-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[TMP25]], i64 8
+; DEFAULT-NEXT: [[TMP28:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 0
+; DEFAULT-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[TMP28]], i64 8
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP29]], align 4
-; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP25]], align 4
+; DEFAULT-NEXT: [[TMP30:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 0
+; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP30]], align 4
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE]]
; DEFAULT: [[PRED_STORE_CONTINUE]]:
; DEFAULT-NEXT: [[TMP31:%.*]] = extractelement <8 x i1> [[TMP22]], i32 1
@@ -764,11 +768,14 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; DEFAULT: [[PRED_STORE_IF14]]:
; DEFAULT-NEXT: [[TMP32:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 1
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP32]], align 4
-; DEFAULT-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[TMP32]], i64 4
+; DEFAULT-NEXT: [[TMP33:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 1
+; DEFAULT-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[TMP33]], i64 4
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP34]], align 4
-; DEFAULT-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr [[TMP32]], i64 8
+; DEFAULT-NEXT: [[TMP35:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 1
+; DEFAULT-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr [[TMP35]], i64 8
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP36]], align 4
-; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP32]], align 4
+; DEFAULT-NEXT: [[TMP37:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 1
+; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP37]], align 4
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE15]]
; DEFAULT: [[PRED_STORE_CONTINUE15]]:
; DEFAULT-NEXT: [[TMP38:%.*]] = extractelement <8 x i1> [[TMP22]], i32 2
@@ -776,11 +783,14 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; DEFAULT: [[PRED_STORE_IF16]]:
; DEFAULT-NEXT: [[TMP39:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 2
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP39]], align 4
-; DEFAULT-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr [[TMP39]], i64 4
+; DEFAULT-NEXT: [[TMP40:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 2
+; DEFAULT-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr [[TMP40]], i64 4
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP41]], align 4
-; DEFAULT-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[TMP39]], i64 8
+; DEFAULT-NEXT: [[TMP42:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 2
+; DEFAULT-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[TMP42]], i64 8
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP43]], align 4
-; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP39]], align 4
+; DEFAULT-NEXT: [[TMP44:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 2
+; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP44]], align 4
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE17]]
; DEFAULT: [[PRED_STORE_CONTINUE17]]:
; DEFAULT-NEXT: [[TMP45:%.*]] = extractelement <8 x i1> [[TMP22]], i32 3
@@ -788,11 +798,14 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; DEFAULT: [[PRED_STORE_IF18]]:
; DEFAULT-NEXT: [[TMP46:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 3
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP46]], align 4
-; DEFAULT-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[TMP46]], i64 4
+; DEFAULT-NEXT: [[TMP47:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 3
+; DEFAULT-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[TMP47]], i64 4
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP48]], align 4
-; DEFAULT-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[TMP46]], i64 8
+; DEFAULT-NEXT: [[TMP49:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 3
+; DEFAULT-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[TMP49]], i64 8
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP50]], align 4
-; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP46]], align 4
+; DEFAULT-NEXT: [[TMP51:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 3
+; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP51]], align 4
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE19]]
; DEFAULT: [[PRED_STORE_CONTINUE19]]:
; DEFAULT-NEXT: [[TMP52:%.*]] = extractelement <8 x i1> [[TMP22]], i32 4
@@ -800,11 +813,14 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; DEFAULT: [[PRED_STORE_IF20]]:
; DEFAULT-NEXT: [[TMP53:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 4
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP53]], align 4
-; DEFAULT-NEXT: [[TMP55:%.*]] = getelementptr i8, ptr [[TMP53]], i64 4
+; DEFAULT-NEXT: [[TMP54:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 4
+; DEFAULT-NEXT: [[TMP55:%.*]] = getelementptr i8, ptr [[TMP54]], i64 4
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP55]], align 4
-; DEFAULT-NEXT: [[TMP57:%.*]] = getelementptr i8, ptr [[TMP53]], i64 8
+; DEFAULT-NEXT: [[TMP56:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 4
+; DEFAULT-NEXT: [[TMP57:%.*]] = getelementptr i8, ptr [[TMP56]], i64 8
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP57]], align 4
-; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP53]], align 4
+; DEFAULT-NEXT: [[TMP58:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 4
+; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP58]], align 4
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE21]]
; DEFAULT: [[PRED_STORE_CONTINUE21]]:
; DEFAULT-NEXT: [[TMP59:%.*]] = extractelement <8 x i1> [[TMP22]], i32 5
@@ -812,11 +828,14 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; DEFAULT: [[PRED_STORE_IF22]]:
; DEFAULT-NEXT: [[TMP60:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 5
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP60]], align 4
-; DEFAULT-NEXT: [[TMP62:%.*]] = getelementptr i8, ptr [[TMP60]], i64 4
+; DEFAULT-NEXT: [[TMP61:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 5
+; DEFAULT-NEXT: [[TMP62:%.*]] = getelementptr i8, ptr [[TMP61]], i64 4
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP62]], align 4
-; DEFAULT-NEXT: [[TMP64:%.*]] = getelementptr i8, ptr [[TMP60]], i64 8
+; DEFAULT-NEXT: [[TMP63:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 5
+; DEFAULT-NEXT: [[TMP64:%.*]] = getelementptr i8, ptr [[TMP63]], i64 8
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP64]], align 4
-; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP60]], align 4
+; DEFAULT-NEXT: [[TMP65:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 5
+; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP65]], align 4
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE23]]
; DEFAULT: [[PRED_STORE_CONTINUE23]]:
; DEFAULT-NEXT: [[TMP66:%.*]] = extractelement <8 x i1> [[TMP22]], i32 6
@@ -824,11 +843,14 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; DEFAULT: [[PRED_STORE_IF24]]:
; DEFAULT-NEXT: [[TMP67:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 6
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP67]], align 4
-; DEFAULT-NEXT: [[TMP69:%.*]] = getelementptr i8, ptr [[TMP67]], i64 4
+; DEFAULT-NEXT: [[TMP68:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 6
+; DEFAULT-NEXT: [[TMP69:%.*]] = getelementptr i8, ptr [[TMP68]], i64 4
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP69]], align 4
-; DEFAULT-NEXT: [[TMP71:%.*]] = getelementptr i8, ptr [[TMP67]], i64 8
+; DEFAULT-NEXT: [[TMP70:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 6
+; DEFAULT-NEXT: [[TMP71:%.*]] = getelementptr i8, ptr [[TMP70]], i64 8
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP71]], align 4
-; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP67]], align 4
+; DEFAULT-NEXT: [[TMP72:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 6
+; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP72]], align 4
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE25]]
; DEFAULT: [[PRED_STORE_CONTINUE25]]:
; DEFAULT-NEXT: [[TMP73:%.*]] = extractelement <8 x i1> [[TMP22]], i32 7
@@ -836,11 +858,14 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; DEFAULT: [[PRED_STORE_IF26]]:
; DEFAULT-NEXT: [[TMP74:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 7
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP74]], align 4
-; DEFAULT-NEXT: [[TMP76:%.*]] = getelementptr i8, ptr [[TMP74]], i64 4
+; DEFAULT-NEXT: [[TMP75:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 7
+; DEFAULT-NEXT: [[TMP76:%.*]] = getelementptr i8, ptr [[TMP75]], i64 4
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP76]], align 4
-; DEFAULT-NEXT: [[TMP78:%.*]] = getelementptr i8, ptr [[TMP74]], i64 8
+; DEFAULT-NEXT: [[TMP77:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 7
+; DEFAULT-NEXT: [[TMP78:%.*]] = getelementptr i8, ptr [[TMP77]], i64 8
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP78]], align 4
-; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP74]], align 4
+; DEFAULT-NEXT: [[TMP79:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 7
+; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP79]], align 4
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE27]]
; DEFAULT: [[PRED_STORE_CONTINUE27]]:
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
@@ -911,11 +936,14 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; PRED: [[PRED_STORE_IF]]:
; PRED-NEXT: [[TMP29:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 0
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP29]], align 4
-; PRED-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr [[TMP29]], i64 4
+; PRED-NEXT: [[TMP30:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 0
+; PRED-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr [[TMP30]], i64 4
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP31]], align 4
-; PRED-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[TMP29]], i64 8
+; PRED-NEXT: [[TMP32:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 0
+; PRED-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[TMP32]], i64 8
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP33]], align 4
-; PRED-NEXT: store float 0.000000e+00, ptr [[TMP29]], align 4
+; PRED-NEXT: [[TMP34:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 0
+; PRED-NEXT: store float 0.000000e+00, ptr [[TMP34]], align 4
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE]]
; PRED: [[PRED_STORE_CONTINUE]]:
; PRED-NEXT: [[TMP35:%.*]] = extractelement <8 x i1> [[TMP26]], i32 1
@@ -923,11 +951,14 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; PRED: [[PRED_STORE_IF14]]:
; PRED-NEXT: [[TMP36:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 1
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP36]], align 4
-; PRED-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr [[TMP36]], i64 4
+; PRED-NEXT: [[TMP37:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 1
+; PRED-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr [[TMP37]], i64 4
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP38]], align 4
-; PRED-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr [[TMP36]], i64 8
+; PRED-NEXT: [[TMP39:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 1
+; PRED-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr [[TMP39]], i64 8
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP40]], align 4
-; PRED-NEXT: store float 0.000000e+00, ptr [[TMP36]], align 4
+; PRED-NEXT: [[TMP41:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 1
+; PRED-NEXT: store float 0.000000e+00, ptr [[TMP41]], align 4
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE15]]
; PRED: [[PRED_STORE_CONTINUE15]]:
; PRED-NEXT: [[TMP42:%.*]] = extractelement <8 x i1> [[TMP26]], i32 2
@@ -935,11 +966,14 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; PRED: [[PRED_STORE_IF16]]:
; PRED-NEXT: [[TMP43:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 2
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP43]], align 4
-; PRED-NEXT: [[TMP45:%.*]] = getelementptr i8, ptr [[TMP43]], i64 4
+; PRED-NEXT: [[TMP44:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 2
+; PRED-NEXT: [[TMP45:%.*]] = getelementptr i8, ptr [[TMP44]], i64 4
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP45]], align 4
-; PRED-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr [[TMP43]], i64 8
+; PRED-NEXT: [[TMP46:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 2
+; PRED-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr [[TMP46]], i64 8
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP47]], align 4
-; PRED-NEXT: store float 0.000000e+00, ptr [[TMP43]], align 4
+; PRED-NEXT: [[TMP48:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 2
+; PRED-NEXT: store float 0.000000e+00, ptr [[TMP48]], align 4
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE17]]
; PRED: [[PRED_STORE_CONTINUE17]]:
; PRED-NEXT: [[TMP49:%.*]] = extractelement <8 x i1> [[TMP26]], i32 3
@@ -947,11 +981,14 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; PRED: [[PRED_STORE_IF18]]:
; PRED-NEXT: [[TMP50:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 3
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP50]], align 4
-; PRED-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr [[TMP50]], i64 4
+; PRED-NEXT: [[TMP51:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 3
+; PRED-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr [[TMP51]], i64 4
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP52]], align 4
-; PRED-NEXT: [[TMP54:%.*]] = getelementptr i8, ptr [[TMP50]], i64 8
+; PRED-NEXT: [[TMP53:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 3
+; PRED-NEXT: [[TMP54:%.*]] = getelementptr i8, ptr [[TMP53]], i64 8
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP54]], align 4
-; PRED-NEXT: store float 0.000000e+00, ptr [[TMP50]], align 4
+; PRED-NEXT: [[TMP55:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 3
+; PRED-NEXT: store float 0.000000e+00, ptr [[TMP55]], align 4
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE19]]
; PRED: [[PRED_STORE_CONTINUE19]]:
; PRED-NEXT: [[TMP56:%.*]] = extractelement <8 x i1> [[TMP26]], i32 4
@@ -959,11 +996,14 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; PRED: [[PRED_STORE_IF20]]:
; PRED-NEXT: [[TMP57:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 4
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP57]], align 4
-; PRED-NEXT: [[TMP59:%.*]] = getelementptr i8, ptr [[TMP57]], i64 4
+; PRED-NEXT: [[TMP58:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 4
+; PRED-NEXT: [[TMP59:%.*]] = getelementptr i8, ptr [[TMP58]], i64 4
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP59]], align 4
-; PRED-NEXT: [[TMP61:%.*]] = getelementptr i8, ptr [[TMP57]], i64 8
+; PRED-NEXT: [[TMP60:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 4
+; PRED-NEXT: [[TMP61:%.*]] = getelementptr i8, ptr [[TMP60]], i64 8
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP61]], align 4
-; PRED-NEXT: store float 0.000000e+00, ptr [[TMP57]], align 4
+; PRED-NEXT: [[TMP62:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 4
+; PRED-NEXT: store float 0.000000e+00, ptr [[TMP62]], align 4
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE21]]
; PRED: [[PRED_STORE_CONTINUE21]]:
; PRED-NEXT: [[TMP63:%.*]] = extractelement <8 x i1> [[TMP26]], i32 5
@@ -971,11 +1011,14 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; PRED: [[PRED_STORE_IF22]]:
; PRED-NEXT: [[TMP64:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 5
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP64]], align 4
-; PRED-NEXT: [[TMP66:%.*]] = getelementptr i8, ptr [[TMP64]], i64 4
+; PRED-NEXT: [[TMP65:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 5
+; PRED-NEXT: [[TMP66:%.*]] = getelementptr i8, ptr [[TMP65]], i64 4
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP66]], align 4
-; PRED-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr [[TMP64]], i64 8
+; PRED-NEXT: [[TMP67:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 5
+; PRED-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr [[TMP67]], i64 8
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP68]], align 4
-; PRED-NEXT: store float 0.000000e+00, ptr [[TMP64]], align 4
+; PRED-NEXT: [[TMP69:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 5
+; PRED-NEXT: store float 0.000000e+00, ptr [[TMP69]], align 4
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE23]]
; PRED: [[PRED_STORE_CONTINUE23]]:
; PRED-NEXT: [[TMP70:%.*]] = extractelement <8 x i1> [[TMP26]], i32 6
@@ -983,11 +1026,14 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; PRED: [[PRED_STORE_IF24]]:
; PRED-NEXT: [[TMP71:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 6
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP71]], align 4
-; PRED-NEXT: [[TMP73:%.*]] = getelementptr i8, ptr [[TMP71]], i64 4
+; PRED-NEXT: [[TMP72:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 6
+; PRED-NEXT: [[TMP73:%.*]] = getelementptr i8, ptr [[TMP72]], i64 4
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP73]], align 4
-; PRED-NEXT: [[TMP75:%.*]] = getelementptr i8, ptr [[TMP71]], i64 8
+; PRED-NEXT: [[TMP74:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 6
+; PRED-NEXT: [[TMP75:%.*]] = getelementptr i8, ptr [[TMP74]], i64 8
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP75]], align 4
-; PRED-NEXT: store float 0.000000e+00, ptr [[TMP71]], align 4
+; PRED-NEXT: [[TMP76:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 6
+; PRED-NEXT: store float 0.000000e+00, ptr [[TMP76]], align 4
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE25]]
; PRED: [[PRED_STORE_CONTINUE25]]:
; PRED-NEXT: [[TMP77:%.*]] = extractelement <8 x i1> [[TMP26]], i32 7
@@ -995,11 +1041,14 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; PRED: [[PRED_STORE_IF26]]:
; PRED-NEXT: [[TMP78:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 7
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP78]], align 4
-; PRED-NEXT: [[TMP80:%.*]] = getelementptr i8, ptr [[TMP78]], i64 4
+; PRED-NEXT: [[TMP79:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 7
+; PRED-NEXT: [[TMP80:%.*]] = getelementptr i8, ptr [[TMP79]], i64 4
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP80]], align 4
-; PRED-NEXT: [[TMP82:%.*]] = getelementptr i8, ptr [[TMP78]], i64 8
+; PRED-NEXT: [[TMP81:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 7
+; PRED-NEXT: [[TMP82:%.*]] = getelementptr i8, ptr [[TMP81]], i64 8
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP82]], align 4
-; PRED-NEXT: store float 0.000000e+00, ptr [[TMP78]], align 4
+; PRED-NEXT: [[TMP83:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 7
+; PRED-NEXT: store float 0.000000e+00, ptr [[TMP83]], align 4
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE27]]
; PRED: [[PRED_STORE_CONTINUE27]]:
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fold-tail-low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fold-tail-low-trip-count.ll
index f741a10fedfa3..77ea5f0e36c86 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/fold-tail-low-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/fold-tail-low-trip-count.ll
@@ -19,6 +19,10 @@ define void @low_trip_count_small(i32 %x, ptr %dst) {
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[DST]], i64 1
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[DST]], i64 2
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[DST]], i64 3
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[DST]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x ptr> [[TMP1]], ptr [[NEXT_GEP2]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr> [[TMP2]], ptr [[NEXT_GEP3]], i32 2
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x ptr> [[TMP3]], ptr [[NEXT_GEP4]], i32 3
; CHECK-NEXT: [[TMP5:%.*]] = icmp ule <4 x i64> <i64 0, i64 1, i64 2, i64 3>, [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0
; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
@@ -92,6 +96,10 @@ define ptr @low_trip_count_small_with_live_out(i32 %x, ptr %dst) {
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[DST]], i64 1
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[DST]], i64 2
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[DST]], i64 3
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x ptr> poison, ptr [[DST]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr> [[TMP2]], ptr [[NEXT_GEP2]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x ptr> [[TMP3]], ptr [[NEXT_GEP3]], i32 2
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x ptr> [[TMP4]], ptr [[NEXT_GEP4]], i32 3
; CHECK-NEXT: [[TMP6:%.*]] = icmp ule <4 x i64> <i64 0, i64 1, i64 2, i64 3>, [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
index 7b74e42962bfa..e43b6cff0af8c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
@@ -605,8 +605,10 @@ define void @forced_scalar_instr(ptr %gep.dst) {
; COMMON-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
; COMMON-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; COMMON: [[PRED_STORE_IF]]:
-; COMMON-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[GEP_DST]], i64 [[INDEX]]
-; COMMON-NEXT: [[TMP6:%.*]] = or i32 [[TMP0]], 1
+; COMMON-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
+; COMMON-NEXT: [[TMP4:%.*]] = add i32 [[TMP0]], 0
+; COMMON-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[GEP_DST]], i64 [[TMP3]]
+; COMMON-NEXT: [[TMP6:%.*]] = or i32 [[TMP4]], 1
; COMMON-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4
; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE]]
; COMMON: [[PRED_STORE_CONTINUE]]:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
index 42fb0970a1fb4..e7ccde046d58e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
@@ -602,7 +602,8 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) {
; PRED-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP6]], i32 0
; PRED-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; PRED: [[PRED_STORE_IF]]:
-; PRED-NEXT: [[TMP9:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[INDEX]], i32 2
+; PRED-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0
+; PRED-NEXT: [[TMP9:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[TMP8]], i32 2
; PRED-NEXT: store i32 0, ptr [[TMP9]], align 8
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE]]
; PRED: [[PRED_STORE_CONTINUE]]:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll
index 9462a791dc9de..6dcb3c7dd7214 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll
@@ -77,7 +77,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
; TFCOMMON-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 1
; TFCOMMON-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE6]]
; TFCOMMON: pred.store.if1:
-; TFCOMMON-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 1
+; TFCOMMON-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 0
; TFCOMMON-NEXT: store double [[TMP19]], ptr [[P]], align 8
; TFCOMMON-NEXT: br label [[PRED_STORE_CONTINUE6]]
; TFCOMMON: pred.store.continue2:
@@ -116,7 +116,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
; TFA_INTERLEAVE-NEXT: [[TMP29:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 1
; TFA_INTERLEAVE-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
; TFA_INTERLEAVE: pred.store.if3:
-; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 1
+; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 0
; TFA_INTERLEAVE-NEXT: store double [[TMP22]], ptr [[P]], align 8
; TFA_INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE5]]
; TFA_INTERLEAVE: pred.store.continue4:
@@ -130,7 +130,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK2]], i32 1
; TFA_INTERLEAVE-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9]]
; TFA_INTERLEAVE: pred.store.if7:
-; TFA_INTERLEAVE-NEXT: [[TMP34:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 1
+; TFA_INTERLEAVE-NEXT: [[TMP34:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 0
; TFA_INTERLEAVE-NEXT: store double [[TMP34]], ptr [[P]], align 8
; TFA_INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE9]]
; TFA_INTERLEAVE: pred.store.continue8:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
index 64341ddb7cdcb..7c26d0028d0d4 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
@@ -203,8 +203,9 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n)
; DEFAULT-NEXT: [[TMP7:%.*]] = add <16 x i8> [[TMP4]], [[TMP6]]
; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; DEFAULT: [[PRED_STORE_IF]]:
+; DEFAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 0
; DEFAULT-NEXT: [[TMP11:%.*]] = extractelement <16 x i8> [[TMP7]], i32 0
-; DEFAULT-NEXT: store i8 [[TMP11]], ptr [[P]], align 1
+; DEFAULT-NEXT: store i8 [[TMP11]], ptr [[TMP10]], align 1
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE]]
; DEFAULT: [[PRED_STORE_CONTINUE]]:
; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]]
@@ -429,7 +430,7 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32
; DEFAULT-NEXT: [[TMP24:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
; DEFAULT-NEXT: [[TMP23:%.*]] = xor i1 [[TMP24]], true
; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i8> [[VEC_IND]], [[DOTSPLAT]]
-; DEFAULT-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; DEFAULT-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; DEFAULT: [[MIDDLE_BLOCK]]:
; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]]
; DEFAULT: [[FOR_COND_CLEANUP]]:
@@ -579,7 +580,7 @@ define void @dont_vectorize_with_minsize() {
; DEFAULT-NEXT: store <8 x i16> [[TMP15]], ptr [[TMP11]], align 2
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; DEFAULT-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
-; DEFAULT-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; DEFAULT-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; DEFAULT: [[MIDDLE_BLOCK]]:
; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]]
; DEFAULT: [[FOR_COND_CLEANUP]]:
@@ -695,7 +696,7 @@ define void @vectorization_forced_minsize_reduce_width() {
; DEFAULT-NEXT: store <8 x i16> [[TMP15]], ptr [[TMP11]], align 2
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; DEFAULT-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
-; DEFAULT-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; DEFAULT-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; DEFAULT: [[MIDDLE_BLOCK]]:
; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]]
; DEFAULT: [[FOR_COND_CLEANUP]]:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll
index d96a96836bd3a..1d44aeb91d8a5 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll
@@ -288,7 +288,8 @@ define void @srem_sdiv_without_tail_folding(i32 %d.0, i32 %d.1, ptr %dst, i32 %e
; CHECK: [[PRED_STORE_IF]]:
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP25]], i32 0
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP27]]
-; CHECK-NEXT: store i32 [[INDEX]], ptr [[TMP28]], align 4
+; CHECK-NEXT: [[TMP29:%.*]] = add i32 [[INDEX]], 0
+; CHECK-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-fold-tail.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-fold-tail.ll
index 240ac2f1ceb2f..42103f72c903d 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-fold-tail.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-fold-tail.ll
@@ -25,7 +25,8 @@ define void @load_store_interleave_group(ptr noalias %data, i64 %n) {
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP3:%.*]] = shl nsw i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = shl nsw i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8
; CHECK-NEXT: store i64 [[TMP5]], ptr [[TMP4]], align 8
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll
index 8c34e6815f843..99c735f777b66 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll
@@ -152,7 +152,9 @@ define void @load_store_interleave_group_block_var_cond(ptr noalias %data, ptr %
; VF2IC1-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
; VF2IC1-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF2IC1: [[PRED_STORE_IF]]:
-; VF2IC1-NEXT: store i8 1, ptr [[TMP2]], align 1
+; VF2IC1-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
+; VF2IC1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[MASKS]], i64 [[TMP5]]
+; VF2IC1-NEXT: store i8 1, ptr [[TMP6]], align 1
; VF2IC1-NEXT: br label %[[PRED_STORE_CONTINUE]]
; VF2IC1: [[PRED_STORE_CONTINUE]]:
; VF2IC1-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
@@ -205,7 +207,9 @@ define void @load_store_interleave_group_block_var_cond(ptr noalias %data, ptr %
; VF2IC2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0
; VF2IC2-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF2IC2: [[PRED_STORE_IF]]:
-; VF2IC2-NEXT: store i8 1, ptr [[TMP7]], align 1
+; VF2IC2-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 0
+; VF2IC2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[MASKS]], i64 [[TMP12]]
+; VF2IC2-NEXT: store i8 1, ptr [[TMP13]], align 1
; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE]]
; VF2IC2: [[PRED_STORE_CONTINUE]]:
; VF2IC2-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll
index 73fd53011e956..7d77f2f6b5b9c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll
@@ -35,9 +35,12 @@ define void @load_store_interleave_group_tc_2(ptr noalias %data) {
; VF4: [[VECTOR_BODY]]:
; VF4-NEXT: br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF4: [[PRED_STORE_IF]]:
-; VF4-NEXT: [[TMP0:%.*]] = load i64, ptr [[DATA]], align 8
-; VF4-NEXT: store i64 [[TMP0]], ptr [[DATA]], align 8
-; VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 1
+; VF4-NEXT: [[TMP3:%.*]] = shl nsw i64 0, 1
+; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP3]]
+; VF4-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8
+; VF4-NEXT: store i64 [[TMP5]], ptr [[TMP4]], align 8
+; VF4-NEXT: [[TMP6:%.*]] = or disjoint i64 [[TMP3]], 1
+; VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP6]]
; VF4-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8
; VF4-NEXT: store i64 [[TMP8]], ptr [[TMP7]], align 8
; VF4-NEXT: br label %[[PRED_STORE_CONTINUE]]
diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll
index fab2da95f7f1d..78c71fd3beb89 100644
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll
@@ -20,8 +20,9 @@ define void @test_scalar_steps_target_instruction_cost(ptr %dst) {
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: store i64 [[OFFSET_IDX]], ptr [[TMP2]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP2]]
+; CHECK-NEXT: store i64 [[TMP2]], ptr [[TMP3]], align 8
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/VPlan/interleave-and-scalarize-only.ll
index a641e7ef54de7..b3050475d4bc4 100644
--- a/llvm/test/Transforms/LoopVectorize/VPlan/interleave-and-scalarize-only.ll
+++ b/llvm/test/Transforms/LoopVectorize/VPlan/interleave-and-scalarize-only.ll
@@ -136,9 +136,9 @@ define void @test_scalarize_with_branch_cond(ptr %src, ptr %dst) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %pred.store.continue4 ]
; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i1
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i1 false, [[TMP0]]
-; CHECK-NEXT: [[INDUCTION2:%.*]] = add i1 [[OFFSET_IDX]], false
+; CHECK-NEXT: [[INDUCTION:%.*]] = add i1 [[OFFSET_IDX]], false
; CHECK-NEXT: [[INDUCTION3:%.*]] = add i1 [[OFFSET_IDX]], true
-; CHECK-NEXT: br i1 [[INDUCTION2]], label %pred.store.if, label %pred.store.continue
+; CHECK-NEXT: br i1 [[INDUCTION]], label %pred.store.if, label %pred.store.continue
; CHECK: pred.store.if:
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr %src, i64 [[INDEX]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/vplan-predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/VPlan/vplan-predicate-switch.ll
index 7dc5b131e4075..63b0da8101b9d 100644
--- a/llvm/test/Transforms/LoopVectorize/VPlan/vplan-predicate-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/VPlan/vplan-predicate-switch.ll
@@ -19,69 +19,67 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; CHECK-NEXT: Successor(s): vector.body
; CHECK-EMPTY:
; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT-SCALAR vp<[[CAN_IV:%.+]]> = phi [ ir<0>, vector.ph ], [ vp<[[CAN_IV_NEXT:%.+]]>, pred.store.continue{{.*}} ]
+; CHECK-NEXT: EMIT-SCALAR vp<[[CAN_IV:%.+]]> = phi [ ir<0>, vector.ph ], [ vp<[[CAN_IV_NEXT:%.+]]>, default.2 ]
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, ir<2>, ir<1>
; CHECK-NEXT: EMIT vp<[[PTR:%.+]]> = ptradd ir<%start>, vp<[[CAN_IV]]>
; CHECK-NEXT: EMIT vp<[[PTR]]>.1 = ptradd ir<%start>, vp<[[STEPS]]>
+; CHECK-NEXT: EMIT vp<[[PTR_VEC:%.+]]> = buildvector vp<[[PTR]]>, vp<[[PTR]]>.1
; CHECK-NEXT: WIDEN ir<%l> = load vp<[[PTR]]>
; CHECK-NEXT: EMIT vp<[[C1:%.+]]> = icmp eq ir<%l>, ir<-12>
; CHECK-NEXT: EMIT vp<[[C2:%.+]]> = icmp eq ir<%l>, ir<13>
; CHECK-NEXT: EMIT vp<[[OR_CASES:%.+]]> = or vp<[[C1]]>, vp<[[C2]]>
; CHECK-NEXT: EMIT vp<[[DEFAULT_MASK:%.+]]> = not vp<[[OR_CASES]]>
-; CHECK-NEXT: EMIT vp<[[C2_LANE0:%.+]]> = extractelement vp<[[C2]]>, ir<0>
-; CHECK-NEXT: EMIT branch-on-cond vp<[[C2_LANE0]]>
-; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
-; CHECK-EMPTY:
-; CHECK-NEXT: pred.store.if:
-; CHECK-NEXT: CLONE store ir<0>, vp<[[PTR]]>
-; CHECK-NEXT: Successor(s): pred.store.continue
-; CHECK-EMPTY:
-; CHECK-NEXT: pred.store.continue:
-; CHECK-NEXT: EMIT vp<[[C2_LANE1:%.+]]> = extractelement vp<[[C2]]>, ir<1>
-; CHECK-NEXT: EMIT branch-on-cond vp<[[C2_LANE1]]>
-; CHECK-NEXT: Successor(s): pred.store.if{{.*}}, pred.store.continue{{.*}}
-; CHECK-EMPTY:
-; CHECK-NEXT: pred.store.if{{.*}}:
-; CHECK-NEXT: CLONE store ir<0>, vp<[[PTR]]>.1
-; CHECK-NEXT: Successor(s): pred.store.continue{{.*}}
-; CHECK-EMPTY:
-; CHECK-NEXT: pred.store.continue{{.*}}:
-; CHECK-NEXT: EMIT vp<[[C1_LANE0:%.+]]> = extractelement vp<[[C1]]>, ir<0>
-; CHECK-NEXT: EMIT branch-on-cond vp<[[C1_LANE0]]>
-; CHECK-NEXT: Successor(s): pred.store.if{{.*}}, pred.store.continue{{.*}}
-; CHECK-EMPTY:
-; CHECK-NEXT: pred.store.if{{.*}}:
-; CHECK-NEXT: CLONE store ir<42>, vp<[[PTR]]>
-; CHECK-NEXT: Successor(s): pred.store.continue{{.*}}
-; CHECK-EMPTY:
-; CHECK-NEXT: pred.store.continue{{.*}}:
-; CHECK-NEXT: EMIT vp<[[C1_LANE1:%.+]]> = extractelement vp<[[C1]]>, ir<1>
-; CHECK-NEXT: EMIT branch-on-cond vp<[[C1_LANE1]]>
-; CHECK-NEXT: Successor(s): pred.store.if{{.*}}, pred.store.continue{{.*}}
-; CHECK-EMPTY:
-; CHECK-NEXT: pred.store.if{{.*}}:
-; CHECK-NEXT: CLONE store ir<42>, vp<[[PTR]]>.1
-; CHECK-NEXT: Successor(s): pred.store.continue{{.*}}
-; CHECK-EMPTY:
-; CHECK-NEXT: pred.store.continue{{.*}}:
-; CHECK-NEXT: EMIT vp<[[DEFAULT_MASK_LANE0:%.+]]> = extractelement vp<[[DEFAULT_MASK]]>, ir<0>
-; CHECK-NEXT: EMIT branch-on-cond vp<[[DEFAULT_MASK_LANE0]]>
-; CHECK-NEXT: Successor(s): pred.store.if{{.*}}, pred.store.continue{{.*}}
-; CHECK-EMPTY:
-; CHECK-NEXT: pred.store.if{{.*}}:
-; CHECK-NEXT: CLONE store ir<2>, vp<[[PTR]]>
-; CHECK-NEXT: Successor(s): pred.store.continue{{.*}}
-; CHECK-EMPTY:
-; CHECK-NEXT: pred.store.continue{{.*}}:
-; CHECK-NEXT: EMIT vp<[[DEFAULT_MASK_LANE1:%.+]]> = extractelement vp<[[DEFAULT_MASK]]>, ir<1>
-; CHECK-NEXT: EMIT branch-on-cond vp<[[DEFAULT_MASK_LANE1]]>
-; CHECK-NEXT: Successor(s): pred.store.if{{.*}}, pred.store.continue{{.*}}
-; CHECK-EMPTY:
-; CHECK-NEXT: pred.store.if{{.*}}:
-; CHECK-NEXT: CLONE store ir<2>, vp<[[PTR]]>.1
-; CHECK-NEXT: Successor(s): pred.store.continue{{.*}}
-; CHECK-EMPTY:
-; CHECK-NEXT: pred.store.continue{{.*}}:
+; CHECK-NEXT: Successor(s): pred.store
+; CHECK-EMPTY:
+; CHECK-NEXT: <xVFxUF> pred.store: {
+; CHECK-NEXT: pred.store.entry:
+; CHECK-NEXT: BRANCH-ON-MASK vp<[[C2]]>
+; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.if:
+; CHECK-NEXT: REPLICATE store ir<0>, vp<[[PTR_VEC]]>
+; CHECK-NEXT: Successor(s): pred.store.continue
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.continue:
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK-NEXT: Successor(s): if.then.2.0
+; CHECK-EMPTY:
+; CHECK-NEXT: if.then.2.0:
+; CHECK-NEXT: Successor(s): pred.store
+; CHECK-EMPTY:
+; CHECK-NEXT: <xVFxUF> pred.store: {
+; CHECK-NEXT: pred.store.entry:
+; CHECK-NEXT: BRANCH-ON-MASK vp<[[C1]]>
+; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.if:
+; CHECK-NEXT: REPLICATE store ir<42>, vp<[[PTR_VEC]]>
+; CHECK-NEXT: Successor(s): pred.store.continue
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.continue:
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK-NEXT: Successor(s): if.then.1.1
+; CHECK-EMPTY:
+; CHECK-NEXT: if.then.1.1:
+; CHECK-NEXT: Successor(s): pred.store
+; CHECK-EMPTY:
+; CHECK-NEXT: <xVFxUF> pred.store: {
+; CHECK-NEXT: pred.store.entry:
+; CHECK-NEXT: BRANCH-ON-MASK vp<[[DEFAULT_MASK]]>
+; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.if:
+; CHECK-NEXT: REPLICATE store ir<2>, vp<[[PTR_VEC]]>
+; CHECK-NEXT: Successor(s): pred.store.continue
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.continue:
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK-NEXT: Successor(s): default.2
+; CHECK-EMPTY:
+; CHECK-NEXT: default.2:
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, ir<2>
; CHECK-NEXT: EMIT vp<{{%.+}}> = icmp eq vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<{{%.+}}>
diff --git a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
index 3576977c2f2ad..34377454c0cc3 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
@@ -191,23 +191,24 @@ define void @PR40816() #1 {
; FORCE: [[VECTOR_PH]]:
; FORCE-NEXT: br label %[[VECTOR_BODY:.*]]
; FORCE: [[VECTOR_BODY]]:
-; FORCE-NEXT: [[TMP0:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
+; FORCE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
; FORCE-NEXT: [[VEC_IND:%.*]] = phi <2 x i8> [ <i8 0, i8 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ]
; FORCE-NEXT: [[TMP2:%.*]] = icmp ule <2 x i8> [[VEC_IND]], splat (i8 2)
; FORCE-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
; FORCE-NEXT: br i1 [[TMP3]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; FORCE: [[PRED_STORE_IF]]:
+; FORCE-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; FORCE-NEXT: store i32 [[TMP0]], ptr @b, align 1
; FORCE-NEXT: br label %[[PRED_STORE_CONTINUE]]
; FORCE: [[PRED_STORE_CONTINUE]]:
; FORCE-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
; FORCE-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
; FORCE: [[PRED_STORE_IF1]]:
-; FORCE-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 1
+; FORCE-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
; FORCE-NEXT: store i32 [[TMP1]], ptr @b, align 1
; FORCE-NEXT: br label %[[PRED_STORE_CONTINUE2]]
; FORCE: [[PRED_STORE_CONTINUE2]]:
-; FORCE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[TMP0]], 2
+; FORCE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; FORCE-NEXT: [[VEC_IND_NEXT]] = add <2 x i8> [[VEC_IND]], splat (i8 2)
; FORCE-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], 4
; FORCE-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll
index 972a473e0171f..f65a9d7d45ed8 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll
@@ -63,7 +63,8 @@ define void @redundant_or_1(ptr %dst, i1 %c.0, i1 %c.1) {
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: store i32 0, ptr [[DST:%.*]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 0
+; CHECK-NEXT: store i32 0, ptr [[TMP8]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
@@ -137,7 +138,8 @@ define void @redundant_or_2(ptr %dst, i1 %c.0, i1 %c.1) {
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: store i32 0, ptr [[DST:%.*]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 0
+; CHECK-NEXT: store i32 0, ptr [[TMP7]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
@@ -212,7 +214,8 @@ define void @redundant_and_1(ptr %dst, i1 %c.0, i1 %c.1) {
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: store i32 0, ptr [[DST:%.*]], align 4
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 0
+; CHECK-NEXT: store i32 0, ptr [[TMP9]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP6]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll
index 00a6818918f05..40eee658f3421 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll
@@ -414,7 +414,7 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
; CHECK-NEXT: [[TMP8:%.*]] = shl nsw i64 [[TMP4]], 2
; CHECK-NEXT: [[TMP9:%.*]] = shl nsw i64 [[TMP5]], 2
; CHECK-NEXT: [[TMP10:%.*]] = shl nsw i64 [[TMP6]], 2
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[TMP7]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr nusw double, ptr [[A:%.*]], i64 [[TMP7]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr nusw double, ptr [[A]], i64 [[TMP8]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr nusw double, ptr [[A]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr nusw double, ptr [[A]], i64 [[TMP10]]
@@ -433,7 +433,10 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP19]], i32 0
; CHECK-NEXT: br i1 [[TMP23]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP11]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP24:%.*]] = shl nsw i64 [[TMP3]], 2
+; CHECK-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP24]]
+; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP25]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i1> [[TMP19]], i32 1
@@ -589,14 +592,17 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX40:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL1]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT51:%.*]], [[PRED_STORE_CONTINUE50:%.*]] ]
; CHECK-NEXT: [[TMP87:%.*]] = shl nsw i64 [[INDEX40]], 2
-; CHECK-NEXT: [[TMP89:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP87]]
+; CHECK-NEXT: [[TMP89:%.*]] = getelementptr nusw double, ptr [[A]], i64 [[TMP87]]
; CHECK-NEXT: [[WIDE_VEC41:%.*]] = load <16 x double>, ptr [[TMP89]], align 8
; CHECK-NEXT: [[STRIDED_VEC42:%.*]] = shufflevector <16 x double> [[WIDE_VEC41]], <16 x double> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
; CHECK-NEXT: [[TMP90:%.*]] = fcmp oeq <4 x double> [[STRIDED_VEC42]], zeroinitializer
; CHECK-NEXT: [[TMP91:%.*]] = extractelement <4 x i1> [[TMP90]], i32 0
; CHECK-NEXT: br i1 [[TMP91]], label [[PRED_STORE_IF43:%.*]], label [[PRED_STORE_CONTINUE44:%.*]]
; CHECK: pred.store.if43:
-; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP89]], align 8
+; CHECK-NEXT: [[TMP86:%.*]] = add i64 [[INDEX40]], 0
+; CHECK-NEXT: [[TMP92:%.*]] = shl nsw i64 [[TMP86]], 2
+; CHECK-NEXT: [[TMP93:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP92]]
+; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP93]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE44]]
; CHECK: pred.store.continue44:
; CHECK-NEXT: [[TMP94:%.*]] = extractelement <4 x i1> [[TMP90]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
index 1cdcc87a02b31..aa0de6ad15d70 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
@@ -152,6 +152,8 @@ define void @foo2(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr n
; FVW2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 16>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE3]] ]
; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16
; FVW2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16
+; FVW2-NEXT: [[TMP18:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0
+; FVW2-NEXT: [[TMP19:%.*]] = insertelement <2 x i64> [[TMP18]], i64 [[TMP1]], i32 1
; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[OFFSET_IDX]]
; FVW2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]]
; FVW2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
@@ -266,6 +268,8 @@ define void @foo3(ptr noalias %in, ptr noalias %out, ptr noalias %trigger) {
; FVW2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 16>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ]
; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16
; FVW2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16
+; FVW2-NEXT: [[TMP18:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0
+; FVW2-NEXT: [[TMP19:%.*]] = insertelement <2 x i64> [[TMP18]], i64 [[TMP1]], i32 1
; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[OFFSET_IDX]]
; FVW2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]]
; FVW2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
@@ -367,6 +371,8 @@ define void @foo2_addrspace(ptr addrspace(1) noalias %in, ptr addrspace(1) noali
; FVW2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 16>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE3]] ]
; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16
; FVW2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16
+; FVW2-NEXT: [[TMP18:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0
+; FVW2-NEXT: [[TMP19:%.*]] = insertelement <2 x i64> [[TMP18]], i64 [[TMP1]], i32 1
; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[OFFSET_IDX]]
; FVW2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]]
; FVW2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
@@ -467,6 +473,8 @@ define void @foo2_addrspace2(ptr addrspace(1) noalias %in, ptr addrspace(0) noal
; FVW2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 16>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE3]] ]
; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16
; FVW2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16
+; FVW2-NEXT: [[TMP18:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0
+; FVW2-NEXT: [[TMP19:%.*]] = insertelement <2 x i64> [[TMP18]], i64 [[TMP1]], i32 1
; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[OFFSET_IDX]]
; FVW2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]]
; FVW2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
@@ -567,6 +575,8 @@ define void @foo2_addrspace3(ptr addrspace(0) noalias %in, ptr addrspace(1) noal
; FVW2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 16>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE3]] ]
; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16
; FVW2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16
+; FVW2-NEXT: [[TMP18:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0
+; FVW2-NEXT: [[TMP19:%.*]] = insertelement <2 x i64> [[TMP18]], i64 [[TMP1]], i32 1
; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[OFFSET_IDX]]
; FVW2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]]
; FVW2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
index 5f81ca53583c0..79538f7f2c68c 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
@@ -584,7 +584,8 @@ define void @wide_iv_trunc(ptr %dst, i64 %N) {
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: store i32 [[TMP1]], ptr [[DST]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP1]], 0
+; CHECK-NEXT: store i32 [[TMP4]], ptr [[DST]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll b/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll
index 36ad765993588..cbf62865cdce5 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll
@@ -28,17 +28,17 @@ define i32 @unused_blend_after_unrolling(ptr %p, i32 %a, i1 %c.1, i16 %x, i16 %y
; CHECK: [[PRED_SDIV_IF]]:
; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE]]
; CHECK: [[PRED_SDIV_CONTINUE]]:
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
; CHECK-NEXT: br i1 [[TMP3]], label %[[PRED_SDIV_IF4:.*]], label %[[PRED_SDIV_CONTINUE5:.*]]
; CHECK: [[PRED_SDIV_IF4]]:
; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE5]]
; CHECK: [[PRED_SDIV_CONTINUE5]]:
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_SDIV_IF6:.*]], label %[[PRED_SDIV_CONTINUE7:.*]]
; CHECK: [[PRED_SDIV_IF6]]:
; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE7]]
; CHECK: [[PRED_SDIV_CONTINUE7]]:
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_SDIV_IF8:.*]], label %[[PRED_SDIV_CONTINUE9:.*]]
; CHECK: [[PRED_SDIV_IF8]]:
; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE9]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll b/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll
index 7e08ed43a2432..fb5a5f9c068b9 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll
@@ -18,7 +18,8 @@ define void @test_pr55096(i64 %c, ptr %p) {
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP5:%.*]] = add i16 [[OFFSET_IDX]], 2008
+; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = add i16 [[TMP4]], 2008
; CHECK-NEXT: [[TMP6:%.*]] = udiv i16 4943, [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[P]], i16 [[TMP6]]
; CHECK-NEXT: store i16 0, ptr [[TMP7]], align 2
diff --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
index eb9e864247303..0abea541158ac 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
@@ -182,7 +182,7 @@ define void @example2(i32 %n, i32 %x) optsize {
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE24]]
; CHECK: pred.store.continue23:
; CHECK-NEXT: [[TMP42:%.*]] = extractelement <4 x i1> [[TMP18]], i64 3
-; CHECK-NEXT: br i1 [[TMP42]], label [[PRED_STORE_IF24:%.*]], label [[PRED_STORE_CONTINUE26]]
+; CHECK-NEXT: br i1 [[TMP42]], label [[PRED_STORE_IF25:%.*]], label [[PRED_STORE_CONTINUE26]]
; CHECK: pred.store.if24:
; CHECK-NEXT: [[TMP43:%.*]] = add i64 [[OFFSET_IDX]], 3
; CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds [4 x i8], ptr @b, i64 [[TMP43]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll
index 28debf57d69e8..56b9ac753ce24 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll
@@ -1399,7 +1399,8 @@ define void @masked_strided2(ptr noalias nocapture readonly %p, ptr noalias noca
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE44]]
; DISABLED_MASKED_STRIDED: pred.store.continue44:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP132:%.*]] = sub <8 x i8> zeroinitializer, [[TMP99]]
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP100]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP133:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP133]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if45:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP134:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP135:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP134]]
@@ -1407,7 +1408,8 @@ define void @masked_strided2(ptr noalias nocapture readonly %p, ptr noalias noca
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP136]], ptr [[TMP135]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE46]]
; DISABLED_MASKED_STRIDED: pred.store.continue46:
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP104]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP137:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP137]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if47:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP138:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1
; DISABLED_MASKED_STRIDED-NEXT: [[TMP139:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP138]]
@@ -1415,7 +1417,8 @@ define void @masked_strided2(ptr noalias nocapture readonly %p, ptr noalias noca
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP140]], ptr [[TMP139]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE48]]
; DISABLED_MASKED_STRIDED: pred.store.continue48:
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP108]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP141]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if49:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP142:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2
; DISABLED_MASKED_STRIDED-NEXT: [[TMP143:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP142]]
@@ -1423,7 +1426,8 @@ define void @masked_strided2(ptr noalias nocapture readonly %p, ptr noalias noca
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP144]], ptr [[TMP143]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE50]]
; DISABLED_MASKED_STRIDED: pred.store.continue50:
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP112]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP145:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP145]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if51:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP146:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3
; DISABLED_MASKED_STRIDED-NEXT: [[TMP147:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP146]]
@@ -1431,7 +1435,8 @@ define void @masked_strided2(ptr noalias nocapture readonly %p, ptr noalias noca
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP148]], ptr [[TMP147]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE52]]
; DISABLED_MASKED_STRIDED: pred.store.continue52:
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP116]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP149:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP149]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if53:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP150:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4
; DISABLED_MASKED_STRIDED-NEXT: [[TMP151:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP150]]
@@ -1439,7 +1444,8 @@ define void @masked_strided2(ptr noalias nocapture readonly %p, ptr noalias noca
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP152]], ptr [[TMP151]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE54]]
; DISABLED_MASKED_STRIDED: pred.store.continue54:
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP120]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP153]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if55:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP154:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5
; DISABLED_MASKED_STRIDED-NEXT: [[TMP155:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP154]]
@@ -1447,7 +1453,8 @@ define void @masked_strided2(ptr noalias nocapture readonly %p, ptr noalias noca
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP156]], ptr [[TMP155]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE56]]
; DISABLED_MASKED_STRIDED: pred.store.continue56:
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP124]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP157:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP157]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if57:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP158:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6
; DISABLED_MASKED_STRIDED-NEXT: [[TMP159:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP158]]
@@ -1455,7 +1462,8 @@ define void @masked_strided2(ptr noalias nocapture readonly %p, ptr noalias noca
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP160]], ptr [[TMP159]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE58]]
; DISABLED_MASKED_STRIDED: pred.store.continue58:
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP128]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP161:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP161]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]]
; DISABLED_MASKED_STRIDED: pred.store.if59:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP162:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7
; DISABLED_MASKED_STRIDED-NEXT: [[TMP163:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP162]]
@@ -1796,7 +1804,8 @@ define void @masked_strided2_reverse(ptr noalias nocapture readonly %p, ptr noal
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE44]]
; DISABLED_MASKED_STRIDED: pred.store.continue44:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP132:%.*]] = sub <8 x i8> zeroinitializer, [[TMP99]]
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP100]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP133:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP133]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if45:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP134:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP135:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP134]]
@@ -1804,7 +1813,8 @@ define void @masked_strided2_reverse(ptr noalias nocapture readonly %p, ptr noal
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP136]], ptr [[TMP135]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE46]]
; DISABLED_MASKED_STRIDED: pred.store.continue46:
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP104]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP137:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP137]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if47:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP138:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1
; DISABLED_MASKED_STRIDED-NEXT: [[TMP139:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP138]]
@@ -1812,7 +1822,8 @@ define void @masked_strided2_reverse(ptr noalias nocapture readonly %p, ptr noal
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP140]], ptr [[TMP139]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE48]]
; DISABLED_MASKED_STRIDED: pred.store.continue48:
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP108]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP141]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if49:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP142:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2
; DISABLED_MASKED_STRIDED-NEXT: [[TMP143:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP142]]
@@ -1820,7 +1831,8 @@ define void @masked_strided2_reverse(ptr noalias nocapture readonly %p, ptr noal
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP144]], ptr [[TMP143]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE50]]
; DISABLED_MASKED_STRIDED: pred.store.continue50:
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP112]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP145:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP145]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if51:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP146:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3
; DISABLED_MASKED_STRIDED-NEXT: [[TMP147:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP146]]
@@ -1828,7 +1840,8 @@ define void @masked_strided2_reverse(ptr noalias nocapture readonly %p, ptr noal
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP148]], ptr [[TMP147]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE52]]
; DISABLED_MASKED_STRIDED: pred.store.continue52:
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP116]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP149:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP149]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if53:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP150:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4
; DISABLED_MASKED_STRIDED-NEXT: [[TMP151:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP150]]
@@ -1836,7 +1849,8 @@ define void @masked_strided2_reverse(ptr noalias nocapture readonly %p, ptr noal
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP152]], ptr [[TMP151]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE54]]
; DISABLED_MASKED_STRIDED: pred.store.continue54:
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP120]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP153]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if55:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP154:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5
; DISABLED_MASKED_STRIDED-NEXT: [[TMP155:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP154]]
@@ -1844,7 +1858,8 @@ define void @masked_strided2_reverse(ptr noalias nocapture readonly %p, ptr noal
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP156]], ptr [[TMP155]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE56]]
; DISABLED_MASKED_STRIDED: pred.store.continue56:
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP124]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP157:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP157]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if57:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP158:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6
; DISABLED_MASKED_STRIDED-NEXT: [[TMP159:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP158]]
@@ -1852,7 +1867,8 @@ define void @masked_strided2_reverse(ptr noalias nocapture readonly %p, ptr noal
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP160]], ptr [[TMP159]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE58]]
; DISABLED_MASKED_STRIDED: pred.store.continue58:
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP128]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP161:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP161]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]]
; DISABLED_MASKED_STRIDED: pred.store.if59:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP162:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7
; DISABLED_MASKED_STRIDED-NEXT: [[TMP163:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP162]]
@@ -2113,7 +2129,8 @@ define void @masked_strided2_reverse(ptr noalias nocapture readonly %p, ptr noal
; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE44]]
; ENABLED_MASKED_STRIDED: pred.store.continue44:
; ENABLED_MASKED_STRIDED-NEXT: [[TMP132:%.*]] = sub <8 x i8> zeroinitializer, [[TMP99]]
-; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP100]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
+; ENABLED_MASKED_STRIDED-NEXT: [[TMP133:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0
+; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP133]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
; ENABLED_MASKED_STRIDED: pred.store.if45:
; ENABLED_MASKED_STRIDED-NEXT: [[TMP134:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0
; ENABLED_MASKED_STRIDED-NEXT: [[TMP135:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP134]]
@@ -2121,7 +2138,8 @@ define void @masked_strided2_reverse(ptr noalias nocapture readonly %p, ptr noal
; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP136]], ptr [[TMP135]], align 1
; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE46]]
; ENABLED_MASKED_STRIDED: pred.store.continue46:
-; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP104]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
+; ENABLED_MASKED_STRIDED-NEXT: [[TMP137:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1
+; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP137]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
; ENABLED_MASKED_STRIDED: pred.store.if47:
; ENABLED_MASKED_STRIDED-NEXT: [[TMP138:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1
; ENABLED_MASKED_STRIDED-NEXT: [[TMP139:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP138]]
@@ -2129,7 +2147,8 @@ define void @masked_strided2_reverse(ptr noalias nocapture readonly %p, ptr noal
; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP140]], ptr [[TMP139]], align 1
; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE48]]
; ENABLED_MASKED_STRIDED: pred.store.continue48:
-; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP108]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
+; ENABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2
+; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP141]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
; ENABLED_MASKED_STRIDED: pred.store.if49:
; ENABLED_MASKED_STRIDED-NEXT: [[TMP142:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2
; ENABLED_MASKED_STRIDED-NEXT: [[TMP143:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP142]]
@@ -2137,7 +2156,8 @@ define void @masked_strided2_reverse(ptr noalias nocapture readonly %p, ptr noal
; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP144]], ptr [[TMP143]], align 1
; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE50]]
; ENABLED_MASKED_STRIDED: pred.store.continue50:
-; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP112]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
+; ENABLED_MASKED_STRIDED-NEXT: [[TMP145:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3
+; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP145]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
; ENABLED_MASKED_STRIDED: pred.store.if51:
; ENABLED_MASKED_STRIDED-NEXT: [[TMP146:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3
; ENABLED_MASKED_STRIDED-NEXT: [[TMP147:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP146]]
@@ -2145,7 +2165,8 @@ define void @masked_strided2_reverse(ptr noalias nocapture readonly %p, ptr noal
; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP148]], ptr [[TMP147]], align 1
; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE52]]
; ENABLED_MASKED_STRIDED: pred.store.continue52:
-; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP116]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
+; ENABLED_MASKED_STRIDED-NEXT: [[TMP149:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4
+; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP149]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
; ENABLED_MASKED_STRIDED: pred.store.if53:
; ENABLED_MASKED_STRIDED-NEXT: [[TMP150:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4
; ENABLED_MASKED_STRIDED-NEXT: [[TMP151:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP150]]
@@ -2153,7 +2174,8 @@ define void @masked_strided2_reverse(ptr noalias nocapture readonly %p, ptr noal
; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP152]], ptr [[TMP151]], align 1
; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE54]]
; ENABLED_MASKED_STRIDED: pred.store.continue54:
-; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP120]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
+; ENABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5
+; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP153]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
; ENABLED_MASKED_STRIDED: pred.store.if55:
; ENABLED_MASKED_STRIDED-NEXT: [[TMP154:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5
; ENABLED_MASKED_STRIDED-NEXT: [[TMP155:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP154]]
@@ -2161,7 +2183,8 @@ define void @masked_strided2_reverse(ptr noalias nocapture readonly %p, ptr noal
; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP156]], ptr [[TMP155]], align 1
; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE56]]
; ENABLED_MASKED_STRIDED: pred.store.continue56:
-; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP124]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
+; ENABLED_MASKED_STRIDED-NEXT: [[TMP157:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6
+; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP157]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
; ENABLED_MASKED_STRIDED: pred.store.if57:
; ENABLED_MASKED_STRIDED-NEXT: [[TMP158:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6
; ENABLED_MASKED_STRIDED-NEXT: [[TMP159:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP158]]
@@ -2169,7 +2192,8 @@ define void @masked_strided2_reverse(ptr noalias nocapture readonly %p, ptr noal
; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP160]], ptr [[TMP159]], align 1
; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE58]]
; ENABLED_MASKED_STRIDED: pred.store.continue58:
-; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP128]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]]
+; ENABLED_MASKED_STRIDED-NEXT: [[TMP161:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7
+; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP161]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]]
; ENABLED_MASKED_STRIDED: pred.store.if59:
; ENABLED_MASKED_STRIDED-NEXT: [[TMP162:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7
; ENABLED_MASKED_STRIDED-NEXT: [[TMP163:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP162]]
@@ -2500,7 +2524,8 @@ define void @masked_strided2_unknown_tc(ptr noalias nocapture readonly %p, ptr n
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE46]]
; DISABLED_MASKED_STRIDED: pred.store.continue46:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP134:%.*]] = sub <8 x i8> zeroinitializer, [[TMP101]]
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP102]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP135:%.*]] = extractelement <8 x i1> [[TMP3]], i64 0
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP135]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if47:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP136:%.*]] = extractelement <8 x i32> [[TMP52]], i64 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP137:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP136]]
@@ -2508,7 +2533,8 @@ define void @masked_strided2_unknown_tc(ptr noalias nocapture readonly %p, ptr n
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP138]], ptr [[TMP137]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE48]]
; DISABLED_MASKED_STRIDED: pred.store.continue48:
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP106]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP139:%.*]] = extractelement <8 x i1> [[TMP3]], i64 1
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP139]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if49:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP140:%.*]] = extractelement <8 x i32> [[TMP52]], i64 1
; DISABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP140]]
@@ -2516,7 +2542,8 @@ define void @masked_strided2_unknown_tc(ptr noalias nocapture readonly %p, ptr n
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP142]], ptr [[TMP141]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE50]]
; DISABLED_MASKED_STRIDED: pred.store.continue50:
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP110]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP143:%.*]] = extractelement <8 x i1> [[TMP3]], i64 2
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP143]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if51:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = extractelement <8 x i32> [[TMP52]], i64 2
; DISABLED_MASKED_STRIDED-NEXT: [[TMP145:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP144]]
@@ -2524,7 +2551,8 @@ define void @masked_strided2_unknown_tc(ptr noalias nocapture readonly %p, ptr n
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP146]], ptr [[TMP145]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE52]]
; DISABLED_MASKED_STRIDED: pred.store.continue52:
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP114]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP147:%.*]] = extractelement <8 x i1> [[TMP3]], i64 3
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP147]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if53:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = extractelement <8 x i32> [[TMP52]], i64 3
; DISABLED_MASKED_STRIDED-NEXT: [[TMP149:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP148]]
@@ -2532,7 +2560,8 @@ define void @masked_strided2_unknown_tc(ptr noalias nocapture readonly %p, ptr n
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP150]], ptr [[TMP149]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE54]]
; DISABLED_MASKED_STRIDED: pred.store.continue54:
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP118]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP151:%.*]] = extractelement <8 x i1> [[TMP3]], i64 4
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP151]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if55:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = extractelement <8 x i32> [[TMP52]], i64 4
; DISABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP152]]
@@ -2540,7 +2569,8 @@ define void @masked_strided2_unknown_tc(ptr noalias nocapture readonly %p, ptr n
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP154]], ptr [[TMP153]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE56]]
; DISABLED_MASKED_STRIDED: pred.store.continue56:
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP122]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP155:%.*]] = extractelement <8 x i1> [[TMP3]], i64 5
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP155]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if57:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = extractelement <8 x i32> [[TMP52]], i64 5
; DISABLED_MASKED_STRIDED-NEXT: [[TMP157:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP156]]
@@ -2548,7 +2578,8 @@ define void @masked_strided2_unknown_tc(ptr noalias nocapture readonly %p, ptr n
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP158]], ptr [[TMP157]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE58]]
; DISABLED_MASKED_STRIDED: pred.store.continue58:
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP126]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP159:%.*]] = extractelement <8 x i1> [[TMP3]], i64 6
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP159]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if59:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP160:%.*]] = extractelement <8 x i32> [[TMP52]], i64 6
; DISABLED_MASKED_STRIDED-NEXT: [[TMP161:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP160]]
@@ -2556,7 +2587,8 @@ define void @masked_strided2_unknown_tc(ptr noalias nocapture readonly %p, ptr n
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP162]], ptr [[TMP161]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]]
; DISABLED_MASKED_STRIDED: pred.store.continue60:
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP130]], label [[PRED_STORE_IF61:%.*]], label [[PRED_STORE_CONTINUE62]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP163:%.*]] = extractelement <8 x i1> [[TMP3]], i64 7
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP163]], label [[PRED_STORE_IF61:%.*]], label [[PRED_STORE_CONTINUE62]]
; DISABLED_MASKED_STRIDED: pred.store.if61:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP164:%.*]] = extractelement <8 x i32> [[TMP52]], i64 7
; DISABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP164]]
@@ -2926,7 +2958,8 @@ define void @unconditional_masked_strided2_unknown_tc(ptr noalias nocapture read
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE44]]
; DISABLED_MASKED_STRIDED: pred.store.continue44:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP132:%.*]] = sub <8 x i8> zeroinitializer, [[TMP99]]
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP100]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP133:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP133]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if45:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP134:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP135:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP134]]
@@ -2934,7 +2967,8 @@ define void @unconditional_masked_strided2_unknown_tc(ptr noalias nocapture read
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP136]], ptr [[TMP135]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE46]]
; DISABLED_MASKED_STRIDED: pred.store.continue46:
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP104]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP137:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP137]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if47:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP138:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1
; DISABLED_MASKED_STRIDED-NEXT: [[TMP139:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP138]]
@@ -2942,7 +2976,8 @@ define void @unconditional_masked_strided2_unknown_tc(ptr noalias nocapture read
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP140]], ptr [[TMP139]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE48]]
; DISABLED_MASKED_STRIDED: pred.store.continue48:
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP108]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP141]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if49:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP142:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2
; DISABLED_MASKED_STRIDED-NEXT: [[TMP143:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP142]]
@@ -2950,7 +2985,8 @@ define void @unconditional_masked_strided2_unknown_tc(ptr noalias nocapture read
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP144]], ptr [[TMP143]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE50]]
; DISABLED_MASKED_STRIDED: pred.store.continue50:
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP112]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP145:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP145]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if51:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP146:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3
; DISABLED_MASKED_STRIDED-NEXT: [[TMP147:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP146]]
@@ -2958,7 +2994,8 @@ define void @unconditional_masked_strided2_unknown_tc(ptr noalias nocapture read
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP148]], ptr [[TMP147]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE52]]
; DISABLED_MASKED_STRIDED: pred.store.continue52:
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP116]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP149:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP149]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if53:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP150:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4
; DISABLED_MASKED_STRIDED-NEXT: [[TMP151:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP150]]
@@ -2966,7 +3003,8 @@ define void @unconditional_masked_strided2_unknown_tc(ptr noalias nocapture read
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP152]], ptr [[TMP151]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE54]]
; DISABLED_MASKED_STRIDED: pred.store.continue54:
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP120]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP153]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if55:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP154:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5
; DISABLED_MASKED_STRIDED-NEXT: [[TMP155:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP154]]
@@ -2974,7 +3012,8 @@ define void @unconditional_masked_strided2_unknown_tc(ptr noalias nocapture read
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP156]], ptr [[TMP155]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE56]]
; DISABLED_MASKED_STRIDED: pred.store.continue56:
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP124]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP157:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP157]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if57:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP158:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6
; DISABLED_MASKED_STRIDED-NEXT: [[TMP159:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP158]]
@@ -2982,7 +3021,8 @@ define void @unconditional_masked_strided2_unknown_tc(ptr noalias nocapture read
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP160]], ptr [[TMP159]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE58]]
; DISABLED_MASKED_STRIDED: pred.store.continue58:
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP128]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP161:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP161]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]]
; DISABLED_MASKED_STRIDED: pred.store.if59:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP162:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7
; DISABLED_MASKED_STRIDED-NEXT: [[TMP163:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP162]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll
index d1cd631f859d0..66a1bd31ac7d3 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll
@@ -181,7 +181,8 @@ define void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr noalias no
; DISABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x i8], ptr [[Y:%.*]], i64 [[INDEX]]
; DISABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_LOAD7:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr align 2 [[TMP19]], <4 x i1> [[TMP0]], <4 x i16> poison)
; DISABLED_MASKED_STRIDED-NEXT: [[TMP20:%.*]] = or disjoint <4 x i64> [[TMP2]], splat (i64 1)
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if8:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP20]], i64 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i8], ptr [[POINTS]], i64 [[TMP22]]
@@ -189,7 +190,8 @@ define void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr noalias no
; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP24]], ptr [[TMP23]], align 2
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE9]]
; DISABLED_MASKED_STRIDED: pred.store.continue9:
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP25:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if10:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP20]], i64 1
; DISABLED_MASKED_STRIDED-NEXT: [[TMP27:%.*]] = getelementptr inbounds [2 x i8], ptr [[POINTS]], i64 [[TMP26]]
@@ -197,7 +199,8 @@ define void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr noalias no
; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP28]], ptr [[TMP27]], align 2
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE11]]
; DISABLED_MASKED_STRIDED: pred.store.continue11:
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if12:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP30:%.*]] = extractelement <4 x i64> [[TMP20]], i64 2
; DISABLED_MASKED_STRIDED-NEXT: [[TMP31:%.*]] = getelementptr inbounds [2 x i8], ptr [[POINTS]], i64 [[TMP30]]
@@ -205,7 +208,8 @@ define void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr noalias no
; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP32]], ptr [[TMP31]], align 2
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE13]]
; DISABLED_MASKED_STRIDED: pred.store.continue13:
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP33:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP33]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15]]
; DISABLED_MASKED_STRIDED: pred.store.if14:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP34:%.*]] = extractelement <4 x i64> [[TMP20]], i64 3
; DISABLED_MASKED_STRIDED-NEXT: [[TMP35:%.*]] = getelementptr inbounds [2 x i8], ptr [[POINTS]], i64 [[TMP34]]
diff --git a/llvm/test/Transforms/LoopVectorize/as_cast.ll b/llvm/test/Transforms/LoopVectorize/as_cast.ll
index 2437bcfde08f9..67aacefebd555 100644
--- a/llvm/test/Transforms/LoopVectorize/as_cast.ll
+++ b/llvm/test/Transforms/LoopVectorize/as_cast.ll
@@ -22,7 +22,7 @@ loop:
; CHECK: [[ID2:%.*]] = add i64 %{{.*}}, 1
; CHECK: [[AS2:%.*]] = addrspacecast ptr addrspace(1) %in to ptr
; CHECK: [[GEP2:%.*]] = getelementptr inbounds i64, ptr [[AS2]], i64 [[ID2]]
-; CHECK: store i64 [[ID2]], ptr [[GEP2]], align 4
+; CHECK: store i64 [[ID2]], ptr %9, align 4
%cmp = icmp eq i64 %next, 7
br i1 %cmp, label %exit, label %loop
diff --git a/llvm/test/Transforms/LoopVectorize/cast-induction.ll b/llvm/test/Transforms/LoopVectorize/cast-induction.ll
index e93b84c2f5603..2764a61728fe3 100644
--- a/llvm/test/Transforms/LoopVectorize/cast-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/cast-induction.ll
@@ -273,15 +273,15 @@ define void @cast_induction_tail_folding(ptr %A) {
; IC2-NEXT: br label %[[VECTOR_BODY:.*]]
; IC2: [[VECTOR_BODY]]:
; IC2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
-; IC2-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
+; IC2-NEXT: [[INDEX0:%.*]] = add i32 [[INDEX]], 0
; IC2-NEXT: [[INDEX1:%.*]] = add i32 [[INDEX]], 1
-; IC2-NEXT: [[TMP2:%.*]] = icmp ule i32 [[TMP0]], 2
+; IC2-NEXT: [[TMP2:%.*]] = icmp ule i32 [[INDEX0]], 2
; IC2-NEXT: [[TMP3:%.*]] = icmp ule i32 [[INDEX1]], 2
; IC2-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; IC2: [[PRED_STORE_IF]]:
-; IC2-NEXT: [[TMP4:%.*]] = sext i32 [[TMP0]] to i64
+; IC2-NEXT: [[TMP4:%.*]] = sext i32 [[INDEX0]] to i64
; IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]]
-; IC2-NEXT: store i32 [[TMP0]], ptr [[TMP5]], align 4
+; IC2-NEXT: store i32 [[INDEX0]], ptr [[TMP5]], align 4
; IC2-NEXT: br label %[[PRED_STORE_CONTINUE]]
; IC2: [[PRED_STORE_CONTINUE]]:
; IC2-NEXT: br i1 [[TMP3]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
diff --git a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll
index fe561d7a53629..5ef3885cc8c69 100644
--- a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll
+++ b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll
@@ -546,6 +546,10 @@ define void @predicated_store(ptr %p, i32 %x, i64 %n) {
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP12]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x ptr> [[TMP13]], ptr [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x ptr> [[TMP19]], ptr [[TMP4]], i32 2
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x ptr> [[TMP25]], ptr [[TMP6]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP0]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP2]], align 8
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 8
@@ -627,8 +631,10 @@ define void @predicated_store(ptr %p, i32 %x, i64 %n) {
; INTER-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
; INTER-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; INTER: [[PRED_STORE_IF]]:
+; INTER-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0
+; INTER-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP7]], i32 0
; INTER-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 0
-; INTER-NEXT: store i32 [[TMP6]], ptr [[TMP2]], align 8
+; INTER-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8
; INTER-NEXT: br label %[[PRED_STORE_CONTINUE]]
; INTER: [[PRED_STORE_CONTINUE]]:
; INTER-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/constant-fold-commutative-and.ll b/llvm/test/Transforms/LoopVectorize/constant-fold-commutative-and.ll
index c3edba03885b1..97c54fa5a06c2 100644
--- a/llvm/test/Transforms/LoopVectorize/constant-fold-commutative-and.ll
+++ b/llvm/test/Transforms/LoopVectorize/constant-fold-commutative-and.ll
@@ -30,7 +30,8 @@ define void @constant_fold_commutative_and(ptr %ptr.n, ptr noalias %p, i1 %cond)
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[P]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[PREDPHI3]], i32 0
; CHECK-NEXT: store i1 [[TMP9]], ptr [[TMP8]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
diff --git a/llvm/test/Transforms/LoopVectorize/constantfolder.ll b/llvm/test/Transforms/LoopVectorize/constantfolder.ll
index 047a6f48837b8..0cc8db02e5c6d 100644
--- a/llvm/test/Transforms/LoopVectorize/constantfolder.ll
+++ b/llvm/test/Transforms/LoopVectorize/constantfolder.ll
@@ -401,8 +401,9 @@ define void @unused_live_ins_in_preheader(ptr %dst, ptr %src) {
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[DST]], i32 [[INDEX]]
-; CHECK-NEXT: store i32 [[INDEX]], ptr [[TMP3]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP2]]
+; CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 8
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/cse-casts.ll b/llvm/test/Transforms/LoopVectorize/cse-casts.ll
index 19abb4016b498..1b5cb2dc88b0d 100644
--- a/llvm/test/Transforms/LoopVectorize/cse-casts.ll
+++ b/llvm/test/Transforms/LoopVectorize/cse-casts.ll
@@ -131,8 +131,9 @@ define void @preserve_flags_narrowing_extends_and_truncs(ptr noalias %A, ptr noa
; CHECK-NEXT: [[TMP33:%.*]] = zext <4 x i8> [[TMP31]] to <4 x i64>
; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
+; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 0
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <4 x i64> [[TMP32]], i32 0
-; CHECK-NEXT: store i64 [[TMP35]], ptr [[C]], align 4
+; CHECK-NEXT: store i64 [[TMP35]], ptr [[TMP34]], align 4
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/debugloc.ll b/llvm/test/Transforms/LoopVectorize/debugloc.ll
index 15295ad5f0bd0..4b364133dfefe 100644
--- a/llvm/test/Transforms/LoopVectorize/debugloc.ll
+++ b/llvm/test/Transforms/LoopVectorize/debugloc.ll
@@ -66,7 +66,8 @@ define i32 @test_debug_loc_on_branch_in_loop(ptr noalias %src, ptr noalias %dst)
; CHECK-NEXT: br i1 [[EXT]], label %pred.store.if, label %pred.store.continue, !dbg [[LOC3]]
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.if:
-; CHECK-NEXT: [[GEP:%.+]] = getelementptr inbounds i32, ptr %dst, i64 %index
+; CHECK-NEXT: [[IDX:%.+]] = add i64 %index, 0
+; CHECK-NEXT: [[GEP:%.+]] = getelementptr inbounds i32, ptr %dst, i64 [[IDX]]
; CHECK-NEXT: store i32 0, ptr [[GEP]], align 4
; CHECK-NEXT: br label %pred.store.continue, !dbg [[LOC3]]
; CHECK-EMPTY:
@@ -104,7 +105,8 @@ define i32 @test_different_debug_loc_on_replicate_recipe(ptr noalias %src, ptr n
; CHECK-NEXT: br i1 [[EXT]], label %pred.store.if, label %pred.store.continue, !dbg [[LOC4]]
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.if:
-; CHECK-NEXT: [[GEP:%.+]] = getelementptr inbounds i32, ptr %dst, i64 %index, !dbg [[LOC5:!.+]]
+; CHECK-NEXT: [[IDX:%.+]] = add i64 %index, 0
+; CHECK-NEXT: [[GEP:%.+]] = getelementptr inbounds i32, ptr %dst, i64 [[IDX]], !dbg [[LOC5:!.+]]
; CHECK-NEXT: store i32 0, ptr [[GEP]], align 4
; CHECK-NEXT: br label %pred.store.continue, !dbg [[LOC4]]
; CHECK-EMPTY:
diff --git a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
index d43d35aec6524..f089e2d5611b6 100644
--- a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
+++ b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
@@ -164,7 +164,8 @@ define void @cannotProveAlignedTC(ptr noalias nocapture %A, i32 %p, i32 %q) opts
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
+; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP2]]
; CHECK-NEXT: store i32 13, ptr [[TMP3]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
diff --git a/llvm/test/Transforms/LoopVectorize/find-last-iv-sinkable-load.ll b/llvm/test/Transforms/LoopVectorize/find-last-iv-sinkable-load.ll
index ba8d05c9e2c01..5d78d7b2e320d 100644
--- a/llvm/test/Transforms/LoopVectorize/find-last-iv-sinkable-load.ll
+++ b/llvm/test/Transforms/LoopVectorize/find-last-iv-sinkable-load.ll
@@ -635,7 +635,9 @@ define i64 @findlast_load_cond_store_other_block(ptr noalias %a, ptr %b, i64 %n)
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP8]], i32 0
; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: store i64 0, ptr [[TMP3]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP10]]
+; CHECK-NEXT: store i64 0, ptr [[TMP11]], align 8
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP8]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
index 9b7dde4650443..abb50538fb2e8 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
@@ -2941,7 +2941,8 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) {
; UNROLL-NO-IC-NEXT: [[TMP48:%.*]] = extractelement <4 x i1> [[TMP10]], i32 0
; UNROLL-NO-IC-NEXT: br i1 [[TMP48]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; UNROLL-NO-IC: pred.store.if:
-; UNROLL-NO-IC-NEXT: [[TMP50:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[INDEX]]
+; UNROLL-NO-IC-NEXT: [[TMP49:%.*]] = add i32 [[INDEX]], 0
+; UNROLL-NO-IC-NEXT: [[TMP50:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP49]]
; UNROLL-NO-IC-NEXT: store i32 [[OFFSET_IDX]], ptr [[TMP50]], align 4
; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE]]
; UNROLL-NO-IC: pred.store.continue:
@@ -3139,7 +3140,8 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) {
; SINK-AFTER-NEXT: [[TMP25:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
; SINK-AFTER-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; SINK-AFTER: pred.store.if:
-; SINK-AFTER-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[INDEX]]
+; SINK-AFTER-NEXT: [[TMP26:%.*]] = add i32 [[INDEX]], 0
+; SINK-AFTER-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP26]]
; SINK-AFTER-NEXT: store i32 [[OFFSET_IDX]], ptr [[TMP27]], align 4
; SINK-AFTER-NEXT: br label [[PRED_STORE_CONTINUE]]
; SINK-AFTER: pred.store.continue:
diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll
index 2dd234bc8c3d0..4ed912b8054ca 100644
--- a/llvm/test/Transforms/LoopVectorize/float-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll
@@ -1322,7 +1322,8 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0
; VEC4_INTERL1-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; VEC4_INTERL1: pred.store.if:
-; VEC4_INTERL1-NEXT: store float [[DOTCAST2]], ptr [[TMP0]], align 4
+; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [4 x i8], ptr [[A]], i64 [[INDEX]]
+; VEC4_INTERL1-NEXT: store float [[DOTCAST2]], ptr [[TMP3]], align 4
; VEC4_INTERL1-NEXT: br label [[PRED_STORE_CONTINUE]]
; VEC4_INTERL1: pred.store.continue:
; VEC4_INTERL1-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i64 1
@@ -1401,7 +1402,8 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
; VEC4_INTERL2-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP3]], i64 0
; VEC4_INTERL2-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; VEC4_INTERL2: pred.store.if:
-; VEC4_INTERL2-NEXT: store float [[DOTCAST2]], ptr [[TMP1]], align 4
+; VEC4_INTERL2-NEXT: [[TMP35:%.*]] = getelementptr inbounds [4 x i8], ptr [[A]], i64 [[INDEX]]
+; VEC4_INTERL2-NEXT: store float [[DOTCAST2]], ptr [[TMP35]], align 4
; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE]]
; VEC4_INTERL2: pred.store.continue:
; VEC4_INTERL2-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP3]], i64 1
@@ -1571,7 +1573,8 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i64 0
; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; VEC2_INTERL1_PRED_STORE: pred.store.if:
-; VEC2_INTERL1_PRED_STORE-NEXT: store float [[DOTCAST2]], ptr [[TMP0]], align 4
+; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = getelementptr inbounds [4 x i8], ptr [[A]], i64 [[INDEX]]
+; VEC2_INTERL1_PRED_STORE-NEXT: store float [[DOTCAST2]], ptr [[TMP3]], align 4
; VEC2_INTERL1_PRED_STORE-NEXT: br label [[PRED_STORE_CONTINUE]]
; VEC2_INTERL1_PRED_STORE: pred.store.continue:
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP1]], i64 1
diff --git a/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll b/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll
index 748dae79fa407..c420157218496 100644
--- a/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll
+++ b/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll
@@ -162,7 +162,8 @@ define void @dont_hoist_predicated_load(ptr %dst, ptr %invariant_ptr, ptr %cond_
; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4, !alias.scope [[META14:![0-9]+]]
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
+; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
; CHECK-NEXT: store i32 [[TMP7]], ptr [[TMP9]], align 4, !alias.scope [[META16:![0-9]+]], !noalias [[META18:![0-9]+]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
diff --git a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll
index 6d51c2e07ddf0..38527baffd2e1 100644
--- a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll
@@ -240,6 +240,8 @@ define void @test_noalias_store_via_runtime_checks(ptr %dst, ptr %dst.1, ptr %sr
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE20:.*]] ]
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP5]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META22:![0-9]+]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt <2 x i32> [[WIDE_LOAD]], splat (i32 11)
@@ -342,6 +344,8 @@ define void @test_memory_op_between_loads_alias(ptr %dst, ptr %src, ptr %cond, p
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE11:.*]] ]
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP5]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META35:![0-9]+]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt <2 x i32> [[WIDE_LOAD]], splat (i32 11)
@@ -454,6 +458,8 @@ define void @test_memory_op_between_loads_no_alias_via_rt_checks(ptr %dst, ptr %
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE20:.*]] ]
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP5]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META45:![0-9]+]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt <2 x i32> [[WIDE_LOAD]], splat (i32 11)
@@ -650,6 +656,8 @@ define void @test_stores_not_sunk_aliasing_load_between(ptr %dst, ptr %mid, ptr
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE15:.*]] ]
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
+; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
+; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP18]], i32 [[TMP1]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META68:![0-9]+]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
@@ -761,6 +769,8 @@ define void @sink_multiple_store_groups_noalias_via_scev(ptr %dst, ptr %src) {
; CHECK-NEXT: [[TMP34:%.*]] = fadd <2 x double> [[WIDE_LOAD]], splat (double 8.000000e+00)
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[DST]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]]
+; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP18]], i32 0
+; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x ptr> [[TMP31]], ptr [[TMP21]], i32 1
; CHECK-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP10]], <2 x double> [[WIDE_LOAD]], <2 x double> [[TMP34]]
; CHECK-NEXT: [[TMP32:%.*]] = extractelement <2 x double> [[TMP20]], i32 0
; CHECK-NEXT: store double [[TMP32]], ptr [[TMP18]], align 8, !alias.scope [[META81:![0-9]+]], !noalias [[META78]]
@@ -841,6 +851,8 @@ define void @sink_multiple_store_groups_alias_via_scev(ptr %dst, ptr %src) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[TMP1]], i32 1
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP1]]
; CHECK-NEXT: [[GEP_FLAG:%.*]] = getelementptr i8, ptr [[GEP_SRC]], i64 152
@@ -1025,6 +1037,8 @@ define void @test_three_stores_with_different_predicates(ptr %dst, ptr %src, ptr
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE11:.*]] ]
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
+; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
+; CHECK-NEXT: [[TMP24:%.*]] = insertelement <2 x i32> [[TMP23]], i32 [[TMP1]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META92:![0-9]+]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
index f216024303a05..8cade4f3ea078 100644
--- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
@@ -82,9 +82,11 @@ define i32 @test(ptr nocapture %f) #0 {
; VEC-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
; VEC-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; VEC: pred.store.if:
+; VEC-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
+; VEC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[F]], i64 [[TMP5]]
; VEC-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0
; VEC-NEXT: [[TMP8:%.*]] = add nsw i32 [[TMP7]], 20
-; VEC-NEXT: store i32 [[TMP8]], ptr [[TMP1]], align 4
+; VEC-NEXT: store i32 [[TMP8]], ptr [[TMP6]], align 4
; VEC-NEXT: br label [[PRED_STORE_CONTINUE]]
; VEC: pred.store.continue:
; VEC-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
@@ -303,8 +305,10 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) {
; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 4
; VEC-NEXT: br i1 [[COND_2:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE2]]
; VEC: pred.store.if:
+; VEC-NEXT: [[INDVARS_IV3:%.*]] = add i64 [[OFFSET_IDX]], 0
+; VEC-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR]], i64 0, i64 [[INDVARS_IV3]]
; VEC-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0
-; VEC-NEXT: store i32 [[TMP11]], ptr [[TMP7]], align 4
+; VEC-NEXT: store i32 [[TMP11]], ptr [[ARRAYIDX16]], align 4
; VEC-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 1
; VEC-NEXT: [[TMP13:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR]], i64 0, i64 [[TMP12]]
; VEC-NEXT: [[TMP14:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1
@@ -440,8 +444,10 @@ define void @minimal_bit_widths(ptr %p, i1 %c) {
; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP1]], align 1
; VEC-NEXT: br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE2]]
; VEC: pred.store.if:
+; VEC-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0
+; VEC-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP8]]
; VEC-NEXT: [[TMP4:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 0
-; VEC-NEXT: store i8 [[TMP4]], ptr [[TMP1]], align 1
+; VEC-NEXT: store i8 [[TMP4]], ptr [[TMP3]], align 1
; VEC-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1
; VEC-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP5]]
; VEC-NEXT: [[TMP7:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 1
@@ -547,8 +553,10 @@ define void @minimal_bit_widths_with_aliasing_store(i1 %c, ptr %ptr) {
; VEC-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP2]], align 1
; VEC-NEXT: br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE2]]
; VEC: pred.store.if:
+; VEC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; VEC-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP0]]
; VEC-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 0
-; VEC-NEXT: store i8 [[TMP5]], ptr [[TMP2]], align 1
+; VEC-NEXT: store i8 [[TMP5]], ptr [[TMP4]], align 1
; VEC-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1
; VEC-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP7]]
; VEC-NEXT: [[TMP9:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll
index 64c1580f18bd1..3bd712132637f 100644
--- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll
@@ -35,8 +35,10 @@ define void @interleaved_with_cond_store_0(ptr %p, i64 %x, i64 %n) {
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i64 0
; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
+; CHECK-NEXT: [[DOTSPLIT3:%.*]] = getelementptr inbounds [16 x i8], ptr [[P]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[DOTSPLIT3]], i64 8
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 0
-; CHECK-NEXT: store i64 [[TMP4]], ptr [[TMP0]], align 8
+; CHECK-NEXT: store i64 [[TMP4]], ptr [[TMP3]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP1]], i64 1
@@ -132,8 +134,9 @@ define void @interleaved_with_cond_store_1(ptr %p, i64 %x, i64 %n) {
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i64 0
; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [16 x i8], ptr [[P]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 0
-; CHECK-NEXT: store i64 [[TMP7]], ptr [[TMP1]], align 8
+; CHECK-NEXT: store i64 [[TMP7]], ptr [[TMP6]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP4]], i64 1
@@ -239,8 +242,10 @@ define void @interleaved_with_cond_store_2(ptr %p, i64 %x, i64 %n) {
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i64 0
; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
+; CHECK-NEXT: [[DOTSPLIT3:%.*]] = getelementptr inbounds [16 x i8], ptr [[P]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[DOTSPLIT3]], i64 8
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 0
-; CHECK-NEXT: store i64 [[TMP7]], ptr [[TMP3]], align 8
+; CHECK-NEXT: store i64 [[TMP7]], ptr [[TMP6]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP4]], i64 1
diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
index 6ad9928a4a017..e20a6673a2b16 100644
--- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
+++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
@@ -191,7 +191,8 @@ define i32 @loop_requires_scev_predicate(ptr %dest, i32 %end) {
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0
; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[DEST:%.*]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[DEST:%.*]], i64 [[TMP8]]
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[WIDE_LOAD3]], i32 0
; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP16]], [[TMP17]]
@@ -306,7 +307,8 @@ define void @test_rev_loops_deref_loads(ptr nocapture noundef writeonly %dest) {
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP0]]
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[REVERSE2]], i32 0
; CHECK-NEXT: [[TMP12:%.*]] = shl nsw i32 [[TMP11]], 2
; CHECK-NEXT: store i32 [[TMP12]], ptr [[TMP10]], align 4
@@ -391,9 +393,11 @@ define void @test_rev_loops_non_deref_loads(ptr nocapture noundef writeonly %des
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP6]], i32 0
; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP1]]
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP1]]
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = shl nsw i32 [[TMP10]], 2
; CHECK-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
@@ -404,7 +408,8 @@ define void @test_rev_loops_non_deref_loads(ptr nocapture noundef writeonly %des
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP15]]
; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
-; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP15]]
+; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP18]]
; CHECK-NEXT: [[TMP20:%.*]] = shl nsw i32 [[TMP17]], 2
; CHECK-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]]
@@ -547,10 +552,13 @@ define void @test_rev_loops_strided_deref_loads(ptr nocapture noundef writeonly
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP8]], align 4
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
+; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0
+; CHECK-NEXT: [[TMP24:%.*]] = insertelement <2 x i32> [[TMP23]], i32 [[TMP12]], i32 1
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP0]]
; CHECK-NEXT: [[TMP15:%.*]] = shl nsw i32 [[TMP11]], 2
; CHECK-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
diff --git a/llvm/test/Transforms/LoopVectorize/loop-form.ll b/llvm/test/Transforms/LoopVectorize/loop-form.ll
index b43c35b873f8c..b46cd8ecea5b3 100644
--- a/llvm/test/Transforms/LoopVectorize/loop-form.ll
+++ b/llvm/test/Transforms/LoopVectorize/loop-form.ll
@@ -1061,7 +1061,9 @@ define void @scalar_predication(ptr %addr) {
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: store float 1.000000e+01, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr float, ptr [[ADDR]], i64 [[TMP0]]
+; CHECK-NEXT: store float 1.000000e+01, ptr [[TMP6]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll b/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll
index a11e3e0712477..319e1a3bae10f 100644
--- a/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll
+++ b/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll
@@ -34,7 +34,8 @@ define void @maxvf3() {
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[INDEX]]
+; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[TMP2]]
; CHECK-NEXT: store i8 69, ptr [[TMP3]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
@@ -47,14 +48,16 @@ define void @maxvf3() {
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]]
; CHECK: pred.store.continue2:
; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw <2 x i32> splat (i32 3), [[VEC_IND]]
-; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
+; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
; CHECK: pred.store.if3:
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[TMP7]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[TMP9]]
; CHECK-NEXT: store i8 7, ptr [[TMP10]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
; CHECK: pred.store.continue4:
-; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
+; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
; CHECK: pred.store.if5:
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i32> [[TMP7]], i32 1
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[TMP12]]
diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
index 60cb29dec260a..736d8356d2219 100644
--- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
@@ -32,6 +32,10 @@ define void @a(ptr readnone %b) {
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]]
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP14]]
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr null, i64 [[TMP17]]
+; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x ptr> poison, ptr [[NEXT_GEP]], i32 0
+; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x ptr> [[TMP21]], ptr [[NEXT_GEP2]], i32 1
+; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x ptr> [[TMP22]], ptr [[NEXT_GEP3]], i32 2
+; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x ptr> [[TMP23]], ptr [[NEXT_GEP4]], i32 3
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 -3
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1
@@ -40,7 +44,8 @@ define void @a(ptr readnone %b) {
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0
; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: store i8 95, ptr [[TMP3]], align 1
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1
+; CHECK-NEXT: store i8 95, ptr [[TMP9]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/pr37248.ll b/llvm/test/Transforms/LoopVectorize/pr37248.ll
index bf1ab0685ec55..98da110a44e8c 100644
--- a/llvm/test/Transforms/LoopVectorize/pr37248.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr37248.ll
@@ -37,24 +37,25 @@ define void @f1(ptr noalias %b, i1 %c, i32 %start) {
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = sub i32 [[START]], [[N_VEC]]
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 0
-; CHECK-NEXT: [[TMP12:%.*]] = xor i1 [[TMP11]], true
+; CHECK-NEXT: [[TMP11:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE3:.*]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[START]], [[INDEX]]
-; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[OFFSET_IDX]] to i16
-; CHECK-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK-NEXT: [[TMP12:%.*]] = trunc i32 [[OFFSET_IDX]] to i16
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0
+; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
; CHECK-NEXT: store i32 10, ptr [[B]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
-; CHECK-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3]]
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1
+; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3]]
; CHECK: [[PRED_STORE_IF2]]:
; CHECK-NEXT: store i32 10, ptr [[B]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE3]]
; CHECK: [[PRED_STORE_CONTINUE3]]:
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x i16], ptr @a, i16 0, i16 [[TMP13]]
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x i16], ptr @a, i16 0, i16 [[TMP12]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i64 -1
; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP17]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
diff --git a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
index 610da05dbbab7..bfe996fccd0b2 100644
--- a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
@@ -23,7 +23,8 @@ define void @pr45679(ptr %A) {
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
+; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP2]]
; CHECK-NEXT: store i32 13, ptr [[TMP3]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
@@ -74,7 +75,8 @@ define void @pr45679(ptr %A) {
; VF2UF2-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; VF2UF2-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; VF2UF2: pred.store.if:
-; VF2UF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
+; VF2UF2-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0
+; VF2UF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP3]]
; VF2UF2-NEXT: store i32 13, ptr [[TMP4]], align 1
; VF2UF2-NEXT: br label [[PRED_STORE_CONTINUE]]
; VF2UF2: pred.store.continue:
@@ -186,7 +188,8 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) {
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
; CHECK-NEXT: store i64 [[TMP4]], ptr [[B:%.*]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
@@ -241,7 +244,8 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) {
; VF2UF2-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; VF2UF2-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; VF2UF2: pred.store.if:
-; VF2UF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
+; VF2UF2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
+; VF2UF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP3]]
; VF2UF2-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8
; VF2UF2-NEXT: store i64 [[TMP5]], ptr [[B:%.*]], align 8
; VF2UF2-NEXT: br label [[PRED_STORE_CONTINUE]]
diff --git a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
index 36806e6ad125e..17370fcdd576d 100644
--- a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
@@ -21,6 +21,8 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; IC1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
; IC1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]]
; IC1-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP2]]
+; IC1-NEXT: [[TMP12:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0
+; IC1-NEXT: [[TMP16:%.*]] = insertelement <2 x ptr> [[TMP12]], ptr [[NEXT_GEP3]], i32 1
; IC1-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[NEXT_GEP]], align 1
; IC1-NEXT: [[TMP7:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], splat (i8 -12)
; IC1-NEXT: [[TMP4:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], splat (i8 13)
@@ -115,8 +117,12 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; IC2-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3
; IC2-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]]
; IC2-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP2]]
+; IC2-NEXT: [[TMP23:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0
+; IC2-NEXT: [[TMP24:%.*]] = insertelement <2 x ptr> [[TMP23]], ptr [[NEXT_GEP3]], i32 1
; IC2-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP3]]
; IC2-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
+; IC2-NEXT: [[TMP30:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP4]], i32 0
+; IC2-NEXT: [[TMP31:%.*]] = insertelement <2 x ptr> [[TMP30]], ptr [[NEXT_GEP5]], i32 1
; IC2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 2
; IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[NEXT_GEP]], align 1
; IC2-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x i8>, ptr [[TMP6]], align 1
diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll
index ffb16d07f3ea8..fa03c62bb4927 100644
--- a/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll
+++ b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll
@@ -488,7 +488,8 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
; CHECK-VF4-IC1-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
; CHECK-VF4-IC1-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK-VF4-IC1: pred.store.if:
-; CHECK-VF4-IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
+; CHECK-VF4-IC1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
+; CHECK-VF4-IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
; CHECK-VF4-IC1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
; CHECK-VF4-IC1-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP10]], 1
; CHECK-VF4-IC1-NEXT: store i32 [[TMP11]], ptr [[TMP9]], align 4, !alias.scope [[META9]], !noalias [[META6]]
@@ -606,7 +607,8 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
; CHECK-VF4-IC2-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0
; CHECK-VF4-IC2-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK-VF4-IC2: pred.store.if:
-; CHECK-VF4-IC2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
+; CHECK-VF4-IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
+; CHECK-VF4-IC2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
; CHECK-VF4-IC2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
; CHECK-VF4-IC2-NEXT: [[TMP18:%.*]] = add nsw i32 [[TMP17]], 1
; CHECK-VF4-IC2-NEXT: store i32 [[TMP18]], ptr [[TMP16]], align 4, !alias.scope [[META9]], !noalias [[META6]]
diff --git a/llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll b/llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll
index 037a76aa81c25..b9be77ff224ff 100644
--- a/llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll
+++ b/llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll
@@ -533,8 +533,10 @@ define void @struct_return_2xf32_replicate_predicated(ptr %a) {
; VF4-NEXT: [[TMP51:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
; VF4-NEXT: br i1 [[TMP51]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF4: [[PRED_STORE_IF]]:
+; VF4-NEXT: [[TMP52:%.*]] = add i64 [[INDEX]], 0
+; VF4-NEXT: [[TMP53:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP52]]
; VF4-NEXT: [[TMP54:%.*]] = extractelement <4 x float> [[TMP50]], i32 0
-; VF4-NEXT: store float [[TMP54]], ptr [[TMP0]], align 8
+; VF4-NEXT: store float [[TMP54]], ptr [[TMP53]], align 8
; VF4-NEXT: br label %[[PRED_STORE_CONTINUE]]
; VF4: [[PRED_STORE_CONTINUE]]:
; VF4-NEXT: [[TMP55:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
@@ -588,9 +590,12 @@ define void @struct_return_2xf32_replicate_predicated(ptr %a) {
; VF2IC2: [[PRED_STORE_IF]]:
; VF2IC2-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 0
; VF2IC2-NEXT: [[TMP6:%.*]] = tail call { float, float } @fn2(float [[TMP5]]) #[[ATTR3:[0-9]+]]
+; VF2IC2-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0
; VF2IC2-NEXT: [[TMP8:%.*]] = extractvalue { float, float } [[TMP6]], 0
-; VF2IC2-NEXT: [[TMP11:%.*]] = fdiv float [[TMP8]], [[TMP5]]
-; VF2IC2-NEXT: store float [[TMP11]], ptr [[TMP0]], align 8
+; VF2IC2-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP7]]
+; VF2IC2-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 0
+; VF2IC2-NEXT: [[TMP11:%.*]] = fdiv float [[TMP8]], [[TMP10]]
+; VF2IC2-NEXT: store float [[TMP11]], ptr [[TMP9]], align 8
; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE]]
; VF2IC2: [[PRED_STORE_CONTINUE]]:
; VF2IC2-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
@@ -601,7 +606,8 @@ define void @struct_return_2xf32_replicate_predicated(ptr %a) {
; VF2IC2-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 1
; VF2IC2-NEXT: [[TMP16:%.*]] = extractvalue { float, float } [[TMP14]], 0
; VF2IC2-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP15]]
-; VF2IC2-NEXT: [[TMP19:%.*]] = fdiv float [[TMP16]], [[TMP13]]
+; VF2IC2-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 1
+; VF2IC2-NEXT: [[TMP19:%.*]] = fdiv float [[TMP16]], [[TMP18]]
; VF2IC2-NEXT: store float [[TMP19]], ptr [[TMP17]], align 8
; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE3]]
; VF2IC2: [[PRED_STORE_CONTINUE3]]:
@@ -613,7 +619,8 @@ define void @struct_return_2xf32_replicate_predicated(ptr %a) {
; VF2IC2-NEXT: [[TMP23:%.*]] = add i64 [[INDEX]], 2
; VF2IC2-NEXT: [[TMP24:%.*]] = extractvalue { float, float } [[TMP22]], 0
; VF2IC2-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP23]]
-; VF2IC2-NEXT: [[TMP27:%.*]] = fdiv float [[TMP24]], [[TMP21]]
+; VF2IC2-NEXT: [[TMP26:%.*]] = extractelement <2 x float> [[WIDE_LOAD1]], i32 0
+; VF2IC2-NEXT: [[TMP27:%.*]] = fdiv float [[TMP24]], [[TMP26]]
; VF2IC2-NEXT: store float [[TMP27]], ptr [[TMP25]], align 8
; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE5]]
; VF2IC2: [[PRED_STORE_CONTINUE5]]:
@@ -625,7 +632,8 @@ define void @struct_return_2xf32_replicate_predicated(ptr %a) {
; VF2IC2-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 3
; VF2IC2-NEXT: [[TMP32:%.*]] = extractvalue { float, float } [[TMP30]], 0
; VF2IC2-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP31]]
-; VF2IC2-NEXT: [[TMP35:%.*]] = fdiv float [[TMP32]], [[TMP29]]
+; VF2IC2-NEXT: [[TMP34:%.*]] = extractelement <2 x float> [[WIDE_LOAD1]], i32 1
+; VF2IC2-NEXT: [[TMP35:%.*]] = fdiv float [[TMP32]], [[TMP34]]
; VF2IC2-NEXT: store float [[TMP35]], ptr [[TMP33]], align 8
; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE7]]
; VF2IC2: [[PRED_STORE_CONTINUE7]]:
diff --git a/llvm/test/Transforms/LoopVectorize/struct-return.ll b/llvm/test/Transforms/LoopVectorize/struct-return.ll
index 8049de1b792e1..dd1117039512b 100644
--- a/llvm/test/Transforms/LoopVectorize/struct-return.ll
+++ b/llvm/test/Transforms/LoopVectorize/struct-return.ll
@@ -281,8 +281,11 @@ define void @scalarized_predicated_struct_return(ptr %a) {
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = tail call { i64, i64 } @bar_i64(i64 [[TMP3]]) #[[ATTR2:[0-9]+]]
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { i64, i64 } [[TMP4]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = udiv i64 [[TMP5]], [[TMP3]]
-; CHECK-NEXT: store i64 [[TMP7]], ptr [[TMP0]], align 8
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = udiv i64 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
+; CHECK-NEXT: store i64 [[TMP7]], ptr [[TMP9]], align 8
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
@@ -291,7 +294,8 @@ define void @scalarized_predicated_struct_return(ptr %a) {
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 1
; CHECK-NEXT: [[TMP12:%.*]] = tail call { i64, i64 } @bar_i64(i64 [[TMP11]]) #[[ATTR2]]
; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { i64, i64 } [[TMP12]], 0
-; CHECK-NEXT: [[TMP15:%.*]] = udiv i64 [[TMP13]], [[TMP11]]
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 1
+; CHECK-NEXT: [[TMP15:%.*]] = udiv i64 [[TMP13]], [[TMP14]]
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP16]]
; CHECK-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll
index 464545bf58aa2..9e523be618b44 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll
@@ -13,11 +13,15 @@ define i32 @test(ptr %vf1, i64 %n) {
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 -56)
; CHECK-NEXT: [[TMP18:%.*]] = alloca i8, i64 [[N]], align 16
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP18]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[INDEX]]
-; CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP3]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0
+; CHECK-NEXT: store ptr [[TMP4]], ptr [[TMP3]], align 8
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
@@ -25,7 +29,8 @@ define i32 @test(ptr %vf1, i64 %n) {
; CHECK: [[PRED_STORE_IF1]]:
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP6]]
-; CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP7]], align 8
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0
+; CHECK-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_CONTINUE2]]:
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
@@ -33,7 +38,8 @@ define i32 @test(ptr %vf1, i64 %n) {
; CHECK: [[PRED_STORE_IF3]]:
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP10]]
-; CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP11]], align 8
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0
+; CHECK-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]]
; CHECK: [[PRED_STORE_CONTINUE4]]:
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
@@ -41,7 +47,8 @@ define i32 @test(ptr %vf1, i64 %n) {
; CHECK: [[PRED_STORE_IF5]]:
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP14]]
-; CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP15]], align 8
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0
+; CHECK-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]]
; CHECK: [[PRED_STORE_CONTINUE6]]:
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-div.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-div.ll
index 87f6751b1d013..c9bc762fd8573 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-div.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-div.ll
@@ -425,7 +425,8 @@ define void @test_sdiv_variant_dividend_induction(i64 %a, ptr noalias %c) {
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[C]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[C]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
; CHECK-NEXT: store i64 [[TMP5]], ptr [[TMP4]], align 4
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-masked-mem-opts.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-masked-mem-opts.ll
index 55c1fb98dbe46..4f0e7282025a1 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-masked-mem-opts.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-masked-mem-opts.ll
@@ -57,9 +57,10 @@ define void @simple_memcpy(ptr noalias %dst, ptr noalias %src, i64 %n) {
; CHECK-PREDICATE-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; CHECK-PREDICATE-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK-PREDICATE: [[PRED_STORE_IF]]:
-; CHECK-PREDICATE-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[INDEX]]
+; CHECK-PREDICATE-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
+; CHECK-PREDICATE-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[TMP2]]
; CHECK-PREDICATE-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-; CHECK-PREDICATE-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[DST]], i64 [[INDEX]]
+; CHECK-PREDICATE-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP2]]
; CHECK-PREDICATE-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4
; CHECK-PREDICATE-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK-PREDICATE: [[PRED_STORE_CONTINUE]]:
@@ -125,7 +126,8 @@ define void @non_consecutive_copy(ptr noalias %dst, ptr noalias %src, i64 %n) {
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP6]]
; CHECK-NEXT: store i32 [[TMP5]], ptr [[TMP7]], align 4
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
@@ -135,7 +137,8 @@ define void @non_consecutive_copy(ptr noalias %dst, ptr noalias %src, i64 %n) {
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP9]]
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP12]]
; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP13]], align 4
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE3]]
; CHECK: [[PRED_STORE_CONTINUE3]]:
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll
index f3fe5cc7bd54d..00e04c7daee51 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll
@@ -15,7 +15,8 @@ define void @canonical_small_tc_i8(ptr nocapture noundef writeonly %p) {
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
; CHECK-NEXT: store i16 1, ptr [[TMP3]], align 2
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
@@ -65,7 +66,8 @@ define void @canonical_upper_limit_i8(ptr nocapture noundef writeonly %p) {
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
; CHECK-NEXT: store i16 1, ptr [[TMP3]], align 2
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
@@ -115,7 +117,8 @@ define void @canonical_lower_limit_i16(ptr nocapture noundef writeonly %p) {
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
; CHECK-NEXT: store i16 1, ptr [[TMP3]], align 2
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
@@ -165,7 +168,8 @@ define void @canonical_upper_limit_i16(ptr nocapture noundef writeonly %p) {
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
; CHECK-NEXT: store i16 1, ptr [[TMP3]], align 2
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
@@ -215,7 +219,8 @@ define void @canonical_lower_limit_i32(ptr nocapture noundef writeonly %p) {
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
; CHECK-NEXT: store i16 1, ptr [[TMP3]], align 2
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
@@ -265,7 +270,8 @@ define void @canonical_upper_limit_i32(ptr nocapture noundef writeonly %p) {
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
; CHECK-NEXT: store i16 1, ptr [[TMP3]], align 2
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
@@ -315,7 +321,8 @@ define void @canonical_lower_limit_i64(ptr nocapture noundef writeonly %p) {
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
; CHECK-NEXT: store i16 1, ptr [[TMP3]], align 2
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
@@ -365,7 +372,8 @@ define void @canonical_upper_limit_i64(ptr nocapture noundef writeonly %p) {
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
; CHECK-NEXT: store i16 1, ptr [[TMP3]], align 2
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
@@ -415,7 +423,8 @@ define void @canonical_lower_limit_i128(ptr nocapture noundef writeonly %p) {
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i256 [[INDEX]]
+; CHECK-NEXT: [[TMP2:%.*]] = add i256 [[INDEX]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i256 [[TMP2]]
; CHECK-NEXT: store i16 1, ptr [[TMP3]], align 2
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll
index 95e602fd5b392..b6f43aaa86e33 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll
@@ -20,7 +20,8 @@ define void @tail_fold_switch(ptr %dst, i32 %0) {
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP5]]
; CHECK-NEXT: store i32 0, ptr [[TMP6]], align 4
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
diff --git a/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll b/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll
index 06f6ce536e92c..33ff1f7b767d3 100644
--- a/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll
+++ b/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll
@@ -156,7 +156,9 @@ define i64 @multi_exit_2_exit_count_with_udiv_by_value_in_block_executed_uncondi
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0
; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: store i32 1, ptr [[TMP5]], align 4
+; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[IV]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP20]]
+; CHECK-NEXT: store i32 1, ptr [[TMP9]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1
@@ -265,7 +267,9 @@ define i64 @multi_exit_2_exit_count_with_udiv_by_constant_in_block_executed_unco
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0
; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: store i32 1, ptr [[TMP5]], align 4
+; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[IV]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP20]]
+; CHECK-NEXT: store i32 1, ptr [[TMP9]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll b/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll
index 3a0418292bd7e..e1ca138eb0282 100644
--- a/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll
+++ b/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll
@@ -114,6 +114,10 @@ define void @metadata(ptr nocapture readonly %ptr, i32 %size, ptr %pos) {
; FORCED-TF-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP1]]
; FORCED-TF-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP2]]
; FORCED-TF-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP3]]
+; FORCED-TF-NEXT: [[TMP4:%.*]] = insertelement <4 x ptr> poison, ptr [[NEXT_GEP]], i32 0
+; FORCED-TF-NEXT: [[TMP5:%.*]] = insertelement <4 x ptr> [[TMP4]], ptr [[NEXT_GEP1]], i32 1
+; FORCED-TF-NEXT: [[TMP6:%.*]] = insertelement <4 x ptr> [[TMP5]], ptr [[NEXT_GEP2]], i32 2
+; FORCED-TF-NEXT: [[TMP7:%.*]] = insertelement <4 x ptr> [[TMP6]], ptr [[NEXT_GEP3]], i32 3
; FORCED-TF-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
; FORCED-TF-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer
; FORCED-TF-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 0, i32 1, i32 2, i32 3>
@@ -181,6 +185,10 @@ define void @metadata(ptr nocapture readonly %ptr, i32 %size, ptr %pos) {
; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP1]]
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP2]]
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x ptr> poison, ptr [[NEXT_GEP]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x ptr> [[TMP4]], ptr [[NEXT_GEP1]], i32 1
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x ptr> [[TMP5]], ptr [[NEXT_GEP2]], i32 2
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x ptr> [[TMP6]], ptr [[NEXT_GEP3]], i32 3
; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 0, i32 1, i32 2, i32 3>
diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
index 8458e841912c6..02846aba50f72 100644
--- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
+++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
@@ -629,7 +629,10 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF1-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0
; VF8UF1-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF8UF1: [[PRED_STORE_IF]]:
-; VF8UF1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[STEP]]
+; VF8UF1-NEXT: [[TMP5:%.*]] = mul i64 0, [[STEP]]
+; VF8UF1-NEXT: [[TMP6:%.*]] = add i64 0, [[TMP5]]
+; VF8UF1-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], [[STEP]]
+; VF8UF1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]]
; VF8UF1-NEXT: store i8 0, ptr [[TMP8]], align 1
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE]]
; VF8UF1: [[PRED_STORE_CONTINUE]]:
@@ -730,7 +733,10 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0
; VF8UF2-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF8UF2: [[PRED_STORE_IF]]:
-; VF8UF2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[DST]], i64 [[STEP]]
+; VF8UF2-NEXT: [[TMP6:%.*]] = mul i64 0, [[STEP]]
+; VF8UF2-NEXT: [[TMP7:%.*]] = add i64 0, [[TMP6]]
+; VF8UF2-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], [[STEP]]
+; VF8UF2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP8]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP9]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE]]
; VF8UF2: [[PRED_STORE_CONTINUE]]:
@@ -910,7 +916,10 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP4:%.*]] = extractelement <16 x i1> [[TMP3]], i32 0
; VF16UF1-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF16UF1: [[PRED_STORE_IF]]:
-; VF16UF1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[STEP]]
+; VF16UF1-NEXT: [[TMP5:%.*]] = mul i64 0, [[STEP]]
+; VF16UF1-NEXT: [[TMP6:%.*]] = add i64 0, [[TMP5]]
+; VF16UF1-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], [[STEP]]
+; VF16UF1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP8]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE]]
; VF16UF1: [[PRED_STORE_CONTINUE]]:
More information about the llvm-branch-commits
mailing list