[llvm] [VPlan] Explicitly unoll replicate-regions without live-outs by VF. (PR #170212)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 5 11:30:53 PST 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/170212
>From c526c9b1b79a4031d5e662b2515f8f4177c73e0c Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 5 Dec 2025 16:56:15 +0000
Subject: [PATCH 1/3] [VPlan] Replace UnrollPart for VPScalarIVSteps with start
index op (NFC)
Replace the unroll part operand for VPScalarIVStepsRecipe with the start
index. This simplifies https://github.com/llvm/llvm-project/pull/170053
and is also a first step to break down the recipe into its components.
---
.../Vectorize/LoopVectorizationPlanner.h | 11 +++++
llvm/lib/Transforms/Vectorize/VPlan.h | 10 ++---
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 20 ++-------
.../Transforms/Vectorize/VPlanTransforms.cpp | 4 +-
llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 42 ++++++++++++++++---
5 files changed, 58 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 741392247c0d6..d89c05e22cc4c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -322,6 +322,17 @@ class VPBuilder {
return createScalarCast(CastOp, Op, ResultTy, DL);
}
+ VPValue *createScalarSExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy,
+ DebugLoc DL) {
+ if (ResultTy == SrcTy)
+ return Op;
+ Instruction::CastOps CastOp =
+ ResultTy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits()
+ ? Instruction::Trunc
+ : Instruction::SExt;
+ return createScalarCast(CastOp, Op, ResultTy, DL);
+ }
+
VPWidenCastRecipe *createWidenCast(Instruction::CastOps Opcode, VPValue *Op,
Type *ResultTy) {
VPIRFlags Flags;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index fe60e97d44997..24a1d75b4d7fb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3783,9 +3783,9 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
};
/// A recipe for handling phi nodes of integer and floating-point inductions,
-/// producing their scalar values.
-class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags,
- public VPUnrollPartAccessor<3> {
+/// producing their scalar values. Before unrolling the recipe has 3 operands:
+/// IV, step and VF. Unrolling adds an extra operand StartIndex.
+class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags {
Instruction::BinaryOps InductionOpcode;
public:
@@ -3815,10 +3815,6 @@ class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags,
getDebugLoc());
}
- /// Return true if this VPScalarIVStepsRecipe corresponds to part 0. Note that
- /// this is only accurate after the VPlan has been unrolled.
- bool isPart0() const { return getUnrollPart(*this) == 0; }
-
VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
/// Generate the scalarized versions of the phi node as needed by their users.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 1b1308c78c76e..47baa0f54c8ff 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2380,8 +2380,6 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
// iteration.
bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
// Compute the scalar steps and save the results in State.
- Type *IntStepTy =
- IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
unsigned StartLane = 0;
unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
@@ -2390,20 +2388,10 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
EndLane = StartLane + 1;
}
Value *StartIdx0;
- if (getUnrollPart(*this) == 0)
- StartIdx0 = ConstantInt::get(IntStepTy, 0);
- else {
- StartIdx0 = State.get(getOperand(2), true);
- if (getUnrollPart(*this) != 1) {
- StartIdx0 =
- Builder.CreateMul(StartIdx0, ConstantInt::get(StartIdx0->getType(),
- getUnrollPart(*this)));
- }
- StartIdx0 = Builder.CreateSExtOrTrunc(StartIdx0, IntStepTy);
- }
-
- if (BaseIVTy->isFloatingPointTy())
- StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
+ if (getNumOperands() == 3) {
+ StartIdx0 = getSignedIntOrFpConstant(BaseIVTy, 0);
+ } else
+ StartIdx0 = State.get(getOperand(3), true);
for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
Value *StartIdx = Builder.CreateBinOp(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index a59c8cf9ea1ef..8432cf210807d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1459,7 +1459,9 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
// VPScalarIVSteps for part 0 can be replaced by their start value, if only
// the first lane is demanded.
if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(Def)) {
- if (Steps->isPart0() && vputils::onlyFirstLaneUsed(Steps)) {
+ if ((Steps->getNumOperands() == 3 ||
+ match(Steps->getOperand(3), m_ZeroInt())) &&
+ vputils::onlyFirstLaneUsed(Steps)) {
Steps->replaceAllUsesWith(Steps->getOperand(0));
return;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index f215476b1e163..a1b6440a67c4e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -53,6 +53,9 @@ class UnrollState {
/// Unroll replicate region \p VPR by cloning the region UF - 1 times.
void unrollReplicateRegionByUF(VPRegionBlock *VPR);
+ /// Add a start index operand to \p Steps for \p Part.
+ void addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps, unsigned Part);
+
/// Unroll recipe \p R by cloning it UF - 1 times, unless it is uniform across
/// all parts.
void unrollRecipeByUF(VPRecipeBase &R);
@@ -123,6 +126,33 @@ class UnrollState {
};
} // namespace
+void UnrollState::addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps,
+ unsigned Part) {
+ if (Part == 0) {
+ Steps->addOperand(getConstantInt(Part));
+ return;
+ }
+
+ VPBuilder Builder(Steps);
+ Type *BaseIVTy = TypeInfo.inferScalarType(Steps->getOperand(0));
+ Type *IntStepTy =
+ IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
+ VPValue *StartIdx0 = Steps->getOperand(2);
+ StartIdx0 = Builder.createOverflowingOp(
+ Instruction::Mul,
+ {StartIdx0,
+ Plan.getConstantInt(TypeInfo.inferScalarType(StartIdx0), Part)});
+ StartIdx0 = Builder.createScalarSExtOrTrunc(
+ StartIdx0, IntStepTy, TypeInfo.inferScalarType(StartIdx0),
+ DebugLoc::getUnknown());
+
+ if (BaseIVTy->isFloatingPointTy())
+ StartIdx0 = Builder.createScalarCast(Instruction::SIToFP, StartIdx0,
+ BaseIVTy, DebugLoc::getUnknown());
+
+ Steps->addOperand(StartIdx0);
+}
+
void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
VPBlockBase *InsertPt = VPR->getSingleSuccessor();
for (unsigned Part = 1; Part != UF; ++Part) {
@@ -136,9 +166,8 @@ void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
VPBlockUtils::blocksOnly<VPBasicBlock>(Part0))) {
for (const auto &[PartIR, Part0R] : zip(*PartIVPBB, *Part0VPBB)) {
remapOperands(&PartIR, Part);
- if (auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR)) {
- ScalarIVSteps->addOperand(getConstantInt(Part));
- }
+ if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR))
+ addStartIndexForScalarSteps(Steps, Part);
addRecipeForPart(&Part0R, &PartIR, Part);
}
@@ -311,10 +340,13 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
}
remapOperands(Copy, Part);
+ if (auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(Copy))
+ addStartIndexForScalarSteps(ScalarIVSteps, Part);
+
// Add operand indicating the part to generate code for, to recipes still
// requiring it.
- if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe,
- VPVectorPointerRecipe, VPVectorEndPointerRecipe>(Copy) ||
+ if (isa<VPWidenCanonicalIVRecipe, VPVectorPointerRecipe,
+ VPVectorEndPointerRecipe>(Copy) ||
match(Copy,
m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>()))
Copy->addOperand(getConstantInt(Part));
>From affdde996634acb0e5de8399beb96b7035a01bd8 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 25 Nov 2025 15:07:11 +0000
Subject: [PATCH 2/3] [VPlan] Replicate VPScalarIVStepsRecipe by VF outside
replicate regions.
Extend replicateByVF to also handle VPScalarIVStepsRecipe. To do so, the
patch adds a new lane operand to VPScalarIVStepsRecipe, which is only
added when replicating. This enables removing a number of lane 0
computations. The lane operand will also be used to explicitly replicate
replicate regions in a follow-up.
Depends on https://github.com/llvm/llvm-project/pull/169796 (included in
PR).
---
llvm/lib/Transforms/Vectorize/VPlan.h | 5 +-
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 16 +-
.../Transforms/Vectorize/VPlanTransforms.cpp | 6 +-
llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 41 +-
.../AArch64/fold-tail-low-trip-count.ll | 29 +-
.../gather-do-not-vectorize-addressing.ll | 7 +-
...interleave-allocsize-not-equal-typesize.ll | 27 +-
.../AArch64/interleave-with-gaps.ll | 23 +-
.../partial-reduce-dot-product-neon.ll | 285 +++--
.../AArch64/partial-reduce-dot-product.ll | 285 +++--
.../AArch64/pr60831-sve-inv-store-crash.ll | 20 +-
.../AArch64/replicating-load-store-costs.ll | 82 +-
...nterleave-to-widen-memory-with-wide-ops.ll | 14 +-
.../AArch64/type-shrinkage-insertelt.ll | 76 +-
.../AArch64/uniform-args-call-variants.ll | 6 +-
.../widen-call-with-intrinsic-or-libfunc.ll | 5 +-
.../RISCV/riscv-vector-reverse.ll | 9 +-
.../LoopVectorize/RISCV/uniform-load-store.ll | 3 -
.../predicated-first-order-recurrence.ll | 7 +-
...demanding-all-lanes-and-first-lane-only.ll | 7 +-
.../X86/consecutive-ptr-uniforms.ll | 5 +-
.../LoopVectorize/X86/cost-model.ll | 13 +-
.../X86/drop-poison-generating-flags.ll | 9 +-
.../X86/fixed-order-recurrence.ll | 3 +-
.../LoopVectorize/X86/gather_scatter.ll | 38 +-
.../LoopVectorize/X86/induction-costs.ll | 3 +-
...terleave-ptradd-with-replicated-operand.ll | 3 +-
...leaved-accesses-hoist-load-across-store.ll | 141 ++-
.../LoopVectorize/X86/load-deref-pred.ll | 104 +-
.../Transforms/LoopVectorize/X86/pr36524.ll | 3 -
...6-sunk-instruction-used-outside-of-loop.ll | 5 +-
.../Transforms/LoopVectorize/X86/pr72969.ll | 3 +-
.../X86/replicating-load-store-costs.ll | 33 +-
.../LoopVectorize/X86/strided_load_cost.ll | 62 +-
.../LoopVectorize/X86/uniform_mem_op.ll | 3 -
.../X86/vplan-native-inner-loop-only.ll | 3 +-
.../X86/x86_fp80-vector-store.ll | 8 +-
.../LoopVectorize/consecutive-ptr-uniforms.ll | 36 +-
.../Transforms/LoopVectorize/cse-casts.ll | 7 +-
.../test/Transforms/LoopVectorize/debugloc.ll | 7 +-
.../epilog-vectorization-any-of-reductions.ll | 6 +-
...irst-order-recurrence-dead-instructions.ll | 3 +-
.../first-order-recurrence-tail-folding.ll | 48 +-
.../LoopVectorize/first-order-recurrence.ll | 46 +-
...predicated-loads-with-predicated-stores.ll | 156 +--
.../LoopVectorize/hoist-predicated-loads.ll | 113 +-
...ction-multiple-uses-in-same-instruction.ll | 6 +-
.../LoopVectorize/induction-ptrcasts.ll | 3 +-
.../Transforms/LoopVectorize/induction.ll | 52 +-
.../interleaved-accesses-metadata.ll | 11 +-
.../LoopVectorize/iv_outside_user.ll | 6 +-
.../Transforms/LoopVectorize/lcssa-crashes.ll | 3 +-
.../LoopVectorize/load-deref-pred-neg-off.ll | 7 +-
.../Transforms/LoopVectorize/loop-form.ll | 3 +-
.../loop-with-constant-exit-condition.ll | 3 +-
.../LoopVectorize/narrow-to-single-scalar.ll | 10 +-
.../LoopVectorize/operand-bundles.ll | 7 +-
.../optimal-epilog-vectorization.ll | 6 +-
...ction-index-width-smaller-than-iv-width.ll | 6 +-
.../LoopVectorize/pointer-induction.ll | 13 +-
.../LoopVectorize/predicate-switch.ll | 6 +-
.../LoopVectorize/reduction-inloop.ll | 38 +-
.../reduction-with-invariant-store.ll | 6 +-
.../Transforms/LoopVectorize/uniform-blend.ll | 7 +-
.../uniform_across_vf_induction1.ll | 363 +++---
.../uniform_across_vf_induction1_and.ll | 207 ++--
.../uniform_across_vf_induction1_div_urem.ll | 81 +-
.../uniform_across_vf_induction1_lshr.ll | 498 ++++----
.../uniform_across_vf_induction2.ll | 1032 ++++++++---------
.../use-scalar-epilogue-if-tp-fails.ll | 6 +-
.../LoopVectorize/vect-phiscev-sext-trunc.ll | 7 +-
.../LoopVectorize/version-mem-access.ll | 3 +-
.../version-stride-with-integer-casts.ll | 6 +-
.../LoopVectorize/vplan-predicate-switch.ll | 8 +-
74 files changed, 2046 insertions(+), 2172 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 24a1d75b4d7fb..9688b46e3956d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3809,10 +3809,13 @@ class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags {
~VPScalarIVStepsRecipe() override = default;
VPScalarIVStepsRecipe *clone() override {
- return new VPScalarIVStepsRecipe(
+ auto *NewR = new VPScalarIVStepsRecipe(
getOperand(0), getOperand(1), getOperand(2), InductionOpcode,
hasFastMathFlags() ? getFastMathFlags() : FastMathFlags(),
getDebugLoc());
+ if (getNumOperands() == 4)
+ NewR->addOperand(getOperand(3));
+ return NewR;
}
VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 47baa0f54c8ff..791124c8c6624 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2388,14 +2388,17 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
EndLane = StartLane + 1;
}
Value *StartIdx0;
- if (getNumOperands() == 3) {
+ if (getNumOperands() == 3)
StartIdx0 = getSignedIntOrFpConstant(BaseIVTy, 0);
- } else
+ else
StartIdx0 = State.get(getOperand(3), true);
for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
- Value *StartIdx = Builder.CreateBinOp(
- AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
+ Value *StartIdx = StartIdx0;
+ if (Lane != 0) {
+ StartIdx = Builder.CreateBinOp(AddOp, StartIdx0,
+ getSignedIntOrFpConstant(BaseIVTy, Lane));
+ }
// The step returned by `createStepForVF` is a runtime-evaluated value
// when VF is scalable. Otherwise, it should be folded into a Constant.
assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
@@ -2403,7 +2406,10 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
"scalable");
auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
- State.set(this, Add, VPLane(Lane));
+ if (State.Lane)
+ State.set(this, Add, VPLane(Lane));
+ else
+ State.set(this, Add, VPLane(0));
}
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 8432cf210807d..660595915b239 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1457,7 +1457,7 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
}
// VPScalarIVSteps for part 0 can be replaced by their start value, if only
- // the first lane is demanded.
+ // the first lane is demanded and both Lane and UnrollPart operands are 0.
if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(Def)) {
if ((Steps->getNumOperands() == 3 ||
match(Steps->getOperand(3), m_ZeroInt())) &&
@@ -4436,9 +4436,9 @@ void VPlanTransforms::materializePacksAndUnpacks(VPlan &Plan) {
for (VPBasicBlock *VPBB :
concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion)) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
- if (!isa<VPReplicateRecipe, VPInstruction>(&R))
+ if (!isa<VPScalarIVStepsRecipe, VPReplicateRecipe, VPInstruction>(&R))
continue;
- auto *DefR = cast<VPRecipeWithIRFlags>(&R);
+ auto *DefR = cast<VPSingleDefRecipe>(&R);
auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](VPUser *U) {
VPRegionBlock *ParentRegion = cast<VPRecipeBase>(U)->getRegion();
return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index a1b6440a67c4e..6fa5562a6a976 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -126,17 +126,18 @@ class UnrollState {
};
} // namespace
-void UnrollState::addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps,
- unsigned Part) {
+static void addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps,
+ unsigned Part, VPlan &Plan,
+ VPTypeAnalysis &TypeInfo) {
+ Type *BaseIVTy = TypeInfo.inferScalarType(Steps->getOperand(0));
+ Type *IntStepTy =
+ IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
if (Part == 0) {
- Steps->addOperand(getConstantInt(Part));
+ Steps->addOperand(Plan.getConstantInt(IntStepTy, 0));
return;
}
VPBuilder Builder(Steps);
- Type *BaseIVTy = TypeInfo.inferScalarType(Steps->getOperand(0));
- Type *IntStepTy =
- IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
VPValue *StartIdx0 = Steps->getOperand(2);
StartIdx0 = Builder.createOverflowingOp(
Instruction::Mul,
@@ -153,6 +154,11 @@ void UnrollState::addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps,
Steps->addOperand(StartIdx0);
}
+void UnrollState::addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps,
+ unsigned Part) {
+ return ::addStartIndexForScalarSteps(Steps, Part, Plan, TypeInfo);
+}
+
void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
VPBlockBase *InsertPt = VPR->getSingleSuccessor();
for (unsigned Part = 1; Part != UF; ++Part) {
@@ -558,12 +564,28 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
/*IsSingleScalar=*/true, /*Mask=*/nullptr,
*RepR, *RepR, RepR->getDebugLoc());
} else {
- assert(isa<VPInstruction>(DefR) &&
+ assert((isa<VPInstruction, VPScalarIVStepsRecipe>(DefR)) &&
"DefR must be a VPReplicateRecipe or VPInstruction");
New = DefR->clone();
for (const auto &[Idx, Op] : enumerate(NewOps)) {
New->setOperand(Idx, Op);
}
+ if (isa<VPScalarIVStepsRecipe>(New)) {
+ VPTypeAnalysis TypeInfo(Plan);
+ if (New->getNumOperands() == 3)
+ addStartIndexForScalarSteps(cast<VPScalarIVStepsRecipe>(New), 0, Plan,
+ TypeInfo);
+
+ if (Lane.getKnownLane() != 0) {
+ Type *BaseIVTy = TypeInfo.inferScalarType(DefR->getOperand(0));
+ VPBuilder Builder(DefR);
+ New->setOperand(
+ 3, Builder.createNaryOp(
+ Instruction::Add,
+ {New->getOperand(3),
+ Plan.getConstantInt(BaseIVTy, Lane.getKnownLane())}));
+ }
+ }
}
New->insertBefore(DefR);
return New;
@@ -590,7 +612,7 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
SmallVector<VPRecipeBase *> ToRemove;
for (VPBasicBlock *VPBB : VPBBsToUnroll) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
- if (!isa<VPInstruction, VPReplicateRecipe>(&R) ||
+ if (!isa<VPInstruction, VPReplicateRecipe, VPScalarIVStepsRecipe>(&R) ||
(isa<VPReplicateRecipe>(&R) &&
cast<VPReplicateRecipe>(&R)->isSingleScalar()) ||
(isa<VPInstruction>(&R) &&
@@ -598,6 +620,9 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
cast<VPInstruction>(&R)->getOpcode() != VPInstruction::Unpack))
continue;
+ if (isa<VPScalarIVStepsRecipe>(&R) && Plan.hasScalarVFOnly())
+ continue;
+
auto *DefR = cast<VPSingleDefRecipe>(&R);
VPBuilder Builder(DefR);
if (DefR->getNumUsers() == 0) {
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fold-tail-low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fold-tail-low-trip-count.ll
index 14f5dd7d41691..46fc9646356c8 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/fold-tail-low-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/fold-tail-low-trip-count.ll
@@ -16,11 +16,10 @@ define void @low_trip_count_small(i32 %x, ptr %dst) {
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 0
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[DST]], i64 1
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[DST]], i64 2
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[DST]], i64 3
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[NEXT_GEP]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[DST]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x ptr> [[TMP1]], ptr [[NEXT_GEP2]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr> [[TMP2]], ptr [[NEXT_GEP3]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x ptr> [[TMP3]], ptr [[NEXT_GEP4]], i32 3
@@ -28,31 +27,31 @@ define void @low_trip_count_small(i32 %x, ptr %dst) {
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0
; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 1
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[DST]], i64 1
; CHECK-NEXT: store i8 0, ptr [[TMP7]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1
-; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
-; CHECK: [[PRED_STORE_IF5]]:
+; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]]
+; CHECK: [[PRED_STORE_IF4]]:
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i64 1
; CHECK-NEXT: store i8 0, ptr [[TMP9]], align 1
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]]
-; CHECK: [[PRED_STORE_CONTINUE6]]:
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE5]]
+; CHECK: [[PRED_STORE_CONTINUE5]]:
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2
-; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
-; CHECK: [[PRED_STORE_IF7]]:
+; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]]
+; CHECK: [[PRED_STORE_IF6]]:
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[NEXT_GEP3]], i64 1
; CHECK-NEXT: store i8 0, ptr [[TMP11]], align 1
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE8]]
-; CHECK: [[PRED_STORE_CONTINUE8]]:
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE7]]
+; CHECK: [[PRED_STORE_CONTINUE7]]:
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3
-; CHECK-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
-; CHECK: [[PRED_STORE_IF9]]:
+; CHECK-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]]
+; CHECK: [[PRED_STORE_IF8]]:
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[NEXT_GEP4]], i64 1
; CHECK-NEXT: store i8 0, ptr [[TMP13]], align 1
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE10]]
-; CHECK: [[PRED_STORE_CONTINUE10]]:
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE9]]
+; CHECK: [[PRED_STORE_CONTINUE9]]:
; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll b/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll
index 26a9545764091..33be739be4718 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll
@@ -20,10 +20,9 @@ define dso_local double @test(ptr nocapture noundef readonly %data, ptr nocaptur
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x double> [ <double 0.000000e+00, double -0.000000e+00>, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[OFFSET:%.*]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[OFFSET]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[OFFSET:%.*]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[OFFSET]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP4]] to i64
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll
index 2557ae55d2c85..b396d584a8497 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll
@@ -21,31 +21,30 @@ define void @pr58722_load_interleave_group(ptr %src, ptr %dst) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP2]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP3]]
-; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP7]], align 4, !alias.scope [[META0:![0-9]+]]
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 1
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 1
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 1
-; CHECK-NEXT: [[TMP13:%.*]] = load i24, ptr [[TMP9]], align 4, !alias.scope [[META0:![0-9]+]]
-; CHECK-NEXT: [[TMP14:%.*]] = load i24, ptr [[TMP10]], align 4, !alias.scope [[META0]]
-; CHECK-NEXT: [[TMP15:%.*]] = load i24, ptr [[TMP11]], align 4, !alias.scope [[META0]]
-; CHECK-NEXT: [[TMP16:%.*]] = load i24, ptr [[TMP12]], align 4, !alias.scope [[META0]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 1
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 1
+; CHECK-NEXT: [[TMP13:%.*]] = load i24, ptr [[TMP12]], align 4, !alias.scope [[META0]]
+; CHECK-NEXT: [[TMP14:%.*]] = load i24, ptr [[TMP8]], align 4, !alias.scope [[META0]]
+; CHECK-NEXT: [[TMP15:%.*]] = load i24, ptr [[TMP9]], align 4, !alias.scope [[META0]]
+; CHECK-NEXT: [[TMP16:%.*]] = load i24, ptr [[TMP10]], align 4, !alias.scope [[META0]]
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i24> poison, i24 [[TMP13]], i32 0
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i24> [[TMP17]], i24 [[TMP14]], i32 1
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i24> [[TMP18]], i24 [[TMP15]], i32 2
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i24> [[TMP19]], i24 [[TMP16]], i32 3
; CHECK-NEXT: [[TMP21:%.*]] = zext <4 x i24> [[TMP20]] to <4 x i32>
; CHECK-NEXT: [[TMP22:%.*]] = add <4 x i32> [[STRIDED_VEC]], [[TMP21]]
-; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]]
; CHECK-NEXT: store <4 x i32> [[TMP22]], ptr [[TMP23]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll
index 5b4bb70e6a479..2108c15b7f838 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll
@@ -174,11 +174,10 @@ define void @main_vector_loop_fixed_single_vector_iteration_with_runtime_checks(
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
-; CHECK-NEXT: [[IV:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
-; CHECK-NEXT: [[GEP_J:%.*]] = getelementptr i64, ptr [[J]], i64 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 4
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6
+; CHECK-NEXT: [[GEP_J:%.*]] = getelementptr i64, ptr [[J]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[GEP_J]], align 8
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i64> [[STRIDED_VEC]] to <4 x i16>
@@ -186,14 +185,14 @@ define void @main_vector_loop_fixed_single_vector_iteration_with_runtime_checks(
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i16> [[TMP5]], i32 1
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[K]], i64 [[IV]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP1]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP2]]
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[K]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP2]]
; CHECK-NEXT: store i16 [[TMP10]], ptr [[TMP6]], align 2
-; CHECK-NEXT: store i16 [[TMP11]], ptr [[TMP7]], align 2
-; CHECK-NEXT: store i16 [[TMP12]], ptr [[TMP8]], align 2
-; CHECK-NEXT: store i16 [[TMP13]], ptr [[TMP9]], align 2
+; CHECK-NEXT: store i16 [[TMP11]], ptr [[TMP15]], align 2
+; CHECK-NEXT: store i16 [[TMP12]], ptr [[TMP16]], align 2
+; CHECK-NEXT: store i16 [[TMP13]], ptr [[TMP17]], align 2
; CHECK-NEXT: store i64 0, ptr [[A]], align 8
; CHECK-NEXT: store i64 0, ptr [[B]], align 8
; CHECK-NEXT: store i64 0, ptr [[C]], align 8
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll
index b2be0e1d7a442..3b6405fac3050 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll
@@ -127,57 +127,56 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) {
; CHECK-INTERLEAVE1: vector.body:
; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP69:%.*]], [[VECTOR_BODY]] ]
-; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
-; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4
-; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5
-; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6
-; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7
-; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 8
-; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 9
-; CHECK-INTERLEAVE1-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 10
-; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 11
-; CHECK-INTERLEAVE1-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 12
-; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
-; CHECK-INTERLEAVE1-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
-; CHECK-INTERLEAVE1-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
-; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1
+; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
+; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 3
+; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 4
+; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 5
+; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 6
+; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 7
+; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 8
+; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 9
+; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 10
+; CHECK-INTERLEAVE1-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 11
+; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 12
+; CHECK-INTERLEAVE1-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 13
+; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 14
+; CHECK-INTERLEAVE1-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 15
+; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP16]], align 1
; CHECK-INTERLEAVE1-NEXT: [[TMP18:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
-; CHECK-INTERLEAVE1-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP4]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP10]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP12]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP13]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP14]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP15]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP4]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP10]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP12]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP13]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP14]]
; CHECK-INTERLEAVE1-NEXT: [[TMP35:%.*]] = load i16, ptr [[TMP19]], align 2
-; CHECK-INTERLEAVE1-NEXT: [[TMP36:%.*]] = load i16, ptr [[TMP20]], align 2
-; CHECK-INTERLEAVE1-NEXT: [[TMP37:%.*]] = load i16, ptr [[TMP21]], align 2
-; CHECK-INTERLEAVE1-NEXT: [[TMP38:%.*]] = load i16, ptr [[TMP22]], align 2
-; CHECK-INTERLEAVE1-NEXT: [[TMP39:%.*]] = load i16, ptr [[TMP23]], align 2
-; CHECK-INTERLEAVE1-NEXT: [[TMP40:%.*]] = load i16, ptr [[TMP24]], align 2
-; CHECK-INTERLEAVE1-NEXT: [[TMP41:%.*]] = load i16, ptr [[TMP25]], align 2
-; CHECK-INTERLEAVE1-NEXT: [[TMP42:%.*]] = load i16, ptr [[TMP26]], align 2
-; CHECK-INTERLEAVE1-NEXT: [[TMP43:%.*]] = load i16, ptr [[TMP27]], align 2
-; CHECK-INTERLEAVE1-NEXT: [[TMP44:%.*]] = load i16, ptr [[TMP28]], align 2
-; CHECK-INTERLEAVE1-NEXT: [[TMP45:%.*]] = load i16, ptr [[TMP29]], align 2
-; CHECK-INTERLEAVE1-NEXT: [[TMP46:%.*]] = load i16, ptr [[TMP30]], align 2
-; CHECK-INTERLEAVE1-NEXT: [[TMP47:%.*]] = load i16, ptr [[TMP31]], align 2
-; CHECK-INTERLEAVE1-NEXT: [[TMP48:%.*]] = load i16, ptr [[TMP32]], align 2
-; CHECK-INTERLEAVE1-NEXT: [[TMP49:%.*]] = load i16, ptr [[TMP33]], align 2
-; CHECK-INTERLEAVE1-NEXT: [[TMP50:%.*]] = load i16, ptr [[TMP34]], align 2
+; CHECK-INTERLEAVE1-NEXT: [[TMP36:%.*]] = load i16, ptr [[TMP33]], align 2
+; CHECK-INTERLEAVE1-NEXT: [[TMP37:%.*]] = load i16, ptr [[TMP34]], align 2
+; CHECK-INTERLEAVE1-NEXT: [[TMP38:%.*]] = load i16, ptr [[TMP20]], align 2
+; CHECK-INTERLEAVE1-NEXT: [[TMP39:%.*]] = load i16, ptr [[TMP21]], align 2
+; CHECK-INTERLEAVE1-NEXT: [[TMP40:%.*]] = load i16, ptr [[TMP22]], align 2
+; CHECK-INTERLEAVE1-NEXT: [[TMP41:%.*]] = load i16, ptr [[TMP23]], align 2
+; CHECK-INTERLEAVE1-NEXT: [[TMP42:%.*]] = load i16, ptr [[TMP24]], align 2
+; CHECK-INTERLEAVE1-NEXT: [[TMP43:%.*]] = load i16, ptr [[TMP25]], align 2
+; CHECK-INTERLEAVE1-NEXT: [[TMP44:%.*]] = load i16, ptr [[TMP26]], align 2
+; CHECK-INTERLEAVE1-NEXT: [[TMP45:%.*]] = load i16, ptr [[TMP27]], align 2
+; CHECK-INTERLEAVE1-NEXT: [[TMP46:%.*]] = load i16, ptr [[TMP28]], align 2
+; CHECK-INTERLEAVE1-NEXT: [[TMP47:%.*]] = load i16, ptr [[TMP29]], align 2
+; CHECK-INTERLEAVE1-NEXT: [[TMP48:%.*]] = load i16, ptr [[TMP30]], align 2
+; CHECK-INTERLEAVE1-NEXT: [[TMP49:%.*]] = load i16, ptr [[TMP31]], align 2
+; CHECK-INTERLEAVE1-NEXT: [[TMP50:%.*]] = load i16, ptr [[TMP32]], align 2
; CHECK-INTERLEAVE1-NEXT: [[TMP51:%.*]] = insertelement <16 x i16> poison, i16 [[TMP35]], i32 0
; CHECK-INTERLEAVE1-NEXT: [[TMP52:%.*]] = insertelement <16 x i16> [[TMP51]], i16 [[TMP36]], i32 1
; CHECK-INTERLEAVE1-NEXT: [[TMP53:%.*]] = insertelement <16 x i16> [[TMP52]], i16 [[TMP37]], i32 2
@@ -216,22 +215,21 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) {
; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP137:%.*]], [[VECTOR_BODY]] ]
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP138:%.*]], [[VECTOR_BODY]] ]
-; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
-; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4
-; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5
-; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6
-; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7
-; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 8
-; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 9
-; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 10
-; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 11
-; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 12
-; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
-; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
-; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
+; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1
+; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
+; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 3
+; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 4
+; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 5
+; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 6
+; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 7
+; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 8
+; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 9
+; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 10
+; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 11
+; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 12
+; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 13
+; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 14
+; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 15
; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 16
; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 17
; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = add i64 [[INDEX]], 18
@@ -248,28 +246,28 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) {
; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = add i64 [[INDEX]], 29
; CHECK-INTERLEAVED-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30
; CHECK-INTERLEAVED-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31
-; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
+; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
; CHECK-INTERLEAVED-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[TMP32]], i64 16
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP32]], align 1
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP34]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP35:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
; CHECK-INTERLEAVED-NEXT: [[TMP36:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32>
-; CHECK-INTERLEAVED-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
-; CHECK-INTERLEAVED-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
-; CHECK-INTERLEAVED-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
-; CHECK-INTERLEAVED-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]]
-; CHECK-INTERLEAVED-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP4]]
-; CHECK-INTERLEAVED-NEXT: [[TMP42:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]]
-; CHECK-INTERLEAVED-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]]
-; CHECK-INTERLEAVED-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]]
-; CHECK-INTERLEAVED-NEXT: [[TMP45:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]]
-; CHECK-INTERLEAVED-NEXT: [[TMP46:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]]
-; CHECK-INTERLEAVED-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP10]]
-; CHECK-INTERLEAVED-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]]
-; CHECK-INTERLEAVED-NEXT: [[TMP49:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP12]]
-; CHECK-INTERLEAVED-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP13]]
-; CHECK-INTERLEAVED-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP14]]
-; CHECK-INTERLEAVED-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP15]]
+; CHECK-INTERLEAVED-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
+; CHECK-INTERLEAVED-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
+; CHECK-INTERLEAVED-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
+; CHECK-INTERLEAVED-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
+; CHECK-INTERLEAVED-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]]
+; CHECK-INTERLEAVED-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP4]]
+; CHECK-INTERLEAVED-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]]
+; CHECK-INTERLEAVED-NEXT: [[TMP42:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]]
+; CHECK-INTERLEAVED-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]]
+; CHECK-INTERLEAVED-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]]
+; CHECK-INTERLEAVED-NEXT: [[TMP45:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]]
+; CHECK-INTERLEAVED-NEXT: [[TMP46:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP10]]
+; CHECK-INTERLEAVED-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]]
+; CHECK-INTERLEAVED-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP12]]
+; CHECK-INTERLEAVED-NEXT: [[TMP49:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP13]]
+; CHECK-INTERLEAVED-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP14]]
; CHECK-INTERLEAVED-NEXT: [[TMP53:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP16]]
; CHECK-INTERLEAVED-NEXT: [[TMP54:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP17]]
; CHECK-INTERLEAVED-NEXT: [[TMP55:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP18]]
@@ -287,21 +285,21 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) {
; CHECK-INTERLEAVED-NEXT: [[TMP67:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP30]]
; CHECK-INTERLEAVED-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP31]]
; CHECK-INTERLEAVED-NEXT: [[TMP69:%.*]] = load i16, ptr [[TMP37]], align 2
-; CHECK-INTERLEAVED-NEXT: [[TMP70:%.*]] = load i16, ptr [[TMP38]], align 2
-; CHECK-INTERLEAVED-NEXT: [[TMP71:%.*]] = load i16, ptr [[TMP39]], align 2
-; CHECK-INTERLEAVED-NEXT: [[TMP72:%.*]] = load i16, ptr [[TMP40]], align 2
-; CHECK-INTERLEAVED-NEXT: [[TMP73:%.*]] = load i16, ptr [[TMP41]], align 2
-; CHECK-INTERLEAVED-NEXT: [[TMP74:%.*]] = load i16, ptr [[TMP42]], align 2
-; CHECK-INTERLEAVED-NEXT: [[TMP75:%.*]] = load i16, ptr [[TMP43]], align 2
-; CHECK-INTERLEAVED-NEXT: [[TMP76:%.*]] = load i16, ptr [[TMP44]], align 2
-; CHECK-INTERLEAVED-NEXT: [[TMP77:%.*]] = load i16, ptr [[TMP45]], align 2
-; CHECK-INTERLEAVED-NEXT: [[TMP78:%.*]] = load i16, ptr [[TMP46]], align 2
-; CHECK-INTERLEAVED-NEXT: [[TMP79:%.*]] = load i16, ptr [[TMP47]], align 2
-; CHECK-INTERLEAVED-NEXT: [[TMP80:%.*]] = load i16, ptr [[TMP48]], align 2
-; CHECK-INTERLEAVED-NEXT: [[TMP81:%.*]] = load i16, ptr [[TMP49]], align 2
-; CHECK-INTERLEAVED-NEXT: [[TMP82:%.*]] = load i16, ptr [[TMP50]], align 2
-; CHECK-INTERLEAVED-NEXT: [[TMP83:%.*]] = load i16, ptr [[TMP51]], align 2
-; CHECK-INTERLEAVED-NEXT: [[TMP84:%.*]] = load i16, ptr [[TMP52]], align 2
+; CHECK-INTERLEAVED-NEXT: [[TMP70:%.*]] = load i16, ptr [[TMP51]], align 2
+; CHECK-INTERLEAVED-NEXT: [[TMP71:%.*]] = load i16, ptr [[TMP52]], align 2
+; CHECK-INTERLEAVED-NEXT: [[TMP72:%.*]] = load i16, ptr [[TMP38]], align 2
+; CHECK-INTERLEAVED-NEXT: [[TMP73:%.*]] = load i16, ptr [[TMP39]], align 2
+; CHECK-INTERLEAVED-NEXT: [[TMP74:%.*]] = load i16, ptr [[TMP40]], align 2
+; CHECK-INTERLEAVED-NEXT: [[TMP75:%.*]] = load i16, ptr [[TMP41]], align 2
+; CHECK-INTERLEAVED-NEXT: [[TMP76:%.*]] = load i16, ptr [[TMP42]], align 2
+; CHECK-INTERLEAVED-NEXT: [[TMP77:%.*]] = load i16, ptr [[TMP43]], align 2
+; CHECK-INTERLEAVED-NEXT: [[TMP78:%.*]] = load i16, ptr [[TMP44]], align 2
+; CHECK-INTERLEAVED-NEXT: [[TMP79:%.*]] = load i16, ptr [[TMP45]], align 2
+; CHECK-INTERLEAVED-NEXT: [[TMP80:%.*]] = load i16, ptr [[TMP46]], align 2
+; CHECK-INTERLEAVED-NEXT: [[TMP81:%.*]] = load i16, ptr [[TMP47]], align 2
+; CHECK-INTERLEAVED-NEXT: [[TMP82:%.*]] = load i16, ptr [[TMP48]], align 2
+; CHECK-INTERLEAVED-NEXT: [[TMP83:%.*]] = load i16, ptr [[TMP49]], align 2
+; CHECK-INTERLEAVED-NEXT: [[TMP84:%.*]] = load i16, ptr [[TMP50]], align 2
; CHECK-INTERLEAVED-NEXT: [[TMP85:%.*]] = insertelement <16 x i16> poison, i16 [[TMP69]], i32 0
; CHECK-INTERLEAVED-NEXT: [[TMP86:%.*]] = insertelement <16 x i16> [[TMP85]], i16 [[TMP70]], i32 1
; CHECK-INTERLEAVED-NEXT: [[TMP87:%.*]] = insertelement <16 x i16> [[TMP86]], i16 [[TMP71]], i32 2
@@ -375,57 +373,56 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) {
; CHECK-MAXBW: vector.body:
; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP69:%.*]], [[VECTOR_BODY]] ]
-; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
-; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4
-; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5
-; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6
-; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7
-; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 8
-; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 9
-; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 10
-; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 11
-; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 12
-; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
-; CHECK-MAXBW-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
-; CHECK-MAXBW-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
-; CHECK-MAXBW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
+; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1
+; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
+; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 3
+; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 4
+; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 5
+; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 6
+; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 7
+; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 8
+; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 9
+; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 10
+; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 11
+; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 12
+; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 13
+; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 14
+; CHECK-MAXBW-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 15
+; CHECK-MAXBW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP16]], align 1
; CHECK-MAXBW-NEXT: [[TMP18:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
-; CHECK-MAXBW-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
-; CHECK-MAXBW-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
-; CHECK-MAXBW-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
-; CHECK-MAXBW-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]]
-; CHECK-MAXBW-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP4]]
-; CHECK-MAXBW-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]]
-; CHECK-MAXBW-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]]
-; CHECK-MAXBW-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]]
-; CHECK-MAXBW-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]]
-; CHECK-MAXBW-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]]
-; CHECK-MAXBW-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP10]]
-; CHECK-MAXBW-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]]
-; CHECK-MAXBW-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP12]]
-; CHECK-MAXBW-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP13]]
-; CHECK-MAXBW-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP14]]
-; CHECK-MAXBW-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP15]]
+; CHECK-MAXBW-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
+; CHECK-MAXBW-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
+; CHECK-MAXBW-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
+; CHECK-MAXBW-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
+; CHECK-MAXBW-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]]
+; CHECK-MAXBW-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP4]]
+; CHECK-MAXBW-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]]
+; CHECK-MAXBW-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]]
+; CHECK-MAXBW-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]]
+; CHECK-MAXBW-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]]
+; CHECK-MAXBW-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]]
+; CHECK-MAXBW-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP10]]
+; CHECK-MAXBW-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]]
+; CHECK-MAXBW-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP12]]
+; CHECK-MAXBW-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP13]]
+; CHECK-MAXBW-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP14]]
; CHECK-MAXBW-NEXT: [[TMP35:%.*]] = load i16, ptr [[TMP19]], align 2
-; CHECK-MAXBW-NEXT: [[TMP36:%.*]] = load i16, ptr [[TMP20]], align 2
-; CHECK-MAXBW-NEXT: [[TMP37:%.*]] = load i16, ptr [[TMP21]], align 2
-; CHECK-MAXBW-NEXT: [[TMP38:%.*]] = load i16, ptr [[TMP22]], align 2
-; CHECK-MAXBW-NEXT: [[TMP39:%.*]] = load i16, ptr [[TMP23]], align 2
-; CHECK-MAXBW-NEXT: [[TMP40:%.*]] = load i16, ptr [[TMP24]], align 2
-; CHECK-MAXBW-NEXT: [[TMP41:%.*]] = load i16, ptr [[TMP25]], align 2
-; CHECK-MAXBW-NEXT: [[TMP42:%.*]] = load i16, ptr [[TMP26]], align 2
-; CHECK-MAXBW-NEXT: [[TMP43:%.*]] = load i16, ptr [[TMP27]], align 2
-; CHECK-MAXBW-NEXT: [[TMP44:%.*]] = load i16, ptr [[TMP28]], align 2
-; CHECK-MAXBW-NEXT: [[TMP45:%.*]] = load i16, ptr [[TMP29]], align 2
-; CHECK-MAXBW-NEXT: [[TMP46:%.*]] = load i16, ptr [[TMP30]], align 2
-; CHECK-MAXBW-NEXT: [[TMP47:%.*]] = load i16, ptr [[TMP31]], align 2
-; CHECK-MAXBW-NEXT: [[TMP48:%.*]] = load i16, ptr [[TMP32]], align 2
-; CHECK-MAXBW-NEXT: [[TMP49:%.*]] = load i16, ptr [[TMP33]], align 2
-; CHECK-MAXBW-NEXT: [[TMP50:%.*]] = load i16, ptr [[TMP34]], align 2
+; CHECK-MAXBW-NEXT: [[TMP36:%.*]] = load i16, ptr [[TMP33]], align 2
+; CHECK-MAXBW-NEXT: [[TMP37:%.*]] = load i16, ptr [[TMP34]], align 2
+; CHECK-MAXBW-NEXT: [[TMP38:%.*]] = load i16, ptr [[TMP20]], align 2
+; CHECK-MAXBW-NEXT: [[TMP39:%.*]] = load i16, ptr [[TMP21]], align 2
+; CHECK-MAXBW-NEXT: [[TMP40:%.*]] = load i16, ptr [[TMP22]], align 2
+; CHECK-MAXBW-NEXT: [[TMP41:%.*]] = load i16, ptr [[TMP23]], align 2
+; CHECK-MAXBW-NEXT: [[TMP42:%.*]] = load i16, ptr [[TMP24]], align 2
+; CHECK-MAXBW-NEXT: [[TMP43:%.*]] = load i16, ptr [[TMP25]], align 2
+; CHECK-MAXBW-NEXT: [[TMP44:%.*]] = load i16, ptr [[TMP26]], align 2
+; CHECK-MAXBW-NEXT: [[TMP45:%.*]] = load i16, ptr [[TMP27]], align 2
+; CHECK-MAXBW-NEXT: [[TMP46:%.*]] = load i16, ptr [[TMP28]], align 2
+; CHECK-MAXBW-NEXT: [[TMP47:%.*]] = load i16, ptr [[TMP29]], align 2
+; CHECK-MAXBW-NEXT: [[TMP48:%.*]] = load i16, ptr [[TMP30]], align 2
+; CHECK-MAXBW-NEXT: [[TMP49:%.*]] = load i16, ptr [[TMP31]], align 2
+; CHECK-MAXBW-NEXT: [[TMP50:%.*]] = load i16, ptr [[TMP32]], align 2
; CHECK-MAXBW-NEXT: [[TMP51:%.*]] = insertelement <16 x i16> poison, i16 [[TMP35]], i32 0
; CHECK-MAXBW-NEXT: [[TMP52:%.*]] = insertelement <16 x i16> [[TMP51]], i16 [[TMP36]], i32 1
; CHECK-MAXBW-NEXT: [[TMP53:%.*]] = insertelement <16 x i16> [[TMP52]], i16 [[TMP37]], i32 2
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
index 2bea0733f65b0..e8cd68bca8eec 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
@@ -415,57 +415,56 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1: vector.body:
; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP69:%.*]], [[VECTOR_BODY]] ]
-; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
-; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4
-; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5
-; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6
-; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7
-; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 8
-; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 9
-; CHECK-INTERLEAVE1-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 10
-; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 11
-; CHECK-INTERLEAVE1-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 12
-; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
-; CHECK-INTERLEAVE1-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
-; CHECK-INTERLEAVE1-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
-; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1
+; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
+; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 3
+; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 4
+; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 5
+; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 6
+; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 7
+; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 8
+; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 9
+; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 10
+; CHECK-INTERLEAVE1-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 11
+; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 12
+; CHECK-INTERLEAVE1-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 13
+; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 14
+; CHECK-INTERLEAVE1-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 15
+; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP16]], align 1
; CHECK-INTERLEAVE1-NEXT: [[TMP18:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
-; CHECK-INTERLEAVE1-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP4]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP10]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP12]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP13]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP14]]
-; CHECK-INTERLEAVE1-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP15]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP4]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP10]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP12]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP13]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP14]]
; CHECK-INTERLEAVE1-NEXT: [[TMP35:%.*]] = load i16, ptr [[TMP19]], align 2
-; CHECK-INTERLEAVE1-NEXT: [[TMP36:%.*]] = load i16, ptr [[TMP20]], align 2
-; CHECK-INTERLEAVE1-NEXT: [[TMP37:%.*]] = load i16, ptr [[TMP21]], align 2
-; CHECK-INTERLEAVE1-NEXT: [[TMP38:%.*]] = load i16, ptr [[TMP22]], align 2
-; CHECK-INTERLEAVE1-NEXT: [[TMP41:%.*]] = load i16, ptr [[TMP23]], align 2
-; CHECK-INTERLEAVE1-NEXT: [[TMP42:%.*]] = load i16, ptr [[TMP24]], align 2
-; CHECK-INTERLEAVE1-NEXT: [[TMP43:%.*]] = load i16, ptr [[TMP25]], align 2
-; CHECK-INTERLEAVE1-NEXT: [[TMP44:%.*]] = load i16, ptr [[TMP26]], align 2
-; CHECK-INTERLEAVE1-NEXT: [[TMP45:%.*]] = load i16, ptr [[TMP27]], align 2
-; CHECK-INTERLEAVE1-NEXT: [[TMP46:%.*]] = load i16, ptr [[TMP28]], align 2
-; CHECK-INTERLEAVE1-NEXT: [[TMP55:%.*]] = load i16, ptr [[TMP29]], align 2
-; CHECK-INTERLEAVE1-NEXT: [[TMP56:%.*]] = load i16, ptr [[TMP30]], align 2
-; CHECK-INTERLEAVE1-NEXT: [[TMP47:%.*]] = load i16, ptr [[TMP31]], align 2
-; CHECK-INTERLEAVE1-NEXT: [[TMP48:%.*]] = load i16, ptr [[TMP32]], align 2
-; CHECK-INTERLEAVE1-NEXT: [[TMP49:%.*]] = load i16, ptr [[TMP33]], align 2
-; CHECK-INTERLEAVE1-NEXT: [[TMP50:%.*]] = load i16, ptr [[TMP34]], align 2
+; CHECK-INTERLEAVE1-NEXT: [[TMP36:%.*]] = load i16, ptr [[TMP33]], align 2
+; CHECK-INTERLEAVE1-NEXT: [[TMP37:%.*]] = load i16, ptr [[TMP34]], align 2
+; CHECK-INTERLEAVE1-NEXT: [[TMP38:%.*]] = load i16, ptr [[TMP20]], align 2
+; CHECK-INTERLEAVE1-NEXT: [[TMP41:%.*]] = load i16, ptr [[TMP21]], align 2
+; CHECK-INTERLEAVE1-NEXT: [[TMP42:%.*]] = load i16, ptr [[TMP22]], align 2
+; CHECK-INTERLEAVE1-NEXT: [[TMP43:%.*]] = load i16, ptr [[TMP23]], align 2
+; CHECK-INTERLEAVE1-NEXT: [[TMP44:%.*]] = load i16, ptr [[TMP24]], align 2
+; CHECK-INTERLEAVE1-NEXT: [[TMP45:%.*]] = load i16, ptr [[TMP25]], align 2
+; CHECK-INTERLEAVE1-NEXT: [[TMP46:%.*]] = load i16, ptr [[TMP26]], align 2
+; CHECK-INTERLEAVE1-NEXT: [[TMP55:%.*]] = load i16, ptr [[TMP27]], align 2
+; CHECK-INTERLEAVE1-NEXT: [[TMP56:%.*]] = load i16, ptr [[TMP28]], align 2
+; CHECK-INTERLEAVE1-NEXT: [[TMP47:%.*]] = load i16, ptr [[TMP29]], align 2
+; CHECK-INTERLEAVE1-NEXT: [[TMP48:%.*]] = load i16, ptr [[TMP30]], align 2
+; CHECK-INTERLEAVE1-NEXT: [[TMP49:%.*]] = load i16, ptr [[TMP31]], align 2
+; CHECK-INTERLEAVE1-NEXT: [[TMP50:%.*]] = load i16, ptr [[TMP32]], align 2
; CHECK-INTERLEAVE1-NEXT: [[TMP51:%.*]] = insertelement <16 x i16> poison, i16 [[TMP35]], i32 0
; CHECK-INTERLEAVE1-NEXT: [[TMP52:%.*]] = insertelement <16 x i16> [[TMP51]], i16 [[TMP36]], i32 1
; CHECK-INTERLEAVE1-NEXT: [[TMP53:%.*]] = insertelement <16 x i16> [[TMP52]], i16 [[TMP37]], i32 2
@@ -504,22 +503,21 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP137:%.*]], [[VECTOR_BODY]] ]
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP138:%.*]], [[VECTOR_BODY]] ]
-; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
-; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4
-; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5
-; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6
-; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7
-; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 8
-; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 9
-; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 10
-; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 11
-; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 12
-; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
-; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
-; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
+; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1
+; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
+; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 3
+; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 4
+; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 5
+; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 6
+; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 7
+; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 8
+; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 9
+; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 10
+; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 11
+; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 12
+; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 13
+; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 14
+; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 15
; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 16
; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 17
; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = add i64 [[INDEX]], 18
@@ -536,28 +534,28 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = add i64 [[INDEX]], 29
; CHECK-INTERLEAVED-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30
; CHECK-INTERLEAVED-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31
-; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
+; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
; CHECK-INTERLEAVED-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[TMP32]], i64 16
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP32]], align 1
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP34]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP35:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
; CHECK-INTERLEAVED-NEXT: [[TMP36:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32>
-; CHECK-INTERLEAVED-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
-; CHECK-INTERLEAVED-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
-; CHECK-INTERLEAVED-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
-; CHECK-INTERLEAVED-NEXT: [[TMP42:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]]
-; CHECK-INTERLEAVED-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP4]]
-; CHECK-INTERLEAVED-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]]
-; CHECK-INTERLEAVED-NEXT: [[TMP45:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]]
-; CHECK-INTERLEAVED-NEXT: [[TMP46:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]]
-; CHECK-INTERLEAVED-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]]
-; CHECK-INTERLEAVED-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]]
-; CHECK-INTERLEAVED-NEXT: [[TMP49:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP10]]
-; CHECK-INTERLEAVED-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]]
-; CHECK-INTERLEAVED-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP12]]
-; CHECK-INTERLEAVED-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP13]]
-; CHECK-INTERLEAVED-NEXT: [[TMP53:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP14]]
-; CHECK-INTERLEAVED-NEXT: [[TMP54:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP15]]
+; CHECK-INTERLEAVED-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
+; CHECK-INTERLEAVED-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
+; CHECK-INTERLEAVED-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
+; CHECK-INTERLEAVED-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
+; CHECK-INTERLEAVED-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]]
+; CHECK-INTERLEAVED-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP4]]
+; CHECK-INTERLEAVED-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]]
+; CHECK-INTERLEAVED-NEXT: [[TMP42:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]]
+; CHECK-INTERLEAVED-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]]
+; CHECK-INTERLEAVED-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]]
+; CHECK-INTERLEAVED-NEXT: [[TMP45:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]]
+; CHECK-INTERLEAVED-NEXT: [[TMP46:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP10]]
+; CHECK-INTERLEAVED-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]]
+; CHECK-INTERLEAVED-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP12]]
+; CHECK-INTERLEAVED-NEXT: [[TMP49:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP13]]
+; CHECK-INTERLEAVED-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP14]]
; CHECK-INTERLEAVED-NEXT: [[TMP55:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP16]]
; CHECK-INTERLEAVED-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP17]]
; CHECK-INTERLEAVED-NEXT: [[TMP57:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP18]]
@@ -575,21 +573,21 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: [[TMP139:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP30]]
; CHECK-INTERLEAVED-NEXT: [[TMP140:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP31]]
; CHECK-INTERLEAVED-NEXT: [[TMP69:%.*]] = load i16, ptr [[TMP39]], align 2
-; CHECK-INTERLEAVED-NEXT: [[TMP70:%.*]] = load i16, ptr [[TMP40]], align 2
-; CHECK-INTERLEAVED-NEXT: [[TMP71:%.*]] = load i16, ptr [[TMP41]], align 2
-; CHECK-INTERLEAVED-NEXT: [[TMP72:%.*]] = load i16, ptr [[TMP42]], align 2
-; CHECK-INTERLEAVED-NEXT: [[TMP73:%.*]] = load i16, ptr [[TMP43]], align 2
-; CHECK-INTERLEAVED-NEXT: [[TMP74:%.*]] = load i16, ptr [[TMP44]], align 2
-; CHECK-INTERLEAVED-NEXT: [[TMP75:%.*]] = load i16, ptr [[TMP45]], align 2
-; CHECK-INTERLEAVED-NEXT: [[TMP76:%.*]] = load i16, ptr [[TMP46]], align 2
-; CHECK-INTERLEAVED-NEXT: [[TMP77:%.*]] = load i16, ptr [[TMP47]], align 2
-; CHECK-INTERLEAVED-NEXT: [[TMP78:%.*]] = load i16, ptr [[TMP48]], align 2
-; CHECK-INTERLEAVED-NEXT: [[TMP79:%.*]] = load i16, ptr [[TMP49]], align 2
-; CHECK-INTERLEAVED-NEXT: [[TMP80:%.*]] = load i16, ptr [[TMP50]], align 2
-; CHECK-INTERLEAVED-NEXT: [[TMP81:%.*]] = load i16, ptr [[TMP51]], align 2
-; CHECK-INTERLEAVED-NEXT: [[TMP82:%.*]] = load i16, ptr [[TMP52]], align 2
-; CHECK-INTERLEAVED-NEXT: [[TMP83:%.*]] = load i16, ptr [[TMP53]], align 2
-; CHECK-INTERLEAVED-NEXT: [[TMP84:%.*]] = load i16, ptr [[TMP54]], align 2
+; CHECK-INTERLEAVED-NEXT: [[TMP70:%.*]] = load i16, ptr [[TMP51]], align 2
+; CHECK-INTERLEAVED-NEXT: [[TMP71:%.*]] = load i16, ptr [[TMP37]], align 2
+; CHECK-INTERLEAVED-NEXT: [[TMP72:%.*]] = load i16, ptr [[TMP38]], align 2
+; CHECK-INTERLEAVED-NEXT: [[TMP73:%.*]] = load i16, ptr [[TMP52]], align 2
+; CHECK-INTERLEAVED-NEXT: [[TMP74:%.*]] = load i16, ptr [[TMP40]], align 2
+; CHECK-INTERLEAVED-NEXT: [[TMP75:%.*]] = load i16, ptr [[TMP41]], align 2
+; CHECK-INTERLEAVED-NEXT: [[TMP76:%.*]] = load i16, ptr [[TMP42]], align 2
+; CHECK-INTERLEAVED-NEXT: [[TMP77:%.*]] = load i16, ptr [[TMP43]], align 2
+; CHECK-INTERLEAVED-NEXT: [[TMP78:%.*]] = load i16, ptr [[TMP44]], align 2
+; CHECK-INTERLEAVED-NEXT: [[TMP79:%.*]] = load i16, ptr [[TMP45]], align 2
+; CHECK-INTERLEAVED-NEXT: [[TMP80:%.*]] = load i16, ptr [[TMP46]], align 2
+; CHECK-INTERLEAVED-NEXT: [[TMP81:%.*]] = load i16, ptr [[TMP47]], align 2
+; CHECK-INTERLEAVED-NEXT: [[TMP82:%.*]] = load i16, ptr [[TMP48]], align 2
+; CHECK-INTERLEAVED-NEXT: [[TMP83:%.*]] = load i16, ptr [[TMP49]], align 2
+; CHECK-INTERLEAVED-NEXT: [[TMP84:%.*]] = load i16, ptr [[TMP50]], align 2
; CHECK-INTERLEAVED-NEXT: [[TMP85:%.*]] = insertelement <16 x i16> poison, i16 [[TMP69]], i32 0
; CHECK-INTERLEAVED-NEXT: [[TMP86:%.*]] = insertelement <16 x i16> [[TMP85]], i16 [[TMP70]], i32 1
; CHECK-INTERLEAVED-NEXT: [[TMP87:%.*]] = insertelement <16 x i16> [[TMP86]], i16 [[TMP71]], i32 2
@@ -663,57 +661,56 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) #0 {
; CHECK-MAXBW: vector.body:
; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-MAXBW-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP138:%.*]], [[VECTOR_BODY]] ]
-; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
-; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4
-; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5
-; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6
-; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7
-; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 8
-; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 9
-; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 10
-; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 11
-; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 12
-; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
-; CHECK-MAXBW-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
-; CHECK-MAXBW-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
-; CHECK-MAXBW-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
+; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1
+; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
+; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 3
+; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 4
+; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 5
+; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 6
+; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 7
+; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 8
+; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 9
+; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 10
+; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 11
+; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 12
+; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 13
+; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 14
+; CHECK-MAXBW-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 15
+; CHECK-MAXBW-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
; CHECK-MAXBW-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP32]], align 1
; CHECK-MAXBW-NEXT: [[TMP36:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32>
-; CHECK-MAXBW-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
-; CHECK-MAXBW-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
-; CHECK-MAXBW-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
-; CHECK-MAXBW-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]]
-; CHECK-MAXBW-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP4]]
-; CHECK-MAXBW-NEXT: [[TMP42:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]]
-; CHECK-MAXBW-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]]
-; CHECK-MAXBW-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]]
-; CHECK-MAXBW-NEXT: [[TMP45:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]]
-; CHECK-MAXBW-NEXT: [[TMP46:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]]
-; CHECK-MAXBW-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP10]]
-; CHECK-MAXBW-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]]
-; CHECK-MAXBW-NEXT: [[TMP49:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP12]]
-; CHECK-MAXBW-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP13]]
-; CHECK-MAXBW-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP14]]
-; CHECK-MAXBW-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP15]]
+; CHECK-MAXBW-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
+; CHECK-MAXBW-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
+; CHECK-MAXBW-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
+; CHECK-MAXBW-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
+; CHECK-MAXBW-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]]
+; CHECK-MAXBW-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP4]]
+; CHECK-MAXBW-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]]
+; CHECK-MAXBW-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]]
+; CHECK-MAXBW-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]]
+; CHECK-MAXBW-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]]
+; CHECK-MAXBW-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]]
+; CHECK-MAXBW-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP10]]
+; CHECK-MAXBW-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]]
+; CHECK-MAXBW-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP12]]
+; CHECK-MAXBW-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP13]]
+; CHECK-MAXBW-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP14]]
; CHECK-MAXBW-NEXT: [[TMP101:%.*]] = load i16, ptr [[TMP37]], align 2
-; CHECK-MAXBW-NEXT: [[TMP102:%.*]] = load i16, ptr [[TMP38]], align 2
-; CHECK-MAXBW-NEXT: [[TMP103:%.*]] = load i16, ptr [[TMP39]], align 2
-; CHECK-MAXBW-NEXT: [[TMP104:%.*]] = load i16, ptr [[TMP40]], align 2
-; CHECK-MAXBW-NEXT: [[TMP105:%.*]] = load i16, ptr [[TMP41]], align 2
-; CHECK-MAXBW-NEXT: [[TMP106:%.*]] = load i16, ptr [[TMP42]], align 2
-; CHECK-MAXBW-NEXT: [[TMP107:%.*]] = load i16, ptr [[TMP43]], align 2
-; CHECK-MAXBW-NEXT: [[TMP108:%.*]] = load i16, ptr [[TMP44]], align 2
-; CHECK-MAXBW-NEXT: [[TMP109:%.*]] = load i16, ptr [[TMP45]], align 2
-; CHECK-MAXBW-NEXT: [[TMP110:%.*]] = load i16, ptr [[TMP46]], align 2
-; CHECK-MAXBW-NEXT: [[TMP111:%.*]] = load i16, ptr [[TMP47]], align 2
-; CHECK-MAXBW-NEXT: [[TMP112:%.*]] = load i16, ptr [[TMP48]], align 2
-; CHECK-MAXBW-NEXT: [[TMP113:%.*]] = load i16, ptr [[TMP49]], align 2
-; CHECK-MAXBW-NEXT: [[TMP114:%.*]] = load i16, ptr [[TMP50]], align 2
-; CHECK-MAXBW-NEXT: [[TMP115:%.*]] = load i16, ptr [[TMP51]], align 2
-; CHECK-MAXBW-NEXT: [[TMP116:%.*]] = load i16, ptr [[TMP52]], align 2
+; CHECK-MAXBW-NEXT: [[TMP102:%.*]] = load i16, ptr [[TMP18]], align 2
+; CHECK-MAXBW-NEXT: [[TMP103:%.*]] = load i16, ptr [[TMP19]], align 2
+; CHECK-MAXBW-NEXT: [[TMP104:%.*]] = load i16, ptr [[TMP20]], align 2
+; CHECK-MAXBW-NEXT: [[TMP105:%.*]] = load i16, ptr [[TMP21]], align 2
+; CHECK-MAXBW-NEXT: [[TMP106:%.*]] = load i16, ptr [[TMP22]], align 2
+; CHECK-MAXBW-NEXT: [[TMP107:%.*]] = load i16, ptr [[TMP23]], align 2
+; CHECK-MAXBW-NEXT: [[TMP108:%.*]] = load i16, ptr [[TMP24]], align 2
+; CHECK-MAXBW-NEXT: [[TMP109:%.*]] = load i16, ptr [[TMP25]], align 2
+; CHECK-MAXBW-NEXT: [[TMP110:%.*]] = load i16, ptr [[TMP26]], align 2
+; CHECK-MAXBW-NEXT: [[TMP111:%.*]] = load i16, ptr [[TMP27]], align 2
+; CHECK-MAXBW-NEXT: [[TMP112:%.*]] = load i16, ptr [[TMP28]], align 2
+; CHECK-MAXBW-NEXT: [[TMP113:%.*]] = load i16, ptr [[TMP29]], align 2
+; CHECK-MAXBW-NEXT: [[TMP114:%.*]] = load i16, ptr [[TMP30]], align 2
+; CHECK-MAXBW-NEXT: [[TMP115:%.*]] = load i16, ptr [[TMP31]], align 2
+; CHECK-MAXBW-NEXT: [[TMP116:%.*]] = load i16, ptr [[TMP33]], align 2
; CHECK-MAXBW-NEXT: [[TMP117:%.*]] = insertelement <16 x i16> poison, i16 [[TMP101]], i32 0
; CHECK-MAXBW-NEXT: [[TMP118:%.*]] = insertelement <16 x i16> [[TMP117]], i16 [[TMP102]], i32 1
; CHECK-MAXBW-NEXT: [[TMP119:%.*]] = insertelement <16 x i16> [[TMP118]], i16 [[TMP103]], i32 2
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
index 131b3d1b02727..9cff5e5b77e92 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
@@ -360,7 +360,6 @@ define void @test_loop2(i64 %n, ptr %dst) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
@@ -376,7 +375,7 @@ define void @test_loop2(i64 %n, ptr %dst) {
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
-; CHECK-NEXT: [[TMP16:%.*]] = sub nsw i64 [[N:%.*]], [[TMP0]]
+; CHECK-NEXT: [[TMP16:%.*]] = sub nsw i64 [[N:%.*]], [[INDEX]]
; CHECK-NEXT: [[TMP17:%.*]] = sub nsw i64 [[N]], [[TMP1]]
; CHECK-NEXT: [[TMP18:%.*]] = sub nsw i64 [[N]], [[TMP2]]
; CHECK-NEXT: [[TMP19:%.*]] = sub nsw i64 [[N]], [[TMP3]]
@@ -409,7 +408,7 @@ define void @test_loop2(i64 %n, ptr %dst) {
; CHECK-NEXT: [[TMP46:%.*]] = insertelement <16 x i64> [[TMP45]], i64 [[TMP30]], i32 14
; CHECK-NEXT: [[TMP47:%.*]] = insertelement <16 x i64> [[TMP46]], i64 [[TMP31]], i32 15
; CHECK-NEXT: [[TMP48:%.*]] = trunc <16 x i64> [[TMP47]] to <16 x i8>
-; CHECK-NEXT: [[TMP49:%.*]] = add i64 [[TMP0]], [[TMP16]]
+; CHECK-NEXT: [[TMP49:%.*]] = add i64 [[INDEX]], [[TMP16]]
; CHECK-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[TMP49]]
; CHECK-NEXT: [[TMP51:%.*]] = extractelement <16 x i8> [[TMP48]], i32 15
; CHECK-NEXT: store i8 [[TMP51]], ptr [[TMP50]], align 1
@@ -425,7 +424,6 @@ define void @test_loop2(i64 %n, ptr %dst) {
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP53:%.*]] = add i64 [[INDEX2]], 0
; CHECK-NEXT: [[TMP54:%.*]] = add i64 [[INDEX2]], 1
; CHECK-NEXT: [[TMP55:%.*]] = add i64 [[INDEX2]], 2
; CHECK-NEXT: [[TMP56:%.*]] = add i64 [[INDEX2]], 3
@@ -433,7 +431,7 @@ define void @test_loop2(i64 %n, ptr %dst) {
; CHECK-NEXT: [[TMP58:%.*]] = add i64 [[INDEX2]], 5
; CHECK-NEXT: [[TMP59:%.*]] = add i64 [[INDEX2]], 6
; CHECK-NEXT: [[TMP60:%.*]] = add i64 [[INDEX2]], 7
-; CHECK-NEXT: [[TMP61:%.*]] = sub nsw i64 [[N]], [[TMP53]]
+; CHECK-NEXT: [[TMP61:%.*]] = sub nsw i64 [[N]], [[INDEX2]]
; CHECK-NEXT: [[TMP62:%.*]] = sub nsw i64 [[N]], [[TMP54]]
; CHECK-NEXT: [[TMP63:%.*]] = sub nsw i64 [[N]], [[TMP55]]
; CHECK-NEXT: [[TMP64:%.*]] = sub nsw i64 [[N]], [[TMP56]]
@@ -450,7 +448,7 @@ define void @test_loop2(i64 %n, ptr %dst) {
; CHECK-NEXT: [[TMP75:%.*]] = insertelement <8 x i64> [[TMP74]], i64 [[TMP67]], i32 6
; CHECK-NEXT: [[TMP76:%.*]] = insertelement <8 x i64> [[TMP75]], i64 [[TMP68]], i32 7
; CHECK-NEXT: [[TMP77:%.*]] = trunc <8 x i64> [[TMP76]] to <8 x i8>
-; CHECK-NEXT: [[TMP78:%.*]] = add i64 [[TMP53]], [[TMP61]]
+; CHECK-NEXT: [[TMP78:%.*]] = add i64 [[INDEX2]], [[TMP61]]
; CHECK-NEXT: [[TMP79:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP78]]
; CHECK-NEXT: [[TMP80:%.*]] = extractelement <8 x i8> [[TMP77]], i32 7
; CHECK-NEXT: store i8 [[TMP80]], ptr [[TMP79]], align 1
@@ -484,7 +482,6 @@ define void @test_loop2(i64 %n, ptr %dst) {
; IC2-NEXT: br label [[VECTOR_BODY:%.*]]
; IC2: vector.body:
; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IC2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; IC2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; IC2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
@@ -516,7 +513,7 @@ define void @test_loop2(i64 %n, ptr %dst) {
; IC2-NEXT: [[TMP29:%.*]] = add i64 [[INDEX]], 29
; IC2-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30
; IC2-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31
-; IC2-NEXT: [[TMP32:%.*]] = sub nsw i64 [[N:%.*]], [[TMP0]]
+; IC2-NEXT: [[TMP32:%.*]] = sub nsw i64 [[N:%.*]], [[INDEX]]
; IC2-NEXT: [[TMP33:%.*]] = sub nsw i64 [[N]], [[TMP1]]
; IC2-NEXT: [[TMP34:%.*]] = sub nsw i64 [[N]], [[TMP2]]
; IC2-NEXT: [[TMP35:%.*]] = sub nsw i64 [[N]], [[TMP3]]
@@ -582,7 +579,7 @@ define void @test_loop2(i64 %n, ptr %dst) {
; IC2-NEXT: [[TMP95:%.*]] = insertelement <16 x i64> [[TMP94]], i64 [[TMP79]], i32 15
; IC2-NEXT: [[TMP96:%.*]] = trunc <16 x i64> [[TMP63]] to <16 x i8>
; IC2-NEXT: [[TMP97:%.*]] = trunc <16 x i64> [[TMP95]] to <16 x i8>
-; IC2-NEXT: [[TMP98:%.*]] = add i64 [[TMP0]], [[TMP32]]
+; IC2-NEXT: [[TMP98:%.*]] = add i64 [[INDEX]], [[TMP32]]
; IC2-NEXT: [[TMP99:%.*]] = add i64 [[TMP16]], [[TMP64]]
; IC2-NEXT: [[TMP100:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[TMP98]]
; IC2-NEXT: [[TMP101:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP99]]
@@ -602,7 +599,6 @@ define void @test_loop2(i64 %n, ptr %dst) {
; IC2-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; IC2: vec.epilog.vector.body:
; IC2-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; IC2-NEXT: [[TMP105:%.*]] = add i64 [[INDEX1]], 0
; IC2-NEXT: [[TMP106:%.*]] = add i64 [[INDEX1]], 1
; IC2-NEXT: [[TMP107:%.*]] = add i64 [[INDEX1]], 2
; IC2-NEXT: [[TMP108:%.*]] = add i64 [[INDEX1]], 3
@@ -610,7 +606,7 @@ define void @test_loop2(i64 %n, ptr %dst) {
; IC2-NEXT: [[TMP110:%.*]] = add i64 [[INDEX1]], 5
; IC2-NEXT: [[TMP111:%.*]] = add i64 [[INDEX1]], 6
; IC2-NEXT: [[TMP112:%.*]] = add i64 [[INDEX1]], 7
-; IC2-NEXT: [[TMP113:%.*]] = sub nsw i64 [[N]], [[TMP105]]
+; IC2-NEXT: [[TMP113:%.*]] = sub nsw i64 [[N]], [[INDEX1]]
; IC2-NEXT: [[TMP114:%.*]] = sub nsw i64 [[N]], [[TMP106]]
; IC2-NEXT: [[TMP115:%.*]] = sub nsw i64 [[N]], [[TMP107]]
; IC2-NEXT: [[TMP116:%.*]] = sub nsw i64 [[N]], [[TMP108]]
@@ -627,7 +623,7 @@ define void @test_loop2(i64 %n, ptr %dst) {
; IC2-NEXT: [[TMP127:%.*]] = insertelement <8 x i64> [[TMP126]], i64 [[TMP119]], i32 6
; IC2-NEXT: [[TMP128:%.*]] = insertelement <8 x i64> [[TMP127]], i64 [[TMP120]], i32 7
; IC2-NEXT: [[TMP129:%.*]] = trunc <8 x i64> [[TMP128]] to <8 x i8>
-; IC2-NEXT: [[TMP130:%.*]] = add i64 [[TMP105]], [[TMP113]]
+; IC2-NEXT: [[TMP130:%.*]] = add i64 [[INDEX1]], [[TMP113]]
; IC2-NEXT: [[TMP131:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP130]]
; IC2-NEXT: [[TMP132:%.*]] = extractelement <8 x i8> [[TMP129]], i32 7
; IC2-NEXT: store i8 [[TMP132]], ptr [[TMP131]], align 1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
index fceab6f823d5a..6a5c0df1b58a6 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
@@ -552,10 +552,9 @@ define double @test_scalarization_cost_for_load_of_address(ptr %src.0, ptr %src.
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi double [ 3.000000e+00, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[IV]], 1
; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr [[T:%.*]], ptr [[SRC_0]], i64 [[IV]]
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <6 x double>, ptr [[GEP_0]], align 8
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <6 x double> [[WIDE_VEC]], <6 x double> poison, <2 x i32> <i32 0, i32 3>
@@ -569,19 +568,19 @@ define double @test_scalarization_cost_for_load_of_address(ptr %src.0, ptr %src.
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[GEP_SRC]], align 8
; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP7]], [[WIDE_LOAD]]
-; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr [[T_2:%.*]], ptr [[SRC_2]], i64 [[IV]]
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr [[T_2]], ptr [[SRC_2]], i64 [[TMP1]]
-; CHECK-NEXT: [[GEP_72:%.*]] = getelementptr i8, ptr [[GEP_SRC_2]], i64 72
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr [[T_2:%.*]], ptr [[SRC_2]], i64 [[IV]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr [[T_2]], ptr [[SRC_2]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP11]], i64 72
-; CHECK-NEXT: [[L_P_2:%.*]] = load ptr, ptr [[GEP_72]], align 8
-; CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP13]], align 8
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP10]], i64 72
+; CHECK-NEXT: [[L_P_2:%.*]] = load ptr, ptr [[TMP13]], align 8
+; CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP12]], align 8
; CHECK-NEXT: [[LV:%.*]] = load double, ptr [[L_P_2]], align 8
; CHECK-NEXT: [[TMP17:%.*]] = load double, ptr [[TMP15]], align 8
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x double> poison, double [[LV]], i32 0
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x double> [[TMP18]], double [[TMP17]], i32 1
; CHECK-NEXT: [[TMP20:%.*]] = fmul <2 x double> [[TMP9]], [[TMP19]]
; CHECK-NEXT: [[TMP21]] = call double @llvm.vector.reduce.fadd.v2f64(double [[VEC_PHI]], <2 x double> [[TMP20]])
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 2
; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
@@ -674,23 +673,23 @@ define i32 @test_or_reduction_with_stride_2(i32 %scale, ptr %src) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP66:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
-; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 8
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 10
-; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 12
-; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 14
-; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 16
-; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 18
-; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 20
-; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 22
-; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 24
-; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26
-; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28
-; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 4
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 8
+; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 10
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 12
+; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 14
+; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 16
+; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 18
+; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 20
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 22
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 24
+; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 26
+; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 28
+; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 30
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[TMP31:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP3]]
@@ -705,23 +704,22 @@ define i32 @test_or_reduction_with_stride_2(i32 %scale, ptr %src) {
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP12]]
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP13]]
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP14]]
-; CHECK-NEXT: [[TMP31:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP15]]
; CHECK-NEXT: [[TMP32:%.*]] = load i8, ptr [[TMP16]], align 1
-; CHECK-NEXT: [[TMP33:%.*]] = load i8, ptr [[TMP17]], align 1
-; CHECK-NEXT: [[TMP34:%.*]] = load i8, ptr [[TMP18]], align 1
-; CHECK-NEXT: [[TMP35:%.*]] = load i8, ptr [[TMP19]], align 1
-; CHECK-NEXT: [[TMP36:%.*]] = load i8, ptr [[TMP20]], align 1
-; CHECK-NEXT: [[TMP37:%.*]] = load i8, ptr [[TMP21]], align 1
-; CHECK-NEXT: [[TMP38:%.*]] = load i8, ptr [[TMP22]], align 1
-; CHECK-NEXT: [[TMP39:%.*]] = load i8, ptr [[TMP23]], align 1
-; CHECK-NEXT: [[TMP40:%.*]] = load i8, ptr [[TMP24]], align 1
-; CHECK-NEXT: [[TMP41:%.*]] = load i8, ptr [[TMP25]], align 1
-; CHECK-NEXT: [[TMP42:%.*]] = load i8, ptr [[TMP26]], align 1
-; CHECK-NEXT: [[TMP43:%.*]] = load i8, ptr [[TMP27]], align 1
-; CHECK-NEXT: [[TMP44:%.*]] = load i8, ptr [[TMP28]], align 1
-; CHECK-NEXT: [[TMP45:%.*]] = load i8, ptr [[TMP29]], align 1
-; CHECK-NEXT: [[TMP46:%.*]] = load i8, ptr [[TMP30]], align 1
-; CHECK-NEXT: [[TMP47:%.*]] = load i8, ptr [[TMP31]], align 1
+; CHECK-NEXT: [[TMP33:%.*]] = load i8, ptr [[TMP31]], align 1
+; CHECK-NEXT: [[TMP34:%.*]] = load i8, ptr [[TMP17]], align 1
+; CHECK-NEXT: [[TMP35:%.*]] = load i8, ptr [[TMP18]], align 1
+; CHECK-NEXT: [[TMP36:%.*]] = load i8, ptr [[TMP19]], align 1
+; CHECK-NEXT: [[TMP37:%.*]] = load i8, ptr [[TMP20]], align 1
+; CHECK-NEXT: [[TMP38:%.*]] = load i8, ptr [[TMP21]], align 1
+; CHECK-NEXT: [[TMP39:%.*]] = load i8, ptr [[TMP22]], align 1
+; CHECK-NEXT: [[TMP40:%.*]] = load i8, ptr [[TMP23]], align 1
+; CHECK-NEXT: [[TMP41:%.*]] = load i8, ptr [[TMP24]], align 1
+; CHECK-NEXT: [[TMP42:%.*]] = load i8, ptr [[TMP25]], align 1
+; CHECK-NEXT: [[TMP43:%.*]] = load i8, ptr [[TMP26]], align 1
+; CHECK-NEXT: [[TMP44:%.*]] = load i8, ptr [[TMP27]], align 1
+; CHECK-NEXT: [[TMP45:%.*]] = load i8, ptr [[TMP28]], align 1
+; CHECK-NEXT: [[TMP46:%.*]] = load i8, ptr [[TMP29]], align 1
+; CHECK-NEXT: [[TMP47:%.*]] = load i8, ptr [[TMP30]], align 1
; CHECK-NEXT: [[TMP48:%.*]] = insertelement <16 x i8> poison, i8 [[TMP32]], i32 0
; CHECK-NEXT: [[TMP49:%.*]] = insertelement <16 x i8> [[TMP48]], i8 [[TMP33]], i32 1
; CHECK-NEXT: [[TMP50:%.*]] = insertelement <16 x i8> [[TMP49]], i8 [[TMP34]], i32 2
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll
index 6d0c55b1d246c..267e410de1f26 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll
@@ -736,12 +736,11 @@ define void @test_2xi32(ptr noalias %data, ptr noalias %factor) {
; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
; VF2: [[VECTOR_BODY]]:
; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; VF2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; VF2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP0]]
+; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]]
; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i32>, ptr [[TMP2]], align 8
; VF2-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[WIDE_VEC]], <4 x i32> poison, <2 x i32> <i32 0, i32 2>
-; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP0]], i32 0
+; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[INDEX]], i32 0
; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP1]], i32 0
; VF2-NEXT: [[WIDE_VEC1:%.*]] = load <6 x i32>, ptr [[TMP8]], align 8
; VF2-NEXT: [[TMP13:%.*]] = shufflevector <6 x i32> [[WIDE_VEC1]], <6 x i32> poison, <2 x i32> <i32 0, i32 3>
@@ -751,7 +750,7 @@ define void @test_2xi32(ptr noalias %data, ptr noalias %factor) {
; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[TMP14]], i32 1
; VF2-NEXT: store i32 [[TMP15]], ptr [[TMP8]], align 8
; VF2-NEXT: store i32 [[TMP16]], ptr [[TMP9]], align 8
-; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP0]], i32 1
+; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[INDEX]], i32 1
; VF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP1]], i32 1
; VF2-NEXT: [[TMP23:%.*]] = mul <2 x i32> [[TMP7]], [[TMP22]]
; VF2-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP23]], i32 0
@@ -773,14 +772,13 @@ define void @test_2xi32(ptr noalias %data, ptr noalias %factor) {
; VF4-NEXT: br label %[[VECTOR_BODY:.*]]
; VF4: [[VECTOR_BODY]]:
; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; VF4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; VF4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; VF4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; VF4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP0]]
+; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]]
; VF4-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP4]], align 8
; VF4-NEXT: [[TMP15:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; VF4-NEXT: [[TMP16:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP0]], i32 0
+; VF4-NEXT: [[TMP16:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[INDEX]], i32 0
; VF4-NEXT: [[TMP17:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP1]], i32 0
; VF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP2]], i32 0
; VF4-NEXT: [[TMP19:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP3]], i32 0
@@ -796,7 +794,7 @@ define void @test_2xi32(ptr noalias %data, ptr noalias %factor) {
; VF4-NEXT: store i32 [[TMP30]], ptr [[TMP17]], align 8
; VF4-NEXT: store i32 [[TMP31]], ptr [[TMP18]], align 8
; VF4-NEXT: store i32 [[TMP32]], ptr [[TMP19]], align 8
-; VF4-NEXT: [[TMP33:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP0]], i32 1
+; VF4-NEXT: [[TMP33:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[INDEX]], i32 1
; VF4-NEXT: [[TMP34:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP1]], i32 1
; VF4-NEXT: [[TMP35:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP2]], i32 1
; VF4-NEXT: [[TMP36:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP3]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll
index 4761cb0d63de7..a23ab6671f50b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll
@@ -13,40 +13,39 @@ define void @test0(ptr noalias %M3, ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP0]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP4]], align 2
-; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i16> [[WIDE_LOAD]], splat (i16 10)
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i16> [[TMP5]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i16> [[TMP5]], i32 1
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP3]], align 2
+; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i16> [[WIDE_LOAD]], splat (i16 10)
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i16> [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i16> [[TMP4]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP9]], align 8
; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8
; CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8
-; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP13]], align 8
+; CHECK-NEXT: [[TMP17:%.*]] = ashr exact i64 [[TMP13]], 32
; CHECK-NEXT: [[TMP18:%.*]] = ashr exact i64 [[TMP14]], 32
; CHECK-NEXT: [[TMP19:%.*]] = ashr exact i64 [[TMP15]], 32
; CHECK-NEXT: [[TMP20:%.*]] = ashr exact i64 [[TMP16]], 32
-; CHECK-NEXT: [[TMP21:%.*]] = ashr exact i64 [[TMP17]], 32
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i16, ptr [[M3]], i64 [[TMP17]]
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i16, ptr [[M3]], i64 [[TMP18]]
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i16, ptr [[M3]], i64 [[TMP19]]
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[M3]], i64 [[TMP20]]
-; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i16, ptr [[M3]], i64 [[TMP21]]
+; CHECK-NEXT: store i16 [[TMP5]], ptr [[TMP21]], align 2
; CHECK-NEXT: store i16 [[TMP6]], ptr [[TMP22]], align 2
; CHECK-NEXT: store i16 [[TMP7]], ptr [[TMP23]], align 2
; CHECK-NEXT: store i16 [[TMP8]], ptr [[TMP24]], align 2
-; CHECK-NEXT: store i16 [[TMP9]], ptr [[TMP25]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
-; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
+; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[FOR_INC1286_LOOPEXIT:%.*]]
; CHECK: for.inc1286.loopexit:
@@ -84,44 +83,43 @@ define void @test1(ptr noalias %M3, ptr noalias %A, ptr noalias %B, ptr noalias
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[C]], align 4
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[C]], align 4
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP0]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP5]], align 2
-; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i32> [[BROADCAST_SPLAT]] to <4 x i16>
-; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i16> [[WIDE_LOAD]], [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i16> [[TMP7]], i32 0
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i16> [[TMP7]], i32 1
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i16> [[TMP7]], i32 2
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i16> [[TMP7]], i32 3
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP4]], align 2
+; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i32> [[BROADCAST_SPLAT]] to <4 x i16>
+; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i16> [[WIDE_LOAD]], [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i16> [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i16> [[TMP6]], i32 1
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i16> [[TMP6]], i32 2
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i16> [[TMP6]], i32 3
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8
; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP13]], align 8
; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 8
-; CHECK-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP15]], align 8
+; CHECK-NEXT: [[TMP19:%.*]] = ashr exact i64 [[TMP15]], 32
; CHECK-NEXT: [[TMP20:%.*]] = ashr exact i64 [[TMP16]], 32
; CHECK-NEXT: [[TMP21:%.*]] = ashr exact i64 [[TMP17]], 32
; CHECK-NEXT: [[TMP22:%.*]] = ashr exact i64 [[TMP18]], 32
-; CHECK-NEXT: [[TMP23:%.*]] = ashr exact i64 [[TMP19]], 32
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i16, ptr [[M3]], i64 [[TMP19]]
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[M3]], i64 [[TMP20]]
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i16, ptr [[M3]], i64 [[TMP21]]
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i16, ptr [[M3]], i64 [[TMP22]]
-; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i16, ptr [[M3]], i64 [[TMP23]]
+; CHECK-NEXT: store i16 [[TMP7]], ptr [[TMP23]], align 2
; CHECK-NEXT: store i16 [[TMP8]], ptr [[TMP24]], align 2
; CHECK-NEXT: store i16 [[TMP9]], ptr [[TMP25]], align 2
; CHECK-NEXT: store i16 [[TMP10]], ptr [[TMP26]], align 2
-; CHECK-NEXT: store i16 [[TMP11]], ptr [[TMP27]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
-; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
+; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[FOR_INC1286_LOOPEXIT:%.*]]
; CHECK: for.inc1286.loopexit:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll b/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll
index dcb890670e33b..c185f3fe8a2f2 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll
@@ -188,7 +188,7 @@ define void @test_uniform_not_invariant(ptr noalias %dst, ptr readonly %src, i64
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[GEPSRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[DATA:%.*]] = load double, ptr [[GEPSRC]], align 8
-; CHECK-NEXT: [[CALL:%.*]] = call double @foo(double [[DATA]], i64 [[INDVARS_IV]]) #[[ATTR4:[0-9]+]]
+; CHECK-NEXT: [[CALL:%.*]] = call double @foo(double [[DATA]], i64 [[INDVARS_IV]]) #[[ATTR5:[0-9]+]]
; CHECK-NEXT: [[GEPDST:%.*]] = getelementptr inbounds nuw double, ptr [[DST]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store double [[CALL]], ptr [[GEPDST]], align 8
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
@@ -212,7 +212,7 @@ define void @test_uniform_not_invariant(ptr noalias %dst, ptr readonly %src, i64
; INTERLEAVE: pred.store.if:
; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]]
; INTERLEAVE-NEXT: [[TMP2:%.*]] = load double, ptr [[TMP1]], align 8
-; INTERLEAVE-NEXT: [[TMP3:%.*]] = call double @foo(double [[TMP2]], i64 [[INDEX]]) #[[ATTR4:[0-9]+]]
+; INTERLEAVE-NEXT: [[TMP3:%.*]] = call double @foo(double [[TMP2]], i64 [[INDEX]]) #[[ATTR5:[0-9]+]]
; INTERLEAVE-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]]
; INTERLEAVE-NEXT: store double [[TMP3]], ptr [[TMP4]], align 8
; INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE]]
@@ -222,7 +222,7 @@ define void @test_uniform_not_invariant(ptr noalias %dst, ptr readonly %src, i64
; INTERLEAVE-NEXT: [[TMP5:%.*]] = or disjoint i64 [[INDEX]], 1
; INTERLEAVE-NEXT: [[TMP6:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP5]]
; INTERLEAVE-NEXT: [[TMP7:%.*]] = load double, ptr [[TMP6]], align 8
-; INTERLEAVE-NEXT: [[TMP8:%.*]] = call double @foo(double [[TMP7]], i64 [[TMP5]]) #[[ATTR4]]
+; INTERLEAVE-NEXT: [[TMP8:%.*]] = call double @foo(double [[TMP7]], i64 [[TMP5]]) #[[ATTR5]]
; INTERLEAVE-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[TMP5]]
; INTERLEAVE-NEXT: store double [[TMP8]], ptr [[TMP9]], align 8
; INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE4]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll b/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll
index cf035d0b2b2ee..043ddb286761c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll
@@ -110,15 +110,14 @@ define void @test(ptr noalias %src, ptr noalias %dst) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x float> [[WIDE_LOAD]] to <2 x double>
; CHECK-NEXT: [[TMP5:%.*]] = call fast <2 x double> @__simd_sin_v2f64(<2 x double> [[TMP4]])
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP5]], i32 1
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[TMP1]]
; CHECK-NEXT: store double [[TMP8]], ptr [[TMP6]], align 8
; CHECK-NEXT: store double [[TMP9]], ptr [[TMP7]], align 8
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
index f2f65685e9bad..49fe1056e3ab2 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
@@ -849,11 +849,10 @@ define void @vector_reverse_irregular_type(ptr noalias %A, ptr noalias %B) {
; RV64: [[VECTOR_BODY]]:
; RV64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; RV64-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
-; RV64-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; RV64-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -1
; RV64-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -2
; RV64-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -3
-; RV64-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP0]], -1
+; RV64-NEXT: [[TMP4:%.*]] = add nsw i64 [[OFFSET_IDX]], -1
; RV64-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP1]], -1
; RV64-NEXT: [[TMP6:%.*]] = add nsw i64 [[TMP2]], -1
; RV64-NEXT: [[TMP7:%.*]] = add nsw i64 [[TMP3]], -1
@@ -900,11 +899,10 @@ define void @vector_reverse_irregular_type(ptr noalias %A, ptr noalias %B) {
; RV32: [[VECTOR_BODY]]:
; RV32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; RV32-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
-; RV32-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; RV32-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -1
; RV32-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -2
; RV32-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -3
-; RV32-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP0]], -1
+; RV32-NEXT: [[TMP4:%.*]] = add nsw i64 [[OFFSET_IDX]], -1
; RV32-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP1]], -1
; RV32-NEXT: [[TMP6:%.*]] = add nsw i64 [[TMP2]], -1
; RV32-NEXT: [[TMP7:%.*]] = add nsw i64 [[TMP3]], -1
@@ -951,7 +949,6 @@ define void @vector_reverse_irregular_type(ptr noalias %A, ptr noalias %B) {
; RV64-UF2: [[VECTOR_BODY]]:
; RV64-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; RV64-UF2-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
-; RV64-UF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; RV64-UF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -1
; RV64-UF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -2
; RV64-UF2-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -3
@@ -959,7 +956,7 @@ define void @vector_reverse_irregular_type(ptr noalias %A, ptr noalias %B) {
; RV64-UF2-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], -5
; RV64-UF2-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], -6
; RV64-UF2-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], -7
-; RV64-UF2-NEXT: [[TMP8:%.*]] = add nsw i64 [[TMP0]], -1
+; RV64-UF2-NEXT: [[TMP8:%.*]] = add nsw i64 [[OFFSET_IDX]], -1
; RV64-UF2-NEXT: [[TMP9:%.*]] = add nsw i64 [[TMP1]], -1
; RV64-UF2-NEXT: [[TMP10:%.*]] = add nsw i64 [[TMP2]], -1
; RV64-UF2-NEXT: [[TMP11:%.*]] = add nsw i64 [[TMP3]], -1
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
index 55e7018c49eec..28b097e2c0536 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
@@ -602,9 +602,6 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias
; FIXEDLEN-NEXT: br label %[[VECTOR_BODY:.*]]
; FIXEDLEN: [[VECTOR_BODY]]:
; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4
-; FIXEDLEN-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 5
-; FIXEDLEN-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 6
; FIXEDLEN-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 7
; FIXEDLEN-NEXT: store i64 [[TMP4]], ptr [[B]], align 8
; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll
index ee84ef243570a..c665c0754b241 100644
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll
@@ -20,13 +20,14 @@ define void @func_21() {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ]
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[PRED_STORE_CONTINUE4]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i8> [ <i8 0, i8 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE4]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0
+; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i64> [[TMP21]], i64 [[TMP1]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <2 x i8> [[VEC_IND]], splat (i8 4)
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK: pred.load.if:
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [5 x i32], ptr @A, i64 0, i64 [[TMP0]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [5 x i32], ptr @A, i64 0, i64 [[INDEX]]
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
@@ -45,7 +46,7 @@ define void @func_21() {
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i32], ptr @B, i64 0, i64 [[TMP0]]
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i32], ptr @B, i64 0, i64 [[INDEX]]
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0
; CHECK-NEXT: store i32 [[TMP16]], ptr [[TMP15]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll
index cfb180594b0ec..66d032ad1fd77 100644
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll
@@ -16,15 +16,13 @@ define void @test_scalar_iv_steps_used_by_replicate_and_first_lane_only_vpinst(p
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[TMP3:%.*]] = mul nsw i64 0, 4
; CHECK-NEXT: [[TMP4:%.*]] = mul nsw i64 1, 4
; CHECK-NEXT: [[TMP2:%.*]] = mul nsw i64 2, 4
; CHECK-NEXT: [[TMP15:%.*]] = mul nsw i64 3, 4
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP15]]
-; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[SRC_1]], align 1
; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP5]], align 1
; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP6]], align 1
; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP7]], align 1
@@ -33,8 +31,7 @@ define void @test_scalar_iv_steps_used_by_replicate_and_first_lane_only_vpinst(p
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i8> [[TMP13]], i8 [[TMP10]], i32 2
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i8> [[TMP14]], i8 [[TMP11]], i32 3
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq <4 x i8> [[TMP19]], zeroinitializer
-; CHECK-NEXT: [[TMP22:%.*]] = getelementptr [8 x i32], ptr @src, i64 0, i64 4
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr getelementptr inbounds nuw (i8, ptr @src, i64 16), align 4
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP20]], i32 0
; CHECK-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
index 69505eb176f50..dee6c274585bb 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
@@ -66,7 +66,6 @@ define void @PR31671(float %x, ptr %d) #0 {
; FORCE: [[VECTOR_BODY]]:
; FORCE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCE-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 5
-; FORCE-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; FORCE-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 5
; FORCE-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 10
; FORCE-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 15
@@ -74,7 +73,7 @@ define void @PR31671(float %x, ptr %d) #0 {
; FORCE-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 25
; FORCE-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 30
; FORCE-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 35
-; FORCE-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[DATA:%.*]], ptr [[D]], i64 0, i32 3, i64 [[TMP0]]
+; FORCE-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[DATA:%.*]], ptr [[D]], i64 0, i32 3, i64 [[OFFSET_IDX]]
; FORCE-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[DATA]], ptr [[D]], i64 0, i32 3, i64 [[TMP2]]
; FORCE-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[DATA]], ptr [[D]], i64 0, i32 3, i64 [[TMP4]]
; FORCE-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[DATA]], ptr [[D]], i64 0, i32 3, i64 [[TMP6]]
@@ -90,7 +89,7 @@ define void @PR31671(float %x, ptr %d) #0 {
; FORCE-NEXT: [[TMP13:%.*]] = fmul <2 x float> [[BROADCAST_SPLAT]], [[STRIDED_VEC2]]
; FORCE-NEXT: [[TMP14:%.*]] = fmul <2 x float> [[BROADCAST_SPLAT]], [[STRIDED_VEC4]]
; FORCE-NEXT: [[TMP15:%.*]] = fmul <2 x float> [[BROADCAST_SPLAT]], [[STRIDED_VEC6]]
-; FORCE-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[DATA]], ptr [[D]], i64 0, i32 0, i64 [[TMP0]]
+; FORCE-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[DATA]], ptr [[D]], i64 0, i32 0, i64 [[OFFSET_IDX]]
; FORCE-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[DATA]], ptr [[D]], i64 0, i32 0, i64 [[TMP1]]
; FORCE-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[DATA]], ptr [[D]], i64 0, i32 0, i64 [[TMP2]]
; FORCE-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[DATA]], ptr [[D]], i64 0, i32 0, i64 [[TMP3]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
index 801f910c5e13d..e7c262656feff 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
@@ -90,7 +90,6 @@ define float @PR27826(ptr nocapture readonly %a, ptr nocapture readonly %b, i32
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP121:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP122:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 32
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 32
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 64
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 96
@@ -106,7 +105,7 @@ define float @PR27826(ptr nocapture readonly %a, ptr nocapture readonly %b, i32
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[OFFSET_IDX]], 416
; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], 448
; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[OFFSET_IDX]], 480
-; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]]
@@ -154,7 +153,7 @@ define float @PR27826(ptr nocapture readonly %a, ptr nocapture readonly %b, i32
; CHECK-NEXT: [[TMP64:%.*]] = insertelement <4 x float> [[TMP63]], float [[TMP60]], i32 1
; CHECK-NEXT: [[TMP65:%.*]] = insertelement <4 x float> [[TMP64]], float [[TMP61]], i32 2
; CHECK-NEXT: [[TMP66:%.*]] = insertelement <4 x float> [[TMP65]], float [[TMP62]], i32 3
-; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP6]]
@@ -236,11 +235,10 @@ define float @PR27826(ptr nocapture readonly %a, ptr nocapture readonly %b, i32
; CHECK-NEXT: [[INDEX10:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT13:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI11:%.*]] = phi <4 x float> [ [[TMP125]], [[VEC_EPILOG_PH]] ], [ [[TMP155:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX12:%.*]] = mul i64 [[INDEX10]], 32
-; CHECK-NEXT: [[TMP126:%.*]] = add i64 [[OFFSET_IDX12]], 0
; CHECK-NEXT: [[TMP127:%.*]] = add i64 [[OFFSET_IDX12]], 32
; CHECK-NEXT: [[TMP128:%.*]] = add i64 [[OFFSET_IDX12]], 64
; CHECK-NEXT: [[TMP129:%.*]] = add i64 [[OFFSET_IDX12]], 96
-; CHECK-NEXT: [[TMP130:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP126]]
+; CHECK-NEXT: [[TMP130:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[OFFSET_IDX12]]
; CHECK-NEXT: [[TMP131:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP127]]
; CHECK-NEXT: [[TMP132:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP128]]
; CHECK-NEXT: [[TMP133:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP129]]
@@ -252,7 +250,7 @@ define float @PR27826(ptr nocapture readonly %a, ptr nocapture readonly %b, i32
; CHECK-NEXT: [[TMP139:%.*]] = insertelement <4 x float> [[TMP138]], float [[TMP135]], i32 1
; CHECK-NEXT: [[TMP140:%.*]] = insertelement <4 x float> [[TMP139]], float [[TMP136]], i32 2
; CHECK-NEXT: [[TMP141:%.*]] = insertelement <4 x float> [[TMP140]], float [[TMP137]], i32 3
-; CHECK-NEXT: [[TMP142:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP126]]
+; CHECK-NEXT: [[TMP142:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[OFFSET_IDX12]]
; CHECK-NEXT: [[TMP143:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP127]]
; CHECK-NEXT: [[TMP144:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP128]]
; CHECK-NEXT: [[TMP145:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP129]]
@@ -472,11 +470,10 @@ define i1 @any_of_cost(ptr %start, ptr %end) #0 {
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 40
-; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 40
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 80
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 120
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP6]]
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP7]]
; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP8]]
; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP9]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
index 4423b89e981b9..7ec45bedb7eb5 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
@@ -287,12 +287,10 @@ define void @drop_nonvector_nuw_nsw_avx1(ptr noalias nocapture readonly %input,
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[PTRS]], i64 0
-; CHECK-NEXT: [[TMP6:%.*]] = sub i64 0, 1
; CHECK-NEXT: [[TMP7:%.*]] = sub i64 1, 1
; CHECK-NEXT: [[TMP8:%.*]] = sub i64 2, 1
; CHECK-NEXT: [[TMP9:%.*]] = sub i64 3, 1
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP6]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, ptr [[INPUT]], i64 -1
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP7]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP8]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP9]]
@@ -300,11 +298,10 @@ define void @drop_nonvector_nuw_nsw_avx1(ptr noalias nocapture readonly %input,
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x ptr> [[TMP14]], ptr [[TMP11]], i32 1
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x ptr> [[TMP15]], ptr [[TMP12]], i32 2
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x ptr> [[TMP16]], ptr [[TMP13]], i32 3
-; CHECK-NEXT: store <4 x ptr> [[TMP17]], ptr [[TMP5]], align 8
+; CHECK-NEXT: store <4 x ptr> [[TMP17]], ptr [[PTRS]], align 8
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr align 4 [[TMP10]], <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> poison), !invariant.load [[META0]]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> [[WIDE_MASKED_LOAD]], <4 x float> zeroinitializer
-; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 0
-; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP21]], align 4
+; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[OUTPUT]], align 4
; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
;
diff --git a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll
index 84579d97b38e2..86444d3354fe8 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll
@@ -354,7 +354,6 @@ define void @test_for_tried_to_force_scalar(ptr noalias %A, ptr noalias %B, ptr
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 3
@@ -362,7 +361,7 @@ define void @test_for_tried_to_force_scalar(ptr noalias %A, ptr noalias %B, ptr
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 5
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 6
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 7
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr nusw [3 x float], ptr [[A:%.*]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr nusw [3 x float], ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP7]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP8]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
index 0bac10a41640e..aff665dad85a7 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
@@ -151,9 +151,10 @@ define void @foo2(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr n
; FVW2-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ]
; FVW2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 16>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE3]] ]
; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16
-; FVW2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; FVW2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16
-; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[TMP0]]
+; FVW2-NEXT: [[TMP18:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0
+; FVW2-NEXT: [[TMP19:%.*]] = insertelement <2 x i64> [[TMP18]], i64 [[TMP1]], i32 1
+; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[OFFSET_IDX]]
; FVW2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]]
; FVW2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
; FVW2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
@@ -166,7 +167,7 @@ define void @foo2(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr n
; FVW2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
; FVW2-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; FVW2: pred.store.if:
-; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], i64 [[TMP0]]
+; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], i64 [[OFFSET_IDX]]
; FVW2-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP10]], i32 0
; FVW2-NEXT: store float [[TMP13]], ptr [[TMP12]], align 4
; FVW2-NEXT: br label [[PRED_STORE_CONTINUE]]
@@ -266,9 +267,10 @@ define void @foo3(ptr noalias %in, ptr noalias %out, ptr noalias %trigger) {
; FVW2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
; FVW2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 16>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ]
; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16
-; FVW2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; FVW2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16
-; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[TMP0]]
+; FVW2-NEXT: [[TMP18:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0
+; FVW2-NEXT: [[TMP19:%.*]] = insertelement <2 x i64> [[TMP18]], i64 [[TMP1]], i32 1
+; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[OFFSET_IDX]]
; FVW2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]]
; FVW2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
; FVW2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
@@ -281,7 +283,7 @@ define void @foo3(ptr noalias %in, ptr noalias %out, ptr noalias %trigger) {
; FVW2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
; FVW2-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; FVW2: pred.store.if:
-; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_OUT:%.*]], ptr [[OUT:%.*]], i64 [[TMP0]], i32 1
+; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_OUT:%.*]], ptr [[OUT:%.*]], i64 [[OFFSET_IDX]], i32 1
; FVW2-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP10]], i32 0
; FVW2-NEXT: store float [[TMP13]], ptr [[TMP12]], align 4
; FVW2-NEXT: br label [[PRED_STORE_CONTINUE]]
@@ -368,9 +370,10 @@ define void @foo2_addrspace(ptr addrspace(1) noalias %in, ptr addrspace(1) noali
; FVW2-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ]
; FVW2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 16>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE3]] ]
; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16
-; FVW2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; FVW2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16
-; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[TMP0]]
+; FVW2-NEXT: [[TMP18:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0
+; FVW2-NEXT: [[TMP19:%.*]] = insertelement <2 x i64> [[TMP18]], i64 [[TMP1]], i32 1
+; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[OFFSET_IDX]]
; FVW2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]]
; FVW2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
; FVW2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
@@ -383,7 +386,7 @@ define void @foo2_addrspace(ptr addrspace(1) noalias %in, ptr addrspace(1) noali
; FVW2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
; FVW2-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; FVW2: pred.store.if:
-; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT:%.*]], i64 [[TMP0]]
+; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT:%.*]], i64 [[OFFSET_IDX]]
; FVW2-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP10]], i32 0
; FVW2-NEXT: store float [[TMP13]], ptr addrspace(1) [[TMP12]], align 4
; FVW2-NEXT: br label [[PRED_STORE_CONTINUE]]
@@ -469,9 +472,10 @@ define void @foo2_addrspace2(ptr addrspace(1) noalias %in, ptr addrspace(0) noal
; FVW2-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ]
; FVW2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 16>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE3]] ]
; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16
-; FVW2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; FVW2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16
-; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[TMP0]]
+; FVW2-NEXT: [[TMP18:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0
+; FVW2-NEXT: [[TMP19:%.*]] = insertelement <2 x i64> [[TMP18]], i64 [[TMP1]], i32 1
+; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[OFFSET_IDX]]
; FVW2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]]
; FVW2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
; FVW2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
@@ -484,7 +488,7 @@ define void @foo2_addrspace2(ptr addrspace(1) noalias %in, ptr addrspace(0) noal
; FVW2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
; FVW2-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; FVW2: pred.store.if:
-; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], i64 [[TMP0]]
+; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], i64 [[OFFSET_IDX]]
; FVW2-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP10]], i32 0
; FVW2-NEXT: store float [[TMP13]], ptr [[TMP12]], align 4
; FVW2-NEXT: br label [[PRED_STORE_CONTINUE]]
@@ -570,9 +574,10 @@ define void @foo2_addrspace3(ptr addrspace(0) noalias %in, ptr addrspace(1) noal
; FVW2-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ]
; FVW2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 16>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE3]] ]
; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16
-; FVW2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; FVW2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16
-; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[TMP0]]
+; FVW2-NEXT: [[TMP18:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0
+; FVW2-NEXT: [[TMP19:%.*]] = insertelement <2 x i64> [[TMP18]], i64 [[TMP1]], i32 1
+; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[OFFSET_IDX]]
; FVW2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]]
; FVW2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
; FVW2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
@@ -585,7 +590,7 @@ define void @foo2_addrspace3(ptr addrspace(0) noalias %in, ptr addrspace(1) noal
; FVW2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
; FVW2-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; FVW2: pred.store.if:
-; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT:%.*]], i64 [[TMP0]]
+; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT:%.*]], i64 [[OFFSET_IDX]]
; FVW2-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP10]], i32 0
; FVW2-NEXT: store float [[TMP13]], ptr addrspace(1) [[TMP12]], align 4
; FVW2-NEXT: br label [[PRED_STORE_CONTINUE]]
@@ -779,9 +784,8 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt
; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
; FVW2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[OFFSET_IDX]]
; FVW2-NEXT: [[OFFSET_IDX9:%.*]] = mul i64 [[INDEX]], 64
-; FVW2-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX9]], 0
; FVW2-NEXT: [[TMP18:%.*]] = add i64 [[OFFSET_IDX9]], 64
-; FVW2-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP17]]
+; FVW2-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[OFFSET_IDX9]]
; FVW2-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP18]]
; FVW2-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM]]
; FVW2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP21]], align 4, !alias.scope [[META8:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
index 04bff3c393f62..aed00abdbc3fd 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
@@ -255,7 +255,6 @@ define void @multiple_pointer_ivs_with_scalar_uses_only(ptr %A, ptr %B) #0 {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 2048>, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
@@ -271,7 +270,7 @@ define void @multiple_pointer_ivs_with_scalar_uses_only(ptr %A, ptr %B) #0 {
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]]
; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleave-ptradd-with-replicated-operand.ll b/llvm/test/Transforms/LoopVectorize/X86/interleave-ptradd-with-replicated-operand.ll
index 14fb2a76eb75b..fbdc342f0b7f3 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/interleave-ptradd-with-replicated-operand.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/interleave-ptradd-with-replicated-operand.ll
@@ -16,7 +16,6 @@ define ptr @test_interleave_ptradd_with_replicated_op(ptr %m) #0 {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 8
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 16
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 24
@@ -32,7 +31,7 @@ define ptr @test_interleave_ptradd_with_replicated_op(ptr %m) #0 {
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 104
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 112
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 120
-; CHECK-NEXT: [[NEXT_GEP12:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP0]]
+; CHECK-NEXT: [[NEXT_GEP12:%.*]] = getelementptr i8, ptr [[M]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP1]]
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP2]]
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP3]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-hoist-load-across-store.ll b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-hoist-load-across-store.ll
index 829fdff5123e2..297d313404679 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-hoist-load-across-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-hoist-load-across-store.ll
@@ -20,43 +20,42 @@ define void @pr63602_1(ptr %arr) {
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[INDEX]], 3
; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = add i64 4, [[TMP1]]
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX1]], 3
-; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX1]], 6
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX1]], 9
-; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[OFFSET_IDX]], 4
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP6]]
-; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i32>, ptr [[TMP7]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX1]], 3
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX1]], 6
+; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX1]], 9
+; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[OFFSET_IDX]], 4
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP5]]
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i32>, ptr [[TMP6]], align 4
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 0
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 1
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 2
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 3
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 1
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 2
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 3
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[OFFSET_IDX1]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP5]]
+; CHECK-NEXT: store i32 [[TMP7]], ptr [[TMP11]], align 4
; CHECK-NEXT: store i32 [[TMP8]], ptr [[TMP12]], align 4
; CHECK-NEXT: store i32 [[TMP9]], ptr [[TMP13]], align 4
; CHECK-NEXT: store i32 [[TMP10]], ptr [[TMP14]], align 4
-; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP15]], align 4
-; CHECK-NEXT: [[TMP16:%.*]] = add nuw nsw i64 [[OFFSET_IDX]], 2
-; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP16]]
-; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load <12 x i32>, ptr [[TMP17]], align 4
+; CHECK-NEXT: [[TMP15:%.*]] = add nuw nsw i64 [[OFFSET_IDX]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP15]]
+; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load <12 x i32>, ptr [[TMP16]], align 4
; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <12 x i32> [[WIDE_VEC2]], <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <12 x i32> [[WIDE_VEC2]], <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
-; CHECK-NEXT: [[TMP18:%.*]] = add <4 x i32> [[STRIDED_VEC4]], [[STRIDED_VEC3]]
-; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i32> [[TMP18]], i32 0
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[TMP18]], i32 1
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[TMP18]], i32 2
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP18]], i32 3
+; CHECK-NEXT: [[TMP17:%.*]] = add <4 x i32> [[STRIDED_VEC4]], [[STRIDED_VEC3]]
+; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i32> [[TMP17]], i32 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i32> [[TMP17]], i32 1
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[TMP17]], i32 2
+; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[TMP17]], i32 3
+; CHECK-NEXT: store i32 [[TMP18]], ptr [[TMP11]], align 4
; CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP12]], align 4
; CHECK-NEXT: store i32 [[TMP20]], ptr [[TMP13]], align 4
; CHECK-NEXT: store i32 [[TMP21]], ptr [[TMP14]], align 4
-; CHECK-NEXT: store i32 [[TMP22]], ptr [[TMP15]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
-; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
+; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH:%.*]]
; CHECK: scalar.ph:
@@ -127,68 +126,66 @@ define void @pr63602_2(ptr %arr) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
-; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9
-; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[INDEX]], 3
-; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = add i64 4, [[TMP5]]
-; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX1]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX1]], 3
-; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX1]], 6
-; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX1]], 9
-; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[TMP1]], 4
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP10]]
-; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i32>, ptr [[TMP11]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9
+; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[INDEX]], 3
+; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = add i64 4, [[TMP4]]
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX1]], 3
+; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX1]], 6
+; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX1]], 9
+; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[OFFSET_IDX]], 4
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP8]]
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i32>, ptr [[TMP9]], align 4
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 0
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 1
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 2
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 3
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 0
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 1
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 2
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 3
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[OFFSET_IDX1]]
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP7]]
-; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP8]]
-; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP9]]
+; CHECK-NEXT: store i32 [[TMP10]], ptr [[TMP14]], align 4
+; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP15]], align 4
; CHECK-NEXT: store i32 [[TMP12]], ptr [[TMP16]], align 4
; CHECK-NEXT: store i32 [[TMP13]], ptr [[TMP17]], align 4
-; CHECK-NEXT: store i32 [[TMP14]], ptr [[TMP18]], align 4
-; CHECK-NEXT: store i32 [[TMP15]], ptr [[TMP19]], align 4
-; CHECK-NEXT: [[TMP20:%.*]] = add nuw nsw i64 [[TMP1]], 2
-; CHECK-NEXT: [[TMP21:%.*]] = add nuw nsw i64 [[TMP2]], 2
-; CHECK-NEXT: [[TMP22:%.*]] = add nuw nsw i64 [[TMP3]], 2
-; CHECK-NEXT: [[TMP23:%.*]] = add nuw nsw i64 [[TMP4]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = add nuw nsw i64 [[OFFSET_IDX]], 2
+; CHECK-NEXT: [[TMP19:%.*]] = add nuw nsw i64 [[TMP1]], 2
+; CHECK-NEXT: [[TMP20:%.*]] = add nuw nsw i64 [[TMP2]], 2
+; CHECK-NEXT: [[TMP21:%.*]] = add nuw nsw i64 [[TMP3]], 2
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP18]]
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP19]]
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP20]]
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP21]]
-; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP22]]
-; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP23]]
+; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP14]], align 4
+; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP15]], align 4
; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP16]], align 4
; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP17]], align 4
-; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP18]], align 4
-; CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP19]], align 4
-; CHECK-NEXT: [[TMP32:%.*]] = insertelement <4 x i32> poison, i32 [[TMP28]], i32 0
-; CHECK-NEXT: [[TMP33:%.*]] = insertelement <4 x i32> [[TMP32]], i32 [[TMP29]], i32 1
-; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP30]], i32 2
-; CHECK-NEXT: [[TMP35:%.*]] = insertelement <4 x i32> [[TMP34]], i32 [[TMP31]], i32 3
+; CHECK-NEXT: [[TMP30:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i32 0
+; CHECK-NEXT: [[TMP31:%.*]] = insertelement <4 x i32> [[TMP30]], i32 [[TMP27]], i32 1
+; CHECK-NEXT: [[TMP32:%.*]] = insertelement <4 x i32> [[TMP31]], i32 [[TMP28]], i32 2
+; CHECK-NEXT: [[TMP33:%.*]] = insertelement <4 x i32> [[TMP32]], i32 [[TMP29]], i32 3
+; CHECK-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP22]], align 4
+; CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP23]], align 4
; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP24]], align 4
; CHECK-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP25]], align 4
-; CHECK-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP26]], align 4
-; CHECK-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP27]], align 4
-; CHECK-NEXT: [[TMP40:%.*]] = insertelement <4 x i32> poison, i32 [[TMP36]], i32 0
-; CHECK-NEXT: [[TMP41:%.*]] = insertelement <4 x i32> [[TMP40]], i32 [[TMP37]], i32 1
-; CHECK-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP41]], i32 [[TMP38]], i32 2
-; CHECK-NEXT: [[TMP43:%.*]] = insertelement <4 x i32> [[TMP42]], i32 [[TMP39]], i32 3
-; CHECK-NEXT: [[TMP44:%.*]] = add <4 x i32> [[TMP35]], [[TMP43]]
-; CHECK-NEXT: [[TMP45:%.*]] = extractelement <4 x i32> [[TMP44]], i32 0
-; CHECK-NEXT: [[TMP46:%.*]] = extractelement <4 x i32> [[TMP44]], i32 1
-; CHECK-NEXT: [[TMP47:%.*]] = extractelement <4 x i32> [[TMP44]], i32 2
-; CHECK-NEXT: [[TMP48:%.*]] = extractelement <4 x i32> [[TMP44]], i32 3
+; CHECK-NEXT: [[TMP38:%.*]] = insertelement <4 x i32> poison, i32 [[TMP34]], i32 0
+; CHECK-NEXT: [[TMP39:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP35]], i32 1
+; CHECK-NEXT: [[TMP40:%.*]] = insertelement <4 x i32> [[TMP39]], i32 [[TMP36]], i32 2
+; CHECK-NEXT: [[TMP41:%.*]] = insertelement <4 x i32> [[TMP40]], i32 [[TMP37]], i32 3
+; CHECK-NEXT: [[TMP42:%.*]] = add <4 x i32> [[TMP33]], [[TMP41]]
+; CHECK-NEXT: [[TMP43:%.*]] = extractelement <4 x i32> [[TMP42]], i32 0
+; CHECK-NEXT: [[TMP44:%.*]] = extractelement <4 x i32> [[TMP42]], i32 1
+; CHECK-NEXT: [[TMP45:%.*]] = extractelement <4 x i32> [[TMP42]], i32 2
+; CHECK-NEXT: [[TMP46:%.*]] = extractelement <4 x i32> [[TMP42]], i32 3
+; CHECK-NEXT: store i32 [[TMP43]], ptr [[TMP14]], align 4
+; CHECK-NEXT: store i32 [[TMP44]], ptr [[TMP15]], align 4
; CHECK-NEXT: store i32 [[TMP45]], ptr [[TMP16]], align 4
; CHECK-NEXT: store i32 [[TMP46]], ptr [[TMP17]], align 4
-; CHECK-NEXT: store i32 [[TMP47]], ptr [[TMP18]], align 4
-; CHECK-NEXT: store i32 [[TMP48]], ptr [[TMP19]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
-; CHECK-NEXT: br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
+; CHECK-NEXT: br i1 [[TMP47]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH:%.*]]
; CHECK: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll
index 78363e13595cb..92ab4c23d43e6 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll
@@ -106,7 +106,6 @@ define i32 @test_explicit_pred_generic(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP73:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
@@ -122,7 +121,7 @@ define i32 @test_explicit_pred_generic(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
@@ -170,7 +169,7 @@ define i32 @test_explicit_pred_generic(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
-; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8
; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12
@@ -241,7 +240,6 @@ define i32 @test_invariant_address(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP97:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP98:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP99:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
@@ -257,7 +255,7 @@ define i32 @test_invariant_address(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
@@ -396,23 +394,38 @@ define i32 @test_step_narrower_than_access(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP145:%.*]], [[PRED_LOAD_CONTINUE33]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP146:%.*]], [[PRED_LOAD_CONTINUE33]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP147:%.*]], [[PRED_LOAD_CONTINUE33]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT: [[TMP64:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0
+; CHECK-NEXT: [[TMP69:%.*]] = insertelement <4 x i64> [[TMP64]], i64 [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP74:%.*]] = insertelement <4 x i64> [[TMP69]], i64 [[TMP2]], i32 2
+; CHECK-NEXT: [[TMP79:%.*]] = insertelement <4 x i64> [[TMP74]], i64 [[TMP3]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7
+; CHECK-NEXT: [[TMP84:%.*]] = insertelement <4 x i64> poison, i64 [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP89:%.*]] = insertelement <4 x i64> [[TMP84]], i64 [[TMP5]], i32 1
+; CHECK-NEXT: [[TMP94:%.*]] = insertelement <4 x i64> [[TMP89]], i64 [[TMP6]], i32 2
+; CHECK-NEXT: [[TMP99:%.*]] = insertelement <4 x i64> [[TMP94]], i64 [[TMP7]], i32 3
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 9
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 10
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 11
+; CHECK-NEXT: [[TMP104:%.*]] = insertelement <4 x i64> poison, i64 [[TMP8]], i32 0
+; CHECK-NEXT: [[TMP109:%.*]] = insertelement <4 x i64> [[TMP104]], i64 [[TMP9]], i32 1
+; CHECK-NEXT: [[TMP114:%.*]] = insertelement <4 x i64> [[TMP109]], i64 [[TMP10]], i32 2
+; CHECK-NEXT: [[TMP119:%.*]] = insertelement <4 x i64> [[TMP114]], i64 [[TMP11]], i32 3
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 12
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP124:%.*]] = insertelement <4 x i64> poison, i64 [[TMP12]], i32 0
+; CHECK-NEXT: [[TMP129:%.*]] = insertelement <4 x i64> [[TMP124]], i64 [[TMP13]], i32 1
+; CHECK-NEXT: [[TMP134:%.*]] = insertelement <4 x i64> [[TMP129]], i64 [[TMP14]], i32 2
+; CHECK-NEXT: [[TMP139:%.*]] = insertelement <4 x i64> [[TMP134]], i64 [[TMP15]], i32 3
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
@@ -462,7 +475,7 @@ define i32 @test_step_narrower_than_access(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
; CHECK-NEXT: br i1 [[TMP32]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK: pred.load.if:
-; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i16, ptr [[ALLOCA]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i16, ptr [[ALLOCA]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP66:%.*]] = load i32, ptr [[TMP65]], align 4
; CHECK-NEXT: [[TMP67:%.*]] = insertelement <4 x i32> poison, i32 [[TMP66]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
@@ -653,7 +666,6 @@ define i32 @test_max_trip_count(i64 %len, ptr %test_base, i64 %n) {
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP76:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3
@@ -669,7 +681,7 @@ define i32 @test_max_trip_count(i64 %len, ptr %test_base, i64 %n) {
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 13
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 14
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 15
-; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP4]]
@@ -717,7 +729,7 @@ define i32 @test_max_trip_count(i64 %len, ptr %test_base, i64 %n) {
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 1
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 2
; CHECK-NEXT: [[TMP64:%.*]] = insertelement <4 x i1> [[TMP63]], i1 [[TMP60]], i32 3
-; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP65]], i64 4
; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP65]], i64 8
; CHECK-NEXT: [[TMP72:%.*]] = getelementptr i32, ptr [[TMP65]], i64 12
@@ -812,7 +824,6 @@ define i32 @test_non_zero_start(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1024, [[INDEX]]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 3
@@ -828,7 +839,7 @@ define i32 @test_non_zero_start(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 13
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 14
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 15
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
@@ -876,7 +887,7 @@ define i32 @test_non_zero_start(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
-; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8
; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12
@@ -995,7 +1006,6 @@ define i32 @test_non_unit_stride(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP114:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP115:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
@@ -1011,7 +1021,7 @@ define i32 @test_non_unit_stride(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
@@ -1059,7 +1069,7 @@ define i32 @test_non_unit_stride(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
-; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP3]]
@@ -1166,7 +1176,6 @@ define i32 @neg_off_by_many(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP73:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
@@ -1182,7 +1191,7 @@ define i32 @neg_off_by_many(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
@@ -1230,7 +1239,7 @@ define i32 @neg_off_by_many(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
-; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8
; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12
@@ -1297,7 +1306,6 @@ define i32 @neg_off_by_one_iteration(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP73:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
@@ -1313,7 +1321,7 @@ define i32 @neg_off_by_one_iteration(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
@@ -1361,7 +1369,7 @@ define i32 @neg_off_by_one_iteration(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
-; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8
; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12
@@ -1428,7 +1436,6 @@ define i32 @neg_off_by_one_byte(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP73:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
@@ -1444,7 +1451,7 @@ define i32 @neg_off_by_one_byte(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
@@ -1492,7 +1499,7 @@ define i32 @neg_off_by_one_byte(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
-; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8
; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12
@@ -1568,7 +1575,6 @@ define i32 @test_constant_max(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP76:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3
@@ -1584,7 +1590,7 @@ define i32 @test_constant_max(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 13
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 14
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 15
-; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP4]]
@@ -1632,7 +1638,7 @@ define i32 @test_constant_max(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 1
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 2
; CHECK-NEXT: [[TMP64:%.*]] = insertelement <4 x i1> [[TMP63]], i1 [[TMP60]], i32 3
-; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP65]], i64 4
; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP65]], i64 8
; CHECK-NEXT: [[TMP72:%.*]] = getelementptr i32, ptr [[TMP65]], i64 12
@@ -1728,7 +1734,6 @@ define i32 @test_allocsize(i64 %len, ptr %test_base) nofree nosync {
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP73:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
@@ -1744,7 +1749,7 @@ define i32 @test_allocsize(i64 %len, ptr %test_base) nofree nosync {
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
@@ -1792,7 +1797,7 @@ define i32 @test_allocsize(i64 %len, ptr %test_base) nofree nosync {
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
-; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCATION]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCATION]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8
; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12
@@ -1860,7 +1865,6 @@ define i32 @test_allocsize_array(i64 %len, ptr %test_base) nofree nosync {
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP73:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
@@ -1876,7 +1880,7 @@ define i32 @test_allocsize_array(i64 %len, ptr %test_base) nofree nosync {
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
@@ -1924,7 +1928,7 @@ define i32 @test_allocsize_array(i64 %len, ptr %test_base) nofree nosync {
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
-; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCATION]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCATION]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8
; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12
@@ -2002,7 +2006,6 @@ define i32 @test_allocsize_cond_deref(i1 %allzero, ptr %test_base) {
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP73:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
@@ -2018,7 +2021,7 @@ define i32 @test_allocsize_cond_deref(i1 %allzero, ptr %test_base) {
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
@@ -2066,7 +2069,7 @@ define i32 @test_allocsize_cond_deref(i1 %allzero, ptr %test_base) {
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
-; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCATION]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCATION]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8
; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12
@@ -2142,7 +2145,6 @@ define i32 @test_stride_three(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP114:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP115:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9
@@ -2158,7 +2160,7 @@ define i32 @test_stride_three(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 39
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 42
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 45
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
@@ -2206,7 +2208,7 @@ define i32 @test_stride_three(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
-; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP3]]
@@ -2331,7 +2333,6 @@ define i32 @test_non_unit_stride_four(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP56:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP57:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 4
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 8
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 12
@@ -2339,7 +2340,7 @@ define i32 @test_non_unit_stride_four(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 20
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 24
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 28
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
@@ -2363,7 +2364,7 @@ define i32 @test_non_unit_stride_four(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[TMP29:%.*]] = insertelement <4 x i1> [[TMP28]], i1 [[TMP25]], i32 1
; CHECK-NEXT: [[TMP30:%.*]] = insertelement <4 x i1> [[TMP29]], i1 [[TMP26]], i32 2
; CHECK-NEXT: [[TMP31:%.*]] = insertelement <4 x i1> [[TMP30]], i1 [[TMP27]], i32 3
-; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP3]]
@@ -2460,7 +2461,6 @@ define i32 @test_non_unit_stride_five(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP114:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP115:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 5
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 5
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 10
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 15
@@ -2476,7 +2476,7 @@ define i32 @test_non_unit_stride_five(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 65
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 70
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 75
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
@@ -2524,7 +2524,7 @@ define i32 @test_non_unit_stride_five(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
-; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP3]]
@@ -2650,7 +2650,6 @@ define i32 @test_non_unit_stride_off_by_four_bytes(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP114:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP115:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
@@ -2666,7 +2665,7 @@ define i32 @test_non_unit_stride_off_by_four_bytes(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
@@ -2714,7 +2713,7 @@ define i32 @test_non_unit_stride_off_by_four_bytes(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
-; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP3]]
@@ -2842,7 +2841,6 @@ define i32 @test_non_unit_stride_with_first_iteration_step_access(i64 %len, ptr
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP130:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP131:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
@@ -2858,7 +2856,7 @@ define i32 @test_non_unit_stride_with_first_iteration_step_access(i64 %len, ptr
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30
-; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[TMP0]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[OFFSET_IDX]], 2
; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[TMP1]], 2
; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP2]], 2
; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[TMP3]], 2
@@ -2874,7 +2872,7 @@ define i32 @test_non_unit_stride_with_first_iteration_step_access(i64 %len, ptr
; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[TMP13]], 2
; CHECK-NEXT: [[TMP30:%.*]] = add i64 [[TMP14]], 2
; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[TMP15]], 2
-; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll b/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll
index 1350e40c77e66..1396029b20c9f 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll
@@ -21,9 +21,6 @@ define void @foo(ptr %ptr, ptr %ptr.2) {
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 2, i64 3, i64 4, i64 5>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 2, [[INDEX]]
; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[OFFSET_IDX]] to i32
-; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], 1
-; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], 2
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP0]], 3
; CHECK-NEXT: store i32 [[TMP4]], ptr [[PTR_2]], align 4, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 [[INDEX]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll b/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll
index 34c54de2140cc..3cdffa8284ba9 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll
@@ -11,9 +11,8 @@ define ptr @test(ptr noalias %src, ptr noalias %dst) {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP6]], i32 0
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x ptr> [[TMP16]], ptr [[TMP2]], i32 1
@@ -35,7 +34,7 @@ define ptr @test(ptr noalias %src, ptr noalias %dst) {
; CHECK: [[PRED_LOAD_CONTINUE2]]:
; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[TMP15]], <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]]
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP14]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 2)
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll b/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll
index 368842634c374..b1b432cda3016 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll
@@ -57,11 +57,10 @@ define void @test(ptr %p) {
; VEC: vector.body:
; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VEC-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 1, i16 2, i16 3, i16 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; VEC-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 0
; VEC-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 1
; VEC-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 2
; VEC-NEXT: [[TMP18:%.*]] = add i64 [[INDEX]], 3
-; VEC-NEXT: [[TMP19:%.*]] = shl i64 [[TMP15]], 1
+; VEC-NEXT: [[TMP19:%.*]] = shl i64 [[INDEX]], 1
; VEC-NEXT: [[TMP20:%.*]] = shl i64 [[TMP16]], 1
; VEC-NEXT: [[TMP21:%.*]] = shl i64 [[TMP17]], 1
; VEC-NEXT: [[TMP22:%.*]] = shl i64 [[TMP18]], 1
diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll
index 3813560d9300a..e7c786b6b6e9e 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll
@@ -28,7 +28,6 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 {
; I64-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; I64-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
; I64-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4)
-; I64-NEXT: [[IV:%.*]] = add i32 [[INDEX]], 0
; I64-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
; I64-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 2
; I64-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 3
@@ -64,7 +63,7 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 {
; I64-NEXT: [[TMP69:%.*]] = extractelement <4 x double> [[TMP23]], i32 1
; I64-NEXT: [[TMP70:%.*]] = extractelement <4 x double> [[TMP23]], i32 2
; I64-NEXT: [[TMP71:%.*]] = extractelement <4 x double> [[TMP23]], i32 3
-; I64-NEXT: [[ADD_PTR_I:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[IV]]
+; I64-NEXT: [[ADD_PTR_I:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[INDEX]]
; I64-NEXT: [[TMP25:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP5]]
; I64-NEXT: [[TMP26:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP6]]
; I64-NEXT: [[TMP27:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP7]]
@@ -134,7 +133,6 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 {
; I64: [[VEC_EPILOG_VECTOR_BODY]]:
; I64-NEXT: [[INDEX4:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; I64-NEXT: [[VEC_IND5:%.*]] = phi <4 x i32> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT7:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
-; I64-NEXT: [[TMP75:%.*]] = add i32 [[INDEX4]], 0
; I64-NEXT: [[TMP76:%.*]] = add i32 [[INDEX4]], 1
; I64-NEXT: [[TMP77:%.*]] = add i32 [[INDEX4]], 2
; I64-NEXT: [[TMP78:%.*]] = add i32 [[INDEX4]], 3
@@ -143,7 +141,7 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 {
; I64-NEXT: [[TMP89:%.*]] = extractelement <4 x double> [[TMP79]], i32 1
; I64-NEXT: [[TMP90:%.*]] = extractelement <4 x double> [[TMP79]], i32 2
; I64-NEXT: [[TMP91:%.*]] = extractelement <4 x double> [[TMP79]], i32 3
-; I64-NEXT: [[TMP84:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP75]]
+; I64-NEXT: [[TMP84:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[INDEX4]]
; I64-NEXT: [[TMP85:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP76]]
; I64-NEXT: [[TMP86:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP77]]
; I64-NEXT: [[TMP93:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP78]]
@@ -184,7 +182,6 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 {
; I32-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; I32-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
; I32-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4)
-; I32-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0
; I32-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 1
; I32-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 2
; I32-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 3
@@ -220,7 +217,7 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 {
; I32-NEXT: [[TMP68:%.*]] = extractelement <4 x double> [[TMP55]], i32 1
; I32-NEXT: [[TMP69:%.*]] = extractelement <4 x double> [[TMP55]], i32 2
; I32-NEXT: [[TMP70:%.*]] = extractelement <4 x double> [[TMP55]], i32 3
-; I32-NEXT: [[TMP15:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP3]]
+; I32-NEXT: [[TMP15:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[INDEX]]
; I32-NEXT: [[TMP16:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP4]]
; I32-NEXT: [[TMP17:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP5]]
; I32-NEXT: [[TMP18:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP6]]
@@ -290,7 +287,6 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 {
; I32: [[VEC_EPILOG_VECTOR_BODY]]:
; I32-NEXT: [[INDEX4:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; I32-NEXT: [[VEC_IND5:%.*]] = phi <4 x i32> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT7:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
-; I32-NEXT: [[TMP74:%.*]] = add i32 [[INDEX4]], 0
; I32-NEXT: [[TMP75:%.*]] = add i32 [[INDEX4]], 1
; I32-NEXT: [[TMP76:%.*]] = add i32 [[INDEX4]], 2
; I32-NEXT: [[TMP77:%.*]] = add i32 [[INDEX4]], 3
@@ -299,7 +295,7 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 {
; I32-NEXT: [[TMP88:%.*]] = extractelement <4 x double> [[TMP78]], i32 1
; I32-NEXT: [[TMP89:%.*]] = extractelement <4 x double> [[TMP78]], i32 2
; I32-NEXT: [[TMP90:%.*]] = extractelement <4 x double> [[TMP78]], i32 3
-; I32-NEXT: [[TMP83:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP74]]
+; I32-NEXT: [[TMP83:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[INDEX4]]
; I32-NEXT: [[TMP84:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP75]]
; I32-NEXT: [[TMP85:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP76]]
; I32-NEXT: [[TMP92:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP77]]
@@ -352,11 +348,10 @@ define void @test_store_loaded_value(ptr noalias %src, ptr noalias %dst, i32 %n)
; I64-NEXT: br label %[[VECTOR_BODY:.*]]
; I64: [[VECTOR_BODY]]:
; I64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; I64-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; I64-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; I64-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; I64-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; I64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP0]]
+; I64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDEX]]
; I64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP1]]
; I64-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP2]]
; I64-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP3]]
@@ -364,7 +359,7 @@ define void @test_store_loaded_value(ptr noalias %src, ptr noalias %dst, i32 %n)
; I64-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP5]], align 8
; I64-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP6]], align 8
; I64-NEXT: [[TMP11:%.*]] = load double, ptr [[TMP7]], align 8
-; I64-NEXT: [[TMP12:%.*]] = shl i64 [[TMP0]], 1
+; I64-NEXT: [[TMP12:%.*]] = shl i64 [[INDEX]], 1
; I64-NEXT: [[TMP13:%.*]] = shl i64 [[TMP1]], 1
; I64-NEXT: [[TMP14:%.*]] = shl i64 [[TMP2]], 1
; I64-NEXT: [[TMP15:%.*]] = shl i64 [[TMP3]], 1
@@ -399,11 +394,10 @@ define void @test_store_loaded_value(ptr noalias %src, ptr noalias %dst, i32 %n)
; I32-NEXT: br label %[[VECTOR_BODY:.*]]
; I32: [[VECTOR_BODY]]:
; I32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; I32-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; I32-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; I32-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; I32-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; I32-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP0]]
+; I32-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDEX]]
; I32-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP1]]
; I32-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP2]]
; I32-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP3]]
@@ -411,7 +405,7 @@ define void @test_store_loaded_value(ptr noalias %src, ptr noalias %dst, i32 %n)
; I32-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP5]], align 8
; I32-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP6]], align 8
; I32-NEXT: [[TMP11:%.*]] = load double, ptr [[TMP7]], align 8
-; I32-NEXT: [[TMP12:%.*]] = shl i64 [[TMP0]], 1
+; I32-NEXT: [[TMP12:%.*]] = shl i64 [[INDEX]], 1
; I32-NEXT: [[TMP13:%.*]] = shl i64 [[TMP1]], 1
; I32-NEXT: [[TMP14:%.*]] = shl i64 [[TMP2]], 1
; I32-NEXT: [[TMP15:%.*]] = shl i64 [[TMP3]], 1
@@ -716,7 +710,6 @@ define void @loaded_address_used_by_load_through_blend(i64 %start, ptr noalias %
; I32-NEXT: br label %[[VECTOR_BODY:.*]]
; I32: [[VECTOR_BODY]]:
; I32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; I32-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
; I32-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1
; I32-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 2
; I32-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 3
@@ -724,7 +717,7 @@ define void @loaded_address_used_by_load_through_blend(i64 %start, ptr noalias %
; I32-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 5
; I32-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 6
; I32-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 7
-; I32-NEXT: [[TMP11:%.*]] = add i64 [[TMP3]], 1
+; I32-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
; I32-NEXT: [[TMP12:%.*]] = add i64 [[TMP4]], 1
; I32-NEXT: [[TMP13:%.*]] = add i64 [[TMP5]], 1
; I32-NEXT: [[TMP14:%.*]] = add i64 [[TMP6]], 1
@@ -798,7 +791,7 @@ define void @loaded_address_used_by_load_through_blend(i64 %start, ptr noalias %
; I32-NEXT: [[TMP88:%.*]] = load float, ptr [[TMP87]], align 4
; I32-NEXT: [[TMP90:%.*]] = load float, ptr [[TMP89]], align 4
; I32-NEXT: [[TMP92:%.*]] = load float, ptr [[TMP91]], align 4
-; I32-NEXT: [[TMP93:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]]
+; I32-NEXT: [[TMP93:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
; I32-NEXT: [[TMP94:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP4]]
; I32-NEXT: [[TMP95:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP5]]
; I32-NEXT: [[TMP96:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP6]]
@@ -864,7 +857,6 @@ define void @address_use_in_different_block(ptr noalias %dst, ptr %src.0, ptr %s
; I64-NEXT: br label %[[VECTOR_BODY:.*]]
; I64: [[VECTOR_BODY]]:
; I64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; I64-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; I64-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; I64-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; I64-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
@@ -872,7 +864,7 @@ define void @address_use_in_different_block(ptr noalias %dst, ptr %src.0, ptr %s
; I64-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5
; I64-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6
; I64-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7
-; I64-NEXT: [[TMP8:%.*]] = mul i64 [[TMP0]], [[OFFSET]]
+; I64-NEXT: [[TMP8:%.*]] = mul i64 [[INDEX]], [[OFFSET]]
; I64-NEXT: [[TMP9:%.*]] = mul i64 [[TMP1]], [[OFFSET]]
; I64-NEXT: [[TMP10:%.*]] = mul i64 [[TMP2]], [[OFFSET]]
; I64-NEXT: [[TMP11:%.*]] = mul i64 [[TMP3]], [[OFFSET]]
@@ -981,11 +973,10 @@ define void @address_use_in_different_block(ptr noalias %dst, ptr %src.0, ptr %s
; I32-NEXT: br label %[[VECTOR_BODY:.*]]
; I32: [[VECTOR_BODY]]:
; I32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; I32-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; I32-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; I32-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; I32-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; I32-NEXT: [[TMP4:%.*]] = mul i64 [[TMP0]], [[OFFSET]]
+; I32-NEXT: [[TMP4:%.*]] = mul i64 [[INDEX]], [[OFFSET]]
; I32-NEXT: [[TMP5:%.*]] = mul i64 [[TMP1]], [[OFFSET]]
; I32-NEXT: [[TMP6:%.*]] = mul i64 [[TMP2]], [[OFFSET]]
; I32-NEXT: [[TMP7:%.*]] = mul i64 [[TMP3]], [[OFFSET]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll
index da48f984cb329..c1264ca2f8413 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll
@@ -26,7 +26,6 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP145:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP146:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP147:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
@@ -58,15 +57,15 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[INDEX]], 29
; CHECK-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30
; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31
-; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 8
-; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 16
-; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 24
+; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 8
+; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 16
+; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 24
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP32]], align 4, !tbaa [[INT_TBAA1:![0-9]+]]
-; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i32>, ptr [[TMP33]], align 4, !tbaa [[INT_TBAA1]]
-; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP34]], align 4, !tbaa [[INT_TBAA1]]
-; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP35]], align 4, !tbaa [[INT_TBAA1]]
-; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP0]], i64 [[IDXPROM5]]
+; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i32>, ptr [[TMP37]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP38]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP39]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[INDEX]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP1]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP2]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP3]], i64 [[IDXPROM5]]
@@ -98,7 +97,7 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
; CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP29]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP30]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP31]], i64 [[IDXPROM5]]
-; CHECK-NEXT: [[TMP72:%.*]] = load i32, ptr [[TMP40]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[TMP72:%.*]] = load i32, ptr [[TMP35]], align 4, !tbaa [[INT_TBAA1]]
; CHECK-NEXT: [[TMP73:%.*]] = load i32, ptr [[TMP41]], align 4, !tbaa [[INT_TBAA1]]
; CHECK-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP42]], align 4, !tbaa [[INT_TBAA1]]
; CHECK-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP43]], align 4, !tbaa [[INT_TBAA1]]
@@ -193,17 +192,16 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX9:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI10:%.*]] = phi <4 x i32> [ [[TMP171]], %[[VEC_EPILOG_PH]] ], [ [[TMP168:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP172:%.*]] = add i64 [[INDEX9]], 0
; CHECK-NEXT: [[TMP173:%.*]] = add i64 [[INDEX9]], 1
; CHECK-NEXT: [[TMP174:%.*]] = add i64 [[INDEX9]], 2
; CHECK-NEXT: [[TMP175:%.*]] = add i64 [[INDEX9]], 3
-; CHECK-NEXT: [[TMP152:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[TMP172]]
+; CHECK-NEXT: [[TMP152:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[INDEX9]]
; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i32>, ptr [[TMP152]], align 4, !tbaa [[INT_TBAA1]]
-; CHECK-NEXT: [[TMP154:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP172]], i64 [[IDXPROM5]]
+; CHECK-NEXT: [[TMP153:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[INDEX9]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP155:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP173]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP156:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP174]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP157:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP175]], i64 [[IDXPROM5]]
-; CHECK-NEXT: [[TMP158:%.*]] = load i32, ptr [[TMP154]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[TMP158:%.*]] = load i32, ptr [[TMP153]], align 4, !tbaa [[INT_TBAA1]]
; CHECK-NEXT: [[TMP159:%.*]] = load i32, ptr [[TMP155]], align 4, !tbaa [[INT_TBAA1]]
; CHECK-NEXT: [[TMP160:%.*]] = load i32, ptr [[TMP156]], align 4, !tbaa [[INT_TBAA1]]
; CHECK-NEXT: [[TMP161:%.*]] = load i32, ptr [[TMP157]], align 4, !tbaa [[INT_TBAA1]]
@@ -257,7 +255,6 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
; MAX-BW-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP145:%.*]], %[[VECTOR_BODY]] ]
; MAX-BW-NEXT: [[VEC_PHI2:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP146:%.*]], %[[VECTOR_BODY]] ]
; MAX-BW-NEXT: [[VEC_PHI3:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP147:%.*]], %[[VECTOR_BODY]] ]
-; MAX-BW-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; MAX-BW-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; MAX-BW-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; MAX-BW-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
@@ -289,15 +286,15 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
; MAX-BW-NEXT: [[TMP29:%.*]] = add i64 [[INDEX]], 29
; MAX-BW-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30
; MAX-BW-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31
-; MAX-BW-NEXT: [[TMP32:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[TMP0]]
-; MAX-BW-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 8
-; MAX-BW-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 16
-; MAX-BW-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 24
+; MAX-BW-NEXT: [[TMP32:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[INDEX]]
+; MAX-BW-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 8
+; MAX-BW-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 16
+; MAX-BW-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 24
; MAX-BW-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP32]], align 4, !tbaa [[INT_TBAA1:![0-9]+]]
-; MAX-BW-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i32>, ptr [[TMP33]], align 4, !tbaa [[INT_TBAA1]]
-; MAX-BW-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP34]], align 4, !tbaa [[INT_TBAA1]]
-; MAX-BW-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP35]], align 4, !tbaa [[INT_TBAA1]]
-; MAX-BW-NEXT: [[TMP40:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP0]], i64 [[IDXPROM5]]
+; MAX-BW-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i32>, ptr [[TMP37]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP38]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP39]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[TMP35:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[INDEX]], i64 [[IDXPROM5]]
; MAX-BW-NEXT: [[TMP41:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP1]], i64 [[IDXPROM5]]
; MAX-BW-NEXT: [[TMP42:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP2]], i64 [[IDXPROM5]]
; MAX-BW-NEXT: [[TMP43:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP3]], i64 [[IDXPROM5]]
@@ -329,7 +326,7 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
; MAX-BW-NEXT: [[TMP69:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP29]], i64 [[IDXPROM5]]
; MAX-BW-NEXT: [[TMP70:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP30]], i64 [[IDXPROM5]]
; MAX-BW-NEXT: [[TMP71:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP31]], i64 [[IDXPROM5]]
-; MAX-BW-NEXT: [[TMP72:%.*]] = load i32, ptr [[TMP40]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[TMP72:%.*]] = load i32, ptr [[TMP35]], align 4, !tbaa [[INT_TBAA1]]
; MAX-BW-NEXT: [[TMP73:%.*]] = load i32, ptr [[TMP41]], align 4, !tbaa [[INT_TBAA1]]
; MAX-BW-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP42]], align 4, !tbaa [[INT_TBAA1]]
; MAX-BW-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP43]], align 4, !tbaa [[INT_TBAA1]]
@@ -424,17 +421,16 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
; MAX-BW: [[VEC_EPILOG_VECTOR_BODY]]:
; MAX-BW-NEXT: [[INDEX9:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; MAX-BW-NEXT: [[VEC_PHI10:%.*]] = phi <4 x i32> [ [[TMP171]], %[[VEC_EPILOG_PH]] ], [ [[TMP168:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
-; MAX-BW-NEXT: [[TMP172:%.*]] = add i64 [[INDEX9]], 0
; MAX-BW-NEXT: [[TMP173:%.*]] = add i64 [[INDEX9]], 1
; MAX-BW-NEXT: [[TMP174:%.*]] = add i64 [[INDEX9]], 2
; MAX-BW-NEXT: [[TMP175:%.*]] = add i64 [[INDEX9]], 3
-; MAX-BW-NEXT: [[TMP152:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[TMP172]]
+; MAX-BW-NEXT: [[TMP152:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[INDEX9]]
; MAX-BW-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i32>, ptr [[TMP152]], align 4, !tbaa [[INT_TBAA1]]
-; MAX-BW-NEXT: [[TMP154:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP172]], i64 [[IDXPROM5]]
+; MAX-BW-NEXT: [[TMP153:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[INDEX9]], i64 [[IDXPROM5]]
; MAX-BW-NEXT: [[TMP155:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP173]], i64 [[IDXPROM5]]
; MAX-BW-NEXT: [[TMP156:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP174]], i64 [[IDXPROM5]]
; MAX-BW-NEXT: [[TMP157:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP175]], i64 [[IDXPROM5]]
-; MAX-BW-NEXT: [[TMP158:%.*]] = load i32, ptr [[TMP154]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[TMP158:%.*]] = load i32, ptr [[TMP153]], align 4, !tbaa [[INT_TBAA1]]
; MAX-BW-NEXT: [[TMP159:%.*]] = load i32, ptr [[TMP155]], align 4, !tbaa [[INT_TBAA1]]
; MAX-BW-NEXT: [[TMP160:%.*]] = load i32, ptr [[TMP156]], align 4, !tbaa [[INT_TBAA1]]
; MAX-BW-NEXT: [[TMP161:%.*]] = load i32, ptr [[TMP157]], align 4, !tbaa [[INT_TBAA1]]
@@ -507,7 +503,6 @@ define void @test(ptr %A, ptr noalias %B) #0 {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
@@ -515,7 +510,7 @@ define void @test(ptr %A, ptr noalias %B) #0 {
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 10
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 12
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 14
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i64 0, i64 [[TMP0]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i64 0, i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP16]], align 4
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -529,7 +524,7 @@ define void @test(ptr %A, ptr noalias %B) #0 {
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i8> [[TMP19]], i32 5
; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i8> [[TMP19]], i32 6
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <8 x i8> [[TMP19]], i32 7
-; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP0]]
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP1]]
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP2]]
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP3]]
@@ -562,7 +557,6 @@ define void @test(ptr %A, ptr noalias %B) #0 {
; MAX-BW: [[VECTOR_BODY]]:
; MAX-BW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; MAX-BW-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
-; MAX-BW-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; MAX-BW-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
; MAX-BW-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
; MAX-BW-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
@@ -578,7 +572,7 @@ define void @test(ptr %A, ptr noalias %B) #0 {
; MAX-BW-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26
; MAX-BW-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28
; MAX-BW-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30
-; MAX-BW-NEXT: [[TMP32:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i64 0, i64 [[TMP0]]
+; MAX-BW-NEXT: [[TMP32:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i64 0, i64 [[OFFSET_IDX]]
; MAX-BW-NEXT: [[WIDE_VEC:%.*]] = load <32 x i32>, ptr [[TMP32]], align 4
; MAX-BW-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <32 x i32> [[WIDE_VEC]], <32 x i32> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
; MAX-BW-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <32 x i32> [[WIDE_VEC]], <32 x i32> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
@@ -600,7 +594,7 @@ define void @test(ptr %A, ptr noalias %B) #0 {
; MAX-BW-NEXT: [[TMP65:%.*]] = extractelement <16 x i8> [[TMP35]], i32 13
; MAX-BW-NEXT: [[TMP66:%.*]] = extractelement <16 x i8> [[TMP35]], i32 14
; MAX-BW-NEXT: [[TMP67:%.*]] = extractelement <16 x i8> [[TMP35]], i32 15
-; MAX-BW-NEXT: [[TMP69:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP0]]
+; MAX-BW-NEXT: [[TMP69:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[OFFSET_IDX]]
; MAX-BW-NEXT: [[TMP70:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP1]]
; MAX-BW-NEXT: [[TMP71:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP2]]
; MAX-BW-NEXT: [[TMP72:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP3]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll
index dbd7019188d07..74201fef14b58 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll
@@ -198,9 +198,6 @@ define void @uniform_store_varying_value(ptr align(4) %addr) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i32
-; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP0]], 12
-; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP0]], 13
-; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP0]], 14
; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP0]], 15
; CHECK-NEXT: store i32 [[TMP7]], ptr [[ADDR:%.*]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
diff --git a/llvm/test/Transforms/LoopVectorize/X86/vplan-native-inner-loop-only.ll b/llvm/test/Transforms/LoopVectorize/X86/vplan-native-inner-loop-only.ll
index 38617d25bfd7a..d4335961e19b2 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/vplan-native-inner-loop-only.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vplan-native-inner-loop-only.ll
@@ -16,11 +16,10 @@ define void @test(ptr %A) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT: [[TMP4:%.*]] = shl nsw i64 [[TMP0]], 1
+; CHECK-NEXT: [[TMP4:%.*]] = shl nsw i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP5:%.*]] = shl nsw i64 [[TMP1]], 1
; CHECK-NEXT: [[TMP6:%.*]] = shl nsw i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP7:%.*]] = shl nsw i64 [[TMP3]], 1
diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll b/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll
index d29719de79ffd..d7e853931417b 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll
@@ -30,12 +30,11 @@ define void @example() {
; FORCED: [[VECTOR_BODY]]:
; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; FORCED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; FORCED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; FORCED-NEXT: [[TMP2:%.*]] = sitofp <2 x i64> [[VEC_IND]] to <2 x x86_fp80>
; FORCED-NEXT: [[TMP5:%.*]] = extractelement <2 x x86_fp80> [[TMP2]], i32 0
; FORCED-NEXT: [[TMP6:%.*]] = extractelement <2 x x86_fp80> [[TMP2]], i32 1
-; FORCED-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1024 x x86_fp80], ptr @x, i64 0, i64 [[TMP0]]
+; FORCED-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1024 x x86_fp80], ptr @x, i64 0, i64 [[INDEX]]
; FORCED-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1024 x x86_fp80], ptr @x, i64 0, i64 [[TMP1]]
; FORCED-NEXT: store x86_fp80 [[TMP5]], ptr [[TMP3]], align 16
; FORCED-NEXT: store x86_fp80 [[TMP6]], ptr [[TMP4]], align 16
@@ -44,8 +43,9 @@ define void @example() {
; FORCED-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; FORCED-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; FORCED: [[MIDDLE_BLOCK]]:
-; FORCED-NEXT: br [[EXIT:label %.*]]
-; FORCED: [[SCALAR_PH:.*:]]
+; FORCED-NEXT: br label %[[EXIT:.*]]
+; FORCED: [[EXIT]]:
+; FORCED-NEXT: ret void
;
entry:
br label %loop
diff --git a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll
index 6c63b823b7666..4acc7403d6a37 100644
--- a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll
+++ b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll
@@ -243,15 +243,14 @@ define i32 @interleaved_access_forward(ptr %p, i64 %n) {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP28:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[INDEX:%.*]] = add i64 [[INDEX1]], 0
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX1]], 1
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX1]], 2
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX1]], 3
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P]], i64 [[INDEX]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P]], i64 [[INDEX1]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP0]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP2]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX1]], i32 1
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP0]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP1]], i32 1
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP2]], i32 1
@@ -390,15 +389,14 @@ define i32 @interleaved_access_reverse(ptr %p, i64 %n) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP31:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[N]], [[INDEX]]
-; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -1
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], -2
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], -3
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P]], i64 [[TMP18]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P]], i64 [[OFFSET_IDX]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP3]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP4]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP5]], i32 0
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP18]], i32 1
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[OFFSET_IDX]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP3]], i32 1
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP4]], i32 1
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP5]], i32 1
@@ -544,11 +542,10 @@ define void @predicated_store(ptr %p, i32 %x, i64 %n) {
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P]], i64 [[TMP5]], i32 0
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P]], i64 [[INDEX]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP12]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP3]], i32 0
@@ -734,11 +731,10 @@ define void @irregular_type(ptr %a, i64 %n) {
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[INDEX:%.*]] = add i64 [[INDEX1]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX1]], 1
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX1]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX1]], 3
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[INDEX1]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[TMP3]]
@@ -778,11 +774,10 @@ define void @irregular_type(ptr %a, i64 %n) {
; INTER-NEXT: br label %[[VECTOR_BODY:.*]]
; INTER: [[VECTOR_BODY]]:
; INTER-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; INTER-NEXT: [[INDEX:%.*]] = add i64 [[INDEX1]], 0
; INTER-NEXT: [[TMP1:%.*]] = add i64 [[INDEX1]], 1
; INTER-NEXT: [[TMP5:%.*]] = add i64 [[INDEX1]], 2
; INTER-NEXT: [[TMP3:%.*]] = add i64 [[INDEX1]], 3
-; INTER-NEXT: [[TMP0:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[INDEX]]
+; INTER-NEXT: [[TMP0:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[INDEX1]]
; INTER-NEXT: [[TMP2:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[TMP1]]
; INTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[TMP5]]
; INTER-NEXT: [[TMP6:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[TMP3]]
@@ -947,11 +942,10 @@ define void @pointer_iv_non_uniform_0(ptr %a, i64 %n) {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = mul i64 [[INDEX]], 16
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[OFFSET_IDX1]], 0
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX1]], 16
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX1]], 32
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX1]], 48
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX1]]
; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP4]]
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP9]]
@@ -1062,11 +1056,10 @@ define void @pointer_iv_non_uniform_0(ptr %a, i64 %n) {
; INTER: [[VECTOR_BODY]]:
; INTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; INTER-NEXT: [[OFFSET_IDX1:%.*]] = mul i64 [[INDEX]], 16
-; INTER-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[OFFSET_IDX1]], 0
; INTER-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX1]], 16
; INTER-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX1]], 32
; INTER-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX1]], 48
-; INTER-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]]
+; INTER-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX1]]
; INTER-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP5]]
; INTER-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP6]]
; INTER-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP8]]
@@ -1181,11 +1174,10 @@ define void @pointer_iv_non_uniform_1(ptr %a, i64 %n) {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = mul i64 [[INDEX]], 16
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[OFFSET_IDX1]], 0
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX1]], 16
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX1]], 32
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX1]], 48
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX1]]
; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP4]]
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP6]]
@@ -1230,11 +1222,10 @@ define void @pointer_iv_non_uniform_1(ptr %a, i64 %n) {
; INTER: [[VECTOR_BODY]]:
; INTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; INTER-NEXT: [[OFFSET_IDX1:%.*]] = mul i64 [[INDEX]], 16
-; INTER-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[OFFSET_IDX1]], 0
; INTER-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX1]], 16
; INTER-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX1]], 32
; INTER-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX1]], 48
-; INTER-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]]
+; INTER-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX1]]
; INTER-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]]
; INTER-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP4]]
; INTER-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP6]]
@@ -1469,11 +1460,10 @@ define void @pointer_operand_geps_with_different_indexed_types(ptr %A, ptr %B, i
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[INDEX:%.*]] = add i64 [[INDEX1]], 0
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX1]], 1
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX1]], 2
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX1]], 3
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX1]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
@@ -1498,7 +1488,7 @@ define void @pointer_operand_geps_with_different_indexed_types(ptr %A, ptr %B, i
; CHECK-NEXT: [[TMP32:%.*]] = insertelement <4 x i8> [[TMP27]], i8 [[TMP24]], i32 2
; CHECK-NEXT: [[TMP28:%.*]] = insertelement <4 x i8> [[TMP32]], i8 [[TMP25]], i32 3
; CHECK-NEXT: [[TMP29:%.*]] = xor <4 x i8> [[TMP20]], [[TMP28]]
-; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX1]]
; CHECK-NEXT: store <4 x i8> [[TMP29]], ptr [[TMP30]], align 1, !alias.scope [[META30:![0-9]+]], !noalias [[META27]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 4
; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/cse-casts.ll b/llvm/test/Transforms/LoopVectorize/cse-casts.ll
index b6d7a9f81ec9d..b450942e3966b 100644
--- a/llvm/test/Transforms/LoopVectorize/cse-casts.ll
+++ b/llvm/test/Transforms/LoopVectorize/cse-casts.ll
@@ -185,11 +185,10 @@ define void @preserve_flags_narrowing_extends_and_truncs(ptr noalias %A, ptr noa
; CHECK-NEXT: store i64 [[TMP49]], ptr [[TMP48]], align 4
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE28]]
; CHECK: [[PRED_STORE_CONTINUE28]]:
-; CHECK-NEXT: [[TMP50:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 0
; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 1
; CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 2
; CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 3
-; CHECK-NEXT: [[TMP54:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP50]], i32 0
+; CHECK-NEXT: [[TMP54:%.*]] = insertelement <4 x ptr> poison, ptr [[B]], i32 0
; CHECK-NEXT: [[TMP55:%.*]] = insertelement <4 x ptr> [[TMP54]], ptr [[TMP51]], i32 1
; CHECK-NEXT: [[TMP56:%.*]] = insertelement <4 x ptr> [[TMP55]], ptr [[TMP52]], i32 2
; CHECK-NEXT: [[TMP57:%.*]] = insertelement <4 x ptr> [[TMP56]], ptr [[TMP53]], i32 3
@@ -203,7 +202,7 @@ define void @preserve_flags_narrowing_extends_and_truncs(ptr noalias %A, ptr noa
; CHECK-NEXT: [[TMP65:%.*]] = insertelement <4 x ptr> [[TMP64]], ptr [[TMP61]], i32 3
; CHECK-NEXT: br i1 true, label %[[PRED_LOAD_IF29:.*]], label %[[PRED_LOAD_CONTINUE30:.*]]
; CHECK: [[PRED_LOAD_IF29]]:
-; CHECK-NEXT: [[TMP66:%.*]] = load i8, ptr [[TMP50]], align 1
+; CHECK-NEXT: [[TMP66:%.*]] = load i8, ptr [[B]], align 1
; CHECK-NEXT: [[TMP67:%.*]] = insertelement <4 x i8> poison, i8 [[TMP66]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE30]]
; CHECK: [[PRED_LOAD_CONTINUE30]]:
@@ -272,7 +271,7 @@ define void @preserve_flags_narrowing_extends_and_truncs(ptr noalias %A, ptr noa
; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF45:.*]], label %[[PRED_STORE_CONTINUE46:.*]]
; CHECK: [[PRED_STORE_IF45]]:
; CHECK-NEXT: [[TMP102:%.*]] = extractelement <4 x i8> [[TMP100]], i32 0
-; CHECK-NEXT: store i8 [[TMP102]], ptr [[TMP50]], align 1
+; CHECK-NEXT: store i8 [[TMP102]], ptr [[B]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE46]]
; CHECK: [[PRED_STORE_CONTINUE46]]:
; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF47:.*]], label %[[PRED_STORE_CONTINUE48:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/debugloc.ll b/llvm/test/Transforms/LoopVectorize/debugloc.ll
index 03e0853d29075..4b364133dfefe 100644
--- a/llvm/test/Transforms/LoopVectorize/debugloc.ll
+++ b/llvm/test/Transforms/LoopVectorize/debugloc.ll
@@ -173,11 +173,10 @@ define void @test_scalar_steps(ptr nocapture %a, ptr noalias %b, i64 %size) !dbg
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0, !dbg [[LOC8:!.+]]
-; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 2, !dbg [[LOC8]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]]
+; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 2, !dbg [[LOC8:!.+]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP7]]
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP6]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP7]]
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP8]], align 4
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP9]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll
index 1a99c47aa351d..0607ec38e7e46 100644
--- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll
@@ -325,11 +325,10 @@ define i1 @any_of_reduction_i1_epilog2(ptr %start, ptr %end, i64 %x) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[RDX_SELECT_CMP:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16
-; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 16
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 32
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 48
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP6]]
; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP7]]
@@ -378,11 +377,10 @@ define i1 @any_of_reduction_i1_epilog2(ptr %start, ptr %end, i64 %x) {
; CHECK-NEXT: [[INDEX11:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT20:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI12:%.*]] = phi <4 x i1> [ [[MINMAX_IDENT_SPLAT]], [[VEC_EPILOG_PH]] ], [ [[TMP43:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX13:%.*]] = mul i64 [[INDEX11]], 16
-; CHECK-NEXT: [[TMP26:%.*]] = add i64 [[OFFSET_IDX13]], 0
; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[OFFSET_IDX13]], 16
; CHECK-NEXT: [[TMP28:%.*]] = add i64 [[OFFSET_IDX13]], 32
; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[OFFSET_IDX13]], 48
-; CHECK-NEXT: [[NEXT_GEP14:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP26]]
+; CHECK-NEXT: [[NEXT_GEP14:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX13]]
; CHECK-NEXT: [[NEXT_GEP15:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP27]]
; CHECK-NEXT: [[NEXT_GEP16:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP28]]
; CHECK-NEXT: [[NEXT_GEP17:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP29]]
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll
index cf2e7ccd1b2f0..fe9cafea2db84 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll
@@ -15,7 +15,6 @@ define i8 @recurrence_phi_with_same_incoming_values_after_simplifications(i8 %fo
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 1, [[INDEX]]
-; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 1
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], 2
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], 3
@@ -23,7 +22,7 @@ define i8 @recurrence_phi_with_same_incoming_values_after_simplifications(i8 %fo
; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[OFFSET_IDX]], 5
; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], 6
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], 7
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP1]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP2]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP3]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP4]]
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-tail-folding.ll
index 28b46726f80dc..08abf5cfefdbf 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-tail-folding.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-tail-folding.ll
@@ -20,13 +20,14 @@ define i32 @FOR_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) {
; VF2IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE4:.*]] ]
; VF2IC1-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE4]] ]
; VF2IC1-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 33>, %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[PRED_STORE_CONTINUE4]] ]
-; VF2IC1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
; VF2IC1-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1
+; VF2IC1-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0
+; VF2IC1-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP4]], i32 1
; VF2IC1-NEXT: [[TMP2:%.*]] = icmp ule <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; VF2IC1-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
; VF2IC1-NEXT: br i1 [[TMP3]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; VF2IC1: [[PRED_LOAD_IF]]:
-; VF2IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP1]]
+; VF2IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDEX]]
; VF2IC1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
; VF2IC1-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0
; VF2IC1-NEXT: br label %[[PRED_LOAD_CONTINUE]]
@@ -45,7 +46,7 @@ define i32 @FOR_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) {
; VF2IC1-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
; VF2IC1-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF2IC1: [[PRED_STORE_IF]]:
-; VF2IC1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP1]]
+; VF2IC1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDEX]]
; VF2IC1-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0
; VF2IC1-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[TMP12]], i32 0
; VF2IC1-NEXT: [[TMP18:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
@@ -96,16 +97,19 @@ define i32 @FOR_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) {
; VF2IC2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE12]] ]
; VF2IC2-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 33>, %[[VECTOR_PH]] ], [ [[TMP25:%.*]], %[[PRED_STORE_CONTINUE12]] ]
; VF2IC2-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
-; VF2IC2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
; VF2IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
+; VF2IC2-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0
+; VF2IC2-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[TMP1]], i32 1
; VF2IC2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; VF2IC2-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 3
+; VF2IC2-NEXT: [[TMP69:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0
+; VF2IC2-NEXT: [[TMP70:%.*]] = insertelement <2 x i64> [[TMP69]], i64 [[TMP7]], i32 1
; VF2IC2-NEXT: [[TMP4:%.*]] = icmp ule <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; VF2IC2-NEXT: [[TMP5:%.*]] = icmp ule <2 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]]
; VF2IC2-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
; VF2IC2-NEXT: br i1 [[TMP6]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; VF2IC2: [[PRED_LOAD_IF]]:
-; VF2IC2-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP3]]
+; VF2IC2-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDEX]]
; VF2IC2-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
; VF2IC2-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP23]], i32 0
; VF2IC2-NEXT: br label %[[PRED_LOAD_CONTINUE]]
@@ -143,7 +147,7 @@ define i32 @FOR_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) {
; VF2IC2-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
; VF2IC2-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF2IC2: [[PRED_STORE_IF]]:
-; VF2IC2-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP3]]
+; VF2IC2-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDEX]]
; VF2IC2-NEXT: [[TMP30:%.*]] = extractelement <2 x i32> [[TMP26]], i32 0
; VF2IC2-NEXT: [[TMP31:%.*]] = extractelement <2 x i32> [[TMP15]], i32 0
; VF2IC2-NEXT: [[TMP32:%.*]] = add nsw i32 [[TMP30]], [[TMP31]]
@@ -314,13 +318,14 @@ define i32 @FOR_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) {
; VF2IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE4:.*]] ]
; VF2IC1-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE4]] ]
; VF2IC1-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 33>, %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[PRED_STORE_CONTINUE4]] ]
-; VF2IC1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
; VF2IC1-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1
+; VF2IC1-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0
+; VF2IC1-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP4]], i32 1
; VF2IC1-NEXT: [[TMP2:%.*]] = icmp ule <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; VF2IC1-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
; VF2IC1-NEXT: br i1 [[TMP3]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; VF2IC1: [[PRED_LOAD_IF]]:
-; VF2IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP1]]
+; VF2IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDEX]]
; VF2IC1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
; VF2IC1-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0
; VF2IC1-NEXT: br label %[[PRED_LOAD_CONTINUE]]
@@ -339,7 +344,7 @@ define i32 @FOR_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) {
; VF2IC1-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
; VF2IC1-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF2IC1: [[PRED_STORE_IF]]:
-; VF2IC1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP1]]
+; VF2IC1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDEX]]
; VF2IC1-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0
; VF2IC1-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[TMP12]], i32 0
; VF2IC1-NEXT: [[TMP18:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
@@ -386,16 +391,19 @@ define i32 @FOR_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) {
; VF2IC2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE12]] ]
; VF2IC2-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 33>, %[[VECTOR_PH]] ], [ [[TMP25:%.*]], %[[PRED_STORE_CONTINUE12]] ]
; VF2IC2-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
-; VF2IC2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
; VF2IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
+; VF2IC2-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0
+; VF2IC2-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[TMP1]], i32 1
; VF2IC2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; VF2IC2-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 3
+; VF2IC2-NEXT: [[TMP65:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0
+; VF2IC2-NEXT: [[TMP66:%.*]] = insertelement <2 x i64> [[TMP65]], i64 [[TMP7]], i32 1
; VF2IC2-NEXT: [[TMP4:%.*]] = icmp ule <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; VF2IC2-NEXT: [[TMP5:%.*]] = icmp ule <2 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]]
; VF2IC2-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
; VF2IC2-NEXT: br i1 [[TMP6]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; VF2IC2: [[PRED_LOAD_IF]]:
-; VF2IC2-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP3]]
+; VF2IC2-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDEX]]
; VF2IC2-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
; VF2IC2-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP23]], i32 0
; VF2IC2-NEXT: br label %[[PRED_LOAD_CONTINUE]]
@@ -433,7 +441,7 @@ define i32 @FOR_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) {
; VF2IC2-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
; VF2IC2-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF2IC2: [[PRED_STORE_IF]]:
-; VF2IC2-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP3]]
+; VF2IC2-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDEX]]
; VF2IC2-NEXT: [[TMP30:%.*]] = extractelement <2 x i32> [[TMP26]], i32 0
; VF2IC2-NEXT: [[TMP31:%.*]] = extractelement <2 x i32> [[TMP15]], i32 0
; VF2IC2-NEXT: [[TMP32:%.*]] = add nsw i32 [[TMP30]], [[TMP31]]
@@ -597,13 +605,14 @@ define i32 @FOR_and_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) {
; VF2IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE4:.*]] ]
; VF2IC1-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE4]] ]
; VF2IC1-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 33>, %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[PRED_STORE_CONTINUE4]] ]
-; VF2IC1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
; VF2IC1-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1
+; VF2IC1-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0
+; VF2IC1-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP4]], i32 1
; VF2IC1-NEXT: [[TMP2:%.*]] = icmp ule <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; VF2IC1-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
; VF2IC1-NEXT: br i1 [[TMP3]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; VF2IC1: [[PRED_LOAD_IF]]:
-; VF2IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP1]]
+; VF2IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDEX]]
; VF2IC1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
; VF2IC1-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0
; VF2IC1-NEXT: br label %[[PRED_LOAD_CONTINUE]]
@@ -622,7 +631,7 @@ define i32 @FOR_and_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) {
; VF2IC1-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
; VF2IC1-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF2IC1: [[PRED_STORE_IF]]:
-; VF2IC1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP1]]
+; VF2IC1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDEX]]
; VF2IC1-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0
; VF2IC1-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[TMP12]], i32 0
; VF2IC1-NEXT: [[TMP18:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
@@ -675,16 +684,19 @@ define i32 @FOR_and_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) {
; VF2IC2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE12]] ]
; VF2IC2-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 33>, %[[VECTOR_PH]] ], [ [[TMP25:%.*]], %[[PRED_STORE_CONTINUE12]] ]
; VF2IC2-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
-; VF2IC2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
; VF2IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
+; VF2IC2-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0
+; VF2IC2-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[TMP1]], i32 1
; VF2IC2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; VF2IC2-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 3
+; VF2IC2-NEXT: [[TMP74:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0
+; VF2IC2-NEXT: [[TMP75:%.*]] = insertelement <2 x i64> [[TMP74]], i64 [[TMP7]], i32 1
; VF2IC2-NEXT: [[TMP4:%.*]] = icmp ule <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; VF2IC2-NEXT: [[TMP5:%.*]] = icmp ule <2 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]]
; VF2IC2-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
; VF2IC2-NEXT: br i1 [[TMP6]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; VF2IC2: [[PRED_LOAD_IF]]:
-; VF2IC2-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP3]]
+; VF2IC2-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDEX]]
; VF2IC2-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
; VF2IC2-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP23]], i32 0
; VF2IC2-NEXT: br label %[[PRED_LOAD_CONTINUE]]
@@ -722,7 +734,7 @@ define i32 @FOR_and_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) {
; VF2IC2-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
; VF2IC2-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF2IC2: [[PRED_STORE_IF]]:
-; VF2IC2-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP3]]
+; VF2IC2-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDEX]]
; VF2IC2-NEXT: [[TMP30:%.*]] = extractelement <2 x i32> [[TMP26]], i32 0
; VF2IC2-NEXT: [[TMP31:%.*]] = extractelement <2 x i32> [[TMP15]], i32 0
; VF2IC2-NEXT: [[TMP32:%.*]] = add nsw i32 [[TMP30]], [[TMP31]]
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
index 063f47ce2b32d..82b3ae5eaf5d4 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
@@ -1041,8 +1041,6 @@ define i32 @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) {
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
-; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 8
-; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 10
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 12
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 14
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP5]], 2
@@ -1132,8 +1130,6 @@ define i32 @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) {
; SINK-AFTER: vector.body:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
-; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
-; SINK-AFTER-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 2
; SINK-AFTER-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 4
; SINK-AFTER-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 6
; SINK-AFTER-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP5]], 2
@@ -1423,7 +1419,6 @@ define i32 @PR33613(ptr %b, double %j, i32 %d, i32 %n) {
; UNROLL-NO-IC-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP41:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x double> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP31:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 200
-; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 200
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 400
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 600
@@ -1431,7 +1426,7 @@ define i32 @PR33613(ptr %b, double %j, i32 %d, i32 %n) {
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 1000
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 1200
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 1400
-; UNROLL-NO-IC-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
+; UNROLL-NO-IC-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[OFFSET_IDX]]
; UNROLL-NO-IC-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
; UNROLL-NO-IC-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
; UNROLL-NO-IC-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]]
@@ -1595,11 +1590,10 @@ define i32 @PR33613(ptr %b, double %j, i32 %d, i32 %n) {
; SINK-AFTER-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x double> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 200
-; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; SINK-AFTER-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 200
; SINK-AFTER-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 400
; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 600
-; SINK-AFTER-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
+; SINK-AFTER-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[OFFSET_IDX]]
; SINK-AFTER-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
; SINK-AFTER-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
; SINK-AFTER-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]]
@@ -1898,7 +1892,6 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) {
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP34:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
@@ -1906,8 +1899,8 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) {
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7
-; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP0]]
-; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP0]], i64 1
+; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[INDEX]]
+; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[INDEX]], i64 1
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP1]], i64 1
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP2]], i64 1
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP3]], i64 1
@@ -1942,7 +1935,7 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) {
; UNROLL-NO-IC-NEXT: [[TMP40:%.*]] = sext <4 x i16> [[TMP34]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP41:%.*]] = mul nsw <4 x i32> [[TMP39]], [[TMP37]]
; UNROLL-NO-IC-NEXT: [[TMP42:%.*]] = mul nsw <4 x i32> [[TMP40]], [[TMP38]]
-; UNROLL-NO-IC-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]]
+; UNROLL-NO-IC-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
; UNROLL-NO-IC-NEXT: [[TMP45:%.*]] = getelementptr inbounds i32, ptr [[TMP43]], i64 4
; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP41]], ptr [[TMP43]], align 4
; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP42]], ptr [[TMP45]], align 4
@@ -2046,12 +2039,11 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) {
; SINK-AFTER: vector.body:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
-; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; SINK-AFTER-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; SINK-AFTER-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; SINK-AFTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP0]]
-; SINK-AFTER-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP0]], i64 1
+; SINK-AFTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[INDEX]]
+; SINK-AFTER-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[INDEX]], i64 1
; SINK-AFTER-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP1]], i64 1
; SINK-AFTER-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP2]], i64 1
; SINK-AFTER-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP3]], i64 1
@@ -2068,7 +2060,7 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) {
; SINK-AFTER-NEXT: [[TMP19:%.*]] = sext <4 x i16> [[TMP18]] to <4 x i32>
; SINK-AFTER-NEXT: [[TMP20:%.*]] = sext <4 x i16> [[TMP17]] to <4 x i32>
; SINK-AFTER-NEXT: [[TMP21:%.*]] = mul nsw <4 x i32> [[TMP20]], [[TMP19]]
-; SINK-AFTER-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]]
+; SINK-AFTER-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
; SINK-AFTER-NEXT: store <4 x i32> [[TMP21]], ptr [[TMP22]], align 4
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; SINK-AFTER-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -2861,20 +2853,27 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) {
; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP47:%.*]], [[PRED_STORE_CONTINUE29]] ]
; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
-; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 0
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -1
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], -2
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], -3
+; UNROLL-NO-IC-NEXT: [[TMP76:%.*]] = insertelement <4 x i32> poison, i32 [[OFFSET_IDX]], i32 0
+; UNROLL-NO-IC-NEXT: [[TMP77:%.*]] = insertelement <4 x i32> [[TMP76]], i32 [[TMP3]], i32 1
+; UNROLL-NO-IC-NEXT: [[TMP78:%.*]] = insertelement <4 x i32> [[TMP77]], i32 [[TMP4]], i32 2
+; UNROLL-NO-IC-NEXT: [[TMP79:%.*]] = insertelement <4 x i32> [[TMP78]], i32 [[TMP5]], i32 3
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i32 [[OFFSET_IDX]], -4
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], -5
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], -6
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add i32 [[OFFSET_IDX]], -7
+; UNROLL-NO-IC-NEXT: [[TMP80:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0
+; UNROLL-NO-IC-NEXT: [[TMP81:%.*]] = insertelement <4 x i32> [[TMP80]], i32 [[TMP7]], i32 1
+; UNROLL-NO-IC-NEXT: [[TMP82:%.*]] = insertelement <4 x i32> [[TMP81]], i32 [[TMP8]], i32 2
+; UNROLL-NO-IC-NEXT: [[TMP83:%.*]] = insertelement <4 x i32> [[TMP82]], i32 [[TMP9]], i32 3
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = icmp ule <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = icmp ule <4 x i32> [[STEP_ADD]], [[BROADCAST_SPLAT]]
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP10]], i32 0
; UNROLL-NO-IC-NEXT: br i1 [[TMP12]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
; UNROLL-NO-IC: pred.udiv.if:
-; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = udiv i32 219220132, [[TMP2]]
+; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = udiv i32 219220132, [[OFFSET_IDX]]
; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> poison, i32 [[TMP13]], i32 0
; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE]]
; UNROLL-NO-IC: pred.udiv.continue:
@@ -2944,7 +2943,7 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) {
; UNROLL-NO-IC: pred.store.if:
; UNROLL-NO-IC-NEXT: [[TMP49:%.*]] = add i32 [[INDEX]], 0
; UNROLL-NO-IC-NEXT: [[TMP50:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP49]]
-; UNROLL-NO-IC-NEXT: store i32 [[TMP2]], ptr [[TMP50]], align 4
+; UNROLL-NO-IC-NEXT: store i32 [[OFFSET_IDX]], ptr [[TMP50]], align 4
; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE]]
; UNROLL-NO-IC: pred.store.continue:
; UNROLL-NO-IC-NEXT: [[TMP51:%.*]] = extractelement <4 x i1> [[TMP10]], i32 1
@@ -3097,15 +3096,18 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) {
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[PRED_STORE_CONTINUE12]] ]
; SINK-AFTER-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[PRED_STORE_CONTINUE12]] ]
; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
-; SINK-AFTER-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 0
; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -1
; SINK-AFTER-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], -2
; SINK-AFTER-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], -3
+; SINK-AFTER-NEXT: [[TMP40:%.*]] = insertelement <4 x i32> poison, i32 [[OFFSET_IDX]], i32 0
+; SINK-AFTER-NEXT: [[TMP41:%.*]] = insertelement <4 x i32> [[TMP40]], i32 [[TMP3]], i32 1
+; SINK-AFTER-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP41]], i32 [[TMP4]], i32 2
+; SINK-AFTER-NEXT: [[TMP43:%.*]] = insertelement <4 x i32> [[TMP42]], i32 [[TMP5]], i32 3
; SINK-AFTER-NEXT: [[TMP6:%.*]] = icmp ule <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; SINK-AFTER-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
; SINK-AFTER-NEXT: br i1 [[TMP7]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
; SINK-AFTER: pred.udiv.if:
-; SINK-AFTER-NEXT: [[TMP8:%.*]] = udiv i32 219220132, [[TMP2]]
+; SINK-AFTER-NEXT: [[TMP8:%.*]] = udiv i32 219220132, [[OFFSET_IDX]]
; SINK-AFTER-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> poison, i32 [[TMP8]], i32 0
; SINK-AFTER-NEXT: br label [[PRED_UDIV_CONTINUE]]
; SINK-AFTER: pred.udiv.continue:
@@ -3141,7 +3143,7 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) {
; SINK-AFTER: pred.store.if:
; SINK-AFTER-NEXT: [[TMP26:%.*]] = add i32 [[INDEX]], 0
; SINK-AFTER-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP26]]
-; SINK-AFTER-NEXT: store i32 [[TMP2]], ptr [[TMP27]], align 4
+; SINK-AFTER-NEXT: store i32 [[OFFSET_IDX]], ptr [[TMP27]], align 4
; SINK-AFTER-NEXT: br label [[PRED_STORE_CONTINUE]]
; SINK-AFTER: pred.store.continue:
; SINK-AFTER-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP6]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll
index cdbe9bb555834..d95fb869bb1b6 100644
--- a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll
@@ -22,24 +22,23 @@ define void @test_stores_noalias_via_rt_checks_after_loads(ptr %dst, ptr %src, p
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META0:![0-9]+]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4, !alias.scope [[META0:![0-9]+]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
-; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META3:![0-9]+]]
+; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope [[META3:![0-9]+]]
; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !alias.scope [[META3]]
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP9]], i32 0
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP16]], i32 1
; CHECK-NEXT: [[TMP19:%.*]] = sub <2 x i32> [[TMP17]], splat (i32 5)
; CHECK-NEXT: [[TMP36:%.*]] = add <2 x i32> [[TMP17]], splat (i32 10)
-; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]]
; CHECK-NEXT: [[TMP14:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP36]], <2 x i32> [[TMP19]]
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x i32> [[TMP14]], i32 0
-; CHECK-NEXT: store i32 [[TMP18]], ptr [[TMP21]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]]
+; CHECK-NEXT: store i32 [[TMP18]], ptr [[TMP11]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]]
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i32> [[TMP14]], i32 1
; CHECK-NEXT: store i32 [[TMP20]], ptr [[TMP24]], align 4, !alias.scope [[META5]], !noalias [[META7]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
@@ -109,16 +108,17 @@ define void @test_aliasing_store(ptr %dst, ptr %src, ptr %cond) {
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE15:.*]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META12:![0-9]+]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP5]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META12:![0-9]+]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true)
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; CHECK: [[PRED_LOAD_IF]]:
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
; CHECK-NEXT: store i32 99, ptr [[TMP10]], align 4, !alias.scope [[META15:![0-9]+]], !noalias [[META17:![0-9]+]]
; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META15]], !noalias [[META17]]
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0
@@ -139,8 +139,8 @@ define void @test_aliasing_store(ptr %dst, ptr %src, ptr %cond) {
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0
; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_LOAD_IF12:.*]], label %[[PRED_LOAD_CONTINUE13:.*]]
; CHECK: [[PRED_LOAD_IF12]]:
-; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]]
-; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4, !alias.scope [[META15]], !noalias [[META17]]
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
+; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP20]], align 4, !alias.scope [[META15]], !noalias [[META17]]
; CHECK-NEXT: [[TMP29:%.*]] = insertelement <2 x i32> poison, i32 [[TMP28]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE13]]
; CHECK: [[PRED_LOAD_CONTINUE13]]:
@@ -155,7 +155,7 @@ define void @test_aliasing_store(ptr %dst, ptr %src, ptr %cond) {
; CHECK: [[PRED_LOAD_CONTINUE15]]:
; CHECK-NEXT: [[TMP35:%.*]] = phi <2 x i32> [ [[TMP30]], %[[PRED_LOAD_CONTINUE13]] ], [ [[TMP34]], %[[PRED_LOAD_IF14]] ]
; CHECK-NEXT: [[TMP36:%.*]] = add <2 x i32> [[TMP35]], splat (i32 10)
-; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
+; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]]
; CHECK-NEXT: [[TMP37:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP36]], <2 x i32> [[TMP19]]
; CHECK-NEXT: [[TMP38:%.*]] = extractelement <2 x i32> [[TMP37]], i32 0
@@ -239,16 +239,17 @@ define void @test_noalias_store_via_runtime_checks(ptr %dst, ptr %dst.1, ptr %sr
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE20:.*]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META22:![0-9]+]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP5]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META22:![0-9]+]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt <2 x i32> [[WIDE_LOAD]], splat (i32 11)
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST_1]], i32 [[TMP4]]
-; CHECK-NEXT: store i32 10, ptr [[TMP10]], align 4, !alias.scope [[META25:![0-9]+]], !noalias [[META27:![0-9]+]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST_1]], i32 [[INDEX]]
+; CHECK-NEXT: store i32 10, ptr [[TMP6]], align 4, !alias.scope [[META25:![0-9]+]], !noalias [[META27:![0-9]+]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1
@@ -258,15 +259,15 @@ define void @test_noalias_store_via_runtime_checks(ptr %dst, ptr %dst.1, ptr %sr
; CHECK-NEXT: store i32 10, ptr [[TMP16]], align 4, !alias.scope [[META25]], !noalias [[META27]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE20]]
; CHECK: [[PRED_STORE_CONTINUE20]]:
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
-; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP12]], align 4, !alias.scope [[META30:![0-9]+]]
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META30:![0-9]+]]
; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4, !alias.scope [[META30]]
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP14]], i32 [[TMP18]], i32 1
; CHECK-NEXT: [[TMP21:%.*]] = sub <2 x i32> [[TMP19]], splat (i32 5)
; CHECK-NEXT: [[TMP38:%.*]] = add <2 x i32> [[TMP19]], splat (i32 10)
-; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]]
; CHECK-NEXT: [[TMP22:%.*]] = select <2 x i1> [[TMP8]], <2 x i32> [[TMP21]], <2 x i32> [[TMP38]]
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP22]], i32 0
@@ -342,16 +343,17 @@ define void @test_memory_op_between_loads_alias(ptr %dst, ptr %src, ptr %cond, p
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE15:.*]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META35:![0-9]+]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP5]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META35:![0-9]+]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true)
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; CHECK: [[PRED_LOAD_IF]]:
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
; CHECK-NEXT: store i32 0, ptr [[TMP10]], align 4, !alias.scope [[META38:![0-9]+]], !noalias [[META40:![0-9]+]]
; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META38]], !noalias [[META40]]
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0
@@ -372,8 +374,8 @@ define void @test_memory_op_between_loads_alias(ptr %dst, ptr %src, ptr %cond, p
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0
; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_LOAD_IF12:.*]], label %[[PRED_LOAD_CONTINUE13:.*]]
; CHECK: [[PRED_LOAD_IF12]]:
-; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]]
-; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP27]], align 4, !alias.scope [[META38]], !noalias [[META40]]
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
+; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP21]], align 4, !alias.scope [[META38]], !noalias [[META40]]
; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i32> poison, i32 [[TMP20]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE13]]
; CHECK: [[PRED_LOAD_CONTINUE13]]:
@@ -387,11 +389,11 @@ define void @test_memory_op_between_loads_alias(ptr %dst, ptr %src, ptr %cond, p
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE15]]
; CHECK: [[PRED_LOAD_CONTINUE15]]:
; CHECK-NEXT: [[TMP33:%.*]] = phi <2 x i32> [ [[TMP22]], %[[PRED_LOAD_CONTINUE13]] ], [ [[TMP32]], %[[PRED_LOAD_IF14]] ]
-; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
+; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]]
; CHECK-NEXT: [[TMP28:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP33]], <2 x i32> [[TMP19]]
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x i32> [[TMP28]], i32 0
-; CHECK-NEXT: store i32 [[TMP29]], ptr [[TMP36]], align 4, !alias.scope [[META42:![0-9]+]], !noalias [[META35]]
+; CHECK-NEXT: store i32 [[TMP29]], ptr [[TMP27]], align 4, !alias.scope [[META42:![0-9]+]], !noalias [[META35]]
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x i32> [[TMP28]], i32 1
; CHECK-NEXT: store i32 [[TMP35]], ptr [[TMP37]], align 4, !alias.scope [[META42]], !noalias [[META35]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
@@ -474,16 +476,17 @@ define void @test_memory_op_between_loads_no_alias_via_rt_checks(ptr %dst, ptr %
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE20:.*]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META45:![0-9]+]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP5]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META45:![0-9]+]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt <2 x i32> [[WIDE_LOAD]], splat (i32 11)
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST_1]], i32 [[TMP4]]
-; CHECK-NEXT: store i32 0, ptr [[TMP10]], align 4, !alias.scope [[META48:![0-9]+]], !noalias [[META50:![0-9]+]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST_1]], i32 [[INDEX]]
+; CHECK-NEXT: store i32 0, ptr [[TMP6]], align 4, !alias.scope [[META48:![0-9]+]], !noalias [[META50:![0-9]+]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1
@@ -493,14 +496,14 @@ define void @test_memory_op_between_loads_no_alias_via_rt_checks(ptr %dst, ptr %
; CHECK-NEXT: store i32 0, ptr [[TMP16]], align 4, !alias.scope [[META48]], !noalias [[META50]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE20]]
; CHECK: [[PRED_STORE_CONTINUE20]]:
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
-; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP12]], align 4, !alias.scope [[META53:![0-9]+]]
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META53:![0-9]+]]
; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4, !alias.scope [[META53]]
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP14]], i32 [[TMP18]], i32 1
; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i32> [[TMP19]], splat (i32 10)
-; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]]
; CHECK-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP8]], <2 x i32> [[TMP21]], <2 x i32> [[TMP19]]
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[TMP20]], i32 0
@@ -576,16 +579,17 @@ define void @test_stores_not_sunk_due_to_aliasing_load(ptr %dst, ptr %alias, ptr
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE7:.*]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META58:![0-9]+]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP5]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META58:![0-9]+]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt <2 x i32> [[WIDE_LOAD]], splat (i32 11)
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0
; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; CHECK: [[PRED_LOAD_IF]]:
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[ALIAS]], i32 [[TMP4]]
-; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META61:![0-9]+]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[ALIAS]], i32 [[INDEX]]
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META61:![0-9]+]]
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
; CHECK: [[PRED_LOAD_CONTINUE]]:
@@ -599,7 +603,7 @@ define void @test_stores_not_sunk_due_to_aliasing_load(ptr %dst, ptr %alias, ptr
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE7]]
; CHECK: [[PRED_LOAD_CONTINUE7]]:
; CHECK-NEXT: [[TMP22:%.*]] = phi <2 x i32> [ [[TMP20]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF6]] ]
-; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]]
; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP22]], <2 x i32> splat (i32 10)
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[TMP16]], i32 0
@@ -670,20 +674,21 @@ define void @test_stores_not_sunk_aliasing_load_between(ptr %dst, ptr %mid, ptr
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE15:.*]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP0]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META68:![0-9]+]]
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP11]], align 4, !alias.scope [[META68:![0-9]+]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]]
-; CHECK-NEXT: store i32 20, ptr [[TMP6]], align 4, !alias.scope [[META71:![0-9]+]], !noalias [[META73:![0-9]+]]
-; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META71]], !noalias [[META73]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[MID]], i32 [[TMP0]]
-; CHECK-NEXT: store i32 [[TMP7]], ptr [[TMP8]], align 4, !alias.scope [[META75:![0-9]+]], !noalias [[META68]]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
+; CHECK-NEXT: store i32 20, ptr [[TMP7]], align 4, !alias.scope [[META71:![0-9]+]], !noalias [[META73:![0-9]+]]
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope [[META71]], !noalias [[META73]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[MID]], i32 [[INDEX]]
+; CHECK-NEXT: store i32 [[TMP8]], ptr [[TMP14]], align 4, !alias.scope [[META75:![0-9]+]], !noalias [[META68]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
@@ -691,16 +696,16 @@ define void @test_stores_not_sunk_aliasing_load_between(ptr %dst, ptr %mid, ptr
; CHECK: [[PRED_STORE_IF10]]:
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP1]]
; CHECK-NEXT: store i32 20, ptr [[TMP10]], align 4, !alias.scope [[META71]], !noalias [[META73]]
-; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META71]], !noalias [[META73]]
+; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META71]], !noalias [[META73]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[MID]], i32 [[TMP1]]
-; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP12]], align 4, !alias.scope [[META75]], !noalias [[META68]]
+; CHECK-NEXT: store i32 [[TMP18]], ptr [[TMP12]], align 4, !alias.scope [[META75]], !noalias [[META68]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE11]]
; CHECK: [[PRED_STORE_CONTINUE11]]:
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_STORE_IF12:.*]], label %[[PRED_STORE_CONTINUE13:.*]]
; CHECK: [[PRED_STORE_IF12]]:
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]]
-; CHECK-NEXT: store i32 10, ptr [[TMP14]], align 4, !alias.scope [[META71]], !noalias [[META73]]
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
+; CHECK-NEXT: store i32 10, ptr [[TMP19]], align 4, !alias.scope [[META71]], !noalias [[META73]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE13]]
; CHECK: [[PRED_STORE_CONTINUE13]]:
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
@@ -766,9 +771,10 @@ define void @sink_multiple_store_groups_noalias_via_scev(ptr %dst, ptr %src) {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ]
; CHECK-NEXT: [[INDEX:%.*]] = mul i64 [[INDEX1]], 16
-; CHECK-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 16
-; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP17]], i32 1
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP17]]
; CHECK-NEXT: [[GEP_FLAG:%.*]] = getelementptr i8, ptr [[GEP_SRC]], i64 152
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[TMP22]], i64 152
@@ -786,9 +792,9 @@ define void @sink_multiple_store_groups_noalias_via_scev(ptr %dst, ptr %src) {
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i1> [[TMP16]], i32 0
; CHECK-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV]]
+; CHECK-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[DST]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[TMP34]], i32 0
-; CHECK-NEXT: store double [[TMP19]], ptr [[TMP18]], align 8, !alias.scope [[META81:![0-9]+]], !noalias [[META78]]
+; CHECK-NEXT: store double [[TMP19]], ptr [[TMP35]], align 8, !alias.scope [[META81:![0-9]+]], !noalias [[META78]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP16]], i32 1
@@ -802,7 +808,7 @@ define void @sink_multiple_store_groups_noalias_via_scev(ptr %dst, ptr %src) {
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0
; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]]
; CHECK: [[PRED_STORE_IF4]]:
-; CHECK-NEXT: [[TMP31:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV]]
+; CHECK-NEXT: [[TMP31:%.*]] = getelementptr double, ptr [[DST]], i64 [[INDEX]]
; CHECK-NEXT: store double [[TMP13]], ptr [[TMP31]], align 8, !alias.scope [[META81]], !noalias [[META78]]
; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[TMP31]], i64 16
; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP37]], align 8, !alias.scope [[META81]], !noalias [[META78]]
@@ -877,9 +883,10 @@ define void @sink_multiple_store_groups_alias_via_scev(ptr %dst, ptr %src) {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16
-; CHECK-NEXT: [[IV:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16
-; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[TMP1]], i32 1
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP1]]
; CHECK-NEXT: [[GEP_FLAG:%.*]] = getelementptr i8, ptr [[GEP_SRC]], i64 152
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP3]], i64 152
@@ -897,7 +904,7 @@ define void @sink_multiple_store_groups_alias_via_scev(ptr %dst, ptr %src) {
; CHECK-NEXT: [[TMP36:%.*]] = extractelement <2 x i1> [[TMP17]], i32 0
; CHECK-NEXT: br i1 [[TMP36]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV]]
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[DST]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[TMP18]], i32 0
; CHECK-NEXT: store double [[TMP19]], ptr [[TMP20]], align 8, !alias.scope [[META88:![0-9]+]], !noalias [[META85]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
@@ -913,7 +920,7 @@ define void @sink_multiple_store_groups_alias_via_scev(ptr %dst, ptr %src) {
; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i1> [[TMP12]], i32 0
; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]]
; CHECK: [[PRED_STORE_IF4]]:
-; CHECK-NEXT: [[TMP43:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV]]
+; CHECK-NEXT: [[TMP43:%.*]] = getelementptr double, ptr [[DST]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: store double [[TMP13]], ptr [[TMP43]], align 8, !alias.scope [[META88]], !noalias [[META85]]
; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[TMP43]], i64 8
; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP44]], align 8, !alias.scope [[META88]], !noalias [[META85]]
@@ -1062,16 +1069,17 @@ define void @test_three_stores_with_different_predicates(ptr %dst, ptr %src, ptr
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE11:.*]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP0]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META92:![0-9]+]]
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP13]], align 4, !alias.scope [[META92:![0-9]+]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]]
-; CHECK-NEXT: store i32 1, ptr [[TMP5]], align 4, !alias.scope [[META95:![0-9]+]], !noalias [[META92]]
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
+; CHECK-NEXT: store i32 1, ptr [[TMP19]], align 4, !alias.scope [[META95:![0-9]+]], !noalias [[META92]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
@@ -1088,8 +1096,8 @@ define void @test_three_stores_with_different_predicates(ptr %dst, ptr %src, ptr
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0
; CHECK-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]]
; CHECK: [[PRED_STORE_IF4]]:
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]]
-; CHECK-NEXT: store i32 2, ptr [[TMP13]], align 4, !alias.scope [[META95]], !noalias [[META92]]
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
+; CHECK-NEXT: store i32 2, ptr [[TMP23]], align 4, !alias.scope [[META95]], !noalias [[META92]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE5]]
; CHECK: [[PRED_STORE_CONTINUE5]]:
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1
@@ -1104,8 +1112,8 @@ define void @test_three_stores_with_different_predicates(ptr %dst, ptr %src, ptr
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x i1> [[TMP17]], i32 0
; CHECK-NEXT: br i1 [[TMP18]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]]
; CHECK: [[PRED_STORE_IF8]]:
-; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]]
-; CHECK-NEXT: store i32 3, ptr [[TMP19]], align 4, !alias.scope [[META95]], !noalias [[META92]]
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
+; CHECK-NEXT: store i32 3, ptr [[TMP24]], align 4, !alias.scope [[META95]], !noalias [[META92]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE9]]
; CHECK: [[PRED_STORE_CONTINUE9]]:
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP17]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll
index f6dd8564c001b..724a3cb10eb3c 100644
--- a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll
+++ b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll
@@ -22,21 +22,20 @@ define void @test(ptr %dst, ptr %src, ptr %cond) {
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
-; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP24]], align 4, !alias.scope [[META0:![0-9]+]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META0:![0-9]+]]
; CHECK-NEXT: [[TMP15:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
-; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP8]], align 4, !alias.scope [[META3:![0-9]+]]
+; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP1]], align 4, !alias.scope [[META3:![0-9]+]]
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META3]]
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i32 0
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <2 x i32> [[TMP19]], i32 [[TMP7]], i32 1
; CHECK-NEXT: [[TMP25:%.*]] = add <2 x i32> [[TMP36]], splat (i32 10)
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[TMP36]], <2 x i32> [[TMP25]]
-; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
-; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP37]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
+; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP10]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP38]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -100,16 +99,17 @@ define void @different_addresses(ptr %dst, ptr %src1, ptr %src2, ptr %cond) {
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE13:.*]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[TMP5]], i32 1
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP16]], align 4
; CHECK-NEXT: [[TMP11:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
; CHECK-NEXT: [[TMP12:%.*]] = xor <2 x i1> [[TMP11]], splat (i1 true)
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP12]], i32 0
; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; CHECK: [[PRED_LOAD_IF]]:
-; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i32 [[TMP4]]
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP20]], align 4
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> poison, i32 [[TMP15]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
@@ -128,7 +128,7 @@ define void @different_addresses(ptr %dst, ptr %src1, ptr %src2, ptr %cond) {
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0
; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_LOAD_IF10:.*]], label %[[PRED_LOAD_CONTINUE11:.*]]
; CHECK: [[PRED_LOAD_IF10]]:
-; CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i32 [[TMP4]]
+; CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP49]], align 4
; CHECK-NEXT: [[TMP30:%.*]] = insertelement <2 x i32> poison, i32 [[TMP24]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE11]]
@@ -144,7 +144,7 @@ define void @different_addresses(ptr %dst, ptr %src1, ptr %src2, ptr %cond) {
; CHECK: [[PRED_LOAD_CONTINUE13]]:
; CHECK-NEXT: [[TMP33:%.*]] = phi <2 x i32> [ [[TMP32]], %[[PRED_LOAD_CONTINUE11]] ], [ [[TMP27]], %[[PRED_LOAD_IF12]] ]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP11]], <2 x i32> [[TMP33]], <2 x i32> [[TMP23]]
-; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
+; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP34]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
@@ -213,16 +213,15 @@ define void @non_complementary_masks(ptr %dst, ptr %src, ptr %cond1, ptr %cond2)
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE17:.*]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP8]], i32 0
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x ptr> [[TMP10]], ptr [[TMP9]], i32 1
-; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[COND1]], i32 [[TMP4]]
-; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[COND2]], i32 [[TMP4]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP24]], align 4, !alias.scope [[META14:![0-9]+]]
-; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <2 x i32>, ptr [[TMP25]], align 4, !alias.scope [[META17:![0-9]+]]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[COND1]], i32 [[INDEX]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND2]], i32 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 4, !alias.scope [[META14:![0-9]+]]
+; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META17:![0-9]+]]
; CHECK-NEXT: [[TMP37:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
; CHECK-NEXT: [[TMP38:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD11]], splat (i32 20)
; CHECK-NEXT: [[TMP18:%.*]] = xor <2 x i1> [[TMP37]], splat (i1 true)
@@ -262,7 +261,7 @@ define void @non_complementary_masks(ptr %dst, ptr %src, ptr %cond1, ptr %cond2)
; CHECK-NEXT: [[TMP32:%.*]] = phi <2 x i32> [ [[TMP36]], %[[PRED_LOAD_CONTINUE15]] ], [ [[TMP31]], %[[PRED_LOAD_IF16]] ]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP19]], <2 x i32> [[TMP28]], <2 x i32> zeroinitializer
; CHECK-NEXT: [[PREDPHI18:%.*]] = select <2 x i1> [[TMP37]], <2 x i32> [[TMP32]], <2 x i32> [[PREDPHI]]
-; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
+; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
; CHECK-NEXT: store <2 x i32> [[PREDPHI18]], ptr [[TMP41]], align 4, !alias.scope [[META21:![0-9]+]], !noalias [[META23:![0-9]+]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
@@ -332,13 +331,12 @@ define void @different_access_sizes(ptr %dst, ptr %src, ptr %cond) {
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE11:.*]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP6]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x ptr> [[TMP8]], ptr [[TMP7]], i32 1
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP10]], align 4, !alias.scope [[META26:![0-9]+]]
; CHECK-NEXT: [[TMP11:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
; CHECK-NEXT: [[TMP12:%.*]] = xor <2 x i1> [[TMP11]], splat (i1 true)
@@ -377,7 +375,7 @@ define void @different_access_sizes(ptr %dst, ptr %src, ptr %cond) {
; CHECK-NEXT: [[TMP29:%.*]] = phi <2 x i8> [ [[TMP25]], %[[PRED_LOAD_CONTINUE9]] ], [ [[TMP28]], %[[PRED_LOAD_IF10]] ]
; CHECK-NEXT: [[TMP30:%.*]] = zext <2 x i8> [[TMP29]] to <2 x i32>
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP11]], <2 x i32> [[TMP30]], <2 x i32> [[TMP21]]
-; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
+; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP31]], align 4, !alias.scope [[META31:![0-9]+]], !noalias [[META33:![0-9]+]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
@@ -441,21 +439,20 @@ define void @different_alignments_same_address(ptr %dst, ptr %src, ptr %cond) {
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
-; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP20]], align 4, !alias.scope [[META36:![0-9]+]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META36:![0-9]+]]
; CHECK-NEXT: [[TMP15:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
-; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP8]], align 2, !alias.scope [[META39:![0-9]+]]
+; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP1]], align 2, !alias.scope [[META39:![0-9]+]]
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP9]], align 2, !alias.scope [[META39]]
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i32 0
; CHECK-NEXT: [[TMP25:%.*]] = insertelement <2 x i32> [[TMP19]], i32 [[TMP7]], i32 1
; CHECK-NEXT: [[TMP26:%.*]] = add <2 x i32> [[TMP25]], splat (i32 10)
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[TMP25]], <2 x i32> [[TMP26]]
-; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
-; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP35]], align 4, !alias.scope [[META41:![0-9]+]], !noalias [[META43:![0-9]+]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
+; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP10]], align 4, !alias.scope [[META41:![0-9]+]], !noalias [[META43:![0-9]+]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]]
@@ -581,21 +578,20 @@ define void @duplicate_gep(ptr %dst, ptr %src, ptr %cond) {
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META46:![0-9]+]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4, !alias.scope [[META46:![0-9]+]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
-; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META49:![0-9]+]]
+; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope [[META49:![0-9]+]]
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP15]], align 4, !alias.scope [[META49]]
; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i32> poison, i32 [[TMP22]], i32 0
; CHECK-NEXT: [[TMP29:%.*]] = insertelement <2 x i32> [[TMP23]], i32 [[TMP8]], i32 1
; CHECK-NEXT: [[TMP19:%.*]] = add <2 x i32> [[TMP29]], splat (i32 10)
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP29]], <2 x i32> [[TMP19]]
-; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
-; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP30]], align 4, !alias.scope [[META51:![0-9]+]], !noalias [[META53:![0-9]+]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
+; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP10]], align 4, !alias.scope [[META51:![0-9]+]], !noalias [[META53:![0-9]+]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP31]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]]
@@ -660,21 +656,20 @@ define void @non_unit_stride_i64(ptr %dst, ptr %src, ptr %cond) {
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 1
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP6]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4, !alias.scope [[META56:![0-9]+]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4, !alias.scope [[META56:![0-9]+]]
; CHECK-NEXT: [[TMP9:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[TMP6]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[TMP7]]
-; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP12]], align 4, !alias.scope [[META59:![0-9]+]]
+; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope [[META59:![0-9]+]]
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP17]], align 4, !alias.scope [[META59]]
; CHECK-NEXT: [[TMP25:%.*]] = insertelement <2 x i32> poison, i32 [[TMP24]], i32 0
; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x i32> [[TMP25]], i32 [[TMP10]], i32 1
; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i32> [[TMP31]], splat (i32 10)
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x i32> [[TMP31]], <2 x i32> [[TMP21]]
-; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP6]]
-; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP32]], align 4, !alias.scope [[META61:![0-9]+]], !noalias [[META63:![0-9]+]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
+; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP11]], align 4, !alias.scope [[META61:![0-9]+]], !noalias [[META63:![0-9]+]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP33:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP33]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP64:![0-9]+]]
@@ -784,16 +779,17 @@ define void @hoist_multiple_complementary_loads(ptr noalias %dst, ptr noalias %s
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE10:.*]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
-; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP0]]
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP43]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <2 x i32> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true)
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; CHECK: [[PRED_LOAD_IF]]:
-; CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[TMP0]]
+; CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP64:%.*]] = load i32, ptr [[TMP63]], align 4
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> poison, i32 [[TMP64]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
@@ -815,7 +811,7 @@ define void @hoist_multiple_complementary_loads(ptr noalias %dst, ptr noalias %s
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[TMP32]], i32 0
; CHECK-NEXT: br i1 [[TMP19]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]]
; CHECK: [[PRED_LOAD_IF3]]:
-; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[TMP0]]
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
; CHECK-NEXT: [[TMP33:%.*]] = insertelement <2 x i32> poison, i32 [[TMP21]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE4]]
@@ -835,7 +831,7 @@ define void @hoist_multiple_complementary_loads(ptr noalias %dst, ptr noalias %s
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <2 x i1> [[TMP30]], i32 0
; CHECK-NEXT: br i1 [[TMP31]], label %[[PRED_LOAD_IF7:.*]], label %[[PRED_LOAD_CONTINUE8:.*]]
; CHECK: [[PRED_LOAD_IF7]]:
-; CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[TMP0]]
+; CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP61]], align 4
; CHECK-NEXT: [[TMP38:%.*]] = insertelement <2 x i32> poison, i32 [[TMP34]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE8]]
@@ -852,7 +848,7 @@ define void @hoist_multiple_complementary_loads(ptr noalias %dst, ptr noalias %s
; CHECK-NEXT: [[TMP45:%.*]] = phi <2 x i32> [ [[TMP35]], %[[PRED_LOAD_CONTINUE8]] ], [ [[TMP44]], %[[PRED_LOAD_IF9]] ]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP32]], <2 x i32> [[TMP22]], <2 x i32> [[TMP15]]
; CHECK-NEXT: [[TMP42:%.*]] = select <2 x i1> [[TMP30]], <2 x i32> [[TMP45]], <2 x i32> [[PREDPHI]]
-; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]]
+; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[TMP39]], i64 32
; CHECK-NEXT: store <2 x i32> [[TMP42]], ptr [[TMP40]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
@@ -924,12 +920,11 @@ define void @hoist_predicated_load_with_chained_geps1(ptr %dst, ptr %src, i1 %co
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr [11 x i16], ptr [[SRC]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP20]], i64 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP21]], align 2, !alias.scope [[META68:![0-9]+]]
-; CHECK-NEXT: store i16 [[TMP4]], ptr [[DST]], align 2, !alias.scope [[META71:![0-9]+]], !noalias [[META68]]
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP21]], align 2, !alias.scope [[META68:![0-9]+]]
+; CHECK-NEXT: store i16 [[TMP3]], ptr [[DST]], align 2, !alias.scope [[META71:![0-9]+]], !noalias [[META68]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP73:![0-9]+]]
@@ -984,12 +979,11 @@ define void @hoist_predicated_load_with_chained_geps2(ptr %dst, ptr %src, i1 %co
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [11 x i16], ptr [[SRC]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP4]], i64 8
-; CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[TMP21]], align 2, !alias.scope [[META75:![0-9]+]]
-; CHECK-NEXT: store i16 [[TMP5]], ptr [[DST]], align 2, !alias.scope [[META78:![0-9]+]], !noalias [[META75]]
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP21]], align 2, !alias.scope [[META75:![0-9]+]]
+; CHECK-NEXT: store i16 [[TMP3]], ptr [[DST]], align 2, !alias.scope [[META78:![0-9]+]], !noalias [[META75]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP80:![0-9]+]]
@@ -1042,13 +1036,12 @@ define void @hoist_all_three_loads_at_same_address(ptr %dst, ptr %src, ptr noali
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE11:.*]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP6]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x ptr> [[TMP8]], ptr [[TMP7]], i32 1
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP10]], align 4
; CHECK-NEXT: [[TMP11:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
; CHECK-NEXT: [[TMP12:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 20)
@@ -1108,7 +1101,7 @@ define void @hoist_all_three_loads_at_same_address(ptr %dst, ptr %src, ptr noali
; CHECK-NEXT: [[TMP42:%.*]] = phi <2 x i32> [ [[TMP38]], %[[PRED_LOAD_CONTINUE9]] ], [ [[TMP41]], %[[PRED_LOAD_IF10]] ]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP25]], <2 x i32> [[TMP34]], <2 x i32> [[TMP24]]
; CHECK-NEXT: [[PREDPHI16:%.*]] = select <2 x i1> [[TMP11]], <2 x i32> [[TMP42]], <2 x i32> [[PREDPHI]]
-; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
+; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
; CHECK-NEXT: store <2 x i32> [[PREDPHI16]], ptr [[TMP43]], align 4, !alias.scope [[META85:![0-9]+]], !noalias [[META82]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
diff --git a/llvm/test/Transforms/LoopVectorize/induction-multiple-uses-in-same-instruction.ll b/llvm/test/Transforms/LoopVectorize/induction-multiple-uses-in-same-instruction.ll
index ccf05d73945ff..d0c3eaf0f716a 100644
--- a/llvm/test/Transforms/LoopVectorize/induction-multiple-uses-in-same-instruction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction-multiple-uses-in-same-instruction.ll
@@ -11,14 +11,12 @@ define void @multiple_iv_uses_in_same_instruction(ptr %ptr) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDEX]] to i32
-; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 0
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], 1
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [100 x [100 x i32]], ptr [[PTR:%.*]], i64 0, i64 [[TMP0]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [100 x [100 x i32]], ptr [[PTR:%.*]], i64 0, i64 [[INDEX]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [100 x [100 x i32]], ptr [[PTR]], i64 0, i64 [[TMP1]], i64 [[TMP1]]
-; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP5]], align 4
+; CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4
; CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP6]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
diff --git a/llvm/test/Transforms/LoopVectorize/induction-ptrcasts.ll b/llvm/test/Transforms/LoopVectorize/induction-ptrcasts.ll
index 8f773e2f0edd3..a2e60c46ebf59 100644
--- a/llvm/test/Transforms/LoopVectorize/induction-ptrcasts.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction-ptrcasts.ll
@@ -22,9 +22,8 @@ define void @int_iv_based_on_pointer_iv(ptr %A) {
; VF2: vector.body:
; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
-; VF2-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
; VF2-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 4
-; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP3]]
+; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[OFFSET_IDX]]
; VF2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP4]]
; VF2-NEXT: store i8 0, ptr [[TMP9]], align 1
; VF2-NEXT: store i8 0, ptr [[TMP10]], align 1
diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
index b6fb378a042fd..614af1547a1d9 100644
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -863,15 +863,14 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 8
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[TMP5]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP6]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP8]], i32 1
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP11]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP12]], align 4
@@ -1065,11 +1064,10 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) {
; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP35:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP36:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
-; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 8
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 16
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 24
-; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP3]]
+; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[OFFSET_IDX]]
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP4]]
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP5]]
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]]
@@ -1081,7 +1079,7 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) {
; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP10]], align 4
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = insertelement <2 x float> poison, float [[TMP15]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = insertelement <2 x float> [[TMP17]], float [[TMP16]], i32 1
-; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP3]]
+; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[OFFSET_IDX]]
; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP4]]
; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP5]]
; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP6]]
@@ -1237,9 +1235,8 @@ define void @scalarize_induction_variable_03(ptr %p, i32 %y, i64 %n) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], i64 [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP1]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 8
@@ -1391,11 +1388,10 @@ define void @scalarize_induction_variable_03(ptr %p, i32 %y, i64 %n) {
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], i64 [[TMP0]], i32 1
+; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 1
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP1]], i32 1
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP2]], i32 1
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP3]], i32 1
@@ -1564,19 +1560,18 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP11:%.*]] = shl nsw <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP12]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP26]], align 1, !alias.scope [[META17:![0-9]+]]
-; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1, !alias.scope [[META17]]
-; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P]], i64 [[TMP9]], i32 1
+; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP26]], align 1, !alias.scope [[META17:![0-9]+]]
+; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 1, !alias.scope [[META17]]
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P]], i64 [[INDEX]], i32 1
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP10]], i32 1
-; CHECK-NEXT: store i32 [[TMP16]], ptr [[TMP18]], align 1, !alias.scope [[META20:![0-9]+]], !noalias [[META17]]
-; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP19]], align 1, !alias.scope [[META20]], !noalias [[META17]]
+; CHECK-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 1, !alias.scope [[META20:![0-9]+]], !noalias [[META17]]
+; CHECK-NEXT: store i32 [[TMP16]], ptr [[TMP19]], align 1, !alias.scope [[META20]], !noalias [[META17]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -1774,7 +1769,6 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) {
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
-; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 1
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 2
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 3
@@ -1788,18 +1782,18 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) {
; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP19]]
; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]]
; UNROLL-NO-IC-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP37]]
-; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP18]], align 1, !alias.scope [[META17:![0-9]+]]
-; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP20]], align 1, !alias.scope [[META17]]
-; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP22]], align 1, !alias.scope [[META17]]
-; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP38]], align 1, !alias.scope [[META17]]
-; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P]], i64 [[TMP9]], i32 1
+; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP18]], align 1, !alias.scope [[META17:![0-9]+]]
+; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP20]], align 1, !alias.scope [[META17]]
+; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP22]], align 1, !alias.scope [[META17]]
+; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP38]], align 1, !alias.scope [[META17]]
+; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P]], i64 [[INDEX]], i32 1
; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP10]], i32 1
; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP11]], i32 1
; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP12]], i32 1
-; UNROLL-NO-IC-NEXT: store i32 [[TMP23]], ptr [[TMP27]], align 1, !alias.scope [[META20:![0-9]+]], !noalias [[META17]]
-; UNROLL-NO-IC-NEXT: store i32 [[TMP24]], ptr [[TMP28]], align 1, !alias.scope [[META20]], !noalias [[META17]]
-; UNROLL-NO-IC-NEXT: store i32 [[TMP25]], ptr [[TMP29]], align 1, !alias.scope [[META20]], !noalias [[META17]]
-; UNROLL-NO-IC-NEXT: store i32 [[TMP26]], ptr [[TMP30]], align 1, !alias.scope [[META20]], !noalias [[META17]]
+; UNROLL-NO-IC-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 1, !alias.scope [[META20:![0-9]+]], !noalias [[META17]]
+; UNROLL-NO-IC-NEXT: store i32 [[TMP23]], ptr [[TMP28]], align 1, !alias.scope [[META20]], !noalias [[META17]]
+; UNROLL-NO-IC-NEXT: store i32 [[TMP24]], ptr [[TMP29]], align 1, !alias.scope [[META20]], !noalias [[META17]]
+; UNROLL-NO-IC-NEXT: store i32 [[TMP25]], ptr [[TMP30]], align 1, !alias.scope [[META20]], !noalias [[META17]]
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[STEP_ADD]], splat (i64 2)
; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -2428,13 +2422,12 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], [[VEC_IND]]
; CHECK-NEXT: [[TMP6:%.*]] = trunc <2 x i32> [[TMP5]] to <2 x i16>
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i16> [[TMP6]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i16> [[TMP6]], i32 1
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR_I16:%.*]], ptr [[P:%.*]], i64 [[TMP3]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR_I16:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 1
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[PAIR_I16]], ptr [[P]], i64 [[TMP4]], i32 1
; CHECK-NEXT: store i16 [[TMP9]], ptr [[TMP7]], align 2
; CHECK-NEXT: store i16 [[TMP10]], ptr [[TMP8]], align 2
@@ -2590,7 +2583,6 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) {
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
-; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 2
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 3
@@ -2602,7 +2594,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) {
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = trunc <2 x i32> [[TMP8]] to <2 x i16>
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = extractelement <2 x i16> [[TMP10]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = extractelement <2 x i16> [[TMP10]], i32 1
-; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR_I16:%.*]], ptr [[P:%.*]], i64 [[TMP3]], i32 1
+; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR_I16:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 1
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[PAIR_I16]], ptr [[P]], i64 [[TMP4]], i32 1
; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[PAIR_I16]], ptr [[P]], i64 [[TMP5]], i32 1
; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[PAIR_I16]], ptr [[P]], i64 [[TMP6]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-metadata.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-metadata.ll
index 9bbd67059e84d..f64c649604512 100644
--- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-metadata.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-metadata.ll
@@ -20,26 +20,25 @@ define void @merge_tbaa_interleave_group(ptr nocapture readonly %p, ptr noalias
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_VEC4R:%.*]], ptr [[P]], i64 [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_VEC4R:%.*]], ptr [[P]], i64 [[INDEX]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_VEC4R]], ptr [[P]], i64 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP2]], align 8, !tbaa [[TBAA0:![0-9]+]]
; CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[TMP3]], align 8, !tbaa [[TBAA0]]
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[TMP4]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP5]], i32 1
; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], splat (double 2.000000e+00)
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [20 x %struct.Vec2r], ptr [[CP]], i64 0, i64 [[TMP0]], i32 0
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_VEC4R]], ptr [[P]], i64 [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [20 x [[STRUCT_VEC2R:%.*]]], ptr [[CP]], i64 0, i64 [[INDEX]], i32 0
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_VEC4R]], ptr [[P]], i64 [[INDEX]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_VEC4R]], ptr [[P]], i64 [[TMP1]], i32 1
-; CHECK-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP10]], align 8, !tbaa [[TBAA5:![0-9]+]]
+; CHECK-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP9]], align 8, !tbaa [[TBAA5:![0-9]+]]
; CHECK-NEXT: [[TMP13:%.*]] = load double, ptr [[TMP11]], align 8, !tbaa [[TBAA5]]
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x double> poison, double [[TMP12]], i32 0
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x double> [[TMP14]], double [[TMP13]], i32 1
; CHECK-NEXT: [[TMP16:%.*]] = fmul <2 x double> [[TMP15]], splat (double 3.000000e+00)
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> [[TMP16]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP17]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
-; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP9]], align 8, !tbaa [[TBAA6:![0-9]+]]
+; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP10]], align 8, !tbaa [[TBAA6:![0-9]+]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4
; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
index 4f19a7c586bc3..6a03bd3859829 100644
--- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
@@ -1068,9 +1068,8 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) {
; VEC-NEXT: br label %[[VECTOR_BODY:.*]]
; VEC: [[VECTOR_BODY]]:
; VEC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; VEC-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; VEC-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 1
-; VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP0]]
+; VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[INDEX]]
; VEC-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP1]], align 2
; VEC-NEXT: [[TMP5:%.*]] = add i32 [[STEP_2]], [[TMP6]]
; VEC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
@@ -1138,9 +1137,8 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) {
; VEC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VEC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 2>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VEC-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX]], 2
-; VEC-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0
; VEC-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 2
-; VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP0]]
+; VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[OFFSET_IDX]]
; VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP1]]
; VEC-NEXT: store i16 0, ptr [[TMP2]], align 2
; VEC-NEXT: store i16 0, ptr [[TMP3]], align 2
diff --git a/llvm/test/Transforms/LoopVectorize/lcssa-crashes.ll b/llvm/test/Transforms/LoopVectorize/lcssa-crashes.ll
index e02eb433f9247..fc99e0ffda2eb 100644
--- a/llvm/test/Transforms/LoopVectorize/lcssa-crashes.ll
+++ b/llvm/test/Transforms/LoopVectorize/lcssa-crashes.ll
@@ -89,11 +89,10 @@ define void @test3(ptr %p) {
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY1:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[POS_337:%.*]] = add i32 [[ADD41]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[ADD41]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[ADD41]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[ADD41]], 3
-; CHECK-NEXT: [[INC46:%.*]] = add i32 [[POS_337]], 1
+; CHECK-NEXT: [[INC46:%.*]] = add i32 [[ADD41]], 1
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP1]], 1
; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP2]], 1
; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP3]], 1
diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-neg-off.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-neg-off.ll
index b14a1cdff92c2..0f66692aacf06 100644
--- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-neg-off.ll
+++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-neg-off.ll
@@ -21,9 +21,10 @@ define i8 @test_negative_off(i16 %len, ptr %test_base) {
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i8> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[PRED_LOAD_CONTINUE2]] ]
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 -1000, [[DOTCAST]]
-; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i16 [[TMP0]]
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i16> poison, i16 [[OFFSET_IDX]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i16> [[TMP8]], i16 [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i16 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i16 [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = load i1, ptr [[TMP2]], align 1
; CHECK-NEXT: [[TMP5:%.*]] = load i1, ptr [[TMP3]], align 1
@@ -31,7 +32,7 @@ define i8 @test_negative_off(i16 %len, ptr %test_base) {
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i1> [[TMP6]], i1 [[TMP5]], i32 1
; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK: pred.load.if:
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[ALLOCA]], i16 [[TMP0]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[ALLOCA]], i16 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i8> poison, i8 [[TMP10]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
diff --git a/llvm/test/Transforms/LoopVectorize/loop-form.ll b/llvm/test/Transforms/LoopVectorize/loop-form.ll
index 4db3d1eed4771..b46cd8ecea5b3 100644
--- a/llvm/test/Transforms/LoopVectorize/loop-form.ll
+++ b/llvm/test/Transforms/LoopVectorize/loop-form.ll
@@ -1299,9 +1299,8 @@ define i16 @multiple_exit_none_via_latch(ptr %dst, i64 %x) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP4]]
; CHECK-NEXT: store i64 0, ptr [[TMP5]], align 8
; CHECK-NEXT: store i64 0, ptr [[TMP6]], align 8
diff --git a/llvm/test/Transforms/LoopVectorize/loop-with-constant-exit-condition.ll b/llvm/test/Transforms/LoopVectorize/loop-with-constant-exit-condition.ll
index 9339aed927960..0fa47f71851e6 100644
--- a/llvm/test/Transforms/LoopVectorize/loop-with-constant-exit-condition.ll
+++ b/llvm/test/Transforms/LoopVectorize/loop-with-constant-exit-condition.ll
@@ -17,9 +17,8 @@ define i16 @multiple_exit_one_with_constant_condition(ptr %dst, i64 %x) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP4]]
; CHECK-NEXT: store i64 0, ptr [[TMP5]], align 8
; CHECK-NEXT: store i64 0, ptr [[TMP6]], align 8
diff --git a/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll b/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll
index 1736b66511bfb..3f8644c18957c 100644
--- a/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll
+++ b/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll
@@ -14,9 +14,6 @@ define void @narrow_select_to_single_scalar(i1 %invar.cond, ptr noalias %A, ptr
; VF4IC1: [[VECTOR_BODY]]:
; VF4IC1-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VF4IC1-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
-; VF4IC1-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], 0
-; VF4IC1-NEXT: [[TMP3:%.*]] = add i16 [[OFFSET_IDX]], 1
-; VF4IC1-NEXT: [[TMP4:%.*]] = add i16 [[OFFSET_IDX]], 2
; VF4IC1-NEXT: [[TMP5:%.*]] = add i16 [[OFFSET_IDX]], 3
; VF4IC1-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i16 [[TMP5]]
; VF4IC1-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP6]], align 1
@@ -41,7 +38,6 @@ define void @narrow_select_to_single_scalar(i1 %invar.cond, ptr noalias %A, ptr
; VF2IC2: [[VECTOR_BODY]]:
; VF2IC2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VF2IC2-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
-; VF2IC2-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], 2
; VF2IC2-NEXT: [[TMP3:%.*]] = add i16 [[OFFSET_IDX]], 3
; VF2IC2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i16 [[TMP3]]
; VF2IC2-NEXT: [[TMP5:%.*]] = load i16, ptr [[TMP4]], align 1
@@ -86,7 +82,6 @@ define void @narrow_to_single_scalar_store_address_not_uniform_across_all_parts(
; VF4IC1: [[VECTOR_BODY]]:
; VF4IC1-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; VF4IC1-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; VF4IC1-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
; VF4IC1-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2
; VF4IC1-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3
@@ -99,7 +94,7 @@ define void @narrow_to_single_scalar_store_address_not_uniform_across_all_parts(
; VF4IC1-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP7]]
; VF4IC1-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP9]]
; VF4IC1-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP11]]
-; VF4IC1-NEXT: store i32 [[TMP0]], ptr [[TMP6]], align 4
+; VF4IC1-NEXT: store i32 [[INDEX]], ptr [[TMP6]], align 4
; VF4IC1-NEXT: store i32 [[TMP1]], ptr [[TMP8]], align 4
; VF4IC1-NEXT: store i32 [[TMP2]], ptr [[TMP10]], align 4
; VF4IC1-NEXT: store i32 [[TMP3]], ptr [[TMP12]], align 4
@@ -120,11 +115,10 @@ define void @narrow_to_single_scalar_store_address_not_uniform_across_all_parts(
; VF2IC2-NEXT: br label %[[VECTOR_BODY:.*]]
; VF2IC2: [[VECTOR_BODY]]:
; VF2IC2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; VF2IC2-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 0
; VF2IC2-NEXT: [[TMP8:%.*]] = add i32 [[INDEX]], 1
; VF2IC2-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 2
; VF2IC2-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 3
-; VF2IC2-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP7]], 1
+; VF2IC2-NEXT: [[TMP2:%.*]] = lshr i32 [[INDEX]], 1
; VF2IC2-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP0]], 1
; VF2IC2-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP2]]
; VF2IC2-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP3]]
diff --git a/llvm/test/Transforms/LoopVectorize/operand-bundles.ll b/llvm/test/Transforms/LoopVectorize/operand-bundles.ll
index ce0736486de28..18bd16873c1d5 100644
--- a/llvm/test/Transforms/LoopVectorize/operand-bundles.ll
+++ b/llvm/test/Transforms/LoopVectorize/operand-bundles.ll
@@ -142,18 +142,17 @@ define void @assume_loop_variant_operand_bundle(ptr noalias %a, ptr noalias %b)
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP8]], align 4
-; CHECK-NEXT: tail call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 [[TMP0]]) ]
+; CHECK-NEXT: tail call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 [[INDEX]]) ]
; CHECK-NEXT: tail call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 [[TMP1]]) ]
; CHECK-NEXT: tail call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 [[TMP2]]) ]
; CHECK-NEXT: tail call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 [[TMP3]]) ]
; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[WIDE_LOAD]], splat (float 1.000000e+00)
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: store <4 x float> [[TMP5]], ptr [[TMP10]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1600
diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
index 1a1c05187590e..a0f00bf77a451 100644
--- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
@@ -797,7 +797,6 @@ define void @multiple_ivs_wide(ptr %dst) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 -64, i32 -62, i32 -60, i32 -58>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[INDEX]], 2
-; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 0
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], 4
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP0]], 6
@@ -806,7 +805,7 @@ define void @multiple_ivs_wide(ptr %dst) {
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[TMP5]], i32 1
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP5]], i32 2
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP5]], i32 3
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP2]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP3]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
@@ -833,7 +832,6 @@ define void @multiple_ivs_wide(ptr %dst) {
; CHECK-NEXT: [[INDEX1:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT4:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT3:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX1]], 2
-; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[OFFSET_IDX]], 2
; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[OFFSET_IDX]], 4
; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[OFFSET_IDX]], 6
@@ -842,7 +840,7 @@ define void @multiple_ivs_wide(ptr %dst) {
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[TMP19]], i32 1
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i32> [[TMP19]], i32 2
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP19]], i32 3
-; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP15]]
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP16]]
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP17]]
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP18]]
diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction-index-width-smaller-than-iv-width.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction-index-width-smaller-than-iv-width.ll
index 94392f856c386..515ef2b9b8636 100644
--- a/llvm/test/Transforms/LoopVectorize/pointer-induction-index-width-smaller-than-iv-width.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-induction-index-width-smaller-than-iv-width.ll
@@ -15,22 +15,20 @@ define void @wide_ptr_induction_index_width_smaller_than_iv_width(ptr noalias %s
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[INDEX]] to i32
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[DOTCAST]], 8
-; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[OFFSET_IDX]], 8
; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[OFFSET_IDX]], 16
; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[OFFSET_IDX]], 24
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC]], i32 [[TMP11]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC]], i32 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[SRC]], i32 [[TMP6]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[SRC]], i32 [[TMP16]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[SRC]], i32 [[TMP17]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP5]], align 1
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DST_0]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DST_0]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST_0]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST_0]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[DST_0]], i64 [[TMP4]]
diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
index d96134e8adf1d..350a7f7de4083 100644
--- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
@@ -25,11 +25,10 @@ define void @a(ptr readnone %b) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE10:%.*]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 0, [[INDEX]]
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], -1
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], -2
; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], -3
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP2]]
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]]
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP14]]
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr null, i64 [[TMP17]]
@@ -536,15 +535,13 @@ define i64 @ivopt_widen_ptr_indvar_2(ptr noalias %a, i64 %stride, i64 %n) {
; STRIDED: vector.body:
; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; STRIDED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], [[TMP1]]
-; STRIDED-NEXT: [[TMP8:%.*]] = mul i64 0, [[TMP1]]
-; STRIDED-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], [[TMP8]]
; STRIDED-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP1]]
; STRIDED-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], [[TMP10]]
; STRIDED-NEXT: [[TMP12:%.*]] = mul i64 2, [[TMP1]]
; STRIDED-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], [[TMP12]]
; STRIDED-NEXT: [[TMP14:%.*]] = mul i64 3, [[TMP1]]
; STRIDED-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], [[TMP14]]
-; STRIDED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP9]]
+; STRIDED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[OFFSET_IDX]]
; STRIDED-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]]
; STRIDED-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP13]]
; STRIDED-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP15]]
@@ -643,12 +640,6 @@ define i64 @ivopt_widen_ptr_indvar_3(ptr noalias %a, i64 %stride, i64 %n) {
; STRIDED: vector.body:
; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; STRIDED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], [[TMP1]]
-; STRIDED-NEXT: [[TMP4:%.*]] = mul i64 0, [[TMP1]]
-; STRIDED-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], [[TMP4]]
-; STRIDED-NEXT: [[TMP6:%.*]] = mul i64 1, [[TMP1]]
-; STRIDED-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], [[TMP6]]
-; STRIDED-NEXT: [[TMP8:%.*]] = mul i64 2, [[TMP1]]
-; STRIDED-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], [[TMP8]]
; STRIDED-NEXT: [[TMP10:%.*]] = mul i64 3, [[TMP1]]
; STRIDED-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], [[TMP10]]
; STRIDED-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]]
diff --git a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
index 3276528e54225..54c8238422f0b 100644
--- a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
@@ -18,9 +18,8 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; IC1-NEXT: br label %[[VECTOR_BODY:.*]]
; IC1: [[VECTOR_BODY]]:
; IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE13:.*]] ]
-; IC1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
; IC1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
-; IC1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP1]]
+; IC1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]]
; IC1-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP2]]
; IC1-NEXT: [[TMP12:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0
; IC1-NEXT: [[TMP16:%.*]] = insertelement <2 x ptr> [[TMP12]], ptr [[NEXT_GEP3]], i32 1
@@ -113,11 +112,10 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; IC2-NEXT: br label %[[VECTOR_BODY:.*]]
; IC2: [[VECTOR_BODY]]:
; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE28:.*]] ]
-; IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
; IC2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
; IC2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 2
; IC2-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3
-; IC2-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP1]]
+; IC2-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]]
; IC2-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP2]]
; IC2-NEXT: [[TMP23:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0
; IC2-NEXT: [[TMP24:%.*]] = insertelement <2 x ptr> [[TMP23]], ptr [[NEXT_GEP3]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
index 43dede0b612f3..328fdc7793319 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
@@ -1207,11 +1207,11 @@ for.end:
define i32 @reduction_sum_multiuse(ptr noalias nocapture %A, ptr noalias nocapture %B) {
; CHECK-LABEL: define i32 @reduction_sum_multiuse(
; CHECK-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]]) {
-; CHECK-NEXT: [[_LR_PH1:.*]]:
+; CHECK-NEXT: [[_LR_PH:.*]]:
; CHECK-NEXT: br label %[[DOTLR_PH:.*]]
-; CHECK: [[_LR_PH:.*:]]
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH1]] ]
-; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L10:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH1]] ]
+; CHECK: [[_LR_PH1:.*:]]
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ]
+; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L10:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ]
; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4
; CHECK-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
@@ -1231,11 +1231,11 @@ define i32 @reduction_sum_multiuse(ptr noalias nocapture %A, ptr noalias nocaptu
;
; CHECK-INTERLEAVED-LABEL: define i32 @reduction_sum_multiuse(
; CHECK-INTERLEAVED-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]]) {
-; CHECK-INTERLEAVED-NEXT: [[_LR_PH1:.*]]:
+; CHECK-INTERLEAVED-NEXT: [[_LR_PH:.*]]:
; CHECK-INTERLEAVED-NEXT: br label %[[DOTLR_PH:.*]]
-; CHECK-INTERLEAVED: [[_LR_PH:.*:]]
-; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH1]] ]
-; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L10:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH1]] ]
+; CHECK-INTERLEAVED: [[_LR_PH1:.*:]]
+; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ]
+; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L10:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ]
; CHECK-INTERLEAVED-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-INTERLEAVED-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4
; CHECK-INTERLEAVED-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
@@ -2264,11 +2264,14 @@ define i32 @predicated_or_dominates_reduction(ptr %b) {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6:.*]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP48:%.*]], %[[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [0 x [[STRUCT_E:%.*]]], ptr [[B]], i32 0, i32 [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP2]], i32 2
+; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP3]], i32 3
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [0 x [[STRUCT_E:%.*]]], ptr [[B]], i32 0, i32 [[INDEX]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP1]], i32 1
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP2]], i32 1
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP3]], i32 1
@@ -2284,7 +2287,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) {
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP19]], i32 0
; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; CHECK: [[PRED_LOAD_IF]]:
-; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP0]]
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[INDEX]]
; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> poison, i32 [[TMP23]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
@@ -2342,15 +2345,22 @@ define i32 @predicated_or_dominates_reduction(ptr %b) {
; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE15:.*]] ]
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP94:%.*]], %[[PRED_LOAD_CONTINUE15]] ]
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP98:%.*]], %[[PRED_LOAD_CONTINUE15]] ]
-; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3
+; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
+; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP1]], i32 1
+; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP2]], i32 2
+; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP3]], i32 3
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 4
; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 5
; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 6
; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 7
-; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr inbounds [0 x [[STRUCT_E:%.*]]], ptr [[B]], i32 0, i32 [[TMP0]], i32 1
+; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i32 0
+; CHECK-INTERLEAVED-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP5]], i32 1
+; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP37]], i32 [[TMP6]], i32 2
+; CHECK-INTERLEAVED-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP7]], i32 3
+; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr inbounds [0 x [[STRUCT_E:%.*]]], ptr [[B]], i32 0, i32 [[INDEX]], i32 1
; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP1]], i32 1
; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP2]], i32 1
; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP3]], i32 1
@@ -2379,7 +2389,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) {
; CHECK-INTERLEAVED-NEXT: [[TMP41:%.*]] = extractelement <4 x i1> [[TMP39]], i32 0
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP41]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; CHECK-INTERLEAVED: [[PRED_LOAD_IF]]:
-; CHECK-INTERLEAVED-NEXT: [[TMP43:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP0]]
+; CHECK-INTERLEAVED-NEXT: [[TMP43:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[INDEX]]
; CHECK-INTERLEAVED-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4
; CHECK-INTERLEAVED-NEXT: [[TMP45:%.*]] = insertelement <4 x i32> poison, i32 [[TMP44]], i32 0
; CHECK-INTERLEAVED-NEXT: br label %[[PRED_LOAD_CONTINUE]]
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
index dfdf1100eb57b..202352d380da4 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
@@ -329,11 +329,10 @@ define void @reduc_store_inside_unrolled(ptr %dst, ptr readonly %src) {
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 2, i64 4, i64 6>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP34:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP3]]
@@ -535,11 +534,10 @@ define void @reduc_store_middle_store_predicated(ptr %dst, ptr readonly %src) {
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 2, i64 4, i64 6>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP34:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP3]]
diff --git a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll
index 24dc182fe24a1..3b7cc7e95c4a7 100644
--- a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll
+++ b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll
@@ -160,17 +160,16 @@ define void @redundant_branch_and_blends_without_mask(ptr %A) {
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 0
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 1
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 2
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 3
-; CHECK-NEXT: [[TMP35:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP5]], i32 0
+; CHECK-NEXT: [[TMP35:%.*]] = insertelement <4 x ptr> poison, ptr [[A]], i32 0
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x ptr> [[TMP35]], ptr [[TMP6]], i32 1
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x ptr> [[TMP36]], ptr [[TMP7]], i32 2
; CHECK-NEXT: [[TMP38:%.*]] = insertelement <4 x ptr> [[TMP37]], ptr [[TMP8]], i32 3
; CHECK-NEXT: br i1 true, label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; CHECK: [[PRED_LOAD_IF]]:
-; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP5]], align 4
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP10]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
; CHECK: [[PRED_LOAD_CONTINUE]]:
@@ -201,7 +200,7 @@ define void @redundant_branch_and_blends_without_mask(ptr %A) {
; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> [[TMP26]], i32 0
-; CHECK-NEXT: store i32 [[TMP28]], ptr [[TMP5]], align 4
+; CHECK-NEXT: store i32 [[TMP28]], ptr [[A]], align 4
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll
index f80d7b695e2af..7b27b16a5f261 100644
--- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll
+++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll
@@ -149,28 +149,27 @@ define void @ld_div1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8
-; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1
-; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1
+; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42)
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP10]], i32 0
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP10]], i32 1
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
+; CHECK-NEXT: store i64 [[TMP11]], ptr [[TMP13]], align 8
; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8
-; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4)
-; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
-; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
+; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: exit:
@@ -204,21 +203,20 @@ define void @ld_div2_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 [[TMP0]], 2
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8
-; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42)
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = udiv i64 [[OFFSET_IDX]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42)
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
+; CHECK-NEXT: store i64 [[TMP4]], ptr [[TMP6]], align 8
; CHECK-NEXT: store i64 [[TMP5]], ptr [[TMP7]], align 8
-; CHECK-NEXT: store i64 [[TMP6]], ptr [[TMP8]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
-; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
+; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: exit:
@@ -253,28 +251,27 @@ define void @ld_div3_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3)
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3)
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8
-; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1
-; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1
+; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42)
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP10]], i32 0
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP10]], i32 1
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
+; CHECK-NEXT: store i64 [[TMP11]], ptr [[TMP13]], align 8
; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8
-; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4)
-; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
-; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
+; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: exit:
@@ -309,28 +306,27 @@ define void @ld_div1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
-; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 3
+; CHECK-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8
-; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1
-; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1
+; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42)
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP10]], i32 0
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP10]], i32 1
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
+; CHECK-NEXT: store i64 [[TMP11]], ptr [[TMP13]], align 8
; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8
-; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6)
-; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
-; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
+; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH:%.*]]
; CHECK: scalar.ph:
@@ -364,28 +360,27 @@ define void @ld_div2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
-; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2)
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 3
+; CHECK-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8
-; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1
-; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1
+; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42)
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP10]], i32 0
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP10]], i32 1
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
+; CHECK-NEXT: store i64 [[TMP11]], ptr [[TMP13]], align 8
; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8
-; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6)
-; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
-; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
+; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH:%.*]]
; CHECK: scalar.ph:
@@ -418,21 +413,20 @@ define void @ld_div3_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
-; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 [[TMP0]], 3
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8
-; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42)
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 3
+; CHECK-NEXT: [[TMP1:%.*]] = udiv i64 [[OFFSET_IDX]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42)
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
+; CHECK-NEXT: store i64 [[TMP4]], ptr [[TMP6]], align 8
; CHECK-NEXT: store i64 [[TMP5]], ptr [[TMP7]], align 8
-; CHECK-NEXT: store i64 [[TMP6]], ptr [[TMP8]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
-; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
+; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH:%.*]]
; CHECK: scalar.ph:
@@ -606,28 +600,27 @@ define void @ld_div1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
-; CHECK-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
-; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
-; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1
+; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
+; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8
; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
-; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4)
-; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498
-; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
+; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498
+; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH:%.*]]
; CHECK: scalar.ph:
@@ -661,21 +654,20 @@ define void @ld_div2_step2_start1_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
-; CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[TMP1]], 2
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42)
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 [[OFFSET_IDX]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42)
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
+; CHECK-NEXT: store i64 [[TMP5]], ptr [[TMP7]], align 8
; CHECK-NEXT: store i64 [[TMP6]], ptr [[TMP8]], align 8
-; CHECK-NEXT: store i64 [[TMP7]], ptr [[TMP9]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498
-; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498
+; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH:%.*]]
; CHECK: scalar.ph:
@@ -710,28 +702,27 @@ define void @ld_div3_step2_start1_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
-; CHECK-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3)
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3)
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
-; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
-; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1
+; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
+; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8
; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
-; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4)
-; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498
-; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
+; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498
+; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH:%.*]]
; CHECK: scalar.ph:
@@ -766,28 +757,27 @@ define void @ld_div1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
-; CHECK-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
-; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
-; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1
+; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
+; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8
; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
-; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6)
-; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
-; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
+; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
+; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH:%.*]]
; CHECK: scalar.ph:
@@ -822,28 +812,27 @@ define void @ld_div2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
-; CHECK-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2)
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
-; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
-; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1
+; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
+; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8
; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
-; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6)
-; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
-; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
+; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
+; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH:%.*]]
; CHECK: scalar.ph:
@@ -877,21 +866,20 @@ define void @ld_div3_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
-; CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[TMP1]], 3
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42)
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 [[OFFSET_IDX]], 3
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42)
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
+; CHECK-NEXT: store i64 [[TMP5]], ptr [[TMP7]], align 8
; CHECK-NEXT: store i64 [[TMP6]], ptr [[TMP8]], align 8
-; CHECK-NEXT: store i64 [[TMP7]], ptr [[TMP9]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
-; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
+; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH:%.*]]
; CHECK: scalar.ph:
@@ -924,22 +912,21 @@ define void @test_step_is_not_invariant(ptr %A) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[INDEX]] to i16
-; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[TMP0]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[TMP0]], 1
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw <2 x i32> [[VEC_IND]], [[VEC_IND]]
-; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i32> [[TMP3]] to <2 x i16>
-; CHECK-NEXT: [[TMP5:%.*]] = udiv <2 x i16> [[TMP4]], splat (i16 6)
-; CHECK-NEXT: [[TMP6:%.*]] = zext <2 x i16> [[TMP5]] to <2 x i64>
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP6]], i32 0
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP6]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[TMP0]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw <2 x i32> [[VEC_IND]], [[VEC_IND]]
+; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i32> [[TMP2]] to <2 x i16>
+; CHECK-NEXT: [[TMP4:%.*]] = udiv <2 x i16> [[TMP3]], splat (i16 6)
+; CHECK-NEXT: [[TMP5:%.*]] = zext <2 x i16> [[TMP4]] to <2 x i64>
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP7]]
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP8]]
+; CHECK-NEXT: store i16 [[TMP0]], ptr [[TMP8]], align 2
; CHECK-NEXT: store i16 [[TMP1]], ptr [[TMP9]], align 2
-; CHECK-NEXT: store i16 [[TMP2]], ptr [[TMP10]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2)
-; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], 56
-; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
+; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 56
+; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: exit:
diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll
index 0f191b2d8a278..9fc807e955f8b 100644
--- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll
+++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll
@@ -149,28 +149,27 @@ define void @ld_and_neg1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -1)
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -1)
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8
-; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1
-; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1
+; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42)
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP10]], i32 0
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP10]], i32 1
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
+; CHECK-NEXT: store i64 [[TMP11]], ptr [[TMP13]], align 8
; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8
-; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4)
-; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
-; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
+; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: exit:
@@ -204,19 +203,18 @@ define void @ld_and_neg2_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0]], 1
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], 42
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[OFFSET_IDX]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP3]], 42
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
-; CHECK-NEXT: store i64 [[TMP5]], ptr [[TMP6]], align 8
-; CHECK-NEXT: store i64 [[TMP5]], ptr [[TMP7]], align 8
+; CHECK-NEXT: store i64 [[TMP4]], ptr [[TMP5]], align 8
+; CHECK-NEXT: store i64 [[TMP4]], ptr [[TMP6]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
-; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
+; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: exit:
@@ -251,28 +249,27 @@ define void @ld_and_neg1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
-; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -1)
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 3
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -1)
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8
-; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1
-; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1
+; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42)
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP10]], i32 0
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP10]], i32 1
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
+; CHECK-NEXT: store i64 [[TMP11]], ptr [[TMP13]], align 8
; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8
-; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6)
-; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
-; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
+; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH:%.*]]
; CHECK: scalar.ph:
@@ -306,28 +303,27 @@ define void @ld_and_neg2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
-; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -2)
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 3
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -2)
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8
-; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1
-; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1
+; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42)
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP10]], i32 0
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP10]], i32 1
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
+; CHECK-NEXT: store i64 [[TMP11]], ptr [[TMP13]], align 8
; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8
-; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6)
-; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
-; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
+; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH:%.*]]
; CHECK: scalar.ph:
@@ -411,28 +407,27 @@ define void @ld_and_neg2_step2_start1_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
-; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -2)
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -2)
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
-; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
-; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1
+; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
+; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8
; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
-; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4)
-; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498
-; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498
+; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH:%.*]]
; CHECK: scalar.ph:
@@ -467,28 +462,27 @@ define void @ld_and_neg2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
-; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -2)
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -2)
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
-; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
-; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1
+; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
+; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8
; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
-; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6)
-; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
-; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
+; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH:%.*]]
; CHECK: scalar.ph:
@@ -523,28 +517,27 @@ define void @ld_and_neg3_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
-; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -3)
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -3)
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
-; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
-; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1
+; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
+; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8
; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
-; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6)
-; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
-; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
+; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
+; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH:%.*]]
; CHECK: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll
index 7ff10c544f72a..8aa58a009169d 100644
--- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll
+++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll
@@ -283,14 +283,14 @@ define void @ld_div2_ld_scevunknown_nonuniform(ptr %src.a, ptr noalias %src.b, p
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5
-; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6
-; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 5
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 6
+; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 7
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[SRC_A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[SRC_A]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[SRC_A]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[SRC_A]], i64 [[TMP2]]
@@ -298,7 +298,7 @@ define void @ld_div2_ld_scevunknown_nonuniform(ptr %src.a, ptr noalias %src.b, p
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[SRC_A]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[SRC_A]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[SRC_A]], i64 [[TMP6]]
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[SRC_A]], i64 [[TMP7]]
+; CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP7]], align 4
; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP8]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP9]], align 4
; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP10]], align 4
@@ -306,24 +306,24 @@ define void @ld_div2_ld_scevunknown_nonuniform(ptr %src.a, ptr noalias %src.b, p
; CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP12]], align 4
; CHECK-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP13]], align 4
; CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP14]], align 4
-; CHECK-NEXT: [[TMP23:%.*]] = load i64, ptr [[TMP15]], align 4
-; CHECK-NEXT: [[TMP24:%.*]] = insertelement <8 x i64> poison, i64 [[TMP16]], i32 0
-; CHECK-NEXT: [[TMP25:%.*]] = insertelement <8 x i64> [[TMP24]], i64 [[TMP17]], i32 1
-; CHECK-NEXT: [[TMP26:%.*]] = insertelement <8 x i64> [[TMP25]], i64 [[TMP18]], i32 2
-; CHECK-NEXT: [[TMP27:%.*]] = insertelement <8 x i64> [[TMP26]], i64 [[TMP19]], i32 3
-; CHECK-NEXT: [[TMP28:%.*]] = insertelement <8 x i64> [[TMP27]], i64 [[TMP20]], i32 4
-; CHECK-NEXT: [[TMP29:%.*]] = insertelement <8 x i64> [[TMP28]], i64 [[TMP21]], i32 5
-; CHECK-NEXT: [[TMP30:%.*]] = insertelement <8 x i64> [[TMP29]], i64 [[TMP22]], i32 6
-; CHECK-NEXT: [[TMP31:%.*]] = insertelement <8 x i64> [[TMP30]], i64 [[TMP23]], i32 7
-; CHECK-NEXT: [[TMP32:%.*]] = udiv <8 x i64> [[TMP31]], splat (i64 2)
-; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i64> [[TMP32]], i32 0
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i64> [[TMP32]], i32 1
-; CHECK-NEXT: [[TMP35:%.*]] = extractelement <8 x i64> [[TMP32]], i32 2
-; CHECK-NEXT: [[TMP36:%.*]] = extractelement <8 x i64> [[TMP32]], i32 3
-; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i64> [[TMP32]], i32 4
-; CHECK-NEXT: [[TMP38:%.*]] = extractelement <8 x i64> [[TMP32]], i32 5
-; CHECK-NEXT: [[TMP39:%.*]] = extractelement <8 x i64> [[TMP32]], i32 6
-; CHECK-NEXT: [[TMP40:%.*]] = extractelement <8 x i64> [[TMP32]], i32 7
+; CHECK-NEXT: [[TMP23:%.*]] = insertelement <8 x i64> poison, i64 [[TMP15]], i32 0
+; CHECK-NEXT: [[TMP24:%.*]] = insertelement <8 x i64> [[TMP23]], i64 [[TMP16]], i32 1
+; CHECK-NEXT: [[TMP25:%.*]] = insertelement <8 x i64> [[TMP24]], i64 [[TMP17]], i32 2
+; CHECK-NEXT: [[TMP26:%.*]] = insertelement <8 x i64> [[TMP25]], i64 [[TMP18]], i32 3
+; CHECK-NEXT: [[TMP27:%.*]] = insertelement <8 x i64> [[TMP26]], i64 [[TMP19]], i32 4
+; CHECK-NEXT: [[TMP28:%.*]] = insertelement <8 x i64> [[TMP27]], i64 [[TMP20]], i32 5
+; CHECK-NEXT: [[TMP29:%.*]] = insertelement <8 x i64> [[TMP28]], i64 [[TMP21]], i32 6
+; CHECK-NEXT: [[TMP30:%.*]] = insertelement <8 x i64> [[TMP29]], i64 [[TMP22]], i32 7
+; CHECK-NEXT: [[TMP31:%.*]] = udiv <8 x i64> [[TMP30]], splat (i64 2)
+; CHECK-NEXT: [[TMP32:%.*]] = extractelement <8 x i64> [[TMP31]], i32 0
+; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i64> [[TMP31]], i32 1
+; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i64> [[TMP31]], i32 2
+; CHECK-NEXT: [[TMP35:%.*]] = extractelement <8 x i64> [[TMP31]], i32 3
+; CHECK-NEXT: [[TMP36:%.*]] = extractelement <8 x i64> [[TMP31]], i32 4
+; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i64> [[TMP31]], i32 5
+; CHECK-NEXT: [[TMP38:%.*]] = extractelement <8 x i64> [[TMP31]], i32 6
+; CHECK-NEXT: [[TMP39:%.*]] = extractelement <8 x i64> [[TMP31]], i32 7
+; CHECK-NEXT: [[TMP40:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP32]]
; CHECK-NEXT: [[TMP41:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP33]]
; CHECK-NEXT: [[TMP42:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP34]]
; CHECK-NEXT: [[TMP43:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP35]]
@@ -331,7 +331,7 @@ define void @ld_div2_ld_scevunknown_nonuniform(ptr %src.a, ptr noalias %src.b, p
; CHECK-NEXT: [[TMP45:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP37]]
; CHECK-NEXT: [[TMP46:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP38]]
; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP39]]
-; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP40]]
+; CHECK-NEXT: [[TMP48:%.*]] = load i32, ptr [[TMP40]], align 4
; CHECK-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP41]], align 4
; CHECK-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP42]], align 4
; CHECK-NEXT: [[TMP51:%.*]] = load i32, ptr [[TMP43]], align 4
@@ -339,20 +339,19 @@ define void @ld_div2_ld_scevunknown_nonuniform(ptr %src.a, ptr noalias %src.b, p
; CHECK-NEXT: [[TMP53:%.*]] = load i32, ptr [[TMP45]], align 4
; CHECK-NEXT: [[TMP54:%.*]] = load i32, ptr [[TMP46]], align 4
; CHECK-NEXT: [[TMP55:%.*]] = load i32, ptr [[TMP47]], align 4
-; CHECK-NEXT: [[TMP56:%.*]] = load i32, ptr [[TMP48]], align 4
-; CHECK-NEXT: [[TMP57:%.*]] = insertelement <8 x i32> poison, i32 [[TMP49]], i32 0
-; CHECK-NEXT: [[TMP58:%.*]] = insertelement <8 x i32> [[TMP57]], i32 [[TMP50]], i32 1
-; CHECK-NEXT: [[TMP59:%.*]] = insertelement <8 x i32> [[TMP58]], i32 [[TMP51]], i32 2
-; CHECK-NEXT: [[TMP60:%.*]] = insertelement <8 x i32> [[TMP59]], i32 [[TMP52]], i32 3
-; CHECK-NEXT: [[TMP61:%.*]] = insertelement <8 x i32> [[TMP60]], i32 [[TMP53]], i32 4
-; CHECK-NEXT: [[TMP62:%.*]] = insertelement <8 x i32> [[TMP61]], i32 [[TMP54]], i32 5
-; CHECK-NEXT: [[TMP63:%.*]] = insertelement <8 x i32> [[TMP62]], i32 [[TMP55]], i32 6
-; CHECK-NEXT: [[TMP64:%.*]] = insertelement <8 x i32> [[TMP63]], i32 [[TMP56]], i32 7
-; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP0]]
-; CHECK-NEXT: store <8 x i32> [[TMP64]], ptr [[TMP65]], align 4
+; CHECK-NEXT: [[TMP56:%.*]] = insertelement <8 x i32> poison, i32 [[TMP48]], i32 0
+; CHECK-NEXT: [[TMP57:%.*]] = insertelement <8 x i32> [[TMP56]], i32 [[TMP49]], i32 1
+; CHECK-NEXT: [[TMP58:%.*]] = insertelement <8 x i32> [[TMP57]], i32 [[TMP50]], i32 2
+; CHECK-NEXT: [[TMP59:%.*]] = insertelement <8 x i32> [[TMP58]], i32 [[TMP51]], i32 3
+; CHECK-NEXT: [[TMP60:%.*]] = insertelement <8 x i32> [[TMP59]], i32 [[TMP52]], i32 4
+; CHECK-NEXT: [[TMP61:%.*]] = insertelement <8 x i32> [[TMP60]], i32 [[TMP53]], i32 5
+; CHECK-NEXT: [[TMP62:%.*]] = insertelement <8 x i32> [[TMP61]], i32 [[TMP54]], i32 6
+; CHECK-NEXT: [[TMP63:%.*]] = insertelement <8 x i32> [[TMP62]], i32 [[TMP55]], i32 7
+; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[DST]], i64 [[INDEX]]
+; CHECK-NEXT: store <8 x i32> [[TMP63]], ptr [[TMP64]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT: [[TMP66:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
-; CHECK-NEXT: br i1 [[TMP66]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: [[TMP65:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; CHECK-NEXT: br i1 [[TMP65]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH:%.*]]
; CHECK: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll
index a5bb07f1fd4ef..8f2f424305282 100644
--- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll
+++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll
@@ -229,28 +229,27 @@ define void @ld_lshr0_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
-; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
-; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer
-; VF2-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
-; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
+; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2
+; VF2-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer
+; VF2-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
+; VF2-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
+; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
-; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
+; VF2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8
; VF2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8
-; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
-; VF2-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
-; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1
-; VF2-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
-; VF2-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
-; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
+; VF2-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0
+; VF2-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1
+; VF2-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42)
+; VF2-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP10]], i32 0
+; VF2-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP10]], i32 1
+; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
-; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
+; VF2-NEXT: store i64 [[TMP11]], ptr [[TMP13]], align 8
; VF2-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8
-; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4)
-; VF2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
-; VF2-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; VF2-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
+; VF2-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; VF2: middle.block:
; VF2-NEXT: br label [[EXIT:%.*]]
; VF2: exit:
@@ -266,44 +265,43 @@ define void @ld_lshr0_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 2, i64 4, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
-; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
-; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
-; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
-; VF4-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[VEC_IND]], zeroinitializer
-; VF4-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP4]], i32 0
-; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP4]], i32 1
-; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP4]], i32 2
-; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP4]], i32 3
+; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2
+; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 4
+; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6
+; VF4-NEXT: [[TMP3:%.*]] = lshr <4 x i64> [[VEC_IND]], zeroinitializer
+; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
+; VF4-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
+; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
+; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
+; VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
; VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
-; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
+; VF4-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP8]], align 8
; VF4-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP9]], align 8
; VF4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8
; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
-; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8
-; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> poison, i64 [[TMP13]], i32 0
-; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 1
-; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 2
-; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 3
-; VF4-NEXT: [[TMP21:%.*]] = add nsw <4 x i64> [[TMP20]], splat (i64 42)
-; VF4-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP21]], i32 0
-; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP21]], i32 1
-; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP21]], i32 2
-; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP21]], i32 3
+; VF4-NEXT: [[TMP16:%.*]] = insertelement <4 x i64> poison, i64 [[TMP12]], i32 0
+; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 1
+; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2
+; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3
+; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], splat (i64 42)
+; VF4-NEXT: [[TMP21:%.*]] = extractelement <4 x i64> [[TMP20]], i32 0
+; VF4-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP20]], i32 1
+; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP20]], i32 2
+; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP20]], i32 3
+; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
-; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
+; VF4-NEXT: store i64 [[TMP21]], ptr [[TMP25]], align 8
; VF4-NEXT: store i64 [[TMP22]], ptr [[TMP26]], align 8
; VF4-NEXT: store i64 [[TMP23]], ptr [[TMP27]], align 8
; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8
-; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 8)
-; VF4-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
-; VF4-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; VF4-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
+; VF4-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; VF4: middle.block:
; VF4-NEXT: br label [[EXIT:%.*]]
; VF4: exit:
@@ -337,21 +335,20 @@ define void @ld_lshr1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
; VF2: vector.body:
; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
-; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
-; VF2-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0]], 1
-; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
-; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8
-; VF2-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42)
-; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
-; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
+; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2
+; VF2-NEXT: [[TMP1:%.*]] = lshr i64 [[OFFSET_IDX]], 1
+; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
+; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8
+; VF2-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42)
+; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
+; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
+; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
-; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
+; VF2-NEXT: store i64 [[TMP4]], ptr [[TMP6]], align 8
; VF2-NEXT: store i64 [[TMP5]], ptr [[TMP7]], align 8
-; VF2-NEXT: store i64 [[TMP6]], ptr [[TMP8]], align 8
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; VF2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
-; VF2-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; VF2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
+; VF2-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; VF2: middle.block:
; VF2-NEXT: br label [[EXIT:%.*]]
; VF2: exit:
@@ -366,29 +363,28 @@ define void @ld_lshr1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
; VF4: vector.body:
; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
-; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
-; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
-; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
-; VF4-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP0]], 1
-; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
-; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
-; VF4-NEXT: [[TMP6:%.*]] = add nsw <4 x i64> [[WIDE_LOAD]], splat (i64 42)
-; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0
-; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1
-; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2
-; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3
+; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2
+; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 4
+; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6
+; VF4-NEXT: [[TMP3:%.*]] = lshr i64 [[OFFSET_IDX]], 1
+; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
+; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
+; VF4-NEXT: [[TMP5:%.*]] = add nsw <4 x i64> [[WIDE_LOAD]], splat (i64 42)
+; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0
+; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 1
+; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2
+; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3
+; VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
-; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
+; VF4-NEXT: store i64 [[TMP6]], ptr [[TMP10]], align 8
; VF4-NEXT: store i64 [[TMP7]], ptr [[TMP11]], align 8
; VF4-NEXT: store i64 [[TMP8]], ptr [[TMP12]], align 8
; VF4-NEXT: store i64 [[TMP9]], ptr [[TMP13]], align 8
-; VF4-NEXT: store i64 [[TMP10]], ptr [[TMP14]], align 8
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; VF4-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
-; VF4-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; VF4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
+; VF4-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; VF4: middle.block:
; VF4-NEXT: br label [[EXIT:%.*]]
; VF4: exit:
@@ -423,28 +419,27 @@ define void @ld_lshr0_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
-; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
-; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer
-; VF2-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
-; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
+; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 3
+; VF2-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer
+; VF2-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
+; VF2-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
+; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
-; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
+; VF2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8
; VF2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8
-; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
-; VF2-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
-; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1
-; VF2-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
-; VF2-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
-; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
+; VF2-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0
+; VF2-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1
+; VF2-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42)
+; VF2-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP10]], i32 0
+; VF2-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP10]], i32 1
+; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
-; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
+; VF2-NEXT: store i64 [[TMP11]], ptr [[TMP13]], align 8
; VF2-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8
-; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6)
-; VF2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
-; VF2-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; VF2-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
+; VF2-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; VF2: middle.block:
; VF2-NEXT: br label [[SCALAR_PH:%.*]]
; VF2: scalar.ph:
@@ -459,44 +454,43 @@ define void @ld_lshr0_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 3, i64 6, i64 9>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
-; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
-; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6
-; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9
-; VF4-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[VEC_IND]], zeroinitializer
-; VF4-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP4]], i32 0
-; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP4]], i32 1
-; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP4]], i32 2
-; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP4]], i32 3
+; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 3
+; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 6
+; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 9
+; VF4-NEXT: [[TMP3:%.*]] = lshr <4 x i64> [[VEC_IND]], zeroinitializer
+; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
+; VF4-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
+; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
+; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
+; VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
; VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
-; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
+; VF4-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP8]], align 8
; VF4-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP9]], align 8
; VF4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8
; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
-; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8
-; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> poison, i64 [[TMP13]], i32 0
-; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 1
-; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 2
-; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 3
-; VF4-NEXT: [[TMP21:%.*]] = add nsw <4 x i64> [[TMP20]], splat (i64 42)
-; VF4-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP21]], i32 0
-; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP21]], i32 1
-; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP21]], i32 2
-; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP21]], i32 3
+; VF4-NEXT: [[TMP16:%.*]] = insertelement <4 x i64> poison, i64 [[TMP12]], i32 0
+; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 1
+; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2
+; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3
+; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], splat (i64 42)
+; VF4-NEXT: [[TMP21:%.*]] = extractelement <4 x i64> [[TMP20]], i32 0
+; VF4-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP20]], i32 1
+; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP20]], i32 2
+; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP20]], i32 3
+; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
-; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
+; VF4-NEXT: store i64 [[TMP21]], ptr [[TMP25]], align 8
; VF4-NEXT: store i64 [[TMP22]], ptr [[TMP26]], align 8
; VF4-NEXT: store i64 [[TMP23]], ptr [[TMP27]], align 8
; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8
-; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12)
-; VF4-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
-; VF4-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; VF4-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
+; VF4-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; VF4: middle.block:
; VF4-NEXT: br label [[SCALAR_PH:%.*]]
; VF4: scalar.ph:
@@ -530,28 +524,27 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
-; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
-; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1)
-; VF2-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
-; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
+; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 3
+; VF2-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1)
+; VF2-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
+; VF2-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
+; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
-; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
+; VF2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8
; VF2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8
-; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
-; VF2-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
-; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1
-; VF2-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
-; VF2-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
-; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
+; VF2-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0
+; VF2-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1
+; VF2-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42)
+; VF2-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP10]], i32 0
+; VF2-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP10]], i32 1
+; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
-; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
+; VF2-NEXT: store i64 [[TMP11]], ptr [[TMP13]], align 8
; VF2-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8
-; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6)
-; VF2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
-; VF2-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; VF2-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
+; VF2-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; VF2: middle.block:
; VF2-NEXT: br label [[SCALAR_PH:%.*]]
; VF2: scalar.ph:
@@ -566,44 +559,43 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 3, i64 6, i64 9>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
-; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
-; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6
-; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9
-; VF4-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 1)
-; VF4-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP4]], i32 0
-; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP4]], i32 1
-; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP4]], i32 2
-; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP4]], i32 3
+; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 3
+; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 6
+; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 9
+; VF4-NEXT: [[TMP3:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 1)
+; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
+; VF4-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
+; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
+; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
+; VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
; VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
-; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
+; VF4-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP8]], align 8
; VF4-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP9]], align 8
; VF4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8
; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
-; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8
-; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> poison, i64 [[TMP13]], i32 0
-; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 1
-; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 2
-; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 3
-; VF4-NEXT: [[TMP21:%.*]] = add nsw <4 x i64> [[TMP20]], splat (i64 42)
-; VF4-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP21]], i32 0
-; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP21]], i32 1
-; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP21]], i32 2
-; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP21]], i32 3
+; VF4-NEXT: [[TMP16:%.*]] = insertelement <4 x i64> poison, i64 [[TMP12]], i32 0
+; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 1
+; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2
+; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3
+; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], splat (i64 42)
+; VF4-NEXT: [[TMP21:%.*]] = extractelement <4 x i64> [[TMP20]], i32 0
+; VF4-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP20]], i32 1
+; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP20]], i32 2
+; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP20]], i32 3
+; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
-; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
+; VF4-NEXT: store i64 [[TMP21]], ptr [[TMP25]], align 8
; VF4-NEXT: store i64 [[TMP22]], ptr [[TMP26]], align 8
; VF4-NEXT: store i64 [[TMP23]], ptr [[TMP27]], align 8
; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8
-; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12)
-; VF4-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
-; VF4-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; VF4-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
+; VF4-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; VF4: middle.block:
; VF4-NEXT: br label [[SCALAR_PH:%.*]]
; VF4: scalar.ph:
@@ -725,21 +717,20 @@ define void @ld_lshr1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2
; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
-; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
-; VF2-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP1]], 1
-; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
-; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
-; VF2-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42)
-; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
-; VF2-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
+; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
+; VF2-NEXT: [[TMP2:%.*]] = lshr i64 [[OFFSET_IDX]], 1
+; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
+; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8
+; VF2-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42)
+; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
+; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
+; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
-; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
+; VF2-NEXT: store i64 [[TMP5]], ptr [[TMP7]], align 8
; VF2-NEXT: store i64 [[TMP6]], ptr [[TMP8]], align 8
-; VF2-NEXT: store i64 [[TMP7]], ptr [[TMP9]], align 8
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; VF2-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498
-; VF2-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; VF2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498
+; VF2-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; VF2: middle.block:
; VF2-NEXT: br label [[SCALAR_PH:%.*]]
; VF2: scalar.ph:
@@ -754,29 +745,28 @@ define void @ld_lshr1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) {
; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2
; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
-; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
-; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 4
-; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 6
-; VF4-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP1]], 1
-; VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
-; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP6]], align 8
-; VF4-NEXT: [[TMP7:%.*]] = add nsw <4 x i64> [[WIDE_LOAD]], splat (i64 42)
-; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0
-; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP7]], i32 1
-; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP7]], i32 2
-; VF4-NEXT: [[TMP11:%.*]] = extractelement <4 x i64> [[TMP7]], i32 3
+; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
+; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
+; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
+; VF4-NEXT: [[TMP4:%.*]] = lshr i64 [[OFFSET_IDX]], 1
+; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
+; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
+; VF4-NEXT: [[TMP6:%.*]] = add nsw <4 x i64> [[WIDE_LOAD]], splat (i64 42)
+; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0
+; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1
+; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2
+; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3
+; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
-; VF4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]]
+; VF4-NEXT: store i64 [[TMP7]], ptr [[TMP11]], align 8
; VF4-NEXT: store i64 [[TMP8]], ptr [[TMP12]], align 8
; VF4-NEXT: store i64 [[TMP9]], ptr [[TMP13]], align 8
; VF4-NEXT: store i64 [[TMP10]], ptr [[TMP14]], align 8
-; VF4-NEXT: store i64 [[TMP11]], ptr [[TMP15]], align 8
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; VF4-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496
-; VF4-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; VF4-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496
+; VF4-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; VF4: middle.block:
; VF4-NEXT: br label [[SCALAR_PH:%.*]]
; VF4: scalar.ph:
@@ -811,28 +801,27 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
-; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
-; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1)
-; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
-; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
+; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
+; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1)
+; VF2-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
+; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
+; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
-; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
+; VF2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8
; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
-; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
-; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
-; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
-; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
-; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
-; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
+; VF2-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
+; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1
+; VF2-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
+; VF2-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
+; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
+; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
-; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
+; VF2-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8
; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
-; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6)
-; VF2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
-; VF2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
+; VF2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
+; VF2-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; VF2: middle.block:
; VF2-NEXT: br label [[SCALAR_PH:%.*]]
; VF2: scalar.ph:
@@ -848,44 +837,43 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 1, i64 4, i64 7, i64 10>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
-; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
-; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
-; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9
-; VF4-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 1)
-; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0
-; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 1
-; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2
-; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3
+; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
+; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6
+; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9
+; VF4-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 1)
+; VF4-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP4]], i32 0
+; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP4]], i32 1
+; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP4]], i32 2
+; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP4]], i32 3
+; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
; VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
-; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]]
+; VF4-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP9]], align 8
; VF4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8
; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8
-; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP13]], align 8
-; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> poison, i64 [[TMP14]], i32 0
-; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 1
-; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 2
-; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 3
-; VF4-NEXT: [[TMP22:%.*]] = add nsw <4 x i64> [[TMP21]], splat (i64 42)
-; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP22]], i32 0
-; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP22]], i32 1
-; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP22]], i32 2
-; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP22]], i32 3
+; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> poison, i64 [[TMP13]], i32 0
+; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 1
+; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 2
+; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 3
+; VF4-NEXT: [[TMP21:%.*]] = add nsw <4 x i64> [[TMP20]], splat (i64 42)
+; VF4-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP21]], i32 0
+; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP21]], i32 1
+; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP21]], i32 2
+; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP21]], i32 3
+; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
-; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]]
+; VF4-NEXT: store i64 [[TMP22]], ptr [[TMP26]], align 8
; VF4-NEXT: store i64 [[TMP23]], ptr [[TMP27]], align 8
; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8
; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8
-; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12)
-; VF4-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
-; VF4-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
+; VF4-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
+; VF4-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; VF4: middle.block:
; VF4-NEXT: br label [[SCALAR_PH:%.*]]
; VF4: scalar.ph:
@@ -920,28 +908,27 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
-; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
-; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 2)
-; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
-; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
+; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
+; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 2)
+; VF2-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
+; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
+; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
-; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
+; VF2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8
; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
-; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
-; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
-; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
-; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
-; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
-; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
+; VF2-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
+; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1
+; VF2-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
+; VF2-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
+; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
+; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
-; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
+; VF2-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8
; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
-; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6)
-; VF2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
-; VF2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
+; VF2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
+; VF2-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; VF2: middle.block:
; VF2-NEXT: br label [[SCALAR_PH:%.*]]
; VF2: scalar.ph:
@@ -957,44 +944,43 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 1, i64 4, i64 7, i64 10>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
-; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
-; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
-; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9
-; VF4-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 2)
-; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0
-; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 1
-; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2
-; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3
+; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
+; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6
+; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9
+; VF4-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 2)
+; VF4-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP4]], i32 0
+; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP4]], i32 1
+; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP4]], i32 2
+; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP4]], i32 3
+; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
; VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
-; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]]
+; VF4-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP9]], align 8
; VF4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8
; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8
-; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP13]], align 8
-; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> poison, i64 [[TMP14]], i32 0
-; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 1
-; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 2
-; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 3
-; VF4-NEXT: [[TMP22:%.*]] = add nsw <4 x i64> [[TMP21]], splat (i64 42)
-; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP22]], i32 0
-; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP22]], i32 1
-; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP22]], i32 2
-; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP22]], i32 3
+; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> poison, i64 [[TMP13]], i32 0
+; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 1
+; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 2
+; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 3
+; VF4-NEXT: [[TMP21:%.*]] = add nsw <4 x i64> [[TMP20]], splat (i64 42)
+; VF4-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP21]], i32 0
+; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP21]], i32 1
+; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP21]], i32 2
+; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP21]], i32 3
+; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
-; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]]
+; VF4-NEXT: store i64 [[TMP22]], ptr [[TMP26]], align 8
; VF4-NEXT: store i64 [[TMP23]], ptr [[TMP27]], align 8
; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8
; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8
-; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12)
-; VF4-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
-; VF4-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
+; VF4-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
+; VF4-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; VF4: middle.block:
; VF4-NEXT: br label [[SCALAR_PH:%.*]]
; VF4: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll
index fb962c017156d..42d7a75b4b6bc 100644
--- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll
+++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll
@@ -304,31 +304,30 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
-; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
-; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
-; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 1)
-; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
-; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
-; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
+; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2
+; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
+; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 1)
+; VF2-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
+; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
+; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
+; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
-; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
+; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
-; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8
-; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0
-; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1
-; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42)
-; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
-; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1
+; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
+; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
+; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
+; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
+; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
+; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
-; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
+; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
-; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4)
; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2)
-; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
-; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; VF2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
+; VF2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; VF2: middle.block:
; VF2-NEXT: br label [[EXIT:%.*]]
; VF2: exit:
@@ -345,47 +344,46 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 2, i64 4, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
-; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
-; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
-; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
-; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1)
-; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 1)
-; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]]
-; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0
-; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1
-; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2
-; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3
+; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2
+; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 4
+; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6
+; VF4-NEXT: [[TMP3:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1)
+; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 1)
+; VF4-NEXT: [[TMP5:%.*]] = add <4 x i64> [[TMP3]], [[TMP4]]
+; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0
+; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 1
+; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2
+; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3
+; VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]]
-; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]]
+; VF4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8
; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8
; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP13]], align 8
-; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 8
-; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> poison, i64 [[TMP15]], i32 0
-; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1
-; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2
-; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3
-; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42)
-; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP23]], i32 0
-; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1
-; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2
-; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3
+; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> poison, i64 [[TMP14]], i32 0
+; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 1
+; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 2
+; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 3
+; VF4-NEXT: [[TMP22:%.*]] = add nsw <4 x i64> [[TMP21]], splat (i64 42)
+; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP22]], i32 0
+; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP22]], i32 1
+; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP22]], i32 2
+; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP22]], i32 3
+; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
-; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
+; VF4-NEXT: store i64 [[TMP23]], ptr [[TMP27]], align 8
; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8
; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8
; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8
-; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 8)
; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4)
-; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
-; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; VF4-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
+; VF4-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; VF4: middle.block:
; VF4-NEXT: br label [[EXIT:%.*]]
; VF4: exit:
@@ -425,31 +423,30 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
-; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
-; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2)
-; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 2)
-; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
-; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
-; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
+; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2
+; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2)
+; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 2)
+; VF2-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
+; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
+; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
+; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
-; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
+; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
-; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8
-; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0
-; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1
-; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42)
-; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
-; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1
+; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
+; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
+; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
+; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
+; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
+; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
-; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
+; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
-; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4)
; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2)
-; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
-; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; VF2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
+; VF2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; VF2: middle.block:
; VF2-NEXT: br label [[EXIT:%.*]]
; VF2: exit:
@@ -466,47 +463,46 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 2, i64 4, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
-; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
-; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
-; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
-; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2)
-; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 2)
-; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]]
-; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0
-; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1
-; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2
-; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3
+; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2
+; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 4
+; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6
+; VF4-NEXT: [[TMP3:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2)
+; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 2)
+; VF4-NEXT: [[TMP5:%.*]] = add <4 x i64> [[TMP3]], [[TMP4]]
+; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0
+; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 1
+; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2
+; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3
+; VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]]
-; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]]
+; VF4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8
; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8
; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP13]], align 8
-; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 8
-; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> poison, i64 [[TMP15]], i32 0
-; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1
-; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2
-; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3
-; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42)
-; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP23]], i32 0
-; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1
-; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2
-; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3
+; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> poison, i64 [[TMP14]], i32 0
+; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 1
+; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 2
+; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 3
+; VF4-NEXT: [[TMP22:%.*]] = add nsw <4 x i64> [[TMP21]], splat (i64 42)
+; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP22]], i32 0
+; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP22]], i32 1
+; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP22]], i32 2
+; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP22]], i32 3
+; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
-; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
+; VF4-NEXT: store i64 [[TMP23]], ptr [[TMP27]], align 8
; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8
; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8
; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8
-; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 8)
; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4)
-; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
-; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; VF4-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
+; VF4-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; VF4: middle.block:
; VF4-NEXT: br label [[EXIT:%.*]]
; VF4: exit:
@@ -546,31 +542,30 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
-; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
-; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3)
-; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 3)
-; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
-; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
-; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
+; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2
+; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3)
+; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 3)
+; VF2-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
+; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
+; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
+; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
-; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
+; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
-; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8
-; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0
-; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1
-; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42)
-; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
-; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1
+; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
+; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
+; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
+; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
+; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
+; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
-; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
+; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
-; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4)
; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2)
-; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
-; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; VF2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
+; VF2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; VF2: middle.block:
; VF2-NEXT: br label [[EXIT:%.*]]
; VF2: exit:
@@ -587,47 +582,46 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 2, i64 4, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
-; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
-; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
-; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
-; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 3)
-; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 3)
-; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]]
-; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0
-; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1
-; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2
-; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3
+; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2
+; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 4
+; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6
+; VF4-NEXT: [[TMP3:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 3)
+; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 3)
+; VF4-NEXT: [[TMP5:%.*]] = add <4 x i64> [[TMP3]], [[TMP4]]
+; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0
+; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 1
+; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2
+; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3
+; VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]]
-; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]]
+; VF4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8
; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8
; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP13]], align 8
-; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 8
-; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> poison, i64 [[TMP15]], i32 0
-; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1
-; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2
-; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3
-; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42)
-; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP23]], i32 0
-; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1
-; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2
-; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3
+; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> poison, i64 [[TMP14]], i32 0
+; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 1
+; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 2
+; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 3
+; VF4-NEXT: [[TMP22:%.*]] = add nsw <4 x i64> [[TMP21]], splat (i64 42)
+; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP22]], i32 0
+; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP22]], i32 1
+; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP22]], i32 2
+; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP22]], i32 3
+; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
-; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
+; VF4-NEXT: store i64 [[TMP23]], ptr [[TMP27]], align 8
; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8
; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8
; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8
-; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 8)
; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4)
-; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
-; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; VF4-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
+; VF4-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; VF4: middle.block:
; VF4-NEXT: br label [[EXIT:%.*]]
; VF4: exit:
@@ -667,31 +661,30 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
-; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
-; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
-; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 1)
-; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
-; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
-; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
+; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 3
+; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
+; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 1)
+; VF2-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
+; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
+; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
+; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
-; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
+; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
-; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8
-; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0
-; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1
-; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42)
-; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
-; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1
+; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
+; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
+; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
+; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
+; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
+; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
-; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
+; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
-; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6)
; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2)
-; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
-; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; VF2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
+; VF2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; VF2: middle.block:
; VF2-NEXT: br label [[SCALAR_PH:%.*]]
; VF2: scalar.ph:
@@ -707,47 +700,46 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 3, i64 6, i64 9>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
-; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
-; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6
-; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9
-; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1)
-; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 1)
-; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]]
-; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0
-; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1
-; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2
-; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3
+; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 3
+; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 6
+; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 9
+; VF4-NEXT: [[TMP3:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1)
+; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 1)
+; VF4-NEXT: [[TMP5:%.*]] = add <4 x i64> [[TMP3]], [[TMP4]]
+; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0
+; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 1
+; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2
+; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3
+; VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]]
-; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]]
+; VF4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8
; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8
; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP13]], align 8
-; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 8
-; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> poison, i64 [[TMP15]], i32 0
-; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1
-; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2
-; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3
-; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42)
-; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP23]], i32 0
-; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1
-; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2
-; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3
+; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> poison, i64 [[TMP14]], i32 0
+; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 1
+; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 2
+; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 3
+; VF4-NEXT: [[TMP22:%.*]] = add nsw <4 x i64> [[TMP21]], splat (i64 42)
+; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP22]], i32 0
+; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP22]], i32 1
+; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP22]], i32 2
+; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP22]], i32 3
+; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
-; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
+; VF4-NEXT: store i64 [[TMP23]], ptr [[TMP27]], align 8
; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8
; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8
; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8
-; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12)
; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4)
-; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
-; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; VF4-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
+; VF4-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; VF4: middle.block:
; VF4-NEXT: br label [[SCALAR_PH:%.*]]
; VF4: scalar.ph:
@@ -786,31 +778,30 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
-; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
-; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2)
-; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 2)
-; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
-; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
-; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
+; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 3
+; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2)
+; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 2)
+; VF2-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
+; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
+; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
+; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
-; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
+; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
-; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8
-; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0
-; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1
-; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42)
-; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
-; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1
+; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
+; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
+; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
+; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
+; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
+; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
-; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
+; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
-; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6)
; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2)
-; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
-; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; VF2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
+; VF2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; VF2: middle.block:
; VF2-NEXT: br label [[SCALAR_PH:%.*]]
; VF2: scalar.ph:
@@ -826,47 +817,46 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 3, i64 6, i64 9>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
-; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
-; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6
-; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9
-; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2)
-; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 2)
-; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]]
-; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0
-; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1
-; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2
-; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3
+; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 3
+; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 6
+; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 9
+; VF4-NEXT: [[TMP3:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2)
+; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 2)
+; VF4-NEXT: [[TMP5:%.*]] = add <4 x i64> [[TMP3]], [[TMP4]]
+; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0
+; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 1
+; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2
+; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3
+; VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]]
-; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]]
+; VF4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8
; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8
; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP13]], align 8
-; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 8
-; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> poison, i64 [[TMP15]], i32 0
-; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1
-; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2
-; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3
-; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42)
-; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP23]], i32 0
-; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1
-; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2
-; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3
+; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> poison, i64 [[TMP14]], i32 0
+; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 1
+; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 2
+; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 3
+; VF4-NEXT: [[TMP22:%.*]] = add nsw <4 x i64> [[TMP21]], splat (i64 42)
+; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP22]], i32 0
+; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP22]], i32 1
+; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP22]], i32 2
+; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP22]], i32 3
+; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
-; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
+; VF4-NEXT: store i64 [[TMP23]], ptr [[TMP27]], align 8
; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8
; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8
; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8
-; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12)
; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4)
-; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
-; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; VF4-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
+; VF4-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; VF4: middle.block:
; VF4-NEXT: br label [[SCALAR_PH:%.*]]
; VF4: scalar.ph:
@@ -905,31 +895,30 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
-; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
-; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3)
-; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 3)
-; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
-; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
-; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
+; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 3
+; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3)
+; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 3)
+; VF2-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
+; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
+; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
+; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
-; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
+; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
-; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8
-; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0
-; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1
-; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42)
-; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
-; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1
+; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
+; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
+; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
+; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
+; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
+; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
-; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
+; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
-; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6)
; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2)
-; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
-; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; VF2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
+; VF2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; VF2: middle.block:
; VF2-NEXT: br label [[SCALAR_PH:%.*]]
; VF2: scalar.ph:
@@ -945,47 +934,46 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 3, i64 6, i64 9>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
-; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
-; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6
-; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9
-; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 3)
-; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 3)
-; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]]
-; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0
-; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1
-; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2
-; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3
+; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 3
+; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 6
+; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 9
+; VF4-NEXT: [[TMP3:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 3)
+; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 3)
+; VF4-NEXT: [[TMP5:%.*]] = add <4 x i64> [[TMP3]], [[TMP4]]
+; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0
+; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 1
+; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2
+; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3
+; VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]]
-; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]]
+; VF4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8
; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8
; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP13]], align 8
-; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 8
-; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> poison, i64 [[TMP15]], i32 0
-; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1
-; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2
-; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3
-; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42)
-; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP23]], i32 0
-; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1
-; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2
-; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3
+; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> poison, i64 [[TMP14]], i32 0
+; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 1
+; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 2
+; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 3
+; VF4-NEXT: [[TMP22:%.*]] = add nsw <4 x i64> [[TMP21]], splat (i64 42)
+; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP22]], i32 0
+; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP22]], i32 1
+; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP22]], i32 2
+; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP22]], i32 3
+; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
-; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
+; VF4-NEXT: store i64 [[TMP23]], ptr [[TMP27]], align 8
; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8
; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8
; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8
-; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12)
; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4)
-; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
-; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; VF4-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
+; VF4-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; VF4: middle.block:
; VF4-NEXT: br label [[SCALAR_PH:%.*]]
; VF4: scalar.ph:
@@ -1322,31 +1310,30 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2
; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
-; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
-; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
-; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 1)
-; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]]
-; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
-; VF2-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
+; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
+; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
+; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 1)
+; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
+; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
+; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
+; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
-; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
+; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8
-; VF2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8
-; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0
-; VF2-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1
-; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42)
-; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0
-; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1
+; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0
+; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1
+; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42)
+; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
+; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1
+; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
-; VF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
+; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8
-; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4)
; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2)
-; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498
-; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
+; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498
+; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; VF2: middle.block:
; VF2-NEXT: br label [[SCALAR_PH:%.*]]
; VF2: scalar.ph:
@@ -1363,47 +1350,46 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2
; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
-; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
-; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 4
-; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 6
-; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1)
-; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 1)
-; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]]
-; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0
-; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP7]], i32 1
-; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP7]], i32 2
-; VF4-NEXT: [[TMP11:%.*]] = extractelement <4 x i64> [[TMP7]], i32 3
+; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
+; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
+; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
+; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1)
+; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 1)
+; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]]
+; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0
+; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1
+; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2
+; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3
+; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]]
; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]]
-; VF4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]]
+; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8
; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP13]], align 8
; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 8
-; VF4-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP15]], align 8
-; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> poison, i64 [[TMP16]], i32 0
-; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1
-; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2
-; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3
-; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], splat (i64 42)
-; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP24]], i32 0
-; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP24]], i32 1
-; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP24]], i32 2
-; VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3
+; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> poison, i64 [[TMP15]], i32 0
+; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1
+; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2
+; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3
+; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42)
+; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP23]], i32 0
+; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1
+; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2
+; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3
+; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
-; VF4-NEXT: [[TMP32:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]]
+; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8
; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8
; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8
; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8
-; VF4-NEXT: store i64 [[TMP28]], ptr [[TMP32]], align 8
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 8)
; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4)
-; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496
-; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
+; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496
+; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; VF4: middle.block:
; VF4-NEXT: br label [[SCALAR_PH:%.*]]
; VF4: scalar.ph:
@@ -1443,31 +1429,30 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2
; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
-; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
-; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2)
-; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 2)
-; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]]
-; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
-; VF2-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
+; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
+; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2)
+; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 2)
+; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
+; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
+; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
+; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
-; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
+; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8
-; VF2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8
-; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0
-; VF2-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1
-; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42)
-; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0
-; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1
+; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0
+; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1
+; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42)
+; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
+; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1
+; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
-; VF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
+; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8
-; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4)
; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2)
-; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498
-; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
+; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498
+; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; VF2: middle.block:
; VF2-NEXT: br label [[SCALAR_PH:%.*]]
; VF2: scalar.ph:
@@ -1484,47 +1469,46 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2
; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
-; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
-; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 4
-; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 6
-; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2)
-; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 2)
-; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]]
-; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0
-; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP7]], i32 1
-; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP7]], i32 2
-; VF4-NEXT: [[TMP11:%.*]] = extractelement <4 x i64> [[TMP7]], i32 3
+; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
+; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
+; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
+; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2)
+; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 2)
+; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]]
+; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0
+; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1
+; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2
+; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3
+; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]]
; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]]
-; VF4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]]
+; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8
; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP13]], align 8
; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 8
-; VF4-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP15]], align 8
-; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> poison, i64 [[TMP16]], i32 0
-; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1
-; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2
-; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3
-; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], splat (i64 42)
-; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP24]], i32 0
-; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP24]], i32 1
-; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP24]], i32 2
-; VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3
+; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> poison, i64 [[TMP15]], i32 0
+; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1
+; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2
+; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3
+; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42)
+; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP23]], i32 0
+; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1
+; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2
+; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3
+; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
-; VF4-NEXT: [[TMP32:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]]
+; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8
; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8
; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8
; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8
-; VF4-NEXT: store i64 [[TMP28]], ptr [[TMP32]], align 8
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 8)
; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4)
-; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496
-; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
+; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496
+; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; VF4: middle.block:
; VF4-NEXT: br label [[SCALAR_PH:%.*]]
; VF4: scalar.ph:
@@ -1564,31 +1548,30 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2
; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
-; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
-; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3)
-; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 3)
-; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]]
-; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
-; VF2-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
+; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
+; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3)
+; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 3)
+; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
+; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
+; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
+; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
-; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
+; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8
-; VF2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8
-; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0
-; VF2-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1
-; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42)
-; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0
-; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1
+; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0
+; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1
+; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42)
+; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
+; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1
+; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
-; VF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
+; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8
-; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4)
; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2)
-; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498
-; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
+; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498
+; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; VF2: middle.block:
; VF2-NEXT: br label [[SCALAR_PH:%.*]]
; VF2: scalar.ph:
@@ -1605,47 +1588,46 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2
; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
-; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
-; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 4
-; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 6
-; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 3)
-; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 3)
-; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]]
-; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0
-; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP7]], i32 1
-; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP7]], i32 2
-; VF4-NEXT: [[TMP11:%.*]] = extractelement <4 x i64> [[TMP7]], i32 3
+; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
+; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
+; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
+; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 3)
+; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 3)
+; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]]
+; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0
+; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1
+; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2
+; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3
+; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]]
; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]]
-; VF4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]]
+; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8
; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP13]], align 8
; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 8
-; VF4-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP15]], align 8
-; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> poison, i64 [[TMP16]], i32 0
-; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1
-; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2
-; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3
-; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], splat (i64 42)
-; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP24]], i32 0
-; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP24]], i32 1
-; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP24]], i32 2
-; VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3
+; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> poison, i64 [[TMP15]], i32 0
+; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1
+; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2
+; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3
+; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42)
+; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP23]], i32 0
+; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1
+; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2
+; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3
+; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
-; VF4-NEXT: [[TMP32:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]]
+; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8
; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8
; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8
; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8
-; VF4-NEXT: store i64 [[TMP28]], ptr [[TMP32]], align 8
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 8)
; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4)
-; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496
-; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
+; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496
+; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; VF4: middle.block:
; VF4-NEXT: br label [[SCALAR_PH:%.*]]
; VF4: scalar.ph:
@@ -1685,31 +1667,30 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
-; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
-; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
-; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 1)
-; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]]
-; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
-; VF2-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
+; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
+; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
+; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 1)
+; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
+; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
+; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
+; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
-; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
+; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8
-; VF2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8
-; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0
-; VF2-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1
-; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42)
-; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0
-; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1
+; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0
+; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1
+; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42)
+; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
+; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1
+; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
-; VF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
+; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8
-; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6)
; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2)
-; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
-; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
+; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
+; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; VF2: middle.block:
; VF2-NEXT: br label [[SCALAR_PH:%.*]]
; VF2: scalar.ph:
@@ -1726,47 +1707,46 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
-; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
-; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
-; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9
-; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1)
-; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 1)
-; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]]
-; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0
-; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP7]], i32 1
-; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP7]], i32 2
-; VF4-NEXT: [[TMP11:%.*]] = extractelement <4 x i64> [[TMP7]], i32 3
+; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
+; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6
+; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9
+; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1)
+; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 1)
+; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]]
+; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0
+; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1
+; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2
+; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3
+; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]]
; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]]
-; VF4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]]
+; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8
; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP13]], align 8
; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 8
-; VF4-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP15]], align 8
-; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> poison, i64 [[TMP16]], i32 0
-; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1
-; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2
-; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3
-; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], splat (i64 42)
-; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP24]], i32 0
-; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP24]], i32 1
-; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP24]], i32 2
-; VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3
+; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> poison, i64 [[TMP15]], i32 0
+; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1
+; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2
+; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3
+; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42)
+; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP23]], i32 0
+; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1
+; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2
+; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3
+; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
-; VF4-NEXT: [[TMP32:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]]
+; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8
; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8
; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8
; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8
-; VF4-NEXT: store i64 [[TMP28]], ptr [[TMP32]], align 8
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12)
; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4)
-; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
-; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
+; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
+; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; VF4: middle.block:
; VF4-NEXT: br label [[SCALAR_PH:%.*]]
; VF4: scalar.ph:
@@ -1806,31 +1786,30 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
-; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
-; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2)
-; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 2)
-; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]]
-; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
-; VF2-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
+; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
+; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2)
+; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 2)
+; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
+; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
+; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
+; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
-; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
+; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8
-; VF2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8
-; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0
-; VF2-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1
-; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42)
-; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0
-; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1
+; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0
+; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1
+; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42)
+; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
+; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1
+; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
-; VF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
+; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8
-; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6)
; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2)
-; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
-; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
+; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
+; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
; VF2: middle.block:
; VF2-NEXT: br label [[SCALAR_PH:%.*]]
; VF2: scalar.ph:
@@ -1847,47 +1826,46 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
-; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
-; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
-; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9
-; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2)
-; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 2)
-; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]]
-; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0
-; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP7]], i32 1
-; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP7]], i32 2
-; VF4-NEXT: [[TMP11:%.*]] = extractelement <4 x i64> [[TMP7]], i32 3
+; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
+; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6
+; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9
+; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2)
+; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 2)
+; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]]
+; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0
+; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1
+; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2
+; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3
+; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]]
; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]]
-; VF4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]]
+; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8
; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP13]], align 8
; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 8
-; VF4-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP15]], align 8
-; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> poison, i64 [[TMP16]], i32 0
-; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1
-; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2
-; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3
-; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], splat (i64 42)
-; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP24]], i32 0
-; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP24]], i32 1
-; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP24]], i32 2
-; VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3
+; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> poison, i64 [[TMP15]], i32 0
+; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1
+; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2
+; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3
+; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42)
+; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP23]], i32 0
+; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1
+; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2
+; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3
+; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
-; VF4-NEXT: [[TMP32:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]]
+; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8
; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8
; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8
; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8
-; VF4-NEXT: store i64 [[TMP28]], ptr [[TMP32]], align 8
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12)
; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4)
-; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
-; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
+; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
+; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
; VF4: middle.block:
; VF4-NEXT: br label [[SCALAR_PH:%.*]]
; VF4: scalar.ph:
@@ -1927,31 +1905,30 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
-; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
-; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3)
-; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 3)
-; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]]
-; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
-; VF2-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
+; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
+; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3)
+; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 3)
+; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
+; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
+; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
+; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
-; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
+; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8
-; VF2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8
-; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0
-; VF2-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1
-; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42)
-; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0
-; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1
+; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0
+; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1
+; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42)
+; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
+; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1
+; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
-; VF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
+; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8
-; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6)
; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2)
-; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
-; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
+; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
+; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
; VF2: middle.block:
; VF2-NEXT: br label [[SCALAR_PH:%.*]]
; VF2: scalar.ph:
@@ -1968,47 +1945,46 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
-; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
-; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
-; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9
-; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 3)
-; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 3)
-; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]]
-; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0
-; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP7]], i32 1
-; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP7]], i32 2
-; VF4-NEXT: [[TMP11:%.*]] = extractelement <4 x i64> [[TMP7]], i32 3
+; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
+; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6
+; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9
+; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 3)
+; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 3)
+; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]]
+; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0
+; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1
+; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2
+; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3
+; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]]
; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]]
-; VF4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]]
+; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8
; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP13]], align 8
; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 8
-; VF4-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP15]], align 8
-; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> poison, i64 [[TMP16]], i32 0
-; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1
-; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2
-; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3
-; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], splat (i64 42)
-; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP24]], i32 0
-; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP24]], i32 1
-; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP24]], i32 2
-; VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3
+; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> poison, i64 [[TMP15]], i32 0
+; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1
+; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2
+; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3
+; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42)
+; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP23]], i32 0
+; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1
+; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2
+; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3
+; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
-; VF4-NEXT: [[TMP32:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]]
+; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8
; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8
; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8
; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8
-; VF4-NEXT: store i64 [[TMP28]], ptr [[TMP32]], align 8
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12)
; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4)
-; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
-; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
+; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
+; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
; VF4: middle.block:
; VF4-NEXT: br label [[SCALAR_PH:%.*]]
; VF4: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll b/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll
index 252f2942452b4..6bdf571a318be 100644
--- a/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll
+++ b/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll
@@ -106,11 +106,10 @@ define void @metadata(ptr nocapture readonly %ptr, i32 %size, ptr %pos) {
; FORCED-TF-NEXT: br label [[VECTOR_BODY:%.*]]
; FORCED-TF: vector.body:
; FORCED-TF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE11:%.*]] ]
-; FORCED-TF-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; FORCED-TF-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
; FORCED-TF-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2
; FORCED-TF-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3
-; FORCED-TF-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i32 [[TMP0]]
+; FORCED-TF-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i32 [[INDEX]]
; FORCED-TF-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP1]]
; FORCED-TF-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP2]]
; FORCED-TF-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP3]]
@@ -185,11 +184,10 @@ define void @metadata(ptr nocapture readonly %ptr, i32 %size, ptr %pos) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE11:%.*]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i32 [[TMP0]]
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP1]]
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP2]]
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP3]]
diff --git a/llvm/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll b/llvm/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll
index 9f3744f4a5150..76a8bba86984c 100644
--- a/llvm/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll
+++ b/llvm/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll
@@ -112,9 +112,10 @@ for.end:
; VF8: vector.body:
; VF8-NEXT: [[INDEX:%.+]] = phi i64 [ 0, %vector.ph ]
; VF8-NEXT: [[OFFSET_IDX:%.+]] = mul i64 [[INDEX]], %step
-; VF8-NEXT: [[MUL0:%.+]] = mul i64 0, %step
-; VF8-NEXT: [[ADD:%.+]] = add i64 [[OFFSET_IDX]], [[MUL0]]
-; VF8: getelementptr inbounds i32, ptr %in, i64 [[ADD]]
+; VF8-NEXT: [[MUL1:%.+]] = mul i64 1, %step
+; VF8-NEXT: [[ADD1:%.+]] = add i64 [[OFFSET_IDX]], [[MUL1]]
+; VF8: getelementptr inbounds i32, ptr %in, i64 [[OFFSET_IDX]]
+; VF8: getelementptr inbounds i32, ptr %in, i64 [[ADD1]]
; VF8: middle.block:
; VF1-LABEL: @doit2
diff --git a/llvm/test/Transforms/LoopVectorize/version-mem-access.ll b/llvm/test/Transforms/LoopVectorize/version-mem-access.ll
index 8b9a526899041..c573ebaa51e9f 100644
--- a/llvm/test/Transforms/LoopVectorize/version-mem-access.ll
+++ b/llvm/test/Transforms/LoopVectorize/version-mem-access.ll
@@ -141,9 +141,8 @@ define void @stride_poison(ptr %dst) mustprogress {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], poison
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], poison
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], poison
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP4]]
; CHECK-NEXT: store i8 0, ptr [[TMP5]], align 1
; CHECK-NEXT: store i8 0, ptr [[TMP6]], align 1
diff --git a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll
index 6a6ae316f4e52..6cb76f86aeb5a 100644
--- a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll
+++ b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll
@@ -157,11 +157,10 @@ define void @versioned_sext_use_in_gep(i32 %scale, ptr %dst, i64 %scale.2) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP10]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP12]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP16]]
@@ -235,11 +234,10 @@ define void @test_versioned_with_different_uses(i32 %offset, ptr noalias %dst.1,
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[IV_1]], [[INDEX]]
; CHECK-NEXT: [[OFFSET_IDX2:%.*]] = trunc i64 [[INDEX]] to i32
-; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX2]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX2]], 1
; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[OFFSET_IDX2]], 2
; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX2]], 3
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST_1]], i32 [[TMP4]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST_1]], i32 [[OFFSET_IDX2]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[DST_1]], i32 [[TMP5]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[DST_1]], i32 [[TMP6]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[DST_1]], i32 [[TMP7]]
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
index cf85f26992c2f..e60c3c40048f3 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
@@ -21,11 +21,9 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; CHECK-EMPTY:
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT-SCALAR vp<[[CAN_IV:%.+]]> = phi [ ir<0>, vector.ph ], [ vp<[[CAN_IV_NEXT:%.+]]>, default.2 ]
-; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, ir<2>
-; CHECK-NEXT: EMIT vp<[[STEP1:%.+]]> = extractelement vp<[[STEPS]]>, ir<0>
-; CHECK-NEXT: EMIT vp<[[PTR:%.+]]> = ptradd ir<%start>, vp<[[STEP1]]>
-; CHECK-NEXT: EMIT vp<[[STEP2:%.+]]> = extractelement vp<[[STEPS]]>, ir<1>
-; CHECK-NEXT: EMIT vp<[[PTR]]>.1 = ptradd ir<%start>, vp<[[STEP2]]>
+; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, ir<2>, ir<1>
+; CHECK-NEXT: EMIT vp<[[PTR:%.+]]> = ptradd ir<%start>, vp<[[CAN_IV]]>
+; CHECK-NEXT: EMIT vp<[[PTR]]>.1 = ptradd ir<%start>, vp<[[STEPS]]>
; CHECK-NEXT: EMIT vp<[[PTR_VEC:%.+]]> = buildvector vp<[[PTR]]>, vp<[[PTR]]>.1
; CHECK-NEXT: WIDEN ir<%l> = load vp<[[PTR]]>
; CHECK-NEXT: EMIT vp<[[C1:%.+]]> = icmp eq ir<%l>, ir<-12>
>From 85be19d832a9d0457a5204584cec4670a12820dc Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 1 Dec 2025 22:21:48 +0000
Subject: [PATCH 3/3] [VPlan] Explicitly unoll replicate-regions without
live-outs by VF.
This patch adds a new replicateReplicateRegionsByVF transform to
unroll replicate=regions by VF, dissolving them. The transform creates
VF copies of the replicate-region's content, connects them and converts
recipes to single-scalar variants for the corresponding lanes.
The initial version skips regions with live-outs (VPPredInstPHIRecipe),
which will be added in follow-up patches.
Depends on https://github.com/llvm/llvm-project/pull/170053
---
.../Transforms/Vectorize/LoopVectorize.cpp | 4 +
llvm/lib/Transforms/Vectorize/VPlan.h | 5 +-
.../Transforms/Vectorize/VPlanTransforms.cpp | 2 +-
.../Transforms/Vectorize/VPlanTransforms.h | 6 +
llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 177 +++++++++++++
.../LoopVectorize/AArch64/blend-costs.ll | 3 +-
.../AArch64/conditional-branches-cost.ll | 163 +++++-------
.../AArch64/fold-tail-low-trip-count.ll | 4 -
.../AArch64/induction-costs-sve.ll | 3 +-
.../AArch64/masked-call-scalarize.ll | 10 +-
.../LoopVectorize/AArch64/optsize_minsize.ll | 35 ++-
.../LoopVectorize/AArch64/predicated-costs.ll | 11 +-
...-interleave-to-widen-memory-multi-block.ll | 8 +-
...eave-to-widen-memory-remove-loop-region.ll | 9 +-
.../SystemZ/force-target-instruction-cost.ll | 5 +-
.../X86/consecutive-ptr-uniforms.ll | 7 +-
.../LoopVectorize/X86/constant-fold.ll | 27 +-
.../X86/cost-conditional-branches.ll | 14 +-
.../LoopVectorize/X86/gather_scatter.ll | 10 -
.../LoopVectorize/X86/induction-costs.ll | 3 +-
.../X86/pr109581-unused-blend.ll | 12 +-
.../X86/pr55096-scalarize-add.ll | 3 +-
.../LoopVectorize/X86/small-size.ll | 2 +-
.../x86-interleaved-accesses-masked-group.ll | 240 ++++++++----------
...86-interleaved-store-accesses-with-gaps.ll | 12 +-
llvm/test/Transforms/LoopVectorize/as_cast.ll | 2 +-
.../LoopVectorize/cast-induction.ll | 3 +-
.../LoopVectorize/consecutive-ptr-uniforms.ll | 8 +-
.../Transforms/LoopVectorize/cse-casts.ll | 3 +-
.../test/Transforms/LoopVectorize/debugloc.ll | 6 +-
.../dont-fold-tail-for-divisible-TC.ll | 3 +-
.../LoopVectorize/first-order-recurrence.ll | 6 +-
.../LoopVectorize/float-induction.ll | 9 +-
...nd-sink-mem-ops-with-invariant-pointers.ll | 3 +-
...predicated-loads-with-predicated-stores.ll | 130 +++++-----
.../LoopVectorize/if-pred-stores.ll | 16 +-
.../interleave-and-scalarize-only.ll | 3 +-
.../interleaved-accesses-pred-stores.ll | 11 +-
.../Transforms/LoopVectorize/lcssa-crashes.ll | 3 +-
.../LoopVectorize/load-deref-pred-align.ll | 22 +-
...eref-pred-poison-ub-ops-feeding-pointer.ll | 50 ++--
.../Transforms/LoopVectorize/loop-form.ll | 4 +-
.../LoopVectorize/memdep-fold-tail.ll | 13 +-
.../multiple-result-intrinsics.ll | 16 +-
.../LoopVectorize/pointer-induction.ll | 7 +-
llvm/test/Transforms/LoopVectorize/pr37248.ll | 9 +-
.../pr45679-fold-tail-by-masking.ll | 26 +-
.../LoopVectorize/predicate-switch.ll | 6 -
.../LoopVectorize/select-cmp-multiuse.ll | 6 +-
.../LoopVectorize/struct-return-replicate.ll | 68 +++--
.../Transforms/LoopVectorize/struct-return.ll | 10 +-
.../tail-folding-alloca-in-loop.ll | 25 +-
...folding-optimize-vector-induction-width.ll | 63 ++---
.../LoopVectorize/tail-folding-switch.ll | 11 +-
.../tail-folding-vectorization-factor-1.ll | 5 +-
.../trip-count-expansion-may-introduce-ub.ll | 8 +-
.../use-scalar-epilogue-if-tp-fails.ll | 8 -
.../vector-loop-backedge-elimination.ll | 15 +-
.../LoopVectorize/vplan-predicate-switch.ll | 108 ++++----
59 files changed, 696 insertions(+), 765 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f78acf31a0de2..83de00e3b7f57 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7316,6 +7316,10 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
BestVPlan);
VPlanTransforms::runPass(VPlanTransforms::materializeBroadcasts, BestVPlan);
VPlanTransforms::runPass(VPlanTransforms::replicateByVF, BestVPlan, BestVF);
+ VPlanTransforms::runPass(VPlanTransforms::unrollReplicateRegions, BestVPlan,
+ BestVF);
+ VPlanTransforms::runPass(VPlanTransforms::mergeBlocksIntoPredecessors,
+ BestVPlan);
bool HasBranchWeights =
hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator());
if (HasBranchWeights) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 9688b46e3956d..e219da1d45b59 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3813,8 +3813,9 @@ class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags {
getOperand(0), getOperand(1), getOperand(2), InductionOpcode,
hasFastMathFlags() ? getFastMathFlags() : FastMathFlags(),
getDebugLoc());
- if (getNumOperands() == 4)
- NewR->addOperand(getOperand(3));
+ // Add start index operand, if present.
+ for (VPValue *Op : drop_begin(operands(), 3))
+ NewR->addOperand(Op);
return NewR;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 660595915b239..b5cb0ddd5f0a9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -497,7 +497,7 @@ static void addReplicateRegions(VPlan &Plan) {
/// Remove redundant VPBasicBlocks by merging them into their predecessor if
/// the predecessor has a single successor.
-static bool mergeBlocksIntoPredecessors(VPlan &Plan) {
+bool VPlanTransforms::mergeBlocksIntoPredecessors(VPlan &Plan) {
SmallVector<VPBasicBlock *> WorkList;
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_deep(Plan.getEntry()))) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index afdf1655b4622..6458f49af29ca 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -178,6 +178,10 @@ struct VPlanTransforms {
/// replicate regions, thereby dissolving the latter.
static void replicateByVF(VPlan &Plan, ElementCount VF);
+ /// Replace replicate regions by explicitly replicating the regions' contents
+ /// \p VF times, each copy processing a single lane.
+ static void unrollReplicateRegions(VPlan &Plan, ElementCount VF);
+
/// Optimize \p Plan based on \p BestVF and \p BestUF. This may restrict the
/// resulting plan to \p BestVF and \p BestUF.
static void optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
@@ -189,6 +193,8 @@ struct VPlanTransforms {
/// block merging.
LLVM_ABI_FOR_TEST static void optimize(VPlan &Plan);
+ static bool mergeBlocksIntoPredecessors(VPlan &Plan);
+
/// Wrap predicated VPReplicateRecipes with a mask operand in an if-then
/// region block and remove the mask operand. Optimize the created regions by
/// iteratively sinking scalar operands into the region, followed by merging
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 6fa5562a6a976..a399133d90cc1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -24,6 +24,9 @@
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "vplan"
using namespace llvm;
using namespace llvm::VPlanPatternMatch;
@@ -124,6 +127,7 @@ class UnrollState {
R->setOperand(OpIdx, getValueForPart(Op, Part));
}
};
+
} // namespace
static void addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps,
@@ -665,3 +669,176 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
for (auto *R : reverse(ToRemove))
R->eraseFromParent();
}
+
+/// Process recipes in a single lane's blocks, updating them for lane-specific
+/// operations.
+static void processLane(VPlan &Plan, Type *IdxTy, unsigned Lane,
+ ElementCount VF, ArrayRef<VPBlockBase *> RegionBlocks,
+ DenseMap<VPBlockBase *, VPBlockBase *> &Old2NewBlocks) {
+ DenseMap<VPValue *, VPValue *> Old2NewVPValues;
+ for (VPBlockBase *OldVPB : RegionBlocks) {
+ auto *OldBB = cast<VPBasicBlock>(OldVPB);
+ auto *NewBB = cast<VPBasicBlock>(Old2NewBlocks.lookup(OldVPB));
+ for (const auto &[OldR, NewR] : zip(*OldBB, *NewBB)) {
+ for (const auto &[OldV, NewV] :
+ zip(OldR.definedValues(), NewR.definedValues()))
+ Old2NewVPValues[OldV] = NewV;
+ }
+
+ // Update lane operands and remap operands to use copies for current lane.
+ for (VPRecipeBase &NewR : make_early_inc_range(*NewBB)) {
+ if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(&NewR)) {
+ VPTypeAnalysis TypeInfo(Plan);
+ if (Lane != 0) {
+ Type *BaseIVTy = TypeInfo.inferScalarType(Steps->getOperand(0));
+ VPBuilder Builder(Steps);
+ Steps->setOperand(
+ 3, Builder.createNaryOp(Instruction::Add,
+ {Steps->getOperand(3),
+ Plan.getConstantInt(BaseIVTy, Lane)}));
+ }
+
+ } else if (match(&NewR, m_ExtractElement(m_VPValue(), m_ZeroInt())))
+ NewR.setOperand(1, Plan.getConstantInt(IdxTy, Lane));
+
+ // Remap operands to use lane-specific values.
+ for (const auto &[I, Op] : enumerate(NewR.operands())) {
+ // Use cloned value if operand was defined in the region.
+ if (auto *New = Old2NewVPValues.lookup(Op))
+ NewR.setOperand(I, New);
+ }
+ }
+ }
+}
+
+/// Process a single lane: clone blocks (or reuse original for lane 0), collect
+/// value mappings, and process recipes for lane-specific operations.
+static void processSingleLane(
+ VPlan &Plan, Type *IdxTy, unsigned Lane, ElementCount VF,
+ ArrayRef<VPBlockBase *> RegionBlocks, VPBlockBase *Entry,
+ VPBlockBase *Exiting,
+ SmallVectorImpl<std::pair<VPBlockBase *, VPBlockBase *>> &LaneClones) {
+ DenseMap<VPBlockBase *, VPBlockBase *> Old2NewBlocks;
+ if (Lane == 0) {
+ // Lane 0 uses the original blocks, and the recipes are adjusted:
+ // VPReplicateRecipes are converted to single-scalar ones, branch-on-mask is
+ // converted into BranchOnCond and extracts are created as needed.
+ for (VPBlockBase *VPB : RegionBlocks) {
+ Old2NewBlocks[VPB] = VPB;
+
+ for (VPRecipeBase &NewR :
+ make_early_inc_range(*cast<VPBasicBlock>(VPB))) {
+ VPBuilder Builder(&NewR);
+ for (const auto &[I, Op] : enumerate(NewR.operands())) {
+ // Skip operands that don't need extraction: scalar VF (no vectors),
+ // values defined in the same block (already scalar), or values that
+ // are already single scalars.
+ if (VF.isScalar() ||
+ (Op->getDefiningRecipe() &&
+ Op->getDefiningRecipe()->getParent() == VPB) ||
+ vputils::isSingleScalar(Op))
+ continue;
+
+ // Extract the lane from values defined outside the region.
+ VPValue *Idx = Plan.getConstantInt(IdxTy, Lane);
+ VPValue *Extract = Builder.createNaryOp(
+ Instruction::ExtractElement, {Op, Idx}, NewR.getDebugLoc());
+ NewR.setOperand(I, Extract);
+ }
+
+ if (auto *RepR = dyn_cast<VPReplicateRecipe>(&NewR)) {
+ auto *New = new VPReplicateRecipe(
+ RepR->getUnderlyingInstr(), RepR->operands(),
+ /* IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR, *RepR,
+ RepR->getDebugLoc());
+ New->insertBefore(RepR);
+ RepR->replaceAllUsesWith(New);
+ RepR->eraseFromParent();
+ } else if (auto *BranchOnMask = dyn_cast<VPBranchOnMaskRecipe>(&NewR)) {
+ Builder.createNaryOp(VPInstruction::BranchOnCond,
+ {BranchOnMask->getOperand(0)},
+ BranchOnMask->getDebugLoc());
+ BranchOnMask->eraseFromParent();
+ } else if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(&NewR)) {
+ VPTypeAnalysis TypeInfo(Plan);
+ if (Steps->getNumOperands() == 3)
+ addStartIndexForScalarSteps(Steps, 0, Plan, TypeInfo);
+ }
+ }
+ }
+ } else {
+ // Clone blocks and connect them according to original structure.
+ for (VPBlockBase *OrigBlock : RegionBlocks) {
+ VPBlockBase *ClonedBlock = OrigBlock->clone();
+ Old2NewBlocks[OrigBlock] = ClonedBlock;
+ ClonedBlock->setParent(Entry->getParent());
+ }
+ for (VPBlockBase *OrigBlock : RegionBlocks) {
+ if (OrigBlock == Exiting)
+ continue;
+ for (VPBlockBase *OrigSucc : OrigBlock->successors())
+ VPBlockUtils::connectBlocks(Old2NewBlocks[OrigBlock],
+ Old2NewBlocks[OrigSucc]);
+ }
+ }
+
+ processLane(Plan, IdxTy, Lane, VF, RegionBlocks, Old2NewBlocks);
+ LaneClones.push_back({Old2NewBlocks[Entry], Old2NewBlocks[Exiting]});
+}
+
+void VPlanTransforms::unrollReplicateRegions(VPlan &Plan, ElementCount VF) {
+ // Collect all replicate regions in the plan before modifying the CFG.
+ SmallVector<VPRegionBlock *> ReplicateRegions;
+ for (VPBlockBase *Block :
+ vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry())) {
+ if (auto *Region = dyn_cast<VPRegionBlock>(Block)) {
+ if (Region->isReplicator())
+ ReplicateRegions.push_back(Region);
+ }
+ }
+
+ Type *IdxTy = IntegerType::get(Plan.getContext(), 32);
+
+ for (VPRegionBlock *Region : ReplicateRegions) {
+ assert(!VF.isScalable() && "cannot replicate across scalable VFs");
+
+ VPBlockBase *Entry = Region->getEntry();
+ VPBlockBase *Exiting = Region->getExiting();
+
+ // Skip regions with live-outs as packing scalar results back into vectors
+ // is not yet implemented.
+ if (any_of(*cast<VPBasicBlock>(Exiting), IsaPred<VPPredInstPHIRecipe>))
+ continue;
+
+ // Get region context before dissolving.
+ VPBlockBase *Pred = Region->getSinglePredecessor();
+ assert(Pred && "Replicate region must have a single predecessor");
+ SmallVector<VPBlockBase *> Successors(Region->successors());
+
+ // Disconnect and dissolve the region.
+ VPBlockUtils::disconnectBlocks(Pred, Region);
+ for (VPBlockBase *Succ : Successors)
+ VPBlockUtils::disconnectBlocks(Region, Succ);
+
+ SmallVector<VPBlockBase *> RegionBlocks(vp_depth_first_shallow(Entry));
+ VPRegionBlock *ParentRegion = Region->getParent();
+ for (VPBlockBase *Block : RegionBlocks)
+ Block->setParent(ParentRegion);
+ VPBlockUtils::connectBlocks(Pred, Entry);
+
+ // Process each lane: clone blocks, collect value mappings, and process
+ // recipes for lane-specific operations.
+ SmallVector<std::pair<VPBlockBase *, VPBlockBase *>> LaneClones;
+ for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane) {
+ processSingleLane(Plan, IdxTy, Lane, VF, RegionBlocks, Entry, Exiting,
+ LaneClones);
+ }
+
+ // Connect lanes sequentially and connect last lane to successors.
+ for (unsigned Lane = 1; Lane < VF.getKnownMinValue(); ++Lane)
+ VPBlockUtils::connectBlocks(LaneClones[Lane - 1].second,
+ LaneClones[Lane].first);
+ for (VPBlockBase *Succ : Successors)
+ VPBlockUtils::connectBlocks(LaneClones.back().second, Succ);
+ }
+}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
index 8ab5723a52a11..1c2686b67331a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
@@ -199,8 +199,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <16 x i1> [[TMP7]], i32 0
; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP72:%.*]] = add i32 [[IV]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP72]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[IV]]
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 0
; CHECK-NEXT: store i8 [[TMP10]], ptr [[TMP9]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
index b549a06f08f8c..7feb83e7d9cba 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
@@ -290,13 +290,12 @@ define void @latch_branch_cost(ptr %dst) {
; PRED: [[VECTOR_PH]]:
; PRED-NEXT: br label %[[VECTOR_BODY:.*]]
; PRED: [[VECTOR_BODY]]:
-; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ]
+; PRED-NEXT: [[TMP2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ]
; PRED-NEXT: [[VEC_IND:%.*]] = phi <8 x i8> [ <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
; PRED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i8> [[VEC_IND]], splat (i8 99)
; PRED-NEXT: [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
; PRED-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; PRED: [[PRED_STORE_IF]]:
-; PRED-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; PRED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP2]]
; PRED-NEXT: store i8 0, ptr [[TMP3]], align 1
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE]]
@@ -304,7 +303,7 @@ define void @latch_branch_cost(ptr %dst) {
; PRED-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
; PRED-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
; PRED: [[PRED_STORE_IF1]]:
-; PRED-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1
+; PRED-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], 1
; PRED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP5]]
; PRED-NEXT: store i8 0, ptr [[TMP6]], align 1
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE2]]
@@ -312,7 +311,7 @@ define void @latch_branch_cost(ptr %dst) {
; PRED-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2
; PRED-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
; PRED: [[PRED_STORE_IF3]]:
-; PRED-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 2
+; PRED-NEXT: [[TMP8:%.*]] = add i64 [[TMP2]], 2
; PRED-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP8]]
; PRED-NEXT: store i8 0, ptr [[TMP9]], align 1
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE4]]
@@ -320,7 +319,7 @@ define void @latch_branch_cost(ptr %dst) {
; PRED-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3
; PRED-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
; PRED: [[PRED_STORE_IF5]]:
-; PRED-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 3
+; PRED-NEXT: [[TMP11:%.*]] = add i64 [[TMP2]], 3
; PRED-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP11]]
; PRED-NEXT: store i8 0, ptr [[TMP12]], align 1
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE6]]
@@ -328,7 +327,7 @@ define void @latch_branch_cost(ptr %dst) {
; PRED-NEXT: [[TMP13:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4
; PRED-NEXT: br i1 [[TMP13]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
; PRED: [[PRED_STORE_IF7]]:
-; PRED-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 4
+; PRED-NEXT: [[TMP14:%.*]] = add i64 [[TMP2]], 4
; PRED-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP14]]
; PRED-NEXT: store i8 0, ptr [[TMP15]], align 1
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE8]]
@@ -336,7 +335,7 @@ define void @latch_branch_cost(ptr %dst) {
; PRED-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5
; PRED-NEXT: br i1 [[TMP16]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
; PRED: [[PRED_STORE_IF9]]:
-; PRED-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 5
+; PRED-NEXT: [[TMP17:%.*]] = add i64 [[TMP2]], 5
; PRED-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP17]]
; PRED-NEXT: store i8 0, ptr [[TMP18]], align 1
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE10]]
@@ -344,7 +343,7 @@ define void @latch_branch_cost(ptr %dst) {
; PRED-NEXT: [[TMP19:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6
; PRED-NEXT: br i1 [[TMP19]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
; PRED: [[PRED_STORE_IF11]]:
-; PRED-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 6
+; PRED-NEXT: [[TMP20:%.*]] = add i64 [[TMP2]], 6
; PRED-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP20]]
; PRED-NEXT: store i8 0, ptr [[TMP21]], align 1
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE12]]
@@ -352,12 +351,12 @@ define void @latch_branch_cost(ptr %dst) {
; PRED-NEXT: [[TMP22:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7
; PRED-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14]]
; PRED: [[PRED_STORE_IF13]]:
-; PRED-NEXT: [[TMP23:%.*]] = add i64 [[INDEX]], 7
+; PRED-NEXT: [[TMP23:%.*]] = add i64 [[TMP2]], 7
; PRED-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP23]]
; PRED-NEXT: store i8 0, ptr [[TMP24]], align 1
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE14]]
; PRED: [[PRED_STORE_CONTINUE14]]:
-; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP2]], 8
; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i8> [[VEC_IND]], splat (i8 8)
; PRED-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 104
; PRED-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -710,121 +709,97 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; DEFAULT-NEXT: [[TMP24:%.*]] = extractelement <8 x i1> [[TMP22]], i32 0
; DEFAULT-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; DEFAULT: [[PRED_STORE_IF]]:
-; DEFAULT-NEXT: [[TMP25:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 0
-; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP25]], align 4
; DEFAULT-NEXT: [[TMP26:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 0
+; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP26]], align 4
; DEFAULT-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[TMP26]], i64 4
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP27]], align 4
-; DEFAULT-NEXT: [[TMP28:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 0
-; DEFAULT-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[TMP28]], i64 8
-; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP29]], align 4
-; DEFAULT-NEXT: [[TMP30:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 0
+; DEFAULT-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[TMP26]], i64 8
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP30]], align 4
+; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP26]], align 4
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE]]
; DEFAULT: [[PRED_STORE_CONTINUE]]:
; DEFAULT-NEXT: [[TMP31:%.*]] = extractelement <8 x i1> [[TMP22]], i32 1
; DEFAULT-NEXT: br i1 [[TMP31]], label %[[PRED_STORE_IF14:.*]], label %[[PRED_STORE_CONTINUE15:.*]]
; DEFAULT: [[PRED_STORE_IF14]]:
-; DEFAULT-NEXT: [[TMP32:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 1
-; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP32]], align 4
; DEFAULT-NEXT: [[TMP33:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 1
+; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP33]], align 4
; DEFAULT-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[TMP33]], i64 4
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP34]], align 4
-; DEFAULT-NEXT: [[TMP35:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 1
-; DEFAULT-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr [[TMP35]], i64 8
-; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP36]], align 4
-; DEFAULT-NEXT: [[TMP37:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 1
+; DEFAULT-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[TMP33]], i64 8
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP37]], align 4
+; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP33]], align 4
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE15]]
; DEFAULT: [[PRED_STORE_CONTINUE15]]:
; DEFAULT-NEXT: [[TMP38:%.*]] = extractelement <8 x i1> [[TMP22]], i32 2
; DEFAULT-NEXT: br i1 [[TMP38]], label %[[PRED_STORE_IF16:.*]], label %[[PRED_STORE_CONTINUE17:.*]]
; DEFAULT: [[PRED_STORE_IF16]]:
-; DEFAULT-NEXT: [[TMP39:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 2
-; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP39]], align 4
; DEFAULT-NEXT: [[TMP40:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 2
+; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP40]], align 4
; DEFAULT-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr [[TMP40]], i64 4
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP41]], align 4
-; DEFAULT-NEXT: [[TMP42:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 2
-; DEFAULT-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[TMP42]], i64 8
-; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP43]], align 4
-; DEFAULT-NEXT: [[TMP44:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 2
+; DEFAULT-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[TMP40]], i64 8
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP44]], align 4
+; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP40]], align 4
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE17]]
; DEFAULT: [[PRED_STORE_CONTINUE17]]:
; DEFAULT-NEXT: [[TMP45:%.*]] = extractelement <8 x i1> [[TMP22]], i32 3
; DEFAULT-NEXT: br i1 [[TMP45]], label %[[PRED_STORE_IF18:.*]], label %[[PRED_STORE_CONTINUE19:.*]]
; DEFAULT: [[PRED_STORE_IF18]]:
-; DEFAULT-NEXT: [[TMP46:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 3
-; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP46]], align 4
; DEFAULT-NEXT: [[TMP47:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 3
+; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP47]], align 4
; DEFAULT-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[TMP47]], i64 4
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP48]], align 4
-; DEFAULT-NEXT: [[TMP49:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 3
-; DEFAULT-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[TMP49]], i64 8
-; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP50]], align 4
-; DEFAULT-NEXT: [[TMP51:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 3
+; DEFAULT-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr [[TMP47]], i64 8
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP51]], align 4
+; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP47]], align 4
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE19]]
; DEFAULT: [[PRED_STORE_CONTINUE19]]:
; DEFAULT-NEXT: [[TMP52:%.*]] = extractelement <8 x i1> [[TMP22]], i32 4
; DEFAULT-NEXT: br i1 [[TMP52]], label %[[PRED_STORE_IF20:.*]], label %[[PRED_STORE_CONTINUE21:.*]]
; DEFAULT: [[PRED_STORE_IF20]]:
-; DEFAULT-NEXT: [[TMP53:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 4
-; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP53]], align 4
; DEFAULT-NEXT: [[TMP54:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 4
+; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP54]], align 4
; DEFAULT-NEXT: [[TMP55:%.*]] = getelementptr i8, ptr [[TMP54]], i64 4
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP55]], align 4
-; DEFAULT-NEXT: [[TMP56:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 4
-; DEFAULT-NEXT: [[TMP57:%.*]] = getelementptr i8, ptr [[TMP56]], i64 8
-; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP57]], align 4
-; DEFAULT-NEXT: [[TMP58:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 4
+; DEFAULT-NEXT: [[TMP58:%.*]] = getelementptr i8, ptr [[TMP54]], i64 8
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP58]], align 4
+; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP54]], align 4
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE21]]
; DEFAULT: [[PRED_STORE_CONTINUE21]]:
; DEFAULT-NEXT: [[TMP59:%.*]] = extractelement <8 x i1> [[TMP22]], i32 5
; DEFAULT-NEXT: br i1 [[TMP59]], label %[[PRED_STORE_IF22:.*]], label %[[PRED_STORE_CONTINUE23:.*]]
; DEFAULT: [[PRED_STORE_IF22]]:
-; DEFAULT-NEXT: [[TMP60:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 5
-; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP60]], align 4
; DEFAULT-NEXT: [[TMP61:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 5
+; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP61]], align 4
; DEFAULT-NEXT: [[TMP62:%.*]] = getelementptr i8, ptr [[TMP61]], i64 4
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP62]], align 4
-; DEFAULT-NEXT: [[TMP63:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 5
-; DEFAULT-NEXT: [[TMP64:%.*]] = getelementptr i8, ptr [[TMP63]], i64 8
-; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP64]], align 4
-; DEFAULT-NEXT: [[TMP65:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 5
+; DEFAULT-NEXT: [[TMP65:%.*]] = getelementptr i8, ptr [[TMP61]], i64 8
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP65]], align 4
+; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP61]], align 4
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE23]]
; DEFAULT: [[PRED_STORE_CONTINUE23]]:
; DEFAULT-NEXT: [[TMP66:%.*]] = extractelement <8 x i1> [[TMP22]], i32 6
; DEFAULT-NEXT: br i1 [[TMP66]], label %[[PRED_STORE_IF24:.*]], label %[[PRED_STORE_CONTINUE25:.*]]
; DEFAULT: [[PRED_STORE_IF24]]:
-; DEFAULT-NEXT: [[TMP67:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 6
-; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP67]], align 4
; DEFAULT-NEXT: [[TMP68:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 6
+; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP68]], align 4
; DEFAULT-NEXT: [[TMP69:%.*]] = getelementptr i8, ptr [[TMP68]], i64 4
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP69]], align 4
-; DEFAULT-NEXT: [[TMP70:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 6
-; DEFAULT-NEXT: [[TMP71:%.*]] = getelementptr i8, ptr [[TMP70]], i64 8
-; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP71]], align 4
-; DEFAULT-NEXT: [[TMP72:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 6
+; DEFAULT-NEXT: [[TMP72:%.*]] = getelementptr i8, ptr [[TMP68]], i64 8
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP72]], align 4
+; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP68]], align 4
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE25]]
; DEFAULT: [[PRED_STORE_CONTINUE25]]:
; DEFAULT-NEXT: [[TMP73:%.*]] = extractelement <8 x i1> [[TMP22]], i32 7
; DEFAULT-NEXT: br i1 [[TMP73]], label %[[PRED_STORE_IF26:.*]], label %[[PRED_STORE_CONTINUE27]]
; DEFAULT: [[PRED_STORE_IF26]]:
-; DEFAULT-NEXT: [[TMP74:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 7
-; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP74]], align 4
; DEFAULT-NEXT: [[TMP75:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 7
+; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP75]], align 4
; DEFAULT-NEXT: [[TMP76:%.*]] = getelementptr i8, ptr [[TMP75]], i64 4
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP76]], align 4
-; DEFAULT-NEXT: [[TMP77:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 7
-; DEFAULT-NEXT: [[TMP78:%.*]] = getelementptr i8, ptr [[TMP77]], i64 8
-; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP78]], align 4
-; DEFAULT-NEXT: [[TMP79:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 7
+; DEFAULT-NEXT: [[TMP79:%.*]] = getelementptr i8, ptr [[TMP75]], i64 8
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP79]], align 4
+; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP75]], align 4
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE27]]
; DEFAULT: [[PRED_STORE_CONTINUE27]]:
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
@@ -896,121 +871,97 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; PRED-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP26]], i32 0
; PRED-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; PRED: [[PRED_STORE_IF]]:
-; PRED-NEXT: [[TMP29:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 0
-; PRED-NEXT: store float 0.000000e+00, ptr [[TMP29]], align 4
; PRED-NEXT: [[TMP30:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 0
+; PRED-NEXT: store float 0.000000e+00, ptr [[TMP30]], align 4
; PRED-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr [[TMP30]], i64 4
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP31]], align 4
-; PRED-NEXT: [[TMP32:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 0
-; PRED-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[TMP32]], i64 8
-; PRED-NEXT: store float 0.000000e+00, ptr [[TMP33]], align 4
-; PRED-NEXT: [[TMP34:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 0
+; PRED-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[TMP30]], i64 8
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP34]], align 4
+; PRED-NEXT: store float 0.000000e+00, ptr [[TMP30]], align 4
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE]]
; PRED: [[PRED_STORE_CONTINUE]]:
; PRED-NEXT: [[TMP35:%.*]] = extractelement <8 x i1> [[TMP26]], i32 1
; PRED-NEXT: br i1 [[TMP35]], label %[[PRED_STORE_IF14:.*]], label %[[PRED_STORE_CONTINUE15:.*]]
; PRED: [[PRED_STORE_IF14]]:
-; PRED-NEXT: [[TMP36:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 1
-; PRED-NEXT: store float 0.000000e+00, ptr [[TMP36]], align 4
; PRED-NEXT: [[TMP37:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 1
+; PRED-NEXT: store float 0.000000e+00, ptr [[TMP37]], align 4
; PRED-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr [[TMP37]], i64 4
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP38]], align 4
-; PRED-NEXT: [[TMP39:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 1
-; PRED-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr [[TMP39]], i64 8
-; PRED-NEXT: store float 0.000000e+00, ptr [[TMP40]], align 4
-; PRED-NEXT: [[TMP41:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 1
+; PRED-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr [[TMP37]], i64 8
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP41]], align 4
+; PRED-NEXT: store float 0.000000e+00, ptr [[TMP37]], align 4
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE15]]
; PRED: [[PRED_STORE_CONTINUE15]]:
; PRED-NEXT: [[TMP42:%.*]] = extractelement <8 x i1> [[TMP26]], i32 2
; PRED-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF16:.*]], label %[[PRED_STORE_CONTINUE17:.*]]
; PRED: [[PRED_STORE_IF16]]:
-; PRED-NEXT: [[TMP43:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 2
-; PRED-NEXT: store float 0.000000e+00, ptr [[TMP43]], align 4
; PRED-NEXT: [[TMP44:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 2
+; PRED-NEXT: store float 0.000000e+00, ptr [[TMP44]], align 4
; PRED-NEXT: [[TMP45:%.*]] = getelementptr i8, ptr [[TMP44]], i64 4
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP45]], align 4
-; PRED-NEXT: [[TMP46:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 2
-; PRED-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr [[TMP46]], i64 8
-; PRED-NEXT: store float 0.000000e+00, ptr [[TMP47]], align 4
-; PRED-NEXT: [[TMP48:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 2
+; PRED-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[TMP44]], i64 8
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP48]], align 4
+; PRED-NEXT: store float 0.000000e+00, ptr [[TMP44]], align 4
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE17]]
; PRED: [[PRED_STORE_CONTINUE17]]:
; PRED-NEXT: [[TMP49:%.*]] = extractelement <8 x i1> [[TMP26]], i32 3
; PRED-NEXT: br i1 [[TMP49]], label %[[PRED_STORE_IF18:.*]], label %[[PRED_STORE_CONTINUE19:.*]]
; PRED: [[PRED_STORE_IF18]]:
-; PRED-NEXT: [[TMP50:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 3
-; PRED-NEXT: store float 0.000000e+00, ptr [[TMP50]], align 4
; PRED-NEXT: [[TMP51:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 3
+; PRED-NEXT: store float 0.000000e+00, ptr [[TMP51]], align 4
; PRED-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr [[TMP51]], i64 4
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP52]], align 4
-; PRED-NEXT: [[TMP53:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 3
-; PRED-NEXT: [[TMP54:%.*]] = getelementptr i8, ptr [[TMP53]], i64 8
-; PRED-NEXT: store float 0.000000e+00, ptr [[TMP54]], align 4
-; PRED-NEXT: [[TMP55:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 3
+; PRED-NEXT: [[TMP55:%.*]] = getelementptr i8, ptr [[TMP51]], i64 8
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP55]], align 4
+; PRED-NEXT: store float 0.000000e+00, ptr [[TMP51]], align 4
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE19]]
; PRED: [[PRED_STORE_CONTINUE19]]:
; PRED-NEXT: [[TMP56:%.*]] = extractelement <8 x i1> [[TMP26]], i32 4
; PRED-NEXT: br i1 [[TMP56]], label %[[PRED_STORE_IF20:.*]], label %[[PRED_STORE_CONTINUE21:.*]]
; PRED: [[PRED_STORE_IF20]]:
-; PRED-NEXT: [[TMP57:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 4
-; PRED-NEXT: store float 0.000000e+00, ptr [[TMP57]], align 4
; PRED-NEXT: [[TMP58:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 4
+; PRED-NEXT: store float 0.000000e+00, ptr [[TMP58]], align 4
; PRED-NEXT: [[TMP59:%.*]] = getelementptr i8, ptr [[TMP58]], i64 4
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP59]], align 4
-; PRED-NEXT: [[TMP60:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 4
-; PRED-NEXT: [[TMP61:%.*]] = getelementptr i8, ptr [[TMP60]], i64 8
-; PRED-NEXT: store float 0.000000e+00, ptr [[TMP61]], align 4
-; PRED-NEXT: [[TMP62:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 4
+; PRED-NEXT: [[TMP62:%.*]] = getelementptr i8, ptr [[TMP58]], i64 8
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP62]], align 4
+; PRED-NEXT: store float 0.000000e+00, ptr [[TMP58]], align 4
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE21]]
; PRED: [[PRED_STORE_CONTINUE21]]:
; PRED-NEXT: [[TMP63:%.*]] = extractelement <8 x i1> [[TMP26]], i32 5
; PRED-NEXT: br i1 [[TMP63]], label %[[PRED_STORE_IF22:.*]], label %[[PRED_STORE_CONTINUE23:.*]]
; PRED: [[PRED_STORE_IF22]]:
-; PRED-NEXT: [[TMP64:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 5
-; PRED-NEXT: store float 0.000000e+00, ptr [[TMP64]], align 4
; PRED-NEXT: [[TMP65:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 5
+; PRED-NEXT: store float 0.000000e+00, ptr [[TMP65]], align 4
; PRED-NEXT: [[TMP66:%.*]] = getelementptr i8, ptr [[TMP65]], i64 4
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP66]], align 4
-; PRED-NEXT: [[TMP67:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 5
-; PRED-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr [[TMP67]], i64 8
-; PRED-NEXT: store float 0.000000e+00, ptr [[TMP68]], align 4
-; PRED-NEXT: [[TMP69:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 5
+; PRED-NEXT: [[TMP69:%.*]] = getelementptr i8, ptr [[TMP65]], i64 8
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP69]], align 4
+; PRED-NEXT: store float 0.000000e+00, ptr [[TMP65]], align 4
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE23]]
; PRED: [[PRED_STORE_CONTINUE23]]:
; PRED-NEXT: [[TMP70:%.*]] = extractelement <8 x i1> [[TMP26]], i32 6
; PRED-NEXT: br i1 [[TMP70]], label %[[PRED_STORE_IF24:.*]], label %[[PRED_STORE_CONTINUE25:.*]]
; PRED: [[PRED_STORE_IF24]]:
-; PRED-NEXT: [[TMP71:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 6
-; PRED-NEXT: store float 0.000000e+00, ptr [[TMP71]], align 4
; PRED-NEXT: [[TMP72:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 6
+; PRED-NEXT: store float 0.000000e+00, ptr [[TMP72]], align 4
; PRED-NEXT: [[TMP73:%.*]] = getelementptr i8, ptr [[TMP72]], i64 4
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP73]], align 4
-; PRED-NEXT: [[TMP74:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 6
-; PRED-NEXT: [[TMP75:%.*]] = getelementptr i8, ptr [[TMP74]], i64 8
-; PRED-NEXT: store float 0.000000e+00, ptr [[TMP75]], align 4
-; PRED-NEXT: [[TMP76:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 6
+; PRED-NEXT: [[TMP76:%.*]] = getelementptr i8, ptr [[TMP72]], i64 8
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP76]], align 4
+; PRED-NEXT: store float 0.000000e+00, ptr [[TMP72]], align 4
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE25]]
; PRED: [[PRED_STORE_CONTINUE25]]:
; PRED-NEXT: [[TMP77:%.*]] = extractelement <8 x i1> [[TMP26]], i32 7
; PRED-NEXT: br i1 [[TMP77]], label %[[PRED_STORE_IF26:.*]], label %[[PRED_STORE_CONTINUE27]]
; PRED: [[PRED_STORE_IF26]]:
-; PRED-NEXT: [[TMP78:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 7
-; PRED-NEXT: store float 0.000000e+00, ptr [[TMP78]], align 4
; PRED-NEXT: [[TMP79:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 7
+; PRED-NEXT: store float 0.000000e+00, ptr [[TMP79]], align 4
; PRED-NEXT: [[TMP80:%.*]] = getelementptr i8, ptr [[TMP79]], i64 4
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP80]], align 4
-; PRED-NEXT: [[TMP81:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 7
-; PRED-NEXT: [[TMP82:%.*]] = getelementptr i8, ptr [[TMP81]], i64 8
-; PRED-NEXT: store float 0.000000e+00, ptr [[TMP82]], align 4
-; PRED-NEXT: [[TMP83:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 7
+; PRED-NEXT: [[TMP83:%.*]] = getelementptr i8, ptr [[TMP79]], i64 8
; PRED-NEXT: store float 0.000000e+00, ptr [[TMP83]], align 4
+; PRED-NEXT: store float 0.000000e+00, ptr [[TMP79]], align 4
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE27]]
; PRED: [[PRED_STORE_CONTINUE27]]:
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fold-tail-low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fold-tail-low-trip-count.ll
index 46fc9646356c8..d070451187cb0 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/fold-tail-low-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/fold-tail-low-trip-count.ll
@@ -19,10 +19,6 @@ define void @low_trip_count_small(i32 %x, ptr %dst) {
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[DST]], i64 1
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[DST]], i64 2
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[DST]], i64 3
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[DST]], i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x ptr> [[TMP1]], ptr [[NEXT_GEP2]], i32 1
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr> [[TMP2]], ptr [[NEXT_GEP3]], i32 2
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x ptr> [[TMP3]], ptr [[NEXT_GEP4]], i32 3
; CHECK-NEXT: [[TMP5:%.*]] = icmp ule <4 x i64> <i64 0, i64 1, i64 2, i64 3>, [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0
; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
index 4b097ba2422e4..760fbaea52fbe 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
@@ -610,8 +610,7 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) {
; PRED-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP6]], i32 0
; PRED-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; PRED: [[PRED_STORE_IF]]:
-; PRED-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0
-; PRED-NEXT: [[TMP9:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[TMP8]], i32 2
+; PRED-NEXT: [[TMP9:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[INDEX]], i32 2
; PRED-NEXT: store i32 0, ptr [[TMP9]], align 8
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE]]
; PRED: [[PRED_STORE_CONTINUE]]:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll
index 3311cbc11881b..e667ed1d32946 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll
@@ -64,7 +64,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
; TFCOMMON-NEXT: [[LD:%.*]] = load double, ptr [[P2:%.*]], align 8
-; TFCOMMON-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR2:[0-9]+]]
+; TFCOMMON-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR3:[0-9]+]]
; TFCOMMON-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i64 0
; TFCOMMON-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
; TFCOMMON-NEXT: [[TMP9:%.*]] = fcmp ogt <2 x double> [[TMP8]], zeroinitializer
@@ -80,7 +80,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
; TFCOMMON-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 1
; TFCOMMON-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE6]]
; TFCOMMON: pred.store.if1:
-; TFCOMMON-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 0
+; TFCOMMON-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 1
; TFCOMMON-NEXT: store double [[TMP19]], ptr [[P]], align 8
; TFCOMMON-NEXT: br label [[PRED_STORE_CONTINUE6]]
; TFCOMMON: pred.store.continue2:
@@ -106,7 +106,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_STORE_CONTINUE9]] ]
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT10:%.*]], [[PRED_STORE_CONTINUE9]] ]
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = load double, ptr [[P2:%.*]], align 8
-; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR2:[0-9]+]]
+; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3:[0-9]+]]
; TFA_INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i64 0
; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = fcmp ogt <2 x double> [[TMP12]], zeroinitializer
@@ -122,7 +122,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
; TFA_INTERLEAVE-NEXT: [[TMP29:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 1
; TFA_INTERLEAVE-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
; TFA_INTERLEAVE: pred.store.if3:
-; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 0
+; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 1
; TFA_INTERLEAVE-NEXT: store double [[TMP22]], ptr [[P]], align 8
; TFA_INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE5]]
; TFA_INTERLEAVE: pred.store.continue4:
@@ -136,7 +136,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK2]], i32 1
; TFA_INTERLEAVE-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9]]
; TFA_INTERLEAVE: pred.store.if7:
-; TFA_INTERLEAVE-NEXT: [[TMP34:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 0
+; TFA_INTERLEAVE-NEXT: [[TMP34:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 1
; TFA_INTERLEAVE-NEXT: store double [[TMP34]], ptr [[P]], align 8
; TFA_INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE9]]
; TFA_INTERLEAVE: pred.store.continue8:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
index bdbf08aecf6b3..e70bf7f01ee64 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
@@ -196,7 +196,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n)
; DEFAULT-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT5]], <16 x i8> poison, <16 x i32> zeroinitializer
; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]]
; DEFAULT: [[VECTOR_BODY]]:
-; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE35:.*]] ]
+; DEFAULT-NEXT: [[TMP9:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE35:.*]] ]
; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <16 x i8> [ <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE35]] ]
; DEFAULT-NEXT: [[VEC_IND1:%.*]] = phi <16 x i8> [ <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT36:%.*]], %[[PRED_STORE_CONTINUE35]] ]
; DEFAULT-NEXT: [[TMP0:%.*]] = icmp ule <16 x i8> [[VEC_IND]], splat (i8 14)
@@ -210,7 +210,6 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n)
; DEFAULT-NEXT: [[TMP8:%.*]] = extractelement <16 x i1> [[TMP0]], i32 0
; DEFAULT-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; DEFAULT: [[PRED_STORE_IF]]:
-; DEFAULT-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0
; DEFAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP9]]
; DEFAULT-NEXT: [[TMP11:%.*]] = extractelement <16 x i8> [[TMP7]], i32 0
; DEFAULT-NEXT: store i8 [[TMP11]], ptr [[TMP10]], align 1
@@ -219,7 +218,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n)
; DEFAULT-NEXT: [[TMP12:%.*]] = extractelement <16 x i1> [[TMP0]], i32 1
; DEFAULT-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]]
; DEFAULT: [[PRED_STORE_IF6]]:
-; DEFAULT-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 1
+; DEFAULT-NEXT: [[TMP13:%.*]] = add i64 [[TMP9]], 1
; DEFAULT-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP13]]
; DEFAULT-NEXT: [[TMP15:%.*]] = extractelement <16 x i8> [[TMP7]], i32 1
; DEFAULT-NEXT: store i8 [[TMP15]], ptr [[TMP14]], align 1
@@ -228,7 +227,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n)
; DEFAULT-NEXT: [[TMP16:%.*]] = extractelement <16 x i1> [[TMP0]], i32 2
; DEFAULT-NEXT: br i1 [[TMP16]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]]
; DEFAULT: [[PRED_STORE_IF8]]:
-; DEFAULT-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 2
+; DEFAULT-NEXT: [[TMP17:%.*]] = add i64 [[TMP9]], 2
; DEFAULT-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP17]]
; DEFAULT-NEXT: [[TMP19:%.*]] = extractelement <16 x i8> [[TMP7]], i32 2
; DEFAULT-NEXT: store i8 [[TMP19]], ptr [[TMP18]], align 1
@@ -237,7 +236,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n)
; DEFAULT-NEXT: [[TMP20:%.*]] = extractelement <16 x i1> [[TMP0]], i32 3
; DEFAULT-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF10:.*]], label %[[PRED_STORE_CONTINUE11:.*]]
; DEFAULT: [[PRED_STORE_IF10]]:
-; DEFAULT-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 3
+; DEFAULT-NEXT: [[TMP21:%.*]] = add i64 [[TMP9]], 3
; DEFAULT-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP21]]
; DEFAULT-NEXT: [[TMP23:%.*]] = extractelement <16 x i8> [[TMP7]], i32 3
; DEFAULT-NEXT: store i8 [[TMP23]], ptr [[TMP22]], align 1
@@ -246,7 +245,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n)
; DEFAULT-NEXT: [[TMP24:%.*]] = extractelement <16 x i1> [[TMP0]], i32 4
; DEFAULT-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF12:.*]], label %[[PRED_STORE_CONTINUE13:.*]]
; DEFAULT: [[PRED_STORE_IF12]]:
-; DEFAULT-NEXT: [[TMP25:%.*]] = add i64 [[INDEX]], 4
+; DEFAULT-NEXT: [[TMP25:%.*]] = add i64 [[TMP9]], 4
; DEFAULT-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP25]]
; DEFAULT-NEXT: [[TMP27:%.*]] = extractelement <16 x i8> [[TMP7]], i32 4
; DEFAULT-NEXT: store i8 [[TMP27]], ptr [[TMP26]], align 1
@@ -255,7 +254,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n)
; DEFAULT-NEXT: [[TMP28:%.*]] = extractelement <16 x i1> [[TMP0]], i32 5
; DEFAULT-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF14:.*]], label %[[PRED_STORE_CONTINUE15:.*]]
; DEFAULT: [[PRED_STORE_IF14]]:
-; DEFAULT-NEXT: [[TMP29:%.*]] = add i64 [[INDEX]], 5
+; DEFAULT-NEXT: [[TMP29:%.*]] = add i64 [[TMP9]], 5
; DEFAULT-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP29]]
; DEFAULT-NEXT: [[TMP31:%.*]] = extractelement <16 x i8> [[TMP7]], i32 5
; DEFAULT-NEXT: store i8 [[TMP31]], ptr [[TMP30]], align 1
@@ -264,7 +263,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n)
; DEFAULT-NEXT: [[TMP32:%.*]] = extractelement <16 x i1> [[TMP0]], i32 6
; DEFAULT-NEXT: br i1 [[TMP32]], label %[[PRED_STORE_IF16:.*]], label %[[PRED_STORE_CONTINUE17:.*]]
; DEFAULT: [[PRED_STORE_IF16]]:
-; DEFAULT-NEXT: [[TMP33:%.*]] = add i64 [[INDEX]], 6
+; DEFAULT-NEXT: [[TMP33:%.*]] = add i64 [[TMP9]], 6
; DEFAULT-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP33]]
; DEFAULT-NEXT: [[TMP35:%.*]] = extractelement <16 x i8> [[TMP7]], i32 6
; DEFAULT-NEXT: store i8 [[TMP35]], ptr [[TMP34]], align 1
@@ -273,7 +272,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n)
; DEFAULT-NEXT: [[TMP36:%.*]] = extractelement <16 x i1> [[TMP0]], i32 7
; DEFAULT-NEXT: br i1 [[TMP36]], label %[[PRED_STORE_IF18:.*]], label %[[PRED_STORE_CONTINUE19:.*]]
; DEFAULT: [[PRED_STORE_IF18]]:
-; DEFAULT-NEXT: [[TMP37:%.*]] = add i64 [[INDEX]], 7
+; DEFAULT-NEXT: [[TMP37:%.*]] = add i64 [[TMP9]], 7
; DEFAULT-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP37]]
; DEFAULT-NEXT: [[TMP39:%.*]] = extractelement <16 x i8> [[TMP7]], i32 7
; DEFAULT-NEXT: store i8 [[TMP39]], ptr [[TMP38]], align 1
@@ -282,7 +281,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n)
; DEFAULT-NEXT: [[TMP40:%.*]] = extractelement <16 x i1> [[TMP0]], i32 8
; DEFAULT-NEXT: br i1 [[TMP40]], label %[[PRED_STORE_IF20:.*]], label %[[PRED_STORE_CONTINUE21:.*]]
; DEFAULT: [[PRED_STORE_IF20]]:
-; DEFAULT-NEXT: [[TMP41:%.*]] = add i64 [[INDEX]], 8
+; DEFAULT-NEXT: [[TMP41:%.*]] = add i64 [[TMP9]], 8
; DEFAULT-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP41]]
; DEFAULT-NEXT: [[TMP43:%.*]] = extractelement <16 x i8> [[TMP7]], i32 8
; DEFAULT-NEXT: store i8 [[TMP43]], ptr [[TMP42]], align 1
@@ -291,7 +290,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n)
; DEFAULT-NEXT: [[TMP44:%.*]] = extractelement <16 x i1> [[TMP0]], i32 9
; DEFAULT-NEXT: br i1 [[TMP44]], label %[[PRED_STORE_IF22:.*]], label %[[PRED_STORE_CONTINUE23:.*]]
; DEFAULT: [[PRED_STORE_IF22]]:
-; DEFAULT-NEXT: [[TMP45:%.*]] = add i64 [[INDEX]], 9
+; DEFAULT-NEXT: [[TMP45:%.*]] = add i64 [[TMP9]], 9
; DEFAULT-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP45]]
; DEFAULT-NEXT: [[TMP47:%.*]] = extractelement <16 x i8> [[TMP7]], i32 9
; DEFAULT-NEXT: store i8 [[TMP47]], ptr [[TMP46]], align 1
@@ -300,7 +299,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n)
; DEFAULT-NEXT: [[TMP48:%.*]] = extractelement <16 x i1> [[TMP0]], i32 10
; DEFAULT-NEXT: br i1 [[TMP48]], label %[[PRED_STORE_IF24:.*]], label %[[PRED_STORE_CONTINUE25:.*]]
; DEFAULT: [[PRED_STORE_IF24]]:
-; DEFAULT-NEXT: [[TMP49:%.*]] = add i64 [[INDEX]], 10
+; DEFAULT-NEXT: [[TMP49:%.*]] = add i64 [[TMP9]], 10
; DEFAULT-NEXT: [[TMP50:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP49]]
; DEFAULT-NEXT: [[TMP51:%.*]] = extractelement <16 x i8> [[TMP7]], i32 10
; DEFAULT-NEXT: store i8 [[TMP51]], ptr [[TMP50]], align 1
@@ -309,7 +308,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n)
; DEFAULT-NEXT: [[TMP52:%.*]] = extractelement <16 x i1> [[TMP0]], i32 11
; DEFAULT-NEXT: br i1 [[TMP52]], label %[[PRED_STORE_IF26:.*]], label %[[PRED_STORE_CONTINUE27:.*]]
; DEFAULT: [[PRED_STORE_IF26]]:
-; DEFAULT-NEXT: [[TMP53:%.*]] = add i64 [[INDEX]], 11
+; DEFAULT-NEXT: [[TMP53:%.*]] = add i64 [[TMP9]], 11
; DEFAULT-NEXT: [[TMP54:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP53]]
; DEFAULT-NEXT: [[TMP55:%.*]] = extractelement <16 x i8> [[TMP7]], i32 11
; DEFAULT-NEXT: store i8 [[TMP55]], ptr [[TMP54]], align 1
@@ -318,7 +317,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n)
; DEFAULT-NEXT: [[TMP56:%.*]] = extractelement <16 x i1> [[TMP0]], i32 12
; DEFAULT-NEXT: br i1 [[TMP56]], label %[[PRED_STORE_IF28:.*]], label %[[PRED_STORE_CONTINUE29:.*]]
; DEFAULT: [[PRED_STORE_IF28]]:
-; DEFAULT-NEXT: [[TMP57:%.*]] = add i64 [[INDEX]], 12
+; DEFAULT-NEXT: [[TMP57:%.*]] = add i64 [[TMP9]], 12
; DEFAULT-NEXT: [[TMP58:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP57]]
; DEFAULT-NEXT: [[TMP59:%.*]] = extractelement <16 x i8> [[TMP7]], i32 12
; DEFAULT-NEXT: store i8 [[TMP59]], ptr [[TMP58]], align 1
@@ -327,7 +326,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n)
; DEFAULT-NEXT: [[TMP60:%.*]] = extractelement <16 x i1> [[TMP0]], i32 13
; DEFAULT-NEXT: br i1 [[TMP60]], label %[[PRED_STORE_IF30:.*]], label %[[PRED_STORE_CONTINUE31:.*]]
; DEFAULT: [[PRED_STORE_IF30]]:
-; DEFAULT-NEXT: [[TMP61:%.*]] = add i64 [[INDEX]], 13
+; DEFAULT-NEXT: [[TMP61:%.*]] = add i64 [[TMP9]], 13
; DEFAULT-NEXT: [[TMP62:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP61]]
; DEFAULT-NEXT: [[TMP63:%.*]] = extractelement <16 x i8> [[TMP7]], i32 13
; DEFAULT-NEXT: store i8 [[TMP63]], ptr [[TMP62]], align 1
@@ -336,7 +335,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n)
; DEFAULT-NEXT: [[TMP64:%.*]] = extractelement <16 x i1> [[TMP0]], i32 14
; DEFAULT-NEXT: br i1 [[TMP64]], label %[[PRED_STORE_IF32:.*]], label %[[PRED_STORE_CONTINUE33:.*]]
; DEFAULT: [[PRED_STORE_IF32]]:
-; DEFAULT-NEXT: [[TMP65:%.*]] = add i64 [[INDEX]], 14
+; DEFAULT-NEXT: [[TMP65:%.*]] = add i64 [[TMP9]], 14
; DEFAULT-NEXT: [[TMP66:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP65]]
; DEFAULT-NEXT: [[TMP67:%.*]] = extractelement <16 x i8> [[TMP7]], i32 14
; DEFAULT-NEXT: store i8 [[TMP67]], ptr [[TMP66]], align 1
@@ -345,13 +344,13 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n)
; DEFAULT-NEXT: [[TMP68:%.*]] = extractelement <16 x i1> [[TMP0]], i32 15
; DEFAULT-NEXT: br i1 [[TMP68]], label %[[PRED_STORE_IF34:.*]], label %[[PRED_STORE_CONTINUE35]]
; DEFAULT: [[PRED_STORE_IF34]]:
-; DEFAULT-NEXT: [[TMP69:%.*]] = add i64 [[INDEX]], 15
+; DEFAULT-NEXT: [[TMP69:%.*]] = add i64 [[TMP9]], 15
; DEFAULT-NEXT: [[TMP70:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP69]]
; DEFAULT-NEXT: [[TMP71:%.*]] = extractelement <16 x i8> [[TMP7]], i32 15
; DEFAULT-NEXT: store i8 [[TMP71]], ptr [[TMP70]], align 1
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE35]]
; DEFAULT: [[PRED_STORE_CONTINUE35]]:
-; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP9]], 16
; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <16 x i8> [[VEC_IND]], splat (i8 16)
; DEFAULT-NEXT: [[VEC_IND_NEXT36]] = add <16 x i8> [[VEC_IND1]], splat (i8 16)
; DEFAULT-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll
index f67a3d9be408a..447435c369869 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll
@@ -239,7 +239,7 @@ define void @srem_sdiv_without_tail_folding(i32 %d.0, i32 %d.1, ptr %dst, i32 %e
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE12:.*]] ]
+; CHECK-NEXT: [[TMP29:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE12:.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE12]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add nsw <4 x i32> [[VEC_IND]], splat (i32 -1)
; CHECK-NEXT: [[TMP1:%.*]] = srem <4 x i32> [[TMP0]], [[BROADCAST_SPLAT]]
@@ -288,7 +288,6 @@ define void @srem_sdiv_without_tail_folding(i32 %d.0, i32 %d.1, ptr %dst, i32 %e
; CHECK: [[PRED_STORE_IF]]:
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP25]], i32 0
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP27]]
-; CHECK-NEXT: [[TMP29:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
@@ -297,7 +296,7 @@ define void @srem_sdiv_without_tail_folding(i32 %d.0, i32 %d.1, ptr %dst, i32 %e
; CHECK: [[PRED_STORE_IF7]]:
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i64> [[TMP25]], i32 1
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP31]]
-; CHECK-NEXT: [[TMP33:%.*]] = add i32 [[INDEX]], 1
+; CHECK-NEXT: [[TMP33:%.*]] = add i32 [[TMP29]], 1
; CHECK-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 4
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE8]]
; CHECK: [[PRED_STORE_CONTINUE8]]:
@@ -306,7 +305,7 @@ define void @srem_sdiv_without_tail_folding(i32 %d.0, i32 %d.1, ptr %dst, i32 %e
; CHECK: [[PRED_STORE_IF9]]:
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <4 x i64> [[TMP25]], i32 2
; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP35]]
-; CHECK-NEXT: [[TMP37:%.*]] = add i32 [[INDEX]], 2
+; CHECK-NEXT: [[TMP37:%.*]] = add i32 [[TMP29]], 2
; CHECK-NEXT: store i32 [[TMP37]], ptr [[TMP36]], align 4
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE10]]
; CHECK: [[PRED_STORE_CONTINUE10]]:
@@ -315,11 +314,11 @@ define void @srem_sdiv_without_tail_folding(i32 %d.0, i32 %d.1, ptr %dst, i32 %e
; CHECK: [[PRED_STORE_IF11]]:
; CHECK-NEXT: [[TMP39:%.*]] = extractelement <4 x i64> [[TMP25]], i32 3
; CHECK-NEXT: [[TMP40:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP39]]
-; CHECK-NEXT: [[TMP41:%.*]] = add i32 [[INDEX]], 3
+; CHECK-NEXT: [[TMP41:%.*]] = add i32 [[TMP29]], 3
; CHECK-NEXT: store i32 [[TMP41]], ptr [[TMP40]], align 4
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE12]]
; CHECK: [[PRED_STORE_CONTINUE12]]:
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[TMP29]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP42]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll
index 99c735f777b66..8c34e6815f843 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll
@@ -152,9 +152,7 @@ define void @load_store_interleave_group_block_var_cond(ptr noalias %data, ptr %
; VF2IC1-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
; VF2IC1-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF2IC1: [[PRED_STORE_IF]]:
-; VF2IC1-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
-; VF2IC1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[MASKS]], i64 [[TMP5]]
-; VF2IC1-NEXT: store i8 1, ptr [[TMP6]], align 1
+; VF2IC1-NEXT: store i8 1, ptr [[TMP2]], align 1
; VF2IC1-NEXT: br label %[[PRED_STORE_CONTINUE]]
; VF2IC1: [[PRED_STORE_CONTINUE]]:
; VF2IC1-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
@@ -207,9 +205,7 @@ define void @load_store_interleave_group_block_var_cond(ptr noalias %data, ptr %
; VF2IC2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0
; VF2IC2-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF2IC2: [[PRED_STORE_IF]]:
-; VF2IC2-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 0
-; VF2IC2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[MASKS]], i64 [[TMP12]]
-; VF2IC2-NEXT: store i8 1, ptr [[TMP13]], align 1
+; VF2IC2-NEXT: store i8 1, ptr [[TMP7]], align 1
; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE]]
; VF2IC2: [[PRED_STORE_CONTINUE]]:
; VF2IC2-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll
index c26176028626b..2db4b96270c2c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll
@@ -34,12 +34,9 @@ define void @load_store_interleave_group_tc_2(ptr noalias %data) {
; VF4: [[VECTOR_BODY]]:
; VF4-NEXT: br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF4: [[PRED_STORE_IF]]:
-; VF4-NEXT: [[TMP3:%.*]] = shl nsw i64 0, 1
-; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP3]]
-; VF4-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8
-; VF4-NEXT: store i64 [[TMP5]], ptr [[TMP4]], align 8
-; VF4-NEXT: [[TMP6:%.*]] = or disjoint i64 [[TMP3]], 1
-; VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP6]]
+; VF4-NEXT: [[TMP0:%.*]] = load i64, ptr [[DATA]], align 8
+; VF4-NEXT: store i64 [[TMP0]], ptr [[DATA]], align 8
+; VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 1
; VF4-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8
; VF4-NEXT: store i64 [[TMP8]], ptr [[TMP7]], align 8
; VF4-NEXT: br label %[[PRED_STORE_CONTINUE]]
diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll
index 78c71fd3beb89..fab2da95f7f1d 100644
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll
@@ -20,9 +20,8 @@ define void @test_scalar_steps_target_instruction_cost(ptr %dst) {
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP2]]
-; CHECK-NEXT: store i64 [[TMP2]], ptr [[TMP3]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: store i64 [[OFFSET_IDX]], ptr [[TMP2]], align 8
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
index dee6c274585bb..3b895d447c6c1 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
@@ -191,24 +191,23 @@ define void @PR40816() #1 {
; FORCE: [[VECTOR_PH]]:
; FORCE-NEXT: br label %[[VECTOR_BODY:.*]]
; FORCE: [[VECTOR_BODY]]:
-; FORCE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
+; FORCE-NEXT: [[TMP0:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
; FORCE-NEXT: [[VEC_IND:%.*]] = phi <2 x i8> [ <i8 0, i8 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ]
; FORCE-NEXT: [[TMP2:%.*]] = icmp ule <2 x i8> [[VEC_IND]], splat (i8 2)
; FORCE-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
; FORCE-NEXT: br i1 [[TMP3]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; FORCE: [[PRED_STORE_IF]]:
-; FORCE-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; FORCE-NEXT: store i32 [[TMP0]], ptr @b, align 1
; FORCE-NEXT: br label %[[PRED_STORE_CONTINUE]]
; FORCE: [[PRED_STORE_CONTINUE]]:
; FORCE-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
; FORCE-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
; FORCE: [[PRED_STORE_IF1]]:
-; FORCE-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
+; FORCE-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 1
; FORCE-NEXT: store i32 [[TMP1]], ptr @b, align 1
; FORCE-NEXT: br label %[[PRED_STORE_CONTINUE2]]
; FORCE: [[PRED_STORE_CONTINUE2]]:
-; FORCE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
+; FORCE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[TMP0]], 2
; FORCE-NEXT: [[VEC_IND_NEXT]] = add <2 x i8> [[VEC_IND]], splat (i8 2)
; FORCE-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], 4
; FORCE-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll
index f65a9d7d45ed8..9c4aff56a62a4 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll
@@ -63,28 +63,27 @@ define void @redundant_or_1(ptr %dst, i1 %c.0, i1 %c.1) {
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 0
-; CHECK-NEXT: store i32 0, ptr [[TMP8]], align 4
+; CHECK-NEXT: store i32 0, ptr [[TMP8:%.*]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
; CHECK: pred.store.if3:
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 1
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 1
; CHECK-NEXT: store i32 0, ptr [[TMP11]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
; CHECK: pred.store.continue4:
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
; CHECK: pred.store.if5:
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 2
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 2
; CHECK-NEXT: store i32 0, ptr [[TMP14]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
; CHECK: pred.store.continue6:
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
; CHECK: pred.store.if7:
-; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 3
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 3
; CHECK-NEXT: store i32 0, ptr [[TMP17]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
; CHECK: pred.store.continue8:
@@ -138,28 +137,27 @@ define void @redundant_or_2(ptr %dst, i1 %c.0, i1 %c.1) {
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 0
-; CHECK-NEXT: store i32 0, ptr [[TMP7]], align 4
+; CHECK-NEXT: store i32 0, ptr [[TMP7:%.*]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
; CHECK: pred.store.if3:
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 1
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 1
; CHECK-NEXT: store i32 0, ptr [[TMP10]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
; CHECK: pred.store.continue4:
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
; CHECK: pred.store.if5:
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 2
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 2
; CHECK-NEXT: store i32 0, ptr [[TMP13]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
; CHECK: pred.store.continue6:
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
; CHECK: pred.store.if7:
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 3
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 3
; CHECK-NEXT: store i32 0, ptr [[TMP16]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
; CHECK: pred.store.continue8:
@@ -214,28 +212,27 @@ define void @redundant_and_1(ptr %dst, i1 %c.0, i1 %c.1) {
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 0
-; CHECK-NEXT: store i32 0, ptr [[TMP9]], align 4
+; CHECK-NEXT: store i32 0, ptr [[TMP9:%.*]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP6]], i32 1
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
; CHECK: pred.store.if3:
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 1
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 1
; CHECK-NEXT: store i32 0, ptr [[TMP12]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
; CHECK: pred.store.continue4:
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP6]], i32 2
; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
; CHECK: pred.store.if5:
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 2
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 2
; CHECK-NEXT: store i32 0, ptr [[TMP15]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
; CHECK: pred.store.continue6:
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP6]], i32 3
; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
; CHECK: pred.store.if7:
-; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 3
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 3
; CHECK-NEXT: store i32 0, ptr [[TMP18]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
; CHECK: pred.store.continue8:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll
index 651e2ad5e74da..5eed334de2619 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll
@@ -414,7 +414,7 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
; CHECK-NEXT: [[TMP8:%.*]] = shl nsw i64 [[TMP4]], 2
; CHECK-NEXT: [[TMP9:%.*]] = shl nsw i64 [[TMP5]], 2
; CHECK-NEXT: [[TMP10:%.*]] = shl nsw i64 [[TMP6]], 2
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr nusw double, ptr [[A:%.*]], i64 [[TMP7]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[TMP7]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr nusw double, ptr [[A]], i64 [[TMP8]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr nusw double, ptr [[A]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr nusw double, ptr [[A]], i64 [[TMP10]]
@@ -433,10 +433,7 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP19]], i32 0
; CHECK-NEXT: br i1 [[TMP23]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP24:%.*]] = shl nsw i64 [[TMP3]], 2
-; CHECK-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP24]]
-; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP25]], align 8
+; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP11]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i1> [[TMP19]], i32 1
@@ -592,17 +589,14 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX40:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL1]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT51:%.*]], [[PRED_STORE_CONTINUE50:%.*]] ]
; CHECK-NEXT: [[TMP87:%.*]] = shl nsw i64 [[INDEX40]], 2
-; CHECK-NEXT: [[TMP89:%.*]] = getelementptr nusw double, ptr [[A]], i64 [[TMP87]]
+; CHECK-NEXT: [[TMP89:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP87]]
; CHECK-NEXT: [[WIDE_VEC41:%.*]] = load <16 x double>, ptr [[TMP89]], align 8
; CHECK-NEXT: [[STRIDED_VEC42:%.*]] = shufflevector <16 x double> [[WIDE_VEC41]], <16 x double> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
; CHECK-NEXT: [[TMP90:%.*]] = fcmp oeq <4 x double> [[STRIDED_VEC42]], zeroinitializer
; CHECK-NEXT: [[TMP91:%.*]] = extractelement <4 x i1> [[TMP90]], i32 0
; CHECK-NEXT: br i1 [[TMP91]], label [[PRED_STORE_IF43:%.*]], label [[PRED_STORE_CONTINUE44:%.*]]
; CHECK: pred.store.if43:
-; CHECK-NEXT: [[TMP86:%.*]] = add i64 [[INDEX40]], 0
-; CHECK-NEXT: [[TMP92:%.*]] = shl nsw i64 [[TMP86]], 2
-; CHECK-NEXT: [[TMP93:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP92]]
-; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP93]], align 8
+; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP89]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE44]]
; CHECK: pred.store.continue44:
; CHECK-NEXT: [[TMP94:%.*]] = extractelement <4 x i1> [[TMP90]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
index aff665dad85a7..de2b1d1eedb81 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
@@ -152,8 +152,6 @@ define void @foo2(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr n
; FVW2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 16>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE3]] ]
; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16
; FVW2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16
-; FVW2-NEXT: [[TMP18:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0
-; FVW2-NEXT: [[TMP19:%.*]] = insertelement <2 x i64> [[TMP18]], i64 [[TMP1]], i32 1
; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[OFFSET_IDX]]
; FVW2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]]
; FVW2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
@@ -268,8 +266,6 @@ define void @foo3(ptr noalias %in, ptr noalias %out, ptr noalias %trigger) {
; FVW2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 16>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ]
; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16
; FVW2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16
-; FVW2-NEXT: [[TMP18:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0
-; FVW2-NEXT: [[TMP19:%.*]] = insertelement <2 x i64> [[TMP18]], i64 [[TMP1]], i32 1
; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[OFFSET_IDX]]
; FVW2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]]
; FVW2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
@@ -371,8 +367,6 @@ define void @foo2_addrspace(ptr addrspace(1) noalias %in, ptr addrspace(1) noali
; FVW2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 16>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE3]] ]
; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16
; FVW2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16
-; FVW2-NEXT: [[TMP18:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0
-; FVW2-NEXT: [[TMP19:%.*]] = insertelement <2 x i64> [[TMP18]], i64 [[TMP1]], i32 1
; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[OFFSET_IDX]]
; FVW2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]]
; FVW2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
@@ -473,8 +467,6 @@ define void @foo2_addrspace2(ptr addrspace(1) noalias %in, ptr addrspace(0) noal
; FVW2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 16>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE3]] ]
; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16
; FVW2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16
-; FVW2-NEXT: [[TMP18:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0
-; FVW2-NEXT: [[TMP19:%.*]] = insertelement <2 x i64> [[TMP18]], i64 [[TMP1]], i32 1
; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[OFFSET_IDX]]
; FVW2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]]
; FVW2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
@@ -575,8 +567,6 @@ define void @foo2_addrspace3(ptr addrspace(0) noalias %in, ptr addrspace(1) noal
; FVW2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 16>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE3]] ]
; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16
; FVW2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16
-; FVW2-NEXT: [[TMP18:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0
-; FVW2-NEXT: [[TMP19:%.*]] = insertelement <2 x i64> [[TMP18]], i64 [[TMP1]], i32 1
; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[OFFSET_IDX]]
; FVW2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]]
; FVW2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
index aed00abdbc3fd..a903cbd55304b 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
@@ -584,8 +584,7 @@ define void @wide_iv_trunc(ptr %dst, i64 %N) {
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP1]], 0
-; CHECK-NEXT: store i32 [[TMP4]], ptr [[DST]], align 4
+; CHECK-NEXT: store i32 [[TMP1]], ptr [[DST]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll b/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll
index cbf62865cdce5..d36260a7a5492 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll
@@ -28,23 +28,23 @@ define i32 @unused_blend_after_unrolling(ptr %p, i32 %a, i1 %c.1, i16 %x, i16 %y
; CHECK: [[PRED_SDIV_IF]]:
; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE]]
; CHECK: [[PRED_SDIV_CONTINUE]]:
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
-; CHECK-NEXT: br i1 [[TMP3]], label %[[PRED_SDIV_IF4:.*]], label %[[PRED_SDIV_CONTINUE5:.*]]
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
+; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_SDIV_IF4:.*]], label %[[PRED_SDIV_CONTINUE5:.*]]
; CHECK: [[PRED_SDIV_IF4]]:
; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE5]]
; CHECK: [[PRED_SDIV_CONTINUE5]]:
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_SDIV_IF6:.*]], label %[[PRED_SDIV_CONTINUE7:.*]]
; CHECK: [[PRED_SDIV_IF6]]:
; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE7]]
; CHECK: [[PRED_SDIV_CONTINUE7]]:
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_SDIV_IF8:.*]], label %[[PRED_SDIV_CONTINUE9:.*]]
; CHECK: [[PRED_SDIV_IF8]]:
; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE9]]
; CHECK: [[PRED_SDIV_CONTINUE9]]:
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
-; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_SDIV_IF10:.*]], label %[[PRED_SDIV_CONTINUE11:.*]]
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
+; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_SDIV_IF10:.*]], label %[[PRED_SDIV_CONTINUE11:.*]]
; CHECK: [[PRED_SDIV_IF10]]:
; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE11]]
; CHECK: [[PRED_SDIV_CONTINUE11]]:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll b/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll
index fb5a5f9c068b9..7e08ed43a2432 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll
@@ -18,8 +18,7 @@ define void @test_pr55096(i64 %c, ptr %p) {
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = add i16 [[TMP4]], 2008
+; CHECK-NEXT: [[TMP5:%.*]] = add i16 [[OFFSET_IDX]], 2008
; CHECK-NEXT: [[TMP6:%.*]] = udiv i16 4943, [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[P]], i16 [[TMP6]]
; CHECK-NEXT: store i16 0, ptr [[TMP7]], align 2
diff --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
index 93cf59c019d5f..7ab83f04c188a 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
@@ -182,7 +182,7 @@ define void @example2(i32 %n, i32 %x) optsize {
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE24]]
; CHECK: pred.store.continue23:
; CHECK-NEXT: [[TMP42:%.*]] = extractelement <4 x i1> [[TMP18]], i64 3
-; CHECK-NEXT: br i1 [[TMP42]], label [[PRED_STORE_IF25:%.*]], label [[PRED_STORE_CONTINUE26]]
+; CHECK-NEXT: br i1 [[TMP42]], label [[PRED_STORE_IF24:%.*]], label [[PRED_STORE_CONTINUE26]]
; CHECK: pred.store.if24:
; CHECK-NEXT: [[TMP43:%.*]] = add i64 [[OFFSET_IDX]], 3
; CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, ptr @b, i64 [[TMP43]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll
index b5d42a8f71430..1a960facbd7e1 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll
@@ -1399,8 +1399,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE44]]
; DISABLED_MASKED_STRIDED: pred.store.continue44:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP132:%.*]] = sub <8 x i8> zeroinitializer, [[TMP99]]
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP133:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP133]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP100]], label [[PRED_STORE_IF44:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if45:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP134:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP135:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP134]]
@@ -1408,8 +1407,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP136]], ptr [[TMP135]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE46]]
; DISABLED_MASKED_STRIDED: pred.store.continue46:
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP137:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP137]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP104]], label [[PRED_STORE_IF46:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if47:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP138:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1
; DISABLED_MASKED_STRIDED-NEXT: [[TMP139:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP138]]
@@ -1417,58 +1415,52 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP140]], ptr [[TMP139]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE48]]
; DISABLED_MASKED_STRIDED: pred.store.continue48:
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP141]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP108]], label [[PRED_STORE_IF48:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if49:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP142:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2
; DISABLED_MASKED_STRIDED-NEXT: [[TMP143:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP142]]
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = extractelement <8 x i8> [[TMP132]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP144]], ptr [[TMP143]], align 1
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = extractelement <8 x i8> [[TMP132]], i64 2
+; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP141]], ptr [[TMP143]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE50]]
; DISABLED_MASKED_STRIDED: pred.store.continue50:
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP145:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP145]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP112]], label [[PRED_STORE_IF50:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if51:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP146:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3
; DISABLED_MASKED_STRIDED-NEXT: [[TMP147:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP146]]
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = extractelement <8 x i8> [[TMP132]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP148]], ptr [[TMP147]], align 1
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = extractelement <8 x i8> [[TMP132]], i64 3
+; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP144]], ptr [[TMP147]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE52]]
; DISABLED_MASKED_STRIDED: pred.store.continue52:
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP149:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP149]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP116]], label [[PRED_STORE_IF52:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if53:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP150:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4
; DISABLED_MASKED_STRIDED-NEXT: [[TMP151:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP150]]
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = extractelement <8 x i8> [[TMP132]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP152]], ptr [[TMP151]], align 1
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = extractelement <8 x i8> [[TMP132]], i64 4
+; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP148]], ptr [[TMP151]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE54]]
; DISABLED_MASKED_STRIDED: pred.store.continue54:
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP153]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP120]], label [[PRED_STORE_IF54:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if55:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP154:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5
; DISABLED_MASKED_STRIDED-NEXT: [[TMP155:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP154]]
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = extractelement <8 x i8> [[TMP132]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP156]], ptr [[TMP155]], align 1
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = extractelement <8 x i8> [[TMP132]], i64 5
+; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP152]], ptr [[TMP155]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE56]]
; DISABLED_MASKED_STRIDED: pred.store.continue56:
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP157:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP157]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP124]], label [[PRED_STORE_IF56:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if57:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP158:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6
; DISABLED_MASKED_STRIDED-NEXT: [[TMP159:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP158]]
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP160:%.*]] = extractelement <8 x i8> [[TMP132]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP160]], ptr [[TMP159]], align 1
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = extractelement <8 x i8> [[TMP132]], i64 6
+; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP153]], ptr [[TMP159]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE58]]
; DISABLED_MASKED_STRIDED: pred.store.continue58:
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP161:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP161]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP128]], label [[PRED_STORE_IF58:%.*]], label [[PRED_STORE_CONTINUE60]]
; DISABLED_MASKED_STRIDED: pred.store.if59:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP162:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7
; DISABLED_MASKED_STRIDED-NEXT: [[TMP163:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP162]]
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP164:%.*]] = extractelement <8 x i8> [[TMP132]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP164]], ptr [[TMP163]], align 1
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = extractelement <8 x i8> [[TMP132]], i64 7
+; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP156]], ptr [[TMP163]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]]
; DISABLED_MASKED_STRIDED: pred.store.continue60:
; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
@@ -1804,8 +1796,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE44]]
; DISABLED_MASKED_STRIDED: pred.store.continue44:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP132:%.*]] = sub <8 x i8> zeroinitializer, [[TMP99]]
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP133:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP133]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP100]], label [[PRED_STORE_IF44:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if45:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP134:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP135:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP134]]
@@ -1813,8 +1804,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP136]], ptr [[TMP135]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE46]]
; DISABLED_MASKED_STRIDED: pred.store.continue46:
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP137:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP137]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP104]], label [[PRED_STORE_IF46:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if47:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP138:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1
; DISABLED_MASKED_STRIDED-NEXT: [[TMP139:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP138]]
@@ -1822,58 +1812,52 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP140]], ptr [[TMP139]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE48]]
; DISABLED_MASKED_STRIDED: pred.store.continue48:
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP141]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP108]], label [[PRED_STORE_IF48:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if49:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP142:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2
; DISABLED_MASKED_STRIDED-NEXT: [[TMP143:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP142]]
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = extractelement <8 x i8> [[TMP132]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP144]], ptr [[TMP143]], align 1
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = extractelement <8 x i8> [[TMP132]], i64 2
+; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP141]], ptr [[TMP143]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE50]]
; DISABLED_MASKED_STRIDED: pred.store.continue50:
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP145:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP145]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP112]], label [[PRED_STORE_IF50:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if51:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP146:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3
; DISABLED_MASKED_STRIDED-NEXT: [[TMP147:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP146]]
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = extractelement <8 x i8> [[TMP132]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP148]], ptr [[TMP147]], align 1
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = extractelement <8 x i8> [[TMP132]], i64 3
+; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP144]], ptr [[TMP147]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE52]]
; DISABLED_MASKED_STRIDED: pred.store.continue52:
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP149:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP149]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP116]], label [[PRED_STORE_IF52:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if53:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP150:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4
; DISABLED_MASKED_STRIDED-NEXT: [[TMP151:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP150]]
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = extractelement <8 x i8> [[TMP132]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP152]], ptr [[TMP151]], align 1
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = extractelement <8 x i8> [[TMP132]], i64 4
+; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP148]], ptr [[TMP151]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE54]]
; DISABLED_MASKED_STRIDED: pred.store.continue54:
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP153]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP120]], label [[PRED_STORE_IF54:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if55:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP154:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5
; DISABLED_MASKED_STRIDED-NEXT: [[TMP155:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP154]]
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = extractelement <8 x i8> [[TMP132]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP156]], ptr [[TMP155]], align 1
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = extractelement <8 x i8> [[TMP132]], i64 5
+; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP152]], ptr [[TMP155]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE56]]
; DISABLED_MASKED_STRIDED: pred.store.continue56:
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP157:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP157]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP124]], label [[PRED_STORE_IF56:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if57:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP158:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6
; DISABLED_MASKED_STRIDED-NEXT: [[TMP159:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP158]]
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP160:%.*]] = extractelement <8 x i8> [[TMP132]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP160]], ptr [[TMP159]], align 1
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = extractelement <8 x i8> [[TMP132]], i64 6
+; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP153]], ptr [[TMP159]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE58]]
; DISABLED_MASKED_STRIDED: pred.store.continue58:
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP161:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP161]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP128]], label [[PRED_STORE_IF58:%.*]], label [[PRED_STORE_CONTINUE60]]
; DISABLED_MASKED_STRIDED: pred.store.if59:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP162:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7
; DISABLED_MASKED_STRIDED-NEXT: [[TMP163:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP162]]
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP164:%.*]] = extractelement <8 x i8> [[TMP132]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP164]], ptr [[TMP163]], align 1
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = extractelement <8 x i8> [[TMP132]], i64 7
+; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP156]], ptr [[TMP163]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]]
; DISABLED_MASKED_STRIDED: pred.store.continue60:
; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
@@ -2129,8 +2113,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE44]]
; ENABLED_MASKED_STRIDED: pred.store.continue44:
; ENABLED_MASKED_STRIDED-NEXT: [[TMP132:%.*]] = sub <8 x i8> zeroinitializer, [[TMP99]]
-; ENABLED_MASKED_STRIDED-NEXT: [[TMP133:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0
-; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP133]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
+; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP100]], label [[PRED_STORE_IF44:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
; ENABLED_MASKED_STRIDED: pred.store.if45:
; ENABLED_MASKED_STRIDED-NEXT: [[TMP134:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0
; ENABLED_MASKED_STRIDED-NEXT: [[TMP135:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP134]]
@@ -2138,8 +2121,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP136]], ptr [[TMP135]], align 1
; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE46]]
; ENABLED_MASKED_STRIDED: pred.store.continue46:
-; ENABLED_MASKED_STRIDED-NEXT: [[TMP137:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1
-; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP137]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
+; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP104]], label [[PRED_STORE_IF46:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
; ENABLED_MASKED_STRIDED: pred.store.if47:
; ENABLED_MASKED_STRIDED-NEXT: [[TMP138:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1
; ENABLED_MASKED_STRIDED-NEXT: [[TMP139:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP138]]
@@ -2147,58 +2129,52 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP140]], ptr [[TMP139]], align 1
; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE48]]
; ENABLED_MASKED_STRIDED: pred.store.continue48:
-; ENABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2
-; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP141]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
+; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP108]], label [[PRED_STORE_IF48:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
; ENABLED_MASKED_STRIDED: pred.store.if49:
; ENABLED_MASKED_STRIDED-NEXT: [[TMP142:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2
; ENABLED_MASKED_STRIDED-NEXT: [[TMP143:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP142]]
-; ENABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = extractelement <8 x i8> [[TMP132]], i64 2
-; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP144]], ptr [[TMP143]], align 1
+; ENABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = extractelement <8 x i8> [[TMP132]], i64 2
+; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP141]], ptr [[TMP143]], align 1
; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE50]]
; ENABLED_MASKED_STRIDED: pred.store.continue50:
-; ENABLED_MASKED_STRIDED-NEXT: [[TMP145:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3
-; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP145]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
+; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP112]], label [[PRED_STORE_IF50:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
; ENABLED_MASKED_STRIDED: pred.store.if51:
; ENABLED_MASKED_STRIDED-NEXT: [[TMP146:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3
; ENABLED_MASKED_STRIDED-NEXT: [[TMP147:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP146]]
-; ENABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = extractelement <8 x i8> [[TMP132]], i64 3
-; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP148]], ptr [[TMP147]], align 1
+; ENABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = extractelement <8 x i8> [[TMP132]], i64 3
+; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP144]], ptr [[TMP147]], align 1
; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE52]]
; ENABLED_MASKED_STRIDED: pred.store.continue52:
-; ENABLED_MASKED_STRIDED-NEXT: [[TMP149:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4
-; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP149]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
+; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP116]], label [[PRED_STORE_IF52:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
; ENABLED_MASKED_STRIDED: pred.store.if53:
; ENABLED_MASKED_STRIDED-NEXT: [[TMP150:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4
; ENABLED_MASKED_STRIDED-NEXT: [[TMP151:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP150]]
-; ENABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = extractelement <8 x i8> [[TMP132]], i64 4
-; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP152]], ptr [[TMP151]], align 1
+; ENABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = extractelement <8 x i8> [[TMP132]], i64 4
+; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP148]], ptr [[TMP151]], align 1
; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE54]]
; ENABLED_MASKED_STRIDED: pred.store.continue54:
-; ENABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5
-; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP153]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
+; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP120]], label [[PRED_STORE_IF54:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
; ENABLED_MASKED_STRIDED: pred.store.if55:
; ENABLED_MASKED_STRIDED-NEXT: [[TMP154:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5
; ENABLED_MASKED_STRIDED-NEXT: [[TMP155:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP154]]
-; ENABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = extractelement <8 x i8> [[TMP132]], i64 5
-; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP156]], ptr [[TMP155]], align 1
+; ENABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = extractelement <8 x i8> [[TMP132]], i64 5
+; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP152]], ptr [[TMP155]], align 1
; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE56]]
; ENABLED_MASKED_STRIDED: pred.store.continue56:
-; ENABLED_MASKED_STRIDED-NEXT: [[TMP157:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6
-; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP157]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
+; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP124]], label [[PRED_STORE_IF56:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
; ENABLED_MASKED_STRIDED: pred.store.if57:
; ENABLED_MASKED_STRIDED-NEXT: [[TMP158:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6
; ENABLED_MASKED_STRIDED-NEXT: [[TMP159:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP158]]
-; ENABLED_MASKED_STRIDED-NEXT: [[TMP160:%.*]] = extractelement <8 x i8> [[TMP132]], i64 6
-; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP160]], ptr [[TMP159]], align 1
+; ENABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = extractelement <8 x i8> [[TMP132]], i64 6
+; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP153]], ptr [[TMP159]], align 1
; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE58]]
; ENABLED_MASKED_STRIDED: pred.store.continue58:
-; ENABLED_MASKED_STRIDED-NEXT: [[TMP161:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7
-; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP161]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]]
+; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP128]], label [[PRED_STORE_IF58:%.*]], label [[PRED_STORE_CONTINUE60]]
; ENABLED_MASKED_STRIDED: pred.store.if59:
; ENABLED_MASKED_STRIDED-NEXT: [[TMP162:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7
; ENABLED_MASKED_STRIDED-NEXT: [[TMP163:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP162]]
-; ENABLED_MASKED_STRIDED-NEXT: [[TMP164:%.*]] = extractelement <8 x i8> [[TMP132]], i64 7
-; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP164]], ptr [[TMP163]], align 1
+; ENABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = extractelement <8 x i8> [[TMP132]], i64 7
+; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP156]], ptr [[TMP163]], align 1
; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]]
; ENABLED_MASKED_STRIDED: pred.store.continue60:
; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
@@ -2524,8 +2500,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE46]]
; DISABLED_MASKED_STRIDED: pred.store.continue46:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP134:%.*]] = sub <8 x i8> zeroinitializer, [[TMP101]]
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP135:%.*]] = extractelement <8 x i1> [[TMP3]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP135]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP102]], label [[PRED_STORE_IF46:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if47:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP136:%.*]] = extractelement <8 x i32> [[TMP52]], i64 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP137:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP136]]
@@ -2533,8 +2508,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP138]], ptr [[TMP137]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE48]]
; DISABLED_MASKED_STRIDED: pred.store.continue48:
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP139:%.*]] = extractelement <8 x i1> [[TMP3]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP139]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP106]], label [[PRED_STORE_IF48:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if49:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP140:%.*]] = extractelement <8 x i32> [[TMP52]], i64 1
; DISABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP140]]
@@ -2542,58 +2516,52 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP142]], ptr [[TMP141]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE50]]
; DISABLED_MASKED_STRIDED: pred.store.continue50:
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP143:%.*]] = extractelement <8 x i1> [[TMP3]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP143]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP110]], label [[PRED_STORE_IF50:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if51:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = extractelement <8 x i32> [[TMP52]], i64 2
; DISABLED_MASKED_STRIDED-NEXT: [[TMP145:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP144]]
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP146:%.*]] = extractelement <8 x i8> [[TMP134]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP146]], ptr [[TMP145]], align 1
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP143:%.*]] = extractelement <8 x i8> [[TMP134]], i64 2
+; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP143]], ptr [[TMP145]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE52]]
; DISABLED_MASKED_STRIDED: pred.store.continue52:
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP147:%.*]] = extractelement <8 x i1> [[TMP3]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP147]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP114]], label [[PRED_STORE_IF52:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if53:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = extractelement <8 x i32> [[TMP52]], i64 3
; DISABLED_MASKED_STRIDED-NEXT: [[TMP149:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP148]]
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP150:%.*]] = extractelement <8 x i8> [[TMP134]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP150]], ptr [[TMP149]], align 1
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP146:%.*]] = extractelement <8 x i8> [[TMP134]], i64 3
+; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP146]], ptr [[TMP149]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE54]]
; DISABLED_MASKED_STRIDED: pred.store.continue54:
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP151:%.*]] = extractelement <8 x i1> [[TMP3]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP151]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP118]], label [[PRED_STORE_IF54:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if55:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = extractelement <8 x i32> [[TMP52]], i64 4
; DISABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP152]]
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP154:%.*]] = extractelement <8 x i8> [[TMP134]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP154]], ptr [[TMP153]], align 1
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP150:%.*]] = extractelement <8 x i8> [[TMP134]], i64 4
+; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP150]], ptr [[TMP153]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE56]]
; DISABLED_MASKED_STRIDED: pred.store.continue56:
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP155:%.*]] = extractelement <8 x i1> [[TMP3]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP155]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP122]], label [[PRED_STORE_IF56:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if57:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = extractelement <8 x i32> [[TMP52]], i64 5
; DISABLED_MASKED_STRIDED-NEXT: [[TMP157:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP156]]
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP158:%.*]] = extractelement <8 x i8> [[TMP134]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP158]], ptr [[TMP157]], align 1
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP154:%.*]] = extractelement <8 x i8> [[TMP134]], i64 5
+; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP154]], ptr [[TMP157]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE58]]
; DISABLED_MASKED_STRIDED: pred.store.continue58:
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP159:%.*]] = extractelement <8 x i1> [[TMP3]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP159]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP126]], label [[PRED_STORE_IF58:%.*]], label [[PRED_STORE_CONTINUE60:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if59:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP160:%.*]] = extractelement <8 x i32> [[TMP52]], i64 6
; DISABLED_MASKED_STRIDED-NEXT: [[TMP161:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP160]]
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP162:%.*]] = extractelement <8 x i8> [[TMP134]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP162]], ptr [[TMP161]], align 1
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP155:%.*]] = extractelement <8 x i8> [[TMP134]], i64 6
+; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP155]], ptr [[TMP161]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]]
; DISABLED_MASKED_STRIDED: pred.store.continue60:
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP163:%.*]] = extractelement <8 x i1> [[TMP3]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP163]], label [[PRED_STORE_IF61:%.*]], label [[PRED_STORE_CONTINUE62]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP130]], label [[PRED_STORE_IF60:%.*]], label [[PRED_STORE_CONTINUE62]]
; DISABLED_MASKED_STRIDED: pred.store.if61:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP164:%.*]] = extractelement <8 x i32> [[TMP52]], i64 7
; DISABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP164]]
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP166:%.*]] = extractelement <8 x i8> [[TMP134]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP166]], ptr [[TMP165]], align 1
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP158:%.*]] = extractelement <8 x i8> [[TMP134]], i64 7
+; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP158]], ptr [[TMP165]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE62]]
; DISABLED_MASKED_STRIDED: pred.store.continue62:
; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
@@ -2958,8 +2926,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE44]]
; DISABLED_MASKED_STRIDED: pred.store.continue44:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP132:%.*]] = sub <8 x i8> zeroinitializer, [[TMP99]]
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP133:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP133]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP100]], label [[PRED_STORE_IF44:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if45:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP134:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP135:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP134]]
@@ -2967,8 +2934,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP136]], ptr [[TMP135]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE46]]
; DISABLED_MASKED_STRIDED: pred.store.continue46:
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP137:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP137]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP104]], label [[PRED_STORE_IF46:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if47:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP138:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1
; DISABLED_MASKED_STRIDED-NEXT: [[TMP139:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP138]]
@@ -2976,58 +2942,52 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP140]], ptr [[TMP139]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE48]]
; DISABLED_MASKED_STRIDED: pred.store.continue48:
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP141]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP108]], label [[PRED_STORE_IF48:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if49:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP142:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2
; DISABLED_MASKED_STRIDED-NEXT: [[TMP143:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP142]]
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = extractelement <8 x i8> [[TMP132]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP144]], ptr [[TMP143]], align 1
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = extractelement <8 x i8> [[TMP132]], i64 2
+; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP141]], ptr [[TMP143]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE50]]
; DISABLED_MASKED_STRIDED: pred.store.continue50:
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP145:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP145]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP112]], label [[PRED_STORE_IF50:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if51:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP146:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3
; DISABLED_MASKED_STRIDED-NEXT: [[TMP147:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP146]]
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = extractelement <8 x i8> [[TMP132]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP148]], ptr [[TMP147]], align 1
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = extractelement <8 x i8> [[TMP132]], i64 3
+; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP144]], ptr [[TMP147]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE52]]
; DISABLED_MASKED_STRIDED: pred.store.continue52:
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP149:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP149]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP116]], label [[PRED_STORE_IF52:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if53:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP150:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4
; DISABLED_MASKED_STRIDED-NEXT: [[TMP151:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP150]]
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = extractelement <8 x i8> [[TMP132]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP152]], ptr [[TMP151]], align 1
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = extractelement <8 x i8> [[TMP132]], i64 4
+; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP148]], ptr [[TMP151]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE54]]
; DISABLED_MASKED_STRIDED: pred.store.continue54:
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP153]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP120]], label [[PRED_STORE_IF54:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if55:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP154:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5
; DISABLED_MASKED_STRIDED-NEXT: [[TMP155:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP154]]
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = extractelement <8 x i8> [[TMP132]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP156]], ptr [[TMP155]], align 1
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = extractelement <8 x i8> [[TMP132]], i64 5
+; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP152]], ptr [[TMP155]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE56]]
; DISABLED_MASKED_STRIDED: pred.store.continue56:
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP157:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP157]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP124]], label [[PRED_STORE_IF56:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if57:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP158:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6
; DISABLED_MASKED_STRIDED-NEXT: [[TMP159:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP158]]
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP160:%.*]] = extractelement <8 x i8> [[TMP132]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP160]], ptr [[TMP159]], align 1
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = extractelement <8 x i8> [[TMP132]], i64 6
+; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP153]], ptr [[TMP159]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE58]]
; DISABLED_MASKED_STRIDED: pred.store.continue58:
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP161:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP161]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP128]], label [[PRED_STORE_IF58:%.*]], label [[PRED_STORE_CONTINUE60]]
; DISABLED_MASKED_STRIDED: pred.store.if59:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP162:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7
; DISABLED_MASKED_STRIDED-NEXT: [[TMP163:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP162]]
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP164:%.*]] = extractelement <8 x i8> [[TMP132]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP164]], ptr [[TMP163]], align 1
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = extractelement <8 x i8> [[TMP132]], i64 7
+; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP156]], ptr [[TMP163]], align 1
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]]
; DISABLED_MASKED_STRIDED: pred.store.continue60:
; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll
index cfc588a7f5d89..69a35d43183f3 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll
@@ -181,8 +181,7 @@ define dso_local void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr
; DISABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[Y:%.*]], i64 [[INDEX]]
; DISABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_LOAD7:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr align 2 [[TMP19]], <4 x i1> [[TMP0]], <4 x i16> poison)
; DISABLED_MASKED_STRIDED-NEXT: [[TMP20:%.*]] = or disjoint <4 x i64> [[TMP2]], splat (i64 1)
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if8:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP20]], i64 0
; DISABLED_MASKED_STRIDED-NEXT: [[TMP23:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP22]]
@@ -190,8 +189,7 @@ define dso_local void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr
; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP24]], ptr [[TMP23]], align 2
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE9]]
; DISABLED_MASKED_STRIDED: pred.store.continue9:
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP25:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if10:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP20]], i64 1
; DISABLED_MASKED_STRIDED-NEXT: [[TMP27:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP26]]
@@ -199,8 +197,7 @@ define dso_local void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr
; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP28]], ptr [[TMP27]], align 2
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE11]]
; DISABLED_MASKED_STRIDED: pred.store.continue11:
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]]
; DISABLED_MASKED_STRIDED: pred.store.if12:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP30:%.*]] = extractelement <4 x i64> [[TMP20]], i64 2
; DISABLED_MASKED_STRIDED-NEXT: [[TMP31:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP30]]
@@ -208,8 +205,7 @@ define dso_local void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr
; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP32]], ptr [[TMP31]], align 2
; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE13]]
; DISABLED_MASKED_STRIDED: pred.store.continue13:
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP33:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP33]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15]]
+; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15]]
; DISABLED_MASKED_STRIDED: pred.store.if14:
; DISABLED_MASKED_STRIDED-NEXT: [[TMP34:%.*]] = extractelement <4 x i64> [[TMP20]], i64 3
; DISABLED_MASKED_STRIDED-NEXT: [[TMP35:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP34]]
diff --git a/llvm/test/Transforms/LoopVectorize/as_cast.ll b/llvm/test/Transforms/LoopVectorize/as_cast.ll
index 67aacefebd555..2437bcfde08f9 100644
--- a/llvm/test/Transforms/LoopVectorize/as_cast.ll
+++ b/llvm/test/Transforms/LoopVectorize/as_cast.ll
@@ -22,7 +22,7 @@ loop:
; CHECK: [[ID2:%.*]] = add i64 %{{.*}}, 1
; CHECK: [[AS2:%.*]] = addrspacecast ptr addrspace(1) %in to ptr
; CHECK: [[GEP2:%.*]] = getelementptr inbounds i64, ptr [[AS2]], i64 [[ID2]]
-; CHECK: store i64 [[ID2]], ptr %9, align 4
+; CHECK: store i64 [[ID2]], ptr [[GEP2]], align 4
%cmp = icmp eq i64 %next, 7
br i1 %cmp, label %exit, label %loop
diff --git a/llvm/test/Transforms/LoopVectorize/cast-induction.ll b/llvm/test/Transforms/LoopVectorize/cast-induction.ll
index 0b58bb65e2bc5..a70d218cc8ce3 100644
--- a/llvm/test/Transforms/LoopVectorize/cast-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/cast-induction.ll
@@ -119,9 +119,8 @@ define void @cast_induction_tail_folding(ptr %A) {
; IC2-LABEL: @cast_induction_tail_folding(
; IC2: [[INDEX:%.+]] = phi i32 [ 0, %vector.ph ]
-; IC2-NEXT: [[INDEX0:%.+]] = add i32 [[INDEX]], 0
; IC2-NEXT: [[INDEX1:%.+]] = add i32 [[INDEX]], 1
-; IC2-NEXT: = icmp ule i32 [[INDEX0]], 2
+; IC2-NEXT: = icmp ule i32 [[INDEX]], 2
; IC2-NEXT: = icmp ule i32 [[INDEX1]], 2
;
entry:
diff --git a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll
index 4acc7403d6a37..f76b38bfda1c7 100644
--- a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll
+++ b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll
@@ -549,10 +549,6 @@ define void @predicated_store(ptr %p, i32 %x, i64 %n) {
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP12]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP3]], i32 0
-; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i32 0
-; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x ptr> [[TMP13]], ptr [[TMP2]], i32 1
-; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x ptr> [[TMP19]], ptr [[TMP4]], i32 2
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x ptr> [[TMP25]], ptr [[TMP6]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP0]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP2]], align 8
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 8
@@ -634,10 +630,8 @@ define void @predicated_store(ptr %p, i32 %x, i64 %n) {
; INTER-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
; INTER-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; INTER: [[PRED_STORE_IF]]:
-; INTER-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0
-; INTER-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP7]], i32 0
; INTER-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 0
-; INTER-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8
+; INTER-NEXT: store i32 [[TMP6]], ptr [[TMP2]], align 8
; INTER-NEXT: br label %[[PRED_STORE_CONTINUE]]
; INTER: [[PRED_STORE_CONTINUE]]:
; INTER-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/cse-casts.ll b/llvm/test/Transforms/LoopVectorize/cse-casts.ll
index b450942e3966b..06e4d775d2232 100644
--- a/llvm/test/Transforms/LoopVectorize/cse-casts.ll
+++ b/llvm/test/Transforms/LoopVectorize/cse-casts.ll
@@ -131,9 +131,8 @@ define void @preserve_flags_narrowing_extends_and_truncs(ptr noalias %A, ptr noa
; CHECK-NEXT: [[TMP33:%.*]] = zext <4 x i8> [[TMP31]] to <4 x i64>
; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 0
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <4 x i64> [[TMP32]], i32 0
-; CHECK-NEXT: store i64 [[TMP35]], ptr [[TMP34]], align 4
+; CHECK-NEXT: store i64 [[TMP35]], ptr [[C]], align 4
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/debugloc.ll b/llvm/test/Transforms/LoopVectorize/debugloc.ll
index 4b364133dfefe..15295ad5f0bd0 100644
--- a/llvm/test/Transforms/LoopVectorize/debugloc.ll
+++ b/llvm/test/Transforms/LoopVectorize/debugloc.ll
@@ -66,8 +66,7 @@ define i32 @test_debug_loc_on_branch_in_loop(ptr noalias %src, ptr noalias %dst)
; CHECK-NEXT: br i1 [[EXT]], label %pred.store.if, label %pred.store.continue, !dbg [[LOC3]]
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.if:
-; CHECK-NEXT: [[IDX:%.+]] = add i64 %index, 0
-; CHECK-NEXT: [[GEP:%.+]] = getelementptr inbounds i32, ptr %dst, i64 [[IDX]]
+; CHECK-NEXT: [[GEP:%.+]] = getelementptr inbounds i32, ptr %dst, i64 %index
; CHECK-NEXT: store i32 0, ptr [[GEP]], align 4
; CHECK-NEXT: br label %pred.store.continue, !dbg [[LOC3]]
; CHECK-EMPTY:
@@ -105,8 +104,7 @@ define i32 @test_different_debug_loc_on_replicate_recipe(ptr noalias %src, ptr n
; CHECK-NEXT: br i1 [[EXT]], label %pred.store.if, label %pred.store.continue, !dbg [[LOC4]]
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.if:
-; CHECK-NEXT: [[IDX:%.+]] = add i64 %index, 0
-; CHECK-NEXT: [[GEP:%.+]] = getelementptr inbounds i32, ptr %dst, i64 [[IDX]], !dbg [[LOC5:!.+]]
+; CHECK-NEXT: [[GEP:%.+]] = getelementptr inbounds i32, ptr %dst, i64 %index, !dbg [[LOC5:!.+]]
; CHECK-NEXT: store i32 0, ptr [[GEP]], align 4
; CHECK-NEXT: br label %pred.store.continue, !dbg [[LOC4]]
; CHECK-EMPTY:
diff --git a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
index 156c2bdca7b0e..8f7ae20288575 100644
--- a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
+++ b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
@@ -164,8 +164,7 @@ define dso_local void @cannotProveAlignedTC(ptr noalias nocapture %A, i32 %p, i3
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
; CHECK-NEXT: store i32 13, ptr [[TMP3]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
index 82b3ae5eaf5d4..5f101a4944461 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
@@ -2941,8 +2941,7 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) {
; UNROLL-NO-IC-NEXT: [[TMP48:%.*]] = extractelement <4 x i1> [[TMP10]], i32 0
; UNROLL-NO-IC-NEXT: br i1 [[TMP48]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; UNROLL-NO-IC: pred.store.if:
-; UNROLL-NO-IC-NEXT: [[TMP49:%.*]] = add i32 [[INDEX]], 0
-; UNROLL-NO-IC-NEXT: [[TMP50:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP49]]
+; UNROLL-NO-IC-NEXT: [[TMP50:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[INDEX]]
; UNROLL-NO-IC-NEXT: store i32 [[OFFSET_IDX]], ptr [[TMP50]], align 4
; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE]]
; UNROLL-NO-IC: pred.store.continue:
@@ -3141,8 +3140,7 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) {
; SINK-AFTER-NEXT: [[TMP25:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
; SINK-AFTER-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; SINK-AFTER: pred.store.if:
-; SINK-AFTER-NEXT: [[TMP26:%.*]] = add i32 [[INDEX]], 0
-; SINK-AFTER-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP26]]
+; SINK-AFTER-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[INDEX]]
; SINK-AFTER-NEXT: store i32 [[OFFSET_IDX]], ptr [[TMP27]], align 4
; SINK-AFTER-NEXT: br label [[PRED_STORE_CONTINUE]]
; SINK-AFTER: pred.store.continue:
diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll
index f56699a45320d..ca83000494e8b 100644
--- a/llvm/test/Transforms/LoopVectorize/float-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll
@@ -1322,8 +1322,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0
; VEC4_INTERL1-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; VEC4_INTERL1: pred.store.if:
-; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; VEC4_INTERL1-NEXT: store float [[DOTCAST2]], ptr [[TMP3]], align 4
+; VEC4_INTERL1-NEXT: store float [[DOTCAST2]], ptr [[TMP0]], align 4
; VEC4_INTERL1-NEXT: br label [[PRED_STORE_CONTINUE]]
; VEC4_INTERL1: pred.store.continue:
; VEC4_INTERL1-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i64 1
@@ -1402,8 +1401,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
; VEC4_INTERL2-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP3]], i64 0
; VEC4_INTERL2-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; VEC4_INTERL2: pred.store.if:
-; VEC4_INTERL2-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; VEC4_INTERL2-NEXT: store float [[DOTCAST2]], ptr [[TMP35]], align 4
+; VEC4_INTERL2-NEXT: store float [[DOTCAST2]], ptr [[TMP1]], align 4
; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE]]
; VEC4_INTERL2: pred.store.continue:
; VEC4_INTERL2-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP3]], i64 1
@@ -1573,8 +1571,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i64 0
; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; VEC2_INTERL1_PRED_STORE: pred.store.if:
-; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; VEC2_INTERL1_PRED_STORE-NEXT: store float [[DOTCAST2]], ptr [[TMP3]], align 4
+; VEC2_INTERL1_PRED_STORE-NEXT: store float [[DOTCAST2]], ptr [[TMP0]], align 4
; VEC2_INTERL1_PRED_STORE-NEXT: br label [[PRED_STORE_CONTINUE]]
; VEC2_INTERL1_PRED_STORE: pred.store.continue:
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP1]], i64 1
diff --git a/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll b/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll
index 7bbc186dcbbae..0b6f3aa080a71 100644
--- a/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll
+++ b/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll
@@ -162,8 +162,7 @@ define void @dont_hoist_predicated_load(ptr %dst, ptr %invariant_ptr, ptr %cond_
; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4, !alias.scope [[META14:![0-9]+]]
-; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
; CHECK-NEXT: store i32 [[TMP7]], ptr [[TMP9]], align 4, !alias.scope [[META16:![0-9]+]], !noalias [[META18:![0-9]+]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
diff --git a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll
index d95fb869bb1b6..a5b7c3d7f6cde 100644
--- a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll
@@ -25,22 +25,22 @@ define void @test_stores_noalias_via_rt_checks_after_loads(ptr %dst, ptr %src, p
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4, !alias.scope [[META0:![0-9]+]]
-; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
-; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope [[META3:![0-9]+]]
-; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !alias.scope [[META3]]
-; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP9]], i32 0
-; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP16]], i32 1
-; CHECK-NEXT: [[TMP19:%.*]] = sub <2 x i32> [[TMP17]], splat (i32 5)
-; CHECK-NEXT: [[TMP36:%.*]] = add <2 x i32> [[TMP17]], splat (i32 10)
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
+; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope [[META3:![0-9]+]]
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 4, !alias.scope [[META3]]
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP16]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i32 1
+; CHECK-NEXT: [[TMP9:%.*]] = sub <2 x i32> [[TMP8]], splat (i32 5)
+; CHECK-NEXT: [[TMP10:%.*]] = add <2 x i32> [[TMP8]], splat (i32 10)
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
-; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]]
-; CHECK-NEXT: [[TMP14:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP36]], <2 x i32> [[TMP19]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x i32> [[TMP14]], i32 0
-; CHECK-NEXT: store i32 [[TMP18]], ptr [[TMP11]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i32> [[TMP14]], i32 1
-; CHECK-NEXT: store i32 [[TMP20]], ptr [[TMP24]], align 4, !alias.scope [[META5]], !noalias [[META7]]
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]]
+; CHECK-NEXT: [[TMP13:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[TMP10]], <2 x i32> [[TMP9]]
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0
+; CHECK-NEXT: store i32 [[TMP14]], ptr [[TMP11]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]]
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[TMP13]], i32 1
+; CHECK-NEXT: store i32 [[TMP15]], ptr [[TMP12]], align 4, !alias.scope [[META5]], !noalias [[META7]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP43]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -144,24 +144,24 @@ define void @test_aliasing_store(ptr %dst, ptr %src, ptr %cond) {
; CHECK-NEXT: [[TMP29:%.*]] = insertelement <2 x i32> poison, i32 [[TMP28]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE13]]
; CHECK: [[PRED_LOAD_CONTINUE13]]:
-; CHECK-NEXT: [[TMP30:%.*]] = phi <2 x i32> [ poison, %[[PRED_LOAD_CONTINUE11]] ], [ [[TMP29]], %[[PRED_LOAD_IF12]] ]
-; CHECK-NEXT: [[TMP31:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1
-; CHECK-NEXT: br i1 [[TMP31]], label %[[PRED_LOAD_IF14:.*]], label %[[PRED_LOAD_CONTINUE15]]
+; CHECK-NEXT: [[TMP21:%.*]] = phi <2 x i32> [ poison, %[[PRED_LOAD_CONTINUE11]] ], [ [[TMP29]], %[[PRED_LOAD_IF12]] ]
+; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1
+; CHECK-NEXT: br i1 [[TMP22]], label %[[PRED_LOAD_IF14:.*]], label %[[PRED_LOAD_CONTINUE15]]
; CHECK: [[PRED_LOAD_IF14]]:
-; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
-; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4, !alias.scope [[META15]], !noalias [[META17]]
-; CHECK-NEXT: [[TMP34:%.*]] = insertelement <2 x i32> [[TMP30]], i32 [[TMP33]], i32 1
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
+; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4, !alias.scope [[META15]], !noalias [[META17]]
+; CHECK-NEXT: [[TMP25:%.*]] = insertelement <2 x i32> [[TMP21]], i32 [[TMP24]], i32 1
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE15]]
; CHECK: [[PRED_LOAD_CONTINUE15]]:
-; CHECK-NEXT: [[TMP35:%.*]] = phi <2 x i32> [ [[TMP30]], %[[PRED_LOAD_CONTINUE13]] ], [ [[TMP34]], %[[PRED_LOAD_IF14]] ]
-; CHECK-NEXT: [[TMP36:%.*]] = add <2 x i32> [[TMP35]], splat (i32 10)
-; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
-; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]]
-; CHECK-NEXT: [[TMP37:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP36]], <2 x i32> [[TMP19]]
-; CHECK-NEXT: [[TMP38:%.*]] = extractelement <2 x i32> [[TMP37]], i32 0
-; CHECK-NEXT: store i32 [[TMP38]], ptr [[TMP40]], align 4, !alias.scope [[META19:![0-9]+]], !noalias [[META12]]
-; CHECK-NEXT: [[TMP39:%.*]] = extractelement <2 x i32> [[TMP37]], i32 1
-; CHECK-NEXT: store i32 [[TMP39]], ptr [[TMP41]], align 4, !alias.scope [[META19]], !noalias [[META12]]
+; CHECK-NEXT: [[TMP33:%.*]] = phi <2 x i32> [ [[TMP21]], %[[PRED_LOAD_CONTINUE13]] ], [ [[TMP25]], %[[PRED_LOAD_IF14]] ]
+; CHECK-NEXT: [[TMP27:%.*]] = add <2 x i32> [[TMP33]], splat (i32 10)
+; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
+; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]]
+; CHECK-NEXT: [[TMP30:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP27]], <2 x i32> [[TMP19]]
+; CHECK-NEXT: [[TMP31:%.*]] = extractelement <2 x i32> [[TMP30]], i32 0
+; CHECK-NEXT: store i32 [[TMP31]], ptr [[TMP34]], align 4, !alias.scope [[META19:![0-9]+]], !noalias [[META12]]
+; CHECK-NEXT: [[TMP32:%.*]] = extractelement <2 x i32> [[TMP30]], i32 1
+; CHECK-NEXT: store i32 [[TMP32]], ptr [[TMP35]], align 4, !alias.scope [[META19]], !noalias [[META12]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP43]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
@@ -240,8 +240,6 @@ define void @test_noalias_store_via_runtime_checks(ptr %dst, ptr %dst.1, ptr %sr
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE20:.*]] ]
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP5]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META22:![0-9]+]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt <2 x i32> [[WIDE_LOAD]], splat (i32 11)
@@ -260,20 +258,20 @@ define void @test_noalias_store_via_runtime_checks(ptr %dst, ptr %dst.1, ptr %sr
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE20]]
; CHECK: [[PRED_STORE_CONTINUE20]]:
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
-; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META30:![0-9]+]]
-; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4, !alias.scope [[META30]]
+; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP13]], align 4, !alias.scope [[META30]]
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP14]], i32 [[TMP18]], i32 1
-; CHECK-NEXT: [[TMP21:%.*]] = sub <2 x i32> [[TMP19]], splat (i32 5)
-; CHECK-NEXT: [[TMP38:%.*]] = add <2 x i32> [[TMP19]], splat (i32 10)
-; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
-; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]]
-; CHECK-NEXT: [[TMP22:%.*]] = select <2 x i1> [[TMP8]], <2 x i32> [[TMP21]], <2 x i32> [[TMP38]]
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP22]], i32 0
-; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4, !alias.scope [[META31:![0-9]+]], !noalias [[META32:![0-9]+]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i32> [[TMP22]], i32 1
-; CHECK-NEXT: store i32 [[TMP20]], ptr [[TMP26]], align 4, !alias.scope [[META31]], !noalias [[META32]]
+; CHECK-NEXT: [[TMP20:%.*]] = sub <2 x i32> [[TMP19]], splat (i32 5)
+; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i32> [[TMP19]], splat (i32 10)
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]]
+; CHECK-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP8]], <2 x i32> [[TMP20]], <2 x i32> [[TMP21]]
+; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP17]], i32 0
+; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP22]], align 4, !alias.scope [[META31:![0-9]+]], !noalias [[META32:![0-9]+]]
+; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i32> [[TMP17]], i32 1
+; CHECK-NEXT: store i32 [[TMP25]], ptr [[TMP23]], align 4, !alias.scope [[META31]], !noalias [[META32]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP45:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP45]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]]
@@ -477,8 +475,6 @@ define void @test_memory_op_between_loads_no_alias_via_rt_checks(ptr %dst, ptr %
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE20:.*]] ]
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP5]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META45:![0-9]+]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt <2 x i32> [[WIDE_LOAD]], splat (i32 11)
@@ -497,19 +493,19 @@ define void @test_memory_op_between_loads_no_alias_via_rt_checks(ptr %dst, ptr %
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE20]]
; CHECK: [[PRED_STORE_CONTINUE20]]:
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
-; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META53:![0-9]+]]
-; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4, !alias.scope [[META53]]
+; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP13]], align 4, !alias.scope [[META53]]
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP14]], i32 [[TMP18]], i32 1
-; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i32> [[TMP19]], splat (i32 10)
-; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
-; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]]
-; CHECK-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP8]], <2 x i32> [[TMP21]], <2 x i32> [[TMP19]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[TMP20]], i32 0
-; CHECK-NEXT: store i32 [[TMP22]], ptr [[TMP23]], align 4, !alias.scope [[META54:![0-9]+]], !noalias [[META55:![0-9]+]]
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP20]], i32 1
-; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP26]], align 4, !alias.scope [[META54]], !noalias [[META55]]
+; CHECK-NEXT: [[TMP20:%.*]] = add <2 x i32> [[TMP19]], splat (i32 10)
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]]
+; CHECK-NEXT: [[TMP23:%.*]] = select <2 x i1> [[TMP8]], <2 x i32> [[TMP20]], <2 x i32> [[TMP19]]
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[TMP23]], i32 0
+; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP21]], align 4, !alias.scope [[META54:![0-9]+]], !noalias [[META55:![0-9]+]]
+; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP23]], i32 1
+; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP22]], align 4, !alias.scope [[META54]], !noalias [[META55]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP56:![0-9]+]]
@@ -589,13 +585,13 @@ define void @test_stores_not_sunk_due_to_aliasing_load(ptr %dst, ptr %alias, ptr
; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; CHECK: [[PRED_LOAD_IF]]:
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[ALIAS]], i32 [[INDEX]]
-; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META61:![0-9]+]]
-; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0
+; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META61:![0-9]+]]
+; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP9]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
; CHECK: [[PRED_LOAD_CONTINUE]]:
-; CHECK-NEXT: [[TMP20:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP15]], %[[PRED_LOAD_IF]] ]
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1
-; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_LOAD_IF6:.*]], label %[[PRED_LOAD_CONTINUE7]]
+; CHECK-NEXT: [[TMP20:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP13]], %[[PRED_LOAD_IF]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1
+; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF6:.*]], label %[[PRED_LOAD_CONTINUE7]]
; CHECK: [[PRED_LOAD_IF6]]:
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[ALIAS]], i32 [[TMP5]]
; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP14]], align 4, !alias.scope [[META61]]
@@ -675,8 +671,6 @@ define void @test_stores_not_sunk_aliasing_load_between(ptr %dst, ptr %mid, ptr
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE15:.*]] ]
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP1]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP11]], align 4, !alias.scope [[META68:![0-9]+]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
@@ -686,9 +680,9 @@ define void @test_stores_not_sunk_aliasing_load_between(ptr %dst, ptr %mid, ptr
; CHECK: [[PRED_STORE_IF]]:
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
; CHECK-NEXT: store i32 20, ptr [[TMP7]], align 4, !alias.scope [[META71:![0-9]+]], !noalias [[META73:![0-9]+]]
-; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope [[META71]], !noalias [[META73]]
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope [[META71]], !noalias [[META73]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[MID]], i32 [[INDEX]]
-; CHECK-NEXT: store i32 [[TMP8]], ptr [[TMP14]], align 4, !alias.scope [[META75:![0-9]+]], !noalias [[META68]]
+; CHECK-NEXT: store i32 [[TMP6]], ptr [[TMP14]], align 4, !alias.scope [[META75:![0-9]+]], !noalias [[META68]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
@@ -772,8 +766,6 @@ define void @sink_multiple_store_groups_noalias_via_scev(ptr %dst, ptr %src) {
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ]
; CHECK-NEXT: [[INDEX:%.*]] = mul i64 [[INDEX1]], 16
; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 16
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP17]], i32 1
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP17]]
; CHECK-NEXT: [[GEP_FLAG:%.*]] = getelementptr i8, ptr [[GEP_SRC]], i64 152
@@ -819,8 +811,8 @@ define void @sink_multiple_store_groups_noalias_via_scev(ptr %dst, ptr %src) {
; CHECK: [[PRED_STORE_IF6]]:
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]]
; CHECK-NEXT: store double [[TMP14]], ptr [[TMP32]], align 8, !alias.scope [[META81]], !noalias [[META78]]
-; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr [[TMP32]], i64 16
-; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP47]], align 8, !alias.scope [[META81]], !noalias [[META78]]
+; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr [[TMP32]], i64 16
+; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP36]], align 8, !alias.scope [[META81]], !noalias [[META78]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE7]]
; CHECK: [[PRED_STORE_CONTINUE7]]:
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 2
@@ -884,8 +876,6 @@ define void @sink_multiple_store_groups_alias_via_scev(ptr %dst, ptr %src) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[TMP1]], i32 1
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP1]]
; CHECK-NEXT: [[GEP_FLAG:%.*]] = getelementptr i8, ptr [[GEP_SRC]], i64 152
@@ -1070,8 +1060,6 @@ define void @test_three_stores_with_different_predicates(ptr %dst, ptr %src, ptr
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE11:.*]] ]
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP1]], i32 1
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP13]], align 4, !alias.scope [[META92:![0-9]+]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
index f9dd626e523e8..d41bf087880a8 100644
--- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
@@ -82,11 +82,9 @@ define i32 @test(ptr nocapture %f) #0 {
; VEC-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
; VEC-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; VEC: pred.store.if:
-; VEC-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
-; VEC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[F]], i64 [[TMP5]]
; VEC-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0
; VEC-NEXT: [[TMP8:%.*]] = add nsw i32 [[TMP7]], 20
-; VEC-NEXT: store i32 [[TMP8]], ptr [[TMP6]], align 4
+; VEC-NEXT: store i32 [[TMP8]], ptr [[TMP1]], align 4
; VEC-NEXT: br label [[PRED_STORE_CONTINUE]]
; VEC: pred.store.continue:
; VEC-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
@@ -305,10 +303,8 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) {
; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 4
; VEC-NEXT: br i1 [[COND_2:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE2]]
; VEC: pred.store.if:
-; VEC-NEXT: [[INDVARS_IV3:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VEC-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR]], i64 0, i64 [[INDVARS_IV3]]
; VEC-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0
-; VEC-NEXT: store i32 [[TMP11]], ptr [[ARRAYIDX16]], align 4
+; VEC-NEXT: store i32 [[TMP11]], ptr [[TMP7]], align 4
; VEC-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 1
; VEC-NEXT: [[TMP13:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR]], i64 0, i64 [[TMP12]]
; VEC-NEXT: [[TMP14:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1
@@ -444,10 +440,8 @@ define void @minimal_bit_widths(ptr %p, i1 %c) {
; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP1]], align 1
; VEC-NEXT: br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE2]]
; VEC: pred.store.if:
-; VEC-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0
-; VEC-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP8]]
; VEC-NEXT: [[TMP4:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 0
-; VEC-NEXT: store i8 [[TMP4]], ptr [[TMP3]], align 1
+; VEC-NEXT: store i8 [[TMP4]], ptr [[TMP1]], align 1
; VEC-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1
; VEC-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP5]]
; VEC-NEXT: [[TMP7:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 1
@@ -553,10 +547,8 @@ define void @minimal_bit_widths_with_aliasing_store(i1 %c, ptr %ptr) {
; VEC-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP2]], align 1
; VEC-NEXT: br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE2]]
; VEC: pred.store.if:
-; VEC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; VEC-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP0]]
; VEC-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 0
-; VEC-NEXT: store i8 [[TMP5]], ptr [[TMP4]], align 1
+; VEC-NEXT: store i8 [[TMP5]], ptr [[TMP2]], align 1
; VEC-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1
; VEC-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP7]]
; VEC-NEXT: [[TMP9:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
index 64caecc847096..5b11efdc0f865 100644
--- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
@@ -138,9 +138,8 @@ define void @test_scalarize_with_branch_cond(ptr %src, ptr %dst) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %pred.store.continue4 ]
; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i1
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i1 false, [[TMP0]]
-; CHECK-NEXT: [[INDUCTION:%.*]] = add i1 [[OFFSET_IDX]], false
; CHECK-NEXT: [[INDUCTION3:%.*]] = add i1 [[OFFSET_IDX]], true
-; CHECK-NEXT: br i1 [[INDUCTION]], label %pred.store.if, label %pred.store.continue
+; CHECK-NEXT: br i1 [[OFFSET_IDX]], label %pred.store.if, label %pred.store.continue
; CHECK: pred.store.if:
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr %src, i64 [[INDEX]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll
index 16a56f3a38ac9..1faaea56be173 100644
--- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll
@@ -35,10 +35,8 @@ define void @interleaved_with_cond_store_0(ptr %p, i64 %x, i64 %n) {
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i64 0
; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[DOTSPLIT3:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[DOTSPLIT3]], i64 8
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 0
-; CHECK-NEXT: store i64 [[TMP4]], ptr [[TMP3]], align 8
+; CHECK-NEXT: store i64 [[TMP4]], ptr [[TMP0]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP1]], i64 1
@@ -134,9 +132,8 @@ define void @interleaved_with_cond_store_1(ptr %p, i64 %x, i64 %n) {
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i64 0
; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 0
-; CHECK-NEXT: store i64 [[TMP7]], ptr [[TMP6]], align 8
+; CHECK-NEXT: store i64 [[TMP7]], ptr [[TMP1]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP4]], i64 1
@@ -242,10 +239,8 @@ define void @interleaved_with_cond_store_2(ptr %p, i64 %x, i64 %n) {
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i64 0
; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[DOTSPLIT3:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[DOTSPLIT3]], i64 8
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 0
-; CHECK-NEXT: store i64 [[TMP7]], ptr [[TMP6]], align 8
+; CHECK-NEXT: store i64 [[TMP7]], ptr [[TMP3]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP4]], i64 1
diff --git a/llvm/test/Transforms/LoopVectorize/lcssa-crashes.ll b/llvm/test/Transforms/LoopVectorize/lcssa-crashes.ll
index fc99e0ffda2eb..3510391ffde1d 100644
--- a/llvm/test/Transforms/LoopVectorize/lcssa-crashes.ll
+++ b/llvm/test/Transforms/LoopVectorize/lcssa-crashes.ll
@@ -102,8 +102,7 @@ define void @test3(ptr %p) {
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP7]], i32 3
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[IDXPROM4738:%.*]] = add i64 [[IDXPROM4736]], 0
-; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P:%.*]], i64 0, i64 [[IDXPROM4738]]
+; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P:%.*]], i64 0, i64 [[IDXPROM4736]]
; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX48]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
index 8d3d0ff7a6406..44c7f099e5436 100644
--- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
+++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
@@ -191,8 +191,7 @@ define i32 @loop_requires_scev_predicate(ptr %dest, i32 %end) {
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0
; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[DEST:%.*]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[DEST:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[WIDE_LOAD3]], i32 0
; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP16]], [[TMP17]]
@@ -309,8 +308,7 @@ define void @test_rev_loops_deref_loads(ptr nocapture noundef writeonly %dest) {
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP0]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[REVERSE2]], i32 0
; CHECK-NEXT: [[TMP12:%.*]] = shl nsw i32 [[TMP11]], 2
; CHECK-NEXT: store i32 [[TMP12]], ptr [[TMP10]], align 4
@@ -396,11 +394,9 @@ define void @test_rev_loops_non_deref_loads(ptr nocapture noundef writeonly %des
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP6]], i32 0
; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP0]], i32 0
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP8]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP1]]
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP0]], i32 0
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP11]]
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP1]]
; CHECK-NEXT: [[TMP13:%.*]] = shl nsw i32 [[TMP10]], 2
; CHECK-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
@@ -411,8 +407,7 @@ define void @test_rev_loops_non_deref_loads(ptr nocapture noundef writeonly %des
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP15]]
; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
-; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP18]]
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP15]]
; CHECK-NEXT: [[TMP20:%.*]] = shl nsw i32 [[TMP17]], 2
; CHECK-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]]
@@ -556,13 +551,10 @@ define void @test_rev_loops_strided_deref_loads(ptr nocapture noundef writeonly
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP8]], align 4
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
-; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0
-; CHECK-NEXT: [[TMP24:%.*]] = insertelement <2 x i32> [[TMP23]], i32 [[TMP12]], i32 1
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP0]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP15:%.*]] = shl nsw i32 [[TMP11]], 2
; CHECK-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
@@ -675,7 +667,7 @@ define void @adding_offset_overflows(i32 %n, ptr %A) {
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
-; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE3]]
; CHECK: pred.store.if3:
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[C]], i64 [[TMP10]]
diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll
index 5e88072517b37..3175879ae29d7 100644
--- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll
+++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll
@@ -47,8 +47,7 @@ define void @ptr_depends_on_sdiv(ptr noalias %dst, i16 noundef %off) {
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i16> [[TMP23]], i32 0
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP13]]
; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP24]], align 1
-; CHECK-NEXT: [[TMP10:%.*]] = add i16 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP10]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[OFFSET_IDX]]
; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP11]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
@@ -67,8 +66,9 @@ define void @ptr_depends_on_sdiv(ptr noalias %dst, i16 noundef %off) {
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br [[EXIT:label %.*]]
-; CHECK: [[SCALAR_PH:.*:]]
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
;
entry:
br label %loop.header
@@ -123,8 +123,7 @@ define void @ptr_depends_on_possibly_poison_value(ptr noalias %dst, i16 %off) {
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i16> [[TMP3]], i32 0
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP5]]
; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP14]], align 1
-; CHECK-NEXT: [[TMP7:%.*]] = add i16 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP7]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[OFFSET_IDX]]
; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP8]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
@@ -141,10 +140,11 @@ define void @ptr_depends_on_possibly_poison_value(ptr noalias %dst, i16 %off) {
; CHECK: [[PRED_STORE_CONTINUE2]]:
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
-; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br [[EXIT:label %.*]]
-; CHECK: [[SCALAR_PH:.*:]]
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
;
entry:
br label %loop.header
@@ -194,8 +194,7 @@ define void @ptr_doesnt_depend_on_poison_or_ub(ptr noalias %dst, i16 noundef %of
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP7:%.*]] = add i16 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP7]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0
; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP8]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
@@ -211,10 +210,11 @@ define void @ptr_doesnt_depend_on_poison_or_ub(ptr noalias %dst, i16 noundef %of
; CHECK: [[PRED_STORE_CONTINUE2]]:
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
-; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br [[EXIT:label %.*]]
-; CHECK: [[SCALAR_PH:.*:]]
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
;
entry:
br label %loop.header
@@ -269,8 +269,7 @@ define void @ptr_depends_on_possibly_poison_value_from_load(ptr noalias %dst) {
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i16> [[TMP4]], i32 0
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP6]]
; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP15]], align 1
-; CHECK-NEXT: [[TMP8:%.*]] = add i16 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP8]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[OFFSET_IDX]]
; CHECK-NEXT: store i64 [[TMP10]], ptr [[TMP9]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
@@ -287,10 +286,11 @@ define void @ptr_depends_on_possibly_poison_value_from_load(ptr noalias %dst) {
; CHECK: [[PRED_STORE_CONTINUE2]]:
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
-; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br [[EXIT:label %.*]]
-; CHECK: [[SCALAR_PH:.*:]]
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
;
entry:
br label %loop.header
@@ -333,7 +333,7 @@ define void @ptr_depends_on_noundef_load(ptr noalias %dst) {
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 9, i16 10>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ]
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 9, [[DOTCAST]]
-; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr @src, align 1, !noundef [[META10:![0-9]+]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr @src, align 1, !noundef [[META6:![0-9]+]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i16> [[VEC_IND]], splat (i16 10)
; CHECK-NEXT: [[TMP2:%.*]] = sub i16 1, [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = add i16 [[TMP2]], [[TMP0]]
@@ -343,8 +343,7 @@ define void @ptr_depends_on_noundef_load(ptr noalias %dst) {
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP8:%.*]] = add i16 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP8]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0
; CHECK-NEXT: store i64 [[TMP10]], ptr [[TMP9]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
@@ -360,10 +359,11 @@ define void @ptr_depends_on_noundef_load(ptr noalias %dst) {
; CHECK: [[PRED_STORE_CONTINUE2]]:
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
-; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br [[EXIT:label %.*]]
-; CHECK: [[SCALAR_PH:.*:]]
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
;
entry:
br label %loop.header
diff --git a/llvm/test/Transforms/LoopVectorize/loop-form.ll b/llvm/test/Transforms/LoopVectorize/loop-form.ll
index b46cd8ecea5b3..b43c35b873f8c 100644
--- a/llvm/test/Transforms/LoopVectorize/loop-form.ll
+++ b/llvm/test/Transforms/LoopVectorize/loop-form.ll
@@ -1061,9 +1061,7 @@ define void @scalar_predication(ptr %addr) {
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr float, ptr [[ADDR]], i64 [[TMP0]]
-; CHECK-NEXT: store float 1.000000e+01, ptr [[TMP6]], align 4
+; CHECK-NEXT: store float 1.000000e+01, ptr [[TMP1]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll b/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll
index 30ee4803de607..efc6c870a9a79 100644
--- a/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll
+++ b/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll
@@ -34,8 +34,7 @@ define void @maxvf3() {
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[INDEX]]
; CHECK-NEXT: store i8 69, ptr [[TMP3]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
@@ -48,19 +47,17 @@ define void @maxvf3() {
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]]
; CHECK: pred.store.continue2:
; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw <2 x i32> splat (i32 3), [[VEC_IND]]
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
-; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
+; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
; CHECK: pred.store.if3:
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[TMP7]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[TMP9]]
; CHECK-NEXT: store i8 7, ptr [[TMP10]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
; CHECK: pred.store.continue4:
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
-; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
+; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
; CHECK: pred.store.if5:
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i32> [[TMP7]], i32 1
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[TMP12]]
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[TMP7]], i32 1
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[TMP11]]
; CHECK-NEXT: store i8 7, ptr [[TMP13]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
; CHECK: pred.store.continue6:
diff --git a/llvm/test/Transforms/LoopVectorize/multiple-result-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/multiple-result-intrinsics.ll
index b19f9c5a3b60d..1a13377a291ff 100644
--- a/llvm/test/Transforms/LoopVectorize/multiple-result-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/multiple-result-intrinsics.ll
@@ -77,9 +77,9 @@ define void @predicated_sincos(float %x, ptr noalias %in, ptr noalias writeonly
; CHECK-LABEL: define void @predicated_sincos(
; CHECK-SAME: float [[X:%.*]], ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) {
; CHECK: [[ENTRY:.*:]]
-; CHECK: [[VECTOR_BODY:.*]]:
-; CHECK: [[VECTOR_BODY1:.*:]]
-; CHECK: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_BODY]] ], [ [[INDEX_NEXT:%.*]], %[[IF_THEN1:.*]] ]
+; CHECK: [[VECTOR_BODY1:.*]]:
+; CHECK: [[VECTOR_BODY:.*:]]
+; CHECK: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_BODY1]] ], [ [[INDEX_NEXT:%.*]], %[[FOR_END:.*]] ]
; CHECK: [[TMP4:%.*]] = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> [[WIDE_LOAD:%.*]])
; CHECK: [[TMP5:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP4]], 0
; CHECK: [[TMP6:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP4]], 1
@@ -93,16 +93,16 @@ define void @predicated_sincos(float %x, ptr noalias %in, ptr noalias writeonly
; CHECK: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
; CHECK: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
-; CHECK: br i1 [[TMP12]], label %[[PRED_STORE_IF1:.*]], label %[[IF_THEN1]]
+; CHECK: br i1 [[TMP12]], label %[[PRED_STORE_IF1:.*]], label %[[FOR_END]]
; CHECK: [[PRED_STORE_IF1]]:
; CHECK: [[TMP15:%.*]] = extractelement <2 x float> [[TMP5]], i32 1
; CHECK: store float [[TMP15]], ptr [[TMP14:%.*]], align 4
; CHECK: [[TMP17:%.*]] = extractelement <2 x float> [[TMP6]], i32 1
; CHECK: store float [[TMP17]], ptr [[TMP16:%.*]], align 4
-; CHECK: br label %[[IF_THEN1]]
-; CHECK: [[IF_THEN1]]:
-; CHECK: [[IF_MERGE:.*:]]
-; CHECK: [[FOR_END:.*:]]
+; CHECK: br label %[[FOR_END]]
+; CHECK: [[FOR_END]]:
+; CHECK: [[MIDDLE_BLOCK:.*:]]
+; CHECK: [[FOR_END1:.*:]]
;
entry:
br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
index 350a7f7de4083..c23efe18da607 100644
--- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
@@ -32,10 +32,6 @@ define void @a(ptr readnone %b) {
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]]
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP14]]
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr null, i64 [[TMP17]]
-; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x ptr> poison, ptr [[NEXT_GEP]], i32 0
-; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x ptr> [[TMP21]], ptr [[NEXT_GEP2]], i32 1
-; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x ptr> [[TMP22]], ptr [[NEXT_GEP3]], i32 2
-; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x ptr> [[TMP23]], ptr [[NEXT_GEP4]], i32 3
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 -3
@@ -45,8 +41,7 @@ define void @a(ptr readnone %b) {
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0
; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1
-; CHECK-NEXT: store i8 95, ptr [[TMP9]], align 1
+; CHECK-NEXT: store i8 95, ptr [[TMP3]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/pr37248.ll b/llvm/test/Transforms/LoopVectorize/pr37248.ll
index 33b3d263e634a..cc0a52a08503f 100644
--- a/llvm/test/Transforms/LoopVectorize/pr37248.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr37248.ll
@@ -37,20 +37,19 @@ define void @f1(ptr noalias %b, i1 %c, i32 %start) {
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = sub i32 [[START]], [[N_VEC]]
-; CHECK-NEXT: [[TMP11:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 0
+; CHECK-NEXT: [[TMP11:%.*]] = xor i1 [[TMP13]], true
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE3:.*]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[START]], [[INDEX]]
; CHECK-NEXT: [[TMP12:%.*]] = trunc i32 [[OFFSET_IDX]] to i16
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0
-; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
; CHECK-NEXT: store i32 10, ptr [[B]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1
-; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3]]
+; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3]]
; CHECK: [[PRED_STORE_IF2]]:
; CHECK-NEXT: store i32 10, ptr [[B]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE3]]
diff --git a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
index 9ed35fb0a79e8..b1f00466d871a 100644
--- a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
@@ -23,8 +23,7 @@ define void @pr45679(ptr %A) {
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
; CHECK-NEXT: store i32 13, ptr [[TMP3]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
@@ -75,8 +74,7 @@ define void @pr45679(ptr %A) {
; VF2UF2-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; VF2UF2-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; VF2UF2: pred.store.if:
-; VF2UF2-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0
-; VF2UF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP3]]
+; VF2UF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
; VF2UF2-NEXT: store i32 13, ptr [[TMP4]], align 1
; VF2UF2-NEXT: br label [[PRED_STORE_CONTINUE]]
; VF2UF2: pred.store.continue:
@@ -97,7 +95,7 @@ define void @pr45679(ptr %A) {
; VF2UF2-NEXT: br label [[PRED_STORE_CONTINUE5]]
; VF2UF2: pred.store.continue4:
; VF2UF2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
-; VF2UF2-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7]]
+; VF2UF2-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE7]]
; VF2UF2: pred.store.if5:
; VF2UF2-NEXT: [[TMP12:%.*]] = add i32 [[INDEX]], 3
; VF2UF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP12]]
@@ -120,17 +118,16 @@ define void @pr45679(ptr %A) {
; VF1UF4-NEXT: br label [[VECTOR_BODY:%.*]]
; VF1UF4: vector.body:
; VF1UF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
-; VF1UF4-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; VF1UF4-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
; VF1UF4-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2
; VF1UF4-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3
-; VF1UF4-NEXT: [[TMP4:%.*]] = icmp ule i32 [[TMP0]], 13
+; VF1UF4-NEXT: [[TMP4:%.*]] = icmp ule i32 [[INDEX]], 13
; VF1UF4-NEXT: [[TMP5:%.*]] = icmp ule i32 [[TMP1]], 13
; VF1UF4-NEXT: [[TMP6:%.*]] = icmp ule i32 [[TMP2]], 13
; VF1UF4-NEXT: [[TMP7:%.*]] = icmp ule i32 [[TMP3]], 13
; VF1UF4-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; VF1UF4: pred.store.if:
-; VF1UF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP0]]
+; VF1UF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
; VF1UF4-NEXT: store i32 13, ptr [[TMP8]], align 1
; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE]]
; VF1UF4: pred.store.continue:
@@ -188,8 +185,7 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) {
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
; CHECK-NEXT: store i64 [[TMP4]], ptr [[B:%.*]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
@@ -244,8 +240,7 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) {
; VF2UF2-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; VF2UF2-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; VF2UF2: pred.store.if:
-; VF2UF2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
-; VF2UF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP3]]
+; VF2UF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
; VF2UF2-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8
; VF2UF2-NEXT: store i64 [[TMP5]], ptr [[B:%.*]], align 8
; VF2UF2-NEXT: br label [[PRED_STORE_CONTINUE]]
@@ -269,7 +264,7 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) {
; VF2UF2-NEXT: br label [[PRED_STORE_CONTINUE5]]
; VF2UF2: pred.store.continue4:
; VF2UF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
-; VF2UF2-NEXT: br i1 [[TMP17]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7]]
+; VF2UF2-NEXT: br i1 [[TMP17]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE7]]
; VF2UF2: pred.store.if5:
; VF2UF2-NEXT: [[TMP18:%.*]] = add i64 [[INDEX]], 3
; VF2UF2-NEXT: [[TMP19:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP18]]
@@ -293,17 +288,16 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) {
; VF1UF4-NEXT: br label [[VECTOR_BODY:%.*]]
; VF1UF4: vector.body:
; VF1UF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
-; VF1UF4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; VF1UF4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; VF1UF4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; VF1UF4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; VF1UF4-NEXT: [[TMP4:%.*]] = icmp ule i64 [[TMP0]], 13
+; VF1UF4-NEXT: [[TMP4:%.*]] = icmp ule i64 [[INDEX]], 13
; VF1UF4-NEXT: [[TMP5:%.*]] = icmp ule i64 [[TMP1]], 13
; VF1UF4-NEXT: [[TMP6:%.*]] = icmp ule i64 [[TMP2]], 13
; VF1UF4-NEXT: [[TMP7:%.*]] = icmp ule i64 [[TMP3]], 13
; VF1UF4-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; VF1UF4: pred.store.if:
-; VF1UF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
+; VF1UF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
; VF1UF4-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8
; VF1UF4-NEXT: store i64 [[TMP9]], ptr [[B:%.*]], align 8
; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE]]
diff --git a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
index 54c8238422f0b..d4405ca17c4cb 100644
--- a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
@@ -21,8 +21,6 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; IC1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
; IC1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]]
; IC1-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP2]]
-; IC1-NEXT: [[TMP12:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0
-; IC1-NEXT: [[TMP16:%.*]] = insertelement <2 x ptr> [[TMP12]], ptr [[NEXT_GEP3]], i32 1
; IC1-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[NEXT_GEP]], align 1
; IC1-NEXT: [[TMP7:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], splat (i8 -12)
; IC1-NEXT: [[TMP4:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], splat (i8 13)
@@ -117,12 +115,8 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; IC2-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3
; IC2-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]]
; IC2-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP2]]
-; IC2-NEXT: [[TMP23:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0
-; IC2-NEXT: [[TMP24:%.*]] = insertelement <2 x ptr> [[TMP23]], ptr [[NEXT_GEP3]], i32 1
; IC2-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP3]]
; IC2-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
-; IC2-NEXT: [[TMP30:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP4]], i32 0
-; IC2-NEXT: [[TMP31:%.*]] = insertelement <2 x ptr> [[TMP30]], ptr [[NEXT_GEP5]], i32 1
; IC2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 2
; IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[NEXT_GEP]], align 1
; IC2-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x i8>, ptr [[TMP6]], align 1
diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll
index fa03c62bb4927..ffb16d07f3ea8 100644
--- a/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll
+++ b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll
@@ -488,8 +488,7 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
; CHECK-VF4-IC1-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
; CHECK-VF4-IC1-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK-VF4-IC1: pred.store.if:
-; CHECK-VF4-IC1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
-; CHECK-VF4-IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
+; CHECK-VF4-IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
; CHECK-VF4-IC1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
; CHECK-VF4-IC1-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP10]], 1
; CHECK-VF4-IC1-NEXT: store i32 [[TMP11]], ptr [[TMP9]], align 4, !alias.scope [[META9]], !noalias [[META6]]
@@ -607,8 +606,7 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
; CHECK-VF4-IC2-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0
; CHECK-VF4-IC2-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK-VF4-IC2: pred.store.if:
-; CHECK-VF4-IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
-; CHECK-VF4-IC2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
+; CHECK-VF4-IC2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
; CHECK-VF4-IC2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
; CHECK-VF4-IC2-NEXT: [[TMP18:%.*]] = add nsw i32 [[TMP17]], 1
; CHECK-VF4-IC2-NEXT: store i32 [[TMP18]], ptr [[TMP16]], align 4, !alias.scope [[META9]], !noalias [[META6]]
diff --git a/llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll b/llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll
index b9be77ff224ff..a944712ac62e1 100644
--- a/llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll
+++ b/llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll
@@ -488,14 +488,14 @@ define void @struct_return_2xf32_replicate_predicated(ptr %a) {
; VF4-NEXT: [[TMP16:%.*]] = extractvalue { float, float } [[TMP15]], 0
; VF4-NEXT: [[TMP17:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP12]], 0
; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x float> [[TMP17]], float [[TMP16]], i32 1
-; VF4-NEXT: [[TMP19:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP12]], <4 x float> [[TMP18]], 0
+; VF4-NEXT: [[TMP55:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP12]], <4 x float> [[TMP18]], 0
; VF4-NEXT: [[TMP20:%.*]] = extractvalue { float, float } [[TMP15]], 1
-; VF4-NEXT: [[TMP21:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP19]], 1
+; VF4-NEXT: [[TMP21:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP55]], 1
; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x float> [[TMP21]], float [[TMP20]], i32 1
-; VF4-NEXT: [[TMP23:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP19]], <4 x float> [[TMP22]], 1
+; VF4-NEXT: [[TMP23:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP55]], <4 x float> [[TMP22]], 1
; VF4-NEXT: br label %[[PRED_CALL_CONTINUE2]]
; VF4: [[PRED_CALL_CONTINUE2]]:
-; VF4-NEXT: [[TMP24:%.*]] = phi { <4 x float>, <4 x float> } [ [[TMP12]], %[[PRED_CALL_CONTINUE]] ], [ [[TMP19]], %[[PRED_CALL_IF1]] ]
+; VF4-NEXT: [[TMP24:%.*]] = phi { <4 x float>, <4 x float> } [ [[TMP12]], %[[PRED_CALL_CONTINUE]] ], [ [[TMP55]], %[[PRED_CALL_IF1]] ]
; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
; VF4-NEXT: br i1 [[TMP25]], label %[[PRED_CALL_IF3:.*]], label %[[PRED_CALL_CONTINUE4:.*]]
; VF4: [[PRED_CALL_IF3]]:
@@ -533,37 +533,35 @@ define void @struct_return_2xf32_replicate_predicated(ptr %a) {
; VF4-NEXT: [[TMP51:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
; VF4-NEXT: br i1 [[TMP51]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF4: [[PRED_STORE_IF]]:
-; VF4-NEXT: [[TMP52:%.*]] = add i64 [[INDEX]], 0
-; VF4-NEXT: [[TMP53:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP52]]
-; VF4-NEXT: [[TMP54:%.*]] = extractelement <4 x float> [[TMP50]], i32 0
-; VF4-NEXT: store float [[TMP54]], ptr [[TMP53]], align 8
+; VF4-NEXT: [[TMP52:%.*]] = extractelement <4 x float> [[TMP50]], i32 0
+; VF4-NEXT: store float [[TMP52]], ptr [[TMP0]], align 8
; VF4-NEXT: br label %[[PRED_STORE_CONTINUE]]
; VF4: [[PRED_STORE_CONTINUE]]:
-; VF4-NEXT: [[TMP55:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
-; VF4-NEXT: br i1 [[TMP55]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; VF4-NEXT: [[TMP53:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
+; VF4-NEXT: br i1 [[TMP53]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
; VF4: [[PRED_STORE_IF7]]:
; VF4-NEXT: [[TMP56:%.*]] = add i64 [[INDEX]], 1
-; VF4-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP56]]
-; VF4-NEXT: [[TMP58:%.*]] = extractelement <4 x float> [[TMP50]], i32 1
-; VF4-NEXT: store float [[TMP58]], ptr [[TMP57]], align 8
+; VF4-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP56]]
+; VF4-NEXT: [[TMP54:%.*]] = extractelement <4 x float> [[TMP50]], i32 1
+; VF4-NEXT: store float [[TMP54]], ptr [[TMP19]], align 8
; VF4-NEXT: br label %[[PRED_STORE_CONTINUE8]]
; VF4: [[PRED_STORE_CONTINUE8]]:
-; VF4-NEXT: [[TMP59:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
-; VF4-NEXT: br i1 [[TMP59]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; VF4-NEXT: [[TMP57:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
+; VF4-NEXT: br i1 [[TMP57]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
; VF4: [[PRED_STORE_IF9]]:
-; VF4-NEXT: [[TMP60:%.*]] = add i64 [[INDEX]], 2
-; VF4-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP60]]
-; VF4-NEXT: [[TMP62:%.*]] = extractelement <4 x float> [[TMP50]], i32 2
-; VF4-NEXT: store float [[TMP62]], ptr [[TMP61]], align 8
+; VF4-NEXT: [[TMP58:%.*]] = add i64 [[INDEX]], 2
+; VF4-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP58]]
+; VF4-NEXT: [[TMP60:%.*]] = extractelement <4 x float> [[TMP50]], i32 2
+; VF4-NEXT: store float [[TMP60]], ptr [[TMP59]], align 8
; VF4-NEXT: br label %[[PRED_STORE_CONTINUE10]]
; VF4: [[PRED_STORE_CONTINUE10]]:
-; VF4-NEXT: [[TMP63:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
-; VF4-NEXT: br i1 [[TMP63]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12]]
+; VF4-NEXT: [[TMP61:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
+; VF4-NEXT: br i1 [[TMP61]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12]]
; VF4: [[PRED_STORE_IF11]]:
-; VF4-NEXT: [[TMP64:%.*]] = add i64 [[INDEX]], 3
-; VF4-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP64]]
-; VF4-NEXT: [[TMP66:%.*]] = extractelement <4 x float> [[TMP50]], i32 3
-; VF4-NEXT: store float [[TMP66]], ptr [[TMP65]], align 8
+; VF4-NEXT: [[TMP62:%.*]] = add i64 [[INDEX]], 3
+; VF4-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP62]]
+; VF4-NEXT: [[TMP64:%.*]] = extractelement <4 x float> [[TMP50]], i32 3
+; VF4-NEXT: store float [[TMP64]], ptr [[TMP63]], align 8
; VF4-NEXT: br label %[[PRED_STORE_CONTINUE12]]
; VF4: [[PRED_STORE_CONTINUE12]]:
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -590,12 +588,9 @@ define void @struct_return_2xf32_replicate_predicated(ptr %a) {
; VF2IC2: [[PRED_STORE_IF]]:
; VF2IC2-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 0
; VF2IC2-NEXT: [[TMP6:%.*]] = tail call { float, float } @fn2(float [[TMP5]]) #[[ATTR3:[0-9]+]]
-; VF2IC2-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0
; VF2IC2-NEXT: [[TMP8:%.*]] = extractvalue { float, float } [[TMP6]], 0
-; VF2IC2-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP7]]
-; VF2IC2-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 0
-; VF2IC2-NEXT: [[TMP11:%.*]] = fdiv float [[TMP8]], [[TMP10]]
-; VF2IC2-NEXT: store float [[TMP11]], ptr [[TMP9]], align 8
+; VF2IC2-NEXT: [[TMP11:%.*]] = fdiv float [[TMP8]], [[TMP5]]
+; VF2IC2-NEXT: store float [[TMP11]], ptr [[TMP0]], align 8
; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE]]
; VF2IC2: [[PRED_STORE_CONTINUE]]:
; VF2IC2-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
@@ -606,8 +601,7 @@ define void @struct_return_2xf32_replicate_predicated(ptr %a) {
; VF2IC2-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 1
; VF2IC2-NEXT: [[TMP16:%.*]] = extractvalue { float, float } [[TMP14]], 0
; VF2IC2-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP15]]
-; VF2IC2-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 1
-; VF2IC2-NEXT: [[TMP19:%.*]] = fdiv float [[TMP16]], [[TMP18]]
+; VF2IC2-NEXT: [[TMP19:%.*]] = fdiv float [[TMP16]], [[TMP13]]
; VF2IC2-NEXT: store float [[TMP19]], ptr [[TMP17]], align 8
; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE3]]
; VF2IC2: [[PRED_STORE_CONTINUE3]]:
@@ -619,8 +613,7 @@ define void @struct_return_2xf32_replicate_predicated(ptr %a) {
; VF2IC2-NEXT: [[TMP23:%.*]] = add i64 [[INDEX]], 2
; VF2IC2-NEXT: [[TMP24:%.*]] = extractvalue { float, float } [[TMP22]], 0
; VF2IC2-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP23]]
-; VF2IC2-NEXT: [[TMP26:%.*]] = extractelement <2 x float> [[WIDE_LOAD1]], i32 0
-; VF2IC2-NEXT: [[TMP27:%.*]] = fdiv float [[TMP24]], [[TMP26]]
+; VF2IC2-NEXT: [[TMP27:%.*]] = fdiv float [[TMP24]], [[TMP21]]
; VF2IC2-NEXT: store float [[TMP27]], ptr [[TMP25]], align 8
; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE5]]
; VF2IC2: [[PRED_STORE_CONTINUE5]]:
@@ -629,11 +622,10 @@ define void @struct_return_2xf32_replicate_predicated(ptr %a) {
; VF2IC2: [[PRED_STORE_IF6]]:
; VF2IC2-NEXT: [[TMP29:%.*]] = extractelement <2 x float> [[WIDE_LOAD1]], i32 1
; VF2IC2-NEXT: [[TMP30:%.*]] = tail call { float, float } @fn2(float [[TMP29]]) #[[ATTR3]]
-; VF2IC2-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 3
+; VF2IC2-NEXT: [[TMP26:%.*]] = add i64 [[INDEX]], 3
; VF2IC2-NEXT: [[TMP32:%.*]] = extractvalue { float, float } [[TMP30]], 0
-; VF2IC2-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP31]]
-; VF2IC2-NEXT: [[TMP34:%.*]] = extractelement <2 x float> [[WIDE_LOAD1]], i32 1
-; VF2IC2-NEXT: [[TMP35:%.*]] = fdiv float [[TMP32]], [[TMP34]]
+; VF2IC2-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP26]]
+; VF2IC2-NEXT: [[TMP35:%.*]] = fdiv float [[TMP32]], [[TMP29]]
; VF2IC2-NEXT: store float [[TMP35]], ptr [[TMP33]], align 8
; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE7]]
; VF2IC2: [[PRED_STORE_CONTINUE7]]:
diff --git a/llvm/test/Transforms/LoopVectorize/struct-return.ll b/llvm/test/Transforms/LoopVectorize/struct-return.ll
index 70c6c7e900c51..a0a59d00bd951 100644
--- a/llvm/test/Transforms/LoopVectorize/struct-return.ll
+++ b/llvm/test/Transforms/LoopVectorize/struct-return.ll
@@ -278,11 +278,8 @@ define void @scalarized_predicated_struct_return(ptr %a) {
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = tail call { i64, i64 } @bar_i64(i64 [[TMP3]]) #[[ATTR2:[0-9]+]]
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { i64, i64 } [[TMP4]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = udiv i64 [[TMP5]], [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
-; CHECK-NEXT: store i64 [[TMP7]], ptr [[TMP9]], align 8
+; CHECK-NEXT: [[TMP6:%.*]] = udiv i64 [[TMP5]], [[TMP3]]
+; CHECK-NEXT: store i64 [[TMP6]], ptr [[TMP0]], align 8
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
@@ -291,8 +288,7 @@ define void @scalarized_predicated_struct_return(ptr %a) {
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 1
; CHECK-NEXT: [[TMP12:%.*]] = tail call { i64, i64 } @bar_i64(i64 [[TMP11]]) #[[ATTR2]]
; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { i64, i64 } [[TMP12]], 0
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 1
-; CHECK-NEXT: [[TMP15:%.*]] = udiv i64 [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP15:%.*]] = udiv i64 [[TMP13]], [[TMP11]]
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP16]]
; CHECK-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll
index 9e523be618b44..b282c69d72813 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll
@@ -9,49 +9,42 @@ define i32 @test(ptr %vf1, i64 %n) {
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 -56)
; CHECK-NEXT: [[TMP18:%.*]] = alloca i8, i64 [[N]], align 16
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP18]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0
-; CHECK-NEXT: store ptr [[TMP4]], ptr [[TMP3]], align 8
+; CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP3]], align 8
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
; CHECK: [[PRED_STORE_IF1]]:
-; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0
-; CHECK-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8
+; CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP7]], align 8
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_CONTINUE2]]:
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
; CHECK: [[PRED_STORE_IF3]]:
-; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP2]], 2
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP10]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0
-; CHECK-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8
+; CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP11]], align 8
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]]
; CHECK: [[PRED_STORE_CONTINUE4]]:
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6]]
; CHECK: [[PRED_STORE_IF5]]:
-; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[TMP2]], 3
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0
-; CHECK-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8
+; CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP15]], align 8
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]]
; CHECK: [[PRED_STORE_CONTINUE6]]:
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP2]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 204
; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll
index 00e04c7daee51..eade1724c0b80 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll
@@ -9,13 +9,12 @@ define void @canonical_small_tc_i8(ptr nocapture noundef writeonly %p) {
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i8> [ <i8 0, i8 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <2 x i8> [[VEC_IND]], splat (i8 14)
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
; CHECK-NEXT: store i16 1, ptr [[TMP3]], align 2
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
@@ -23,12 +22,12 @@ define void @canonical_small_tc_i8(ptr nocapture noundef writeonly %p) {
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_IF1]]:
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP5]]
; CHECK-NEXT: store i16 1, ptr [[TMP24]], align 2
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_CONTINUE2]]:
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i8> [[VEC_IND]], splat (i8 2)
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -60,13 +59,12 @@ define void @canonical_upper_limit_i8(ptr nocapture noundef writeonly %p) {
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i8> [ <i8 0, i8 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <2 x i8> [[VEC_IND]], splat (i8 -2)
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
; CHECK-NEXT: store i16 1, ptr [[TMP3]], align 2
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
@@ -74,12 +72,12 @@ define void @canonical_upper_limit_i8(ptr nocapture noundef writeonly %p) {
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_IF1]]:
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP5]]
; CHECK-NEXT: store i16 1, ptr [[TMP24]], align 2
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_CONTINUE2]]:
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i8> [[VEC_IND]], splat (i8 2)
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
@@ -111,13 +109,12 @@ define void @canonical_lower_limit_i16(ptr nocapture noundef writeonly %p) {
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <2 x i16> [[VEC_IND]], splat (i16 256)
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
; CHECK-NEXT: store i16 1, ptr [[TMP3]], align 2
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
@@ -125,12 +122,12 @@ define void @canonical_lower_limit_i16(ptr nocapture noundef writeonly %p) {
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_IF1]]:
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP5]]
; CHECK-NEXT: store i16 1, ptr [[TMP24]], align 2
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_CONTINUE2]]:
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 258
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -162,13 +159,12 @@ define void @canonical_upper_limit_i16(ptr nocapture noundef writeonly %p) {
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <2 x i16> [[VEC_IND]], splat (i16 -2)
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
; CHECK-NEXT: store i16 1, ptr [[TMP3]], align 2
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
@@ -176,12 +172,12 @@ define void @canonical_upper_limit_i16(ptr nocapture noundef writeonly %p) {
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_IF1]]:
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP5]]
; CHECK-NEXT: store i16 1, ptr [[TMP24]], align 2
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_CONTINUE2]]:
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 65536
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
@@ -213,13 +209,12 @@ define void @canonical_lower_limit_i32(ptr nocapture noundef writeonly %p) {
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <2 x i32> [[VEC_IND]], splat (i32 65536)
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
; CHECK-NEXT: store i16 1, ptr [[TMP3]], align 2
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
@@ -227,12 +222,12 @@ define void @canonical_lower_limit_i32(ptr nocapture noundef writeonly %p) {
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_IF1]]:
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP5]]
; CHECK-NEXT: store i16 1, ptr [[TMP24]], align 2
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_CONTINUE2]]:
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 65538
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -264,13 +259,12 @@ define void @canonical_upper_limit_i32(ptr nocapture noundef writeonly %p) {
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <2 x i32> [[VEC_IND]], splat (i32 -2)
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
; CHECK-NEXT: store i16 1, ptr [[TMP3]], align 2
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
@@ -278,12 +272,12 @@ define void @canonical_upper_limit_i32(ptr nocapture noundef writeonly %p) {
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_IF1]]:
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP5]]
; CHECK-NEXT: store i16 1, ptr [[TMP24]], align 2
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_CONTINUE2]]:
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4294967296
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
@@ -315,13 +309,12 @@ define void @canonical_lower_limit_i64(ptr nocapture noundef writeonly %p) {
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <2 x i64> [[VEC_IND]], splat (i64 4294967296)
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
; CHECK-NEXT: store i16 1, ptr [[TMP3]], align 2
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
@@ -329,12 +322,12 @@ define void @canonical_lower_limit_i64(ptr nocapture noundef writeonly %p) {
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_IF1]]:
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP5]]
; CHECK-NEXT: store i16 1, ptr [[TMP6]], align 2
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_CONTINUE2]]:
-; CHECK-NEXT: [[TMP8]] = add i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP8]] = add i64 [[TMP2]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP8]], 4294967298
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -366,13 +359,12 @@ define void @canonical_upper_limit_i64(ptr nocapture noundef writeonly %p) {
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <2 x i64> [[VEC_IND]], splat (i64 -2)
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]]
; CHECK-NEXT: store i16 1, ptr [[TMP3]], align 2
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
@@ -380,12 +372,12 @@ define void @canonical_upper_limit_i64(ptr nocapture noundef writeonly %p) {
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_IF1]]:
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP5]]
; CHECK-NEXT: store i16 1, ptr [[TMP6]], align 2
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_CONTINUE2]]:
-; CHECK-NEXT: [[TMP8]] = add i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP8]] = add i64 [[TMP2]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP8]], 0
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
@@ -417,13 +409,12 @@ define void @canonical_lower_limit_i128(ptr nocapture noundef writeonly %p) {
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i256 [ 0, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = phi i256 [ 0, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i128> [ <i128 0, i128 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <2 x i128> [[VEC_IND]], splat (i128 18446744073709551616)
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP2:%.*]] = add i256 [[INDEX]], 0
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i256 [[TMP2]]
; CHECK-NEXT: store i16 1, ptr [[TMP3]], align 2
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
@@ -431,12 +422,12 @@ define void @canonical_lower_limit_i128(ptr nocapture noundef writeonly %p) {
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_IF1]]:
-; CHECK-NEXT: [[TMP5:%.*]] = add i256 [[INDEX]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = add i256 [[TMP2]], 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[P]], i256 [[TMP5]]
; CHECK-NEXT: store i16 1, ptr [[TMP6]], align 2
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_CONTINUE2]]:
-; CHECK-NEXT: [[TMP8]] = add i256 [[INDEX]], 2
+; CHECK-NEXT: [[TMP8]] = add i256 [[TMP2]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i128> [[VEC_IND]], splat (i128 2)
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i256 [[TMP8]], 18446744073709551618
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll
index b6f43aaa86e33..86a060fb50875 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll
@@ -13,14 +13,13 @@ define void @tail_fold_switch(ptr %dst, i32 %0) {
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], splat (i32 1)
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ]
; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 4)
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP2]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP5]]
; CHECK-NEXT: store i32 0, ptr [[TMP6]], align 4
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
@@ -28,7 +27,7 @@ define void @tail_fold_switch(ptr %dst, i32 %0) {
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
; CHECK: [[PRED_STORE_IF1]]:
-; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP5]], 1
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP8]]
; CHECK-NEXT: store i32 0, ptr [[TMP9]], align 4
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
@@ -36,7 +35,7 @@ define void @tail_fold_switch(ptr %dst, i32 %0) {
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
; CHECK: [[PRED_STORE_IF3]]:
-; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP5]], 2
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP11]]
; CHECK-NEXT: store i32 0, ptr [[TMP12]], align 4
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]]
@@ -44,12 +43,12 @@ define void @tail_fold_switch(ptr %dst, i32 %0) {
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6]]
; CHECK: [[PRED_STORE_IF5]]:
-; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[TMP5]], 3
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP14]]
; CHECK-NEXT: store i32 0, ptr [[TMP15]], align 4
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]]
; CHECK: [[PRED_STORE_CONTINUE6]]:
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP5]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 8
; CHECK-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll
index 541ee96ac760b..fbb9c5f14c86c 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll
@@ -17,17 +17,16 @@ define void @VF1-VPlanExe(ptr %dst) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ule i64 [[TMP0]], 14
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ule i64 [[INDEX]], 14
; CHECK-NEXT: [[TMP5:%.*]] = icmp ule i64 [[TMP1]], 14
; CHECK-NEXT: [[TMP6:%.*]] = icmp ule i64 [[TMP2]], 14
; CHECK-NEXT: [[TMP7:%.*]] = icmp ule i64 [[TMP3]], 14
; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDEX]]
; CHECK-NEXT: store i32 0, ptr [[TMP8]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
diff --git a/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll b/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll
index 586804bac9429..adb6b029a7112 100644
--- a/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll
+++ b/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll
@@ -156,9 +156,7 @@ define i64 @multi_exit_2_exit_count_with_udiv_by_value_in_block_executed_uncondi
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0
; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[IV]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP20]]
-; CHECK-NEXT: store i32 1, ptr [[TMP9]], align 4
+; CHECK-NEXT: store i32 1, ptr [[TMP5]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1
@@ -267,9 +265,7 @@ define i64 @multi_exit_2_exit_count_with_udiv_by_constant_in_block_executed_unco
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0
; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[IV]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP20]]
-; CHECK-NEXT: store i32 1, ptr [[TMP9]], align 4
+; CHECK-NEXT: store i32 1, ptr [[TMP5]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll b/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll
index 6bdf571a318be..380a6ac860f39 100644
--- a/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll
+++ b/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll
@@ -113,10 +113,6 @@ define void @metadata(ptr nocapture readonly %ptr, i32 %size, ptr %pos) {
; FORCED-TF-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP1]]
; FORCED-TF-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP2]]
; FORCED-TF-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP3]]
-; FORCED-TF-NEXT: [[TMP4:%.*]] = insertelement <4 x ptr> poison, ptr [[NEXT_GEP]], i32 0
-; FORCED-TF-NEXT: [[TMP5:%.*]] = insertelement <4 x ptr> [[TMP4]], ptr [[NEXT_GEP1]], i32 1
-; FORCED-TF-NEXT: [[TMP6:%.*]] = insertelement <4 x ptr> [[TMP5]], ptr [[NEXT_GEP2]], i32 2
-; FORCED-TF-NEXT: [[TMP7:%.*]] = insertelement <4 x ptr> [[TMP6]], ptr [[NEXT_GEP3]], i32 3
; FORCED-TF-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
; FORCED-TF-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer
; FORCED-TF-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 0, i32 1, i32 2, i32 3>
@@ -191,10 +187,6 @@ define void @metadata(ptr nocapture readonly %ptr, i32 %size, ptr %pos) {
; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP1]]
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP2]]
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP3]]
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x ptr> poison, ptr [[NEXT_GEP]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x ptr> [[TMP4]], ptr [[NEXT_GEP1]], i32 1
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x ptr> [[TMP5]], ptr [[NEXT_GEP2]], i32 2
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x ptr> [[TMP6]], ptr [[NEXT_GEP3]], i32 3
; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 0, i32 1, i32 2, i32 3>
diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
index 5da6fc3179043..cfeaaf88643d0 100644
--- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
+++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
@@ -629,10 +629,7 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF1-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0
; VF8UF1-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF8UF1: [[PRED_STORE_IF]]:
-; VF8UF1-NEXT: [[TMP5:%.*]] = mul i64 0, [[STEP]]
-; VF8UF1-NEXT: [[TMP6:%.*]] = add i64 0, [[TMP5]]
-; VF8UF1-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], [[STEP]]
-; VF8UF1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]]
+; VF8UF1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[STEP]]
; VF8UF1-NEXT: store i8 0, ptr [[TMP8]], align 1
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE]]
; VF8UF1: [[PRED_STORE_CONTINUE]]:
@@ -733,10 +730,7 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0
; VF8UF2-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF8UF2: [[PRED_STORE_IF]]:
-; VF8UF2-NEXT: [[TMP6:%.*]] = mul i64 0, [[STEP]]
-; VF8UF2-NEXT: [[TMP7:%.*]] = add i64 0, [[TMP6]]
-; VF8UF2-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], [[STEP]]
-; VF8UF2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP8]]
+; VF8UF2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[DST]], i64 [[STEP]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP9]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE]]
; VF8UF2: [[PRED_STORE_CONTINUE]]:
@@ -916,10 +910,7 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP4:%.*]] = extractelement <16 x i1> [[TMP3]], i32 0
; VF16UF1-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF16UF1: [[PRED_STORE_IF]]:
-; VF16UF1-NEXT: [[TMP5:%.*]] = mul i64 0, [[STEP]]
-; VF16UF1-NEXT: [[TMP6:%.*]] = add i64 0, [[TMP5]]
-; VF16UF1-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], [[STEP]]
-; VF16UF1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]]
+; VF16UF1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[STEP]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP8]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE]]
; VF16UF1: [[PRED_STORE_CONTINUE]]:
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
index e60c3c40048f3..f0ef31d9ae23c 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
@@ -20,67 +20,69 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; CHECK-NEXT: Successor(s): vector.body
; CHECK-EMPTY:
; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT-SCALAR vp<[[CAN_IV:%.+]]> = phi [ ir<0>, vector.ph ], [ vp<[[CAN_IV_NEXT:%.+]]>, default.2 ]
+; CHECK-NEXT: EMIT-SCALAR vp<[[CAN_IV:%.+]]> = phi [ ir<0>, vector.ph ], [ vp<[[CAN_IV_NEXT:%.+]]>, pred.store.continue{{.*}} ]
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, ir<2>, ir<1>
; CHECK-NEXT: EMIT vp<[[PTR:%.+]]> = ptradd ir<%start>, vp<[[CAN_IV]]>
; CHECK-NEXT: EMIT vp<[[PTR]]>.1 = ptradd ir<%start>, vp<[[STEPS]]>
-; CHECK-NEXT: EMIT vp<[[PTR_VEC:%.+]]> = buildvector vp<[[PTR]]>, vp<[[PTR]]>.1
; CHECK-NEXT: WIDEN ir<%l> = load vp<[[PTR]]>
; CHECK-NEXT: EMIT vp<[[C1:%.+]]> = icmp eq ir<%l>, ir<-12>
; CHECK-NEXT: EMIT vp<[[C2:%.+]]> = icmp eq ir<%l>, ir<13>
; CHECK-NEXT: EMIT vp<[[OR_CASES:%.+]]> = or vp<[[C1]]>, vp<[[C2]]>
; CHECK-NEXT: EMIT vp<[[DEFAULT_MASK:%.+]]> = not vp<[[OR_CASES]]>
-; CHECK-NEXT: Successor(s): pred.store
-; CHECK-EMPTY:
-; CHECK-NEXT: <xVFxUF> pred.store: {
-; CHECK-NEXT: pred.store.entry:
-; CHECK-NEXT: BRANCH-ON-MASK vp<[[C2]]>
-; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
-; CHECK-EMPTY:
-; CHECK-NEXT: pred.store.if:
-; CHECK-NEXT: REPLICATE store ir<0>, vp<[[PTR_VEC]]>
-; CHECK-NEXT: Successor(s): pred.store.continue
-; CHECK-EMPTY:
-; CHECK-NEXT: pred.store.continue:
-; CHECK-NEXT: No successors
-; CHECK-NEXT: }
-; CHECK-NEXT: Successor(s): if.then.2.0
-; CHECK-EMPTY:
-; CHECK-NEXT: if.then.2.0:
-; CHECK-NEXT: Successor(s): pred.store
-; CHECK-EMPTY:
-; CHECK-NEXT: <xVFxUF> pred.store: {
-; CHECK-NEXT: pred.store.entry:
-; CHECK-NEXT: BRANCH-ON-MASK vp<[[C1]]>
-; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
-; CHECK-EMPTY:
-; CHECK-NEXT: pred.store.if:
-; CHECK-NEXT: REPLICATE store ir<42>, vp<[[PTR_VEC]]>
-; CHECK-NEXT: Successor(s): pred.store.continue
-; CHECK-EMPTY:
-; CHECK-NEXT: pred.store.continue:
-; CHECK-NEXT: No successors
-; CHECK-NEXT: }
-; CHECK-NEXT: Successor(s): if.then.1.1
-; CHECK-EMPTY:
-; CHECK-NEXT: if.then.1.1:
-; CHECK-NEXT: Successor(s): pred.store
-; CHECK-EMPTY:
-; CHECK-NEXT: <xVFxUF> pred.store: {
-; CHECK-NEXT: pred.store.entry:
-; CHECK-NEXT: BRANCH-ON-MASK vp<[[DEFAULT_MASK]]>
-; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
-; CHECK-EMPTY:
-; CHECK-NEXT: pred.store.if:
-; CHECK-NEXT: REPLICATE store ir<2>, vp<[[PTR_VEC]]>
-; CHECK-NEXT: Successor(s): pred.store.continue
-; CHECK-EMPTY:
-; CHECK-NEXT: pred.store.continue:
-; CHECK-NEXT: No successors
-; CHECK-NEXT: }
-; CHECK-NEXT: Successor(s): default.2
-; CHECK-EMPTY:
-; CHECK-NEXT: default.2:
+; CHECK-NEXT: EMIT vp<[[C2_LANE0:%.+]]> = extractelement vp<[[C2]]>, ir<0>
+; CHECK-NEXT: EMIT branch-on-cond vp<[[C2_LANE0]]>
+; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.if:
+; CHECK-NEXT: CLONE store ir<0>, vp<[[PTR]]>
+; CHECK-NEXT: Successor(s): pred.store.continue
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.continue:
+; CHECK-NEXT: EMIT vp<[[C2_LANE1:%.+]]> = extractelement vp<[[C2]]>, ir<1>
+; CHECK-NEXT: EMIT branch-on-cond vp<[[C2_LANE1]]>
+; CHECK-NEXT: Successor(s): pred.store.if{{.*}}, pred.store.continue{{.*}}
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.if{{.*}}:
+; CHECK-NEXT: CLONE store ir<0>, vp<[[PTR]]>.1
+; CHECK-NEXT: Successor(s): pred.store.continue{{.*}}
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.continue{{.*}}:
+; CHECK-NEXT: EMIT vp<[[C1_LANE0:%.+]]> = extractelement vp<[[C1]]>, ir<0>
+; CHECK-NEXT: EMIT branch-on-cond vp<[[C1_LANE0]]>
+; CHECK-NEXT: Successor(s): pred.store.if{{.*}}, pred.store.continue{{.*}}
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.if{{.*}}:
+; CHECK-NEXT: CLONE store ir<42>, vp<[[PTR]]>
+; CHECK-NEXT: Successor(s): pred.store.continue{{.*}}
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.continue{{.*}}:
+; CHECK-NEXT: EMIT vp<[[C1_LANE1:%.+]]> = extractelement vp<[[C1]]>, ir<1>
+; CHECK-NEXT: EMIT branch-on-cond vp<[[C1_LANE1]]>
+; CHECK-NEXT: Successor(s): pred.store.if{{.*}}, pred.store.continue{{.*}}
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.if{{.*}}:
+; CHECK-NEXT: CLONE store ir<42>, vp<[[PTR]]>.1
+; CHECK-NEXT: Successor(s): pred.store.continue{{.*}}
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.continue{{.*}}:
+; CHECK-NEXT: EMIT vp<[[DEFAULT_MASK_LANE0:%.+]]> = extractelement vp<[[DEFAULT_MASK]]>, ir<0>
+; CHECK-NEXT: EMIT branch-on-cond vp<[[DEFAULT_MASK_LANE0]]>
+; CHECK-NEXT: Successor(s): pred.store.if{{.*}}, pred.store.continue{{.*}}
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.if{{.*}}:
+; CHECK-NEXT: CLONE store ir<2>, vp<[[PTR]]>
+; CHECK-NEXT: Successor(s): pred.store.continue{{.*}}
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.continue{{.*}}:
+; CHECK-NEXT: EMIT vp<[[DEFAULT_MASK_LANE1:%.+]]> = extractelement vp<[[DEFAULT_MASK]]>, ir<1>
+; CHECK-NEXT: EMIT branch-on-cond vp<[[DEFAULT_MASK_LANE1]]>
+; CHECK-NEXT: Successor(s): pred.store.if{{.*}}, pred.store.continue{{.*}}
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.if{{.*}}:
+; CHECK-NEXT: CLONE store ir<2>, vp<[[PTR]]>.1
+; CHECK-NEXT: Successor(s): pred.store.continue{{.*}}
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.continue{{.*}}:
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, ir<2>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
; CHECK-NEXT: Successor(s): middle.block, vector.body
More information about the llvm-commits
mailing list