[llvm] [VPlan] Introduce CSE pass (PR #151872)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Sun Aug 3 10:34:48 PDT 2025
https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/151872
>From a95d0cd4a78e32dd23e3f35d9112485620044cd3 Mon Sep 17 00:00:00 2001
From: Pedro Lobo <pedro.lobo at tecnico.ulisboa.pt>
Date: Sun, 3 Aug 2025 16:10:20 +0100
Subject: [PATCH 1/2] [LV] Pre-commit test for cse_bitcast
---
.../LoopVectorize/X86/cse-bitcast.ll | 80 +++++++++++++++++++
1 file changed, 80 insertions(+)
create mode 100644 llvm/test/Transforms/LoopVectorize/X86/cse-bitcast.ll
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cse-bitcast.ll b/llvm/test/Transforms/LoopVectorize/X86/cse-bitcast.ll
new file mode 100644
index 0000000000000..1480b3b7a36e1
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/X86/cse-bitcast.ll
@@ -0,0 +1,80 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph" --version 5
+; RUN: opt %s -passes=loop-vectorize -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @cse_bitcast(i16 %val, half %fval, ptr %p, i64 %n) {
+; CHECK-LABEL: define void @cse_bitcast(
+; CHECK-SAME: i16 [[VAL:%.*]], half [[FVAL:%.*]], ptr [[P:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 60
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
+; CHECK: [[VECTOR_SCEVCHECK]]:
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
+; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
+; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
+; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = sub i64 0, [[MUL_RESULT]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[P]], i64 [[MUL_RESULT]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[P]]
+; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]]
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P]], i64 2
+; CHECK-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
+; CHECK-NEXT: [[MUL_RESULT2:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0
+; CHECK-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = sub i64 0, [[MUL_RESULT2]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT2]]
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ult ptr [[TMP6]], [[SCEVGEP]]
+; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW3]]
+; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP4]], [[TMP8]]
+; CHECK-NEXT: br i1 [[TMP9]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[N_VEC]], 4
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP10]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[VAL]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x half> poison, half [[FVAL]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x half> [[BROADCAST_SPLATINSERT4]], <4 x half> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 16
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP12]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i16> [[BROADCAST_SPLAT]] to <4 x half>
+; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x half> [[TMP13]], <4 x half> [[BROADCAST_SPLAT5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x half> [[TMP14]], <8 x half> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; CHECK-NEXT: store <8 x half> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP]], align 2
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i16> [[BROADCAST_SPLAT]] to <4 x half>
+; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x half> [[TMP15]], <4 x half> [[BROADCAST_SPLAT5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[INTERLEAVED_VEC7:%.*]] = shufflevector <8 x half> [[TMP16]], <8 x half> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; CHECK-NEXT: store <8 x half> [[INTERLEAVED_VEC7]], ptr [[NEXT_GEP6]], align 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], [[END:label %.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+;
+entry:
+ br label %loop
+
+loop:
+ %res = phi ptr [ %p, %entry ], [ %res.next, %loop ]
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ store i16 %val, ptr %res
+ %res.offset = getelementptr i16, ptr %res, i64 1
+ store half %fval, ptr %res.offset
+ %res.next = getelementptr i16, ptr %res, i64 2
+ %iv.next = add i64 %iv, 1
+ %exit.cond = icmp eq i64 %iv.next, %n
+ br i1 %exit.cond, label %end, label %loop
+
+end:
+ ret void
+}
>From cc22a76225b00353955796da32f99dee76f740a7 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Sun, 3 Aug 2025 14:16:11 +0100
Subject: [PATCH 2/2] [VPlan] Introduce CSE pass
Requires #151487 to completely subsume the existing non-VPlan-based, limited
CSE. Inspired by #146856, although the test from that PR remains unchanged;
still investigating.
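To illustrate the effect (a hand-written LLVM IR sketch with made-up value
names, not copied verbatim from any test in this patch), duplicated
vscale-based address computations like the ones cleaned up in
sve-epilog-vect.ll collapse into a single sequence:

  ; Before VPlan CSE: both sequences compute the same address.
  %vs0  = call i64 @llvm.vscale.i64()
  %off0 = mul nuw i64 %vs0, 4
  %gep0 = getelementptr inbounds float, ptr %p, i64 %off0
  %vs1  = call i64 @llvm.vscale.i64()   ; identical to %vs0
  %off1 = mul nuw i64 %vs1, 4           ; identical to %off0
  %gep1 = getelementptr inbounds float, ptr %p, i64 %off1

  ; After VPlan CSE: the second sequence is removed, and users of
  ; %gep1 are rewritten to use %gep0.

The de-duplication happens at the recipe level, before the plan is executed,
so the duplicate computations never materialize in the output IR.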
---
.../Transforms/Vectorize/LoopVectorize.cpp | 1 +
llvm/lib/Transforms/Vectorize/VPlan.cpp | 27 +++
.../Transforms/Vectorize/VPlanTransforms.cpp | 48 ++++++
.../Transforms/Vectorize/VPlanTransforms.h | 4 +
llvm/lib/Transforms/Vectorize/VPlanUtils.h | 9 +
llvm/lib/Transforms/Vectorize/VPlanValue.h | 7 +
.../epilog-vectorization-widen-inductions.ll | 5 +-
.../AArch64/force-target-instruction-cost.ll | 3 +-
.../LoopVectorize/AArch64/induction-costs.ll | 3 +-
.../LoopVectorize/AArch64/licm-calls.ll | 3 +-
.../AArch64/partial-reduce-dot-product.ll | 23 ++-
.../AArch64/reduction-recurrence-costs-sve.ll | 6 +-
.../LoopVectorize/AArch64/sve-epilog-vect.ll | 32 ++--
.../AArch64/sve-vscale-based-trip-counts.ll | 37 ++--
.../LoopVectorize/AArch64/sve-widen-phi.ll | 4 +-
.../Transforms/LoopVectorize/ARM/mve-qabs.ll | 8 +-
.../LoopVectorize/ARM/mve-reduction-types.ll | 15 +-
.../LoopVectorize/ARM/pointer_iv.ll | 16 +-
.../LoopVectorize/RISCV/inloop-reduction.ll | 6 +-
.../LoopVectorize/RISCV/scalable-tailfold.ll | 5 +-
.../LoopVectorize/RISCV/strided-accesses.ll | 10 +-
.../X86/cost-constant-known-via-scev.ll | 3 +-
.../LoopVectorize/X86/induction-costs.ll | 3 +-
.../LoopVectorize/X86/uniform_load.ll | 9 +-
.../first-order-recurrence-complex.ll | 6 +-
...t-order-recurrence-multiply-recurrences.ll | 3 +-
.../LoopVectorize/first-order-recurrence.ll | 42 ++---
.../Transforms/LoopVectorize/induction.ll | 80 +++++----
.../interleave-with-i65-induction.ll | 3 +-
...aved-accesses-different-insert-position.ll | 3 +-
.../Transforms/LoopVectorize/opaque-ptr.ll | 12 +-
.../LoopVectorize/pointer-induction.ll | 24 +--
llvm/test/Transforms/LoopVectorize/pr36983.ll | 36 +++-
.../pr59319-loop-access-info-invalidation.ll | 25 ++-
.../LoopVectorize/reverse_induction.ll | 9 +-
.../LoopVectorize/single-value-blend-phis.ll | 6 +-
...oop-backedge-elimination-branch-weights.ll | 3 +-
.../vector-loop-backedge-elimination.ll | 163 +++++++++---------
.../version-stride-with-integer-casts.ll | 6 +-
.../vplan-printing-before-execute.ll | 3 +-
40 files changed, 392 insertions(+), 319 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 20528089c0008..2603171f0eee6 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7308,6 +7308,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
VPlanTransforms::narrowInterleaveGroups(
BestVPlan, BestVF,
TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
+ VPlanTransforms::cse(BestVPlan, *Legal->getWidestInductionType());
VPlanTransforms::removeDeadRecipes(BestVPlan);
VPlanTransforms::convertToConcreteRecipes(BestVPlan,
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 25b9616880bf4..f4869074a440c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -122,6 +122,33 @@ void VPDef::dump() const {
}
#endif
+bool VPValue::isIdenticalTo(const VPValue *Other) const {
+ if (getVPValueID() != Other->getVPValueID() ||
+ hasDefiningRecipe() != Other->hasDefiningRecipe() ||
+ getUnderlyingValue() != Other->getUnderlyingValue())
+ return false;
+ if (hasDefiningRecipe()) {
+ const VPRecipeBase *DefL = getDefiningRecipe();
+ const VPRecipeBase *DefR = Other->getDefiningRecipe();
+ if (vputils::getOpcode(*DefL) != vputils::getOpcode(*DefR) ||
+ DefL->getNumOperands() != DefR->getNumOperands())
+ return false;
+ return equal(DefL->operands(), DefR->operands());
+ }
+ return true;
+}
+
+hash_code llvm::hash_value(const VPValue &V) {
+ if (V.hasDefiningRecipe()) {
+ const VPRecipeBase *Def = V.getDefiningRecipe();
+ return hash_combine(vputils::getOpcode(*Def),
+ hash_combine_range(Def->operands()));
+ }
+ if (Value *U = V.getUnderlyingValue())
+ return hash_combine(V.getVPValueID(), U);
+ return hash_value(V.getVPValueID());
+}
+
VPRecipeBase *VPValue::getDefiningRecipe() {
return cast_or_null<VPRecipeBase>(Def);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index fcbc86f5e4c58..9426a80969135 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1755,6 +1755,54 @@ void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
}
}
+/// Hash the underlying data of a VPSingleDefRecipe pointer, instead of hashing
+/// the pointer itself.
+namespace {
+struct CSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
+ static unsigned getHashValue(const VPSingleDefRecipe *R) {
+ return hash_value(*R);
+ }
+
+ static bool isEqual(const VPSingleDefRecipe *LHS,
+ const VPSingleDefRecipe *RHS) {
+ if (LHS == getEmptyKey() || RHS == getEmptyKey() ||
+ LHS == getTombstoneKey() || RHS == getTombstoneKey())
+ return LHS == RHS;
+ return LHS->isIdenticalTo(RHS);
+ }
+};
+} // end anonymous namespace
+
+/// Perform common-subexpression elimination on the VPSingleDefRecipes in \p
+/// Plan.
+void VPlanTransforms::cse(VPlan &Plan, Type &CanonicalIVTy) {
+ DenseMap<VPSingleDefRecipe *, VPSingleDefRecipe *, CSEDenseMapInfo> CSEMap;
+ VPTypeAnalysis TypeInfo(&CanonicalIVTy);
+ // There is existing logic to sink instructions into replicate regions, and
+ // we'd be undoing that work if we went through replicate regions. Hence,
+ // don't CSE in replicate regions.
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+ vp_depth_first_shallow(Plan.getEntry()))) {
+ for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
+ auto *Def = dyn_cast<VPSingleDefRecipe>(&R);
+ if (!Def)
+ continue;
+ // Check if we can replace this recipe with any of the previously
+ // visited recipes.
+ if (VPSingleDefRecipe *V = CSEMap.lookup(Def)) {
+ // Due to other transforms like truncateToMinimalBitwidths, there is no
+ // embedded type information that we can reliably look at.
+ if (TypeInfo.inferScalarType(Def) != TypeInfo.inferScalarType(V))
+ continue;
+ Def->replaceAllUsesWith(V);
+ Def->eraseFromParent();
+ } else {
+ CSEMap.insert_or_assign(Def, Def);
+ }
+ }
+ }
+}
+
/// Move loop-invariant recipes out of the vector loop region in \p Plan.
static void licm(VPlan &Plan) {
VPBasicBlock *Preheader = Plan.getVectorPreheader();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 880159f760922..f782a4e331091 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -240,6 +240,10 @@ struct VPlanTransforms {
/// removing dead edges to their successors.
static void removeBranchOnConst(VPlan &Plan);
+ /// Perform common-subexpression elimination; this is best done late, just
+ /// before the \p Plan is executed.
+ static void cse(VPlan &Plan, Type &CanonicalIVType);
+
/// If there's a single exit block, optimize its phi recipes that use exiting
/// IV values by feeding them precomputed end values instead, possibly taken
/// one step backwards.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 8dcd57f1b3598..40bcd80930b3d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -10,6 +10,7 @@
#define LLVM_TRANSFORMS_VECTORIZE_VPLANUTILS_H
#include "VPlan.h"
+#include "llvm/ADT/TypeSwitch.h"
namespace llvm {
class ScalarEvolution;
@@ -37,6 +38,14 @@ VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
/// SCEV expression could be constructed.
const SCEV *getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE);
+/// Get any instruction opcode data embedded in recipe \p R.
+inline std::optional<unsigned> getOpcode(const VPRecipeBase &R) {
+ return TypeSwitch<const VPRecipeBase *, std::optional<unsigned>>(&R)
+ .Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe, VPReplicateRecipe,
+ VPWidenSelectRecipe>([](auto *I) { return I->getOpcode(); })
+ .Default([](auto *) { return std::nullopt; });
+}
+
/// Returns true if \p VPV is a single scalar, either because it produces the
/// same value for all lanes or only has its first lane used.
inline bool isSingleScalar(const VPValue *VPV) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 24f6d61512ef6..05bb8a385fae6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -185,8 +185,15 @@ class LLVM_ABI_FOR_TEST VPValue {
assert(!UnderlyingVal && "Underlying Value is already set.");
UnderlyingVal = Val;
}
+
+ // Returns true if \p Other defines the same data as this VPValue.
+ bool isIdenticalTo(const VPValue *Other) const;
};
+// Hash the underlying data of \p Arg so that VPValues can be de-duplicated,
+// e.g. by the CSE transform.
+hash_code hash_value(const VPValue &Arg);
+
typedef DenseMap<Value *, VPValue *> Value2VPValueTy;
typedef DenseMap<VPValue *, Value *> VPValue2ValueTy;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
index 5b15896da8d78..87b8c4af1e0c7 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
@@ -330,11 +330,10 @@ define void @test_widen_induction_step_2(ptr %A, i64 %N, i32 %step) {
; CHECK-NEXT: [[CMP_N11:%.*]] = icmp eq i64 [[N]], [[IND_END]]
; CHECK-NEXT: br i1 [[CMP_N11]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL5]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL6]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[IV_2:%.*]] = phi i64 [ [[BC_RESUME_VAL6]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_1_NEXT]], [[LOOP]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_1]]
; CHECK-NEXT: [[ADD:%.*]] = add i64 [[IV_2]], 10
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
index c9cef142e484c..ab8896fc21f43 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
@@ -23,12 +23,11 @@ define double @test_reduction_costs() {
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP_1:.*]]
; CHECK: [[LOOP_1]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_1]] ]
; CHECK-NEXT: [[R_1:%.*]] = phi double [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[R_1_NEXT:%.*]], %[[LOOP_1]] ]
-; CHECK-NEXT: [[R_2:%.*]] = phi double [ [[BC_MERGE_RDX2]], %[[SCALAR_PH]] ], [ [[R_2_NEXT:%.*]], %[[LOOP_1]] ]
+; CHECK-NEXT: [[R_2:%.*]] = phi double [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[R_2_NEXT:%.*]], %[[LOOP_1]] ]
; CHECK-NEXT: [[R_1_NEXT]] = fadd double [[R_1]], 3.000000e+00
; CHECK-NEXT: [[R_2_NEXT]] = fadd double [[R_2]], 9.000000e+00
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll
index aa2ec2de14c29..26441518bc683 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll
@@ -294,7 +294,6 @@ define i64 @test_ptr_ivs_and_widened_ivs(ptr %src, i32 %N) {
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 4
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP3]]
-; CHECK-NEXT: [[IND_END1:%.*]] = trunc i64 [[N_VEC]] to i32
; CHECK-NEXT: [[IND_END3:%.*]] = trunc i64 [[N_VEC]] to i32
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
@@ -321,7 +320,7 @@ define i64 @test_ptr_ivs_and_widened_ivs(ptr %src, i32 %N) {
; CHECK: scalar.ph:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[SRC]], [[ENTRY]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll b/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll
index 8c4eba61b6ba2..09a1c17087af2 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll
@@ -9,9 +9,8 @@ define void @licm_replicate_call(double %x, ptr %dst) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP0:%.*]] = tail call double @llvm.pow.f64(double [[X]], double 3.000000e+00)
; CHECK-NEXT: [[TMP1:%.*]] = tail call double @llvm.pow.f64(double [[X]], double 3.000000e+00)
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP1]], i32 1
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
index 5218e64cddd80..7f511c59a46bd 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
@@ -1003,10 +1003,6 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK-INTERLEAVE1: middle.block:
-; CHECK-INTERLEAVE1-NEXT: [[TMP19:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = mul nuw i32 [[TMP19]], 8
-; CHECK-INTERLEAVE1-NEXT: [[TMP21:%.*]] = sub i32 [[TMP20]], 1
-; CHECK-INTERLEAVE1-NEXT: [[TMP22:%.*]] = extractelement <vscale x 8 x i32> [[TMP17]], i32 [[TMP21]]
; CHECK-INTERLEAVE1-NEXT: [[TMP23:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-INTERLEAVE1-NEXT: [[TMP24:%.*]] = mul nuw i32 [[TMP23]], 8
; CHECK-INTERLEAVE1-NEXT: [[TMP25:%.*]] = sub i32 [[TMP24]], 1
@@ -1049,10 +1045,6 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK-INTERLEAVED: middle.block:
-; CHECK-INTERLEAVED-NEXT: [[TMP23:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-INTERLEAVED-NEXT: [[TMP31:%.*]] = mul nuw i32 [[TMP23]], 8
-; CHECK-INTERLEAVED-NEXT: [[TMP25:%.*]] = sub i32 [[TMP31]], 1
-; CHECK-INTERLEAVED-NEXT: [[TMP26:%.*]] = extractelement <vscale x 8 x i32> [[TMP21]], i32 [[TMP25]]
; CHECK-INTERLEAVED-NEXT: [[TMP27:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-INTERLEAVED-NEXT: [[TMP28:%.*]] = mul nuw i32 [[TMP27]], 8
; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = sub i32 [[TMP28]], 1
@@ -1089,10 +1081,6 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 {
; CHECK-MAXBW-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-MAXBW-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK-MAXBW: middle.block:
-; CHECK-MAXBW-NEXT: [[TMP26:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-MAXBW-NEXT: [[TMP27:%.*]] = mul nuw i32 [[TMP26]], 8
-; CHECK-MAXBW-NEXT: [[TMP28:%.*]] = sub i32 [[TMP27]], 1
-; CHECK-MAXBW-NEXT: [[TMP29:%.*]] = extractelement <vscale x 8 x i32> [[TMP21]], i32 [[TMP28]]
; CHECK-MAXBW-NEXT: [[TMP23:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-MAXBW-NEXT: [[TMP24:%.*]] = mul nuw i32 [[TMP23]], 8
; CHECK-MAXBW-NEXT: [[TMP25:%.*]] = sub i32 [[TMP24]], 1
@@ -1852,6 +1840,17 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 {
; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i64>
; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[INDEX]], 1
; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[TMP10]]
+; CHECK-MAXBW-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x i8>, ptr [[TMP11]], align 1
+; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD1]] to <vscale x 8 x i64>
+; CHECK-MAXBW-NEXT: [[TMP15:%.*]] = mul nuw nsw <vscale x 8 x i64> [[TMP12]], [[TMP9]]
+; CHECK-MAXBW-NEXT: [[TMP14]] = add <vscale x 8 x i64> [[VEC_PHI]], [[TMP15]]
+; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-MAXBW-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
+; CHECK-MAXBW: middle.block:
+; CHECK-MAXBW-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.add.nxv8i64(<vscale x 8 x i64> [[TMP14]])
+; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 41, [[N_VEC]]
+; CHECK-MAXBW-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK-MAXBW: scalar.ph:
;
entry:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
index 08d35f71e7cc3..366d4fe1adf04 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
@@ -251,15 +251,13 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; PRED-NEXT: br label %[[EXIT:.*]]
; PRED: [[SCALAR_PH]]:
; PRED-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
-; PRED-NEXT: [[SCALAR_RECUR_INIT8:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
-; PRED-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
; PRED-NEXT: br label %[[LOOP:.*]]
; PRED: [[LOOP]]:
; PRED-NEXT: [[TMP45:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[TMP53:%.*]], %[[LOOP]] ]
-; PRED-NEXT: [[SCALAR_RECUR10:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT8]], %[[SCALAR_PH]] ], [ [[TMP45]], %[[LOOP]] ]
+; PRED-NEXT: [[SCALAR_RECUR10:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[TMP45]], %[[LOOP]] ]
; PRED-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], %[[LOOP]] ]
-; PRED-NEXT: [[SUM_RED:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[RED_2:%.*]], %[[LOOP]] ]
+; PRED-NEXT: [[SUM_RED:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[RED_2:%.*]], %[[LOOP]] ]
; PRED-NEXT: [[TMP52:%.*]] = add i64 [[Y]], 1
; PRED-NEXT: [[GEP_1:%.*]] = getelementptr i32, ptr [[SRC_1]], i64 [[TMP52]]
; PRED-NEXT: [[TMP53]] = load i32, ptr [[GEP_1]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
index 0f407cd565457..a5f132b05e27e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
@@ -564,11 +564,11 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 4 x float>, ptr [[TMP17]], align 4
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD3]]
; CHECK-NEXT: [[TMP19:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD4]]
-; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4
-; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP21]]
+; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP21]], 4
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP22]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP13]], align 4
-; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP22]], align 4
+; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP20]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
@@ -640,11 +640,11 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
; CHECK-VF8-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP15]], align 4
; CHECK-VF8-NEXT: [[TMP16:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-VF8-NEXT: [[TMP17:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
-; CHECK-VF8-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 4
-; CHECK-VF8-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP19]]
+; CHECK-VF8-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-VF8-NEXT: [[TMP20:%.*]] = mul nuw i64 [[TMP19]], 4
+; CHECK-VF8-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP20]]
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP16]], ptr [[TMP11]], align 4
-; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP20]], align 4
+; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP18]], align 4
; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-VF8-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF8-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
@@ -716,11 +716,11 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 4 x float>, ptr [[TMP17]], align 4
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD3]]
; CHECK-NEXT: [[TMP19:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD4]]
-; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4
-; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP21]]
+; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP21]], 4
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP22]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP13]], align 4
-; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP22]], align 4
+; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP20]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
@@ -792,11 +792,11 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
; CHECK-VF8-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP15]], align 4
; CHECK-VF8-NEXT: [[TMP16:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-VF8-NEXT: [[TMP17:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
-; CHECK-VF8-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 4
-; CHECK-VF8-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP19]]
+; CHECK-VF8-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-VF8-NEXT: [[TMP20:%.*]] = mul nuw i64 [[TMP19]], 4
+; CHECK-VF8-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP20]]
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP16]], ptr [[TMP11]], align 4
-; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP20]], align 4
+; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP18]], align 4
; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-VF8-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF8-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll
index 352f4fe3dae21..cd22c9b4e0fdd 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll
@@ -84,11 +84,8 @@ define void @vscale_mul_8(ptr noalias noundef readonly captures(none) %a, ptr n
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP16]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
-; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4
-; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[TMP21]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[B]], align 4
-; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP22]], align 4
+; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP16]], align 4
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[MUL1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY:.*]]
; CHECK: [[FOR_COND_CLEANUP]]:
@@ -230,11 +227,11 @@ define void @vscale_mul_31(ptr noalias noundef readonly captures(none) %a, ptr n
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP16]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
-; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP20:%.*]] = mul nuw i64 [[TMP19]], 4
-; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP20]]
+; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP21]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP12]], align 4
-; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP21]], align 4
+; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP19]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -313,11 +310,11 @@ define void @vscale_mul_64(ptr noalias noundef readonly captures(none) %a, ptr n
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP16]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
-; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP20:%.*]] = mul nuw i64 [[TMP19]], 4
-; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP20]]
+; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP21]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP12]], align 4
-; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP21]], align 4
+; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP19]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -398,11 +395,11 @@ define void @trip_count_with_overflow(ptr noalias noundef readonly captures(none
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP17]], align 4
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP19:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
-; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4
-; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP21]]
+; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP21]], 4
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP22]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP13]], align 4
-; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP22]], align 4
+; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP20]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
@@ -479,11 +476,11 @@ define void @trip_count_too_big_for_element_count(ptr noalias noundef readonly c
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP17]], align 4
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP19:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
-; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4
-; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP21]]
+; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP21]], 4
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP22]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP13]], align 4
-; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP22]], align 4
+; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP20]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
index 6947884efb699..411e63309da75 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
@@ -148,8 +148,8 @@ define void @widen_2ptrs_phi_unrolled(ptr noalias nocapture %dst, ptr noalias no
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 2
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[OFFSET_IDX4:%.*]] = shl i64 [[INDEX]], 2
-; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX4]]
+; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = shl i64 [[INDEX]], 2
+; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX1]]
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[DOTIDX:%.*]] = shl nuw nsw i64 [[TMP7]], 4
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[DOTIDX]]
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll
index 0595bb31a58d8..2081b6c457c48 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll
@@ -120,8 +120,8 @@ define void @arm_abs_q15(ptr nocapture readonly %pSrc, ptr nocapture %pDst, i32
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i32 [[INDEX]], 1
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC]], i32 [[OFFSET_IDX]]
-; CHECK-NEXT: [[OFFSET_IDX7:%.*]] = shl i32 [[INDEX]], 1
-; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[OFFSET_IDX7]]
+; CHECK-NEXT: [[OFFSET_IDX3:%.*]] = shl i32 [[INDEX]], 1
+; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[OFFSET_IDX3]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP]], align 2
; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <8 x i16> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <8 x i16> [[WIDE_LOAD]], splat (i16 -32768)
@@ -215,8 +215,8 @@ define void @arm_abs_q31(ptr nocapture readonly %pSrc, ptr nocapture %pDst, i32
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i32 [[INDEX]], 2
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC]], i32 [[OFFSET_IDX]]
-; CHECK-NEXT: [[OFFSET_IDX7:%.*]] = shl i32 [[INDEX]], 2
-; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[OFFSET_IDX7]]
+; CHECK-NEXT: [[OFFSET_IDX3:%.*]] = shl i32 [[INDEX]], 2
+; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[OFFSET_IDX3]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[NEXT_GEP]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 -2147483648)
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll
index 83cb3250fe87b..5bbe7738996df 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll
@@ -36,11 +36,10 @@ define i32 @mla_i32(ptr noalias nocapture readonly %A, ptr noalias nocapture rea
; CHECK: middle.block:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[I_011:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[I_011:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[RES_010:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[I_011]]
; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
@@ -116,11 +115,10 @@ define i32 @mla_i8(ptr noalias nocapture readonly %A, ptr noalias nocapture read
; CHECK: middle.block:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[I_011:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[I_011:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[RES_010:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[I_011]]
; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
@@ -191,11 +189,10 @@ define i32 @add_i32(ptr nocapture readonly %x, i32 %n) #0 {
; CHECK: middle.block:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[I_08]]
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
@@ -386,11 +383,10 @@ define i32 @or_i32(ptr nocapture readonly %x, i32 %n) #0 {
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP4]])
; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[I_08]]
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
@@ -451,11 +447,10 @@ define i32 @xor_i32(ptr nocapture readonly %x, i32 %n) #0 {
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP4]])
; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[I_08]]
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll
index 7c6e7053b4c32..3238b6d031641 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll
@@ -14,8 +14,8 @@ define hidden void @pointer_phi_v4i32_add1(ptr noalias nocapture readonly %A, pt
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i32 [[INDEX]], 2
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 [[OFFSET_IDX]]
-; CHECK-NEXT: [[OFFSET_IDX4:%.*]] = shl i32 [[INDEX]], 2
-; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 [[OFFSET_IDX4]]
+; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = shl i32 [[INDEX]], 2
+; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 [[OFFSET_IDX1]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[NEXT_GEP]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: store <4 x i32> [[TMP0]], ptr [[NEXT_GEP5]], align 4
@@ -162,8 +162,8 @@ define hidden void @pointer_phi_v8i16_add1(ptr noalias nocapture readonly %A, pt
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i32 [[INDEX]], 1
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 [[OFFSET_IDX]]
-; CHECK-NEXT: [[OFFSET_IDX4:%.*]] = shl i32 [[INDEX]], 1
-; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 [[OFFSET_IDX4]]
+; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = shl i32 [[INDEX]], 1
+; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 [[OFFSET_IDX1]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP]], align 2
; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i16> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: store <8 x i16> [[TMP1]], ptr [[NEXT_GEP5]], align 2
@@ -447,8 +447,8 @@ define hidden void @pointer_phi_v4f32_add1(ptr noalias nocapture readonly %A, pt
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i32 [[INDEX]], 2
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 [[OFFSET_IDX]]
-; CHECK-NEXT: [[OFFSET_IDX4:%.*]] = shl i32 [[INDEX]], 2
-; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 [[OFFSET_IDX4]]
+; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = shl i32 [[INDEX]], 2
+; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 [[OFFSET_IDX1]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[NEXT_GEP]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: store <4 x float> [[TMP0]], ptr [[NEXT_GEP5]], align 4
@@ -594,8 +594,8 @@ define hidden void @pointer_phi_v4half_add1(ptr noalias nocapture readonly %A, p
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i32 [[INDEX]], 1
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 [[OFFSET_IDX]]
-; CHECK-NEXT: [[OFFSET_IDX4:%.*]] = shl i32 [[INDEX]], 1
-; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 [[OFFSET_IDX4]]
+; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = shl i32 [[INDEX]], 1
+; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 [[OFFSET_IDX1]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x half>, ptr [[NEXT_GEP]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = fadd fast <8 x half> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: store <8 x half> [[TMP0]], ptr [[NEXT_GEP5]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
index 6e2434aefce9d..bb74993aadce5 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
@@ -147,11 +147,10 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP10]])
; IF-EVL-OUTLOOP-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
; IF-EVL-OUTLOOP: scalar.ph:
-; IF-EVL-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ]
; IF-EVL-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ]
; IF-EVL-OUTLOOP-NEXT: br label [[FOR_BODY:%.*]]
; IF-EVL-OUTLOOP: for.body:
-; IF-EVL-OUTLOOP-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; IF-EVL-OUTLOOP-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; IF-EVL-OUTLOOP-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; IF-EVL-OUTLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[X]], i32 [[I_08]]
; IF-EVL-OUTLOOP-NEXT: [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
@@ -200,11 +199,10 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
; IF-EVL-INLOOP: middle.block:
; IF-EVL-INLOOP-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
; IF-EVL-INLOOP: scalar.ph:
-; IF-EVL-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ]
; IF-EVL-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ]
; IF-EVL-INLOOP-NEXT: br label [[FOR_BODY:%.*]]
; IF-EVL-INLOOP: for.body:
-; IF-EVL-INLOOP-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; IF-EVL-INLOOP-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; IF-EVL-INLOOP-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; IF-EVL-INLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[X]], i32 [[I_08]]
; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
index ed507961ef825..978051542c641 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
@@ -168,11 +168,10 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64
; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> [[TMP11]])
; CHECK-NEXT: br label [[FOR_END:%.*]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
; CHECK-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
index 80f027452c3c1..263bc8f38554e 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
@@ -341,11 +341,10 @@ define void @single_stride_int_iv(ptr %p, i64 %stride) {
; NOSTRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; NOSTRIDED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; NOSTRIDED: scalar.ph:
-; NOSTRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
-; NOSTRIDED-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; NOSTRIDED-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
; NOSTRIDED-NEXT: br label [[LOOP:%.*]]
; NOSTRIDED: loop:
-; NOSTRIDED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; NOSTRIDED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
; NOSTRIDED-NEXT: [[OFFSET:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[OFFSET_NEXT:%.*]], [[LOOP]] ]
; NOSTRIDED-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET]]
; NOSTRIDED-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4
@@ -642,11 +641,10 @@ define void @double_stride_int_iv(ptr %p, ptr %p2, i64 %stride) {
; NOSTRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; NOSTRIDED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; NOSTRIDED: scalar.ph:
-; NOSTRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
-; NOSTRIDED-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; NOSTRIDED-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
; NOSTRIDED-NEXT: br label [[LOOP:%.*]]
; NOSTRIDED: loop:
-; NOSTRIDED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; NOSTRIDED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
; NOSTRIDED-NEXT: [[OFFSET:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[OFFSET_NEXT:%.*]], [[LOOP]] ]
; NOSTRIDED-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET]]
; NOSTRIDED-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll
index 04e0dafba6b86..7ae83d484e831 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll
@@ -98,11 +98,10 @@ define i64 @second_lshr_operand_zero_via_scev() {
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOPS:.*]]
; CHECK: [[LOOPS]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOPS]] ]
-; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOPS]] ]
+; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOPS]] ]
; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[IV]], 0
; CHECK-NEXT: [[AND:%.*]] = and i64 [[IV]], 0
; CHECK-NEXT: [[TMP14:%.*]] = trunc i64 [[IV]] to i32
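
A subtlety in this test: the merge is purely structural. The IV resume value and the reduction start value are both phi i64 [ 0, %entry ], so they unify even though they play different roles, and the scalar reduction now starts from the surviving resume phi. A hand-written sketch of that shape (hypothetical names):

define i64 @iv_and_rdx_share_phi() {
entry:
  br label %scalar.ph

scalar.ph:
  ; %bc.merge.rdx = phi i64 [ 0, %entry ] was operand-identical to this
  ; phi and folds into it, despite seeding a reduction rather than an IV.
  %bc.resume.val = phi i64 [ 0, %entry ]
  br label %loop

loop:
  %iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %iv.next, %loop ]
  %red = phi i64 [ %bc.resume.val, %scalar.ph ], [ %red.next, %loop ]
  %red.next = add i64 %red, %iv
  %iv.next = add i64 %iv, 1
  %done = icmp eq i64 %iv.next, 100
  br i1 %done, label %exit, label %loop

exit:
  ret i64 %red.next
}
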
diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
index fcd94f444e8a5..35a52cf7387c5 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
@@ -250,7 +250,6 @@ define void @multiple_pointer_ivs_with_scalar_uses_only(ptr %A, ptr %B) #0 {
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[A]], i64 8589934368
-; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i8, ptr [[B]], i64 4294967184
; CHECK-NEXT: [[IND_END5:%.*]] = getelementptr i8, ptr [[B]], i64 4294967184
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
@@ -340,7 +339,7 @@ define void @multiple_pointer_ivs_with_scalar_uses_only(ptr %A, ptr %B) #0 {
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ -12, [[MIDDLE_BLOCK]] ], [ 100, [[ENTRY:%.*]] ], [ 100, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 2048, [[ENTRY]] ], [ 2048, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[A]], [[ENTRY]] ], [ [[A]], [[VECTOR_MEMCHECK]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi ptr [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ], [ [[B]], [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi ptr [ [[IND_END5]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ], [ [[B]], [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[IND_END5]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ], [ [[B]], [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll
index 62d08c8668235..9698c33d8e08c 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll
@@ -36,13 +36,10 @@ define void @foo(ptr nocapture noalias %A, i64 %N) #0 {
; CHECK-NEXT: [[TMP8:%.*]] = fadd <8 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP9:%.*]] = fadd <8 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD3]]
; CHECK-NEXT: [[TMP10:%.*]] = fadd <8 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD4]]
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A]], i32 8
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A]], i32 16
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[A]], i32 24
; CHECK-NEXT: store <8 x float> [[TMP7]], ptr [[A]], align 4
-; CHECK-NEXT: store <8 x float> [[TMP8]], ptr [[TMP11]], align 4
-; CHECK-NEXT: store <8 x float> [[TMP9]], ptr [[TMP12]], align 4
-; CHECK-NEXT: store <8 x float> [[TMP10]], ptr [[TMP13]], align 4
+; CHECK-NEXT: store <8 x float> [[TMP8]], ptr [[TMP4]], align 4
+; CHECK-NEXT: store <8 x float> [[TMP9]], ptr [[TMP5]], align 4
+; CHECK-NEXT: store <8 x float> [[TMP10]], ptr [[TMP6]], align 4
; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
;
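
The uniform_load change exercises CSE on addresses rather than phis: the load side of the interleave group already materializes &A[8], &A[16] and &A[24] (TMP4..TMP6 in the unchanged lines), and the store side had been recomputing identical GEPs. A reduced sketch of one part, with hypothetical names:

define void @reuse_load_addr(ptr noalias %A, <8 x float> %addend) {
entry:
  ; Before CSE the store below went through a second getelementptr with
  ; the same operands; it now reuses the address computed for the load.
  %gep8 = getelementptr inbounds float, ptr %A, i32 8
  %wide = load <8 x float>, ptr %gep8, align 4
  %sum = fadd <8 x float> %addend, %wide
  store <8 x float> %sum, ptr %gep8, align 4
  ret void
}
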
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll
index 74df675a75cbd..f9b3d37607092 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll
@@ -1024,11 +1024,10 @@ define void @test_for_sink_instruction_after_same_incoming_1(ptr %ptr) {
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3
; CHECK-NEXT: br label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 1.000000e+01, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ], [ 1.000000e+01, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ], [ 2.000000e+01, [[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
@@ -1090,11 +1089,10 @@ define void @test_for_sink_instruction_after_same_incoming_2(ptr %ptr) {
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3
; CHECK-NEXT: br label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 2.000000e+01, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ], [ 2.000000e+01, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ], [ 1.000000e+01, [[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
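
In the two test_for_sink_instruction_after_same_incoming functions the duplicate sits in middle.block: both first-order recurrences extract the same last lane of the same wide load, so a single extractelement survives and supplies the middle.block incoming value of both resume phis (which themselves stay separate, since their %entry incomings differ). Sketch of the surviving computation, hypothetical names:

define double @last_lane_extract(<4 x double> %wide.load) {
middle.block:
  ; One extract replaces the two identical copies that previously fed
  ; the two scalar.ph resume phis.
  %vector.recur.extract = extractelement <4 x double> %wide.load, i32 3
  ret double %vector.recur.extract
}
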
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll
index 715ea1c51aba6..b815c9d41e35c 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll
@@ -122,12 +122,11 @@ define void @test_pr54223_sink_after_insertion_order(ptr noalias %a, ptr noalias
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ]
; CHECK-NEXT: [[SCALAR_RECUR_INIT5:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi float [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi float [ [[SCALAR_RECUR_INIT5]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[SCALAR_RECUR6:%.*]] = phi float [ [[SCALAR_RECUR_INIT5]], [[SCALAR_PH]] ], [ [[FOR_2_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[NEG:%.*]] = fneg float [[SCALAR_RECUR6]]
; CHECK-NEXT: [[MULADD:%.*]] = call float @llvm.fmuladd.f32(float [[SCALAR_RECUR]], float [[NEG]], float 0.000000e+00)
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
index 3adfcf53e4564..9dd3def5369db 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
@@ -1195,11 +1195,10 @@ define i64 @constant_folded_previous_value() {
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: br label [[FOR_END:%.*]]
; UNROLL-NO-IC: scalar.ph:
-; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
-; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY]] ]
+; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
; UNROLL-NO-IC-NEXT: br label [[SCALAR_BODY:%.*]]
; UNROLL-NO-IC: scalar.body:
-; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
+; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VAR2:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VAR3]] = add i64 0, 1
; UNROLL-NO-IC-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
@@ -1222,11 +1221,10 @@ define i64 @constant_folded_previous_value() {
; UNROLL-NO-VF: middle.block:
; UNROLL-NO-VF-NEXT: br label [[FOR_END:%.*]]
; UNROLL-NO-VF: scalar.ph:
-; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
-; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY]] ]
+; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
; UNROLL-NO-VF-NEXT: br label [[SCALAR_BODY:%.*]]
; UNROLL-NO-VF: scalar.body:
-; UNROLL-NO-VF-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
+; UNROLL-NO-VF-NEXT: [[I:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VAR2:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VAR3]] = add i64 0, 1
; UNROLL-NO-VF-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
@@ -1249,11 +1247,10 @@ define i64 @constant_folded_previous_value() {
; SINK-AFTER: middle.block:
; SINK-AFTER-NEXT: br label [[FOR_END:%.*]]
; SINK-AFTER: scalar.ph:
-; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
-; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY]] ]
+; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
; SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]]
; SINK-AFTER: scalar.body:
-; SINK-AFTER-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
+; SINK-AFTER-NEXT: [[I:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
; SINK-AFTER-NEXT: [[VAR2:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ]
; SINK-AFTER-NEXT: [[VAR3]] = add i64 0, 1
; SINK-AFTER-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
@@ -2743,7 +2740,6 @@ define i32 @sink_into_replication_region(i32 %y) {
; UNROLL-NO-IC-NEXT: br label [[BB1:%.*]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[Y]], [[BB:%.*]] ]
-; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB]] ]
; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ]
; UNROLL-NO-IC-NEXT: br label [[BB2:%.*]]
; UNROLL-NO-IC: bb1:
@@ -2751,7 +2747,7 @@ define i32 @sink_into_replication_region(i32 %y) {
; UNROLL-NO-IC-NEXT: ret i32 [[VAR]]
; UNROLL-NO-IC: bb2:
; UNROLL-NO-IC-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; UNROLL-NO-IC-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
+; UNROLL-NO-IC-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; UNROLL-NO-IC-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; UNROLL-NO-IC-NEXT: [[VAR6]] = add i32 [[VAR5]], [[VAR4]]
; UNROLL-NO-IC-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]]
@@ -2806,7 +2802,6 @@ define i32 @sink_into_replication_region(i32 %y) {
; UNROLL-NO-VF-NEXT: br label [[BB1:%.*]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[Y]], [[BB:%.*]] ]
-; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB]] ]
; UNROLL-NO-VF-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ]
; UNROLL-NO-VF-NEXT: br label [[BB2:%.*]]
; UNROLL-NO-VF: bb1:
@@ -2814,7 +2809,7 @@ define i32 @sink_into_replication_region(i32 %y) {
; UNROLL-NO-VF-NEXT: ret i32 [[VAR]]
; UNROLL-NO-VF: bb2:
; UNROLL-NO-VF-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; UNROLL-NO-VF-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
+; UNROLL-NO-VF-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; UNROLL-NO-VF-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; UNROLL-NO-VF-NEXT: [[VAR6]] = add i32 [[VAR5]], [[VAR4]]
; UNROLL-NO-VF-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]]
@@ -2892,7 +2887,6 @@ define i32 @sink_into_replication_region(i32 %y) {
; SINK-AFTER-NEXT: br label [[BB1:%.*]]
; SINK-AFTER: scalar.ph:
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[Y]], [[BB:%.*]] ]
-; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB]] ]
; SINK-AFTER-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ]
; SINK-AFTER-NEXT: br label [[BB2:%.*]]
; SINK-AFTER: bb1:
@@ -2900,7 +2894,7 @@ define i32 @sink_into_replication_region(i32 %y) {
; SINK-AFTER-NEXT: ret i32 [[VAR]]
; SINK-AFTER: bb2:
; SINK-AFTER-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; SINK-AFTER-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
+; SINK-AFTER-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; SINK-AFTER-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; SINK-AFTER-NEXT: [[VAR6]] = add i32 [[VAR5]], [[VAR4]]
; SINK-AFTER-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]]
@@ -3105,8 +3099,6 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) {
; UNROLL-NO-IC-NEXT: br label [[BB1:%.*]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[Y]], [[BB:%.*]] ]
-; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, [[BB]] ]
-; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB]] ]
; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ]
; UNROLL-NO-IC-NEXT: br label [[BB2:%.*]]
; UNROLL-NO-IC: bb1:
@@ -3114,8 +3106,8 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) {
; UNROLL-NO-IC-NEXT: ret i32 [[VAR]]
; UNROLL-NO-IC: bb2:
; UNROLL-NO-IC-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; UNROLL-NO-IC-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
+; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
+; UNROLL-NO-IC-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; UNROLL-NO-IC-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; UNROLL-NO-IC-NEXT: [[G:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[IV]]
; UNROLL-NO-IC-NEXT: [[VAR6]] = add i32 [[VAR5]], [[VAR4]]
@@ -3186,8 +3178,6 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) {
; UNROLL-NO-VF-NEXT: br label [[BB1:%.*]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[Y]], [[BB:%.*]] ]
-; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, [[BB]] ]
-; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB]] ]
; UNROLL-NO-VF-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ]
; UNROLL-NO-VF-NEXT: br label [[BB2:%.*]]
; UNROLL-NO-VF: bb1:
@@ -3195,8 +3185,8 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) {
; UNROLL-NO-VF-NEXT: ret i32 [[VAR]]
; UNROLL-NO-VF: bb2:
; UNROLL-NO-VF-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; UNROLL-NO-VF-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; UNROLL-NO-VF-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
+; UNROLL-NO-VF-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
+; UNROLL-NO-VF-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; UNROLL-NO-VF-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; UNROLL-NO-VF-NEXT: [[G:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[IV]]
; UNROLL-NO-VF-NEXT: [[VAR6]] = add i32 [[VAR5]], [[VAR4]]
@@ -3308,8 +3298,6 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) {
; SINK-AFTER-NEXT: br label [[BB1:%.*]]
; SINK-AFTER: scalar.ph:
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[Y]], [[BB:%.*]] ]
-; SINK-AFTER-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, [[BB]] ]
-; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB]] ]
; SINK-AFTER-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ]
; SINK-AFTER-NEXT: br label [[BB2:%.*]]
; SINK-AFTER: bb1:
@@ -3317,8 +3305,8 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) {
; SINK-AFTER-NEXT: ret i32 [[VAR]]
; SINK-AFTER: bb2:
; SINK-AFTER-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; SINK-AFTER-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; SINK-AFTER-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
+; SINK-AFTER-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
+; SINK-AFTER-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; SINK-AFTER-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; SINK-AFTER-NEXT: [[G:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[IV]]
; SINK-AFTER-NEXT: [[VAR6]] = add i32 [[VAR5]], [[VAR4]]
diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
index 77b91ccb913cf..9188c25b67b70 100644
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -5426,7 +5426,7 @@ define i64 @trunc_with_first_order_recurrence() {
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 42>, [[VECTOR_PH]] ], [ [[VEC_IND2:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND2]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND4:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT4:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND2]], <2 x i32> <i32 1, i32 2>
; CHECK-NEXT: [[TMP1:%.*]] = mul <2 x i32> [[VEC_IND]], [[VEC_IND2]]
; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[TMP1]], splat (i32 42)
@@ -5434,14 +5434,14 @@ define i64 @trunc_with_first_order_recurrence() {
; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64>
; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i64> [[VEC_PHI]], [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i32> [[VEC_IND4]], splat (i32 1)
+; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i32> [[VEC_IND3]], splat (i32 1)
; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP2]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64>
; CHECK-NEXT: [[TMP10]] = add <2 x i64> [[TMP6]], [[TMP9]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], splat (i32 2)
-; CHECK-NEXT: [[VEC_IND_NEXT5]] = add <2 x i32> [[VEC_IND4]], splat (i32 2)
+; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <2 x i32> [[VEC_IND3]], splat (i32 2)
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
; CHECK: middle.block:
@@ -5490,7 +5490,7 @@ define i64 @trunc_with_first_order_recurrence() {
; IND-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; IND-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 42>, [[VECTOR_PH]] ], [ [[VEC_IND2:%.*]], [[VECTOR_BODY]] ]
; IND-NEXT: [[VEC_IND2]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
-; IND-NEXT: [[VEC_IND4:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ]
+; IND-NEXT: [[VEC_IND3:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT4:%.*]], [[VECTOR_BODY]] ]
; IND-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND2]], <2 x i32> <i32 1, i32 2>
; IND-NEXT: [[TMP1:%.*]] = mul <2 x i32> [[VEC_IND]], [[VEC_IND2]]
; IND-NEXT: [[TMP2:%.*]] = add <2 x i32> [[TMP1]], splat (i32 42)
@@ -5498,14 +5498,14 @@ define i64 @trunc_with_first_order_recurrence() {
; IND-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP3]], [[TMP2]]
; IND-NEXT: [[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64>
; IND-NEXT: [[TMP6:%.*]] = add <2 x i64> [[VEC_PHI]], [[TMP5]]
-; IND-NEXT: [[TMP7:%.*]] = shl <2 x i32> [[VEC_IND4]], splat (i32 1)
+; IND-NEXT: [[TMP7:%.*]] = shl <2 x i32> [[VEC_IND3]], splat (i32 1)
; IND-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP2]], [[TMP7]]
; IND-NEXT: [[TMP9:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64>
; IND-NEXT: [[TMP10]] = add <2 x i64> [[TMP6]], [[TMP9]]
; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
; IND-NEXT: [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], splat (i32 2)
-; IND-NEXT: [[VEC_IND_NEXT5]] = add <2 x i32> [[VEC_IND4]], splat (i32 2)
+; IND-NEXT: [[VEC_IND_NEXT4]] = add <2 x i32> [[VEC_IND3]], splat (i32 2)
; IND-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112
; IND-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
; IND: middle.block:
@@ -5551,17 +5551,17 @@ define i64 @trunc_with_first_order_recurrence() {
; UNROLL-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 42>, [[VECTOR_PH]] ], [ [[STEP_ADD7:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NEXT: [[VEC_IND3:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT4:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NEXT: [[VEC_IND5:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NEXT: [[VEC_IND3:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
-; UNROLL-NEXT: [[STEP_ADD7]] = add <2 x i32> [[VEC_IND3]], splat (i32 2)
-; UNROLL-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND3]], <2 x i32> <i32 1, i32 2>
-; UNROLL-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[VEC_IND3]], <2 x i32> [[STEP_ADD7]], <2 x i32> <i32 1, i32 2>
-; UNROLL-NEXT: [[TMP2:%.*]] = mul <2 x i32> [[VEC_IND]], [[VEC_IND3]]
+; UNROLL-NEXT: [[STEP_ADD7]] = add <2 x i32> [[VEC_IND5]], splat (i32 2)
+; UNROLL-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND5]], <2 x i32> <i32 1, i32 2>
+; UNROLL-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[VEC_IND5]], <2 x i32> [[STEP_ADD7]], <2 x i32> <i32 1, i32 2>
+; UNROLL-NEXT: [[TMP2:%.*]] = mul <2 x i32> [[VEC_IND]], [[VEC_IND5]]
; UNROLL-NEXT: [[TMP3:%.*]] = mul <2 x i32> [[STEP_ADD]], [[STEP_ADD7]]
; UNROLL-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], splat (i32 42)
; UNROLL-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP3]], splat (i32 42)
-; UNROLL-NEXT: [[TMP6:%.*]] = add <2 x i32> [[TMP0]], [[VEC_IND3]]
+; UNROLL-NEXT: [[TMP6:%.*]] = add <2 x i32> [[TMP0]], [[VEC_IND5]]
; UNROLL-NEXT: [[TMP7:%.*]] = add <2 x i32> [[TMP1]], [[STEP_ADD7]]
; UNROLL-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP6]], [[TMP4]]
; UNROLL-NEXT: [[TMP9:%.*]] = add <2 x i32> [[TMP7]], [[TMP5]]
@@ -5569,9 +5569,9 @@ define i64 @trunc_with_first_order_recurrence() {
; UNROLL-NEXT: [[TMP11:%.*]] = sext <2 x i32> [[TMP9]] to <2 x i64>
; UNROLL-NEXT: [[TMP12:%.*]] = add <2 x i64> [[VEC_PHI]], [[TMP10]]
; UNROLL-NEXT: [[TMP13:%.*]] = add <2 x i64> [[VEC_PHI2]], [[TMP11]]
-; UNROLL-NEXT: [[TMP14:%.*]] = shl <2 x i32> [[VEC_IND5]], splat (i32 1)
-; UNROLL-NEXT: [[STEP_ADD8:%.*]] = shl <2 x i32> [[VEC_IND5]], splat (i32 1)
-; UNROLL-NEXT: [[TMP15:%.*]] = add <2 x i32> [[STEP_ADD8]], splat (i32 4)
+; UNROLL-NEXT: [[TMP14:%.*]] = shl <2 x i32> [[VEC_IND3]], splat (i32 1)
+; UNROLL-NEXT: [[STEP_ADD5:%.*]] = shl <2 x i32> [[VEC_IND3]], splat (i32 1)
+; UNROLL-NEXT: [[TMP15:%.*]] = add <2 x i32> [[STEP_ADD5]], splat (i32 4)
; UNROLL-NEXT: [[TMP16:%.*]] = add <2 x i32> [[TMP4]], [[TMP14]]
; UNROLL-NEXT: [[TMP17:%.*]] = add <2 x i32> [[TMP5]], [[TMP15]]
; UNROLL-NEXT: [[TMP18:%.*]] = sext <2 x i32> [[TMP16]] to <2 x i64>
@@ -5580,8 +5580,8 @@ define i64 @trunc_with_first_order_recurrence() {
; UNROLL-NEXT: [[TMP21]] = add <2 x i64> [[TMP13]], [[TMP19]]
; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4)
-; UNROLL-NEXT: [[VEC_IND_NEXT4]] = add <2 x i32> [[VEC_IND3]], splat (i32 4)
; UNROLL-NEXT: [[VEC_IND_NEXT6]] = add <2 x i32> [[VEC_IND5]], splat (i32 4)
+; UNROLL-NEXT: [[VEC_IND_NEXT7]] = add <2 x i32> [[VEC_IND3]], splat (i32 4)
; UNROLL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112
; UNROLL-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
; UNROLL: middle.block:
@@ -5628,18 +5628,18 @@ define i64 @trunc_with_first_order_recurrence() {
; UNROLL-NO-IC-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 42>, [[VECTOR_PH]] ], [ [[STEP_ADD7:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-IC-NEXT: [[VEC_IND3:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT4:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VEC_IND5:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-IC-NEXT: [[VEC_IND3:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
-; UNROLL-NO-IC-NEXT: [[STEP_ADD7]] = add <2 x i32> [[VEC_IND3]], splat (i32 2)
-; UNROLL-NO-IC-NEXT: [[STEP_ADD8:%.*]] = add <2 x i32> [[VEC_IND5]], splat (i32 2)
-; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND3]], <2 x i32> <i32 1, i32 2>
-; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[VEC_IND3]], <2 x i32> [[STEP_ADD7]], <2 x i32> <i32 1, i32 2>
-; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = mul <2 x i32> [[VEC_IND]], [[VEC_IND3]]
+; UNROLL-NO-IC-NEXT: [[STEP_ADD7]] = add <2 x i32> [[VEC_IND5]], splat (i32 2)
+; UNROLL-NO-IC-NEXT: [[STEP_ADD5:%.*]] = add <2 x i32> [[VEC_IND3]], splat (i32 2)
+; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND5]], <2 x i32> <i32 1, i32 2>
+; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[VEC_IND5]], <2 x i32> [[STEP_ADD7]], <2 x i32> <i32 1, i32 2>
+; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = mul <2 x i32> [[VEC_IND]], [[VEC_IND5]]
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = mul <2 x i32> [[STEP_ADD]], [[STEP_ADD7]]
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], splat (i32 42)
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP3]], splat (i32 42)
-; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add <2 x i32> [[TMP0]], [[VEC_IND3]]
+; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add <2 x i32> [[TMP0]], [[VEC_IND5]]
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add <2 x i32> [[TMP1]], [[STEP_ADD7]]
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP6]], [[TMP4]]
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add <2 x i32> [[TMP7]], [[TMP5]]
@@ -5647,8 +5647,8 @@ define i64 @trunc_with_first_order_recurrence() {
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = sext <2 x i32> [[TMP9]] to <2 x i64>
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = add <2 x i64> [[VEC_PHI]], [[TMP10]]
; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = add <2 x i64> [[VEC_PHI2]], [[TMP11]]
-; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = shl <2 x i32> [[VEC_IND5]], splat (i32 1)
-; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = shl <2 x i32> [[STEP_ADD8]], splat (i32 1)
+; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = shl <2 x i32> [[VEC_IND3]], splat (i32 1)
+; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = shl <2 x i32> [[STEP_ADD5]], splat (i32 1)
; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = add <2 x i32> [[TMP4]], [[TMP14]]
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = add <2 x i32> [[TMP5]], [[TMP15]]
; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = sext <2 x i32> [[TMP16]] to <2 x i64>
@@ -5657,8 +5657,8 @@ define i64 @trunc_with_first_order_recurrence() {
; UNROLL-NO-IC-NEXT: [[TMP21]] = add <2 x i64> [[TMP13]], [[TMP19]]
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2)
-; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT4]] = add <2 x i32> [[STEP_ADD7]], splat (i32 2)
-; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT6]] = add <2 x i32> [[STEP_ADD8]], splat (i32 2)
+; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT6]] = add <2 x i32> [[STEP_ADD7]], splat (i32 2)
+; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT7]] = add <2 x i32> [[STEP_ADD5]], splat (i32 2)
; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112
; UNROLL-NO-IC-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
; UNROLL-NO-IC: middle.block:
@@ -5708,17 +5708,17 @@ define i64 @trunc_with_first_order_recurrence() {
; INTERLEAVE-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ]
; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 1, i32 2, i32 3, i32 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; INTERLEAVE-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 42>, [[VECTOR_PH]] ], [ [[STEP_ADD7:%.*]], [[VECTOR_BODY]] ]
-; INTERLEAVE-NEXT: [[VEC_IND3:%.*]] = phi <4 x i32> [ <i32 1, i32 2, i32 3, i32 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT4:%.*]], [[VECTOR_BODY]] ]
; INTERLEAVE-NEXT: [[VEC_IND5:%.*]] = phi <4 x i32> [ <i32 1, i32 2, i32 3, i32 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ]
+; INTERLEAVE-NEXT: [[VEC_IND3:%.*]] = phi <4 x i32> [ <i32 1, i32 2, i32 3, i32 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], [[VECTOR_BODY]] ]
; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
-; INTERLEAVE-NEXT: [[STEP_ADD7]] = add <4 x i32> [[VEC_IND3]], splat (i32 4)
-; INTERLEAVE-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; INTERLEAVE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VEC_IND3]], <4 x i32> [[STEP_ADD7]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; INTERLEAVE-NEXT: [[TMP2:%.*]] = mul <4 x i32> [[VEC_IND]], [[VEC_IND3]]
+; INTERLEAVE-NEXT: [[STEP_ADD7]] = add <4 x i32> [[VEC_IND5]], splat (i32 4)
+; INTERLEAVE-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND5]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; INTERLEAVE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VEC_IND5]], <4 x i32> [[STEP_ADD7]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; INTERLEAVE-NEXT: [[TMP2:%.*]] = mul <4 x i32> [[VEC_IND]], [[VEC_IND5]]
; INTERLEAVE-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[STEP_ADD]], [[STEP_ADD7]]
; INTERLEAVE-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], splat (i32 42)
; INTERLEAVE-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP3]], splat (i32 42)
-; INTERLEAVE-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP0]], [[VEC_IND3]]
+; INTERLEAVE-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP0]], [[VEC_IND5]]
; INTERLEAVE-NEXT: [[TMP7:%.*]] = add <4 x i32> [[TMP1]], [[STEP_ADD7]]
; INTERLEAVE-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP6]], [[TMP4]]
; INTERLEAVE-NEXT: [[TMP9:%.*]] = add <4 x i32> [[TMP7]], [[TMP5]]
@@ -5726,9 +5726,9 @@ define i64 @trunc_with_first_order_recurrence() {
; INTERLEAVE-NEXT: [[TMP11:%.*]] = sext <4 x i32> [[TMP9]] to <4 x i64>
; INTERLEAVE-NEXT: [[TMP12:%.*]] = add <4 x i64> [[VEC_PHI]], [[TMP10]]
; INTERLEAVE-NEXT: [[TMP13:%.*]] = add <4 x i64> [[VEC_PHI2]], [[TMP11]]
-; INTERLEAVE-NEXT: [[TMP14:%.*]] = shl <4 x i32> [[VEC_IND5]], splat (i32 1)
-; INTERLEAVE-NEXT: [[STEP_ADD8:%.*]] = shl <4 x i32> [[VEC_IND5]], splat (i32 1)
-; INTERLEAVE-NEXT: [[TMP15:%.*]] = add <4 x i32> [[STEP_ADD8]], splat (i32 8)
+; INTERLEAVE-NEXT: [[TMP14:%.*]] = shl <4 x i32> [[VEC_IND3]], splat (i32 1)
+; INTERLEAVE-NEXT: [[STEP_ADD5:%.*]] = shl <4 x i32> [[VEC_IND3]], splat (i32 1)
+; INTERLEAVE-NEXT: [[TMP15:%.*]] = add <4 x i32> [[STEP_ADD5]], splat (i32 8)
; INTERLEAVE-NEXT: [[TMP16:%.*]] = add <4 x i32> [[TMP4]], [[TMP14]]
; INTERLEAVE-NEXT: [[TMP17:%.*]] = add <4 x i32> [[TMP5]], [[TMP15]]
; INTERLEAVE-NEXT: [[TMP18:%.*]] = sext <4 x i32> [[TMP16]] to <4 x i64>
@@ -5737,8 +5737,8 @@ define i64 @trunc_with_first_order_recurrence() {
; INTERLEAVE-NEXT: [[TMP21]] = add <4 x i64> [[TMP13]], [[TMP19]]
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8)
-; INTERLEAVE-NEXT: [[VEC_IND_NEXT4]] = add <4 x i32> [[VEC_IND3]], splat (i32 8)
; INTERLEAVE-NEXT: [[VEC_IND_NEXT6]] = add <4 x i32> [[VEC_IND5]], splat (i32 8)
+; INTERLEAVE-NEXT: [[VEC_IND_NEXT7]] = add <4 x i32> [[VEC_IND3]], splat (i32 8)
; INTERLEAVE-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112
; INTERLEAVE-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
; INTERLEAVE: middle.block:
@@ -5834,12 +5834,11 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, [[ENTRY]] ]
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[TRUNC_IV:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[TRUNC_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[TRUNC_IV:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TRUNC_IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_TRUNC:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[LV:%.*]] = load i32, ptr [[SRC]], align 4
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[LV]], [[RECUR]]
@@ -5956,12 +5955,11 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
; UNROLL-NO-IC-NEXT: br label [[EXIT:%.*]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
-; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, [[ENTRY]] ]
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ]
; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]]
; UNROLL-NO-IC: loop:
; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; UNROLL-NO-IC-NEXT: [[TRUNC_IV:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[TRUNC_IV_NEXT:%.*]], [[LOOP]] ]
+; UNROLL-NO-IC-NEXT: [[TRUNC_IV:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TRUNC_IV_NEXT:%.*]], [[LOOP]] ]
; UNROLL-NO-IC-NEXT: [[RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_TRUNC:%.*]], [[LOOP]] ]
; UNROLL-NO-IC-NEXT: [[LV:%.*]] = load i32, ptr [[SRC]], align 4
; UNROLL-NO-IC-NEXT: [[MUL:%.*]] = mul nsw i32 [[LV]], [[RECUR]]
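
Much of the trunc_with_first_order_recurrence churn above is renaming and reordering fallout from the regenerated checks rather than new folds; the pr52460 hunks repeat the resume-phi fold seen earlier. Notably, widened IVs with the same start and step are still kept apart both before and after, which is consistent with a purely structural CSE: each phi's back-edge operand is its own update, so the two operand lists are never identical. The sketch below illustrates the point; it is an inference from the diff, not a statement about the pass's internals, and the names are hypothetical:

define void @identical_recurrences_kept(ptr %dst) {
entry:
  br label %vector.body

vector.body:
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  ; %a and %b share a start and a step, but %a's back-edge operand is
  ; %a.next while %b's is %b.next, so a structural comparison cannot
  ; treat them as equal without solving the recurrence.
  %a = phi <2 x i32> [ <i32 1, i32 2>, %entry ], [ %a.next, %vector.body ]
  %b = phi <2 x i32> [ <i32 1, i32 2>, %entry ], [ %b.next, %vector.body ]
  %a.next = add <2 x i32> %a, splat (i32 2)
  %b.next = add <2 x i32> %b, splat (i32 2)
  %sum = add <2 x i32> %a, %b
  %gep = getelementptr inbounds i32, ptr %dst, i64 %index
  store <2 x i32> %sum, ptr %gep, align 4
  %index.next = add nuw i64 %index, 2
  %done = icmp eq i64 %index.next, 112
  br i1 %done, label %exit, label %vector.body

exit:
  ret void
}
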
diff --git a/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll b/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll
index 8a4820949af12..2384c5d4c9c90 100644
--- a/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll
@@ -36,12 +36,11 @@ define void @i65_induction_with_negative_step(ptr %dst) {
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i65 [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[IV_I65:%.*]] = phi i65 [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[IV_I65_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[FOR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[TRUNC:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[FOR:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[TRUNC:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[TRUNC]] = trunc i65 [[IV_I65]] to i64
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TRUNC]]
; CHECK-NEXT: store i64 [[FOR]], ptr [[GEP]], align 8
diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-different-insert-position.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-different-insert-position.ll
index 651210df823dd..7ffa8c07b8825 100644
--- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-different-insert-position.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-different-insert-position.ll
@@ -29,11 +29,10 @@ define void @gep_for_first_member_does_not_dominate_insert_point(ptr %str, ptr n
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[OR_1:%.*]] = or disjoint i64 [[IV2]], 1
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[STR]], i64 [[OR_1]]
; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[GEP1]], align 1
diff --git a/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll b/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll
index 496285a276923..64ab06d7d0b2c 100644
--- a/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll
+++ b/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll
@@ -47,10 +47,10 @@ define void @test_ptr_iv_no_inbounds(ptr %p1.start, ptr %p2.start, ptr %p1.end)
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[P1_START]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[OFFSET_IDX10:%.*]] = mul i64 [[INDEX]], 4
-; CHECK-NEXT: [[NEXT_GEP11:%.*]] = getelementptr i8, ptr [[P2_START]], i64 [[OFFSET_IDX10]]
+; CHECK-NEXT: [[OFFSET_IDX8:%.*]] = mul i64 [[INDEX]], 4
+; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[P2_START]], i64 [[OFFSET_IDX8]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[NEXT_GEP]], align 4, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
-; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <2 x float>, ptr [[NEXT_GEP11]], align 4, !alias.scope [[META3]]
+; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <2 x float>, ptr [[NEXT_GEP9]], align 4, !alias.scope [[META3]]
; CHECK-NEXT: [[TMP19:%.*]] = fadd <2 x float> [[WIDE_LOAD]], [[WIDE_LOAD12]]
; CHECK-NEXT: store <2 x float> [[TMP19]], ptr [[NEXT_GEP]], align 4, !alias.scope [[META0]], !noalias [[META3]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
@@ -133,10 +133,10 @@ define void @test_ptr_iv_with_inbounds(ptr %p1.start, ptr %p2.start, ptr %p1.end
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[P1_START]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[OFFSET_IDX8:%.*]] = mul i64 [[INDEX]], 4
-; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[P2_START]], i64 [[OFFSET_IDX8]]
+; CHECK-NEXT: [[OFFSET_IDX6:%.*]] = mul i64 [[INDEX]], 4
+; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[P2_START]], i64 [[OFFSET_IDX6]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[NEXT_GEP]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META12:![0-9]+]]
-; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <2 x float>, ptr [[NEXT_GEP9]], align 4, !alias.scope [[META12]]
+; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <2 x float>, ptr [[NEXT_GEP7]], align 4, !alias.scope [[META12]]
; CHECK-NEXT: [[TMP15:%.*]] = fadd <2 x float> [[WIDE_LOAD]], [[WIDE_LOAD10]]
; CHECK-NEXT: store <2 x float> [[TMP15]], ptr [[NEXT_GEP]], align 4, !alias.scope [[META9]], !noalias [[META12]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
index d1ae9ce5238c1..732dad01dced9 100644
--- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
@@ -26,13 +26,13 @@ define void @a(ptr readnone %b) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE10:%.*]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 0, [[INDEX]]
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], -1
-; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], -2
-; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], -3
+; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], -1
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], -2
+; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], -3
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP2]]
-; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]]
-; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP14]]
-; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr null, i64 [[TMP17]]
+; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP9]]
+; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]]
+; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr null, i64 [[TMP6]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 -3
@@ -42,22 +42,22 @@ define void @a(ptr readnone %b) {
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0
; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1
-; CHECK-NEXT: store i8 95, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1
+; CHECK-NEXT: store i8 95, ptr [[TMP12]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
; CHECK: pred.store.if5:
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP2]], i64 -1
-; CHECK-NEXT: store i8 95, ptr [[TMP12]], align 1
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP2]], i64 -1
+; CHECK-NEXT: store i8 95, ptr [[TMP14]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
; CHECK: pred.store.continue6:
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP7]], i32 2
; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
; CHECK: pred.store.if7:
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP3]], i64 -1
-; CHECK-NEXT: store i8 95, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP3]], i64 -1
+; CHECK-NEXT: store i8 95, ptr [[TMP17]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
; CHECK: pred.store.continue8:
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3
diff --git a/llvm/test/Transforms/LoopVectorize/pr36983.ll b/llvm/test/Transforms/LoopVectorize/pr36983.ll
index 7e38d60b6f581..bbff2846e2114 100644
--- a/llvm/test/Transforms/LoopVectorize/pr36983.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr36983.ll
@@ -1,12 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
; RUN: opt < %s -passes=loop-vectorize -S | FileCheck %s
; There could be more than one LCSSA PHI in the loop exit block.
-; CHECK-LABEL: bb1.bb3_crit_edge:
-; CHECK: %_tmp133.lcssa1 = phi i16 [ %_tmp133, %bb2 ], [ %vector.recur.extract.for.phi, %middle.block ]
-; CHECK: %_tmp133.lcssa = phi i16 [ %_tmp133, %bb2 ], [ %vector.recur.extract.for.phi1, %middle.block ]
-
define void @f1() {
+; CHECK-LABEL: define void @f1() {
+; CHECK-NEXT: [[BB2_LR_PH:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 -1, i16 -2, i16 -3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = sub nsw <4 x i16> [[VEC_IND]], splat (i16 1)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 -4)
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 65536
+; CHECK-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP0]], i32 2
+; CHECK-NEXT: br label %[[BB1_BB3_CRIT_EDGE:.*]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, %[[BB2_LR_PH]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ undef, %[[BB2_LR_PH]] ]
+; CHECK-NEXT: br label %[[BB2:.*]]
+; CHECK: [[BB2]]:
+; CHECK-NEXT: [[_TMP132:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[_TMP10:%.*]], %[[BB2]] ]
+; CHECK-NEXT: [[_TMP133:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[_TMP10]], %[[BB2]] ]
+; CHECK-NEXT: [[_TMP10]] = sub nsw i16 [[_TMP132]], 1
+; CHECK-NEXT: [[_TMP15:%.*]] = icmp ne i16 [[_TMP10]], 0
+; CHECK-NEXT: br i1 [[_TMP15]], label %[[BB2]], label %[[BB1_BB3_CRIT_EDGE]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: [[BB1_BB3_CRIT_EDGE]]:
+; CHECK-NEXT: [[_TMP133_LCSSA1:%.*]] = phi i16 [ [[_TMP133]], %[[BB2]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[_TMP133_LCSSA:%.*]] = phi i16 [ [[_TMP133]], %[[BB2]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret void
+;
bb2.lr.ph:
br label %bb2
diff --git a/llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll b/llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll
index b6c72056b0c5c..611e6c184625c 100644
--- a/llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll
@@ -24,11 +24,10 @@ define void @reduced(ptr %0, ptr %1, i64 %iv, ptr %2, i64 %iv76, i64 %iv93) {
; CHECK-NEXT: [[IND_ESCAPE:%.*]] = sub i64 [[IND_END]], 1
; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_2_PREHEADER:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[LOOP_1:%.*]]
; CHECK: loop.1:
-; CHECK-NEXT: [[IV761:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT77:%.*]], [[LOOP_1]] ]
+; CHECK-NEXT: [[IV761:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV_NEXT77:%.*]], [[LOOP_1]] ]
; CHECK-NEXT: [[IV4:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_1]] ]
; CHECK-NEXT: [[IV_NEXT77]] = add i64 [[IV761]], 1
; CHECK-NEXT: [[ARRAYIDX_I_I50:%.*]] = getelementptr i32, ptr [[TMP0:%.*]], i64 [[IV76:%.*]]
@@ -49,20 +48,20 @@ define void @reduced(ptr %0, ptr %1, i64 %iv, ptr %2, i64 %iv76, i64 %iv93) {
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SCEVGEP2]], [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH5]], label [[VECTOR_PH6:%.*]]
-; CHECK: vector.ph6:
+; CHECK: vector.ph5:
; CHECK-NEXT: [[N_MOD_VF8:%.*]] = urem i64 [[TMP3]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF8]]
; CHECK-NEXT: br label [[VECTOR_BODY10:%.*]]
-; CHECK: vector.body9:
+; CHECK: vector.body8:
; CHECK-NEXT: [[INDEX12:%.*]] = phi i64 [ 0, [[VECTOR_PH6]] ], [ [[INDEX_NEXT13:%.*]], [[VECTOR_BODY10]] ]
-; CHECK-NEXT: store i32 0, ptr [[TMP1]], align 4, !alias.scope !4, !noalias !7
+; CHECK-NEXT: store i32 0, ptr [[TMP1]], align 4, !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]]
; CHECK-NEXT: [[INDEX_NEXT13]] = add nuw i64 [[INDEX12]], 4
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT13]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK12:%.*]], label [[VECTOR_BODY10]], !llvm.loop [[LOOP9:![0-9]+]]
-; CHECK: middle.block12:
+; CHECK: middle.block11:
; CHECK-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N10]], label [[LOOP_3_LR_PH:%.*]], label [[SCALAR_PH5]]
-; CHECK: scalar.ph4:
+; CHECK: scalar.ph3:
; CHECK-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK12]] ], [ 0, [[LOOP_2_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: br label [[LOOP_2:%.*]]
; CHECK: loop.3.lr.ph:
@@ -70,7 +69,7 @@ define void @reduced(ptr %0, ptr %1, i64 %iv, ptr %2, i64 %iv76, i64 %iv93) {
; CHECK-NEXT: [[ARRAYIDX_I_I62:%.*]] = getelementptr i32, ptr [[TMP0]], i64 [[IDXPROM_I_I61]]
; CHECK-NEXT: [[MIN_ITERS_CHECK22:%.*]] = icmp ult i64 [[TMP3]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK22]], label [[SCALAR_PH21:%.*]], label [[VECTOR_MEMCHECK15:%.*]]
-; CHECK: vector.memcheck15:
+; CHECK: vector.memcheck14:
; CHECK-NEXT: [[SCEVGEP15:%.*]] = getelementptr i8, ptr [[TMP1]], i64 4
; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[IDXPROM_I_I61]], 2
; CHECK-NEXT: [[TMP13:%.*]] = add nuw nsw i64 [[TMP12]], 4
@@ -79,20 +78,20 @@ define void @reduced(ptr %0, ptr %1, i64 %iv, ptr %2, i64 %iv76, i64 %iv93) {
; CHECK-NEXT: [[BOUND118:%.*]] = icmp ult ptr [[ARRAYIDX_I_I62]], [[SCEVGEP15]]
; CHECK-NEXT: [[FOUND_CONFLICT19:%.*]] = and i1 [[BOUND017]], [[BOUND118]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT19]], label [[SCALAR_PH21]], label [[VECTOR_PH24:%.*]]
-; CHECK: vector.ph23:
+; CHECK: vector.ph22:
; CHECK-NEXT: [[N_MOD_VF24:%.*]] = urem i64 [[TMP3]], 4
; CHECK-NEXT: [[N_VEC25:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF24]]
; CHECK-NEXT: br label [[VECTOR_BODY27:%.*]]
-; CHECK: vector.body26:
+; CHECK: vector.body25:
; CHECK-NEXT: [[INDEX29:%.*]] = phi i64 [ 0, [[VECTOR_PH24]] ], [ [[INDEX_NEXT29:%.*]], [[VECTOR_BODY27]] ]
; CHECK-NEXT: store i32 0, ptr [[TMP1]], align 4, !alias.scope [[META10:![0-9]+]], !noalias [[META13:![0-9]+]]
; CHECK-NEXT: [[INDEX_NEXT29]] = add nuw i64 [[INDEX29]], 4
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT29]], [[N_VEC25]]
; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK29:%.*]], label [[VECTOR_BODY27]], !llvm.loop [[LOOP15:![0-9]+]]
-; CHECK: middle.block29:
+; CHECK: middle.block28:
; CHECK-NEXT: [[CMP_N27:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC25]]
; CHECK-NEXT: br i1 [[CMP_N27]], label [[LOOP_CLEANUP:%.*]], label [[SCALAR_PH21]]
-; CHECK: scalar.ph21:
+; CHECK: scalar.ph20:
; CHECK-NEXT: [[BC_RESUME_VAL26:%.*]] = phi i64 [ [[N_VEC25]], [[MIDDLE_BLOCK29]] ], [ 0, [[LOOP_3_LR_PH]] ], [ 0, [[VECTOR_MEMCHECK15]] ]
; CHECK-NEXT: br label [[LOOP_3:%.*]]
; CHECK: loop.2:
diff --git a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll
index cea16c9eb7513..7ba11eb788a61 100644
--- a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll
@@ -40,12 +40,11 @@ define i32 @reverse_induction_i64(i64 %startval, ptr %ptr) {
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[STARTVAL]], %[[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[ADD_I7:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[ADD_I:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ], [ [[INC4:%.*]], %[[FOR_BODY]] ]
-; CHECK-NEXT: [[REDUX5:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[INC_REDUX:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[REDUX5:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ], [ [[INC_REDUX:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ADD_I]] = add i64 [[ADD_I7]], -1
; CHECK-NEXT: [[KIND__I:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[ADD_I]]
; CHECK-NEXT: [[TMP_I1:%.*]] = load i32, ptr [[KIND__I]], align 4
@@ -111,12 +110,11 @@ define i32 @reverse_induction_i128(i128 %startval, ptr %ptr) {
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i128 [ [[STARTVAL]], %[[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[ADD_I7:%.*]] = phi i128 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[ADD_I:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ], [ [[INC4:%.*]], %[[FOR_BODY]] ]
-; CHECK-NEXT: [[REDUX5:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[INC_REDUX:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[REDUX5:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ], [ [[INC_REDUX:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ADD_I]] = add i128 [[ADD_I7]], -1
; CHECK-NEXT: [[KIND__I:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i128 [[ADD_I]]
; CHECK-NEXT: [[TMP_I1:%.*]] = load i32, ptr [[KIND__I]], align 4
@@ -192,12 +190,11 @@ define i32 @reverse_induction_i16(i16 %startval, ptr %ptr) {
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[STARTVAL]], %[[ENTRY]] ], [ [[STARTVAL]], %[[VECTOR_SCEVCHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[ADD_I7:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[ADD_I:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ], [ [[INC4:%.*]], %[[FOR_BODY]] ]
-; CHECK-NEXT: [[REDUX5:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[INC_REDUX:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[REDUX5:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ], [ [[INC_REDUX:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ADD_I]] = add i16 [[ADD_I7]], -1
; CHECK-NEXT: [[KIND__I:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i16 [[ADD_I]]
; CHECK-NEXT: [[TMP_I1:%.*]] = load i32, ptr [[KIND__I]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll
index 842ff910c89d3..bfc8e8d843c90 100644
--- a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll
+++ b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll
@@ -183,10 +183,10 @@ define void @multiple_incoming_phi_with_blend_mask(i64 %a, ptr noalias %dst) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i16> [[VEC_IND3]], <2 x i16> [[VEC_IND1]]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i16> [[VEC_IND2]], <2 x i16> [[VEC_IND3]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 1
@@ -199,8 +199,8 @@ define void @multiple_incoming_phi_with_blend_mask(i64 %a, ptr noalias %dst) {
; CHECK-NEXT: store <2 x i16> [[TMP9]], ptr [[TMP10]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
-; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], splat (i16 2)
; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <2 x i16> [[VEC_IND3]], splat (i16 2)
+; CHECK-NEXT: [[VEC_IND_NEXT5]] = add <2 x i16> [[VEC_IND2]], splat (i16 2)
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-branch-weights.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-branch-weights.ll
index 38dbbbb21583a..a9118da233e33 100644
--- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-branch-weights.ll
+++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-branch-weights.ll
@@ -60,9 +60,8 @@ define void @test_tc_between_8_and_17(ptr %A, i64 range(i64 8, 17) %N) {
; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
; VF8UF2-NEXT: [[TMP3:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10)
; VF8UF2-NEXT: [[TMP4:%.*]] = add nsw <8 x i8> [[WIDE_LOAD1]], splat (i8 10)
-; VF8UF2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i32 8
; VF8UF2-NEXT: store <8 x i8> [[TMP3]], ptr [[A]], align 1
-; VF8UF2-NEXT: store <8 x i8> [[TMP4]], ptr [[TMP6]], align 1
+; VF8UF2-NEXT: store <8 x i8> [[TMP4]], ptr [[TMP2]], align 1
; VF8UF2-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; VF8UF2: [[MIDDLE_BLOCK]]:
; VF8UF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
index 59c76aefbb90f..92dc3964ce8ee 100644
--- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
+++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
@@ -66,9 +66,8 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) {
; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP3]], align 1
; VF8UF2-NEXT: [[TMP4:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10)
; VF8UF2-NEXT: [[TMP5:%.*]] = add nsw <8 x i8> [[WIDE_LOAD1]], splat (i8 10)
-; VF8UF2-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[A]], i32 8
; VF8UF2-NEXT: store <8 x i8> [[TMP4]], ptr [[A]], align 1
-; VF8UF2-NEXT: store <8 x i8> [[TMP5]], ptr [[TMP7]], align 1
+; VF8UF2-NEXT: store <8 x i8> [[TMP5]], ptr [[TMP3]], align 1
; VF8UF2-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; VF8UF2: [[MIDDLE_BLOCK]]:
; VF8UF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[AND]], [[N_VEC]]
@@ -713,9 +712,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF1-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0
; VF8UF1-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF8UF1: [[PRED_STORE_IF]]:
-; VF8UF1-NEXT: [[TMP5:%.*]] = mul i64 0, [[STEP]]
+; VF8UF1-NEXT: [[TMP5:%.*]] = mul i64 0, [[TMP1]]
; VF8UF1-NEXT: [[TMP6:%.*]] = add i64 0, [[TMP5]]
-; VF8UF1-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], [[STEP]]
+; VF8UF1-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], [[TMP1]]
; VF8UF1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]]
; VF8UF1-NEXT: store i8 0, ptr [[TMP8]], align 1
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE]]
@@ -723,9 +722,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF1-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP3]], i32 1
; VF8UF1-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
; VF8UF1: [[PRED_STORE_IF1]]:
-; VF8UF1-NEXT: [[TMP10:%.*]] = mul i64 1, [[STEP]]
+; VF8UF1-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP1]]
; VF8UF1-NEXT: [[TMP11:%.*]] = add i64 0, [[TMP10]]
-; VF8UF1-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], [[STEP]]
+; VF8UF1-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], [[TMP1]]
; VF8UF1-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP12]]
; VF8UF1-NEXT: store i8 0, ptr [[TMP13]], align 1
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE2]]
@@ -733,9 +732,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF1-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP3]], i32 2
; VF8UF1-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
; VF8UF1: [[PRED_STORE_IF3]]:
-; VF8UF1-NEXT: [[TMP15:%.*]] = mul i64 2, [[STEP]]
+; VF8UF1-NEXT: [[TMP15:%.*]] = mul i64 2, [[TMP1]]
; VF8UF1-NEXT: [[TMP16:%.*]] = add i64 0, [[TMP15]]
-; VF8UF1-NEXT: [[TMP17:%.*]] = add i64 [[TMP16]], [[STEP]]
+; VF8UF1-NEXT: [[TMP17:%.*]] = add i64 [[TMP16]], [[TMP1]]
; VF8UF1-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP17]]
; VF8UF1-NEXT: store i8 0, ptr [[TMP18]], align 1
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE4]]
@@ -743,9 +742,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF1-NEXT: [[TMP19:%.*]] = extractelement <8 x i1> [[TMP3]], i32 3
; VF8UF1-NEXT: br i1 [[TMP19]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
; VF8UF1: [[PRED_STORE_IF5]]:
-; VF8UF1-NEXT: [[TMP20:%.*]] = mul i64 3, [[STEP]]
+; VF8UF1-NEXT: [[TMP20:%.*]] = mul i64 3, [[TMP1]]
; VF8UF1-NEXT: [[TMP21:%.*]] = add i64 0, [[TMP20]]
-; VF8UF1-NEXT: [[TMP22:%.*]] = add i64 [[TMP21]], [[STEP]]
+; VF8UF1-NEXT: [[TMP22:%.*]] = add i64 [[TMP21]], [[TMP1]]
; VF8UF1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP22]]
; VF8UF1-NEXT: store i8 0, ptr [[TMP23]], align 1
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE6]]
@@ -753,9 +752,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF1-NEXT: [[TMP24:%.*]] = extractelement <8 x i1> [[TMP3]], i32 4
; VF8UF1-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
; VF8UF1: [[PRED_STORE_IF7]]:
-; VF8UF1-NEXT: [[TMP25:%.*]] = mul i64 4, [[STEP]]
+; VF8UF1-NEXT: [[TMP25:%.*]] = mul i64 4, [[TMP1]]
; VF8UF1-NEXT: [[TMP26:%.*]] = add i64 0, [[TMP25]]
-; VF8UF1-NEXT: [[TMP27:%.*]] = add i64 [[TMP26]], [[STEP]]
+; VF8UF1-NEXT: [[TMP27:%.*]] = add i64 [[TMP26]], [[TMP1]]
; VF8UF1-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP27]]
; VF8UF1-NEXT: store i8 0, ptr [[TMP28]], align 1
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE8]]
@@ -763,9 +762,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF1-NEXT: [[TMP29:%.*]] = extractelement <8 x i1> [[TMP3]], i32 5
; VF8UF1-NEXT: br i1 [[TMP29]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
; VF8UF1: [[PRED_STORE_IF9]]:
-; VF8UF1-NEXT: [[TMP30:%.*]] = mul i64 5, [[STEP]]
+; VF8UF1-NEXT: [[TMP30:%.*]] = mul i64 5, [[TMP1]]
; VF8UF1-NEXT: [[TMP31:%.*]] = add i64 0, [[TMP30]]
-; VF8UF1-NEXT: [[TMP32:%.*]] = add i64 [[TMP31]], [[STEP]]
+; VF8UF1-NEXT: [[TMP32:%.*]] = add i64 [[TMP31]], [[TMP1]]
; VF8UF1-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP32]]
; VF8UF1-NEXT: store i8 0, ptr [[TMP33]], align 1
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE10]]
@@ -773,9 +772,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF1-NEXT: [[TMP34:%.*]] = extractelement <8 x i1> [[TMP3]], i32 6
; VF8UF1-NEXT: br i1 [[TMP34]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
; VF8UF1: [[PRED_STORE_IF11]]:
-; VF8UF1-NEXT: [[TMP35:%.*]] = mul i64 6, [[STEP]]
+; VF8UF1-NEXT: [[TMP35:%.*]] = mul i64 6, [[TMP1]]
; VF8UF1-NEXT: [[TMP36:%.*]] = add i64 0, [[TMP35]]
-; VF8UF1-NEXT: [[TMP37:%.*]] = add i64 [[TMP36]], [[STEP]]
+; VF8UF1-NEXT: [[TMP37:%.*]] = add i64 [[TMP36]], [[TMP1]]
; VF8UF1-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP37]]
; VF8UF1-NEXT: store i8 0, ptr [[TMP38]], align 1
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE12]]
@@ -783,9 +782,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF1-NEXT: [[TMP39:%.*]] = extractelement <8 x i1> [[TMP3]], i32 7
; VF8UF1-NEXT: br i1 [[TMP39]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
; VF8UF1: [[PRED_STORE_IF13]]:
-; VF8UF1-NEXT: [[TMP40:%.*]] = mul i64 7, [[STEP]]
+; VF8UF1-NEXT: [[TMP40:%.*]] = mul i64 7, [[TMP1]]
; VF8UF1-NEXT: [[TMP41:%.*]] = add i64 0, [[TMP40]]
-; VF8UF1-NEXT: [[TMP42:%.*]] = add i64 [[TMP41]], [[STEP]]
+; VF8UF1-NEXT: [[TMP42:%.*]] = add i64 [[TMP41]], [[TMP1]]
; VF8UF1-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP42]]
; VF8UF1-NEXT: store i8 0, ptr [[TMP43]], align 1
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE14]]
@@ -830,9 +829,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0
; VF8UF2-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF8UF2: [[PRED_STORE_IF]]:
-; VF8UF2-NEXT: [[TMP6:%.*]] = mul i64 0, [[STEP]]
+; VF8UF2-NEXT: [[TMP6:%.*]] = mul i64 0, [[TMP1]]
; VF8UF2-NEXT: [[TMP7:%.*]] = add i64 0, [[TMP6]]
-; VF8UF2-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], [[STEP]]
+; VF8UF2-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], [[TMP1]]
; VF8UF2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP8]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP9]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE]]
@@ -840,9 +839,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP3]], i32 1
; VF8UF2-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
; VF8UF2: [[PRED_STORE_IF1]]:
-; VF8UF2-NEXT: [[TMP11:%.*]] = mul i64 1, [[STEP]]
+; VF8UF2-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP1]]
; VF8UF2-NEXT: [[TMP12:%.*]] = add i64 0, [[TMP11]]
-; VF8UF2-NEXT: [[TMP13:%.*]] = add i64 [[TMP12]], [[STEP]]
+; VF8UF2-NEXT: [[TMP13:%.*]] = add i64 [[TMP12]], [[TMP1]]
; VF8UF2-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP13]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP14]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE2]]
@@ -850,9 +849,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP15:%.*]] = extractelement <8 x i1> [[TMP3]], i32 2
; VF8UF2-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
; VF8UF2: [[PRED_STORE_IF3]]:
-; VF8UF2-NEXT: [[TMP16:%.*]] = mul i64 2, [[STEP]]
+; VF8UF2-NEXT: [[TMP16:%.*]] = mul i64 2, [[TMP1]]
; VF8UF2-NEXT: [[TMP17:%.*]] = add i64 0, [[TMP16]]
-; VF8UF2-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], [[STEP]]
+; VF8UF2-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], [[TMP1]]
; VF8UF2-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP18]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP19]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE4]]
@@ -860,9 +859,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP3]], i32 3
; VF8UF2-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
; VF8UF2: [[PRED_STORE_IF5]]:
-; VF8UF2-NEXT: [[TMP21:%.*]] = mul i64 3, [[STEP]]
+; VF8UF2-NEXT: [[TMP21:%.*]] = mul i64 3, [[TMP1]]
; VF8UF2-NEXT: [[TMP22:%.*]] = add i64 0, [[TMP21]]
-; VF8UF2-NEXT: [[TMP23:%.*]] = add i64 [[TMP22]], [[STEP]]
+; VF8UF2-NEXT: [[TMP23:%.*]] = add i64 [[TMP22]], [[TMP1]]
; VF8UF2-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP23]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP24]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE6]]
@@ -870,9 +869,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP25:%.*]] = extractelement <8 x i1> [[TMP3]], i32 4
; VF8UF2-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
; VF8UF2: [[PRED_STORE_IF7]]:
-; VF8UF2-NEXT: [[TMP26:%.*]] = mul i64 4, [[STEP]]
+; VF8UF2-NEXT: [[TMP26:%.*]] = mul i64 4, [[TMP1]]
; VF8UF2-NEXT: [[TMP27:%.*]] = add i64 0, [[TMP26]]
-; VF8UF2-NEXT: [[TMP28:%.*]] = add i64 [[TMP27]], [[STEP]]
+; VF8UF2-NEXT: [[TMP28:%.*]] = add i64 [[TMP27]], [[TMP1]]
; VF8UF2-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP28]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP29]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE8]]
@@ -880,9 +879,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP30:%.*]] = extractelement <8 x i1> [[TMP3]], i32 5
; VF8UF2-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
; VF8UF2: [[PRED_STORE_IF9]]:
-; VF8UF2-NEXT: [[TMP31:%.*]] = mul i64 5, [[STEP]]
+; VF8UF2-NEXT: [[TMP31:%.*]] = mul i64 5, [[TMP1]]
; VF8UF2-NEXT: [[TMP32:%.*]] = add i64 0, [[TMP31]]
-; VF8UF2-NEXT: [[TMP33:%.*]] = add i64 [[TMP32]], [[STEP]]
+; VF8UF2-NEXT: [[TMP33:%.*]] = add i64 [[TMP32]], [[TMP1]]
; VF8UF2-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP33]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP34]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE10]]
@@ -890,9 +889,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP35:%.*]] = extractelement <8 x i1> [[TMP3]], i32 6
; VF8UF2-NEXT: br i1 [[TMP35]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
; VF8UF2: [[PRED_STORE_IF11]]:
-; VF8UF2-NEXT: [[TMP36:%.*]] = mul i64 6, [[STEP]]
+; VF8UF2-NEXT: [[TMP36:%.*]] = mul i64 6, [[TMP1]]
; VF8UF2-NEXT: [[TMP37:%.*]] = add i64 0, [[TMP36]]
-; VF8UF2-NEXT: [[TMP38:%.*]] = add i64 [[TMP37]], [[STEP]]
+; VF8UF2-NEXT: [[TMP38:%.*]] = add i64 [[TMP37]], [[TMP1]]
; VF8UF2-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP38]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP39]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE12]]
@@ -900,9 +899,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP40:%.*]] = extractelement <8 x i1> [[TMP3]], i32 7
; VF8UF2-NEXT: br i1 [[TMP40]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
; VF8UF2: [[PRED_STORE_IF13]]:
-; VF8UF2-NEXT: [[TMP41:%.*]] = mul i64 7, [[STEP]]
+; VF8UF2-NEXT: [[TMP41:%.*]] = mul i64 7, [[TMP1]]
; VF8UF2-NEXT: [[TMP42:%.*]] = add i64 0, [[TMP41]]
-; VF8UF2-NEXT: [[TMP43:%.*]] = add i64 [[TMP42]], [[STEP]]
+; VF8UF2-NEXT: [[TMP43:%.*]] = add i64 [[TMP42]], [[TMP1]]
; VF8UF2-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP43]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP44]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE14]]
@@ -910,9 +909,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP45:%.*]] = extractelement <8 x i1> [[TMP4]], i32 0
; VF8UF2-NEXT: br i1 [[TMP45]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
; VF8UF2: [[PRED_STORE_IF15]]:
-; VF8UF2-NEXT: [[TMP46:%.*]] = mul i64 8, [[STEP]]
+; VF8UF2-NEXT: [[TMP46:%.*]] = mul i64 8, [[TMP1]]
; VF8UF2-NEXT: [[TMP47:%.*]] = add i64 0, [[TMP46]]
-; VF8UF2-NEXT: [[TMP48:%.*]] = add i64 [[TMP47]], [[STEP]]
+; VF8UF2-NEXT: [[TMP48:%.*]] = add i64 [[TMP47]], [[TMP1]]
; VF8UF2-NEXT: [[TMP49:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP48]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP49]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE16]]
@@ -920,9 +919,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP50:%.*]] = extractelement <8 x i1> [[TMP4]], i32 1
; VF8UF2-NEXT: br i1 [[TMP50]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
; VF8UF2: [[PRED_STORE_IF17]]:
-; VF8UF2-NEXT: [[TMP51:%.*]] = mul i64 9, [[STEP]]
+; VF8UF2-NEXT: [[TMP51:%.*]] = mul i64 9, [[TMP1]]
; VF8UF2-NEXT: [[TMP52:%.*]] = add i64 0, [[TMP51]]
-; VF8UF2-NEXT: [[TMP53:%.*]] = add i64 [[TMP52]], [[STEP]]
+; VF8UF2-NEXT: [[TMP53:%.*]] = add i64 [[TMP52]], [[TMP1]]
; VF8UF2-NEXT: [[TMP54:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP53]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP54]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE18]]
@@ -930,9 +929,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP55:%.*]] = extractelement <8 x i1> [[TMP4]], i32 2
; VF8UF2-NEXT: br i1 [[TMP55]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
; VF8UF2: [[PRED_STORE_IF19]]:
-; VF8UF2-NEXT: [[TMP56:%.*]] = mul i64 10, [[STEP]]
+; VF8UF2-NEXT: [[TMP56:%.*]] = mul i64 10, [[TMP1]]
; VF8UF2-NEXT: [[TMP57:%.*]] = add i64 0, [[TMP56]]
-; VF8UF2-NEXT: [[TMP58:%.*]] = add i64 [[TMP57]], [[STEP]]
+; VF8UF2-NEXT: [[TMP58:%.*]] = add i64 [[TMP57]], [[TMP1]]
; VF8UF2-NEXT: [[TMP59:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP58]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP59]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE20]]
@@ -940,9 +939,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP60:%.*]] = extractelement <8 x i1> [[TMP4]], i32 3
; VF8UF2-NEXT: br i1 [[TMP60]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
; VF8UF2: [[PRED_STORE_IF21]]:
-; VF8UF2-NEXT: [[TMP61:%.*]] = mul i64 11, [[STEP]]
+; VF8UF2-NEXT: [[TMP61:%.*]] = mul i64 11, [[TMP1]]
; VF8UF2-NEXT: [[TMP62:%.*]] = add i64 0, [[TMP61]]
-; VF8UF2-NEXT: [[TMP63:%.*]] = add i64 [[TMP62]], [[STEP]]
+; VF8UF2-NEXT: [[TMP63:%.*]] = add i64 [[TMP62]], [[TMP1]]
; VF8UF2-NEXT: [[TMP64:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP63]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP64]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE22]]
@@ -950,9 +949,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP65:%.*]] = extractelement <8 x i1> [[TMP4]], i32 4
; VF8UF2-NEXT: br i1 [[TMP65]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
; VF8UF2: [[PRED_STORE_IF23]]:
-; VF8UF2-NEXT: [[TMP66:%.*]] = mul i64 12, [[STEP]]
+; VF8UF2-NEXT: [[TMP66:%.*]] = mul i64 12, [[TMP1]]
; VF8UF2-NEXT: [[TMP67:%.*]] = add i64 0, [[TMP66]]
-; VF8UF2-NEXT: [[TMP68:%.*]] = add i64 [[TMP67]], [[STEP]]
+; VF8UF2-NEXT: [[TMP68:%.*]] = add i64 [[TMP67]], [[TMP1]]
; VF8UF2-NEXT: [[TMP69:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP68]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP69]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE24]]
@@ -960,9 +959,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP70:%.*]] = extractelement <8 x i1> [[TMP4]], i32 5
; VF8UF2-NEXT: br i1 [[TMP70]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
; VF8UF2: [[PRED_STORE_IF25]]:
-; VF8UF2-NEXT: [[TMP71:%.*]] = mul i64 13, [[STEP]]
+; VF8UF2-NEXT: [[TMP71:%.*]] = mul i64 13, [[TMP1]]
; VF8UF2-NEXT: [[TMP72:%.*]] = add i64 0, [[TMP71]]
-; VF8UF2-NEXT: [[TMP73:%.*]] = add i64 [[TMP72]], [[STEP]]
+; VF8UF2-NEXT: [[TMP73:%.*]] = add i64 [[TMP72]], [[TMP1]]
; VF8UF2-NEXT: [[TMP74:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP73]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP74]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE26]]
@@ -970,9 +969,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP75:%.*]] = extractelement <8 x i1> [[TMP4]], i32 6
; VF8UF2-NEXT: br i1 [[TMP75]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
; VF8UF2: [[PRED_STORE_IF27]]:
-; VF8UF2-NEXT: [[TMP76:%.*]] = mul i64 14, [[STEP]]
+; VF8UF2-NEXT: [[TMP76:%.*]] = mul i64 14, [[TMP1]]
; VF8UF2-NEXT: [[TMP77:%.*]] = add i64 0, [[TMP76]]
-; VF8UF2-NEXT: [[TMP78:%.*]] = add i64 [[TMP77]], [[STEP]]
+; VF8UF2-NEXT: [[TMP78:%.*]] = add i64 [[TMP77]], [[TMP1]]
; VF8UF2-NEXT: [[TMP79:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP78]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP79]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE28]]
@@ -980,9 +979,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP80:%.*]] = extractelement <8 x i1> [[TMP4]], i32 7
; VF8UF2-NEXT: br i1 [[TMP80]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30:.*]]
; VF8UF2: [[PRED_STORE_IF29]]:
-; VF8UF2-NEXT: [[TMP81:%.*]] = mul i64 15, [[STEP]]
+; VF8UF2-NEXT: [[TMP81:%.*]] = mul i64 15, [[TMP1]]
; VF8UF2-NEXT: [[TMP82:%.*]] = add i64 0, [[TMP81]]
-; VF8UF2-NEXT: [[TMP83:%.*]] = add i64 [[TMP82]], [[STEP]]
+; VF8UF2-NEXT: [[TMP83:%.*]] = add i64 [[TMP82]], [[TMP1]]
; VF8UF2-NEXT: [[TMP84:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP83]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP84]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE30]]
@@ -1026,9 +1025,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP4:%.*]] = extractelement <16 x i1> [[TMP3]], i32 0
; VF16UF1-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF16UF1: [[PRED_STORE_IF]]:
-; VF16UF1-NEXT: [[TMP5:%.*]] = mul i64 0, [[STEP]]
+; VF16UF1-NEXT: [[TMP5:%.*]] = mul i64 0, [[TMP1]]
; VF16UF1-NEXT: [[TMP6:%.*]] = add i64 0, [[TMP5]]
-; VF16UF1-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], [[STEP]]
+; VF16UF1-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], [[TMP1]]
; VF16UF1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP8]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE]]
@@ -1036,9 +1035,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP9:%.*]] = extractelement <16 x i1> [[TMP3]], i32 1
; VF16UF1-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
; VF16UF1: [[PRED_STORE_IF1]]:
-; VF16UF1-NEXT: [[TMP10:%.*]] = mul i64 1, [[STEP]]
+; VF16UF1-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP1]]
; VF16UF1-NEXT: [[TMP11:%.*]] = add i64 0, [[TMP10]]
-; VF16UF1-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], [[STEP]]
+; VF16UF1-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], [[TMP1]]
; VF16UF1-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP12]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP13]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE2]]
@@ -1046,9 +1045,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP14:%.*]] = extractelement <16 x i1> [[TMP3]], i32 2
; VF16UF1-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
; VF16UF1: [[PRED_STORE_IF3]]:
-; VF16UF1-NEXT: [[TMP15:%.*]] = mul i64 2, [[STEP]]
+; VF16UF1-NEXT: [[TMP15:%.*]] = mul i64 2, [[TMP1]]
; VF16UF1-NEXT: [[TMP16:%.*]] = add i64 0, [[TMP15]]
-; VF16UF1-NEXT: [[TMP17:%.*]] = add i64 [[TMP16]], [[STEP]]
+; VF16UF1-NEXT: [[TMP17:%.*]] = add i64 [[TMP16]], [[TMP1]]
; VF16UF1-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP17]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP18]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE4]]
@@ -1056,9 +1055,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP19:%.*]] = extractelement <16 x i1> [[TMP3]], i32 3
; VF16UF1-NEXT: br i1 [[TMP19]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
; VF16UF1: [[PRED_STORE_IF5]]:
-; VF16UF1-NEXT: [[TMP20:%.*]] = mul i64 3, [[STEP]]
+; VF16UF1-NEXT: [[TMP20:%.*]] = mul i64 3, [[TMP1]]
; VF16UF1-NEXT: [[TMP21:%.*]] = add i64 0, [[TMP20]]
-; VF16UF1-NEXT: [[TMP22:%.*]] = add i64 [[TMP21]], [[STEP]]
+; VF16UF1-NEXT: [[TMP22:%.*]] = add i64 [[TMP21]], [[TMP1]]
; VF16UF1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP22]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP23]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE6]]
@@ -1066,9 +1065,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP24:%.*]] = extractelement <16 x i1> [[TMP3]], i32 4
; VF16UF1-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
; VF16UF1: [[PRED_STORE_IF7]]:
-; VF16UF1-NEXT: [[TMP25:%.*]] = mul i64 4, [[STEP]]
+; VF16UF1-NEXT: [[TMP25:%.*]] = mul i64 4, [[TMP1]]
; VF16UF1-NEXT: [[TMP26:%.*]] = add i64 0, [[TMP25]]
-; VF16UF1-NEXT: [[TMP27:%.*]] = add i64 [[TMP26]], [[STEP]]
+; VF16UF1-NEXT: [[TMP27:%.*]] = add i64 [[TMP26]], [[TMP1]]
; VF16UF1-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP27]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP28]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE8]]
@@ -1076,9 +1075,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP29:%.*]] = extractelement <16 x i1> [[TMP3]], i32 5
; VF16UF1-NEXT: br i1 [[TMP29]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
; VF16UF1: [[PRED_STORE_IF9]]:
-; VF16UF1-NEXT: [[TMP30:%.*]] = mul i64 5, [[STEP]]
+; VF16UF1-NEXT: [[TMP30:%.*]] = mul i64 5, [[TMP1]]
; VF16UF1-NEXT: [[TMP31:%.*]] = add i64 0, [[TMP30]]
-; VF16UF1-NEXT: [[TMP32:%.*]] = add i64 [[TMP31]], [[STEP]]
+; VF16UF1-NEXT: [[TMP32:%.*]] = add i64 [[TMP31]], [[TMP1]]
; VF16UF1-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP32]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP33]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE10]]
@@ -1086,9 +1085,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP34:%.*]] = extractelement <16 x i1> [[TMP3]], i32 6
; VF16UF1-NEXT: br i1 [[TMP34]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
; VF16UF1: [[PRED_STORE_IF11]]:
-; VF16UF1-NEXT: [[TMP35:%.*]] = mul i64 6, [[STEP]]
+; VF16UF1-NEXT: [[TMP35:%.*]] = mul i64 6, [[TMP1]]
; VF16UF1-NEXT: [[TMP36:%.*]] = add i64 0, [[TMP35]]
-; VF16UF1-NEXT: [[TMP37:%.*]] = add i64 [[TMP36]], [[STEP]]
+; VF16UF1-NEXT: [[TMP37:%.*]] = add i64 [[TMP36]], [[TMP1]]
; VF16UF1-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP37]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP38]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE12]]
@@ -1096,9 +1095,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP39:%.*]] = extractelement <16 x i1> [[TMP3]], i32 7
; VF16UF1-NEXT: br i1 [[TMP39]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
; VF16UF1: [[PRED_STORE_IF13]]:
-; VF16UF1-NEXT: [[TMP40:%.*]] = mul i64 7, [[STEP]]
+; VF16UF1-NEXT: [[TMP40:%.*]] = mul i64 7, [[TMP1]]
; VF16UF1-NEXT: [[TMP41:%.*]] = add i64 0, [[TMP40]]
-; VF16UF1-NEXT: [[TMP42:%.*]] = add i64 [[TMP41]], [[STEP]]
+; VF16UF1-NEXT: [[TMP42:%.*]] = add i64 [[TMP41]], [[TMP1]]
; VF16UF1-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP42]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP43]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE14]]
@@ -1106,9 +1105,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP44:%.*]] = extractelement <16 x i1> [[TMP3]], i32 8
; VF16UF1-NEXT: br i1 [[TMP44]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
; VF16UF1: [[PRED_STORE_IF15]]:
-; VF16UF1-NEXT: [[TMP45:%.*]] = mul i64 8, [[STEP]]
+; VF16UF1-NEXT: [[TMP45:%.*]] = mul i64 8, [[TMP1]]
; VF16UF1-NEXT: [[TMP46:%.*]] = add i64 0, [[TMP45]]
-; VF16UF1-NEXT: [[TMP47:%.*]] = add i64 [[TMP46]], [[STEP]]
+; VF16UF1-NEXT: [[TMP47:%.*]] = add i64 [[TMP46]], [[TMP1]]
; VF16UF1-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP47]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP48]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE16]]
@@ -1116,9 +1115,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP49:%.*]] = extractelement <16 x i1> [[TMP3]], i32 9
; VF16UF1-NEXT: br i1 [[TMP49]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
; VF16UF1: [[PRED_STORE_IF17]]:
-; VF16UF1-NEXT: [[TMP50:%.*]] = mul i64 9, [[STEP]]
+; VF16UF1-NEXT: [[TMP50:%.*]] = mul i64 9, [[TMP1]]
; VF16UF1-NEXT: [[TMP51:%.*]] = add i64 0, [[TMP50]]
-; VF16UF1-NEXT: [[TMP52:%.*]] = add i64 [[TMP51]], [[STEP]]
+; VF16UF1-NEXT: [[TMP52:%.*]] = add i64 [[TMP51]], [[TMP1]]
; VF16UF1-NEXT: [[TMP53:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP52]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP53]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE18]]
@@ -1126,9 +1125,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP54:%.*]] = extractelement <16 x i1> [[TMP3]], i32 10
; VF16UF1-NEXT: br i1 [[TMP54]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
; VF16UF1: [[PRED_STORE_IF19]]:
-; VF16UF1-NEXT: [[TMP55:%.*]] = mul i64 10, [[STEP]]
+; VF16UF1-NEXT: [[TMP55:%.*]] = mul i64 10, [[TMP1]]
; VF16UF1-NEXT: [[TMP56:%.*]] = add i64 0, [[TMP55]]
-; VF16UF1-NEXT: [[TMP57:%.*]] = add i64 [[TMP56]], [[STEP]]
+; VF16UF1-NEXT: [[TMP57:%.*]] = add i64 [[TMP56]], [[TMP1]]
; VF16UF1-NEXT: [[TMP58:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP57]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP58]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE20]]
@@ -1136,9 +1135,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP59:%.*]] = extractelement <16 x i1> [[TMP3]], i32 11
; VF16UF1-NEXT: br i1 [[TMP59]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
; VF16UF1: [[PRED_STORE_IF21]]:
-; VF16UF1-NEXT: [[TMP60:%.*]] = mul i64 11, [[STEP]]
+; VF16UF1-NEXT: [[TMP60:%.*]] = mul i64 11, [[TMP1]]
; VF16UF1-NEXT: [[TMP61:%.*]] = add i64 0, [[TMP60]]
-; VF16UF1-NEXT: [[TMP62:%.*]] = add i64 [[TMP61]], [[STEP]]
+; VF16UF1-NEXT: [[TMP62:%.*]] = add i64 [[TMP61]], [[TMP1]]
; VF16UF1-NEXT: [[TMP63:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP62]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP63]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE22]]
@@ -1146,9 +1145,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP64:%.*]] = extractelement <16 x i1> [[TMP3]], i32 12
; VF16UF1-NEXT: br i1 [[TMP64]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
; VF16UF1: [[PRED_STORE_IF23]]:
-; VF16UF1-NEXT: [[TMP65:%.*]] = mul i64 12, [[STEP]]
+; VF16UF1-NEXT: [[TMP65:%.*]] = mul i64 12, [[TMP1]]
; VF16UF1-NEXT: [[TMP66:%.*]] = add i64 0, [[TMP65]]
-; VF16UF1-NEXT: [[TMP67:%.*]] = add i64 [[TMP66]], [[STEP]]
+; VF16UF1-NEXT: [[TMP67:%.*]] = add i64 [[TMP66]], [[TMP1]]
; VF16UF1-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP67]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP68]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE24]]
@@ -1156,9 +1155,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP69:%.*]] = extractelement <16 x i1> [[TMP3]], i32 13
; VF16UF1-NEXT: br i1 [[TMP69]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
; VF16UF1: [[PRED_STORE_IF25]]:
-; VF16UF1-NEXT: [[TMP70:%.*]] = mul i64 13, [[STEP]]
+; VF16UF1-NEXT: [[TMP70:%.*]] = mul i64 13, [[TMP1]]
; VF16UF1-NEXT: [[TMP71:%.*]] = add i64 0, [[TMP70]]
-; VF16UF1-NEXT: [[TMP72:%.*]] = add i64 [[TMP71]], [[STEP]]
+; VF16UF1-NEXT: [[TMP72:%.*]] = add i64 [[TMP71]], [[TMP1]]
; VF16UF1-NEXT: [[TMP73:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP72]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP73]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE26]]
@@ -1166,9 +1165,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP74:%.*]] = extractelement <16 x i1> [[TMP3]], i32 14
; VF16UF1-NEXT: br i1 [[TMP74]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
; VF16UF1: [[PRED_STORE_IF27]]:
-; VF16UF1-NEXT: [[TMP75:%.*]] = mul i64 14, [[STEP]]
+; VF16UF1-NEXT: [[TMP75:%.*]] = mul i64 14, [[TMP1]]
; VF16UF1-NEXT: [[TMP76:%.*]] = add i64 0, [[TMP75]]
-; VF16UF1-NEXT: [[TMP77:%.*]] = add i64 [[TMP76]], [[STEP]]
+; VF16UF1-NEXT: [[TMP77:%.*]] = add i64 [[TMP76]], [[TMP1]]
; VF16UF1-NEXT: [[TMP78:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP77]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP78]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE28]]
@@ -1176,9 +1175,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP79:%.*]] = extractelement <16 x i1> [[TMP3]], i32 15
; VF16UF1-NEXT: br i1 [[TMP79]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30:.*]]
; VF16UF1: [[PRED_STORE_IF29]]:
-; VF16UF1-NEXT: [[TMP80:%.*]] = mul i64 15, [[STEP]]
+; VF16UF1-NEXT: [[TMP80:%.*]] = mul i64 15, [[TMP1]]
; VF16UF1-NEXT: [[TMP81:%.*]] = add i64 0, [[TMP80]]
-; VF16UF1-NEXT: [[TMP82:%.*]] = add i64 [[TMP81]], [[STEP]]
+; VF16UF1-NEXT: [[TMP82:%.*]] = add i64 [[TMP81]], [[TMP1]]
; VF16UF1-NEXT: [[TMP83:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP82]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP83]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE30]]
diff --git a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll
index 28739471eac2f..76d5c5299686e 100644
--- a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll
+++ b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll
@@ -417,11 +417,11 @@ define void @zext_of_i1_stride(i1 %g, ptr %dst) mustprogress {
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
-; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], [[G_64]]
+; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], [[TMP1]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], [[G_64]]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: store <4 x i16> splat (i16 1), ptr [[TMP4]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -479,7 +479,7 @@ define void @sext_of_i1_stride(i1 %g, ptr %dst) mustprogress {
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
-; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], [[G_64]]
+; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], [[TMP2]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 0
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
index d85638733211c..623d6c9208d11 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
@@ -82,9 +82,8 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) {
; CHECK-NEXT: WIDEN ir<%l>.1 = load vp<[[VPTR2]]>
; CHECK-NEXT: WIDEN ir<%add> = add nsw ir<%l>, ir<10>
; CHECK-NEXT: WIDEN ir<%add>.1 = add nsw ir<%l>.1, ir<10>
-; CHECK-NEXT: vp<[[VPTR4:%.+]]> = vector-pointer ir<%A>, ir<1>
; CHECK-NEXT: WIDEN store ir<%A>, ir<%add>
-; CHECK-NEXT: WIDEN store vp<[[VPTR4]]>, ir<%add>.1
+; CHECK-NEXT: WIDEN store vp<[[VPTR3:%.*]]>, ir<%add>.1
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
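
The hunks above all exercise the same effect: after the new VPlan CSE pass runs, structurally identical defs (duplicate vector pointers, duplicate widened induction phis, re-expanded SCEV steps) are folded into a single surviving def, and later users are rewritten to reference it. What follows is a minimal, self-contained sketch of that general hash-based CSE technique over a toy three-address IR; it is not the VPlan implementation from this patch, and Instr, cseBlock, and the operand encoding are invented purely for illustration (build with -std=c++17).

// Sketch of hash-based common-subexpression elimination over a toy IR.
// Each instruction is keyed by (opcode, canonicalized operands); the
// first occurrence of a key survives, later occurrences are folded.
#include <cstdio>
#include <map>
#include <string>
#include <tuple>
#include <vector>

struct Instr {
  std::string Opcode; // e.g. "mul", "add", "gep"
  int LHS, RHS;       // >= 0: index of a prior def; < 0: external value
};

// Maps each instruction to the index of the first structurally identical
// def, rewriting operands through the map so chains of duplicates collapse.
static std::vector<int> cseBlock(std::vector<Instr> Block) {
  std::map<std::tuple<std::string, int, int>, int> Seen;
  std::vector<int> Repl(Block.size());
  for (int I = 0, E = (int)Block.size(); I != E; ++I) {
    Instr &In = Block[I];
    // Canonicalize operands first, so a user of a duplicate def hashes
    // identically to a user of the surviving def.
    if (In.LHS >= 0) In.LHS = Repl[In.LHS];
    if (In.RHS >= 0) In.RHS = Repl[In.RHS];
    auto It = Seen.try_emplace({In.Opcode, In.LHS, In.RHS}, I).first;
    Repl[I] = It->second;
  }
  return Repl;
}

int main() {
  // External values: -1 = %a, -2 = %b.
  // %0 = mul %a, %b
  // %1 = mul %a, %b   ; duplicate of %0
  // %2 = add %0, %1   ; becomes add %0, %0 after operand rewriting
  // %3 = add %0, %0   ; duplicate of the rewritten %2
  std::vector<Instr> Block = {
      {"mul", -1, -2}, {"mul", -1, -2}, {"add", 0, 1}, {"add", 0, 0}};
  std::vector<int> Repl = cseBlock(Block);
  for (int I = 0, E = (int)Repl.size(); I != E; ++I)
    std::printf("%%%d -> %%%d%s\n", I, Repl[I],
                Repl[I] == I ? "" : "  (folded)");
  return 0;
}

Running the sketch prints %1 folded into %0 and %3 folded into %2, mirroring how the test diffs replace references such as [[STEP]] and [[TMP6]] with the earlier equivalent defs [[TMP1]] and [[TMP2]].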