[llvm] [VPlan] Introduce CSE pass (PR #151872)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Sun Aug 3 16:26:23 PDT 2025
https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/151872
From a95d0cd4a78e32dd23e3f35d9112485620044cd3 Mon Sep 17 00:00:00 2001
From: Pedro Lobo <pedro.lobo at tecnico.ulisboa.pt>
Date: Sun, 3 Aug 2025 16:10:20 +0100
Subject: [PATCH 1/3] [LV] Pre-commit test for cse_bitcast
---
.../LoopVectorize/X86/cse-bitcast.ll | 80 +++++++++++++++++++
1 file changed, 80 insertions(+)
create mode 100644 llvm/test/Transforms/LoopVectorize/X86/cse-bitcast.ll
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cse-bitcast.ll b/llvm/test/Transforms/LoopVectorize/X86/cse-bitcast.ll
new file mode 100644
index 0000000000000..1480b3b7a36e1
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/X86/cse-bitcast.ll
@@ -0,0 +1,80 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph" --version 5
+; RUN: opt %s -passes=loop-vectorize -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @cse_bitcast(i16 %val, half %fval, ptr %p, i64 %n) {
+; CHECK-LABEL: define void @cse_bitcast(
+; CHECK-SAME: i16 [[VAL:%.*]], half [[FVAL:%.*]], ptr [[P:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 60
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
+; CHECK: [[VECTOR_SCEVCHECK]]:
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
+; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
+; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
+; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = sub i64 0, [[MUL_RESULT]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[P]], i64 [[MUL_RESULT]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[P]]
+; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]]
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P]], i64 2
+; CHECK-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
+; CHECK-NEXT: [[MUL_RESULT2:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0
+; CHECK-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = sub i64 0, [[MUL_RESULT2]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT2]]
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ult ptr [[TMP6]], [[SCEVGEP]]
+; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW3]]
+; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP4]], [[TMP8]]
+; CHECK-NEXT: br i1 [[TMP9]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[N_VEC]], 4
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP10]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[VAL]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x half> poison, half [[FVAL]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x half> [[BROADCAST_SPLATINSERT4]], <4 x half> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 16
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP12]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i16> [[BROADCAST_SPLAT]] to <4 x half>
+; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x half> [[TMP13]], <4 x half> [[BROADCAST_SPLAT5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x half> [[TMP14]], <8 x half> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; CHECK-NEXT: store <8 x half> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP]], align 2
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i16> [[BROADCAST_SPLAT]] to <4 x half>
+; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x half> [[TMP15]], <4 x half> [[BROADCAST_SPLAT5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[INTERLEAVED_VEC7:%.*]] = shufflevector <8 x half> [[TMP16]], <8 x half> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; CHECK-NEXT: store <8 x half> [[INTERLEAVED_VEC7]], ptr [[NEXT_GEP6]], align 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], [[END:label %.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+;
+entry:
+ br label %loop
+
+loop:
+ %res = phi ptr [ %p, %entry ], [ %res.next, %loop ]
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ store i16 %val, ptr %res
+ %res.offset = getelementptr i16, ptr %res, i64 1
+ store half %fval, ptr %res.offset
+ %res.next = getelementptr i16, ptr %res, i64 2
+ %iv.next = add i64 %iv, 1
+ %exit.cond = icmp eq i64 %iv.next, %n
+ br i1 %exit.cond, label %end, label %loop
+
+end:
+ ret void
+}
From 771fefc7fb3f94062d43e268f61ff1fd96f50172 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Sun, 3 Aug 2025 14:16:11 +0100
Subject: [PATCH 2/3] [VPlan] Introduce CSE pass
---
.../Transforms/Vectorize/LoopVectorize.cpp | 1 +
llvm/lib/Transforms/Vectorize/VPlan.cpp | 32 ++++
.../Transforms/Vectorize/VPlanTransforms.cpp | 44 +++++
.../Transforms/Vectorize/VPlanTransforms.h | 4 +
llvm/lib/Transforms/Vectorize/VPlanUtils.h | 10 ++
llvm/lib/Transforms/Vectorize/VPlanValue.h | 7 +
.../epilog-vectorization-widen-inductions.ll | 5 +-
.../extractvalue-no-scalarization-required.ll | 105 +++++++----
.../AArch64/force-target-instruction-cost.ll | 3 +-
.../LoopVectorize/AArch64/induction-costs.ll | 3 +-
.../AArch64/interleave-with-gaps.ll | 12 +-
.../LoopVectorize/AArch64/licm-calls.ll | 3 +-
.../AArch64/partial-reduce-dot-product.ll | 23 ++-
.../AArch64/reduction-recurrence-costs-sve.ll | 6 +-
.../LoopVectorize/AArch64/sve-epilog-vect.ll | 32 ++--
.../AArch64/sve-vscale-based-trip-counts.ll | 43 ++---
.../AArch64/sve-widen-extractvalue.ll | 56 ++++--
.../LoopVectorize/ARM/mve-reduction-types.ll | 15 +-
.../LoopVectorize/ARM/pointer_iv.ll | 16 +-
.../LoopVectorize/RISCV/inloop-reduction.ll | 6 +-
.../LoopVectorize/RISCV/scalable-tailfold.ll | 5 +-
.../LoopVectorize/RISCV/strided-accesses.ll | 10 +-
...demanding-all-lanes-and-first-lane-only.ll | 17 +-
.../X86/cost-constant-known-via-scev.ll | 3 +-
.../LoopVectorize/X86/induction-costs.ll | 3 +-
.../LoopVectorize/X86/uniform_load.ll | 15 +-
.../LoopVectorize/X86/unroll-small-loops.ll | 11 +-
.../first-order-recurrence-complex.ll | 6 +-
...t-order-recurrence-multiply-recurrences.ll | 3 +-
.../LoopVectorize/first-order-recurrence.ll | 42 ++---
.../Transforms/LoopVectorize/induction.ll | 6 +-
.../interleave-with-i65-induction.ll | 3 +-
...aved-accesses-different-insert-position.ll | 3 +-
.../LoopVectorize/iv_outside_user.ll | 4 +-
.../Transforms/LoopVectorize/opaque-ptr.ll | 12 +-
.../LoopVectorize/pointer-induction.ll | 24 +--
llvm/test/Transforms/LoopVectorize/pr36983.ll | 36 +++-
.../pr59319-loop-access-info-invalidation.ll | 25 ++-
.../LoopVectorize/reverse_induction.ll | 9 +-
.../LoopVectorize/single-value-blend-phis.ll | 6 +-
...oop-backedge-elimination-branch-weights.ll | 9 +-
...or-loop-backedge-elimination-early-exit.ll | 14 +-
.../vector-loop-backedge-elimination.ll | 169 +++++++++---------
.../version-stride-with-integer-casts.ll | 6 +-
.../vplan-printing-before-execute.ll | 5 +-
.../AArch64/indvars-vectorization.ll | 4 +-
46 files changed, 491 insertions(+), 385 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 20528089c0008..2603171f0eee6 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7308,6 +7308,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
VPlanTransforms::narrowInterleaveGroups(
BestVPlan, BestVF,
TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
+ VPlanTransforms::cse(BestVPlan, *Legal->getWidestInductionType());
VPlanTransforms::removeDeadRecipes(BestVPlan);
VPlanTransforms::convertToConcreteRecipes(BestVPlan,
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 25b9616880bf4..2da5ab13f7b3d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -122,6 +122,38 @@ void VPDef::dump() const {
}
#endif
+bool VPValue::isIdenticalTo(const VPValue *Other) const {
+ if (getVPValueID() != Other->getVPValueID() ||
+ hasDefiningRecipe() != Other->hasDefiningRecipe() ||
+ !getUnderlyingValue() != !Other->getUnderlyingValue())
+ return false;
+ Instruction *I = dyn_cast_or_null<Instruction>(getUnderlyingValue());
+ Instruction *OtherI =
+ dyn_cast_or_null<Instruction>(Other->getUnderlyingValue());
+ if (I && OtherI)
+ return I->getOpcode() == OtherI->getOpcode() &&
+ equal(I->operand_values(), OtherI->operand_values());
+ if (hasDefiningRecipe()) {
+ const VPRecipeBase *DefL = getDefiningRecipe();
+ const VPRecipeBase *DefR = Other->getDefiningRecipe();
+ return vputils::getOpcode(*DefL) == vputils::getOpcode(*DefR) &&
+ equal(DefL->operands(), DefR->operands());
+ }
+ return getUnderlyingValue() == Other->getUnderlyingValue();
+}
+
+hash_code llvm::hash_value(const VPValue &V) {
+ if (Instruction *I = dyn_cast_or_null<Instruction>(V.getUnderlyingValue()))
+ return hash_combine(I->getOpcode(),
+ hash_combine_range(I->operand_values()));
+ if (V.hasDefiningRecipe()) {
+ const VPRecipeBase *Def = V.getDefiningRecipe();
+ return hash_combine(vputils::getOpcode(*Def),
+ hash_combine_range(Def->operands()));
+ }
+ return hash_combine(V.getVPValueID(), V.getUnderlyingValue());
+}
+
VPRecipeBase *VPValue::getDefiningRecipe() {
return cast_or_null<VPRecipeBase>(Def);
}
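
The two functions above form the usual hashing contract: any two VPValues that
compare isIdenticalTo must also produce the same hash_value, since the CSE map
hashes first and only falls back to the equality check on a hash match. A
minimal standalone sketch of that contract (the Expr type and opcodes below
are hypothetical stand-ins, not the real VPValue hierarchy):

#include "llvm/ADT/Hashing.h"
#include <cassert>
#include <vector>

// Hypothetical stand-in for a defining recipe: an opcode plus operands that,
// like VPValue operands, are compared by pointer.
struct Expr {
  unsigned Opcode;
  std::vector<const Expr *> Operands;

  bool isIdenticalTo(const Expr &Other) const {
    return Opcode == Other.Opcode && Operands == Other.Operands;
  }
};

// Must agree with isIdenticalTo: identical Exprs hash identically.
llvm::hash_code hash_value(const Expr &E) {
  return llvm::hash_combine(E.Opcode, llvm::hash_combine_range(E.Operands));
}

int main() {
  Expr X{13, {}};
  Expr A{42, {&X}}, B{42, {&X}};
  assert(A.isIdenticalTo(B) && hash_value(A) == hash_value(B));
}

The real isIdenticalTo additionally compares VPValueIDs and falls back to the
wrapped IR instruction when no defining recipe exists; the sketch models only
the recipe case.
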
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index fcbc86f5e4c58..0783505fdce9a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1755,6 +1755,50 @@ void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
}
}
+/// Hash the underlying data of a VPSingleDefRecipe pointer, instead of hashing
+/// the pointer itself.
+namespace {
+struct CSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
+ static unsigned getHashValue(const VPSingleDefRecipe *R) {
+ return hash_value(*R);
+ }
+
+ static bool isEqual(const VPSingleDefRecipe *LHS,
+ const VPSingleDefRecipe *RHS) {
+ if (LHS == getEmptyKey() || RHS == getEmptyKey() ||
+ LHS == getTombstoneKey() || RHS == getTombstoneKey())
+ return LHS == RHS;
+ return LHS->isIdenticalTo(RHS);
+ }
+};
+} // end anonymous namespace
+
+/// Perform common-subexpression elimination of VPSingleDefRecipes in
+/// \p Plan.
+void VPlanTransforms::cse(VPlan &Plan, Type &CanonicalIVTy) {
+ DenseMap<VPSingleDefRecipe *, VPSingleDefRecipe *, CSEDenseMapInfo> CSEMap;
+ VPTypeAnalysis TypeInfo(&CanonicalIVTy);
+  // There is existing logic to sink instructions into replicate regions;
+  // CSE-ing inside them would undo that work. Hence, don't descend into
+  // replicate regions.
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+ vp_depth_first_shallow(Plan.getEntry()))) {
+ for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
+ auto *Def = dyn_cast<VPSingleDefRecipe>(&R);
+ if (!Def)
+ continue;
+ if (VPSingleDefRecipe *V = CSEMap.lookup(Def)) {
+ if (TypeInfo.inferScalarType(Def) != TypeInfo.inferScalarType(V))
+ continue;
+ Def->replaceAllUsesWith(V);
+ Def->eraseFromParent();
+ continue;
+ }
+ CSEMap[Def] = Def;
+ }
+ }
+}
+
/// Move loop-invariant recipes out of the vector loop region in \p Plan.
static void licm(VPlan &Plan) {
VPBasicBlock *Preheader = Plan.getVectorPreheader();
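
The loop in cse() is classic hash-based local CSE: the first recipe seen with
a given (opcode, operands) key becomes the leader, and every later identical
recipe is RAUW'd to the leader and erased. A self-contained sketch of the same
shape, assuming a hypothetical Recipe type in place of VPSingleDefRecipe and a
ReplacedBy field in place of replaceAllUsesWith/eraseFromParent:

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
#include <vector>

struct Recipe {
  unsigned Opcode;
  std::vector<Recipe *> Operands;
  Recipe *ReplacedBy = nullptr;
};

struct CSEInfo : llvm::DenseMapInfo<Recipe *> {
  static unsigned getHashValue(const Recipe *R) {
    return llvm::hash_combine(R->Opcode,
                              llvm::hash_combine_range(R->Operands));
  }
  static bool isEqual(const Recipe *L, const Recipe *R) {
    // Never dereference the sentinel keys DenseMap passes in.
    if (L == getEmptyKey() || L == getTombstoneKey() ||
        R == getEmptyKey() || R == getTombstoneKey())
      return L == R;
    return L->Opcode == R->Opcode && L->Operands == R->Operands;
  }
};

void cse(std::vector<Recipe *> &Block) {
  llvm::DenseMap<Recipe *, Recipe *, CSEInfo> Map;
  for (Recipe *R : Block) {
    if (Recipe *Leader = Map.lookup(R)) {
      R->ReplacedBy = Leader; // i.e. replaceAllUsesWith + erase
      continue;
    }
    Map[R] = R; // first occurrence wins
  }
}

Note the extra inferScalarType guard in the real pass: two recipes may hash
and compare equal yet be inferred to different scalar types, in which case
they must not be merged.
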
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 880159f760922..f782a4e331091 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -240,6 +240,10 @@ struct VPlanTransforms {
/// removing dead edges to their successors.
static void removeBranchOnConst(VPlan &Plan);
+  /// Perform common-subexpression elimination, which is best done late in
+  /// the pipeline, just before \p Plan is executed.
+ static void cse(VPlan &Plan, Type &CanonicalIVType);
+
/// If there's a single exit block, optimize its phi recipes that use exiting
/// IV values by feeding them precomputed end values instead, possibly taken
/// one step backwards.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 8dcd57f1b3598..309fa1d785490 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -10,6 +10,7 @@
#define LLVM_TRANSFORMS_VECTORIZE_VPLANUTILS_H
#include "VPlan.h"
+#include "llvm/ADT/TypeSwitch.h"
namespace llvm {
class ScalarEvolution;
@@ -37,6 +38,15 @@ VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
/// SCEV expression could be constructed.
const SCEV *getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE);
+/// Get any instruction opcode data embedded in recipe \p R.
+inline std::optional<unsigned> getOpcode(const VPRecipeBase &R) {
+ return TypeSwitch<const VPRecipeBase *, std::optional<unsigned>>(&R)
+ .Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
+ VPWidenSelectRecipe, VPHistogramRecipe, VPPartialReductionRecipe,
+ VPReplicateRecipe>([](auto *I) { return I->getOpcode(); })
+ .Default([](auto *) { return std::nullopt; });
+}
+
/// Returns true if \p VPV is a single scalar, either because it produces the
/// same value for all lanes or only has its first lane used.
inline bool isSingleScalar(const VPValue *VPV) {
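
vputils::getOpcode above centralizes the opcode probe over the recipe classes
that carry one; every other recipe maps to std::nullopt. A minimal standalone
illustration of the same TypeSwitch dispatch, using hypothetical Add/Phi
classes (the classof boilerplate is what lets LLVM's dyn_cast, and hence
TypeSwitch, work without RTTI):

#include "llvm/ADT/TypeSwitch.h"
#include <optional>

struct Base {
  enum Kind { AddKind, PhiKind };
  Kind K;
  Base(Kind K) : K(K) {}
};
struct Add : Base {
  Add() : Base(AddKind) {}
  static bool classof(const Base *B) { return B->K == AddKind; }
  unsigned getOpcode() const { return 13; } // hypothetical opcode
};
struct Phi : Base {
  Phi() : Base(PhiKind) {}
  static bool classof(const Base *B) { return B->K == PhiKind; }
};

// Recipes with an opcode report it; everything else yields std::nullopt,
// mirroring vputils::getOpcode above.
std::optional<unsigned> getOpcode(const Base &R) {
  return llvm::TypeSwitch<const Base *, std::optional<unsigned>>(&R)
      .Case<Add>([](auto *I) { return I->getOpcode(); })
      .Default([](auto *) { return std::nullopt; });
}
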
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 24f6d61512ef6..05bb8a385fae6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -185,8 +185,15 @@ class LLVM_ABI_FOR_TEST VPValue {
assert(!UnderlyingVal && "Underlying Value is already set.");
UnderlyingVal = Val;
}
+
+  // Equality of defining data, rather than pointer equality.
+ bool isIdenticalTo(const VPValue *Other) const;
};
+// Hash the defining data of a VPValue so that identical values can be
+// de-duplicated, e.g. during CSE.
+hash_code hash_value(const VPValue &Arg);
+
typedef DenseMap<Value *, VPValue *> Value2VPValueTy;
typedef DenseMap<VPValue *, Value *> VPValue2ValueTy;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
index 5b15896da8d78..87b8c4af1e0c7 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
@@ -330,11 +330,10 @@ define void @test_widen_induction_step_2(ptr %A, i64 %N, i32 %step) {
; CHECK-NEXT: [[CMP_N11:%.*]] = icmp eq i64 [[N]], [[IND_END]]
; CHECK-NEXT: br i1 [[CMP_N11]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL5]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL6]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[IV_2:%.*]] = phi i64 [ [[BC_RESUME_VAL6]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_1_NEXT]], [[LOOP]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_1]]
; CHECK-NEXT: [[ADD:%.*]] = add i64 [[IV_2]], 10
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
index 19f2a363a733b..bbdab4856273a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
; REQUIRES: asserts
; RUN: opt -passes=loop-vectorize -mtriple=arm64-apple-ios %s -S -debug -disable-output 2>&1 | FileCheck --check-prefix=CM %s
@@ -22,23 +23,42 @@
; Check that the extractvalue operands are actually free in vector code.
-; FORCED: [[E1:%.+]] = extractvalue { i64, i64 } %sv, 0
-; FORCED-NEXT: %broadcast.splatinsert = insertelement <2 x i64> poison, i64 [[E1]], i64 0
-; FORCED-NEXT: %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> poison, <2 x i32> zeroinitializer
-; FORCED-NEXT: [[E2:%.+]] = extractvalue { i64, i64 } %sv, 1
-; FORCED-NEXT: %broadcast.splatinsert1 = insertelement <2 x i64> poison, i64 [[E2]], i64 0
-; FORCED-NEXT: %broadcast.splat2 = shufflevector <2 x i64> %broadcast.splatinsert1, <2 x i64> poison, <2 x i32> zeroinitializer
-; FORCED-NEXT: [[ADD:%.+]] = add <2 x i64> %broadcast.splat, %broadcast.splat2
-
-; FORCED-LABEL: vector.body: ; preds = %vector.body, %vector.ph
-; FORCED-NEXT: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-; FORCED-NEXT: [[GEP:%.+]] = getelementptr i64, ptr %dst, i32 %index
-; FORCED-NEXT: store <2 x i64> [[ADD]], ptr [[GEP]], align 4
-; FORCED-NEXT: %index.next = add nuw i32 %index, 2
-; FORCED-NEXT: [[C:%.+]] = icmp eq i32 %index.next, 1000
-; FORCED-NEXT: br i1 [[C]], label %middle.block, label %vector.body
-
define void @test1(ptr %dst, {i64, i64} %sv) {
+; FORCED-LABEL: define void @test1(
+; FORCED-SAME: ptr [[DST:%.*]], { i64, i64 } [[SV:%.*]]) {
+; FORCED-NEXT: [[ENTRY:.*]]:
+; FORCED-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; FORCED: [[VECTOR_PH]]:
+; FORCED-NEXT: [[TMP0:%.*]] = extractvalue { i64, i64 } [[SV]], 0
+; FORCED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0
+; FORCED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
+; FORCED-NEXT: [[TMP1:%.*]] = add <2 x i64> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT]]
+; FORCED-NEXT: br label %[[VECTOR_BODY:.*]]
+; FORCED: [[VECTOR_BODY]]:
+; FORCED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; FORCED-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[DST]], i32 [[INDEX]]
+; FORCED-NEXT: store <2 x i64> [[TMP1]], ptr [[TMP2]], align 4
+; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
+; FORCED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
+; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; FORCED: [[MIDDLE_BLOCK]]:
+; FORCED-NEXT: br label %[[EXIT:.*]]
+; FORCED: [[SCALAR_PH]]:
+; FORCED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
+; FORCED-NEXT: br label %[[LOOP_BODY:.*]]
+; FORCED: [[LOOP_BODY]]:
+; FORCED-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_BODY]] ]
+; FORCED-NEXT: [[A:%.*]] = extractvalue { i64, i64 } [[SV]], 0
+; FORCED-NEXT: [[B:%.*]] = extractvalue { i64, i64 } [[SV]], 1
+; FORCED-NEXT: [[ADDR:%.*]] = getelementptr i64, ptr [[DST]], i32 [[IV]]
+; FORCED-NEXT: [[ADD:%.*]] = add i64 [[A]], [[B]]
+; FORCED-NEXT: store i64 [[ADD]], ptr [[ADDR]], align 4
+; FORCED-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1
+; FORCED-NEXT: [[COND:%.*]] = icmp ne i32 [[IV_NEXT]], 1000
+; FORCED-NEXT: br i1 [[COND]], label %[[LOOP_BODY]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
+; FORCED: [[EXIT]]:
+; FORCED-NEXT: ret void
+;
entry:
br label %loop.body
@@ -70,25 +90,42 @@ declare float @powf(float, float) readnone nounwind
; CM: LV: Scalar loop costs: 14.
-; FORCED-LABEL: define void @test_getVectorCallCost
-
-; FORCED: [[E1:%.+]] = extractvalue { float, float } %sv, 0
-; FORCED-NEXT: %broadcast.splatinsert = insertelement <2 x float> poison, float [[E1]], i64 0
-; FORCED-NEXT: %broadcast.splat = shufflevector <2 x float> %broadcast.splatinsert, <2 x float> poison, <2 x i32> zeroinitializer
-; FORCED-NEXT: [[E2:%.+]] = extractvalue { float, float } %sv, 1
-; FORCED-NEXT: %broadcast.splatinsert1 = insertelement <2 x float> poison, float [[E2]], i64 0
-; FORCED-NEXT: %broadcast.splat2 = shufflevector <2 x float> %broadcast.splatinsert1, <2 x float> poison, <2 x i32> zeroinitializer
-
-; FORCED-LABEL: vector.body: ; preds = %vector.body, %vector.ph
-; FORCED-NEXT: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-; FORCED-NEXT: [[GEP1:%.+]] = getelementptr float, ptr %dst, i32 %index
-; FORCED-NEXT: [[POW:%.+]] = call <2 x float> @llvm.pow.v2f32(<2 x float> %broadcast.splat, <2 x float> %broadcast.splat2)
-; FORCED-NEXT: store <2 x float> [[POW]], ptr [[GEP1]], align 4
-; FORCED-NEXT: %index.next = add nuw i32 %index, 2
-; FORCED-NEXT: [[C:%.+]] = icmp eq i32 %index.next, 1000
-; FORCED-NEXT: br i1 [[C]], label %middle.block, label %vector.body
-
define void @test_getVectorCallCost(ptr %dst, {float, float} %sv) {
+; FORCED-LABEL: define void @test_getVectorCallCost(
+; FORCED-SAME: ptr [[DST:%.*]], { float, float } [[SV:%.*]]) {
+; FORCED-NEXT: [[ENTRY:.*]]:
+; FORCED-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; FORCED: [[VECTOR_PH]]:
+; FORCED-NEXT: [[TMP0:%.*]] = extractvalue { float, float } [[SV]], 0
+; FORCED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0
+; FORCED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
+; FORCED-NEXT: br label %[[VECTOR_BODY:.*]]
+; FORCED: [[VECTOR_BODY]]:
+; FORCED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; FORCED-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[DST]], i32 [[INDEX]]
+; FORCED-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[BROADCAST_SPLAT]], <2 x float> [[BROADCAST_SPLAT]])
+; FORCED-NEXT: store <2 x float> [[TMP2]], ptr [[TMP1]], align 4
+; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
+; FORCED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
+; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; FORCED: [[MIDDLE_BLOCK]]:
+; FORCED-NEXT: br label %[[EXIT:.*]]
+; FORCED: [[SCALAR_PH]]:
+; FORCED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
+; FORCED-NEXT: br label %[[LOOP_BODY:.*]]
+; FORCED: [[LOOP_BODY]]:
+; FORCED-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_BODY]] ]
+; FORCED-NEXT: [[A:%.*]] = extractvalue { float, float } [[SV]], 0
+; FORCED-NEXT: [[B:%.*]] = extractvalue { float, float } [[SV]], 1
+; FORCED-NEXT: [[ADDR:%.*]] = getelementptr float, ptr [[DST]], i32 [[IV]]
+; FORCED-NEXT: [[P:%.*]] = call float @powf(float [[A]], float [[B]])
+; FORCED-NEXT: store float [[P]], ptr [[ADDR]], align 4
+; FORCED-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1
+; FORCED-NEXT: [[COND:%.*]] = icmp ne i32 [[IV_NEXT]], 1000
+; FORCED-NEXT: br i1 [[COND]], label %[[LOOP_BODY]], label %[[EXIT]], !llvm.loop [[LOOP5:![0-9]+]]
+; FORCED: [[EXIT]]:
+; FORCED-NEXT: ret void
+;
entry:
br label %loop.body
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
index c9cef142e484c..ab8896fc21f43 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
@@ -23,12 +23,11 @@ define double @test_reduction_costs() {
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP_1:.*]]
; CHECK: [[LOOP_1]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_1]] ]
; CHECK-NEXT: [[R_1:%.*]] = phi double [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[R_1_NEXT:%.*]], %[[LOOP_1]] ]
-; CHECK-NEXT: [[R_2:%.*]] = phi double [ [[BC_MERGE_RDX2]], %[[SCALAR_PH]] ], [ [[R_2_NEXT:%.*]], %[[LOOP_1]] ]
+; CHECK-NEXT: [[R_2:%.*]] = phi double [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[R_2_NEXT:%.*]], %[[LOOP_1]] ]
; CHECK-NEXT: [[R_1_NEXT]] = fadd double [[R_1]], 3.000000e+00
; CHECK-NEXT: [[R_2_NEXT]] = fadd double [[R_2]], 9.000000e+00
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll
index aa2ec2de14c29..26441518bc683 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll
@@ -294,7 +294,6 @@ define i64 @test_ptr_ivs_and_widened_ivs(ptr %src, i32 %N) {
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 4
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP3]]
-; CHECK-NEXT: [[IND_END1:%.*]] = trunc i64 [[N_VEC]] to i32
; CHECK-NEXT: [[IND_END3:%.*]] = trunc i64 [[N_VEC]] to i32
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
@@ -321,7 +320,7 @@ define i64 @test_ptr_ivs_and_widened_ivs(ptr %src, i32 %N) {
; CHECK: scalar.ph:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[SRC]], [[ENTRY]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll
index 649be65e8e671..7be18a39dfa8e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll
@@ -19,7 +19,6 @@ define i64 @vector_loop_with_remaining_iterations(ptr %src, ptr noalias %dst, i3
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[X]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.abs.v16i32(<16 x i32> [[BROADCAST_SPLAT]], i1 false)
; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.abs.v16i32(<16 x i32> [[BROADCAST_SPLAT]], i1 false)
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
@@ -29,7 +28,7 @@ define i64 @vector_loop_with_remaining_iterations(ptr %src, ptr noalias %dst, i3
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <64 x i8>, ptr [[TMP4]], align 1
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <64 x i8> [[WIDE_VEC]], <64 x i8> poison, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
; CHECK-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[STRIDED_VEC]] to <16 x i32>
-; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> [[TMP2]], <16 x i32> [[TMP5]])
+; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> [[TMP3]], <16 x i32> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> [[TMP3]], <16 x i32> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX]]
; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP8]], align 1
@@ -60,7 +59,6 @@ define i64 @vector_loop_with_remaining_iterations(ptr %src, ptr noalias %dst, i3
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[X]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <vscale x 2 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0
-; CHECK-NEXT: [[TMP23:%.*]] = call <vscale x 2 x i32> @llvm.abs.nxv2i32(<vscale x 2 x i32> [[BROADCAST_SPLAT2]], i1 false)
; CHECK-NEXT: [[TMP24:%.*]] = call <vscale x 2 x i32> @llvm.abs.nxv2i32(<vscale x 2 x i32> [[BROADCAST_SPLAT2]], i1 false)
; CHECK-NEXT: [[TMP25:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0
@@ -78,7 +76,7 @@ define i64 @vector_loop_with_remaining_iterations(ptr %src, ptr noalias %dst, i3
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr { [4 x i8] }, ptr [[SRC]], <vscale x 2 x i64> [[VEC_IND]], i32 0, i64 3
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> [[TMP28]], i32 1, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i8> poison)
; CHECK-NEXT: [[TMP29:%.*]] = zext <vscale x 2 x i8> [[WIDE_MASKED_GATHER]] to <vscale x 2 x i32>
-; CHECK-NEXT: [[TMP30:%.*]] = call <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32> [[TMP23]], <vscale x 2 x i32> [[TMP29]])
+; CHECK-NEXT: [[TMP30:%.*]] = call <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32> [[TMP24]], <vscale x 2 x i32> [[TMP29]])
; CHECK-NEXT: [[TMP31:%.*]] = call <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32> [[TMP24]], <vscale x 2 x i32> [[TMP30]])
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX7]]
; CHECK-NEXT: store <vscale x 2 x i8> zeroinitializer, ptr [[TMP32]], align 1
@@ -157,7 +155,6 @@ define i64 @main_vector_loop_fixed_with_no_remaining_iterations(ptr %src, ptr no
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[X]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.abs.v16i32(<16 x i32> [[BROADCAST_SPLAT]], i1 false)
; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.abs.v16i32(<16 x i32> [[BROADCAST_SPLAT]], i1 false)
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
@@ -167,7 +164,7 @@ define i64 @main_vector_loop_fixed_with_no_remaining_iterations(ptr %src, ptr no
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <64 x i8>, ptr [[TMP4]], align 1
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <64 x i8> [[WIDE_VEC]], <64 x i8> poison, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
; CHECK-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[STRIDED_VEC]] to <16 x i32>
-; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> [[TMP2]], <16 x i32> [[TMP5]])
+; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> [[TMP3]], <16 x i32> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> [[TMP3]], <16 x i32> [[TMP6]])
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX]]
; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP26]], align 1
@@ -198,7 +195,6 @@ define i64 @main_vector_loop_fixed_with_no_remaining_iterations(ptr %src, ptr no
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[X]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <vscale x 2 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0
-; CHECK-NEXT: [[TMP23:%.*]] = call <vscale x 2 x i32> @llvm.abs.nxv2i32(<vscale x 2 x i32> [[BROADCAST_SPLAT2]], i1 false)
; CHECK-NEXT: [[TMP24:%.*]] = call <vscale x 2 x i32> @llvm.abs.nxv2i32(<vscale x 2 x i32> [[BROADCAST_SPLAT2]], i1 false)
; CHECK-NEXT: [[TMP25:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0
@@ -216,7 +212,7 @@ define i64 @main_vector_loop_fixed_with_no_remaining_iterations(ptr %src, ptr no
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr { [4 x i8] }, ptr [[SRC]], <vscale x 2 x i64> [[VEC_IND]], i32 0, i64 3
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> [[TMP28]], i32 1, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i8> poison)
; CHECK-NEXT: [[TMP29:%.*]] = zext <vscale x 2 x i8> [[WIDE_MASKED_GATHER]] to <vscale x 2 x i32>
-; CHECK-NEXT: [[TMP30:%.*]] = call <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32> [[TMP23]], <vscale x 2 x i32> [[TMP29]])
+; CHECK-NEXT: [[TMP30:%.*]] = call <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32> [[TMP24]], <vscale x 2 x i32> [[TMP29]])
; CHECK-NEXT: [[TMP31:%.*]] = call <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32> [[TMP24]], <vscale x 2 x i32> [[TMP30]])
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX7]]
; CHECK-NEXT: store <vscale x 2 x i8> zeroinitializer, ptr [[TMP32]], align 1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll b/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll
index 8c4eba61b6ba2..09a1c17087af2 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll
@@ -9,9 +9,8 @@ define void @licm_replicate_call(double %x, ptr %dst) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP0:%.*]] = tail call double @llvm.pow.f64(double [[X]], double 3.000000e+00)
; CHECK-NEXT: [[TMP1:%.*]] = tail call double @llvm.pow.f64(double [[X]], double 3.000000e+00)
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP1]], i32 1
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
index 5218e64cddd80..7f511c59a46bd 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
@@ -1003,10 +1003,6 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK-INTERLEAVE1: middle.block:
-; CHECK-INTERLEAVE1-NEXT: [[TMP19:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = mul nuw i32 [[TMP19]], 8
-; CHECK-INTERLEAVE1-NEXT: [[TMP21:%.*]] = sub i32 [[TMP20]], 1
-; CHECK-INTERLEAVE1-NEXT: [[TMP22:%.*]] = extractelement <vscale x 8 x i32> [[TMP17]], i32 [[TMP21]]
; CHECK-INTERLEAVE1-NEXT: [[TMP23:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-INTERLEAVE1-NEXT: [[TMP24:%.*]] = mul nuw i32 [[TMP23]], 8
; CHECK-INTERLEAVE1-NEXT: [[TMP25:%.*]] = sub i32 [[TMP24]], 1
@@ -1049,10 +1045,6 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK-INTERLEAVED: middle.block:
-; CHECK-INTERLEAVED-NEXT: [[TMP23:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-INTERLEAVED-NEXT: [[TMP31:%.*]] = mul nuw i32 [[TMP23]], 8
-; CHECK-INTERLEAVED-NEXT: [[TMP25:%.*]] = sub i32 [[TMP31]], 1
-; CHECK-INTERLEAVED-NEXT: [[TMP26:%.*]] = extractelement <vscale x 8 x i32> [[TMP21]], i32 [[TMP25]]
; CHECK-INTERLEAVED-NEXT: [[TMP27:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-INTERLEAVED-NEXT: [[TMP28:%.*]] = mul nuw i32 [[TMP27]], 8
; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = sub i32 [[TMP28]], 1
@@ -1089,10 +1081,6 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 {
; CHECK-MAXBW-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-MAXBW-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK-MAXBW: middle.block:
-; CHECK-MAXBW-NEXT: [[TMP26:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-MAXBW-NEXT: [[TMP27:%.*]] = mul nuw i32 [[TMP26]], 8
-; CHECK-MAXBW-NEXT: [[TMP28:%.*]] = sub i32 [[TMP27]], 1
-; CHECK-MAXBW-NEXT: [[TMP29:%.*]] = extractelement <vscale x 8 x i32> [[TMP21]], i32 [[TMP28]]
; CHECK-MAXBW-NEXT: [[TMP23:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-MAXBW-NEXT: [[TMP24:%.*]] = mul nuw i32 [[TMP23]], 8
; CHECK-MAXBW-NEXT: [[TMP25:%.*]] = sub i32 [[TMP24]], 1
@@ -1852,6 +1840,17 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 {
; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i64>
; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[INDEX]], 1
; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[TMP10]]
+; CHECK-MAXBW-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x i8>, ptr [[TMP11]], align 1
+; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD1]] to <vscale x 8 x i64>
+; CHECK-MAXBW-NEXT: [[TMP15:%.*]] = mul nuw nsw <vscale x 8 x i64> [[TMP12]], [[TMP9]]
+; CHECK-MAXBW-NEXT: [[TMP14]] = add <vscale x 8 x i64> [[VEC_PHI]], [[TMP15]]
+; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-MAXBW-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
+; CHECK-MAXBW: middle.block:
+; CHECK-MAXBW-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.add.nxv8i64(<vscale x 8 x i64> [[TMP14]])
+; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 41, [[N_VEC]]
+; CHECK-MAXBW-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK-MAXBW: scalar.ph:
;
entry:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
index 08d35f71e7cc3..366d4fe1adf04 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
@@ -251,15 +251,13 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; PRED-NEXT: br label %[[EXIT:.*]]
; PRED: [[SCALAR_PH]]:
; PRED-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
-; PRED-NEXT: [[SCALAR_RECUR_INIT8:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
-; PRED-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
; PRED-NEXT: br label %[[LOOP:.*]]
; PRED: [[LOOP]]:
; PRED-NEXT: [[TMP45:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[TMP53:%.*]], %[[LOOP]] ]
-; PRED-NEXT: [[SCALAR_RECUR10:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT8]], %[[SCALAR_PH]] ], [ [[TMP45]], %[[LOOP]] ]
+; PRED-NEXT: [[SCALAR_RECUR10:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[TMP45]], %[[LOOP]] ]
; PRED-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], %[[LOOP]] ]
-; PRED-NEXT: [[SUM_RED:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[RED_2:%.*]], %[[LOOP]] ]
+; PRED-NEXT: [[SUM_RED:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[RED_2:%.*]], %[[LOOP]] ]
; PRED-NEXT: [[TMP52:%.*]] = add i64 [[Y]], 1
; PRED-NEXT: [[GEP_1:%.*]] = getelementptr i32, ptr [[SRC_1]], i64 [[TMP52]]
; PRED-NEXT: [[TMP53]] = load i32, ptr [[GEP_1]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
index 0f407cd565457..a5f132b05e27e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
@@ -564,11 +564,11 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 4 x float>, ptr [[TMP17]], align 4
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD3]]
; CHECK-NEXT: [[TMP19:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD4]]
-; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4
-; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP21]]
+; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP21]], 4
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP22]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP13]], align 4
-; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP22]], align 4
+; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP20]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
@@ -640,11 +640,11 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
; CHECK-VF8-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP15]], align 4
; CHECK-VF8-NEXT: [[TMP16:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-VF8-NEXT: [[TMP17:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
-; CHECK-VF8-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 4
-; CHECK-VF8-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP19]]
+; CHECK-VF8-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-VF8-NEXT: [[TMP20:%.*]] = mul nuw i64 [[TMP19]], 4
+; CHECK-VF8-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP20]]
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP16]], ptr [[TMP11]], align 4
-; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP20]], align 4
+; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP18]], align 4
; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-VF8-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF8-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
@@ -716,11 +716,11 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 4 x float>, ptr [[TMP17]], align 4
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD3]]
; CHECK-NEXT: [[TMP19:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD4]]
-; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4
-; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP21]]
+; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP21]], 4
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP22]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP13]], align 4
-; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP22]], align 4
+; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP20]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
@@ -792,11 +792,11 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
; CHECK-VF8-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP15]], align 4
; CHECK-VF8-NEXT: [[TMP16:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-VF8-NEXT: [[TMP17:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
-; CHECK-VF8-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 4
-; CHECK-VF8-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP19]]
+; CHECK-VF8-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-VF8-NEXT: [[TMP20:%.*]] = mul nuw i64 [[TMP19]], 4
+; CHECK-VF8-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP20]]
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP16]], ptr [[TMP11]], align 4
-; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP20]], align 4
+; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP18]], align 4
; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-VF8-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF8-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll
index 352f4fe3dae21..b0740cb315b01 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll
@@ -72,23 +72,14 @@ define void @vscale_mul_8(ptr noalias noundef readonly captures(none) %a, ptr n
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[MUL1]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8
-; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[TMP10]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[A]], align 4
-; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP11]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 4
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[TMP15]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[B]], align 4
-; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP16]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
-; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
-; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4
-; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[TMP21]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[B]], align 4
-; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP22]], align 4
+; CHECK-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP16]], align 4
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[MUL1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY:.*]]
; CHECK: [[FOR_COND_CLEANUP]]:
@@ -230,11 +221,11 @@ define void @vscale_mul_31(ptr noalias noundef readonly captures(none) %a, ptr n
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP16]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
-; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP20:%.*]] = mul nuw i64 [[TMP19]], 4
-; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP20]]
+; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP21]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP12]], align 4
-; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP21]], align 4
+; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP19]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -313,11 +304,11 @@ define void @vscale_mul_64(ptr noalias noundef readonly captures(none) %a, ptr n
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP16]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
-; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP20:%.*]] = mul nuw i64 [[TMP19]], 4
-; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP20]]
+; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP21]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP12]], align 4
-; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP21]], align 4
+; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP19]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -398,11 +389,11 @@ define void @trip_count_with_overflow(ptr noalias noundef readonly captures(none
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP17]], align 4
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP19:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
-; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4
-; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP21]]
+; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP21]], 4
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP22]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP13]], align 4
-; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP22]], align 4
+; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP20]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
@@ -479,11 +470,11 @@ define void @trip_count_too_big_for_element_count(ptr noalias noundef readonly c
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP17]], align 4
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP19:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
-; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4
-; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP21]]
+; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP21]], 4
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP22]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP13]], align 4
-; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP22]], align 4
+; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP20]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
index 473fabfc9fecc..3603afa11ba67 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
@@ -1,17 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
; RUN: opt -S -passes=loop-vectorize < %s | FileCheck %s
target triple = "aarch64-unknown-linux-gnu"
define void @widen_extractvalue(ptr %dst, {i64, i64} %sv) #0 {
-; CHECK-LABEL: @widen_extractvalue(
-; CHECK: [[EXTRACT0:%.*]] = extractvalue { i64, i64 } [[SV:%.*]], 0
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT0]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { i64, i64 } [[SV]], 1
-; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT1]], i64 0
-; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK: [[ADD:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[DOTSPLAT2]]
-; CHECK: vector.body:
+; CHECK-LABEL: define void @widen_extractvalue(
+; CHECK-SAME: ptr [[DST:%.*]], { i64, i64 } [[SV:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i32 [[TMP0]], 2
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 1000, [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP2]], 2
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 1000, [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 1000, [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP4]], 2
+; CHECK-NEXT: [[EXTRACT0:%.*]] = extractvalue { i64, i64 } [[SV]], 0
+; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT0]], i64 0
+; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT2]], [[DOTSPLAT2]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[DST]], i32 [[INDEX]]
+; CHECK-NEXT: store <vscale x 2 x i64> [[TMP7]], ptr [[TMP8]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 1000, [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP_BODY:.*]]
+; CHECK: [[LOOP_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_BODY]] ]
+; CHECK-NEXT: [[A:%.*]] = extractvalue { i64, i64 } [[SV]], 0
+; CHECK-NEXT: [[B:%.*]] = extractvalue { i64, i64 } [[SV]], 1
+; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i64, ptr [[DST]], i32 [[IV]]
+; CHECK-NEXT: [[ADD:%.*]] = add i64 [[A]], [[B]]
+; CHECK-NEXT: store i64 [[ADD]], ptr [[ADDR]], align 8
+; CHECK-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1
+; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[IV_NEXT]], 1000
+; CHECK-NEXT: br i1 [[COND]], label %[[LOOP_BODY]], label %[[EXIT]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
entry:
br label %loop.body
@@ -38,4 +75,3 @@ attributes #0 = { "target-features"="+sve" }
!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
!4 = !{!"llvm.loop.interleave.count", i32 1}
!5 = !{!"llvm.loop.vectorize.enable", i1 true}
-
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll
index 83cb3250fe87b..5bbe7738996df 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll
@@ -36,11 +36,10 @@ define i32 @mla_i32(ptr noalias nocapture readonly %A, ptr noalias nocapture rea
; CHECK: middle.block:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[I_011:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[I_011:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[RES_010:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[I_011]]
; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
@@ -116,11 +115,10 @@ define i32 @mla_i8(ptr noalias nocapture readonly %A, ptr noalias nocapture read
; CHECK: middle.block:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[I_011:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[I_011:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[RES_010:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[I_011]]
; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
@@ -191,11 +189,10 @@ define i32 @add_i32(ptr nocapture readonly %x, i32 %n) #0 {
; CHECK: middle.block:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[I_08]]
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
@@ -386,11 +383,10 @@ define i32 @or_i32(ptr nocapture readonly %x, i32 %n) #0 {
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP4]])
; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[I_08]]
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
@@ -451,11 +447,10 @@ define i32 @xor_i32(ptr nocapture readonly %x, i32 %n) #0 {
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP4]])
; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[I_08]]
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
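
The scalar.ph updates in this file all follow one pattern, which recurs throughout the patch: the induction resume PHI and the reduction merge PHI are textually identical (same incoming value, same incoming block), so CSE keeps a single PHI and the loop-header PHIs are rewritten to use the survivor. In reduced form (hypothetical names, for intuition only):

scalar.ph:
  %bc.resume.val = phi i32 [ 0, %preheader ]   ; removed by CSE
  %bc.merge.rdx = phi i32 [ 0, %preheader ]    ; kept; former users of %bc.resume.val now use this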
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll
index 7c6e7053b4c32..3238b6d031641 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll
@@ -14,8 +14,8 @@ define hidden void @pointer_phi_v4i32_add1(ptr noalias nocapture readonly %A, pt
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i32 [[INDEX]], 2
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 [[OFFSET_IDX]]
-; CHECK-NEXT: [[OFFSET_IDX4:%.*]] = shl i32 [[INDEX]], 2
-; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 [[OFFSET_IDX4]]
+; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = shl i32 [[INDEX]], 2
+; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 [[OFFSET_IDX1]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[NEXT_GEP]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: store <4 x i32> [[TMP0]], ptr [[NEXT_GEP5]], align 4
@@ -162,8 +162,8 @@ define hidden void @pointer_phi_v8i16_add1(ptr noalias nocapture readonly %A, pt
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i32 [[INDEX]], 1
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 [[OFFSET_IDX]]
-; CHECK-NEXT: [[OFFSET_IDX4:%.*]] = shl i32 [[INDEX]], 1
-; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 [[OFFSET_IDX4]]
+; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = shl i32 [[INDEX]], 1
+; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 [[OFFSET_IDX1]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP]], align 2
; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i16> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: store <8 x i16> [[TMP1]], ptr [[NEXT_GEP5]], align 2
@@ -447,8 +447,8 @@ define hidden void @pointer_phi_v4f32_add1(ptr noalias nocapture readonly %A, pt
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i32 [[INDEX]], 2
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 [[OFFSET_IDX]]
-; CHECK-NEXT: [[OFFSET_IDX4:%.*]] = shl i32 [[INDEX]], 2
-; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 [[OFFSET_IDX4]]
+; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = shl i32 [[INDEX]], 2
+; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 [[OFFSET_IDX1]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[NEXT_GEP]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: store <4 x float> [[TMP0]], ptr [[NEXT_GEP5]], align 4
@@ -594,8 +594,8 @@ define hidden void @pointer_phi_v4half_add1(ptr noalias nocapture readonly %A, p
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i32 [[INDEX]], 1
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 [[OFFSET_IDX]]
-; CHECK-NEXT: [[OFFSET_IDX4:%.*]] = shl i32 [[INDEX]], 1
-; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 [[OFFSET_IDX4]]
+; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = shl i32 [[INDEX]], 1
+; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 [[OFFSET_IDX1]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x half>, ptr [[NEXT_GEP]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = fadd fast <8 x half> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: store <8 x half> [[TMP0]], ptr [[NEXT_GEP5]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
index 6e2434aefce9d..bb74993aadce5 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
@@ -147,11 +147,10 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP10]])
; IF-EVL-OUTLOOP-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
; IF-EVL-OUTLOOP: scalar.ph:
-; IF-EVL-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ]
; IF-EVL-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ]
; IF-EVL-OUTLOOP-NEXT: br label [[FOR_BODY:%.*]]
; IF-EVL-OUTLOOP: for.body:
-; IF-EVL-OUTLOOP-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; IF-EVL-OUTLOOP-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; IF-EVL-OUTLOOP-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; IF-EVL-OUTLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[X]], i32 [[I_08]]
; IF-EVL-OUTLOOP-NEXT: [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
@@ -200,11 +199,10 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
; IF-EVL-INLOOP: middle.block:
; IF-EVL-INLOOP-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
; IF-EVL-INLOOP: scalar.ph:
-; IF-EVL-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ]
; IF-EVL-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ]
; IF-EVL-INLOOP-NEXT: br label [[FOR_BODY:%.*]]
; IF-EVL-INLOOP: for.body:
-; IF-EVL-INLOOP-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; IF-EVL-INLOOP-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; IF-EVL-INLOOP-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; IF-EVL-INLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[X]], i32 [[I_08]]
; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
index ed507961ef825..978051542c641 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
@@ -168,11 +168,10 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64
; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> [[TMP11]])
; CHECK-NEXT: br label [[FOR_END:%.*]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
; CHECK-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
index 80f027452c3c1..263bc8f38554e 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
@@ -341,11 +341,10 @@ define void @single_stride_int_iv(ptr %p, i64 %stride) {
; NOSTRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; NOSTRIDED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; NOSTRIDED: scalar.ph:
-; NOSTRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
-; NOSTRIDED-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; NOSTRIDED-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
; NOSTRIDED-NEXT: br label [[LOOP:%.*]]
; NOSTRIDED: loop:
-; NOSTRIDED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; NOSTRIDED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
; NOSTRIDED-NEXT: [[OFFSET:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[OFFSET_NEXT:%.*]], [[LOOP]] ]
; NOSTRIDED-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET]]
; NOSTRIDED-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4
@@ -642,11 +641,10 @@ define void @double_stride_int_iv(ptr %p, ptr %p2, i64 %stride) {
; NOSTRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; NOSTRIDED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; NOSTRIDED: scalar.ph:
-; NOSTRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
-; NOSTRIDED-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; NOSTRIDED-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
; NOSTRIDED-NEXT: br label [[LOOP:%.*]]
; NOSTRIDED: loop:
-; NOSTRIDED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; NOSTRIDED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
; NOSTRIDED-NEXT: [[OFFSET:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[OFFSET_NEXT:%.*]], [[LOOP]] ]
; NOSTRIDED-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET]]
; NOSTRIDED-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll
index a91bc656cc7ef..15d2760421a23 100644
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll
@@ -17,20 +17,11 @@ define void @test_scalar_iv_steps_used_by_replicate_and_first_lane_only_vpinst(p
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[TMP3:%.*]] = mul nsw i64 0, 4
-; CHECK-NEXT: [[TMP4:%.*]] = mul nsw i64 1, 4
-; CHECK-NEXT: [[TMP2:%.*]] = mul nsw i64 2, 4
-; CHECK-NEXT: [[TMP15:%.*]] = mul nsw i64 3, 4
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP2]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP15]]
-; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP8]], align 1
-; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP5]], align 1
-; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP6]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP7]], align 1
-; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i8> poison, i8 [[TMP12]], i32 0
-; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i8> [[TMP16]], i8 [[TMP9]], i32 1
-; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i8> [[TMP13]], i8 [[TMP10]], i32 2
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i8> poison, i8 [[TMP11]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i8> [[TMP5]], i8 [[TMP11]], i32 1
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i8> [[TMP4]], i8 [[TMP11]], i32 2
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i8> [[TMP14]], i8 [[TMP11]], i32 3
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq <4 x i8> [[TMP19]], zeroinitializer
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr [8 x i32], ptr @src, i64 0, i64 4
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll
index 04e0dafba6b86..7ae83d484e831 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll
@@ -98,11 +98,10 @@ define i64 @second_lshr_operand_zero_via_scev() {
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOPS:.*]]
; CHECK: [[LOOPS]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOPS]] ]
-; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOPS]] ]
+; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOPS]] ]
; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[IV]], 0
; CHECK-NEXT: [[AND:%.*]] = and i64 [[IV]], 0
; CHECK-NEXT: [[TMP14:%.*]] = trunc i64 [[IV]] to i32
diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
index fcd94f444e8a5..35a52cf7387c5 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
@@ -250,7 +250,6 @@ define void @multiple_pointer_ivs_with_scalar_uses_only(ptr %A, ptr %B) #0 {
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[A]], i64 8589934368
-; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i8, ptr [[B]], i64 4294967184
; CHECK-NEXT: [[IND_END5:%.*]] = getelementptr i8, ptr [[B]], i64 4294967184
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
@@ -340,7 +339,7 @@ define void @multiple_pointer_ivs_with_scalar_uses_only(ptr %A, ptr %B) #0 {
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ -12, [[MIDDLE_BLOCK]] ], [ 100, [[ENTRY:%.*]] ], [ 100, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 2048, [[ENTRY]] ], [ 2048, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[A]], [[ENTRY]] ], [ [[A]], [[VECTOR_MEMCHECK]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi ptr [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ], [ [[B]], [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi ptr [ [[IND_END5]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ], [ [[B]], [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[IND_END5]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ], [ [[B]], [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll
index 62d08c8668235..e80fe60c69e2a 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll
@@ -29,20 +29,11 @@ define void @foo(ptr nocapture noalias %A, i64 %N) #0 {
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i32 16
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i32 24
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[A]], align 4
-; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP4]], align 4
-; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x float>, ptr [[TMP5]], align 4
-; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x float>, ptr [[TMP6]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = fadd <8 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD]]
-; CHECK-NEXT: [[TMP8:%.*]] = fadd <8 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD2]]
-; CHECK-NEXT: [[TMP9:%.*]] = fadd <8 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD3]]
-; CHECK-NEXT: [[TMP10:%.*]] = fadd <8 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD4]]
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A]], i32 8
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A]], i32 16
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[A]], i32 24
; CHECK-NEXT: store <8 x float> [[TMP7]], ptr [[A]], align 4
-; CHECK-NEXT: store <8 x float> [[TMP8]], ptr [[TMP11]], align 4
-; CHECK-NEXT: store <8 x float> [[TMP9]], ptr [[TMP12]], align 4
-; CHECK-NEXT: store <8 x float> [[TMP10]], ptr [[TMP13]], align 4
+; CHECK-NEXT: store <8 x float> [[TMP7]], ptr [[TMP4]], align 4
+; CHECK-NEXT: store <8 x float> [[TMP7]], ptr [[TMP5]], align 4
+; CHECK-NEXT: store <8 x float> [[TMP7]], ptr [[TMP6]], align 4
; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
;
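
The uniform_load.ll hunk above is the clearest illustration of what the new CSE pass buys: all four interleaved parts load the same uniform address and perform the same fadd, so a single value ([[TMP7]]) is computed once and stored to each of the four destinations. A minimal standalone sketch of the same redundancy, expressed as plain IR (hypothetical function, not part of this patch; the pass itself runs on VPlan recipes, the IR view is just for intuition):

define void @uniform_cse(ptr noalias %A, <8 x float> %bcast) {
entry:
  %l0 = load <8 x float>, ptr %A, align 4
  %l1 = load <8 x float>, ptr %A, align 4     ; same address and type: folds to %l0
  %add0 = fadd <8 x float> %bcast, %l0
  %add1 = fadd <8 x float> %bcast, %l1        ; identical to %add0 once %l1 is folded
  %gep8 = getelementptr inbounds float, ptr %A, i32 8
  store <8 x float> %add0, ptr %A, align 4
  store <8 x float> %add1, ptr %gep8, align 4 ; after CSE this stores %add0
  ret void
}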
diff --git a/llvm/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll b/llvm/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll
index 76230994ed87d..9d10a2d2f98c0 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll
@@ -6,7 +6,7 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
-; We don't unroll this loop because it has a small constant trip count
+; We don't unroll this loop because it has a small constant trip count
; that is not profitable for generating a scalar epilogue
;
; CHECK-VECTOR-LABEL: @foo_trip_count_8(
@@ -47,9 +47,6 @@ for.end: ; preds = %for.body
;
; CHECK-VECTOR-LABEL: @foo_trip_count_16(
; CHECK-VECTOR: load <4 x i32>
-; CHECK-VECTOR: load <4 x i32>
-; CHECK-VECTOR: load <4 x i32>
-; CHECK-VECTOR: load <4 x i32>
; CHECK-VECTOR-NOT: load <4 x i32>
; CHECK-VECTOR: store <4 x i32>
; CHECK-VECTOR: store <4 x i32>
@@ -125,8 +122,8 @@ for.end: ; preds = %for.body
ret void
}
-; We should unroll this loop twice since unrolling four times will
-; create an epilogue loop of TC 8, while unrolling it twice will
+; We should unroll this loop twice since unrolling four times will
+; create an epilogue loop of TC 8, while unrolling it twice will
; eliminate the epilogue loop altogether
;
; CHECK-VECTOR-LABEL: @foo_trip_count_24(
@@ -245,7 +242,7 @@ for.end: ; preds = %for.body
; We should unroll this loop 4 times since TC not being a multiple of VF may require
; the epilogue loop to run, making it profitable when the vector loop runs
-; at least twice. The IC is restricted to 4 since that is the maximum supported
+; at least twice. The IC is restricted to 4 since that is the maximum supported
; for the target.
;
; CHECK-VECTOR-LABEL: @foo_trip_count_101(
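
As a cross-check on the interleaving comments in this file, the trip-count arithmetic works out as follows (assuming VF = 4, which matches the <4 x i32> checks):

  @foo_trip_count_24 with IC = 2: 24 mod (4 * 2) = 0, so no epilogue loop is needed.
  @foo_trip_count_24 with IC = 4: 24 mod (4 * 4) = 8, leaving an epilogue loop of TC 8.
  @foo_trip_count_101 with IC = 4: 101 mod 4 = 1, so an epilogue may run regardless, and the vector loop still executes floor(101 / 16) = 6 times, i.e. at least twice.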
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll
index 74df675a75cbd..f9b3d37607092 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll
@@ -1024,11 +1024,10 @@ define void @test_for_sink_instruction_after_same_incoming_1(ptr %ptr) {
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3
; CHECK-NEXT: br label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 1.000000e+01, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ], [ 1.000000e+01, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ], [ 2.000000e+01, [[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
@@ -1090,11 +1089,10 @@ define void @test_for_sink_instruction_after_same_incoming_2(ptr %ptr) {
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3
; CHECK-NEXT: br label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 2.000000e+01, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ], [ 2.000000e+01, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ], [ 1.000000e+01, [[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll
index 715ea1c51aba6..b815c9d41e35c 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll
@@ -122,12 +122,11 @@ define void @test_pr54223_sink_after_insertion_order(ptr noalias %a, ptr noalias
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ]
; CHECK-NEXT: [[SCALAR_RECUR_INIT5:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi float [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi float [ [[SCALAR_RECUR_INIT5]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[SCALAR_RECUR6:%.*]] = phi float [ [[SCALAR_RECUR_INIT5]], [[SCALAR_PH]] ], [ [[FOR_2_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[NEG:%.*]] = fneg float [[SCALAR_RECUR6]]
; CHECK-NEXT: [[MULADD:%.*]] = call float @llvm.fmuladd.f32(float [[SCALAR_RECUR]], float [[NEG]], float 0.000000e+00)
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
index 3adfcf53e4564..9dd3def5369db 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
@@ -1195,11 +1195,10 @@ define i64 @constant_folded_previous_value() {
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: br label [[FOR_END:%.*]]
; UNROLL-NO-IC: scalar.ph:
-; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
-; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY]] ]
+; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
; UNROLL-NO-IC-NEXT: br label [[SCALAR_BODY:%.*]]
; UNROLL-NO-IC: scalar.body:
-; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
+; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VAR2:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[VAR3]] = add i64 0, 1
; UNROLL-NO-IC-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
@@ -1222,11 +1221,10 @@ define i64 @constant_folded_previous_value() {
; UNROLL-NO-VF: middle.block:
; UNROLL-NO-VF-NEXT: br label [[FOR_END:%.*]]
; UNROLL-NO-VF: scalar.ph:
-; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
-; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY]] ]
+; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
; UNROLL-NO-VF-NEXT: br label [[SCALAR_BODY:%.*]]
; UNROLL-NO-VF: scalar.body:
-; UNROLL-NO-VF-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
+; UNROLL-NO-VF-NEXT: [[I:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VAR2:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[VAR3]] = add i64 0, 1
; UNROLL-NO-VF-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
@@ -1249,11 +1247,10 @@ define i64 @constant_folded_previous_value() {
; SINK-AFTER: middle.block:
; SINK-AFTER-NEXT: br label [[FOR_END:%.*]]
; SINK-AFTER: scalar.ph:
-; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
-; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY]] ]
+; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
; SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]]
; SINK-AFTER: scalar.body:
-; SINK-AFTER-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
+; SINK-AFTER-NEXT: [[I:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
; SINK-AFTER-NEXT: [[VAR2:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ]
; SINK-AFTER-NEXT: [[VAR3]] = add i64 0, 1
; SINK-AFTER-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
@@ -2743,7 +2740,6 @@ define i32 @sink_into_replication_region(i32 %y) {
; UNROLL-NO-IC-NEXT: br label [[BB1:%.*]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[Y]], [[BB:%.*]] ]
-; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB]] ]
; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ]
; UNROLL-NO-IC-NEXT: br label [[BB2:%.*]]
; UNROLL-NO-IC: bb1:
@@ -2751,7 +2747,7 @@ define i32 @sink_into_replication_region(i32 %y) {
; UNROLL-NO-IC-NEXT: ret i32 [[VAR]]
; UNROLL-NO-IC: bb2:
; UNROLL-NO-IC-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; UNROLL-NO-IC-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
+; UNROLL-NO-IC-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; UNROLL-NO-IC-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; UNROLL-NO-IC-NEXT: [[VAR6]] = add i32 [[VAR5]], [[VAR4]]
; UNROLL-NO-IC-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]]
@@ -2806,7 +2802,6 @@ define i32 @sink_into_replication_region(i32 %y) {
; UNROLL-NO-VF-NEXT: br label [[BB1:%.*]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[Y]], [[BB:%.*]] ]
-; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB]] ]
; UNROLL-NO-VF-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ]
; UNROLL-NO-VF-NEXT: br label [[BB2:%.*]]
; UNROLL-NO-VF: bb1:
@@ -2814,7 +2809,7 @@ define i32 @sink_into_replication_region(i32 %y) {
; UNROLL-NO-VF-NEXT: ret i32 [[VAR]]
; UNROLL-NO-VF: bb2:
; UNROLL-NO-VF-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; UNROLL-NO-VF-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
+; UNROLL-NO-VF-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; UNROLL-NO-VF-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; UNROLL-NO-VF-NEXT: [[VAR6]] = add i32 [[VAR5]], [[VAR4]]
; UNROLL-NO-VF-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]]
@@ -2892,7 +2887,6 @@ define i32 @sink_into_replication_region(i32 %y) {
; SINK-AFTER-NEXT: br label [[BB1:%.*]]
; SINK-AFTER: scalar.ph:
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[Y]], [[BB:%.*]] ]
-; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB]] ]
; SINK-AFTER-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ]
; SINK-AFTER-NEXT: br label [[BB2:%.*]]
; SINK-AFTER: bb1:
@@ -2900,7 +2894,7 @@ define i32 @sink_into_replication_region(i32 %y) {
; SINK-AFTER-NEXT: ret i32 [[VAR]]
; SINK-AFTER: bb2:
; SINK-AFTER-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; SINK-AFTER-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
+; SINK-AFTER-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; SINK-AFTER-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; SINK-AFTER-NEXT: [[VAR6]] = add i32 [[VAR5]], [[VAR4]]
; SINK-AFTER-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]]
@@ -3105,8 +3099,6 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) {
; UNROLL-NO-IC-NEXT: br label [[BB1:%.*]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[Y]], [[BB:%.*]] ]
-; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, [[BB]] ]
-; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB]] ]
; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ]
; UNROLL-NO-IC-NEXT: br label [[BB2:%.*]]
; UNROLL-NO-IC: bb1:
@@ -3114,8 +3106,8 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) {
; UNROLL-NO-IC-NEXT: ret i32 [[VAR]]
; UNROLL-NO-IC: bb2:
; UNROLL-NO-IC-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; UNROLL-NO-IC-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
+; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
+; UNROLL-NO-IC-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; UNROLL-NO-IC-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; UNROLL-NO-IC-NEXT: [[G:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[IV]]
; UNROLL-NO-IC-NEXT: [[VAR6]] = add i32 [[VAR5]], [[VAR4]]
@@ -3186,8 +3178,6 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) {
; UNROLL-NO-VF-NEXT: br label [[BB1:%.*]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[Y]], [[BB:%.*]] ]
-; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, [[BB]] ]
-; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB]] ]
; UNROLL-NO-VF-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ]
; UNROLL-NO-VF-NEXT: br label [[BB2:%.*]]
; UNROLL-NO-VF: bb1:
@@ -3195,8 +3185,8 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) {
; UNROLL-NO-VF-NEXT: ret i32 [[VAR]]
; UNROLL-NO-VF: bb2:
; UNROLL-NO-VF-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; UNROLL-NO-VF-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; UNROLL-NO-VF-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
+; UNROLL-NO-VF-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
+; UNROLL-NO-VF-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; UNROLL-NO-VF-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; UNROLL-NO-VF-NEXT: [[G:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[IV]]
; UNROLL-NO-VF-NEXT: [[VAR6]] = add i32 [[VAR5]], [[VAR4]]
@@ -3308,8 +3298,6 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) {
; SINK-AFTER-NEXT: br label [[BB1:%.*]]
; SINK-AFTER: scalar.ph:
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[Y]], [[BB:%.*]] ]
-; SINK-AFTER-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, [[BB]] ]
-; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB]] ]
; SINK-AFTER-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ]
; SINK-AFTER-NEXT: br label [[BB2:%.*]]
; SINK-AFTER: bb1:
@@ -3317,8 +3305,8 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) {
; SINK-AFTER-NEXT: ret i32 [[VAR]]
; SINK-AFTER: bb2:
; SINK-AFTER-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; SINK-AFTER-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; SINK-AFTER-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
+; SINK-AFTER-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
+; SINK-AFTER-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; SINK-AFTER-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; SINK-AFTER-NEXT: [[G:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[IV]]
; SINK-AFTER-NEXT: [[VAR6]] = add i32 [[VAR5]], [[VAR4]]
diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
index 77b91ccb913cf..d36b849589513 100644
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -5834,12 +5834,11 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, [[ENTRY]] ]
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[TRUNC_IV:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[TRUNC_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[TRUNC_IV:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TRUNC_IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_TRUNC:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[LV:%.*]] = load i32, ptr [[SRC]], align 4
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[LV]], [[RECUR]]
@@ -5956,12 +5955,11 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
; UNROLL-NO-IC-NEXT: br label [[EXIT:%.*]]
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
-; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, [[ENTRY]] ]
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ]
; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]]
; UNROLL-NO-IC: loop:
; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; UNROLL-NO-IC-NEXT: [[TRUNC_IV:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[TRUNC_IV_NEXT:%.*]], [[LOOP]] ]
+; UNROLL-NO-IC-NEXT: [[TRUNC_IV:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TRUNC_IV_NEXT:%.*]], [[LOOP]] ]
; UNROLL-NO-IC-NEXT: [[RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_TRUNC:%.*]], [[LOOP]] ]
; UNROLL-NO-IC-NEXT: [[LV:%.*]] = load i32, ptr [[SRC]], align 4
; UNROLL-NO-IC-NEXT: [[MUL:%.*]] = mul nsw i32 [[LV]], [[RECUR]]
diff --git a/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll b/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll
index 8a4820949af12..2384c5d4c9c90 100644
--- a/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll
@@ -36,12 +36,11 @@ define void @i65_induction_with_negative_step(ptr %dst) {
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i65 [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[IV_I65:%.*]] = phi i65 [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[IV_I65_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[FOR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[TRUNC:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[FOR:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[TRUNC:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[TRUNC]] = trunc i65 [[IV_I65]] to i64
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TRUNC]]
; CHECK-NEXT: store i64 [[FOR]], ptr [[GEP]], align 8
diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-different-insert-position.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-different-insert-position.ll
index 651210df823dd..7ffa8c07b8825 100644
--- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-different-insert-position.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-different-insert-position.ll
@@ -29,11 +29,10 @@ define void @gep_for_first_member_does_not_dominate_insert_point(ptr %str, ptr n
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[OR_1:%.*]] = or disjoint i64 [[IV2]], 1
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[STR]], i64 [[OR_1]]
; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[GEP1]], align 1
diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
index 97d33858bd830..f8a7f454ce7ce 100644
--- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
@@ -1372,7 +1372,7 @@ define i64 @test_iv_increment_incremented(ptr %dst) {
; VEC-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[TMP0]], i32 0
; VEC-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[TMP1]], i32 -1
; VEC-NEXT: store <2 x i16> splat (i16 1), ptr [[TMP2]], align 2
-; VEC-NEXT: [[TMP5:%.*]] = add i64 1, -1
+; VEC-NEXT: [[TMP5:%.*]] = add i64 2, -1
; VEC-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], 1
; VEC-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; VEC: [[MIDDLE_BLOCK]]:
@@ -1402,9 +1402,7 @@ define i64 @test_iv_increment_incremented(ptr %dst) {
; INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]]
; INTERLEAVE: [[VECTOR_BODY]]:
; INTERLEAVE-NEXT: [[TMP0:%.*]] = getelementptr i16, ptr [[DST]], i64 3
-; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[DST]], i64 2
; INTERLEAVE-NEXT: store i16 1, ptr [[TMP0]], align 2
-; INTERLEAVE-NEXT: store i16 1, ptr [[TMP1]], align 2
; INTERLEAVE-NEXT: [[TMP2:%.*]] = add i64 1, -1
; INTERLEAVE-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], 1
; INTERLEAVE-NEXT: br label %[[MIDDLE_BLOCK:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll b/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll
index 496285a276923..64ab06d7d0b2c 100644
--- a/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll
+++ b/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll
@@ -47,10 +47,10 @@ define void @test_ptr_iv_no_inbounds(ptr %p1.start, ptr %p2.start, ptr %p1.end)
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[P1_START]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[OFFSET_IDX10:%.*]] = mul i64 [[INDEX]], 4
-; CHECK-NEXT: [[NEXT_GEP11:%.*]] = getelementptr i8, ptr [[P2_START]], i64 [[OFFSET_IDX10]]
+; CHECK-NEXT: [[OFFSET_IDX8:%.*]] = mul i64 [[INDEX]], 4
+; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[P2_START]], i64 [[OFFSET_IDX8]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[NEXT_GEP]], align 4, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
-; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <2 x float>, ptr [[NEXT_GEP11]], align 4, !alias.scope [[META3]]
+; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <2 x float>, ptr [[NEXT_GEP9]], align 4, !alias.scope [[META3]]
; CHECK-NEXT: [[TMP19:%.*]] = fadd <2 x float> [[WIDE_LOAD]], [[WIDE_LOAD12]]
; CHECK-NEXT: store <2 x float> [[TMP19]], ptr [[NEXT_GEP]], align 4, !alias.scope [[META0]], !noalias [[META3]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
@@ -133,10 +133,10 @@ define void @test_ptr_iv_with_inbounds(ptr %p1.start, ptr %p2.start, ptr %p1.end
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[P1_START]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[OFFSET_IDX8:%.*]] = mul i64 [[INDEX]], 4
-; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[P2_START]], i64 [[OFFSET_IDX8]]
+; CHECK-NEXT: [[OFFSET_IDX6:%.*]] = mul i64 [[INDEX]], 4
+; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[P2_START]], i64 [[OFFSET_IDX6]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[NEXT_GEP]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META12:![0-9]+]]
-; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <2 x float>, ptr [[NEXT_GEP9]], align 4, !alias.scope [[META12]]
+; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <2 x float>, ptr [[NEXT_GEP7]], align 4, !alias.scope [[META12]]
; CHECK-NEXT: [[TMP15:%.*]] = fadd <2 x float> [[WIDE_LOAD]], [[WIDE_LOAD10]]
; CHECK-NEXT: store <2 x float> [[TMP15]], ptr [[NEXT_GEP]], align 4, !alias.scope [[META9]], !noalias [[META12]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
index d1ae9ce5238c1..732dad01dced9 100644
--- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
@@ -26,13 +26,13 @@ define void @a(ptr readnone %b) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE10:%.*]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 0, [[INDEX]]
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], -1
-; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], -2
-; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], -3
+; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], -1
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], -2
+; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], -3
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP2]]
-; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]]
-; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP14]]
-; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr null, i64 [[TMP17]]
+; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP9]]
+; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]]
+; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr null, i64 [[TMP6]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 -3
@@ -42,22 +42,22 @@ define void @a(ptr readnone %b) {
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0
; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1
-; CHECK-NEXT: store i8 95, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1
+; CHECK-NEXT: store i8 95, ptr [[TMP12]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
; CHECK: pred.store.if5:
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP2]], i64 -1
-; CHECK-NEXT: store i8 95, ptr [[TMP12]], align 1
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP2]], i64 -1
+; CHECK-NEXT: store i8 95, ptr [[TMP14]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
; CHECK: pred.store.continue6:
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP7]], i32 2
; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
; CHECK: pred.store.if7:
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP3]], i64 -1
-; CHECK-NEXT: store i8 95, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP3]], i64 -1
+; CHECK-NEXT: store i8 95, ptr [[TMP17]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
; CHECK: pred.store.continue8:
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3
diff --git a/llvm/test/Transforms/LoopVectorize/pr36983.ll b/llvm/test/Transforms/LoopVectorize/pr36983.ll
index 7e38d60b6f581..bbff2846e2114 100644
--- a/llvm/test/Transforms/LoopVectorize/pr36983.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr36983.ll
@@ -1,12 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
; RUN: opt < %s -passes=loop-vectorize -S | FileCheck %s
; There could be more than one LCSSA PHI in the loop exit block.
-; CHECK-LABEL: bb1.bb3_crit_edge:
-; CHECK: %_tmp133.lcssa1 = phi i16 [ %_tmp133, %bb2 ], [ %vector.recur.extract.for.phi, %middle.block ]
-; CHECK: %_tmp133.lcssa = phi i16 [ %_tmp133, %bb2 ], [ %vector.recur.extract.for.phi1, %middle.block ]
-
define void @f1() {
+; CHECK-LABEL: define void @f1() {
+; CHECK-NEXT: [[BB2_LR_PH:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 -1, i16 -2, i16 -3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = sub nsw <4 x i16> [[VEC_IND]], splat (i16 1)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 -4)
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 65536
+; CHECK-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP0]], i32 2
+; CHECK-NEXT: br label %[[BB1_BB3_CRIT_EDGE:.*]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, %[[BB2_LR_PH]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ undef, %[[BB2_LR_PH]] ]
+; CHECK-NEXT: br label %[[BB2:.*]]
+; CHECK: [[BB2]]:
+; CHECK-NEXT: [[_TMP132:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[_TMP10:%.*]], %[[BB2]] ]
+; CHECK-NEXT: [[_TMP133:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[_TMP10]], %[[BB2]] ]
+; CHECK-NEXT: [[_TMP10]] = sub nsw i16 [[_TMP132]], 1
+; CHECK-NEXT: [[_TMP15:%.*]] = icmp ne i16 [[_TMP10]], 0
+; CHECK-NEXT: br i1 [[_TMP15]], label %[[BB2]], label %[[BB1_BB3_CRIT_EDGE]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: [[BB1_BB3_CRIT_EDGE]]:
+; CHECK-NEXT: [[_TMP133_LCSSA1:%.*]] = phi i16 [ [[_TMP133]], %[[BB2]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[_TMP133_LCSSA:%.*]] = phi i16 [ [[_TMP133]], %[[BB2]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret void
+;
bb2.lr.ph:
br label %bb2
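
For context on the comment at the top of this test: LCSSA form permits several exit-block PHIs that carry the same loop value, for instance (hypothetical reduction):

bb1.bb3_crit_edge:
  %x.lcssa1 = phi i16 [ %x, %bb2 ]
  %x.lcssa = phi i16 [ %x, %bb2 ]    ; a second LCSSA PHI for the same value

After vectorization both PHIs must pick up the same middle-block value, which is why the two regenerated CHECK lines above now reference the single [[VECTOR_RECUR_EXTRACT_FOR_PHI]] rather than two distinct extracts.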
diff --git a/llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll b/llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll
index b6c72056b0c5c..611e6c184625c 100644
--- a/llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll
@@ -24,11 +24,10 @@ define void @reduced(ptr %0, ptr %1, i64 %iv, ptr %2, i64 %iv76, i64 %iv93) {
; CHECK-NEXT: [[IND_ESCAPE:%.*]] = sub i64 [[IND_END]], 1
; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_2_PREHEADER:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[LOOP_1:%.*]]
; CHECK: loop.1:
-; CHECK-NEXT: [[IV761:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT77:%.*]], [[LOOP_1]] ]
+; CHECK-NEXT: [[IV761:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV_NEXT77:%.*]], [[LOOP_1]] ]
; CHECK-NEXT: [[IV4:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_1]] ]
; CHECK-NEXT: [[IV_NEXT77]] = add i64 [[IV761]], 1
; CHECK-NEXT: [[ARRAYIDX_I_I50:%.*]] = getelementptr i32, ptr [[TMP0:%.*]], i64 [[IV76:%.*]]
@@ -49,20 +48,20 @@ define void @reduced(ptr %0, ptr %1, i64 %iv, ptr %2, i64 %iv76, i64 %iv93) {
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SCEVGEP2]], [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH5]], label [[VECTOR_PH6:%.*]]
-; CHECK: vector.ph6:
+; CHECK: vector.ph5:
; CHECK-NEXT: [[N_MOD_VF8:%.*]] = urem i64 [[TMP3]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF8]]
; CHECK-NEXT: br label [[VECTOR_BODY10:%.*]]
-; CHECK: vector.body9:
+; CHECK: vector.body8:
; CHECK-NEXT: [[INDEX12:%.*]] = phi i64 [ 0, [[VECTOR_PH6]] ], [ [[INDEX_NEXT13:%.*]], [[VECTOR_BODY10]] ]
-; CHECK-NEXT: store i32 0, ptr [[TMP1]], align 4, !alias.scope !4, !noalias !7
+; CHECK-NEXT: store i32 0, ptr [[TMP1]], align 4, !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]]
; CHECK-NEXT: [[INDEX_NEXT13]] = add nuw i64 [[INDEX12]], 4
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT13]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK12:%.*]], label [[VECTOR_BODY10]], !llvm.loop [[LOOP9:![0-9]+]]
-; CHECK: middle.block12:
+; CHECK: middle.block11:
; CHECK-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N10]], label [[LOOP_3_LR_PH:%.*]], label [[SCALAR_PH5]]
-; CHECK: scalar.ph4:
+; CHECK: scalar.ph3:
; CHECK-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK12]] ], [ 0, [[LOOP_2_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: br label [[LOOP_2:%.*]]
; CHECK: loop.3.lr.ph:
@@ -70,7 +69,7 @@ define void @reduced(ptr %0, ptr %1, i64 %iv, ptr %2, i64 %iv76, i64 %iv93) {
; CHECK-NEXT: [[ARRAYIDX_I_I62:%.*]] = getelementptr i32, ptr [[TMP0]], i64 [[IDXPROM_I_I61]]
; CHECK-NEXT: [[MIN_ITERS_CHECK22:%.*]] = icmp ult i64 [[TMP3]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK22]], label [[SCALAR_PH21:%.*]], label [[VECTOR_MEMCHECK15:%.*]]
-; CHECK: vector.memcheck15:
+; CHECK: vector.memcheck14:
; CHECK-NEXT: [[SCEVGEP15:%.*]] = getelementptr i8, ptr [[TMP1]], i64 4
; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[IDXPROM_I_I61]], 2
; CHECK-NEXT: [[TMP13:%.*]] = add nuw nsw i64 [[TMP12]], 4
@@ -79,20 +78,20 @@ define void @reduced(ptr %0, ptr %1, i64 %iv, ptr %2, i64 %iv76, i64 %iv93) {
; CHECK-NEXT: [[BOUND118:%.*]] = icmp ult ptr [[ARRAYIDX_I_I62]], [[SCEVGEP15]]
; CHECK-NEXT: [[FOUND_CONFLICT19:%.*]] = and i1 [[BOUND017]], [[BOUND118]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT19]], label [[SCALAR_PH21]], label [[VECTOR_PH24:%.*]]
-; CHECK: vector.ph23:
+; CHECK: vector.ph22:
; CHECK-NEXT: [[N_MOD_VF24:%.*]] = urem i64 [[TMP3]], 4
; CHECK-NEXT: [[N_VEC25:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF24]]
; CHECK-NEXT: br label [[VECTOR_BODY27:%.*]]
-; CHECK: vector.body26:
+; CHECK: vector.body25:
; CHECK-NEXT: [[INDEX29:%.*]] = phi i64 [ 0, [[VECTOR_PH24]] ], [ [[INDEX_NEXT29:%.*]], [[VECTOR_BODY27]] ]
; CHECK-NEXT: store i32 0, ptr [[TMP1]], align 4, !alias.scope [[META10:![0-9]+]], !noalias [[META13:![0-9]+]]
; CHECK-NEXT: [[INDEX_NEXT29]] = add nuw i64 [[INDEX29]], 4
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT29]], [[N_VEC25]]
; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK29:%.*]], label [[VECTOR_BODY27]], !llvm.loop [[LOOP15:![0-9]+]]
-; CHECK: middle.block29:
+; CHECK: middle.block28:
; CHECK-NEXT: [[CMP_N27:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC25]]
; CHECK-NEXT: br i1 [[CMP_N27]], label [[LOOP_CLEANUP:%.*]], label [[SCALAR_PH21]]
-; CHECK: scalar.ph21:
+; CHECK: scalar.ph20:
; CHECK-NEXT: [[BC_RESUME_VAL26:%.*]] = phi i64 [ [[N_VEC25]], [[MIDDLE_BLOCK29]] ], [ 0, [[LOOP_3_LR_PH]] ], [ 0, [[VECTOR_MEMCHECK15]] ]
; CHECK-NEXT: br label [[LOOP_3:%.*]]
; CHECK: loop.2:
diff --git a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll
index cea16c9eb7513..7ba11eb788a61 100644
--- a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll
@@ -40,12 +40,11 @@ define i32 @reverse_induction_i64(i64 %startval, ptr %ptr) {
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[STARTVAL]], %[[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[ADD_I7:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[ADD_I:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ], [ [[INC4:%.*]], %[[FOR_BODY]] ]
-; CHECK-NEXT: [[REDUX5:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[INC_REDUX:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[REDUX5:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ], [ [[INC_REDUX:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ADD_I]] = add i64 [[ADD_I7]], -1
; CHECK-NEXT: [[KIND__I:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[ADD_I]]
; CHECK-NEXT: [[TMP_I1:%.*]] = load i32, ptr [[KIND__I]], align 4
@@ -111,12 +110,11 @@ define i32 @reverse_induction_i128(i128 %startval, ptr %ptr) {
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i128 [ [[STARTVAL]], %[[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[ADD_I7:%.*]] = phi i128 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[ADD_I:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ], [ [[INC4:%.*]], %[[FOR_BODY]] ]
-; CHECK-NEXT: [[REDUX5:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[INC_REDUX:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[REDUX5:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ], [ [[INC_REDUX:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ADD_I]] = add i128 [[ADD_I7]], -1
; CHECK-NEXT: [[KIND__I:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i128 [[ADD_I]]
; CHECK-NEXT: [[TMP_I1:%.*]] = load i32, ptr [[KIND__I]], align 4
@@ -192,12 +190,11 @@ define i32 @reverse_induction_i16(i16 %startval, ptr %ptr) {
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[STARTVAL]], %[[ENTRY]] ], [ [[STARTVAL]], %[[VECTOR_SCEVCHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[ADD_I7:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[ADD_I:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ], [ [[INC4:%.*]], %[[FOR_BODY]] ]
-; CHECK-NEXT: [[REDUX5:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[INC_REDUX:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[REDUX5:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ], [ [[INC_REDUX:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ADD_I]] = add i16 [[ADD_I7]], -1
; CHECK-NEXT: [[KIND__I:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i16 [[ADD_I]]
; CHECK-NEXT: [[TMP_I1:%.*]] = load i32, ptr [[KIND__I]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll
index 842ff910c89d3..bfc8e8d843c90 100644
--- a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll
+++ b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll
@@ -183,10 +183,10 @@ define void @multiple_incoming_phi_with_blend_mask(i64 %a, ptr noalias %dst) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i16> [[VEC_IND3]], <2 x i16> [[VEC_IND1]]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i16> [[VEC_IND2]], <2 x i16> [[VEC_IND3]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 1
@@ -199,8 +199,8 @@ define void @multiple_incoming_phi_with_blend_mask(i64 %a, ptr noalias %dst) {
; CHECK-NEXT: store <2 x i16> [[TMP9]], ptr [[TMP10]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
-; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], splat (i16 2)
; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <2 x i16> [[VEC_IND3]], splat (i16 2)
+; CHECK-NEXT: [[VEC_IND_NEXT5]] = add <2 x i16> [[VEC_IND2]], splat (i16 2)
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-branch-weights.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-branch-weights.ll
index 38dbbbb21583a..9b9ea6391f45f 100644
--- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-branch-weights.ll
+++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-branch-weights.ll
@@ -57,23 +57,20 @@ define void @test_tc_between_8_and_17(ptr %A, i64 range(i64 8, 17) %N) {
; VF8UF2: [[VECTOR_BODY]]:
; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[A]], i32 8
; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[A]], align 1
-; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
; VF8UF2-NEXT: [[TMP3:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10)
-; VF8UF2-NEXT: [[TMP4:%.*]] = add nsw <8 x i8> [[WIDE_LOAD1]], splat (i8 10)
-; VF8UF2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i32 8
; VF8UF2-NEXT: store <8 x i8> [[TMP3]], ptr [[A]], align 1
-; VF8UF2-NEXT: store <8 x i8> [[TMP4]], ptr [[TMP6]], align 1
+; VF8UF2-NEXT: store <8 x i8> [[TMP3]], ptr [[TMP2]], align 1
; VF8UF2-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; VF8UF2: [[MIDDLE_BLOCK]]:
; VF8UF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; VF8UF2-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]], !prof [[PROF1:![0-9]+]]
; VF8UF2: [[SCALAR_PH]]:
; VF8UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; VF8UF2-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
+; VF8UF2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
; VF8UF2-NEXT: br label %[[LOOP:.*]]
; VF8UF2: [[LOOP]]:
; VF8UF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; VF8UF2-NEXT: [[P_SRC:%.*]] = phi ptr [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[P_SRC_NEXT:%.*]], %[[LOOP]] ]
+; VF8UF2-NEXT: [[P_SRC:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[P_SRC_NEXT:%.*]], %[[LOOP]] ]
; VF8UF2-NEXT: [[P_SRC_NEXT]] = getelementptr inbounds i8, ptr [[P_SRC]], i64 1
; VF8UF2-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
; VF8UF2-NEXT: [[ADD:%.*]] = add nsw i8 [[L]], 10
diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll
index 3d4431795a8c0..9fc531e38ee5e 100644
--- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll
+++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll
@@ -54,12 +54,9 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosyn
; VF8UF2: [[VECTOR_PH]]:
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF2: [[VECTOR_BODY]]:
-; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 8
; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[A]], align 1
-; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1
; VF8UF2-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
-; VF8UF2-NEXT: [[TMP2:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD1]], zeroinitializer
-; VF8UF2-NEXT: [[TMP3:%.*]] = or <8 x i1> [[TMP1]], [[TMP2]]
+; VF8UF2-NEXT: [[TMP3:%.*]] = or <8 x i1> [[TMP1]], [[TMP1]]
; VF8UF2-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]])
; VF8UF2-NEXT: br label %[[MIDDLE_SPLIT:.*]]
; VF8UF2: [[MIDDLE_SPLIT]]:
@@ -188,12 +185,9 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer
; VF8UF2: [[VECTOR_PH]]:
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF2: [[VECTOR_BODY]]:
-; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 8
; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[A]], align 1
-; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1
-; VF8UF2-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
-; VF8UF2-NEXT: [[TMP2:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD1]], zeroinitializer
-; VF8UF2-NEXT: [[TMP3:%.*]] = or <8 x i1> [[TMP1]], [[TMP2]]
+; VF8UF2-NEXT: [[TMP2:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
+; VF8UF2-NEXT: [[TMP3:%.*]] = or <8 x i1> [[TMP2]], [[TMP2]]
; VF8UF2-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]])
; VF8UF2-NEXT: br label %[[MIDDLE_SPLIT:.*]]
; VF8UF2: [[MIDDLE_SPLIT]]:
@@ -203,7 +197,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer
; VF8UF2: [[VECTOR_EARLY_EXIT]]:
; VF8UF2-NEXT: [[TMP5:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP2]], i1 true)
; VF8UF2-NEXT: [[TMP7:%.*]] = add i64 8, [[TMP5]]
-; VF8UF2-NEXT: [[TMP8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP1]], i1 true)
+; VF8UF2-NEXT: [[TMP8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP2]], i1 true)
; VF8UF2-NEXT: [[TMP9:%.*]] = add i64 0, [[TMP8]]
; VF8UF2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP8]], 8
; VF8UF2-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 [[TMP7]]
diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
index 59c76aefbb90f..b87c6d22dd0db 100644
--- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
+++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
@@ -63,23 +63,20 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) {
; VF8UF2: [[VECTOR_BODY]]:
; VF8UF2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[A]], i32 8
; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[A]], align 1
-; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP3]], align 1
; VF8UF2-NEXT: [[TMP4:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10)
-; VF8UF2-NEXT: [[TMP5:%.*]] = add nsw <8 x i8> [[WIDE_LOAD1]], splat (i8 10)
-; VF8UF2-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[A]], i32 8
; VF8UF2-NEXT: store <8 x i8> [[TMP4]], ptr [[A]], align 1
-; VF8UF2-NEXT: store <8 x i8> [[TMP5]], ptr [[TMP7]], align 1
+; VF8UF2-NEXT: store <8 x i8> [[TMP4]], ptr [[TMP3]], align 1
; VF8UF2-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; VF8UF2: [[MIDDLE_BLOCK]]:
; VF8UF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[AND]], [[N_VEC]]
; VF8UF2-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; VF8UF2: [[SCALAR_PH]]:
; VF8UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ [[AND]], %[[ENTRY]] ]
-; VF8UF2-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
+; VF8UF2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
; VF8UF2-NEXT: br label %[[LOOP:.*]]
; VF8UF2: [[LOOP]]:
; VF8UF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; VF8UF2-NEXT: [[P_SRC:%.*]] = phi ptr [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[P_SRC_NEXT:%.*]], %[[LOOP]] ]
+; VF8UF2-NEXT: [[P_SRC:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[P_SRC_NEXT:%.*]], %[[LOOP]] ]
; VF8UF2-NEXT: [[P_SRC_NEXT]] = getelementptr inbounds i8, ptr [[P_SRC]], i64 1
; VF8UF2-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
; VF8UF2-NEXT: [[ADD:%.*]] = add nsw i8 [[L]], 10
@@ -713,9 +710,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF1-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0
; VF8UF1-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF8UF1: [[PRED_STORE_IF]]:
-; VF8UF1-NEXT: [[TMP5:%.*]] = mul i64 0, [[STEP]]
+; VF8UF1-NEXT: [[TMP5:%.*]] = mul i64 0, [[TMP1]]
; VF8UF1-NEXT: [[TMP6:%.*]] = add i64 0, [[TMP5]]
-; VF8UF1-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], [[STEP]]
+; VF8UF1-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], [[TMP1]]
; VF8UF1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]]
; VF8UF1-NEXT: store i8 0, ptr [[TMP8]], align 1
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE]]
@@ -723,9 +720,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF1-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP3]], i32 1
; VF8UF1-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
; VF8UF1: [[PRED_STORE_IF1]]:
-; VF8UF1-NEXT: [[TMP10:%.*]] = mul i64 1, [[STEP]]
+; VF8UF1-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP1]]
; VF8UF1-NEXT: [[TMP11:%.*]] = add i64 0, [[TMP10]]
-; VF8UF1-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], [[STEP]]
+; VF8UF1-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], [[TMP1]]
; VF8UF1-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP12]]
; VF8UF1-NEXT: store i8 0, ptr [[TMP13]], align 1
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE2]]
@@ -733,9 +730,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF1-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP3]], i32 2
; VF8UF1-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
; VF8UF1: [[PRED_STORE_IF3]]:
-; VF8UF1-NEXT: [[TMP15:%.*]] = mul i64 2, [[STEP]]
+; VF8UF1-NEXT: [[TMP15:%.*]] = mul i64 2, [[TMP1]]
; VF8UF1-NEXT: [[TMP16:%.*]] = add i64 0, [[TMP15]]
-; VF8UF1-NEXT: [[TMP17:%.*]] = add i64 [[TMP16]], [[STEP]]
+; VF8UF1-NEXT: [[TMP17:%.*]] = add i64 [[TMP16]], [[TMP1]]
; VF8UF1-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP17]]
; VF8UF1-NEXT: store i8 0, ptr [[TMP18]], align 1
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE4]]
@@ -743,9 +740,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF1-NEXT: [[TMP19:%.*]] = extractelement <8 x i1> [[TMP3]], i32 3
; VF8UF1-NEXT: br i1 [[TMP19]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
; VF8UF1: [[PRED_STORE_IF5]]:
-; VF8UF1-NEXT: [[TMP20:%.*]] = mul i64 3, [[STEP]]
+; VF8UF1-NEXT: [[TMP20:%.*]] = mul i64 3, [[TMP1]]
; VF8UF1-NEXT: [[TMP21:%.*]] = add i64 0, [[TMP20]]
-; VF8UF1-NEXT: [[TMP22:%.*]] = add i64 [[TMP21]], [[STEP]]
+; VF8UF1-NEXT: [[TMP22:%.*]] = add i64 [[TMP21]], [[TMP1]]
; VF8UF1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP22]]
; VF8UF1-NEXT: store i8 0, ptr [[TMP23]], align 1
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE6]]
@@ -753,9 +750,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF1-NEXT: [[TMP24:%.*]] = extractelement <8 x i1> [[TMP3]], i32 4
; VF8UF1-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
; VF8UF1: [[PRED_STORE_IF7]]:
-; VF8UF1-NEXT: [[TMP25:%.*]] = mul i64 4, [[STEP]]
+; VF8UF1-NEXT: [[TMP25:%.*]] = mul i64 4, [[TMP1]]
; VF8UF1-NEXT: [[TMP26:%.*]] = add i64 0, [[TMP25]]
-; VF8UF1-NEXT: [[TMP27:%.*]] = add i64 [[TMP26]], [[STEP]]
+; VF8UF1-NEXT: [[TMP27:%.*]] = add i64 [[TMP26]], [[TMP1]]
; VF8UF1-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP27]]
; VF8UF1-NEXT: store i8 0, ptr [[TMP28]], align 1
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE8]]
@@ -763,9 +760,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF1-NEXT: [[TMP29:%.*]] = extractelement <8 x i1> [[TMP3]], i32 5
; VF8UF1-NEXT: br i1 [[TMP29]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
; VF8UF1: [[PRED_STORE_IF9]]:
-; VF8UF1-NEXT: [[TMP30:%.*]] = mul i64 5, [[STEP]]
+; VF8UF1-NEXT: [[TMP30:%.*]] = mul i64 5, [[TMP1]]
; VF8UF1-NEXT: [[TMP31:%.*]] = add i64 0, [[TMP30]]
-; VF8UF1-NEXT: [[TMP32:%.*]] = add i64 [[TMP31]], [[STEP]]
+; VF8UF1-NEXT: [[TMP32:%.*]] = add i64 [[TMP31]], [[TMP1]]
; VF8UF1-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP32]]
; VF8UF1-NEXT: store i8 0, ptr [[TMP33]], align 1
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE10]]
@@ -773,9 +770,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF1-NEXT: [[TMP34:%.*]] = extractelement <8 x i1> [[TMP3]], i32 6
; VF8UF1-NEXT: br i1 [[TMP34]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
; VF8UF1: [[PRED_STORE_IF11]]:
-; VF8UF1-NEXT: [[TMP35:%.*]] = mul i64 6, [[STEP]]
+; VF8UF1-NEXT: [[TMP35:%.*]] = mul i64 6, [[TMP1]]
; VF8UF1-NEXT: [[TMP36:%.*]] = add i64 0, [[TMP35]]
-; VF8UF1-NEXT: [[TMP37:%.*]] = add i64 [[TMP36]], [[STEP]]
+; VF8UF1-NEXT: [[TMP37:%.*]] = add i64 [[TMP36]], [[TMP1]]
; VF8UF1-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP37]]
; VF8UF1-NEXT: store i8 0, ptr [[TMP38]], align 1
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE12]]
@@ -783,9 +780,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF1-NEXT: [[TMP39:%.*]] = extractelement <8 x i1> [[TMP3]], i32 7
; VF8UF1-NEXT: br i1 [[TMP39]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
; VF8UF1: [[PRED_STORE_IF13]]:
-; VF8UF1-NEXT: [[TMP40:%.*]] = mul i64 7, [[STEP]]
+; VF8UF1-NEXT: [[TMP40:%.*]] = mul i64 7, [[TMP1]]
; VF8UF1-NEXT: [[TMP41:%.*]] = add i64 0, [[TMP40]]
-; VF8UF1-NEXT: [[TMP42:%.*]] = add i64 [[TMP41]], [[STEP]]
+; VF8UF1-NEXT: [[TMP42:%.*]] = add i64 [[TMP41]], [[TMP1]]
; VF8UF1-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP42]]
; VF8UF1-NEXT: store i8 0, ptr [[TMP43]], align 1
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE14]]
@@ -830,9 +827,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0
; VF8UF2-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF8UF2: [[PRED_STORE_IF]]:
-; VF8UF2-NEXT: [[TMP6:%.*]] = mul i64 0, [[STEP]]
+; VF8UF2-NEXT: [[TMP6:%.*]] = mul i64 0, [[TMP1]]
; VF8UF2-NEXT: [[TMP7:%.*]] = add i64 0, [[TMP6]]
-; VF8UF2-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], [[STEP]]
+; VF8UF2-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], [[TMP1]]
; VF8UF2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP8]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP9]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE]]
@@ -840,9 +837,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP3]], i32 1
; VF8UF2-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
; VF8UF2: [[PRED_STORE_IF1]]:
-; VF8UF2-NEXT: [[TMP11:%.*]] = mul i64 1, [[STEP]]
+; VF8UF2-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP1]]
; VF8UF2-NEXT: [[TMP12:%.*]] = add i64 0, [[TMP11]]
-; VF8UF2-NEXT: [[TMP13:%.*]] = add i64 [[TMP12]], [[STEP]]
+; VF8UF2-NEXT: [[TMP13:%.*]] = add i64 [[TMP12]], [[TMP1]]
; VF8UF2-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP13]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP14]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE2]]
@@ -850,9 +847,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP15:%.*]] = extractelement <8 x i1> [[TMP3]], i32 2
; VF8UF2-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
; VF8UF2: [[PRED_STORE_IF3]]:
-; VF8UF2-NEXT: [[TMP16:%.*]] = mul i64 2, [[STEP]]
+; VF8UF2-NEXT: [[TMP16:%.*]] = mul i64 2, [[TMP1]]
; VF8UF2-NEXT: [[TMP17:%.*]] = add i64 0, [[TMP16]]
-; VF8UF2-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], [[STEP]]
+; VF8UF2-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], [[TMP1]]
; VF8UF2-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP18]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP19]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE4]]
@@ -860,9 +857,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP3]], i32 3
; VF8UF2-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
; VF8UF2: [[PRED_STORE_IF5]]:
-; VF8UF2-NEXT: [[TMP21:%.*]] = mul i64 3, [[STEP]]
+; VF8UF2-NEXT: [[TMP21:%.*]] = mul i64 3, [[TMP1]]
; VF8UF2-NEXT: [[TMP22:%.*]] = add i64 0, [[TMP21]]
-; VF8UF2-NEXT: [[TMP23:%.*]] = add i64 [[TMP22]], [[STEP]]
+; VF8UF2-NEXT: [[TMP23:%.*]] = add i64 [[TMP22]], [[TMP1]]
; VF8UF2-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP23]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP24]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE6]]
@@ -870,9 +867,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP25:%.*]] = extractelement <8 x i1> [[TMP3]], i32 4
; VF8UF2-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
; VF8UF2: [[PRED_STORE_IF7]]:
-; VF8UF2-NEXT: [[TMP26:%.*]] = mul i64 4, [[STEP]]
+; VF8UF2-NEXT: [[TMP26:%.*]] = mul i64 4, [[TMP1]]
; VF8UF2-NEXT: [[TMP27:%.*]] = add i64 0, [[TMP26]]
-; VF8UF2-NEXT: [[TMP28:%.*]] = add i64 [[TMP27]], [[STEP]]
+; VF8UF2-NEXT: [[TMP28:%.*]] = add i64 [[TMP27]], [[TMP1]]
; VF8UF2-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP28]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP29]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE8]]
@@ -880,9 +877,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP30:%.*]] = extractelement <8 x i1> [[TMP3]], i32 5
; VF8UF2-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
; VF8UF2: [[PRED_STORE_IF9]]:
-; VF8UF2-NEXT: [[TMP31:%.*]] = mul i64 5, [[STEP]]
+; VF8UF2-NEXT: [[TMP31:%.*]] = mul i64 5, [[TMP1]]
; VF8UF2-NEXT: [[TMP32:%.*]] = add i64 0, [[TMP31]]
-; VF8UF2-NEXT: [[TMP33:%.*]] = add i64 [[TMP32]], [[STEP]]
+; VF8UF2-NEXT: [[TMP33:%.*]] = add i64 [[TMP32]], [[TMP1]]
; VF8UF2-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP33]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP34]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE10]]
@@ -890,9 +887,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP35:%.*]] = extractelement <8 x i1> [[TMP3]], i32 6
; VF8UF2-NEXT: br i1 [[TMP35]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
; VF8UF2: [[PRED_STORE_IF11]]:
-; VF8UF2-NEXT: [[TMP36:%.*]] = mul i64 6, [[STEP]]
+; VF8UF2-NEXT: [[TMP36:%.*]] = mul i64 6, [[TMP1]]
; VF8UF2-NEXT: [[TMP37:%.*]] = add i64 0, [[TMP36]]
-; VF8UF2-NEXT: [[TMP38:%.*]] = add i64 [[TMP37]], [[STEP]]
+; VF8UF2-NEXT: [[TMP38:%.*]] = add i64 [[TMP37]], [[TMP1]]
; VF8UF2-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP38]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP39]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE12]]
@@ -900,9 +897,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP40:%.*]] = extractelement <8 x i1> [[TMP3]], i32 7
; VF8UF2-NEXT: br i1 [[TMP40]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
; VF8UF2: [[PRED_STORE_IF13]]:
-; VF8UF2-NEXT: [[TMP41:%.*]] = mul i64 7, [[STEP]]
+; VF8UF2-NEXT: [[TMP41:%.*]] = mul i64 7, [[TMP1]]
; VF8UF2-NEXT: [[TMP42:%.*]] = add i64 0, [[TMP41]]
-; VF8UF2-NEXT: [[TMP43:%.*]] = add i64 [[TMP42]], [[STEP]]
+; VF8UF2-NEXT: [[TMP43:%.*]] = add i64 [[TMP42]], [[TMP1]]
; VF8UF2-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP43]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP44]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE14]]
@@ -910,9 +907,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP45:%.*]] = extractelement <8 x i1> [[TMP4]], i32 0
; VF8UF2-NEXT: br i1 [[TMP45]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
; VF8UF2: [[PRED_STORE_IF15]]:
-; VF8UF2-NEXT: [[TMP46:%.*]] = mul i64 8, [[STEP]]
+; VF8UF2-NEXT: [[TMP46:%.*]] = mul i64 8, [[TMP1]]
; VF8UF2-NEXT: [[TMP47:%.*]] = add i64 0, [[TMP46]]
-; VF8UF2-NEXT: [[TMP48:%.*]] = add i64 [[TMP47]], [[STEP]]
+; VF8UF2-NEXT: [[TMP48:%.*]] = add i64 [[TMP47]], [[TMP1]]
; VF8UF2-NEXT: [[TMP49:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP48]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP49]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE16]]
@@ -920,9 +917,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP50:%.*]] = extractelement <8 x i1> [[TMP4]], i32 1
; VF8UF2-NEXT: br i1 [[TMP50]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
; VF8UF2: [[PRED_STORE_IF17]]:
-; VF8UF2-NEXT: [[TMP51:%.*]] = mul i64 9, [[STEP]]
+; VF8UF2-NEXT: [[TMP51:%.*]] = mul i64 9, [[TMP1]]
; VF8UF2-NEXT: [[TMP52:%.*]] = add i64 0, [[TMP51]]
-; VF8UF2-NEXT: [[TMP53:%.*]] = add i64 [[TMP52]], [[STEP]]
+; VF8UF2-NEXT: [[TMP53:%.*]] = add i64 [[TMP52]], [[TMP1]]
; VF8UF2-NEXT: [[TMP54:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP53]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP54]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE18]]
@@ -930,9 +927,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP55:%.*]] = extractelement <8 x i1> [[TMP4]], i32 2
; VF8UF2-NEXT: br i1 [[TMP55]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
; VF8UF2: [[PRED_STORE_IF19]]:
-; VF8UF2-NEXT: [[TMP56:%.*]] = mul i64 10, [[STEP]]
+; VF8UF2-NEXT: [[TMP56:%.*]] = mul i64 10, [[TMP1]]
; VF8UF2-NEXT: [[TMP57:%.*]] = add i64 0, [[TMP56]]
-; VF8UF2-NEXT: [[TMP58:%.*]] = add i64 [[TMP57]], [[STEP]]
+; VF8UF2-NEXT: [[TMP58:%.*]] = add i64 [[TMP57]], [[TMP1]]
; VF8UF2-NEXT: [[TMP59:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP58]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP59]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE20]]
@@ -940,9 +937,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP60:%.*]] = extractelement <8 x i1> [[TMP4]], i32 3
; VF8UF2-NEXT: br i1 [[TMP60]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
; VF8UF2: [[PRED_STORE_IF21]]:
-; VF8UF2-NEXT: [[TMP61:%.*]] = mul i64 11, [[STEP]]
+; VF8UF2-NEXT: [[TMP61:%.*]] = mul i64 11, [[TMP1]]
; VF8UF2-NEXT: [[TMP62:%.*]] = add i64 0, [[TMP61]]
-; VF8UF2-NEXT: [[TMP63:%.*]] = add i64 [[TMP62]], [[STEP]]
+; VF8UF2-NEXT: [[TMP63:%.*]] = add i64 [[TMP62]], [[TMP1]]
; VF8UF2-NEXT: [[TMP64:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP63]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP64]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE22]]
@@ -950,9 +947,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP65:%.*]] = extractelement <8 x i1> [[TMP4]], i32 4
; VF8UF2-NEXT: br i1 [[TMP65]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
; VF8UF2: [[PRED_STORE_IF23]]:
-; VF8UF2-NEXT: [[TMP66:%.*]] = mul i64 12, [[STEP]]
+; VF8UF2-NEXT: [[TMP66:%.*]] = mul i64 12, [[TMP1]]
; VF8UF2-NEXT: [[TMP67:%.*]] = add i64 0, [[TMP66]]
-; VF8UF2-NEXT: [[TMP68:%.*]] = add i64 [[TMP67]], [[STEP]]
+; VF8UF2-NEXT: [[TMP68:%.*]] = add i64 [[TMP67]], [[TMP1]]
; VF8UF2-NEXT: [[TMP69:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP68]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP69]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE24]]
@@ -960,9 +957,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP70:%.*]] = extractelement <8 x i1> [[TMP4]], i32 5
; VF8UF2-NEXT: br i1 [[TMP70]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
; VF8UF2: [[PRED_STORE_IF25]]:
-; VF8UF2-NEXT: [[TMP71:%.*]] = mul i64 13, [[STEP]]
+; VF8UF2-NEXT: [[TMP71:%.*]] = mul i64 13, [[TMP1]]
; VF8UF2-NEXT: [[TMP72:%.*]] = add i64 0, [[TMP71]]
-; VF8UF2-NEXT: [[TMP73:%.*]] = add i64 [[TMP72]], [[STEP]]
+; VF8UF2-NEXT: [[TMP73:%.*]] = add i64 [[TMP72]], [[TMP1]]
; VF8UF2-NEXT: [[TMP74:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP73]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP74]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE26]]
@@ -970,9 +967,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP75:%.*]] = extractelement <8 x i1> [[TMP4]], i32 6
; VF8UF2-NEXT: br i1 [[TMP75]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
; VF8UF2: [[PRED_STORE_IF27]]:
-; VF8UF2-NEXT: [[TMP76:%.*]] = mul i64 14, [[STEP]]
+; VF8UF2-NEXT: [[TMP76:%.*]] = mul i64 14, [[TMP1]]
; VF8UF2-NEXT: [[TMP77:%.*]] = add i64 0, [[TMP76]]
-; VF8UF2-NEXT: [[TMP78:%.*]] = add i64 [[TMP77]], [[STEP]]
+; VF8UF2-NEXT: [[TMP78:%.*]] = add i64 [[TMP77]], [[TMP1]]
; VF8UF2-NEXT: [[TMP79:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP78]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP79]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE28]]
@@ -980,9 +977,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP80:%.*]] = extractelement <8 x i1> [[TMP4]], i32 7
; VF8UF2-NEXT: br i1 [[TMP80]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30:.*]]
; VF8UF2: [[PRED_STORE_IF29]]:
-; VF8UF2-NEXT: [[TMP81:%.*]] = mul i64 15, [[STEP]]
+; VF8UF2-NEXT: [[TMP81:%.*]] = mul i64 15, [[TMP1]]
; VF8UF2-NEXT: [[TMP82:%.*]] = add i64 0, [[TMP81]]
-; VF8UF2-NEXT: [[TMP83:%.*]] = add i64 [[TMP82]], [[STEP]]
+; VF8UF2-NEXT: [[TMP83:%.*]] = add i64 [[TMP82]], [[TMP1]]
; VF8UF2-NEXT: [[TMP84:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP83]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP84]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE30]]
@@ -1026,9 +1023,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP4:%.*]] = extractelement <16 x i1> [[TMP3]], i32 0
; VF16UF1-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF16UF1: [[PRED_STORE_IF]]:
-; VF16UF1-NEXT: [[TMP5:%.*]] = mul i64 0, [[STEP]]
+; VF16UF1-NEXT: [[TMP5:%.*]] = mul i64 0, [[TMP1]]
; VF16UF1-NEXT: [[TMP6:%.*]] = add i64 0, [[TMP5]]
-; VF16UF1-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], [[STEP]]
+; VF16UF1-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], [[TMP1]]
; VF16UF1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP8]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE]]
@@ -1036,9 +1033,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP9:%.*]] = extractelement <16 x i1> [[TMP3]], i32 1
; VF16UF1-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
; VF16UF1: [[PRED_STORE_IF1]]:
-; VF16UF1-NEXT: [[TMP10:%.*]] = mul i64 1, [[STEP]]
+; VF16UF1-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP1]]
; VF16UF1-NEXT: [[TMP11:%.*]] = add i64 0, [[TMP10]]
-; VF16UF1-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], [[STEP]]
+; VF16UF1-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], [[TMP1]]
; VF16UF1-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP12]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP13]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE2]]
@@ -1046,9 +1043,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP14:%.*]] = extractelement <16 x i1> [[TMP3]], i32 2
; VF16UF1-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
; VF16UF1: [[PRED_STORE_IF3]]:
-; VF16UF1-NEXT: [[TMP15:%.*]] = mul i64 2, [[STEP]]
+; VF16UF1-NEXT: [[TMP15:%.*]] = mul i64 2, [[TMP1]]
; VF16UF1-NEXT: [[TMP16:%.*]] = add i64 0, [[TMP15]]
-; VF16UF1-NEXT: [[TMP17:%.*]] = add i64 [[TMP16]], [[STEP]]
+; VF16UF1-NEXT: [[TMP17:%.*]] = add i64 [[TMP16]], [[TMP1]]
; VF16UF1-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP17]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP18]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE4]]
@@ -1056,9 +1053,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP19:%.*]] = extractelement <16 x i1> [[TMP3]], i32 3
; VF16UF1-NEXT: br i1 [[TMP19]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
; VF16UF1: [[PRED_STORE_IF5]]:
-; VF16UF1-NEXT: [[TMP20:%.*]] = mul i64 3, [[STEP]]
+; VF16UF1-NEXT: [[TMP20:%.*]] = mul i64 3, [[TMP1]]
; VF16UF1-NEXT: [[TMP21:%.*]] = add i64 0, [[TMP20]]
-; VF16UF1-NEXT: [[TMP22:%.*]] = add i64 [[TMP21]], [[STEP]]
+; VF16UF1-NEXT: [[TMP22:%.*]] = add i64 [[TMP21]], [[TMP1]]
; VF16UF1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP22]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP23]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE6]]
@@ -1066,9 +1063,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP24:%.*]] = extractelement <16 x i1> [[TMP3]], i32 4
; VF16UF1-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
; VF16UF1: [[PRED_STORE_IF7]]:
-; VF16UF1-NEXT: [[TMP25:%.*]] = mul i64 4, [[STEP]]
+; VF16UF1-NEXT: [[TMP25:%.*]] = mul i64 4, [[TMP1]]
; VF16UF1-NEXT: [[TMP26:%.*]] = add i64 0, [[TMP25]]
-; VF16UF1-NEXT: [[TMP27:%.*]] = add i64 [[TMP26]], [[STEP]]
+; VF16UF1-NEXT: [[TMP27:%.*]] = add i64 [[TMP26]], [[TMP1]]
; VF16UF1-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP27]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP28]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE8]]
@@ -1076,9 +1073,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP29:%.*]] = extractelement <16 x i1> [[TMP3]], i32 5
; VF16UF1-NEXT: br i1 [[TMP29]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
; VF16UF1: [[PRED_STORE_IF9]]:
-; VF16UF1-NEXT: [[TMP30:%.*]] = mul i64 5, [[STEP]]
+; VF16UF1-NEXT: [[TMP30:%.*]] = mul i64 5, [[TMP1]]
; VF16UF1-NEXT: [[TMP31:%.*]] = add i64 0, [[TMP30]]
-; VF16UF1-NEXT: [[TMP32:%.*]] = add i64 [[TMP31]], [[STEP]]
+; VF16UF1-NEXT: [[TMP32:%.*]] = add i64 [[TMP31]], [[TMP1]]
; VF16UF1-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP32]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP33]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE10]]
@@ -1086,9 +1083,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP34:%.*]] = extractelement <16 x i1> [[TMP3]], i32 6
; VF16UF1-NEXT: br i1 [[TMP34]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
; VF16UF1: [[PRED_STORE_IF11]]:
-; VF16UF1-NEXT: [[TMP35:%.*]] = mul i64 6, [[STEP]]
+; VF16UF1-NEXT: [[TMP35:%.*]] = mul i64 6, [[TMP1]]
; VF16UF1-NEXT: [[TMP36:%.*]] = add i64 0, [[TMP35]]
-; VF16UF1-NEXT: [[TMP37:%.*]] = add i64 [[TMP36]], [[STEP]]
+; VF16UF1-NEXT: [[TMP37:%.*]] = add i64 [[TMP36]], [[TMP1]]
; VF16UF1-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP37]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP38]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE12]]
@@ -1096,9 +1093,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP39:%.*]] = extractelement <16 x i1> [[TMP3]], i32 7
; VF16UF1-NEXT: br i1 [[TMP39]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
; VF16UF1: [[PRED_STORE_IF13]]:
-; VF16UF1-NEXT: [[TMP40:%.*]] = mul i64 7, [[STEP]]
+; VF16UF1-NEXT: [[TMP40:%.*]] = mul i64 7, [[TMP1]]
; VF16UF1-NEXT: [[TMP41:%.*]] = add i64 0, [[TMP40]]
-; VF16UF1-NEXT: [[TMP42:%.*]] = add i64 [[TMP41]], [[STEP]]
+; VF16UF1-NEXT: [[TMP42:%.*]] = add i64 [[TMP41]], [[TMP1]]
; VF16UF1-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP42]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP43]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE14]]
@@ -1106,9 +1103,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP44:%.*]] = extractelement <16 x i1> [[TMP3]], i32 8
; VF16UF1-NEXT: br i1 [[TMP44]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
; VF16UF1: [[PRED_STORE_IF15]]:
-; VF16UF1-NEXT: [[TMP45:%.*]] = mul i64 8, [[STEP]]
+; VF16UF1-NEXT: [[TMP45:%.*]] = mul i64 8, [[TMP1]]
; VF16UF1-NEXT: [[TMP46:%.*]] = add i64 0, [[TMP45]]
-; VF16UF1-NEXT: [[TMP47:%.*]] = add i64 [[TMP46]], [[STEP]]
+; VF16UF1-NEXT: [[TMP47:%.*]] = add i64 [[TMP46]], [[TMP1]]
; VF16UF1-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP47]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP48]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE16]]
@@ -1116,9 +1113,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP49:%.*]] = extractelement <16 x i1> [[TMP3]], i32 9
; VF16UF1-NEXT: br i1 [[TMP49]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
; VF16UF1: [[PRED_STORE_IF17]]:
-; VF16UF1-NEXT: [[TMP50:%.*]] = mul i64 9, [[STEP]]
+; VF16UF1-NEXT: [[TMP50:%.*]] = mul i64 9, [[TMP1]]
; VF16UF1-NEXT: [[TMP51:%.*]] = add i64 0, [[TMP50]]
-; VF16UF1-NEXT: [[TMP52:%.*]] = add i64 [[TMP51]], [[STEP]]
+; VF16UF1-NEXT: [[TMP52:%.*]] = add i64 [[TMP51]], [[TMP1]]
; VF16UF1-NEXT: [[TMP53:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP52]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP53]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE18]]
@@ -1126,9 +1123,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP54:%.*]] = extractelement <16 x i1> [[TMP3]], i32 10
; VF16UF1-NEXT: br i1 [[TMP54]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
; VF16UF1: [[PRED_STORE_IF19]]:
-; VF16UF1-NEXT: [[TMP55:%.*]] = mul i64 10, [[STEP]]
+; VF16UF1-NEXT: [[TMP55:%.*]] = mul i64 10, [[TMP1]]
; VF16UF1-NEXT: [[TMP56:%.*]] = add i64 0, [[TMP55]]
-; VF16UF1-NEXT: [[TMP57:%.*]] = add i64 [[TMP56]], [[STEP]]
+; VF16UF1-NEXT: [[TMP57:%.*]] = add i64 [[TMP56]], [[TMP1]]
; VF16UF1-NEXT: [[TMP58:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP57]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP58]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE20]]
@@ -1136,9 +1133,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP59:%.*]] = extractelement <16 x i1> [[TMP3]], i32 11
; VF16UF1-NEXT: br i1 [[TMP59]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
; VF16UF1: [[PRED_STORE_IF21]]:
-; VF16UF1-NEXT: [[TMP60:%.*]] = mul i64 11, [[STEP]]
+; VF16UF1-NEXT: [[TMP60:%.*]] = mul i64 11, [[TMP1]]
; VF16UF1-NEXT: [[TMP61:%.*]] = add i64 0, [[TMP60]]
-; VF16UF1-NEXT: [[TMP62:%.*]] = add i64 [[TMP61]], [[STEP]]
+; VF16UF1-NEXT: [[TMP62:%.*]] = add i64 [[TMP61]], [[TMP1]]
; VF16UF1-NEXT: [[TMP63:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP62]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP63]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE22]]
@@ -1146,9 +1143,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP64:%.*]] = extractelement <16 x i1> [[TMP3]], i32 12
; VF16UF1-NEXT: br i1 [[TMP64]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
; VF16UF1: [[PRED_STORE_IF23]]:
-; VF16UF1-NEXT: [[TMP65:%.*]] = mul i64 12, [[STEP]]
+; VF16UF1-NEXT: [[TMP65:%.*]] = mul i64 12, [[TMP1]]
; VF16UF1-NEXT: [[TMP66:%.*]] = add i64 0, [[TMP65]]
-; VF16UF1-NEXT: [[TMP67:%.*]] = add i64 [[TMP66]], [[STEP]]
+; VF16UF1-NEXT: [[TMP67:%.*]] = add i64 [[TMP66]], [[TMP1]]
; VF16UF1-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP67]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP68]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE24]]
@@ -1156,9 +1153,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP69:%.*]] = extractelement <16 x i1> [[TMP3]], i32 13
; VF16UF1-NEXT: br i1 [[TMP69]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
; VF16UF1: [[PRED_STORE_IF25]]:
-; VF16UF1-NEXT: [[TMP70:%.*]] = mul i64 13, [[STEP]]
+; VF16UF1-NEXT: [[TMP70:%.*]] = mul i64 13, [[TMP1]]
; VF16UF1-NEXT: [[TMP71:%.*]] = add i64 0, [[TMP70]]
-; VF16UF1-NEXT: [[TMP72:%.*]] = add i64 [[TMP71]], [[STEP]]
+; VF16UF1-NEXT: [[TMP72:%.*]] = add i64 [[TMP71]], [[TMP1]]
; VF16UF1-NEXT: [[TMP73:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP72]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP73]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE26]]
@@ -1166,9 +1163,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP74:%.*]] = extractelement <16 x i1> [[TMP3]], i32 14
; VF16UF1-NEXT: br i1 [[TMP74]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
; VF16UF1: [[PRED_STORE_IF27]]:
-; VF16UF1-NEXT: [[TMP75:%.*]] = mul i64 14, [[STEP]]
+; VF16UF1-NEXT: [[TMP75:%.*]] = mul i64 14, [[TMP1]]
; VF16UF1-NEXT: [[TMP76:%.*]] = add i64 0, [[TMP75]]
-; VF16UF1-NEXT: [[TMP77:%.*]] = add i64 [[TMP76]], [[STEP]]
+; VF16UF1-NEXT: [[TMP77:%.*]] = add i64 [[TMP76]], [[TMP1]]
; VF16UF1-NEXT: [[TMP78:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP77]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP78]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE28]]
@@ -1176,9 +1173,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP79:%.*]] = extractelement <16 x i1> [[TMP3]], i32 15
; VF16UF1-NEXT: br i1 [[TMP79]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30:.*]]
; VF16UF1: [[PRED_STORE_IF29]]:
-; VF16UF1-NEXT: [[TMP80:%.*]] = mul i64 15, [[STEP]]
+; VF16UF1-NEXT: [[TMP80:%.*]] = mul i64 15, [[TMP1]]
; VF16UF1-NEXT: [[TMP81:%.*]] = add i64 0, [[TMP80]]
-; VF16UF1-NEXT: [[TMP82:%.*]] = add i64 [[TMP81]], [[STEP]]
+; VF16UF1-NEXT: [[TMP82:%.*]] = add i64 [[TMP81]], [[TMP1]]
; VF16UF1-NEXT: [[TMP83:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP82]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP83]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE30]]
diff --git a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll
index 28739471eac2f..76d5c5299686e 100644
--- a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll
+++ b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll
@@ -417,11 +417,11 @@ define void @zext_of_i1_stride(i1 %g, ptr %dst) mustprogress {
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
-; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], [[G_64]]
+; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], [[TMP1]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], [[G_64]]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: store <4 x i16> splat (i16 1), ptr [[TMP4]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -479,7 +479,7 @@ define void @sext_of_i1_stride(i1 %g, ptr %dst) mustprogress {
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
-; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], [[G_64]]
+; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], [[TMP2]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 0
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
index d85638733211c..54b1e060720e8 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
@@ -79,12 +79,9 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: vp<[[VPTR2:%.]]> = vector-pointer ir<%A>, ir<1>
; CHECK-NEXT: WIDEN ir<%l> = load ir<%A>
-; CHECK-NEXT: WIDEN ir<%l>.1 = load vp<[[VPTR2]]>
; CHECK-NEXT: WIDEN ir<%add> = add nsw ir<%l>, ir<10>
-; CHECK-NEXT: WIDEN ir<%add>.1 = add nsw ir<%l>.1, ir<10>
-; CHECK-NEXT: vp<[[VPTR4:%.+]]> = vector-pointer ir<%A>, ir<1>
; CHECK-NEXT: WIDEN store ir<%A>, ir<%add>
-; CHECK-NEXT: WIDEN store vp<[[VPTR4]]>, ir<%add>.1
+; CHECK-NEXT: WIDEN store vp<[[VPTR3:%.*]]>, ir<%add>
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/indvars-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/indvars-vectorization.ll
index b056f44a6c469..54d00d54dce26 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/indvars-vectorization.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/indvars-vectorization.ll
@@ -50,12 +50,12 @@ define void @s172(i32 noundef %xa, i32 noundef %xb, ptr noundef %a, ptr noundef
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[FOR_BODY_PREHEADER13]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP8]], -8
-; CHECK-NEXT: [[TMP18:%.*]] = mul nuw i64 [[N_VEC]], [[TMP1]]
+; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[N_VEC]], [[TMP8]]
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[TMP18]], [[TMP0]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP19:%.*]] = mul nuw i64 [[INDEX]], [[TMP1]]
+; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[INDEX]], [[TMP8]]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[TMP19]], [[TMP0]]
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP20]], i64 16
>From 9e4df6ca219b25b9858056eaf2c7f79fb098ab7b Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Sun, 3 Aug 2025 23:10:25 +0100
Subject: [PATCH 3/3] [VPlan] Fix some bugs
---
llvm/include/llvm/IR/FMF.h | 7 +
llvm/include/llvm/IR/GEPNoWrapFlags.h | 8 +
llvm/lib/Transforms/Vectorize/VPlan.cpp | 25 ++-
llvm/lib/Transforms/Vectorize/VPlan.h | 34 ++--
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 23 +++
llvm/lib/Transforms/Vectorize/VPlanValue.h | 5 +-
.../extractvalue-no-scalarization-required.ll | 105 ++++-------
.../LoopVectorize/AArch64/induction-costs.ll | 3 +-
.../AArch64/sve-vscale-based-trip-counts.ll | 40 ++---
.../AArch64/sve-widen-extractvalue.ll | 56 ++----
.../LoopVectorize/X86/induction-costs.ll | 3 +-
.../LoopVectorize/X86/uniform_load.ll | 12 +-
...oop-backedge-elimination-branch-weights.ll | 4 +-
.../vector-loop-backedge-elimination.ll | 164 +++++++++---------
.../version-stride-with-integer-casts.ll | 6 +-
.../vplan-printing-before-execute.ll | 4 +-
.../AArch64/indvars-vectorization.ll | 4 +-
17 files changed, 245 insertions(+), 258 deletions(-)
diff --git a/llvm/include/llvm/IR/FMF.h b/llvm/include/llvm/IR/FMF.h
index 8bd591250a38a..da4d9f564bd5d 100644
--- a/llvm/include/llvm/IR/FMF.h
+++ b/llvm/include/llvm/IR/FMF.h
@@ -13,6 +13,7 @@
#ifndef LLVM_IR_FMF_H
#define LLVM_IR_FMF_H
+#include "llvm/ADT/Hashing.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
@@ -121,6 +122,9 @@ class FastMathFlags {
const unsigned ValueMask = NoNaNs | NoInfs | NoSignedZeros;
return FastMathFlags(ValueMask & (LHS.Flags | RHS.Flags));
}
+
+ // Hashing.
+ friend hash_code hash_value(const FastMathFlags &FMF);
};
inline FastMathFlags operator|(FastMathFlags LHS, FastMathFlags RHS) {
@@ -138,6 +142,9 @@ inline raw_ostream &operator<<(raw_ostream &O, FastMathFlags FMF) {
return O;
}
+inline hash_code hash_value(const FastMathFlags &FMF) {
+ return hash_value(FMF.Flags);
+}
} // end namespace llvm
#endif // LLVM_IR_FMF_H
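
A side note on the change above: with the friend hash_value, FastMathFlags now
composes directly with hash_combine. A minimal sketch of the pattern this
enables (hypothetical helper, assuming an LLVM tree; not part of this patch):

    #include "llvm/ADT/Hashing.h"
    #include "llvm/IR/FMF.h"

    llvm::hash_code hashFPOp(unsigned Opcode, llvm::FastMathFlags FMF) {
      // Two FP ops that differ only in their fast-math flags now hash
      // differently, so a hash-based CSE map will not conflate them.
      return llvm::hash_combine(Opcode, FMF);
    }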
diff --git a/llvm/include/llvm/IR/GEPNoWrapFlags.h b/llvm/include/llvm/IR/GEPNoWrapFlags.h
index 4e6ab0d88bfcf..f0f85f66aea13 100644
--- a/llvm/include/llvm/IR/GEPNoWrapFlags.h
+++ b/llvm/include/llvm/IR/GEPNoWrapFlags.h
@@ -13,6 +13,7 @@
#ifndef LLVM_IR_GEPNOWRAPFLAGS_H
#define LLVM_IR_GEPNOWRAPFLAGS_H
+#include "llvm/ADT/Hashing.h"
#include <assert.h>
namespace llvm {
@@ -101,8 +102,15 @@ class GEPNoWrapFlags {
Flags |= Other.Flags;
return *this;
}
+
+ // Hashing.
+ friend hash_code hash_value(const GEPNoWrapFlags &NW);
};
+inline hash_code hash_value(const GEPNoWrapFlags &NW) {
+ return hash_value(NW.Flags);
+}
+
} // end namespace llvm
#endif // LLVM_IR_GEPNOWRAPFLAGS_H
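
The contract these hash_value overloads must satisfy is that equal flag sets
hash equally (the converse need not hold). A quick sanity sketch (hypothetical,
assuming an LLVM tree; not part of this patch):

    #include "llvm/IR/GEPNoWrapFlags.h"
    #include <cassert>

    void checkGEPFlagsHashInvariant() {
      llvm::GEPNoWrapFlags A = llvm::GEPNoWrapFlags::inBounds();
      llvm::GEPNoWrapFlags B = llvm::GEPNoWrapFlags::inBounds();
      // Identical flags must produce identical hashes; distinct flags
      // may still collide, which equality checks must tolerate.
      assert(hash_value(A) == hash_value(B));
    }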
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 2da5ab13f7b3d..2b67ee8ae5406 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -127,13 +127,19 @@ bool VPValue::isIdenticalTo(const VPValue *Other) const {
hasDefiningRecipe() != Other->hasDefiningRecipe() ||
!getUnderlyingValue() != !Other->getUnderlyingValue())
return false;
- Instruction *I = dyn_cast_or_null<Instruction>(getUnderlyingValue());
- Instruction *OtherI =
- dyn_cast_or_null<Instruction>(Other->getUnderlyingValue());
+
+ auto *I = dyn_cast_or_null<Instruction>(getUnderlyingValue());
+ auto *OtherI = dyn_cast_or_null<Instruction>(Other->getUnderlyingValue());
if (I && OtherI)
- return I->getOpcode() == OtherI->getOpcode() &&
- equal(I->operand_values(), OtherI->operand_values());
+ return I->isIdenticalTo(OtherI);
+
if (hasDefiningRecipe()) {
+ // Special case for matching flags on VPInstructions.
+ auto *VPI = dyn_cast<VPInstruction>(this);
+ auto *OtherVPI = dyn_cast<VPInstruction>(Other);
+ if (VPI && OtherVPI && VPI->hashFlags() != OtherVPI->hashFlags())
+ return false;
+
const VPRecipeBase *DefL = getDefiningRecipe();
const VPRecipeBase *DefR = Other->getDefiningRecipe();
return vputils::getOpcode(*DefL) == vputils::getOpcode(*DefR) &&
@@ -144,11 +150,16 @@ bool VPValue::isIdenticalTo(const VPValue *Other) const {
hash_code llvm::hash_value(const VPValue &V) {
if (Instruction *I = dyn_cast_or_null<Instruction>(V.getUnderlyingValue()))
- return hash_combine(I->getOpcode(),
+ return hash_combine(I->getOpcode(), I->getRawSubclassOptionalData(),
hash_combine_range(I->operand_values()));
+
if (V.hasDefiningRecipe()) {
+ hash_code Flags;
+ if (auto *VPI = dyn_cast<VPInstruction>(&V))
+ Flags = VPI->hashFlags();
+
const VPRecipeBase *Def = V.getDefiningRecipe();
- return hash_combine(vputils::getOpcode(*Def),
+ return hash_combine(vputils::getOpcode(*Def), Flags,
hash_combine_range(Def->operands()));
}
return hash_combine(V.getVPValueID(), V.getUnderlyingValue());
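
Together, hash_value and isIdenticalTo form the usual hash/equality pair for a
CSE map. A minimal sketch of how they could plug into a DenseMap (illustrative
only; the names here are not the pass's actual ones):

    #include "llvm/ADT/DenseMap.h"
    #include "VPlanValue.h" // lives in lib/Transforms/Vectorize

    struct VPCSEInfo : llvm::DenseMapInfo<llvm::VPValue *> {
      static unsigned getHashValue(const llvm::VPValue *V) {
        return llvm::hash_value(*V); // structural hash from above
      }
      static bool isEqual(const llvm::VPValue *LHS, const llvm::VPValue *RHS) {
        // Sentinel keys compare by pointer; real keys compare structurally.
        if (LHS == getEmptyKey() || LHS == getTombstoneKey() ||
            RHS == getEmptyKey() || RHS == getTombstoneKey())
          return LHS == RHS;
        return LHS->isIdenticalTo(RHS);
      }
    };

    // Usage: map each value to the first structurally-identical one seen.
    // llvm::DenseMap<llvm::VPValue *, llvm::VPValue *, VPCSEInfo> SeenExprs;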
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index a5de5933d5ff1..c4d84514e8fbd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -594,18 +594,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
/// Class to record and manage LLVM IR flags.
class VPIRFlags {
- enum class OperationType : unsigned char {
- Cmp,
- OverflowingBinOp,
- Trunc,
- DisjointOp,
- PossiblyExactOp,
- GEPOp,
- FPMathOp,
- NonNegOp,
- Other
- };
-
public:
struct WrapFlagsTy {
char HasNUW : 1;
@@ -647,6 +635,19 @@ class VPIRFlags {
LLVM_ABI_FOR_TEST FastMathFlagsTy(const FastMathFlags &FMF);
};
+protected:
+ enum class OperationType : unsigned char {
+ Cmp,
+ OverflowingBinOp,
+ Trunc,
+ DisjointOp,
+ PossiblyExactOp,
+ GEPOp,
+ FPMathOp,
+ NonNegOp,
+ Other
+ };
+
OperationType OpType;
union {
@@ -819,6 +820,12 @@ class VPIRFlags {
return NonNegFlags.NonNeg;
}
+ bool isExact() const {
+ assert(OpType == OperationType::PossiblyExactOp &&
+ "recipe doesn't have a EXACT flag");
+ return ExactFlags.IsExact;
+ }
+
bool hasNoUnsignedWrap() const {
switch (OpType) {
case OperationType::OverflowingBinOp:
@@ -1134,6 +1141,9 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
/// result is also a single scalar.
bool isSingleScalar() const;
+ /// Hash the VPIRFlags of this VPInstruction.
+ hash_code hashFlags() const;
+
/// Returns the symbolic name assigned to the VPInstruction.
StringRef getName() const { return Name; }
};
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 11b4677ec102e..63e2385c63040 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1802,6 +1802,29 @@ VPIRFlags::FastMathFlagsTy::FastMathFlagsTy(const FastMathFlags &FMF) {
ApproxFunc = FMF.approxFunc();
}
+hash_code VPInstruction::hashFlags() const {
+ switch (OpType) {
+ case OperationType::OverflowingBinOp:
+ case OperationType::Trunc:
+ return hash_combine(hasNoSignedWrap(), hasNoUnsignedWrap());
+ case OperationType::DisjointOp:
+ return hash_value(isDisjoint());
+ case OperationType::PossiblyExactOp:
+ return hash_value(isExact());
+ case OperationType::GEPOp:
+ return hash_value(getGEPNoWrapFlags());
+ case OperationType::FPMathOp:
+ return hash_value(getFastMathFlags());
+ case OperationType::NonNegOp:
+ return hash_value(isNonNeg());
+ case OperationType::Cmp:
+ return hash_value(getPredicate());
+ case OperationType::Other:
+ return {};
+ }
+ llvm_unreachable("Unknown OperationType enum");
+}
+
#if !defined(NDEBUG)
bool VPIRFlags::flagsValidForOpcode(unsigned Opcode) const {
switch (OpType) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 05bb8a385fae6..24e44dab9353a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -190,9 +190,8 @@ class LLVM_ABI_FOR_TEST VPValue {
bool isIdenticalTo(const VPValue *Other) const;
};
-// Hash method so VPValue can be de-duplicated in certain
-// contexts.
-hash_code hash_value(const VPValue &Arg);
+// Hash method for VPValue.
+hash_code hash_value(const VPValue &V);
typedef DenseMap<Value *, VPValue *> Value2VPValueTy;
typedef DenseMap<VPValue *, Value *> VPValue2ValueTy;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
index bbdab4856273a..19f2a363a733b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
; REQUIRES: asserts
; RUN: opt -passes=loop-vectorize -mtriple=arm64-apple-ios %s -S -debug -disable-output 2>&1 | FileCheck --check-prefix=CM %s
@@ -23,42 +22,23 @@
; Check that the extractvalue operands are actually free in vector code.
+; FORCED: [[E1:%.+]] = extractvalue { i64, i64 } %sv, 0
+; FORCED-NEXT: %broadcast.splatinsert = insertelement <2 x i64> poison, i64 [[E1]], i64 0
+; FORCED-NEXT: %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> poison, <2 x i32> zeroinitializer
+; FORCED-NEXT: [[E2:%.+]] = extractvalue { i64, i64 } %sv, 1
+; FORCED-NEXT: %broadcast.splatinsert1 = insertelement <2 x i64> poison, i64 [[E2]], i64 0
+; FORCED-NEXT: %broadcast.splat2 = shufflevector <2 x i64> %broadcast.splatinsert1, <2 x i64> poison, <2 x i32> zeroinitializer
+; FORCED-NEXT: [[ADD:%.+]] = add <2 x i64> %broadcast.splat, %broadcast.splat2
+
+; FORCED-LABEL: vector.body: ; preds = %vector.body, %vector.ph
+; FORCED-NEXT: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; FORCED-NEXT: [[GEP:%.+]] = getelementptr i64, ptr %dst, i32 %index
+; FORCED-NEXT: store <2 x i64> [[ADD]], ptr [[GEP]], align 4
+; FORCED-NEXT: %index.next = add nuw i32 %index, 2
+; FORCED-NEXT: [[C:%.+]] = icmp eq i32 %index.next, 1000
+; FORCED-NEXT: br i1 [[C]], label %middle.block, label %vector.body
+
define void @test1(ptr %dst, {i64, i64} %sv) {
-; FORCED-LABEL: define void @test1(
-; FORCED-SAME: ptr [[DST:%.*]], { i64, i64 } [[SV:%.*]]) {
-; FORCED-NEXT: [[ENTRY:.*]]:
-; FORCED-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
-; FORCED: [[VECTOR_PH]]:
-; FORCED-NEXT: [[TMP0:%.*]] = extractvalue { i64, i64 } [[SV]], 0
-; FORCED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0
-; FORCED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
-; FORCED-NEXT: [[TMP1:%.*]] = add <2 x i64> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT]]
-; FORCED-NEXT: br label %[[VECTOR_BODY:.*]]
-; FORCED: [[VECTOR_BODY]]:
-; FORCED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; FORCED-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[DST]], i32 [[INDEX]]
-; FORCED-NEXT: store <2 x i64> [[TMP1]], ptr [[TMP2]], align 4
-; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; FORCED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
-; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
-; FORCED: [[MIDDLE_BLOCK]]:
-; FORCED-NEXT: br label %[[EXIT:.*]]
-; FORCED: [[SCALAR_PH]]:
-; FORCED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
-; FORCED-NEXT: br label %[[LOOP_BODY:.*]]
-; FORCED: [[LOOP_BODY]]:
-; FORCED-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_BODY]] ]
-; FORCED-NEXT: [[A:%.*]] = extractvalue { i64, i64 } [[SV]], 0
-; FORCED-NEXT: [[B:%.*]] = extractvalue { i64, i64 } [[SV]], 1
-; FORCED-NEXT: [[ADDR:%.*]] = getelementptr i64, ptr [[DST]], i32 [[IV]]
-; FORCED-NEXT: [[ADD:%.*]] = add i64 [[A]], [[B]]
-; FORCED-NEXT: store i64 [[ADD]], ptr [[ADDR]], align 4
-; FORCED-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1
-; FORCED-NEXT: [[COND:%.*]] = icmp ne i32 [[IV_NEXT]], 1000
-; FORCED-NEXT: br i1 [[COND]], label %[[LOOP_BODY]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
-; FORCED: [[EXIT]]:
-; FORCED-NEXT: ret void
-;
entry:
br label %loop.body
@@ -90,42 +70,25 @@ declare float @powf(float, float) readnone nounwind
; CM: LV: Scalar loop costs: 14.
+; FORCED-LABEL: define void @test_getVectorCallCost
+
+; FORCED: [[E1:%.+]] = extractvalue { float, float } %sv, 0
+; FORCED-NEXT: %broadcast.splatinsert = insertelement <2 x float> poison, float [[E1]], i64 0
+; FORCED-NEXT: %broadcast.splat = shufflevector <2 x float> %broadcast.splatinsert, <2 x float> poison, <2 x i32> zeroinitializer
+; FORCED-NEXT: [[E2:%.+]] = extractvalue { float, float } %sv, 1
+; FORCED-NEXT: %broadcast.splatinsert1 = insertelement <2 x float> poison, float [[E2]], i64 0
+; FORCED-NEXT: %broadcast.splat2 = shufflevector <2 x float> %broadcast.splatinsert1, <2 x float> poison, <2 x i32> zeroinitializer
+
+; FORCED-LABEL: vector.body: ; preds = %vector.body, %vector.ph
+; FORCED-NEXT: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; FORCED-NEXT: [[GEP1:%.+]] = getelementptr float, ptr %dst, i32 %index
+; FORCED-NEXT: [[POW:%.+]] = call <2 x float> @llvm.pow.v2f32(<2 x float> %broadcast.splat, <2 x float> %broadcast.splat2)
+; FORCED-NEXT: store <2 x float> [[POW]], ptr [[GEP1]], align 4
+; FORCED-NEXT: %index.next = add nuw i32 %index, 2
+; FORCED-NEXT: [[C:%.+]] = icmp eq i32 %index.next, 1000
+; FORCED-NEXT: br i1 [[C]], label %middle.block, label %vector.body
+
define void @test_getVectorCallCost(ptr %dst, {float, float} %sv) {
-; FORCED-LABEL: define void @test_getVectorCallCost(
-; FORCED-SAME: ptr [[DST:%.*]], { float, float } [[SV:%.*]]) {
-; FORCED-NEXT: [[ENTRY:.*]]:
-; FORCED-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
-; FORCED: [[VECTOR_PH]]:
-; FORCED-NEXT: [[TMP0:%.*]] = extractvalue { float, float } [[SV]], 0
-; FORCED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0
-; FORCED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
-; FORCED-NEXT: br label %[[VECTOR_BODY:.*]]
-; FORCED: [[VECTOR_BODY]]:
-; FORCED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; FORCED-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[DST]], i32 [[INDEX]]
-; FORCED-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[BROADCAST_SPLAT]], <2 x float> [[BROADCAST_SPLAT]])
-; FORCED-NEXT: store <2 x float> [[TMP2]], ptr [[TMP1]], align 4
-; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; FORCED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
-; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
-; FORCED: [[MIDDLE_BLOCK]]:
-; FORCED-NEXT: br label %[[EXIT:.*]]
-; FORCED: [[SCALAR_PH]]:
-; FORCED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
-; FORCED-NEXT: br label %[[LOOP_BODY:.*]]
-; FORCED: [[LOOP_BODY]]:
-; FORCED-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_BODY]] ]
-; FORCED-NEXT: [[A:%.*]] = extractvalue { float, float } [[SV]], 0
-; FORCED-NEXT: [[B:%.*]] = extractvalue { float, float } [[SV]], 1
-; FORCED-NEXT: [[ADDR:%.*]] = getelementptr float, ptr [[DST]], i32 [[IV]]
-; FORCED-NEXT: [[P:%.*]] = call float @powf(float [[A]], float [[B]])
-; FORCED-NEXT: store float [[P]], ptr [[ADDR]], align 4
-; FORCED-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1
-; FORCED-NEXT: [[COND:%.*]] = icmp ne i32 [[IV_NEXT]], 1000
-; FORCED-NEXT: br i1 [[COND]], label %[[LOOP_BODY]], label %[[EXIT]], !llvm.loop [[LOOP5:![0-9]+]]
-; FORCED: [[EXIT]]:
-; FORCED-NEXT: ret void
-;
entry:
br label %loop.body
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll
index 26441518bc683..aa2ec2de14c29 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll
@@ -294,6 +294,7 @@ define i64 @test_ptr_ivs_and_widened_ivs(ptr %src, i32 %N) {
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 4
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP3]]
+; CHECK-NEXT: [[IND_END1:%.*]] = trunc i64 [[N_VEC]] to i32
; CHECK-NEXT: [[IND_END3:%.*]] = trunc i64 [[N_VEC]] to i32
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
@@ -320,7 +321,7 @@ define i64 @test_ptr_ivs_and_widened_ivs(ptr %src, i32 %N) {
; CHECK: scalar.ph:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[SRC]], [[ENTRY]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll
index b0740cb315b01..075c9bbc4a2f2 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll
@@ -73,13 +73,13 @@ define void @vscale_mul_8(ptr noalias noundef readonly captures(none) %a, ptr n
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[A]], align 4
-; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 4
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[TMP15]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[B]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
+; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[TMP21]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[B]], align 4
-; CHECK-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP16]], align 4
+; CHECK-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP22]], align 4
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[MUL1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY:.*]]
; CHECK: [[FOR_COND_CLEANUP]]:
@@ -221,11 +221,11 @@ define void @vscale_mul_31(ptr noalias noundef readonly captures(none) %a, ptr n
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP16]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
-; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4
-; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP21]]
+; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP20:%.*]] = mul nuw i64 [[TMP19]], 4
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP20]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP12]], align 4
-; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP19]], align 4
+; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP21]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -304,11 +304,11 @@ define void @vscale_mul_64(ptr noalias noundef readonly captures(none) %a, ptr n
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP16]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
-; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4
-; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP21]]
+; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP20:%.*]] = mul nuw i64 [[TMP19]], 4
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP20]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP12]], align 4
-; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP19]], align 4
+; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP21]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -389,11 +389,11 @@ define void @trip_count_with_overflow(ptr noalias noundef readonly captures(none
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP17]], align 4
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP19:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
-; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP21]], 4
-; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP22]]
+; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP21]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP13]], align 4
-; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP20]], align 4
+; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP22]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
@@ -470,11 +470,11 @@ define void @trip_count_too_big_for_element_count(ptr noalias noundef readonly c
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP17]], align 4
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP19:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
-; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP21]], 4
-; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP22]]
+; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP21]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP13]], align 4
-; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP20]], align 4
+; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP22]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
index 3603afa11ba67..473fabfc9fecc 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
@@ -1,54 +1,17 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
; RUN: opt -S -passes=loop-vectorize < %s | FileCheck %s
target triple = "aarch64-unknown-linux-gnu"
define void @widen_extractvalue(ptr %dst, {i64, i64} %sv) #0 {
-; CHECK-LABEL: define void @widen_extractvalue(
-; CHECK-SAME: ptr [[DST:%.*]], { i64, i64 } [[SV:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i32 [[TMP0]], 2
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 1000, [[TMP1]]
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
-; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP2]], 2
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 1000, [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 1000, [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP4]], 2
-; CHECK-NEXT: [[EXTRACT0:%.*]] = extractvalue { i64, i64 } [[SV]], 0
-; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT0]], i64 0
-; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT2]], [[DOTSPLAT2]]
-; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
-; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[DST]], i32 [[INDEX]]
-; CHECK-NEXT: store <vscale x 2 x i64> [[TMP7]], ptr [[TMP8]], align 8
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]]
-; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
-; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 1000, [[N_VEC]]
-; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
-; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: br label %[[LOOP_BODY:.*]]
-; CHECK: [[LOOP_BODY]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_BODY]] ]
-; CHECK-NEXT: [[A:%.*]] = extractvalue { i64, i64 } [[SV]], 0
-; CHECK-NEXT: [[B:%.*]] = extractvalue { i64, i64 } [[SV]], 1
-; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i64, ptr [[DST]], i32 [[IV]]
-; CHECK-NEXT: [[ADD:%.*]] = add i64 [[A]], [[B]]
-; CHECK-NEXT: store i64 [[ADD]], ptr [[ADDR]], align 8
-; CHECK-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1
-; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[IV_NEXT]], 1000
-; CHECK-NEXT: br i1 [[COND]], label %[[LOOP_BODY]], label %[[EXIT]], !llvm.loop [[LOOP4:![0-9]+]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: ret void
-;
+; CHECK-LABEL: @widen_extractvalue(
+; CHECK: [[EXTRACT0:%.*]] = extractvalue { i64, i64 } [[SV:%.*]], 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT0]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { i64, i64 } [[SV]], 1
+; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT1]], i64 0
+; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK: [[ADD:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[DOTSPLAT2]]
+; CHECK: vector.body:
entry:
br label %loop.body
@@ -75,3 +38,4 @@ attributes #0 = { "target-features"="+sve" }
!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
!4 = !{!"llvm.loop.interleave.count", i32 1}
!5 = !{!"llvm.loop.vectorize.enable", i1 true}
+
diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
index 35a52cf7387c5..fcd94f444e8a5 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
@@ -250,6 +250,7 @@ define void @multiple_pointer_ivs_with_scalar_uses_only(ptr %A, ptr %B) #0 {
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[A]], i64 8589934368
+; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i8, ptr [[B]], i64 4294967184
; CHECK-NEXT: [[IND_END5:%.*]] = getelementptr i8, ptr [[B]], i64 4294967184
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
@@ -339,7 +340,7 @@ define void @multiple_pointer_ivs_with_scalar_uses_only(ptr %A, ptr %B) #0 {
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ -12, [[MIDDLE_BLOCK]] ], [ 100, [[ENTRY:%.*]] ], [ 100, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 2048, [[ENTRY]] ], [ 2048, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[A]], [[ENTRY]] ], [ [[A]], [[VECTOR_MEMCHECK]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi ptr [ [[IND_END5]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ], [ [[B]], [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi ptr [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ], [ [[B]], [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[IND_END5]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ], [ [[B]], [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll
index e80fe60c69e2a..9deb19e34b1a6 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll
@@ -25,15 +25,15 @@ define void @foo(ptr nocapture noalias %A, i64 %N) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr @inc, align 4
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[TMP1]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x float> [[BROADCAST_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i32 8
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i32 16
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i32 24
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[A]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = fadd <8 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A]], i32 8
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A]], i32 16
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[A]], i32 24
; CHECK-NEXT: store <8 x float> [[TMP7]], ptr [[A]], align 4
-; CHECK-NEXT: store <8 x float> [[TMP7]], ptr [[TMP4]], align 4
-; CHECK-NEXT: store <8 x float> [[TMP7]], ptr [[TMP5]], align 4
-; CHECK-NEXT: store <8 x float> [[TMP7]], ptr [[TMP6]], align 4
+; CHECK-NEXT: store <8 x float> [[TMP7]], ptr [[TMP11]], align 4
+; CHECK-NEXT: store <8 x float> [[TMP7]], ptr [[TMP12]], align 4
+; CHECK-NEXT: store <8 x float> [[TMP7]], ptr [[TMP13]], align 4
; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
;
diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-branch-weights.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-branch-weights.ll
index 9b9ea6391f45f..cc9f4f01db915 100644
--- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-branch-weights.ll
+++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-branch-weights.ll
@@ -55,11 +55,11 @@ define void @test_tc_between_8_and_17(ptr %A, i64 range(i64 8, 17) %N) {
; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[N_VEC]]
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF2: [[VECTOR_BODY]]:
-; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[A]], i32 8
; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[A]], align 1
; VF8UF2-NEXT: [[TMP3:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10)
+; VF8UF2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i32 8
; VF8UF2-NEXT: store <8 x i8> [[TMP3]], ptr [[A]], align 1
-; VF8UF2-NEXT: store <8 x i8> [[TMP3]], ptr [[TMP2]], align 1
+; VF8UF2-NEXT: store <8 x i8> [[TMP3]], ptr [[TMP6]], align 1
; VF8UF2-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; VF8UF2: [[MIDDLE_BLOCK]]:
; VF8UF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
index b87c6d22dd0db..74d79ecf2d298 100644
--- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
+++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
@@ -61,11 +61,11 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) {
; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[N_VEC]]
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF2: [[VECTOR_BODY]]:
-; VF8UF2-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[A]], i32 8
; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[A]], align 1
; VF8UF2-NEXT: [[TMP4:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10)
+; VF8UF2-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[A]], i32 8
; VF8UF2-NEXT: store <8 x i8> [[TMP4]], ptr [[A]], align 1
-; VF8UF2-NEXT: store <8 x i8> [[TMP4]], ptr [[TMP3]], align 1
+; VF8UF2-NEXT: store <8 x i8> [[TMP4]], ptr [[TMP7]], align 1
; VF8UF2-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; VF8UF2: [[MIDDLE_BLOCK]]:
; VF8UF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[AND]], [[N_VEC]]
@@ -710,9 +710,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF1-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0
; VF8UF1-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF8UF1: [[PRED_STORE_IF]]:
-; VF8UF1-NEXT: [[TMP5:%.*]] = mul i64 0, [[TMP1]]
+; VF8UF1-NEXT: [[TMP5:%.*]] = mul i64 0, [[STEP]]
; VF8UF1-NEXT: [[TMP6:%.*]] = add i64 0, [[TMP5]]
-; VF8UF1-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], [[TMP1]]
+; VF8UF1-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], [[STEP]]
; VF8UF1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]]
; VF8UF1-NEXT: store i8 0, ptr [[TMP8]], align 1
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE]]
@@ -720,9 +720,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF1-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP3]], i32 1
; VF8UF1-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
; VF8UF1: [[PRED_STORE_IF1]]:
-; VF8UF1-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP1]]
+; VF8UF1-NEXT: [[TMP10:%.*]] = mul i64 1, [[STEP]]
; VF8UF1-NEXT: [[TMP11:%.*]] = add i64 0, [[TMP10]]
-; VF8UF1-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], [[TMP1]]
+; VF8UF1-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], [[STEP]]
; VF8UF1-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP12]]
; VF8UF1-NEXT: store i8 0, ptr [[TMP13]], align 1
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE2]]
@@ -730,9 +730,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF1-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP3]], i32 2
; VF8UF1-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
; VF8UF1: [[PRED_STORE_IF3]]:
-; VF8UF1-NEXT: [[TMP15:%.*]] = mul i64 2, [[TMP1]]
+; VF8UF1-NEXT: [[TMP15:%.*]] = mul i64 2, [[STEP]]
; VF8UF1-NEXT: [[TMP16:%.*]] = add i64 0, [[TMP15]]
-; VF8UF1-NEXT: [[TMP17:%.*]] = add i64 [[TMP16]], [[TMP1]]
+; VF8UF1-NEXT: [[TMP17:%.*]] = add i64 [[TMP16]], [[STEP]]
; VF8UF1-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP17]]
; VF8UF1-NEXT: store i8 0, ptr [[TMP18]], align 1
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE4]]
@@ -740,9 +740,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF1-NEXT: [[TMP19:%.*]] = extractelement <8 x i1> [[TMP3]], i32 3
; VF8UF1-NEXT: br i1 [[TMP19]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
; VF8UF1: [[PRED_STORE_IF5]]:
-; VF8UF1-NEXT: [[TMP20:%.*]] = mul i64 3, [[TMP1]]
+; VF8UF1-NEXT: [[TMP20:%.*]] = mul i64 3, [[STEP]]
; VF8UF1-NEXT: [[TMP21:%.*]] = add i64 0, [[TMP20]]
-; VF8UF1-NEXT: [[TMP22:%.*]] = add i64 [[TMP21]], [[TMP1]]
+; VF8UF1-NEXT: [[TMP22:%.*]] = add i64 [[TMP21]], [[STEP]]
; VF8UF1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP22]]
; VF8UF1-NEXT: store i8 0, ptr [[TMP23]], align 1
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE6]]
@@ -750,9 +750,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF1-NEXT: [[TMP24:%.*]] = extractelement <8 x i1> [[TMP3]], i32 4
; VF8UF1-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
; VF8UF1: [[PRED_STORE_IF7]]:
-; VF8UF1-NEXT: [[TMP25:%.*]] = mul i64 4, [[TMP1]]
+; VF8UF1-NEXT: [[TMP25:%.*]] = mul i64 4, [[STEP]]
; VF8UF1-NEXT: [[TMP26:%.*]] = add i64 0, [[TMP25]]
-; VF8UF1-NEXT: [[TMP27:%.*]] = add i64 [[TMP26]], [[TMP1]]
+; VF8UF1-NEXT: [[TMP27:%.*]] = add i64 [[TMP26]], [[STEP]]
; VF8UF1-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP27]]
; VF8UF1-NEXT: store i8 0, ptr [[TMP28]], align 1
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE8]]
@@ -760,9 +760,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF1-NEXT: [[TMP29:%.*]] = extractelement <8 x i1> [[TMP3]], i32 5
; VF8UF1-NEXT: br i1 [[TMP29]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
; VF8UF1: [[PRED_STORE_IF9]]:
-; VF8UF1-NEXT: [[TMP30:%.*]] = mul i64 5, [[TMP1]]
+; VF8UF1-NEXT: [[TMP30:%.*]] = mul i64 5, [[STEP]]
; VF8UF1-NEXT: [[TMP31:%.*]] = add i64 0, [[TMP30]]
-; VF8UF1-NEXT: [[TMP32:%.*]] = add i64 [[TMP31]], [[TMP1]]
+; VF8UF1-NEXT: [[TMP32:%.*]] = add i64 [[TMP31]], [[STEP]]
; VF8UF1-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP32]]
; VF8UF1-NEXT: store i8 0, ptr [[TMP33]], align 1
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE10]]
@@ -770,9 +770,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF1-NEXT: [[TMP34:%.*]] = extractelement <8 x i1> [[TMP3]], i32 6
; VF8UF1-NEXT: br i1 [[TMP34]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
; VF8UF1: [[PRED_STORE_IF11]]:
-; VF8UF1-NEXT: [[TMP35:%.*]] = mul i64 6, [[TMP1]]
+; VF8UF1-NEXT: [[TMP35:%.*]] = mul i64 6, [[STEP]]
; VF8UF1-NEXT: [[TMP36:%.*]] = add i64 0, [[TMP35]]
-; VF8UF1-NEXT: [[TMP37:%.*]] = add i64 [[TMP36]], [[TMP1]]
+; VF8UF1-NEXT: [[TMP37:%.*]] = add i64 [[TMP36]], [[STEP]]
; VF8UF1-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP37]]
; VF8UF1-NEXT: store i8 0, ptr [[TMP38]], align 1
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE12]]
@@ -780,9 +780,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF1-NEXT: [[TMP39:%.*]] = extractelement <8 x i1> [[TMP3]], i32 7
; VF8UF1-NEXT: br i1 [[TMP39]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
; VF8UF1: [[PRED_STORE_IF13]]:
-; VF8UF1-NEXT: [[TMP40:%.*]] = mul i64 7, [[TMP1]]
+; VF8UF1-NEXT: [[TMP40:%.*]] = mul i64 7, [[STEP]]
; VF8UF1-NEXT: [[TMP41:%.*]] = add i64 0, [[TMP40]]
-; VF8UF1-NEXT: [[TMP42:%.*]] = add i64 [[TMP41]], [[TMP1]]
+; VF8UF1-NEXT: [[TMP42:%.*]] = add i64 [[TMP41]], [[STEP]]
; VF8UF1-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP42]]
; VF8UF1-NEXT: store i8 0, ptr [[TMP43]], align 1
; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE14]]
@@ -827,9 +827,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0
; VF8UF2-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF8UF2: [[PRED_STORE_IF]]:
-; VF8UF2-NEXT: [[TMP6:%.*]] = mul i64 0, [[TMP1]]
+; VF8UF2-NEXT: [[TMP6:%.*]] = mul i64 0, [[STEP]]
; VF8UF2-NEXT: [[TMP7:%.*]] = add i64 0, [[TMP6]]
-; VF8UF2-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], [[TMP1]]
+; VF8UF2-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], [[STEP]]
; VF8UF2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP8]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP9]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE]]
@@ -837,9 +837,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP3]], i32 1
; VF8UF2-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
; VF8UF2: [[PRED_STORE_IF1]]:
-; VF8UF2-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP1]]
+; VF8UF2-NEXT: [[TMP11:%.*]] = mul i64 1, [[STEP]]
; VF8UF2-NEXT: [[TMP12:%.*]] = add i64 0, [[TMP11]]
-; VF8UF2-NEXT: [[TMP13:%.*]] = add i64 [[TMP12]], [[TMP1]]
+; VF8UF2-NEXT: [[TMP13:%.*]] = add i64 [[TMP12]], [[STEP]]
; VF8UF2-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP13]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP14]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE2]]
@@ -847,9 +847,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP15:%.*]] = extractelement <8 x i1> [[TMP3]], i32 2
; VF8UF2-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
; VF8UF2: [[PRED_STORE_IF3]]:
-; VF8UF2-NEXT: [[TMP16:%.*]] = mul i64 2, [[TMP1]]
+; VF8UF2-NEXT: [[TMP16:%.*]] = mul i64 2, [[STEP]]
; VF8UF2-NEXT: [[TMP17:%.*]] = add i64 0, [[TMP16]]
-; VF8UF2-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], [[TMP1]]
+; VF8UF2-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], [[STEP]]
; VF8UF2-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP18]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP19]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE4]]
@@ -857,9 +857,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP3]], i32 3
; VF8UF2-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
; VF8UF2: [[PRED_STORE_IF5]]:
-; VF8UF2-NEXT: [[TMP21:%.*]] = mul i64 3, [[TMP1]]
+; VF8UF2-NEXT: [[TMP21:%.*]] = mul i64 3, [[STEP]]
; VF8UF2-NEXT: [[TMP22:%.*]] = add i64 0, [[TMP21]]
-; VF8UF2-NEXT: [[TMP23:%.*]] = add i64 [[TMP22]], [[TMP1]]
+; VF8UF2-NEXT: [[TMP23:%.*]] = add i64 [[TMP22]], [[STEP]]
; VF8UF2-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP23]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP24]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE6]]
@@ -867,9 +867,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP25:%.*]] = extractelement <8 x i1> [[TMP3]], i32 4
; VF8UF2-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
; VF8UF2: [[PRED_STORE_IF7]]:
-; VF8UF2-NEXT: [[TMP26:%.*]] = mul i64 4, [[TMP1]]
+; VF8UF2-NEXT: [[TMP26:%.*]] = mul i64 4, [[STEP]]
; VF8UF2-NEXT: [[TMP27:%.*]] = add i64 0, [[TMP26]]
-; VF8UF2-NEXT: [[TMP28:%.*]] = add i64 [[TMP27]], [[TMP1]]
+; VF8UF2-NEXT: [[TMP28:%.*]] = add i64 [[TMP27]], [[STEP]]
; VF8UF2-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP28]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP29]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE8]]
@@ -877,9 +877,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP30:%.*]] = extractelement <8 x i1> [[TMP3]], i32 5
; VF8UF2-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
; VF8UF2: [[PRED_STORE_IF9]]:
-; VF8UF2-NEXT: [[TMP31:%.*]] = mul i64 5, [[TMP1]]
+; VF8UF2-NEXT: [[TMP31:%.*]] = mul i64 5, [[STEP]]
; VF8UF2-NEXT: [[TMP32:%.*]] = add i64 0, [[TMP31]]
-; VF8UF2-NEXT: [[TMP33:%.*]] = add i64 [[TMP32]], [[TMP1]]
+; VF8UF2-NEXT: [[TMP33:%.*]] = add i64 [[TMP32]], [[STEP]]
; VF8UF2-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP33]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP34]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE10]]
@@ -887,9 +887,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP35:%.*]] = extractelement <8 x i1> [[TMP3]], i32 6
; VF8UF2-NEXT: br i1 [[TMP35]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
; VF8UF2: [[PRED_STORE_IF11]]:
-; VF8UF2-NEXT: [[TMP36:%.*]] = mul i64 6, [[TMP1]]
+; VF8UF2-NEXT: [[TMP36:%.*]] = mul i64 6, [[STEP]]
; VF8UF2-NEXT: [[TMP37:%.*]] = add i64 0, [[TMP36]]
-; VF8UF2-NEXT: [[TMP38:%.*]] = add i64 [[TMP37]], [[TMP1]]
+; VF8UF2-NEXT: [[TMP38:%.*]] = add i64 [[TMP37]], [[STEP]]
; VF8UF2-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP38]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP39]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE12]]
@@ -897,9 +897,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP40:%.*]] = extractelement <8 x i1> [[TMP3]], i32 7
; VF8UF2-NEXT: br i1 [[TMP40]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
; VF8UF2: [[PRED_STORE_IF13]]:
-; VF8UF2-NEXT: [[TMP41:%.*]] = mul i64 7, [[TMP1]]
+; VF8UF2-NEXT: [[TMP41:%.*]] = mul i64 7, [[STEP]]
; VF8UF2-NEXT: [[TMP42:%.*]] = add i64 0, [[TMP41]]
-; VF8UF2-NEXT: [[TMP43:%.*]] = add i64 [[TMP42]], [[TMP1]]
+; VF8UF2-NEXT: [[TMP43:%.*]] = add i64 [[TMP42]], [[STEP]]
; VF8UF2-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP43]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP44]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE14]]
@@ -907,9 +907,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP45:%.*]] = extractelement <8 x i1> [[TMP4]], i32 0
; VF8UF2-NEXT: br i1 [[TMP45]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
; VF8UF2: [[PRED_STORE_IF15]]:
-; VF8UF2-NEXT: [[TMP46:%.*]] = mul i64 8, [[TMP1]]
+; VF8UF2-NEXT: [[TMP46:%.*]] = mul i64 8, [[STEP]]
; VF8UF2-NEXT: [[TMP47:%.*]] = add i64 0, [[TMP46]]
-; VF8UF2-NEXT: [[TMP48:%.*]] = add i64 [[TMP47]], [[TMP1]]
+; VF8UF2-NEXT: [[TMP48:%.*]] = add i64 [[TMP47]], [[STEP]]
; VF8UF2-NEXT: [[TMP49:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP48]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP49]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE16]]
@@ -917,9 +917,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP50:%.*]] = extractelement <8 x i1> [[TMP4]], i32 1
; VF8UF2-NEXT: br i1 [[TMP50]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
; VF8UF2: [[PRED_STORE_IF17]]:
-; VF8UF2-NEXT: [[TMP51:%.*]] = mul i64 9, [[TMP1]]
+; VF8UF2-NEXT: [[TMP51:%.*]] = mul i64 9, [[STEP]]
; VF8UF2-NEXT: [[TMP52:%.*]] = add i64 0, [[TMP51]]
-; VF8UF2-NEXT: [[TMP53:%.*]] = add i64 [[TMP52]], [[TMP1]]
+; VF8UF2-NEXT: [[TMP53:%.*]] = add i64 [[TMP52]], [[STEP]]
; VF8UF2-NEXT: [[TMP54:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP53]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP54]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE18]]
@@ -927,9 +927,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP55:%.*]] = extractelement <8 x i1> [[TMP4]], i32 2
; VF8UF2-NEXT: br i1 [[TMP55]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
; VF8UF2: [[PRED_STORE_IF19]]:
-; VF8UF2-NEXT: [[TMP56:%.*]] = mul i64 10, [[TMP1]]
+; VF8UF2-NEXT: [[TMP56:%.*]] = mul i64 10, [[STEP]]
; VF8UF2-NEXT: [[TMP57:%.*]] = add i64 0, [[TMP56]]
-; VF8UF2-NEXT: [[TMP58:%.*]] = add i64 [[TMP57]], [[TMP1]]
+; VF8UF2-NEXT: [[TMP58:%.*]] = add i64 [[TMP57]], [[STEP]]
; VF8UF2-NEXT: [[TMP59:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP58]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP59]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE20]]
@@ -937,9 +937,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP60:%.*]] = extractelement <8 x i1> [[TMP4]], i32 3
; VF8UF2-NEXT: br i1 [[TMP60]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
; VF8UF2: [[PRED_STORE_IF21]]:
-; VF8UF2-NEXT: [[TMP61:%.*]] = mul i64 11, [[TMP1]]
+; VF8UF2-NEXT: [[TMP61:%.*]] = mul i64 11, [[STEP]]
; VF8UF2-NEXT: [[TMP62:%.*]] = add i64 0, [[TMP61]]
-; VF8UF2-NEXT: [[TMP63:%.*]] = add i64 [[TMP62]], [[TMP1]]
+; VF8UF2-NEXT: [[TMP63:%.*]] = add i64 [[TMP62]], [[STEP]]
; VF8UF2-NEXT: [[TMP64:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP63]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP64]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE22]]
@@ -947,9 +947,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP65:%.*]] = extractelement <8 x i1> [[TMP4]], i32 4
; VF8UF2-NEXT: br i1 [[TMP65]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
; VF8UF2: [[PRED_STORE_IF23]]:
-; VF8UF2-NEXT: [[TMP66:%.*]] = mul i64 12, [[TMP1]]
+; VF8UF2-NEXT: [[TMP66:%.*]] = mul i64 12, [[STEP]]
; VF8UF2-NEXT: [[TMP67:%.*]] = add i64 0, [[TMP66]]
-; VF8UF2-NEXT: [[TMP68:%.*]] = add i64 [[TMP67]], [[TMP1]]
+; VF8UF2-NEXT: [[TMP68:%.*]] = add i64 [[TMP67]], [[STEP]]
; VF8UF2-NEXT: [[TMP69:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP68]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP69]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE24]]
@@ -957,9 +957,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP70:%.*]] = extractelement <8 x i1> [[TMP4]], i32 5
; VF8UF2-NEXT: br i1 [[TMP70]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
; VF8UF2: [[PRED_STORE_IF25]]:
-; VF8UF2-NEXT: [[TMP71:%.*]] = mul i64 13, [[TMP1]]
+; VF8UF2-NEXT: [[TMP71:%.*]] = mul i64 13, [[STEP]]
; VF8UF2-NEXT: [[TMP72:%.*]] = add i64 0, [[TMP71]]
-; VF8UF2-NEXT: [[TMP73:%.*]] = add i64 [[TMP72]], [[TMP1]]
+; VF8UF2-NEXT: [[TMP73:%.*]] = add i64 [[TMP72]], [[STEP]]
; VF8UF2-NEXT: [[TMP74:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP73]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP74]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE26]]
@@ -967,9 +967,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP75:%.*]] = extractelement <8 x i1> [[TMP4]], i32 6
; VF8UF2-NEXT: br i1 [[TMP75]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
; VF8UF2: [[PRED_STORE_IF27]]:
-; VF8UF2-NEXT: [[TMP76:%.*]] = mul i64 14, [[TMP1]]
+; VF8UF2-NEXT: [[TMP76:%.*]] = mul i64 14, [[STEP]]
; VF8UF2-NEXT: [[TMP77:%.*]] = add i64 0, [[TMP76]]
-; VF8UF2-NEXT: [[TMP78:%.*]] = add i64 [[TMP77]], [[TMP1]]
+; VF8UF2-NEXT: [[TMP78:%.*]] = add i64 [[TMP77]], [[STEP]]
; VF8UF2-NEXT: [[TMP79:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP78]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP79]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE28]]
@@ -977,9 +977,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF8UF2-NEXT: [[TMP80:%.*]] = extractelement <8 x i1> [[TMP4]], i32 7
; VF8UF2-NEXT: br i1 [[TMP80]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30:.*]]
; VF8UF2: [[PRED_STORE_IF29]]:
-; VF8UF2-NEXT: [[TMP81:%.*]] = mul i64 15, [[TMP1]]
+; VF8UF2-NEXT: [[TMP81:%.*]] = mul i64 15, [[STEP]]
; VF8UF2-NEXT: [[TMP82:%.*]] = add i64 0, [[TMP81]]
-; VF8UF2-NEXT: [[TMP83:%.*]] = add i64 [[TMP82]], [[TMP1]]
+; VF8UF2-NEXT: [[TMP83:%.*]] = add i64 [[TMP82]], [[STEP]]
; VF8UF2-NEXT: [[TMP84:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP83]]
; VF8UF2-NEXT: store i8 0, ptr [[TMP84]], align 1
; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE30]]
@@ -1023,9 +1023,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP4:%.*]] = extractelement <16 x i1> [[TMP3]], i32 0
; VF16UF1-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; VF16UF1: [[PRED_STORE_IF]]:
-; VF16UF1-NEXT: [[TMP5:%.*]] = mul i64 0, [[TMP1]]
+; VF16UF1-NEXT: [[TMP5:%.*]] = mul i64 0, [[STEP]]
; VF16UF1-NEXT: [[TMP6:%.*]] = add i64 0, [[TMP5]]
-; VF16UF1-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], [[TMP1]]
+; VF16UF1-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], [[STEP]]
; VF16UF1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP8]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE]]
@@ -1033,9 +1033,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP9:%.*]] = extractelement <16 x i1> [[TMP3]], i32 1
; VF16UF1-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
; VF16UF1: [[PRED_STORE_IF1]]:
-; VF16UF1-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP1]]
+; VF16UF1-NEXT: [[TMP10:%.*]] = mul i64 1, [[STEP]]
; VF16UF1-NEXT: [[TMP11:%.*]] = add i64 0, [[TMP10]]
-; VF16UF1-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], [[TMP1]]
+; VF16UF1-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], [[STEP]]
; VF16UF1-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP12]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP13]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE2]]
@@ -1043,9 +1043,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP14:%.*]] = extractelement <16 x i1> [[TMP3]], i32 2
; VF16UF1-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
; VF16UF1: [[PRED_STORE_IF3]]:
-; VF16UF1-NEXT: [[TMP15:%.*]] = mul i64 2, [[TMP1]]
+; VF16UF1-NEXT: [[TMP15:%.*]] = mul i64 2, [[STEP]]
; VF16UF1-NEXT: [[TMP16:%.*]] = add i64 0, [[TMP15]]
-; VF16UF1-NEXT: [[TMP17:%.*]] = add i64 [[TMP16]], [[TMP1]]
+; VF16UF1-NEXT: [[TMP17:%.*]] = add i64 [[TMP16]], [[STEP]]
; VF16UF1-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP17]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP18]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE4]]
@@ -1053,9 +1053,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP19:%.*]] = extractelement <16 x i1> [[TMP3]], i32 3
; VF16UF1-NEXT: br i1 [[TMP19]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
; VF16UF1: [[PRED_STORE_IF5]]:
-; VF16UF1-NEXT: [[TMP20:%.*]] = mul i64 3, [[TMP1]]
+; VF16UF1-NEXT: [[TMP20:%.*]] = mul i64 3, [[STEP]]
; VF16UF1-NEXT: [[TMP21:%.*]] = add i64 0, [[TMP20]]
-; VF16UF1-NEXT: [[TMP22:%.*]] = add i64 [[TMP21]], [[TMP1]]
+; VF16UF1-NEXT: [[TMP22:%.*]] = add i64 [[TMP21]], [[STEP]]
; VF16UF1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP22]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP23]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE6]]
@@ -1063,9 +1063,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP24:%.*]] = extractelement <16 x i1> [[TMP3]], i32 4
; VF16UF1-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
; VF16UF1: [[PRED_STORE_IF7]]:
-; VF16UF1-NEXT: [[TMP25:%.*]] = mul i64 4, [[TMP1]]
+; VF16UF1-NEXT: [[TMP25:%.*]] = mul i64 4, [[STEP]]
; VF16UF1-NEXT: [[TMP26:%.*]] = add i64 0, [[TMP25]]
-; VF16UF1-NEXT: [[TMP27:%.*]] = add i64 [[TMP26]], [[TMP1]]
+; VF16UF1-NEXT: [[TMP27:%.*]] = add i64 [[TMP26]], [[STEP]]
; VF16UF1-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP27]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP28]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE8]]
@@ -1073,9 +1073,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP29:%.*]] = extractelement <16 x i1> [[TMP3]], i32 5
; VF16UF1-NEXT: br i1 [[TMP29]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
; VF16UF1: [[PRED_STORE_IF9]]:
-; VF16UF1-NEXT: [[TMP30:%.*]] = mul i64 5, [[TMP1]]
+; VF16UF1-NEXT: [[TMP30:%.*]] = mul i64 5, [[STEP]]
; VF16UF1-NEXT: [[TMP31:%.*]] = add i64 0, [[TMP30]]
-; VF16UF1-NEXT: [[TMP32:%.*]] = add i64 [[TMP31]], [[TMP1]]
+; VF16UF1-NEXT: [[TMP32:%.*]] = add i64 [[TMP31]], [[STEP]]
; VF16UF1-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP32]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP33]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE10]]
@@ -1083,9 +1083,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP34:%.*]] = extractelement <16 x i1> [[TMP3]], i32 6
; VF16UF1-NEXT: br i1 [[TMP34]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
; VF16UF1: [[PRED_STORE_IF11]]:
-; VF16UF1-NEXT: [[TMP35:%.*]] = mul i64 6, [[TMP1]]
+; VF16UF1-NEXT: [[TMP35:%.*]] = mul i64 6, [[STEP]]
; VF16UF1-NEXT: [[TMP36:%.*]] = add i64 0, [[TMP35]]
-; VF16UF1-NEXT: [[TMP37:%.*]] = add i64 [[TMP36]], [[TMP1]]
+; VF16UF1-NEXT: [[TMP37:%.*]] = add i64 [[TMP36]], [[STEP]]
; VF16UF1-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP37]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP38]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE12]]
@@ -1093,9 +1093,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP39:%.*]] = extractelement <16 x i1> [[TMP3]], i32 7
; VF16UF1-NEXT: br i1 [[TMP39]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
; VF16UF1: [[PRED_STORE_IF13]]:
-; VF16UF1-NEXT: [[TMP40:%.*]] = mul i64 7, [[TMP1]]
+; VF16UF1-NEXT: [[TMP40:%.*]] = mul i64 7, [[STEP]]
; VF16UF1-NEXT: [[TMP41:%.*]] = add i64 0, [[TMP40]]
-; VF16UF1-NEXT: [[TMP42:%.*]] = add i64 [[TMP41]], [[TMP1]]
+; VF16UF1-NEXT: [[TMP42:%.*]] = add i64 [[TMP41]], [[STEP]]
; VF16UF1-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP42]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP43]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE14]]
@@ -1103,9 +1103,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP44:%.*]] = extractelement <16 x i1> [[TMP3]], i32 8
; VF16UF1-NEXT: br i1 [[TMP44]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
; VF16UF1: [[PRED_STORE_IF15]]:
-; VF16UF1-NEXT: [[TMP45:%.*]] = mul i64 8, [[TMP1]]
+; VF16UF1-NEXT: [[TMP45:%.*]] = mul i64 8, [[STEP]]
; VF16UF1-NEXT: [[TMP46:%.*]] = add i64 0, [[TMP45]]
-; VF16UF1-NEXT: [[TMP47:%.*]] = add i64 [[TMP46]], [[TMP1]]
+; VF16UF1-NEXT: [[TMP47:%.*]] = add i64 [[TMP46]], [[STEP]]
; VF16UF1-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP47]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP48]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE16]]
@@ -1113,9 +1113,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP49:%.*]] = extractelement <16 x i1> [[TMP3]], i32 9
; VF16UF1-NEXT: br i1 [[TMP49]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
; VF16UF1: [[PRED_STORE_IF17]]:
-; VF16UF1-NEXT: [[TMP50:%.*]] = mul i64 9, [[TMP1]]
+; VF16UF1-NEXT: [[TMP50:%.*]] = mul i64 9, [[STEP]]
; VF16UF1-NEXT: [[TMP51:%.*]] = add i64 0, [[TMP50]]
-; VF16UF1-NEXT: [[TMP52:%.*]] = add i64 [[TMP51]], [[TMP1]]
+; VF16UF1-NEXT: [[TMP52:%.*]] = add i64 [[TMP51]], [[STEP]]
; VF16UF1-NEXT: [[TMP53:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP52]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP53]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE18]]
@@ -1123,9 +1123,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP54:%.*]] = extractelement <16 x i1> [[TMP3]], i32 10
; VF16UF1-NEXT: br i1 [[TMP54]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
; VF16UF1: [[PRED_STORE_IF19]]:
-; VF16UF1-NEXT: [[TMP55:%.*]] = mul i64 10, [[TMP1]]
+; VF16UF1-NEXT: [[TMP55:%.*]] = mul i64 10, [[STEP]]
; VF16UF1-NEXT: [[TMP56:%.*]] = add i64 0, [[TMP55]]
-; VF16UF1-NEXT: [[TMP57:%.*]] = add i64 [[TMP56]], [[TMP1]]
+; VF16UF1-NEXT: [[TMP57:%.*]] = add i64 [[TMP56]], [[STEP]]
; VF16UF1-NEXT: [[TMP58:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP57]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP58]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE20]]
@@ -1133,9 +1133,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP59:%.*]] = extractelement <16 x i1> [[TMP3]], i32 11
; VF16UF1-NEXT: br i1 [[TMP59]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
; VF16UF1: [[PRED_STORE_IF21]]:
-; VF16UF1-NEXT: [[TMP60:%.*]] = mul i64 11, [[TMP1]]
+; VF16UF1-NEXT: [[TMP60:%.*]] = mul i64 11, [[STEP]]
; VF16UF1-NEXT: [[TMP61:%.*]] = add i64 0, [[TMP60]]
-; VF16UF1-NEXT: [[TMP62:%.*]] = add i64 [[TMP61]], [[TMP1]]
+; VF16UF1-NEXT: [[TMP62:%.*]] = add i64 [[TMP61]], [[STEP]]
; VF16UF1-NEXT: [[TMP63:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP62]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP63]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE22]]
@@ -1143,9 +1143,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP64:%.*]] = extractelement <16 x i1> [[TMP3]], i32 12
; VF16UF1-NEXT: br i1 [[TMP64]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
; VF16UF1: [[PRED_STORE_IF23]]:
-; VF16UF1-NEXT: [[TMP65:%.*]] = mul i64 12, [[TMP1]]
+; VF16UF1-NEXT: [[TMP65:%.*]] = mul i64 12, [[STEP]]
; VF16UF1-NEXT: [[TMP66:%.*]] = add i64 0, [[TMP65]]
-; VF16UF1-NEXT: [[TMP67:%.*]] = add i64 [[TMP66]], [[TMP1]]
+; VF16UF1-NEXT: [[TMP67:%.*]] = add i64 [[TMP66]], [[STEP]]
; VF16UF1-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP67]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP68]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE24]]
@@ -1153,9 +1153,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP69:%.*]] = extractelement <16 x i1> [[TMP3]], i32 13
; VF16UF1-NEXT: br i1 [[TMP69]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
; VF16UF1: [[PRED_STORE_IF25]]:
-; VF16UF1-NEXT: [[TMP70:%.*]] = mul i64 13, [[TMP1]]
+; VF16UF1-NEXT: [[TMP70:%.*]] = mul i64 13, [[STEP]]
; VF16UF1-NEXT: [[TMP71:%.*]] = add i64 0, [[TMP70]]
-; VF16UF1-NEXT: [[TMP72:%.*]] = add i64 [[TMP71]], [[TMP1]]
+; VF16UF1-NEXT: [[TMP72:%.*]] = add i64 [[TMP71]], [[STEP]]
; VF16UF1-NEXT: [[TMP73:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP72]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP73]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE26]]
@@ -1163,9 +1163,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP74:%.*]] = extractelement <16 x i1> [[TMP3]], i32 14
; VF16UF1-NEXT: br i1 [[TMP74]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
; VF16UF1: [[PRED_STORE_IF27]]:
-; VF16UF1-NEXT: [[TMP75:%.*]] = mul i64 14, [[TMP1]]
+; VF16UF1-NEXT: [[TMP75:%.*]] = mul i64 14, [[STEP]]
; VF16UF1-NEXT: [[TMP76:%.*]] = add i64 0, [[TMP75]]
-; VF16UF1-NEXT: [[TMP77:%.*]] = add i64 [[TMP76]], [[TMP1]]
+; VF16UF1-NEXT: [[TMP77:%.*]] = add i64 [[TMP76]], [[STEP]]
; VF16UF1-NEXT: [[TMP78:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP77]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP78]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE28]]
@@ -1173,9 +1173,9 @@ define void @scev_expand_step(i64 %x, ptr %dst) {
; VF16UF1-NEXT: [[TMP79:%.*]] = extractelement <16 x i1> [[TMP3]], i32 15
; VF16UF1-NEXT: br i1 [[TMP79]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30:.*]]
; VF16UF1: [[PRED_STORE_IF29]]:
-; VF16UF1-NEXT: [[TMP80:%.*]] = mul i64 15, [[TMP1]]
+; VF16UF1-NEXT: [[TMP80:%.*]] = mul i64 15, [[STEP]]
; VF16UF1-NEXT: [[TMP81:%.*]] = add i64 0, [[TMP80]]
-; VF16UF1-NEXT: [[TMP82:%.*]] = add i64 [[TMP81]], [[TMP1]]
+; VF16UF1-NEXT: [[TMP82:%.*]] = add i64 [[TMP81]], [[STEP]]
; VF16UF1-NEXT: [[TMP83:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP82]]
; VF16UF1-NEXT: store i8 0, ptr [[TMP83]], align 1
; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE30]]
diff --git a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll
index 76d5c5299686e..28739471eac2f 100644
--- a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll
+++ b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll
@@ -417,11 +417,11 @@ define void @zext_of_i1_stride(i1 %g, ptr %dst) mustprogress {
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
-; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], [[TMP1]]
+; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], [[G_64]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], [[TMP1]]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], [[G_64]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: store <4 x i16> splat (i16 1), ptr [[TMP4]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -479,7 +479,7 @@ define void @sext_of_i1_stride(i1 %g, ptr %dst) mustprogress {
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
-; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], [[TMP2]]
+; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], [[G_64]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 0
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
index 54b1e060720e8..b71c3664947ec 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
@@ -77,11 +77,11 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) {
; CHECK-NEXT: Successor(s): vector.body
; CHECK-EMPTY:
; CHECK-NEXT: vector.body:
-; CHECK-NEXT: vp<[[VPTR2:%.]]> = vector-pointer ir<%A>, ir<1>
; CHECK-NEXT: WIDEN ir<%l> = load ir<%A>
; CHECK-NEXT: WIDEN ir<%add> = add nsw ir<%l>, ir<10>
+; CHECK-NEXT: vp<[[VPTR2:%.]]> = vector-pointer ir<%A>, ir<1>
; CHECK-NEXT: WIDEN store ir<%A>, ir<%add>
-; CHECK-NEXT: WIDEN store vp<[[VPTR3:%.*]]>, ir<%add>
+; CHECK-NEXT: WIDEN store vp<[[VPTR2]]>, ir<%add>
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/indvars-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/indvars-vectorization.ll
index 54d00d54dce26..b056f44a6c469 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/indvars-vectorization.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/indvars-vectorization.ll
@@ -50,12 +50,12 @@ define void @s172(i32 noundef %xa, i32 noundef %xb, ptr noundef %a, ptr noundef
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[FOR_BODY_PREHEADER13]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP8]], -8
-; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[N_VEC]], [[TMP8]]
+; CHECK-NEXT: [[TMP18:%.*]] = mul nuw i64 [[N_VEC]], [[TMP1]]
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[TMP18]], [[TMP0]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[INDEX]], [[TMP8]]
+; CHECK-NEXT: [[TMP19:%.*]] = mul nuw i64 [[INDEX]], [[TMP1]]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[TMP19]], [[TMP0]]
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP20]], i64 16