[llvm] [VPlan] Simplify pow-of-2 (mul|udiv) -> (shl|lshr) (PR #172477)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 16 05:00:52 PST 2025
https://github.com/artagnon created https://github.com/llvm/llvm-project/pull/172477
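Teach VPlan simplification to rewrite multiplications and unsigned divisions by a power-of-2 constant into the equivalent shifts, carrying over the IR flags and debug location of the original recipe. A minimal sketch of the intended effect on the emitted IR (value names are illustrative):

    %a = mul nuw i64 %x, 8     ; 8 is 2^3
    %b = udiv i64 %y, 16       ; 16 is 2^4

becomes

    %a = shl nuw i64 %x, 3
    %b = lshr i64 %y, 4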
From 4c6ebcf46d0cd902dbecab212df922e3fd07a252 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Tue, 2 Dec 2025 11:55:21 +0000
Subject: [PATCH] [VPlan] Simplify pow-of-2 (mul|udiv) -> (shl|lshr)
---
.../Transforms/Vectorize/VPlanPatternMatch.h | 12 ++-
.../Transforms/Vectorize/VPlanTransforms.cpp | 15 +++
.../AArch64/clamped-trip-count.ll | 4 +-
.../AArch64/conditional-branches-cost.ll | 12 +--
.../AArch64/divs-with-scalable-vfs.ll | 8 +-
.../AArch64/eliminate-tail-predication.ll | 2 +-
.../gather-do-not-vectorize-addressing.ll | 2 +-
.../AArch64/induction-costs-sve.ll | 8 +-
.../AArch64/interleaving-reduction.ll | 6 +-
.../LoopVectorize/AArch64/invalid-costs.ll | 2 +-
.../AArch64/low_trip_count_predicates.ll | 6 +-
.../LoopVectorize/AArch64/masked-call.ll | 48 ++++-----
.../LoopVectorize/AArch64/masked_ldst_sme.ll | 2 +-
.../LoopVectorize/AArch64/optsize_minsize.ll | 6 +-
.../AArch64/outer_loop_prefer_scalable.ll | 2 +-
.../AArch64/partial-reduce-chained.ll | 36 +++----
.../AArch64/partial-reduce-constant-ops.ll | 6 +-
.../partial-reduce-dot-product-epilogue.ll | 12 +--
.../partial-reduce-dot-product-mixed.ll | 8 +-
.../AArch64/partial-reduce-dot-product.ll | 54 +++++-----
.../partial-reduce-incomplete-chains.ll | 2 +-
.../AArch64/partial-reduce-sub.ll | 8 +-
.../LoopVectorize/AArch64/partial-reduce.ll | 64 ++++++------
.../AArch64/pr60831-sve-inv-store-crash.ll | 25 +++--
.../AArch64/reduction-recurrence-costs-sve.ll | 16 +--
.../AArch64/scalable-avoid-scalarization.ll | 2 +-
.../AArch64/scalable-reduction-inloop-cond.ll | 4 +-
.../AArch64/scalable-strict-fadd.ll | 80 +++++++--------
.../AArch64/scalable-struct-return.ll | 6 +-
.../AArch64/simple_early_exit.ll | 4 +-
.../AArch64/single-early-exit-interleave.ll | 6 +-
.../LoopVectorize/AArch64/store-costs-sve.ll | 14 +--
.../sve-epilog-vect-inloop-reductions.ll | 4 +-
.../AArch64/sve-epilog-vect-reductions.ll | 4 +-
.../sve-epilog-vect-strict-reductions.ll | 4 +-
.../LoopVectorize/AArch64/sve-epilog-vect.ll | 48 ++++-----
.../AArch64/sve-epilog-vscale-fixed.ll | 10 +-
.../LoopVectorize/AArch64/sve-fneg.ll | 4 +-
.../AArch64/sve-inductions-unusual-types.ll | 8 +-
.../AArch64/sve-interleaved-accesses.ll | 25 +----
.../LoopVectorize/AArch64/sve-inv-store.ll | 4 +-
.../AArch64/sve-live-out-pointer-induction.ll | 4 +-
.../AArch64/sve-low-trip-count.ll | 2 +-
.../LoopVectorize/AArch64/sve-multiexit.ll | 8 +-
.../AArch64/sve-predicated-costs.ll | 4 +-
.../sve-runtime-check-size-based-threshold.ll | 4 +-
.../AArch64/sve-tail-folding-forced.ll | 2 +-
.../AArch64/sve-tail-folding-optsize.ll | 2 +-
.../AArch64/sve-tail-folding-reductions.ll | 12 +--
.../AArch64/sve-tail-folding-unroll.ll | 12 +--
.../LoopVectorize/AArch64/sve-tail-folding.ll | 25 +++--
.../AArch64/sve-vscale-based-trip-counts.ll | 24 ++---
.../AArch64/sve-wide-lane-mask.ll | 24 ++---
.../AArch64/sve-widen-extractvalue.ll | 2 +-
.../LoopVectorize/AArch64/sve-widen-gep.ll | 4 +-
.../LoopVectorize/AArch64/sve-widen-phi.ll | 6 +-
.../AArch64/tail-folding-styles.ll | 10 +-
...row-interleave-to-widen-memory-scalable.ll | 18 ++--
...e-to-widen-memory-with-wide-ops-chained.ll | 2 +-
.../AArch64/type-shrinkage-zext-costs.ll | 12 +--
.../widen-gep-all-indices-invariant.ll | 2 +-
.../AArch64/wider-VF-for-callinst.ll | 4 +-
.../ARM/mve-gather-scatter-tailpred.ll | 6 +-
.../LoopVectorize/RISCV/dead-ops-cost.ll | 10 +-
.../RISCV/evl-compatible-loops.ll | 5 +-
.../first-order-recurrence-scalable-vf1.ll | 2 +-
.../LoopVectorize/RISCV/fminimumnum.ll | 16 +--
.../LoopVectorize/RISCV/induction-costs.ll | 9 +-
.../LoopVectorize/RISCV/inloop-reduction.ll | 12 +--
.../RISCV/interleaved-accesses.ll | 46 ++++-----
.../RISCV/masked_gather_scatter.ll | 10 +-
.../RISCV/partial-reduce-dot-product.ll | 16 +--
.../LoopVectorize/RISCV/reg-usage-prune-vf.ll | 36 +++----
.../RISCV/riscv-vector-reverse.ll | 16 +--
.../LoopVectorize/RISCV/strided-accesses.ll | 69 ++++++-------
.../RISCV/tail-folding-bin-unary-ops-args.ll | 36 +++----
.../RISCV/tail-folding-call-intrinsics.ll | 18 ++--
.../RISCV/tail-folding-cast-intrinsics.ll | 22 ++---
.../RISCV/tail-folding-cond-reduction.ll | 16 +--
.../LoopVectorize/RISCV/tail-folding-div.ll | 8 +-
.../tail-folding-fixed-order-recurrence.ll | 20 ++--
.../RISCV/tail-folding-inloop-reduction.ll | 28 +++---
.../RISCV/tail-folding-interleave.ll | 12 +--
.../RISCV/tail-folding-intermediate-store.ll | 4 +-
.../LoopVectorize/RISCV/tail-folding-iv32.ll | 2 +-
.../RISCV/tail-folding-masked-loadstore.ll | 2 +-
.../RISCV/tail-folding-ordered-reduction.ll | 2 +-
.../RISCV/tail-folding-reduction.ll | 28 +++---
.../RISCV/tail-folding-reverse-load-store.ll | 4 +-
.../RISCV/tail-folding-safe-dep-distance.ll | 12 +--
...sform-narrow-interleave-to-widen-memory.ll | 4 +-
.../truncate-to-minimal-bitwidth-evl-crash.ll | 2 +-
.../RISCV/vectorize-vp-intrinsics.ll | 2 +-
...demanding-all-lanes-and-first-lane-only.ll | 15 +--
.../X86/cost-constant-known-via-scev.ll | 4 +-
.../X86/epilog-vectorization-inductions.ll | 2 +-
.../LoopVectorize/X86/induction-costs.ll | 6 +-
.../LoopVectorize/X86/uniform_mem_op.ll | 8 +-
.../LoopVectorize/create-induction-resume.ll | 2 +-
.../first-order-recurrence-chains-vplan.ll | 16 +--
.../LoopVectorize/hoist-predicated-loads.ll | 6 +-
.../Transforms/LoopVectorize/if-reduction.ll | 2 +-
.../LoopVectorize/induction-step.ll | 10 +-
.../Transforms/LoopVectorize/induction.ll | 2 +-
.../LoopVectorize/load-deref-pred-align.ll | 8 +-
...row-to-single-scalar-widen-gep-scalable.ll | 2 +-
.../optimal-epilog-vectorization.ll | 8 +-
.../LoopVectorize/outer_loop_scalable.ll | 2 +-
.../LoopVectorize/pointer-induction.ll | 2 +-
.../pr30654-phiscev-sext-trunc.ll | 6 +-
.../LoopVectorize/pr58811-scev-expansion.ll | 12 +--
.../LoopVectorize/scalable-assume.ll | 12 +--
.../scalable-first-order-recurrence.ll | 34 +++----
.../LoopVectorize/scalable-inductions.ll | 6 +-
.../LoopVectorize/scalable-iv-outside-user.ll | 4 +-
.../LoopVectorize/scalable-lifetime.ll | 4 +-
...able-loop-unpredicated-body-scalar-tail.ll | 6 +-
.../LoopVectorize/scalable-predication.ll | 2 +-
.../scalable-reduction-inloop.ll | 4 +-
.../scalable-trunc-min-bitwidth.ll | 4 +-
.../LoopVectorize/scev-predicate-reasoning.ll | 2 +-
.../uniform_across_vf_induction1.ll | 80 +++++++--------
.../uniform_across_vf_induction1_div_urem.ll | 98 +++++++++----------
.../uniform_across_vf_induction2.ll | 94 +++++++++---------
.../vectorize-force-tail-with-evl.ll | 2 +-
.../vplan-printing-reductions.ll | 6 +-
.../vplan-sink-scalars-and-merge.ll | 14 +--
.../AArch64/sve-interleave-vectorization.ll | 2 +-
128 files changed, 843 insertions(+), 895 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index f082b970c7762..bb3c94218ce6b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -290,12 +290,8 @@ struct Recipe_match {
if ((!matchRecipeAndOpcode<RecipeTys>(R) && ...))
return false;
- if (R->getNumOperands() != std::tuple_size<Ops_t>::value) {
- assert(Opcode == Instruction::PHI &&
- "non-variadic recipe with matched opcode does not have the "
- "expected number of operands");
+ if (R->getNumOperands() != std::tuple_size<Ops_t>::value)
return false;
- }
auto IdxSeq = std::make_index_sequence<std::tuple_size<Ops_t>::value>();
if (all_of_tuple_elements(IdxSeq, [R](auto Op, unsigned Idx) {
@@ -547,6 +543,12 @@ m_c_Mul(const Op0_t &Op0, const Op1_t &Op1) {
return m_c_Binary<Instruction::Mul, Op0_t, Op1_t>(Op0, Op1);
}
+template <typename Op0_t, typename Op1_t>
+inline AllRecipe_match<Instruction::UDiv, Op0_t, Op1_t>
+m_UDiv(const Op0_t &Op0, const Op1_t &Op1) {
+ return m_Binary<Instruction::UDiv, Op0_t, Op1_t>(Op0, Op1);
+}
+
/// Match a binary AND operation.
template <typename Op0_t, typename Op1_t>
inline AllRecipe_commutative_match<Instruction::And, Op0_t, Op1_t>
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 0bcf131d5ea86..7f7842371af1d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1343,6 +1343,21 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
return Def->replaceAllUsesWith(
Def->getOperand(0) == A ? Def->getOperand(1) : Def->getOperand(0));
+ const APInt *APC;
+ if (match(Def, m_c_Mul(m_VPValue(), m_APInt(APC))) && APC->isPowerOf2())
+ return Def->replaceAllUsesWith(Builder.createNaryOp(
+ Instruction::Shl,
+ {Def->getOperand(0),
+ Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
+ *cast<VPRecipeWithIRFlags>(Def), Def->getDebugLoc()));
+
+ if (match(Def, m_UDiv(m_VPValue(), m_APInt(APC))) && APC->isPowerOf2())
+ return Def->replaceAllUsesWith(Builder.createNaryOp(
+ Instruction::LShr,
+ {Def->getOperand(0),
+ Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
+ *cast<VPRecipeWithIRFlags>(Def), Def->getDebugLoc()));
+
if (match(Def, m_Not(m_VPValue(A)))) {
if (match(A, m_Not(m_VPValue(A))))
return Def->replaceAllUsesWith(A);
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
index ac8095ae5c3e7..077bde70a537b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
@@ -9,7 +9,7 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 8)
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -71,7 +71,7 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
index 4f2933ad2f85c..d7d77cb4325d4 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
@@ -528,8 +528,8 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; DEFAULT: [[VECTOR_PH]]:
; DEFAULT-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; DEFAULT-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP4]], 4
-; DEFAULT-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP11]], 4
+; DEFAULT-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP4]], 2
+; DEFAULT-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP11]], 2
; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 257, [[TMP5]]
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 257, [[N_MOD_VF]]
; DEFAULT-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[N_VEC]], 8
@@ -545,7 +545,7 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 [[TMP8]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i16> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
; DEFAULT-NEXT: [[TMP9:%.*]] = uitofp <vscale x 4 x i16> [[BROADCAST_SPLAT]] to <vscale x 4 x double>
-; DEFAULT-NEXT: [[TMP14:%.*]] = mul nuw nsw i64 [[TMP11]], 2
+; DEFAULT-NEXT: [[TMP14:%.*]] = shl nuw nsw i64 [[TMP11]], 1
; DEFAULT-NEXT: [[TMP17:%.*]] = mul nuw nsw i64 [[TMP11]], 3
; DEFAULT-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[NEXT_GEP1]], i64 [[TMP11]]
; DEFAULT-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[NEXT_GEP1]], i64 [[TMP14]]
@@ -568,7 +568,7 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
; PRED-NEXT: br label %[[VECTOR_PH:.*]]
; PRED: [[VECTOR_PH]]:
; PRED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; PRED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; PRED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; PRED-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 2
; PRED-NEXT: [[TMP8:%.*]] = sub i64 257, [[TMP7]]
@@ -1219,7 +1219,7 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 {
; DEFAULT-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; DEFAULT: [[VECTOR_PH]]:
; DEFAULT-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; DEFAULT-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4
+; DEFAULT-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 2
; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP9]]
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i8> poison, i8 [[Y]], i64 0
@@ -1273,7 +1273,7 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 {
; PRED-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; PRED: [[VECTOR_PH]]:
; PRED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; PRED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16
+; PRED-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
; PRED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; PRED-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 4
; PRED-NEXT: [[TMP9:%.*]] = sub i64 [[TMP0]], [[TMP8]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
index 2b294696ebe89..99bbcad95b6de 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
@@ -21,8 +21,8 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) {
; CHECK-NEXT: br i1 [[TMP7]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP8]], 2
-; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP11]], 2
+; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP8]], 1
+; CHECK-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP11]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP9]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP18:%.*]] = sdiv i64 [[M]], [[CONV6]]
@@ -106,7 +106,7 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
; CHECK-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2
+; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 1
; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[N]], [[TMP11]]
@@ -220,7 +220,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
; CHECK-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2
+; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 1
; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[TMP0]], [[TMP11]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll
index 14c53cd89c922..74598e2063e48 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll
@@ -12,7 +12,7 @@ define void @f1(ptr %A) #0 {
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll b/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll
index 26a9545764091..d18283f831799 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll
@@ -77,7 +77,7 @@ define dso_local double @test(ptr nocapture noundef readonly %data, ptr nocaptur
; SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; SVE: vector.ph:
; SVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; SVE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
+; SVE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1
; SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]]
; SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; SVE-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
index 4cacc30a714b6..aa6e4df26d71b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
@@ -30,8 +30,8 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK3]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
; DEFAULT: [[VECTOR_PH]]:
; DEFAULT-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; DEFAULT-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP9]], 8
-; DEFAULT-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP13]], 2
+; DEFAULT-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP9]], 3
+; DEFAULT-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP13]], 1
; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP10]]
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i32> poison, i32 [[X]], i64 0
@@ -70,7 +70,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
; DEFAULT: [[VEC_EPILOG_PH]]:
; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; DEFAULT-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64()
-; DEFAULT-NEXT: [[TMP34:%.*]] = mul nuw i64 [[TMP33]], 4
+; DEFAULT-NEXT: [[TMP34:%.*]] = shl nuw i64 [[TMP33]], 2
; DEFAULT-NEXT: [[N_MOD_VF5:%.*]] = urem i64 [[TMP0]], [[TMP34]]
; DEFAULT-NEXT: [[N_VEC6:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF5]]
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i64 0
@@ -130,7 +130,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
; PRED-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; PRED: [[VECTOR_PH]]:
; PRED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; PRED-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 16
+; PRED-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 4
; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[X]], i64 0
; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll
index 040acb494a42e..fff9365baccb4 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll
@@ -144,8 +144,8 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) {
; INTERLEAVE-4-VLA-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; INTERLEAVE-4-VLA: vector.ph:
; INTERLEAVE-4-VLA-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; INTERLEAVE-4-VLA-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 4
-; INTERLEAVE-4-VLA-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 4
+; INTERLEAVE-4-VLA-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP2]], 2
+; INTERLEAVE-4-VLA-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP5]], 2
; INTERLEAVE-4-VLA-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; INTERLEAVE-4-VLA-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; INTERLEAVE-4-VLA-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -156,7 +156,7 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) {
; INTERLEAVE-4-VLA-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
; INTERLEAVE-4-VLA-NEXT: [[VEC_PHI4:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
; INTERLEAVE-4-VLA-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[INDEX]]
-; INTERLEAVE-4-VLA-NEXT: [[TMP9:%.*]] = mul nuw nsw i64 [[TMP5]], 2
+; INTERLEAVE-4-VLA-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP5]], 1
; INTERLEAVE-4-VLA-NEXT: [[TMP12:%.*]] = mul nuw nsw i64 [[TMP5]], 3
; INTERLEAVE-4-VLA-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP5]]
; INTERLEAVE-4-VLA-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP9]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll
index cc3b1c9c9db8a..de5a24666626c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll
@@ -16,7 +16,7 @@ define void @replicate_sdiv_conditional(ptr noalias %a, ptr noalias %b, ptr noal
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll
index cf45f3a88f37e..c340cfc9ad6cc 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll
@@ -67,7 +67,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef
; CHECK-VS1-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VS1: [[VECTOR_PH]]:
; CHECK-VS1-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VS1-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 16
+; CHECK-VS1-NEXT: [[TMP17:%.*]] = shl nuw i64 [[TMP16]], 4
; CHECK-VS1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], [[TMP17]]
; CHECK-VS1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; CHECK-VS1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[CONV]], i64 0
@@ -160,7 +160,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef
; CHECK-VS2-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VS2: [[VECTOR_PH]]:
; CHECK-VS2-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VS2-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 8
+; CHECK-VS2-NEXT: [[TMP17:%.*]] = shl nuw i64 [[TMP16]], 3
; CHECK-VS2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], [[TMP17]]
; CHECK-VS2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; CHECK-VS2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i8> poison, i8 [[CONV]], i64 0
@@ -393,7 +393,7 @@ define void @overflow_indvar_known_false(ptr nocapture noundef %p, i32 noundef %
; CHECK-NEXT: br i1 [[TMP28]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 [[TMP1]])
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[CONV]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
index 0a11e8e4390cb..00c7e6eecfb2c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
@@ -15,7 +15,7 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 {
; TFNONE-NEXT: br label %[[VECTOR_PH:.*]]
; TFNONE: [[VECTOR_PH]]:
; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; TFNONE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
+; TFNONE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1
; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; TFNONE-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -50,7 +50,7 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 {
; TFCOMMON-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; TFCOMMON-NEXT: [[ENTRY:.*]]:
; TFCOMMON-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; TFCOMMON-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
+; TFCOMMON-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 1
; TFCOMMON-NEXT: br label %[[VECTOR_BODY:.*]]
; TFCOMMON: [[VECTOR_BODY]]:
; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -72,8 +72,8 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 {
; TFA_INTERLEAVE-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; TFA_INTERLEAVE-NEXT: [[ENTRY:.*]]:
; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP3]], 2
-; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP9]], 2
+; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP3]], 1
+; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP9]], 1
; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025)
@@ -130,7 +130,7 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
; TFNONE-NEXT: br label %[[VECTOR_PH:.*]]
; TFNONE: [[VECTOR_PH]]:
; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; TFNONE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
+; TFNONE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1
; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; TFNONE-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -173,7 +173,7 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
; TFCOMMON-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] {
; TFCOMMON-NEXT: [[ENTRY:.*]]:
; TFCOMMON-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; TFCOMMON-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
+; TFCOMMON-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 1
; TFCOMMON-NEXT: br label %[[VECTOR_BODY:.*]]
; TFCOMMON: [[VECTOR_BODY]]:
; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -198,8 +198,8 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
; TFA_INTERLEAVE-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] {
; TFA_INTERLEAVE-NEXT: [[ENTRY:.*]]:
; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP3]], 2
-; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP9]], 2
+; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP3]], 1
+; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP9]], 1
; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025)
@@ -273,7 +273,7 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
; TFNONE-NEXT: br label %[[VECTOR_PH:.*]]
; TFNONE: [[VECTOR_PH]]:
; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; TFNONE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
+; TFNONE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1
; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; TFNONE-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -321,7 +321,7 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
; TFCOMMON-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] {
; TFCOMMON-NEXT: [[ENTRY:.*]]:
; TFCOMMON-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; TFCOMMON-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
+; TFCOMMON-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 1
; TFCOMMON-NEXT: br label %[[VECTOR_BODY:.*]]
; TFCOMMON: [[VECTOR_BODY]]:
; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -349,8 +349,8 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
; TFA_INTERLEAVE-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] {
; TFA_INTERLEAVE-NEXT: [[ENTRY:.*]]:
; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP3]], 2
-; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP9]], 2
+; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP3]], 1
+; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP9]], 1
; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025)
@@ -433,7 +433,7 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 {
; TFNONE-NEXT: br label %[[VECTOR_PH:.*]]
; TFNONE: [[VECTOR_PH]]:
; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; TFNONE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
+; TFNONE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1
; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; TFNONE-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -485,7 +485,7 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 {
; TFFALLBACK-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] {
; TFFALLBACK-NEXT: [[ENTRY:.*]]:
; TFFALLBACK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; TFFALLBACK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
+; TFFALLBACK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1
; TFFALLBACK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; TFFALLBACK-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; TFFALLBACK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -557,7 +557,7 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 {
; TFNONE-NEXT: br label %[[VECTOR_PH:.*]]
; TFNONE: [[VECTOR_PH]]:
; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; TFNONE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
+; TFNONE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1
; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; TFNONE-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -592,7 +592,7 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 {
; TFALWAYS-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] {
; TFALWAYS-NEXT: [[ENTRY:.*]]:
; TFALWAYS-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; TFALWAYS-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
+; TFALWAYS-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 1
; TFALWAYS-NEXT: br label %[[VECTOR_BODY:.*]]
; TFALWAYS: [[VECTOR_BODY]]:
; TFALWAYS-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -614,7 +614,7 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 {
; TFFALLBACK-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] {
; TFFALLBACK-NEXT: [[ENTRY:.*]]:
; TFFALLBACK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; TFFALLBACK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
+; TFFALLBACK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 1
; TFFALLBACK-NEXT: br label %[[VECTOR_BODY:.*]]
; TFFALLBACK: [[VECTOR_BODY]]:
; TFFALLBACK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -636,8 +636,8 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 {
; TFA_INTERLEAVE-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] {
; TFA_INTERLEAVE-NEXT: [[ENTRY:.*]]:
; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP3]], 2
-; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP9]], 2
+; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP3]], 1
+; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP9]], 1
; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025)
@@ -696,7 +696,7 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub
; TFNONE-NEXT: br label %[[VECTOR_PH:.*]]
; TFNONE: [[VECTOR_PH]]:
; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; TFNONE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
+; TFNONE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1
; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; TFNONE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[M]], i64 0
@@ -741,7 +741,7 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub
; TFALWAYS-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], double [[M:%.*]]) #[[ATTR0]] {
; TFALWAYS-NEXT: [[ENTRY:.*]]:
; TFALWAYS-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; TFALWAYS-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
+; TFALWAYS-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 1
; TFALWAYS-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[M]], i64 0
; TFALWAYS-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x double> [[BROADCAST_SPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
; TFALWAYS-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -770,7 +770,7 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub
; TFFALLBACK-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], double [[M:%.*]]) #[[ATTR0]] {
; TFFALLBACK-NEXT: [[ENTRY:.*]]:
; TFFALLBACK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; TFFALLBACK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
+; TFFALLBACK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 1
; TFFALLBACK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[M]], i64 0
; TFFALLBACK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x double> [[BROADCAST_SPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
; TFFALLBACK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -799,8 +799,8 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub
; TFA_INTERLEAVE-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], double [[M:%.*]]) #[[ATTR0]] {
; TFA_INTERLEAVE-NEXT: [[ENTRY:.*]]:
; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP3]], 2
-; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP9]], 2
+; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP3]], 1
+; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP9]], 1
; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked_ldst_sme.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked_ldst_sme.ll
index fe7f43f7f4b02..3887322cb95b1 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/masked_ldst_sme.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked_ldst_sme.ll
@@ -49,7 +49,7 @@ define void @wombat(i32 %arg, ptr %arg1, ptr %arg2, ptr %arg3, ptr %arg4, ptr %a
; CHECK-NEXT: br i1 [[CONFLICT_RDX27]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[ARG6]], i64 0
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
index bdbf08aecf6b3..b3d49f3b8b168 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
@@ -440,7 +440,7 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32
; DEFAULT-NEXT: br label %[[VECTOR_PH:.*]]
; DEFAULT: [[VECTOR_PH]]:
; DEFAULT-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; DEFAULT-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
+; DEFAULT-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4
; DEFAULT-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; DEFAULT-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
; DEFAULT-NEXT: [[TMP7:%.*]] = sub i64 15, [[TMP6]]
@@ -490,7 +490,7 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32
; OPTSIZE-NEXT: br label %[[VECTOR_PH:.*]]
; OPTSIZE: [[VECTOR_PH]]:
; OPTSIZE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; OPTSIZE-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
+; OPTSIZE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4
; OPTSIZE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; OPTSIZE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
; OPTSIZE-NEXT: [[TMP7:%.*]] = sub i64 15, [[TMP6]]
@@ -540,7 +540,7 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32
; MINSIZE-NEXT: br label %[[VECTOR_PH:.*]]
; MINSIZE: [[VECTOR_PH]]:
; MINSIZE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; MINSIZE-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
+; MINSIZE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4
; MINSIZE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; MINSIZE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
; MINSIZE-NEXT: [[TMP7:%.*]] = sub i64 15, [[TMP6]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll
index 5f8d7e7d24cc4..33f8794d38d61 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll
@@ -15,7 +15,7 @@ define void @foo() {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll
index dd0107b8c4bff..f90b26013fcbc 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll
@@ -58,7 +58,7 @@ define i32 @chained_partial_reduce_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE: vector.ph:
; CHECK-SVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
+; CHECK-SVE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4
; CHECK-SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]]
; CHECK-SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; CHECK-SVE-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -100,7 +100,7 @@ define i32 @chained_partial_reduce_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE-MAXBW: vector.ph:
; CHECK-SVE-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
; CHECK-SVE-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]]
; CHECK-SVE-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; CHECK-SVE-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -211,7 +211,7 @@ define i32 @chained_partial_reduce_add_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE: vector.ph:
; CHECK-SVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
+; CHECK-SVE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4
; CHECK-SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]]
; CHECK-SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; CHECK-SVE-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -252,7 +252,7 @@ define i32 @chained_partial_reduce_add_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE-MAXBW: vector.ph:
; CHECK-SVE-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
; CHECK-SVE-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]]
; CHECK-SVE-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; CHECK-SVE-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -363,7 +363,7 @@ define i32 @chained_partial_reduce_sub_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE: vector.ph:
; CHECK-SVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
+; CHECK-SVE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4
; CHECK-SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]]
; CHECK-SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; CHECK-SVE-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -405,7 +405,7 @@ define i32 @chained_partial_reduce_sub_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE-MAXBW: vector.ph:
; CHECK-SVE-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
; CHECK-SVE-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]]
; CHECK-SVE-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; CHECK-SVE-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -520,7 +520,7 @@ define i32 @chained_partial_reduce_sub_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE: vector.ph:
; CHECK-SVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
+; CHECK-SVE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4
; CHECK-SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]]
; CHECK-SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; CHECK-SVE-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -563,7 +563,7 @@ define i32 @chained_partial_reduce_sub_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE-MAXBW: vector.ph:
; CHECK-SVE-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
; CHECK-SVE-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]]
; CHECK-SVE-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; CHECK-SVE-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -680,7 +680,7 @@ define i32 @chained_partial_reduce_add_add_add(ptr %a, ptr %b, ptr %c, i32 %N) #
; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE: vector.ph:
; CHECK-SVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
+; CHECK-SVE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4
; CHECK-SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]]
; CHECK-SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; CHECK-SVE-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -723,7 +723,7 @@ define i32 @chained_partial_reduce_add_add_add(ptr %a, ptr %b, ptr %c, i32 %N) #
; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE-MAXBW: vector.ph:
; CHECK-SVE-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
; CHECK-SVE-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]]
; CHECK-SVE-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; CHECK-SVE-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -844,7 +844,7 @@ define i32 @chained_partial_reduce_sub_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) #
; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE: vector.ph:
; CHECK-SVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
+; CHECK-SVE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4
; CHECK-SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]]
; CHECK-SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; CHECK-SVE-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -889,7 +889,7 @@ define i32 @chained_partial_reduce_sub_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) #
; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE-MAXBW: vector.ph:
; CHECK-SVE-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
; CHECK-SVE-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]]
; CHECK-SVE-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; CHECK-SVE-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -1008,7 +1008,7 @@ define i32 @chained_partial_reduce_madd_extadd(ptr %a, ptr %b, ptr %c, i32 %N) #
; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE: vector.ph:
; CHECK-SVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
+; CHECK-SVE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4
; CHECK-SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]]
; CHECK-SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; CHECK-SVE-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -1048,7 +1048,7 @@ define i32 @chained_partial_reduce_madd_extadd(ptr %a, ptr %b, ptr %c, i32 %N) #
; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE-MAXBW: vector.ph:
; CHECK-SVE-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
; CHECK-SVE-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]]
; CHECK-SVE-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; CHECK-SVE-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -1152,7 +1152,7 @@ define i32 @chained_partial_reduce_extadd_extadd(ptr %a, ptr %b, i32 %N) #0 {
; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE: vector.ph:
; CHECK-SVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
+; CHECK-SVE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4
; CHECK-SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]]
; CHECK-SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; CHECK-SVE-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -1188,7 +1188,7 @@ define i32 @chained_partial_reduce_extadd_extadd(ptr %a, ptr %b, i32 %N) #0 {
; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE-MAXBW: vector.ph:
; CHECK-SVE-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
; CHECK-SVE-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]]
; CHECK-SVE-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; CHECK-SVE-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -1289,7 +1289,7 @@ define i32 @chained_partial_reduce_extadd_madd(ptr %a, ptr %b, ptr %c, i32 %N) #
; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE: vector.ph:
; CHECK-SVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
+; CHECK-SVE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4
; CHECK-SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]]
; CHECK-SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; CHECK-SVE-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -1329,7 +1329,7 @@ define i32 @chained_partial_reduce_extadd_madd(ptr %a, ptr %b, ptr %c, i32 %N) #
; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-SVE-MAXBW: vector.ph:
; CHECK-SVE-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
; CHECK-SVE-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]]
; CHECK-SVE-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; CHECK-SVE-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-constant-ops.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-constant-ops.ll
index b033f6051f812..ba00cb0c543fd 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-constant-ops.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-constant-ops.ll
@@ -156,7 +156,7 @@ define i32 @red_zext_mul_by_256(ptr %start, ptr %end) {
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
-; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i32> [[TMP3]], splat (i32 256)
+; CHECK-NEXT: [[TMP4:%.*]] = shl <16 x i32> [[TMP3]], splat (i32 8)
; CHECK-NEXT: [[TMP5]] = add <16 x i32> [[VEC_PHI]], [[TMP4]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -290,7 +290,7 @@ define i32 @red_sext_mul_by_128(ptr %start, ptr %end) {
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
-; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i32> [[TMP3]], splat (i32 128)
+; CHECK-NEXT: [[TMP4:%.*]] = shl <16 x i32> [[TMP3]], splat (i32 7)
; CHECK-NEXT: [[TMP5]] = add <16 x i32> [[VEC_PHI]], [[TMP4]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -423,7 +423,7 @@ define i32 @red_sext_mul_by_256(ptr %start, ptr %end) {
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
-; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i32> [[TMP3]], splat (i32 256)
+; CHECK-NEXT: [[TMP4:%.*]] = shl <16 x i32> [[TMP3]], splat (i32 8)
; CHECK-NEXT: [[TMP5]] = add <16 x i32> [[VEC_PHI]], [[TMP4]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll
index fed979e34e5d5..5c2c67337625a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll
@@ -11,7 +11,7 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -85,7 +85,7 @@ define void @dotp_small_epilogue_vf(i64 %idx.neg, i8 %a) #1 {
; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP4]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[IV_NEXT]]
-; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[IV_NEXT]]
@@ -93,7 +93,7 @@ define void @dotp_small_epilogue_vf(i64 %idx.neg, i8 %a) #1 {
; CHECK: vec.epilog.iter.check:
; CHECK-NEXT: [[IND_END6:%.*]] = add i64 [[IDX_NEG]], [[IV_NEXT]]
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4
-; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF4:![0-9]+]]
+; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF5:![0-9]+]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT]], [[WHILE_BODY]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP6]], [[WHILE_BODY]] ], [ 0, [[ENTRY]] ]
@@ -116,7 +116,7 @@ define void @dotp_small_epilogue_vf(i64 %idx.neg, i8 %a) #1 {
; CHECK-NEXT: [[TMP13]] = add <4 x i32> [[TMP14]], [[VEC_PHI9]]
; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX9]], 4
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC5]]
-; CHECK-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP13]])
; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC5]]
@@ -140,7 +140,7 @@ define void @dotp_small_epilogue_vf(i64 %idx.neg, i8 %a) #1 {
; CHECK-NEXT: [[CMP_IV_NEG:%.*]] = icmp ugt i64 [[IV_NEG]], 0
; CHECK-NEXT: [[CMP_IV:%.*]] = icmp ne i64 [[ACCUM1]], -1
; CHECK-NEXT: [[EXITCOND:%.*]] = and i1 [[CMP_IV_NEG]], [[CMP_IV]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[WHILE_BODY1]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[WHILE_BODY1]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: while.end.loopexit:
; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[ADD]], [[WHILE_BODY1]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ [[TMP15]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret void
@@ -419,7 +419,7 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) {
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16)
; CHECK-NEXT: [[TMP181:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP181]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP181]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP182:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]])
; CHECK-NEXT: br label [[EXIT:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll
index 916bda1e245f7..dc7ed20ac0927 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll
@@ -12,8 +12,8 @@ define i32 @sudot(ptr %a, ptr %b) #0 {
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP2]], 16
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP1]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -112,8 +112,8 @@ define i32 @usdot(ptr %a, ptr %b) #0 {
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP2]], 16
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP1]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
index 5be1fac8c6287..7db519ffceb98 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
@@ -13,7 +13,7 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVE1: vector.ph:
; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
+; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4
; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -43,8 +43,8 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP0]], 16
-; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP4]], 2
+; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP0]], 4
+; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP4]], 1
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -84,7 +84,7 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
; CHECK-MAXBW-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
+; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -878,7 +878,7 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVE1: vector.ph:
; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -910,8 +910,8 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP2]], 8
-; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP1]], 2
+; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP2]], 3
+; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 1
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -945,7 +945,7 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 {
; CHECK-MAXBW-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -1003,7 +1003,7 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVE1: vector.ph:
; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 16
+; CHECK-INTERLEAVE1-NEXT: [[TMP14:%.*]] = shl nuw i64 [[TMP13]], 4
; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[NUM_IN]], [[TMP14]]
; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 [[NUM_IN]], [[N_MOD_VF]]
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -1069,8 +1069,8 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP13]], 16
-; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP16]], 2
+; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP13]], 4
+; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = shl nuw i64 [[TMP16]], 1
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[NUM_IN]], [[TMP14]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 [[NUM_IN]], [[N_MOD_VF]]
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -1176,7 +1176,7 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 {
; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
+; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[NUM_IN]], [[TMP3]]
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[NUM_IN]], [[N_MOD_VF]]
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -1295,7 +1295,7 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVE1: vector.ph:
; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 16
+; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 4
; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]]
@@ -1333,7 +1333,7 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 16
+; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 4
; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]]
@@ -1371,7 +1371,7 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) #0 {
; CHECK-MAXBW-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 16
+; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 4
; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]]
@@ -1432,7 +1432,7 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVE1: vector.ph:
; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
+; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -1466,8 +1466,8 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP0]], 8
-; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP4]], 2
+; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP0]], 3
+; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP4]], 1
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -1511,7 +1511,7 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) #0 {
; CHECK-MAXBW-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -1572,7 +1572,7 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVE1: vector.ph:
; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
+; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4
; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 41, [[TMP3]]
; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 41, [[N_MOD_VF]]
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -1606,8 +1606,8 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP2]], 16
-; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP6]], 2
+; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP2]], 4
+; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP6]], 1
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 41, [[TMP3]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 41, [[N_MOD_VF]]
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -1651,7 +1651,7 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 {
; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
+; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 41, [[TMP3]]
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 41, [[N_MOD_VF]]
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -2145,7 +2145,7 @@ define dso_local i32 @not_dotp_vscale1(ptr %a, ptr %b, i32 %n, i64 %cost) #0 {
; CHECK-INTERLEAVE1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVE1: vector.ph:
; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 16
+; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 4
; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = trunc i64 [[N_VEC]] to i32
@@ -2186,8 +2186,8 @@ define dso_local i32 @not_dotp_vscale1(ptr %a, ptr %b, i32 %n, i64 %cost) #0 {
; CHECK-INTERLEAVED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP3]], 16
-; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP10]], 2
+; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP3]], 4
+; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP10]], 1
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = trunc i64 [[N_VEC]] to i32
@@ -2238,7 +2238,7 @@ define dso_local i32 @not_dotp_vscale1(ptr %a, ptr %b, i32 %n, i64 %cost) #0 {
; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 16
+; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 4
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = trunc i64 [[N_VEC]] to i32
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-incomplete-chains.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-incomplete-chains.ll
index 2060168c531fb..e37f5229f0a16 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-incomplete-chains.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-incomplete-chains.ll
@@ -175,7 +175,7 @@ define void @chained_sext_adds(ptr noalias %src, ptr noalias %dst) #0 {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1000, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1000, [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll
index 39d6cac453012..02ba48fb0371c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll
@@ -13,7 +13,7 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVE1: vector.ph:
; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
+; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4
; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -44,8 +44,8 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP0]], 16
-; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP4]], 2
+; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP0]], 4
+; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP4]], 1
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -87,7 +87,7 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
; CHECK-MAXBW-NEXT: br label [[ENTRY:%.*]]
; CHECK-MAXBW: vector.ph:
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
+; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-MAXBW-NEXT: br label [[FOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll
index 04a2b65a9dcea..553d942d40d26 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll
@@ -15,7 +15,7 @@ define i32 @zext_add_reduc_i8_i32_sve(ptr %a) #0 {
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVE1: vector.ph:
; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP2]], 16
+; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP2]], 4
; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP1]]
; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -41,8 +41,8 @@ define i32 @zext_add_reduc_i8_i32_sve(ptr %a) #0 {
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 16
-; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP4]], 2
+; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP2]], 4
+; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP4]], 1
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP1]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -74,7 +74,7 @@ define i32 @zext_add_reduc_i8_i32_sve(ptr %a) #0 {
; CHECK-MAXBW-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
+; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -208,7 +208,7 @@ define i64 @zext_add_reduc_i8_i64(ptr %a) #0 {
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVE1: vector.ph:
; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP0]], 16
+; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP0]], 4
; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP2]]
; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -234,8 +234,8 @@ define i64 @zext_add_reduc_i8_i64(ptr %a) #0 {
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP0]], 16
-; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP4]], 2
+; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP0]], 4
+; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP4]], 1
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP2]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -267,7 +267,7 @@ define i64 @zext_add_reduc_i8_i64(ptr %a) #0 {
; CHECK-MAXBW-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
+; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -313,7 +313,7 @@ define i64 @zext_add_reduc_i16_i64(ptr %a) #0 {
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVE1: vector.ph:
; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP0]], 8
+; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP0]], 3
; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP2]]
; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -339,8 +339,8 @@ define i64 @zext_add_reduc_i16_i64(ptr %a) #0 {
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP0]], 8
-; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP4]], 2
+; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP0]], 3
+; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP4]], 1
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP2]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -372,7 +372,7 @@ define i64 @zext_add_reduc_i16_i64(ptr %a) #0 {
; CHECK-MAXBW-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -519,7 +519,7 @@ define i32 @zext_add_reduc_i8_i32_predicated(ptr %a) #0 {
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVE1: vector.ph:
; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
+; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4
; CHECK-INTERLEAVE1-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 1025)
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-INTERLEAVE1: vector.body:
@@ -548,7 +548,7 @@ define i32 @zext_add_reduc_i8_i32_predicated(ptr %a) #0 {
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
+; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4
; CHECK-INTERLEAVED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 1025)
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-INTERLEAVED: vector.body:
@@ -577,7 +577,7 @@ define i32 @zext_add_reduc_i8_i32_predicated(ptr %a) #0 {
; CHECK-MAXBW-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
+; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4
; CHECK-MAXBW-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 1025)
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-MAXBW: vector.body:
@@ -708,7 +708,7 @@ define i32 @zext_sub_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 {
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVE1: vector.ph:
; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP0]], 8
+; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP0]], 3
; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP2]]
; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -734,8 +734,8 @@ define i32 @zext_sub_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 {
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP0]], 8
-; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP4]], 4
+; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP0]], 3
+; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP4]], 2
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP2]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -746,7 +746,7 @@ define i32 @zext_sub_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 {
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = mul nuw nsw i64 [[TMP4]], 2
+; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP4]], 1
; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = mul nuw nsw i64 [[TMP4]], 3
; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP1]], i64 [[TMP4]]
; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP1]], i64 [[TMP7]]
@@ -781,7 +781,7 @@ define i32 @zext_sub_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 {
; CHECK-MAXBW-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -827,7 +827,7 @@ define i32 @sext_add_reduc_i8_i32(ptr %a) #0 {
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVE1: vector.ph:
; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP0]], 16
+; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP0]], 4
; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP2]]
; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -853,8 +853,8 @@ define i32 @sext_add_reduc_i8_i32(ptr %a) #0 {
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP0]], 16
-; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP4]], 2
+; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP0]], 4
+; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP4]], 1
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP2]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -886,7 +886,7 @@ define i32 @sext_add_reduc_i8_i32(ptr %a) #0 {
; CHECK-MAXBW-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
+; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -937,7 +937,7 @@ define i32 @add_of_zext_outside_loop(i32 %a, ptr noalias %b, i8 %c, i32 %d) #0 {
; CHECK-INTERLEAVE1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVE1: vector.ph:
; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = mul nuw i32 [[TMP7]], 16
+; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = shl nuw i32 [[TMP7]], 4
; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], [[TMP4]]
; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = add i32 [[D]], [[N_VEC]]
@@ -972,8 +972,8 @@ define i32 @add_of_zext_outside_loop(i32 %a, ptr noalias %b, i8 %c, i32 %d) #0 {
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP11]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP13]], 16
-; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i32 [[TMP16]], 2
+; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = shl nuw i32 [[TMP13]], 4
+; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = shl nuw i32 [[TMP16]], 1
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], [[TMP4]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF]]
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = add i32 [[D]], [[N_VEC]]
@@ -1014,7 +1014,7 @@ define i32 @add_of_zext_outside_loop(i32 %a, ptr noalias %b, i8 %c, i32 %d) #0 {
; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = mul nuw i32 [[TMP3]], 16
+; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = shl nuw i32 [[TMP3]], 4
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], [[TMP4]]
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = add i32 [[D]], [[N_VEC]]
@@ -1068,7 +1068,7 @@ define i32 @add_of_loop_invariant_zext(i32 %a, ptr %b, i8 %c, i32 %d) #0 {
; CHECK-INTERLEAVE1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVE1: vector.ph:
; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP3]], 16
+; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = shl nuw i32 [[TMP3]], 4
; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], [[TMP9]]
; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = add i32 [[D]], [[N_VEC]]
@@ -1103,8 +1103,8 @@ define i32 @add_of_loop_invariant_zext(i32 %a, ptr %b, i8 %c, i32 %d) #0 {
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP11]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = mul nuw i32 [[TMP3]], 16
-; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = mul nuw i32 [[TMP8]], 2
+; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = shl nuw i32 [[TMP3]], 4
+; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = shl nuw i32 [[TMP8]], 1
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], [[TMP12]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF]]
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = add i32 [[D]], [[N_VEC]]
@@ -1145,7 +1145,7 @@ define i32 @add_of_loop_invariant_zext(i32 %a, ptr %b, i8 %c, i32 %d) #0 {
; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-MAXBW: vector.ph:
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = mul nuw i32 [[TMP3]], 16
+; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = shl nuw i32 [[TMP3]], 4
; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], [[TMP4]]
; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = add i32 [[D]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
index fd560de78a6d0..436ece5c7d770 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
@@ -12,7 +12,7 @@ define void @test_invar_gep(ptr %dst) #0 {
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]]
; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
@@ -56,10 +56,10 @@ define void @test_invar_gep(ptr %dst) #0 {
; IC2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IC2: vector.ph:
; IC2-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; IC2-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP2]], 4
+; IC2-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP2]], 2
; IC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP11]], i64 0
; IC2-NEXT: [[TMP21:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; IC2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP11]], 2
+; IC2-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP11]], 1
; IC2-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP3]]
; IC2-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]]
; IC2-NEXT: [[TMP5:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
@@ -120,7 +120,7 @@ define void @test_invar_gep_var_start(i64 %start, ptr %dst) #0 {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
+; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[START]], [[N_VEC]]
@@ -169,10 +169,10 @@ define void @test_invar_gep_var_start(i64 %start, ptr %dst) #0 {
; IC2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IC2: vector.ph:
; IC2-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; IC2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
+; IC2-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2
; IC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP4]], i64 0
; IC2-NEXT: [[TMP9:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; IC2-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
+; IC2-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 1
; IC2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP5]]
; IC2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; IC2-NEXT: [[TMP6:%.*]] = add i64 [[START]], [[N_VEC]]
@@ -238,7 +238,7 @@ define void @test_invar_gep_var_start_step_2(i64 %start, ptr %dst) #0 {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4
+; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[N_VEC]], 2
@@ -248,9 +248,6 @@ define void @test_invar_gep_var_start_step_2(i64 %start, ptr %dst) #0 {
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP18:%.*]] = mul nsw <vscale x 4 x i64> [[TMP10]], splat (i64 2)
; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <vscale x 4 x i64> [[DOTSPLAT]], [[TMP18]]
-; CHECK-NEXT: [[TMP11:%.*]] = mul nsw i64 2, [[TMP6]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP11]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -261,7 +258,7 @@ define void @test_invar_gep_var_start_step_2(i64 %start, ptr %dst) #0 {
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <vscale x 4 x i64> [[TMP12]], i32 [[TMP15]]
; CHECK-NEXT: store i64 [[TMP16]], ptr [[DST:%.*]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <vscale x 4 x i64> [[TMP12]], [[BROADCAST_SPLAT2]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <vscale x 4 x i64> [[TMP12]], splat (i64 4)
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: middle.block:
@@ -291,15 +288,15 @@ define void @test_invar_gep_var_start_step_2(i64 %start, ptr %dst) #0 {
; IC2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IC2: vector.ph:
; IC2-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; IC2-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4
+; IC2-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
; IC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP6]], i64 0
; IC2-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; IC2-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2
+; IC2-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 1
; IC2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP7]]
; IC2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; IC2-NEXT: [[TMP10:%.*]] = mul i64 [[N_VEC]], 2
; IC2-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[START]], [[TMP10]]
-; IC2-NEXT: [[TMP13:%.*]] = mul <vscale x 4 x i64> [[BROADCAST_SPLAT1]], splat (i64 2)
+; IC2-NEXT: [[TMP13:%.*]] = shl <vscale x 4 x i64> [[BROADCAST_SPLAT1]], splat (i64 1)
; IC2-NEXT: [[TMP11:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; IC2-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[START]], i64 0
; IC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT1]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
index 6f90cbc3d8f51..4d02da05dee95 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
@@ -56,7 +56,7 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; VSCALEFORTUNING2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; VSCALEFORTUNING2: [[VECTOR_PH]]:
; VSCALEFORTUNING2-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; VSCALEFORTUNING2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8
+; VSCALEFORTUNING2-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 3
; VSCALEFORTUNING2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
; VSCALEFORTUNING2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; VSCALEFORTUNING2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i64 0
@@ -179,7 +179,7 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; PRED-NEXT: br label %[[VECTOR_PH:.*]]
; PRED: [[VECTOR_PH]]:
; PRED-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; PRED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4
+; PRED-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2
; PRED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i64 0
; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
@@ -294,8 +294,8 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 {
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; DEFAULT: [[VECTOR_PH]]:
; DEFAULT-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; DEFAULT-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP3]], 4
-; DEFAULT-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP5]], 2
+; DEFAULT-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP3]], 2
+; DEFAULT-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP5]], 1
; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 [[X]], i64 0
@@ -354,8 +354,8 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 {
; VSCALEFORTUNING2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
; VSCALEFORTUNING2: [[VECTOR_PH]]:
; VSCALEFORTUNING2-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; VSCALEFORTUNING2-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP3]], 4
-; VSCALEFORTUNING2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP7]], 2
+; VSCALEFORTUNING2-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP3]], 2
+; VSCALEFORTUNING2-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP7]], 1
; VSCALEFORTUNING2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
; VSCALEFORTUNING2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; VSCALEFORTUNING2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 [[X]], i64 0
@@ -388,7 +388,7 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 {
; VSCALEFORTUNING2-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; VSCALEFORTUNING2-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ [[TMP18]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; VSCALEFORTUNING2-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
-; VSCALEFORTUNING2-NEXT: [[TMP20:%.*]] = mul nuw i64 [[TMP19]], 2
+; VSCALEFORTUNING2-NEXT: [[TMP20:%.*]] = shl nuw i64 [[TMP19]], 1
; VSCALEFORTUNING2-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[TMP0]], [[TMP20]]
; VSCALEFORTUNING2-NEXT: [[N_VEC5:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF4]]
; VSCALEFORTUNING2-NEXT: [[TMP21:%.*]] = insertelement <vscale x 2 x i16> zeroinitializer, i16 [[BC_MERGE_RDX]], i32 0
@@ -434,7 +434,7 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 {
; PRED-NEXT: br label %[[VECTOR_PH:.*]]
; PRED: [[VECTOR_PH]]:
; PRED-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; PRED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 8
+; PRED-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 3
; PRED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; PRED-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 3
; PRED-NEXT: [[TMP10:%.*]] = sub i64 [[TMP0]], [[TMP9]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll
index dd6f0fe5f1292..d41a737bf3610 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll
@@ -21,7 +21,7 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP4]], 2
+; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i32 [[TMP4]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], [[TMP5]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[IDX]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll
index 977713d389b4e..46c2051ce916d 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll
@@ -11,7 +11,7 @@ define float @cond_fadd(ptr noalias nocapture readonly %a, ptr noalias nocapture
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -93,7 +93,7 @@ define float @cond_cmp_sel(ptr noalias %a, ptr noalias %cond, i64 %N) {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
index fbac263819e4b..e3188b1c08c38 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
@@ -39,7 +39,7 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-UNORDERED: vector.ph:
; CHECK-UNORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -82,7 +82,7 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-ORDERED: vector.ph:
; CHECK-ORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -121,7 +121,7 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-ORDERED-TF: vector.ph:
; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
+; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[N]], [[TMP3]]
@@ -195,8 +195,8 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-UNORDERED: vector.ph:
; CHECK-UNORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
-; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
+; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
+; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2
; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP4]]
; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -207,7 +207,7 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = mul nuw nsw i64 [[TMP3]], 2
+; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP3]], 1
; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = mul nuw nsw i64 [[TMP3]], 3
; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[TMP3]]
; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[TMP6]]
@@ -256,8 +256,8 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-ORDERED: vector.ph:
; CHECK-ORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
-; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
+; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
+; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2
; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP4]]
; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -265,7 +265,7 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = mul nuw nsw i64 [[TMP3]], 2
+; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP3]], 1
; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = mul nuw nsw i64 [[TMP3]], 3
; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[TMP3]]
; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[TMP6]]
@@ -307,8 +307,8 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-ORDERED-TF: vector.ph:
; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
-; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4
+; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
+; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2
; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 5
; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = sub i64 [[N]], [[TMP4]]
@@ -336,7 +336,7 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT14:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = mul nuw nsw i64 [[TMP1]], 2
+; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP1]], 1
; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = mul nuw nsw i64 [[TMP1]], 3
; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP1]]
; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]]
@@ -440,7 +440,7 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali
; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-UNORDERED: vector.ph:
; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4
+; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]]
; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = mul i64 [[N_VEC]], 2
@@ -508,7 +508,7 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali
; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-ORDERED: vector.ph:
; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4
+; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]]
; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = mul i64 [[N_VEC]], 2
@@ -569,7 +569,7 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali
; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-ORDERED-TF: vector.ph:
; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
+; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2
; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = sub i64 [[TMP2]], [[TMP6]]
@@ -679,7 +679,7 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocaptu
; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-UNORDERED: vector.ph:
; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
+; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2
; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP4]]
; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -736,7 +736,7 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocaptu
; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-ORDERED: vector.ph:
; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
+; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2
; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP4]]
; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -789,7 +789,7 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocaptu
; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-ORDERED-TF: vector.ph:
; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4
+; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2
; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2
; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = sub i64 [[N]], [[TMP4]]
@@ -884,7 +884,7 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no
; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-UNORDERED: vector.ph:
; CHECK-UNORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -939,7 +939,7 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no
; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-ORDERED: vector.ph:
; CHECK-ORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -990,7 +990,7 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no
; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-ORDERED-TF: vector.ph:
; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[N]], [[TMP3]]
@@ -1084,7 +1084,7 @@ define float @fadd_multiple(ptr noalias nocapture %a, ptr noalias nocapture %b,
; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-UNORDERED: vector.ph:
; CHECK-UNORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -1219,8 +1219,8 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 {
; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-UNORDERED: vector.ph:
; CHECK-UNORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
-; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
+; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
+; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2
; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP4]]
; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -1231,7 +1231,7 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 {
; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = mul nuw nsw i64 [[TMP3]], 2
+; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP3]], 1
; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = mul nuw nsw i64 [[TMP3]], 3
; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[TMP3]]
; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[TMP6]]
@@ -1290,8 +1290,8 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 {
; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-ORDERED: vector.ph:
; CHECK-ORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
-; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
+; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
+; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2
; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP4]]
; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -1299,7 +1299,7 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 {
; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = mul nuw nsw i64 [[TMP3]], 2
+; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP3]], 1
; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = mul nuw nsw i64 [[TMP3]], 3
; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[TMP3]]
; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[TMP6]]
@@ -1355,8 +1355,8 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 {
; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-ORDERED-TF: vector.ph:
; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
-; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4
+; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
+; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2
; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 5
; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = sub i64 [[N]], [[TMP4]]
@@ -1384,7 +1384,7 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 {
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT18:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP35:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = mul nuw nsw i64 [[TMP1]], 2
+; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP1]], 1
; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = mul nuw nsw i64 [[TMP1]], 3
; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP1]]
; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]]
@@ -1488,8 +1488,8 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 {
; CHECK-UNORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-UNORDERED: vector.ph:
; CHECK-UNORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
-; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
+; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
+; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2
; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP4]]
; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -1500,7 +1500,7 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 {
; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = mul nuw nsw i64 [[TMP3]], 2
+; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP3]], 1
; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = mul nuw nsw i64 [[TMP3]], 3
; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[TMP3]]
; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[TMP6]]
@@ -1559,8 +1559,8 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 {
; CHECK-ORDERED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-ORDERED: vector.ph:
; CHECK-ORDERED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
-; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
+; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
+; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2
; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP4]]
; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -1568,7 +1568,7 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 {
; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = mul nuw nsw i64 [[TMP3]], 2
+; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP3]], 1
; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = mul nuw nsw i64 [[TMP3]], 3
; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[TMP3]]
; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[TMP6]]
@@ -1624,8 +1624,8 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 {
; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-ORDERED-TF: vector.ph:
; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
-; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4
+; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
+; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2
; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 5
; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = sub i64 [[N]], [[TMP4]]
@@ -1653,7 +1653,7 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 {
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT18:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP35:%.*]], [[VECTOR_BODY]] ]
; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = mul nuw nsw i64 [[TMP1]], 2
+; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP1]], 1
; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = mul nuw nsw i64 [[TMP1]], 3
; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP1]]
; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]]
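Every hunk in the reduction tests above is the same strength reduction: a multiply by a constant power of two C becomes a left shift by log2(C), with the nuw/nsw wrap flags carried over unchanged. The sketch below shows that rewrite at the IR level. It is only an illustration of the arithmetic — the patch itself performs the fold on VPlan recipes (via VPlanPatternMatch.h and VPlanTransforms.cpp in the diffstat), and the helper name simplifyMulPow2 is invented for this example; the LLVM APIs used (APInt::isPowerOf2, APInt::logBase2, IRBuilder::CreateShl with wrap-flag arguments) do exist.

  #include "llvm/IR/IRBuilder.h"
  using namespace llvm;

  // Illustrative only: if Mul is `X * C` with C a constant power of two,
  // emit `X << log2(C)`, preserving the original nuw/nsw flags.
  static Value *simplifyMulPow2(BinaryOperator *Mul, IRBuilder<> &B) {
    auto *C = dyn_cast<ConstantInt>(Mul->getOperand(1));
    if (!C || !C->getValue().isPowerOf2())
      return nullptr;
    uint64_t ShAmt = C->getValue().logBase2();
    return B.CreateShl(Mul->getOperand(0),
                       ConstantInt::get(Mul->getType(), ShAmt), "",
                       /*HasNUW=*/Mul->hasNoUnsignedWrap(),
                       /*HasNSW=*/Mul->hasNoSignedWrap());
  }

This is the shape of every CHECK update above, e.g. `mul nuw i64 [[TMP2]], 8` becoming `shl nuw i64 [[TMP2]], 3`.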
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-struct-return.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-struct-return.ll
index 9a831690d632d..695ea713ed125 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-struct-return.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-struct-return.ll
@@ -12,7 +12,7 @@ define void @struct_return_f32_widen(ptr noalias %in, ptr noalias writeonly %out
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 1024, [[TMP3]]
@@ -71,7 +71,7 @@ define void @struct_return_f64_widen(ptr noalias %in, ptr noalias writeonly %out
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 1024, [[TMP3]]
@@ -148,7 +148,7 @@ define void @struct_return_f32_widen_rt_checks(ptr %in, ptr writeonly %out_a, pt
; CHECK-NEXT: br i1 [[CONFLICT_RDX6]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4
+; CHECK-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 2
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 2
; CHECK-NEXT: [[TMP12:%.*]] = sub i64 1024, [[TMP11]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
index 66211bd0353d4..032362ab05b15 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
@@ -19,7 +19,7 @@ define i64 @same_exit_block_pre_inc_use1() #1 {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]]
; CHECK-NEXT: [[TMP6:%.*]] = add i64 3, [[N_VEC]]
@@ -232,7 +232,7 @@ define i64 @loop_contains_safe_div() #1 {
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP10]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP10]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP3]]
; CHECK-NEXT: [[INDEX1:%.*]] = sub i64 64, [[N_MOD_VF]]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll b/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll
index cc03ef8af8b00..50ea180cd9798 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll
@@ -19,8 +19,8 @@ define i64 @same_exit_block_pre_inc_use1() #0 {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 16
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP4]], 4
+; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP4]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 510, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 510, [[N_MOD_VF]]
; CHECK-NEXT: [[INDEX_NEXT:%.*]] = add i64 3, [[N_VEC]]
@@ -29,7 +29,7 @@ define i64 @same_exit_block_pre_inc_use1() #0 {
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP27:%.*]] = mul nuw nsw i64 [[TMP4]], 2
+; CHECK-NEXT: [[TMP27:%.*]] = shl nuw nsw i64 [[TMP4]], 1
; CHECK-NEXT: [[TMP15:%.*]] = mul nuw nsw i64 [[TMP4]], 3
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 [[TMP27]]
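For readers verifying the hunks by eye: the new shift amounts are simply log2 of the old multipliers — *2 -> <<1, *4 -> <<2, *8 -> <<3, *16 -> <<4. A throwaway compile-time check of that mapping (plain C++, nothing LLVM-specific):

  #include <cstdint>
  static_assert(uint64_t(7) *  2 == uint64_t(7) << 1, "mul 2  == shl 1");
  static_assert(uint64_t(7) *  8 == uint64_t(7) << 3, "mul 8  == shl 3");
  static_assert(uint64_t(7) * 16 == uint64_t(7) << 4, "mul 16 == shl 4");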
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll
index d3edad4010bb8..2615d0ad28411 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll
@@ -20,8 +20,8 @@ define void @cost_store_i8(ptr %dst) #0 {
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; DEFAULT: vector.ph:
; DEFAULT-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; DEFAULT-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 16
-; DEFAULT-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 2
+; DEFAULT-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP2]], 4
+; DEFAULT-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP5]], 1
; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 101, [[TMP3]]
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 101, [[N_MOD_VF]]
; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -43,7 +43,7 @@ define void @cost_store_i8(ptr %dst) #0 {
; DEFAULT: vec.epilog.ph:
; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; DEFAULT-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
-; DEFAULT-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4
+; DEFAULT-NEXT: [[TMP12:%.*]] = shl nuw i64 [[TMP11]], 2
; DEFAULT-NEXT: [[N_MOD_VF2:%.*]] = urem i64 101, [[TMP12]]
; DEFAULT-NEXT: [[N_VEC3:%.*]] = sub i64 101, [[N_MOD_VF2]]
; DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
@@ -76,7 +76,7 @@ define void @cost_store_i8(ptr %dst) #0 {
; PRED-NEXT: br label [[VECTOR_PH:%.*]]
; PRED: vector.ph:
; PRED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; PRED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
+; PRED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4
; PRED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; PRED-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4
; PRED-NEXT: [[TMP4:%.*]] = sub i64 101, [[TMP3]]
@@ -133,8 +133,8 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 {
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; DEFAULT: vector.ph:
; DEFAULT-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; DEFAULT-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP2]], 16
-; DEFAULT-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP16]], 2
+; DEFAULT-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP2]], 4
+; DEFAULT-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP16]], 1
; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 1000, [[TMP3]]
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 1000, [[N_MOD_VF]]
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i16> poison, i16 [[X]], i64 0
@@ -211,7 +211,7 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 {
; PRED-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; PRED: vector.ph:
; PRED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; PRED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
+; PRED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4
; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i16> poison, i16 [[X]], i64 0
; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i16> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 1000)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll
index 78bd554210c6b..b9bbc272fc63a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll
@@ -16,8 +16,8 @@ define i64 @int_reduction_and(ptr noalias nocapture %a, i64 %N) {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 2
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP4]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP2]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP4]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll
index 6d80887cdbecb..347ebeadb40a3 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll
@@ -16,8 +16,8 @@ define i64 @int_reduction_add(ptr %a, i64 %N) {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 2
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP4]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP2]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP4]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll
index 6724384bdd227..9d636067c59aa 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll
@@ -16,8 +16,8 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 4
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP4]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP2]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP4]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
index 5a7caf1713d73..83e4ca74e56fe 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
@@ -27,8 +27,8 @@ define void @main_vf_vscale_x_16(ptr %A) #0 {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 16
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 2
+; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP5]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -59,8 +59,8 @@ define void @main_vf_vscale_x_16(ptr %A) #0 {
; CHECK-VF8-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-VF8: vector.ph:
; CHECK-VF8-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 16
-; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 2
+; CHECK-VF8-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP2]], 4
+; CHECK-VF8-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP5]], 1
; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -117,8 +117,8 @@ define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) {
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP0]], 2
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP3]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP0]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP3]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -148,8 +148,8 @@ define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) {
; CHECK-VF8-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-VF8: vector.ph:
; CHECK-VF8-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 2
-; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 2
+; CHECK-VF8-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP2]], 1
+; CHECK-VF8-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP5]], 1
; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -226,8 +226,8 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 2
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 2
+; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP2]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP5]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -277,8 +277,8 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) {
; CHECK-VF8-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-VF8: vector.ph:
; CHECK-VF8-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 2
-; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 2
+; CHECK-VF8-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP2]], 1
+; CHECK-VF8-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP5]], 1
; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -345,8 +345,8 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 16
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP4]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP4]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 10000, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 10000, [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -395,8 +395,8 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 {
; CHECK-VF8-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-VF8: vector.ph:
; CHECK-VF8-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 16
-; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP4]], 2
+; CHECK-VF8-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP2]], 4
+; CHECK-VF8-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP4]], 1
; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 10000, [[TMP3]]
; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 10000, [[N_MOD_VF]]
; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -465,8 +465,8 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 4
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 2
+; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP2]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP5]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -524,8 +524,8 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
; CHECK-VF8-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-VF8: vector.ph:
; CHECK-VF8-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP0]], 4
-; CHECK-VF8-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP3]], 2
+; CHECK-VF8-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP0]], 2
+; CHECK-VF8-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP3]], 1
; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP1]]
; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -583,8 +583,8 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP0]], 4
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP3]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP0]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP3]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP1]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -619,8 +619,8 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
; CHECK-VF8-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-VF8: vector.ph:
; CHECK-VF8-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP0]], 4
-; CHECK-VF8-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP3]], 2
+; CHECK-VF8-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP0]], 2
+; CHECK-VF8-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP3]], 1
; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP1]]
; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]]
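In the epilogue-vectorization tests above, VF x UF is computed in two steps (e.g. vscale*16 followed by *2), so after the fold it reads as two chained shifts, <<4 then <<1. Shift amounts compose additively, which is consistent with the tail-folded runs earlier in the patch computing the same vscale*32 quantity in a single `shl nuw i64 [[TMP3]], 5`: 4 + 1 = 3 + 2 = 5. A one-line sanity check:

  #include <cstdint>
  static_assert((uint64_t(3) << 4 << 1) == (uint64_t(3) << 5),
                "chained shifts add: <<4 then <<1 is <<5");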
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vscale-fixed.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vscale-fixed.ll
index 29c3244570eca..061f9dc551e22 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vscale-fixed.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vscale-fixed.ll
@@ -36,8 +36,8 @@ define void @main_vf_vscale_x_16(ptr %A, i64 %n) #0 {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 16
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 2
+; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP5]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -89,8 +89,8 @@ define void @main_vf_vscale_x_16(ptr %A, i64 %n) #0 {
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-EPILOG-PREFER-SCALABLE: vector.ph:
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP4]], 16
-; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP7]], 2
+; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP4]], 4
+; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP7]], 1
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP5]]
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -112,7 +112,7 @@ define void @main_vf_vscale_x_16(ptr %A, i64 %n) #0 {
; CHECK-EPILOG-PREFER-SCALABLE: vec.epilog.ph:
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 8
+; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP14:%.*]] = shl nuw i64 [[TMP13]], 3
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], [[TMP14]]
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_VEC3:%.*]] = sub i64 [[N]], [[N_MOD_VF2]]
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll
index a766eb599875d..a5f5cbad0d949 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll
@@ -28,8 +28,8 @@ define void @fneg(ptr nocapture noundef writeonly %d, ptr nocapture noundef read
; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP6]], 8
-; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP8]], 2
+; CHECK-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP6]], 3
+; CHECK-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP8]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP7]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll
index 9b8748b4eb7de..f80693f15a4af 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll
@@ -18,10 +18,10 @@ define void @induction_i7(ptr %dst) #0 {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP40:%.*]] = mul nuw i64 [[TMP2]], 2
+; CHECK-NEXT: [[TMP40:%.*]] = shl nuw i64 [[TMP2]], 1
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP40]], i64 0
; CHECK-NEXT: [[DOTSPLAT_:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP40]], 2
+; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP40]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP5]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]]
; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i7
@@ -84,10 +84,10 @@ define void @induction_i3_zext(ptr %dst) #0 {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP40:%.*]] = mul nuw i64 [[TMP2]], 2
+; CHECK-NEXT: [[TMP40:%.*]] = shl nuw i64 [[TMP2]], 1
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP40]], i64 0
; CHECK-NEXT: [[DOTSPLAT_:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP40]], 2
+; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP40]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP5]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]]
; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i3
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
index 8935010e71676..97dba2c1eb5e2 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
@@ -107,9 +107,6 @@ define void @test_array_load2_i16_store2(i32 %C, i32 %D) #1 {
; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw <vscale x 4 x i64> [[TMP2]], splat (i64 1)
-; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP0]], 3
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP5]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -128,7 +125,7 @@ define void @test_array_load2_i16_store2(i32 %C, i32 %D) #1 {
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[TMP10]], <vscale x 4 x i32> [[TMP12]])
; CHECK-NEXT: store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP14]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <vscale x 4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
@@ -191,9 +188,6 @@ define void @test_array_load2_store2_i16(i32 noundef %C, i32 noundef %D) #1 {
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw <vscale x 4 x i64> [[TMP2]], splat (i64 1)
-; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP0]], 3
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP5]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -214,7 +208,7 @@ define void @test_array_load2_store2_i16(i32 noundef %C, i32 noundef %D) #1 {
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i16, ptr @CD_i16, <vscale x 4 x i64> [[TMP9]]
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> [[TMP14]], <vscale x 4 x ptr> align 2 [[TMP15]], <vscale x 4 x i1> splat (i1 true))
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <vscale x 4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
@@ -1112,9 +1106,6 @@ define void @PR27626_4(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 {
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT3]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw <vscale x 4 x i64> [[TMP10]], splat (i64 1)
-; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP6]], 3
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP11]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1125,7 +1116,7 @@ define void @PR27626_4(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 {
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[BROADCAST_SPLAT2]], <vscale x 4 x i32> [[BROADCAST_SPLAT4]])
; CHECK-NEXT: store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[P]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <vscale x 4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
; CHECK: middle.block:
@@ -1190,9 +1181,6 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 {
; CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP19:%.*]] = shl nuw nsw <vscale x 4 x i64> [[TMP10]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <vscale x 4 x i64> [[TMP19]], splat (i64 3)
-; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[TMP7]], 3
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP12]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1206,7 +1194,7 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 {
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT2]], <vscale x 4 x ptr> align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true))
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT4]], <vscale x 4 x ptr> align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true))
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <vscale x 4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
; CHECK: middle.block:
@@ -1274,9 +1262,6 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 {
; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1
; CHECK-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw <vscale x 4 x i64> [[TMP14]], splat (i64 1)
-; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i64 [[TMP9]], 3
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP17]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP33:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw i32 [[TMP33]], 2
; CHECK-NEXT: [[TMP34:%.*]] = add nsw i32 [[TMP16]], -1
@@ -1301,7 +1286,7 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 {
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: store <vscale x 4 x i32> [[TMP27]], ptr [[TMP28]], align 4, !alias.scope [[META37:![0-9]+]], !noalias [[META34]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <vscale x 4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP39:![0-9]+]]
; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
index 9c3f3f74e4196..e2ac5225fe0fc 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
@@ -12,7 +12,7 @@ define void @inv_store_i16(ptr noalias %dst, ptr noalias readonly %src, i64 %N)
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -59,7 +59,7 @@ define void @cond_inv_store_i32(ptr noalias %dst, ptr noalias readonly %src, i64
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[DST:%.*]], i64 0
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll
index 10a099e4f8ec0..a5ac999720e99 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll
@@ -16,8 +16,8 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP6]], 2
-; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP10]], 2
+; CHECK-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP6]], 1
+; CHECK-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP10]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], [[TMP7]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[N_VEC]], 8
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll
index 8c62ffd9c7a98..96fb60b893a66 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll
@@ -10,7 +10,7 @@ define void @trip7_i64(ptr noalias nocapture noundef %dst, ptr noalias nocapture
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 1
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 7)
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll
index 809da06a9a4ac..ad85147a03b42 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll
@@ -28,8 +28,8 @@ define void @multiple_exits_unique_exit_block(ptr %A, ptr %B, i32 %N) #0 {
; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP7]], 4
-; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i32 [[TMP11]], 2
+; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i32 [[TMP7]], 2
+; CHECK-NEXT: [[TMP8:%.*]] = shl nuw i32 [[TMP11]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], [[TMP8]]
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[N_MOD_VF]]
@@ -96,8 +96,8 @@ define i32 @multiple_exits_multiple_exit_blocks(ptr %A, ptr %B, i32 %N) #0 {
; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP7]], 4
-; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i32 [[TMP11]], 2
+; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i32 [[TMP7]], 2
+; CHECK-NEXT: [[TMP8:%.*]] = shl nuw i32 [[TMP11]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], [[TMP8]]
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[N_MOD_VF]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-predicated-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-predicated-costs.ll
index b373f0eb390ed..cd78aee4c7491 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-predicated-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-predicated-costs.ll
@@ -71,8 +71,8 @@ define void @always_taken(ptr noalias %p0, ptr noalias %p1, i1 %c0, i1 %c1) {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP4]], 2
-; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP3]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP4]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i32 [[TMP3]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 1024, [[TMP5]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 1024, [[N_MOD_VF]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i1> poison, i1 [[C1]], i64 0
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll
index da203b9d08a16..9ff193b36c008 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll
@@ -43,8 +43,8 @@ define void @min_trip_count_due_to_runtime_checks_1(ptr %dst.1, ptr %dst.2, ptr
; CHECK-NEXT: br i1 [[CONFLICT_RDX11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP15]], 2
-; CHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP17]], 2
+; CHECK-NEXT: [[TMP17:%.*]] = shl nuw i64 [[TMP15]], 1
+; CHECK-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP17]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX]], [[TMP16]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll
index 8108320fd54ab..1c4b8d61d167e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll
@@ -47,7 +47,7 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 {
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 4
+; CHECK-NEXT: [[TMP14:%.*]] = shl nuw i64 [[TMP13]], 2
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll
index 243ea7c3c4c43..b40aea57a3283 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll
@@ -9,7 +9,7 @@ define void @trip1025_i64(ptr noalias nocapture noundef %dst, ptr noalias nocapt
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025)
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll
index ae7c9d263c179..ba8c1835af45b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll
@@ -13,7 +13,7 @@ define i32 @add_reduction_i32(ptr %ptr, i64 %n) #0 {
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4
+; CHECK-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 2
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]]
@@ -46,7 +46,7 @@ define i32 @add_reduction_i32(ptr %ptr, i64 %n) #0 {
; CHECK-IN-LOOP-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-IN-LOOP: vector.ph:
; CHECK-IN-LOOP-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-IN-LOOP-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 4
+; CHECK-IN-LOOP-NEXT: [[TMP17:%.*]] = shl nuw i64 [[TMP16]], 2
; CHECK-IN-LOOP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-IN-LOOP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
; CHECK-IN-LOOP-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]]
@@ -97,7 +97,7 @@ define float @add_reduction_f32(ptr %ptr, i64 %n) #0 {
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4
+; CHECK-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 2
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]]
@@ -129,7 +129,7 @@ define float @add_reduction_f32(ptr %ptr, i64 %n) #0 {
; CHECK-IN-LOOP-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-IN-LOOP: vector.ph:
; CHECK-IN-LOOP-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-IN-LOOP-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4
+; CHECK-IN-LOOP-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 2
; CHECK-IN-LOOP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-IN-LOOP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
; CHECK-IN-LOOP-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]]
@@ -178,7 +178,7 @@ define i32 @cond_xor_reduction(ptr noalias %a, ptr noalias %cond, i64 %N) #0 {
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP21]], 4
+; CHECK-NEXT: [[TMP22:%.*]] = shl nuw i64 [[TMP21]], 2
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[N:%.*]], [[TMP6]]
@@ -215,7 +215,7 @@ define i32 @cond_xor_reduction(ptr noalias %a, ptr noalias %cond, i64 %N) #0 {
; CHECK-IN-LOOP-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-IN-LOOP: vector.ph:
; CHECK-IN-LOOP-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-IN-LOOP-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4
+; CHECK-IN-LOOP-NEXT: [[TMP21:%.*]] = shl nuw i64 [[TMP20]], 2
; CHECK-IN-LOOP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-IN-LOOP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
; CHECK-IN-LOOP-NEXT: [[TMP7:%.*]] = sub i64 [[N:%.*]], [[TMP6]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll
index 5e94fbe95d4bf..019ea0b3e07f5 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll
@@ -11,8 +11,8 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 {
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP61:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP61]], 4
-; CHECK-NEXT: [[TMP62:%.*]] = mul nuw i64 [[TMP1]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP61]], 2
+; CHECK-NEXT: [[TMP62:%.*]] = shl nuw i64 [[TMP1]], 2
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]]
@@ -41,7 +41,7 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 {
; CHECK-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY4]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT12:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[ACTIVE_LANE_MASK9:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT13:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX6]]
-; CHECK-NEXT: [[TMP56:%.*]] = mul nuw nsw i64 [[TMP1]], 2
+; CHECK-NEXT: [[TMP56:%.*]] = shl nuw nsw i64 [[TMP1]], 1
; CHECK-NEXT: [[TMP59:%.*]] = mul nuw nsw i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP54:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP57:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP56]]
@@ -94,8 +94,8 @@ define void @cond_memset(i32 %val, ptr noalias readonly %cond_ptr, ptr noalias %
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP5]], 4
-; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP1]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP5]], 2
+; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP1]], 2
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4
; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP3]]
@@ -124,7 +124,7 @@ define void @cond_memset(i32 %val, ptr noalias readonly %cond_ptr, ptr noalias %
; CHECK-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY4]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT15:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[ACTIVE_LANE_MASK9:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT16:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i32, ptr [[COND_PTR:%.*]], i64 [[INDEX6]]
-; CHECK-NEXT: [[TMP56:%.*]] = mul nuw nsw i64 [[TMP1]], 2
+; CHECK-NEXT: [[TMP56:%.*]] = shl nuw nsw i64 [[TMP1]], 1
; CHECK-NEXT: [[TMP59:%.*]] = mul nuw nsw i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP54:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP57:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP56]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
index 8cc9e431e6214..6a0005881ae94 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
@@ -11,7 +11,7 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 {
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]]
@@ -103,7 +103,7 @@ define void @simple_memcpy(ptr noalias %dst, ptr noalias %src, i64 %n) #0 {
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]]
@@ -156,7 +156,7 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 {
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
+; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 2
; CHECK-NEXT: [[TMP10:%.*]] = sub i64 [[TMP2]], [[TMP9]]
@@ -166,9 +166,6 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 {
; CHECK-NEXT: [[TMP13:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP14:%.*]] = mul nsw <vscale x 4 x i64> [[TMP13]], splat (i64 4)
; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <vscale x 4 x i64> zeroinitializer, [[TMP14]]
-; CHECK-NEXT: [[TMP18:%.*]] = mul nsw i64 4, [[TMP4]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP18]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
@@ -182,7 +179,7 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 {
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP12]])
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
; CHECK-NEXT: [[TMP22:%.*]] = xor i1 [[TMP21]], true
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <vscale x 4 x i64> [[VEC_IND]], splat (i64 16)
; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]]
@@ -214,7 +211,7 @@ define void @simple_gather_scatter(ptr noalias %dst, ptr noalias %src, ptr noali
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]]
@@ -269,7 +266,7 @@ define void @uniform_load(ptr noalias %dst, ptr noalias readonly %src, i64 %n) #
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[N:%.*]], [[TMP6]]
@@ -323,7 +320,7 @@ define void @cond_uniform_load(ptr noalias %dst, ptr noalias readonly %src, ptr
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[N:%.*]], [[TMP6]]
@@ -390,7 +387,7 @@ define void @uniform_store(ptr noalias %dst, ptr noalias readonly %src, i64 %n)
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[N:%.*]], [[TMP6]]
@@ -441,7 +438,7 @@ define void @simple_fdiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 {
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]]
@@ -496,7 +493,7 @@ define void @simple_idiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 {
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]]
@@ -549,7 +546,7 @@ define void @simple_memset_trip1024(i32 %val, ptr %ptr, i64 %n) #0 {
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i64 0
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll
index a1b7fed936d69..360e7d6a23c19 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll
@@ -10,7 +10,7 @@ define void @vscale_mul_4(ptr noalias noundef readonly captures(none) %a, ptr no
; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP10]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP10]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[A]], align 4
@@ -61,8 +61,8 @@ define void @vscale_mul_8(ptr noalias noundef readonly captures(none) %a, ptr n
; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[MUL1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP3]], 4
-; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP3]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP2]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[MUL1]], [[TMP4]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[MUL1]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[TMP2]]
@@ -119,7 +119,7 @@ define void @vscale_mul_12(ptr noalias noundef readonly captures(none) %a, ptr n
; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i64 [[TMP0]], 12
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
+; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[MUL1]], [[TMP4]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[MUL1]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -179,8 +179,8 @@ define void @vscale_mul_31(ptr noalias noundef readonly captures(none) %a, ptr n
; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i64 [[TMP0]], 31
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP3]], 4
-; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP3]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP2]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[MUL1]], [[TMP4]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[MUL1]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -246,8 +246,8 @@ define void @vscale_mul_64(ptr noalias noundef readonly captures(none) %a, ptr n
; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i64 [[TMP0]], 64
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP3]], 4
-; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP3]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP2]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[MUL1]], [[TMP4]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[MUL1]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -319,8 +319,8 @@ define void @trip_count_with_overflow(ptr noalias noundef readonly captures(none
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP4]], 4
-; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP6]], 2
+; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP4]], 2
+; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP6]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP5]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -390,8 +390,8 @@ define void @trip_count_too_big_for_element_count(ptr noalias noundef readonly c
; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 32
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP4]], 4
-; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP3]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP4]], 2
+; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP3]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP5]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll
index 2c7521bddafb6..b7bdd9e64da9a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll
@@ -13,7 +13,7 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src,
; CHECK-UF1-NEXT: br label [[VECTOR_PH1:%.*]]
; CHECK-UF1: vector.ph:
; CHECK-UF1-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP5]], 16
+; CHECK-UF1-NEXT: [[TMP12:%.*]] = shl nuw i64 [[TMP5]], 4
; CHECK-UF1-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UF1-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP17]], 4
; CHECK-UF1-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP18]]
@@ -42,8 +42,8 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src,
; CHECK-UF4-NEXT: br label [[VECTOR_PH1:%.*]]
; CHECK-UF4: vector.ph:
; CHECK-UF4-NEXT: [[TMP61:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP61]], 16
-; CHECK-UF4-NEXT: [[TMP62:%.*]] = mul nuw i64 [[TMP1]], 4
+; CHECK-UF4-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP61]], 4
+; CHECK-UF4-NEXT: [[TMP62:%.*]] = shl nuw i64 [[TMP1]], 2
; CHECK-UF4-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UF4-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 6
; CHECK-UF4-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP3]]
@@ -62,7 +62,7 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src,
; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK7:%.*]] = phi <vscale x 16 x i1> [ [[TMP18]], [[VECTOR_PH1]] ], [ [[TMP57:%.*]], [[VECTOR_BODY]] ]
; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi <vscale x 16 x i1> [ [[TMP19]], [[VECTOR_PH1]] ], [ [[TMP58:%.*]], [[VECTOR_BODY]] ]
; CHECK-UF4-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]]
-; CHECK-UF4-NEXT: [[TMP32:%.*]] = mul nuw nsw i64 [[TMP1]], 2
+; CHECK-UF4-NEXT: [[TMP32:%.*]] = shl nuw nsw i64 [[TMP1]], 1
; CHECK-UF4-NEXT: [[TMP29:%.*]] = mul nuw nsw i64 [[TMP1]], 3
; CHECK-UF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i64 [[TMP1]]
; CHECK-UF4-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i64 [[TMP32]]
@@ -100,8 +100,8 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src,
; CHECK-TF-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-TF: vector.ph:
; CHECK-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-TF-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP0]], 16
-; CHECK-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP10]], 2
+; CHECK-TF-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP0]], 4
+; CHECK-TF-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP10]], 1
; CHECK-TF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-TF-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 5
; CHECK-TF-NEXT: [[TMP4:%.*]] = sub i64 [[N]], [[TMP3]]
@@ -162,7 +162,7 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl
; CHECK-UF1-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-UF1: vector.ph:
; CHECK-UF1-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF1-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP12]], 2
+; CHECK-UF1-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP12]], 1
; CHECK-UF1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UF1-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP2]], 1
; CHECK-UF1-NEXT: [[TMP10:%.*]] = sub i64 [[N]], [[TMP9]]
@@ -194,8 +194,8 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl
; CHECK-UF4-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-UF4: vector.ph:
; CHECK-UF4-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP2]], 2
-; CHECK-UF4-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP1]], 4
+; CHECK-UF4-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP2]], 1
+; CHECK-UF4-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 2
; CHECK-UF4-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UF4-NEXT: [[TMP26:%.*]] = shl nuw i64 [[TMP4]], 3
; CHECK-UF4-NEXT: [[TMP31:%.*]] = sub i64 [[N]], [[TMP26]]
@@ -214,7 +214,7 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl
; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK7:%.*]] = phi <vscale x 2 x i1> [ [[TMP13]], [[VECTOR_PH]] ], [ [[TMP52:%.*]], [[VECTOR_BODY]] ]
; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi <vscale x 2 x i1> [ [[TMP14]], [[VECTOR_PH]] ], [ [[TMP53:%.*]], [[VECTOR_BODY]] ]
; CHECK-UF4-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[SRC]], i64 [[INDEX]]
-; CHECK-UF4-NEXT: [[TMP21:%.*]] = mul nuw nsw i64 [[TMP1]], 2
+; CHECK-UF4-NEXT: [[TMP21:%.*]] = shl nuw nsw i64 [[TMP1]], 1
; CHECK-UF4-NEXT: [[TMP24:%.*]] = mul nuw nsw i64 [[TMP1]], 3
; CHECK-UF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds double, ptr [[TMP15]], i64 [[TMP1]]
; CHECK-UF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds double, ptr [[TMP15]], i64 [[TMP21]]
@@ -255,8 +255,8 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl
; CHECK-TF-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-TF: vector.ph:
; CHECK-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-TF-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP0]], 2
-; CHECK-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP10]], 2
+; CHECK-TF-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP0]], 1
+; CHECK-TF-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP10]], 1
; CHECK-TF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-TF-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; CHECK-TF-NEXT: [[TMP4:%.*]] = sub i64 [[N]], [[TMP3]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
index eceda0897b174..a381218002fc2 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
@@ -13,7 +13,7 @@ define void @widen_extractvalue(ptr %dst, {i64, i64} %sv) #0 {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP2]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 1000, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 1000, [[N_MOD_VF]]
; CHECK-NEXT: [[EXTRACT0:%.*]] = extractvalue { i64, i64 } [[SV]], 0
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
index 456c03824106a..5c57491ef9290 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
@@ -23,7 +23,7 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
@@ -105,7 +105,7 @@ define void @pointer_induction(ptr noalias %start, i64 %N) {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
index 3082a49babed4..406090922dcfd 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
@@ -240,8 +240,7 @@ define i32 @pointer_iv_mixed(ptr noalias %a, ptr noalias %b, i64 %n) #0 {
; CHECK-NEXT: [[TMP12]] = add <vscale x 2 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
; CHECK-NEXT: store <vscale x 2 x ptr> [[VECTOR_GEP]], ptr [[NEXT_GEP]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP5]], 3
-; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP8]]
+; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 16
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: middle.block:
@@ -311,8 +310,7 @@ define void @phi_used_in_vector_compare_and_scalar_indvar_update_and_store(ptr %
; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <vscale x 2 x ptr> [[VECTOR_GEP]], zeroinitializer
; CHECK-NEXT: call void @llvm.masked.store.nxv2i16.p0(<vscale x 2 x i16> zeroinitializer, ptr align 2 [[TMP7]], <vscale x 2 x i1> [[TMP6]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP0]], 2
-; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP3]]
+; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 4
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll
index de70da6d2558b..7a3b8ccf3e241 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll
@@ -19,7 +19,7 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features
; NONE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NONE: vector.ph:
; NONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NONE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NONE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX]], [[TMP3]]
; NONE-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX]], [[N_MOD_VF]]
; NONE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i64 0
@@ -54,7 +54,7 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features
; DATA-NEXT: br label [[VECTOR_PH:%.*]]
; DATA: vector.ph:
; DATA-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; DATA-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
+; DATA-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 2
; DATA-NEXT: [[TMP8:%.*]] = sub i64 [[TMP5]], 1
; DATA-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP8]]
; DATA-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
@@ -81,7 +81,7 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features
; DATA_NO_LANEMASK-NEXT: br label [[VECTOR_PH:%.*]]
; DATA_NO_LANEMASK: vector.ph:
; DATA_NO_LANEMASK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; DATA_NO_LANEMASK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
+; DATA_NO_LANEMASK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 2
; DATA_NO_LANEMASK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP5]], 1
; DATA_NO_LANEMASK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP8]]
; DATA_NO_LANEMASK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
@@ -116,7 +116,7 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features
; DATA_AND_CONTROL-NEXT: br label [[VECTOR_PH:%.*]]
; DATA_AND_CONTROL: vector.ph:
; DATA_AND_CONTROL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; DATA_AND_CONTROL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
+; DATA_AND_CONTROL-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 2
; DATA_AND_CONTROL-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]])
; DATA_AND_CONTROL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i64 0
; DATA_AND_CONTROL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
@@ -142,7 +142,7 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; DATA_AND_CONTROL_NO_RT_CHECK: vector.ph:
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll
index d82dace2c9e04..57d0534872118 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll
@@ -13,7 +13,7 @@ define void @load_store_interleave_group(ptr noalias %data) {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -62,7 +62,7 @@ define void @test_2xi64_unary_op_load_interleave_group(ptr noalias %data, ptr no
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1111, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1111, [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -172,7 +172,7 @@ define void @test_masked_interleave_group(i32 %N, ptr %mask, ptr %src, ptr %dst)
;
; CHECK-LABEL: define void @test_masked_interleave_group(
; CHECK-SAME: i32 [[N:%.*]], ptr [[MASK:%.*]], ptr [[SRC:%.*]], ptr [[DST:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[ITER_CHECK:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -202,7 +202,7 @@ define void @test_masked_interleave_group(i32 %N, ptr %mask, ptr %src, ptr %dst)
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK6]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP21]], 16
+; CHECK-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP21]], 4
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP9]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -243,7 +243,7 @@ define void @test_masked_interleave_group(i32 %N, ptr %mask, ptr %src, ptr %dst)
; CHECK: [[VEC_EPILOG_PH]]:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP23:%.*]] = mul nuw i64 [[TMP22]], 8
+; CHECK-NEXT: [[TMP23:%.*]] = shl nuw i64 [[TMP22]], 3
; CHECK-NEXT: [[N_MOD_VF10:%.*]] = urem i64 [[TMP1]], [[TMP23]]
; CHECK-NEXT: [[INDEX:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF10]]
; CHECK-NEXT: [[TMP24:%.*]] = trunc i64 [[INDEX]] to i32
@@ -279,10 +279,10 @@ define void @test_masked_interleave_group(i32 %N, ptr %mask, ptr %src, ptr %dst)
; CHECK-NEXT: [[CMP_N24:%.*]] = icmp eq i64 [[TMP1]], [[INDEX]]
; CHECK-NEXT: br i1 [[CMP_N24]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP24]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP10]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL25:%.*]] = phi ptr [ [[NEXT_GEP]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP12]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[DST]], %[[VECTOR_MEMCHECK]] ], [ [[DST]], %[[ENTRY]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL26:%.*]] = phi ptr [ [[NEXT_GEP7]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP14]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[SRC]], %[[VECTOR_MEMCHECK]] ], [ [[SRC]], %[[ENTRY]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL27:%.*]] = phi ptr [ [[NEXT_GEP8]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP15]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[MASK]], %[[VECTOR_MEMCHECK]] ], [ [[MASK]], %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP24]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP10]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL25:%.*]] = phi ptr [ [[NEXT_GEP]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP12]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[DST]], %[[VECTOR_MEMCHECK]] ], [ [[DST]], %[[ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL26:%.*]] = phi ptr [ [[NEXT_GEP7]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP14]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[SRC]], %[[VECTOR_MEMCHECK]] ], [ [[SRC]], %[[ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL27:%.*]] = phi ptr [ [[NEXT_GEP8]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP15]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[MASK]], %[[VECTOR_MEMCHECK]] ], [ [[MASK]], %[[ITER_CHECK]] ]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops-chained.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops-chained.ll
index 23bc21a49a8b3..be0902180abc8 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops-chained.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops-chained.ll
@@ -554,7 +554,7 @@ define void @test_2xi64_mul_add_xor_mismatched_opcodes2(ptr noalias %data, ptr n
; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP3]], splat (i64 2)
; VF2-NEXT: [[TMP5:%.*]] = xor <2 x i64> splat (i64 4), [[TMP4]]
; VF2-NEXT: [[TMP6:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]]
-; VF2-NEXT: [[TMP7:%.*]] = mul <2 x i64> [[TMP6]], splat (i64 2)
+; VF2-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[TMP6]], splat (i64 1)
; VF2-NEXT: [[TMP8:%.*]] = xor <2 x i64> splat (i64 4), [[TMP7]]
; VF2-NEXT: [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll
index 0cfc14a0ae3a8..a44a16455445c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll
@@ -29,7 +29,7 @@ define void @zext_i8_i16(ptr noalias nocapture readonly %p, ptr noalias nocaptur
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16
+; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -53,8 +53,8 @@ define void @zext_i8_i16(ptr noalias nocapture readonly %p, ptr noalias nocaptur
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
-; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP8]] to i32
+; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP12]] to i32
; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV]], 2
; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDVARS_IV]]
@@ -109,7 +109,7 @@ define void @sext_i8_i16(ptr noalias nocapture readonly %p, ptr noalias nocaptur
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16
+; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -133,8 +133,8 @@ define void @sext_i8_i16(ptr noalias nocapture readonly %p, ptr noalias nocaptur
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
-; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[TMP8]] to i32
+; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[TMP12]] to i32
; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV]], 2
; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDVARS_IV]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/widen-gep-all-indices-invariant.ll b/llvm/test/Transforms/LoopVectorize/AArch64/widen-gep-all-indices-invariant.ll
index 97cc6929e44d5..83186fdc9b38a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/widen-gep-all-indices-invariant.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/widen-gep-all-indices-invariant.ll
@@ -10,7 +10,7 @@ define i32 @gep_with_all_invariant_operands(ptr %src.0, ptr %src.1, i64 %n, i1 %
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP0]], [[TMP4]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll b/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll
index 82f272ad853a8..5e9f07e3611e9 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll
@@ -10,7 +10,7 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #1 {
; WIDE-NEXT: br label [[VECTOR_PH:%.*]]
; WIDE: vector.ph:
; WIDE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; WIDE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; WIDE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; WIDE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; WIDE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; WIDE-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -48,7 +48,7 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #1 {
; NARROW-NEXT: br label [[VECTOR_PH:%.*]]
; NARROW: vector.ph:
; NARROW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NARROW-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP2]], 4
+; NARROW-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP2]], 2
; NARROW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP1]]
; NARROW-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; NARROW-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll
index 9f62c7dcda65a..d0dfc8e6fbd2f 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll
@@ -124,7 +124,7 @@ define void @test_stride2_4i32(ptr readonly %data, ptr noalias nocapture %dst, i
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw i32 [[INDEX]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i32 [[INDEX]], 1
; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i32 [[TMP3]], 2
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i32 [[TMP4]]
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4
@@ -233,7 +233,7 @@ define void @test_stride4_4i32(ptr readonly %data, ptr noalias nocapture %dst, i
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 [[N]])
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw <4 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw <4 x i32> [[TMP1]], splat (i32 2)
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], <4 x i32> [[TMP2]]
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> align 4 [[TMP3]], <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
@@ -454,7 +454,7 @@ define void @test_stride_noninvar3_4i32(ptr readonly %data, ptr noalias nocaptur
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add nuw nsw <4 x i32> splat (i32 3), [[TMP3]]
-; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[X]], 4
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[X]], 2
; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 0
; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
index 9daf4236982bd..bc9ff908e8e1c 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
@@ -87,16 +87,13 @@ define i8 @dead_live_out_due_to_scalar_epilogue_required(ptr %src, ptr %dst) {
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = phi i32 [ 252, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 16, i1 true)
-; CHECK-NEXT: [[TMP3:%.*]] = mul i32 4, [[TMP2]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP3]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = sext <vscale x 16 x i32> [[VEC_IND]] to <vscale x 16 x i64>
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[SRC]], <vscale x 16 x i64> [[TMP9]]
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 16 x i8> @llvm.vp.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> align 1 [[TMP6]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP2]]), !alias.scope [[META3:![0-9]+]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[DST]], <vscale x 16 x i64> [[TMP9]]
; CHECK-NEXT: call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x ptr> align 1 [[TMP7]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP2]]), !alias.scope [[META6:![0-9]+]], !noalias [[META3]]
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP2]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], splat (i32 16)
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[AVL_NEXT]], 0
; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -360,9 +357,6 @@ define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %s
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP2]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP10]] to i64
-; CHECK-NEXT: [[TMP12:%.*]] = mul nsw i64 2, [[TMP16]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP12]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[EVL_BASED_IV]], 2
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP10]], 2
@@ -375,7 +369,7 @@ define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %s
; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP18]], <vscale x 4 x ptr> align 4 [[TMP19]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <vscale x 4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll b/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll
index 69d83db49fd18..9c3fdc9408341 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll
@@ -62,7 +62,7 @@ define void @test_wide_ptr_induction(ptr noalias %a, ptr noalias %b, i64 %N) {
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[B]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
-; CHECK-NEXT: [[TMP6:%.*]] = mul <vscale x 2 x i64> [[TMP5]], splat (i64 8)
+; CHECK-NEXT: [[TMP6:%.*]] = shl <vscale x 2 x i64> [[TMP5]], splat (i64 3)
; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[TMP6]]
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[EVL_BASED_IV]]
@@ -70,8 +70,7 @@ define void @test_wide_ptr_induction(ptr noalias %a, ptr noalias %b, i64 %N) {
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]]
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = mul i64 8, [[TMP9]]
-; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP11]]
+; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 64
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll b/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll
index e35db479dc963..753b75970882d 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll
@@ -11,7 +11,7 @@ define i64 @pr97452_scalable_vf1_for(ptr %src, ptr noalias %dst) #0 {
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i32 [[TMP3]], 2
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll b/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll
index 9f747dd6222a2..bd52d31c2b1c9 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll
@@ -28,7 +28,7 @@ define void @fmin32(ptr noundef readonly captures(none) %input1, ptr noundef rea
; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP9]], 4
+; CHECK-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP9]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 4096, [[TMP18]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 4096, [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -89,7 +89,7 @@ define void @fmin32(ptr noundef readonly captures(none) %input1, ptr noundef rea
; ZVFHMIN-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; ZVFHMIN: [[VECTOR_PH]]:
; ZVFHMIN-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; ZVFHMIN-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
+; ZVFHMIN-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 2
; ZVFHMIN-NEXT: [[N_MOD_VF:%.*]] = urem i64 4096, [[TMP10]]
; ZVFHMIN-NEXT: [[N_VEC:%.*]] = sub i64 4096, [[N_MOD_VF]]
; ZVFHMIN-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -173,7 +173,7 @@ define void @fmax32(ptr noundef readonly captures(none) %input1, ptr noundef rea
; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP9]], 4
+; CHECK-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP9]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 4096, [[TMP18]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 4096, [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -234,7 +234,7 @@ define void @fmax32(ptr noundef readonly captures(none) %input1, ptr noundef rea
; ZVFHMIN-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; ZVFHMIN: [[VECTOR_PH]]:
; ZVFHMIN-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; ZVFHMIN-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
+; ZVFHMIN-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 2
; ZVFHMIN-NEXT: [[N_MOD_VF:%.*]] = urem i64 4096, [[TMP10]]
; ZVFHMIN-NEXT: [[N_VEC:%.*]] = sub i64 4096, [[N_MOD_VF]]
; ZVFHMIN-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -318,7 +318,7 @@ define void @fmin64(ptr noundef readonly captures(none) %input1, ptr noundef rea
; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP9]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP9]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 4096, [[TMP18]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 4096, [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -379,7 +379,7 @@ define void @fmin64(ptr noundef readonly captures(none) %input1, ptr noundef rea
; ZVFHMIN-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; ZVFHMIN: [[VECTOR_PH]]:
; ZVFHMIN-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; ZVFHMIN-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 2
+; ZVFHMIN-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 1
; ZVFHMIN-NEXT: [[N_MOD_VF:%.*]] = urem i64 4096, [[TMP10]]
; ZVFHMIN-NEXT: [[N_VEC:%.*]] = sub i64 4096, [[N_MOD_VF]]
; ZVFHMIN-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -463,7 +463,7 @@ define void @fmax64(ptr noundef readonly captures(none) %input1, ptr noundef rea
; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP9]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP9]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 4096, [[TMP18]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 4096, [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -524,7 +524,7 @@ define void @fmax64(ptr noundef readonly captures(none) %input1, ptr noundef rea
; ZVFHMIN-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; ZVFHMIN: [[VECTOR_PH]]:
; ZVFHMIN-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; ZVFHMIN-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 2
+; ZVFHMIN-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 1
; ZVFHMIN-NEXT: [[N_MOD_VF:%.*]] = urem i64 4096, [[TMP10]]
; ZVFHMIN-NEXT: [[N_VEC:%.*]] = sub i64 4096, [[N_MOD_VF]]
; ZVFHMIN-NEXT: br label %[[VECTOR_BODY:.*]]
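
Every hunk in the file above has the same shape: a vector-factor computation `mul nuw i64 %x, C`, with C a power of two, is rewritten to `shl nuw i64 %x, log2(C)`. A minimal illustration of the identity in isolation (function and value names here are mine, not from the tests):

  define i64 @vf_times_4(i64 %vscale.val) {
    ; mul nuw i64 %vscale.val, 4 simplifies to the shift below:
    ; 4 == 1 << 2, and the nuw flag carries over unchanged.
    %vf = shl nuw i64 %vscale.val, 2
    ret i64 %vf
  }
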
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll
index 4ccec2ca61778..2353eed4259da 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll
@@ -135,20 +135,17 @@ define void @test_3_inductions(ptr noalias %dst, ptr noalias %src, i64 %n) #1 {
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <vscale x 2 x i32> [ [[INDUCTION1]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <vscale x 2 x i32> [ [[INDUCTION1]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP0]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
-; CHECK-NEXT: [[TMP4:%.*]] = mul i32 2, [[TMP3]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP4]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = or <vscale x 2 x i32> [[VEC_IND2]], [[VEC_IND]]
; CHECK-NEXT: [[TMP6:%.*]] = sext <vscale x 2 x i32> [[TMP5]] to <vscale x 2 x i64>
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[SRC]], <vscale x 2 x i64> [[TMP6]]
; CHECK-NEXT: call void @llvm.vp.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> [[TMP7]], <vscale x 2 x ptr> align 8 [[BROADCAST_SPLAT]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP3]])
; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP3]] to i64
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT4]]
-; CHECK-NEXT: [[VEC_IND_NEXT5]] = add <vscale x 2 x i32> [[VEC_IND2]], [[BROADCAST_SPLAT4]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <vscale x 2 x i32> [[VEC_IND2]], splat (i32 4)
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
index 7e6e45feaa834..c520f667c2eeb 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
@@ -20,7 +20,7 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
; OUTLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; OUTLOOP: vector.ph:
; OUTLOOP-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
-; OUTLOOP-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP2]], 4
+; OUTLOOP-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 2
; OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], [[TMP3]]
; OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
; OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -70,7 +70,7 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
; INLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; INLOOP: vector.ph:
; INLOOP-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
-; INLOOP-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP2]], 8
+; INLOOP-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 3
; INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], [[TMP3]]
; INLOOP-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
; INLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -199,7 +199,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
; OUTLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; OUTLOOP: vector.ph:
; OUTLOOP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; OUTLOOP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; OUTLOOP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[START:%.*]], i64 0
@@ -245,7 +245,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
; INLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; INLOOP: vector.ph:
; INLOOP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; INLOOP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; INLOOP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; INLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -301,7 +301,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; IF-EVL-OUTLOOP: middle.block:
; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> [[TMP15]])
; IF-EVL-OUTLOOP-NEXT: br label [[FOR_BODY:%.*]]
@@ -326,7 +326,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP14]], [[EVL_BASED_IV]]
; IF-EVL-INLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]]
; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; IF-EVL-INLOOP-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; IF-EVL-INLOOP-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; IF-EVL-INLOOP: middle.block:
; IF-EVL-INLOOP-NEXT: br label [[FOR_BODY:%.*]]
; IF-EVL-INLOOP: for.end:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll
index 31c8b74194062..7ff7e6deaf503 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll
@@ -141,7 +141,7 @@ define void @load_store_factor2_i64(ptr %p) {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP16]], [[INDEX]]
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: exit:
@@ -197,7 +197,7 @@ define void @load_store_factor2_i64(ptr %p) {
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP16]], [[INDEX]]
; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: br label [[LOOP:%.*]]
; SCALABLE: exit:
@@ -255,7 +255,7 @@ define void @load_store_factor3_i32(ptr %p) {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP19]], [[INDEX]]
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]]
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: exit:
@@ -317,7 +317,7 @@ define void @load_store_factor3_i32(ptr %p) {
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP19]], [[INDEX]]
; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]]
; SCALABLE-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; SCALABLE-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; SCALABLE-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: br label [[LOOP:%.*]]
; SCALABLE: exit:
@@ -381,7 +381,7 @@ define void @load_store_factor3_i64(ptr %p) {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP19]], [[INDEX]]
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]]
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: exit:
@@ -443,7 +443,7 @@ define void @load_store_factor3_i64(ptr %p) {
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP19]], [[INDEX]]
; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]]
; SCALABLE-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; SCALABLE-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; SCALABLE-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: br label [[LOOP:%.*]]
; SCALABLE: exit:
@@ -489,7 +489,7 @@ define void @load_store_factor4(ptr %p) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
-; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP8]]
; CHECK-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP7]], 4
; CHECK-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64.p0(ptr align 8 [[TMP9]], <vscale x 8 x i1> splat (i1 true), i32 [[INTERLEAVE_EVL]])
@@ -509,7 +509,7 @@ define void @load_store_factor4(ptr %p) {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP22]], [[INDEX]]
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP22]]
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: exit:
@@ -522,7 +522,7 @@ define void @load_store_factor4(ptr %p) {
; FIXED-NEXT: br label [[VECTOR_BODY:%.*]]
; FIXED: vector.body:
; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; FIXED-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 4
+; FIXED-NEXT: [[TMP0:%.*]] = shl i64 [[INDEX]], 2
; FIXED-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP0]]
; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <16 x i64>, ptr [[TMP1]], align 8
; FIXED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
@@ -555,7 +555,7 @@ define void @load_store_factor4(ptr %p) {
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
-; SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 [[INDEX]], 4
+; SCALABLE-NEXT: [[TMP8:%.*]] = shl i64 [[INDEX]], 2
; SCALABLE-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP8]]
; SCALABLE-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP7]], 4
; SCALABLE-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64.p0(ptr align 8 [[TMP9]], <vscale x 8 x i1> splat (i1 true), i32 [[INTERLEAVE_EVL]])
@@ -575,7 +575,7 @@ define void @load_store_factor4(ptr %p) {
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP22]], [[INDEX]]
; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP22]]
; SCALABLE-NEXT: [[TMP18:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; SCALABLE-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; SCALABLE-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: br label [[LOOP:%.*]]
; SCALABLE: exit:
@@ -649,7 +649,7 @@ define void @load_store_factor5(ptr %p) {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP25]], [[INDEX]]
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP25]]
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: exit:
@@ -721,7 +721,7 @@ define void @load_store_factor5(ptr %p) {
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP25]], [[INDEX]]
; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP25]]
; SCALABLE-NEXT: [[TMP20:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; SCALABLE-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; SCALABLE-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: br label [[LOOP:%.*]]
; SCALABLE: exit:
@@ -803,7 +803,7 @@ define void @load_store_factor6(ptr %p) {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP28]], [[INDEX]]
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP28]]
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: exit:
@@ -880,7 +880,7 @@ define void @load_store_factor6(ptr %p) {
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP28]], [[INDEX]]
; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP28]]
; SCALABLE-NEXT: [[TMP22:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; SCALABLE-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; SCALABLE-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: br label [[LOOP:%.*]]
; SCALABLE: exit:
@@ -970,7 +970,7 @@ define void @load_store_factor7(ptr %p) {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP31]], [[INDEX]]
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP31]]
; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: exit:
@@ -1053,7 +1053,7 @@ define void @load_store_factor7(ptr %p) {
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP31]], [[INDEX]]
; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP31]]
; SCALABLE-NEXT: [[TMP24:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; SCALABLE-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; SCALABLE-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: br label [[LOOP:%.*]]
; SCALABLE: exit:
@@ -1151,7 +1151,7 @@ define void @load_store_factor8(ptr %p) {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP34]], [[INDEX]]
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP34]]
; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: exit:
@@ -1237,7 +1237,7 @@ define void @load_store_factor8(ptr %p) {
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP34]], [[INDEX]]
; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP34]]
; SCALABLE-NEXT: [[TMP25:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; SCALABLE-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; SCALABLE-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: br label [[LOOP:%.*]]
; SCALABLE: exit:
@@ -1327,7 +1327,7 @@ define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP16]], [[INDEX]]
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: exit:
@@ -1379,7 +1379,7 @@ define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) {
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP16]], [[INDEX]]
; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
; SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: br label [[LOOP:%.*]]
; SCALABLE: exit:
@@ -1434,7 +1434,7 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP16]], [[INDEX]]
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: exit:
@@ -1486,7 +1486,7 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) {
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP16]], [[INDEX]]
; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
; SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; SCALABLE: middle.block:
; SCALABLE-NEXT: br label [[LOOP:%.*]]
; SCALABLE: exit:
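
Two independent effects show up in the interleaved-accesses hunks above. The index scaling itself is strength-reduced, along the lines of (illustrative names):

  %offset = shl i64 %index, 2            ; was: mul i64 %index, 4
  %gep = getelementptr i64, ptr %p, i64 %offset

Separately, the `!llvm.loop [[LOOPN]]` suffixes all shift down by one (LOOP4 -> LOOP3 and so on), which looks like fallout from regenerating the check lines after an instruction was simplified away rather than a functional change.
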
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll
index f49f9284a1e93..efd9a364f2a60 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll
@@ -54,9 +54,6 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
; RV32-NEXT: [[AVL:%.*]] = phi i64 [ 625, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; RV32-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; RV32-NEXT: [[TMP8:%.*]] = zext i32 [[TMP10]] to i64
-; RV32-NEXT: [[TMP11:%.*]] = mul nuw nsw i64 16, [[TMP8]]
-; RV32-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP11]], i64 0
-; RV32-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; RV32-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], <vscale x 2 x i64> [[VEC_IND]]
; RV32-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i32> @llvm.vp.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> align 4 [[TMP13]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP10]]), !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
; RV32-NEXT: [[TMP14:%.*]] = icmp slt <vscale x 2 x i32> [[WIDE_MASKED_GATHER]], splat (i32 100)
@@ -68,7 +65,7 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
; RV32-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[A]], <vscale x 2 x i64> [[VEC_IND]]
; RV32-NEXT: call void @llvm.vp.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> [[TMP18]], <vscale x 2 x ptr> align 8 [[TMP19]], <vscale x 2 x i1> [[TMP14]], i32 [[TMP10]]), !alias.scope [[META3]], !noalias [[META5]]
; RV32-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]]
-; RV32-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; RV32-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <vscale x 2 x i64> [[VEC_IND]], splat (i64 256)
; RV32-NEXT: [[TMP24:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; RV32-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; RV32: middle.block:
@@ -123,9 +120,6 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
; RV64-NEXT: [[AVL:%.*]] = phi i64 [ 625, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; RV64-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; RV64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP10]] to i64
-; RV64-NEXT: [[TMP11:%.*]] = mul nuw nsw i64 16, [[TMP8]]
-; RV64-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP11]], i64 0
-; RV64-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; RV64-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], <vscale x 2 x i64> [[VEC_IND]]
; RV64-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i32> @llvm.vp.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> align 4 [[TMP13]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP10]]), !alias.scope [[META0:![0-9]+]]
; RV64-NEXT: [[TMP14:%.*]] = icmp slt <vscale x 2 x i32> [[WIDE_MASKED_GATHER]], splat (i32 100)
@@ -137,7 +131,7 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
; RV64-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[A]], <vscale x 2 x i64> [[VEC_IND]]
; RV64-NEXT: call void @llvm.vp.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> [[TMP18]], <vscale x 2 x ptr> align 8 [[TMP19]], <vscale x 2 x i1> [[TMP14]], i32 [[TMP10]]), !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]]
; RV64-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]]
-; RV64-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; RV64-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <vscale x 2 x i64> [[VEC_IND]], splat (i64 256)
; RV64-NEXT: [[TMP24:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; RV64-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; RV64: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll
index 1ae1ba6795c01..d41f5e4d92e58 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll
@@ -19,7 +19,7 @@ define i32 @vqdot(ptr %a, ptr %b) #0 {
; V-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; V: vector.ph:
; V-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; V-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; V-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; V-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; V-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; V-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -52,7 +52,7 @@ define i32 @vqdot(ptr %a, ptr %b) #0 {
; ZVQDOTQ-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; ZVQDOTQ: vector.ph:
; ZVQDOTQ-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; ZVQDOTQ-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; ZVQDOTQ-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; ZVQDOTQ-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; ZVQDOTQ-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; ZVQDOTQ-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -212,7 +212,7 @@ define i32 @vqdotu(ptr %a, ptr %b) #0 {
; V-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; V: vector.ph:
; V-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; V-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; V-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; V-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; V-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; V-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -245,7 +245,7 @@ define i32 @vqdotu(ptr %a, ptr %b) #0 {
; ZVQDOTQ-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; ZVQDOTQ: vector.ph:
; ZVQDOTQ-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; ZVQDOTQ-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; ZVQDOTQ-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; ZVQDOTQ-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; ZVQDOTQ-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; ZVQDOTQ-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -405,7 +405,7 @@ define i32 @vqdotsu(ptr %a, ptr %b) #0 {
; V-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; V: vector.ph:
; V-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; V-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; V-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; V-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; V-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; V-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -438,7 +438,7 @@ define i32 @vqdotsu(ptr %a, ptr %b) #0 {
; ZVQDOTQ-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; ZVQDOTQ: vector.ph:
; ZVQDOTQ-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; ZVQDOTQ-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; ZVQDOTQ-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; ZVQDOTQ-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; ZVQDOTQ-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; ZVQDOTQ-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -597,7 +597,7 @@ define i32 @vqdotsu2(ptr %a, ptr %b) #0 {
; V-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; V: vector.ph:
; V-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; V-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; V-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; V-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; V-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; V-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -630,7 +630,7 @@ define i32 @vqdotsu2(ptr %a, ptr %b) #0 {
; ZVQDOTQ-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; ZVQDOTQ: vector.ph:
; ZVQDOTQ-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; ZVQDOTQ-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; ZVQDOTQ-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; ZVQDOTQ-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; ZVQDOTQ-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; ZVQDOTQ-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll
index 850a6cb7ddb0d..0523af10188c2 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll
@@ -20,19 +20,13 @@ define void @f(ptr noalias %p0, ptr noalias %p1, ptr noalias %p2) {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION1]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND4:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION2]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT12:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION1]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND4:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION2]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT8:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-; CHECK-NEXT: [[TMP8:%.*]] = mul i64 4, [[TMP7]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP8]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = mul i64 3, [[TMP7]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP10]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT5]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = mul i64 2, [[TMP7]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP12]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP10]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT7]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP13:%.*]] = sub <vscale x 4 x i64> [[VEC_IND]], splat (i64 1)
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[P0]], <vscale x 4 x i64> [[TMP13]]
@@ -51,9 +45,9 @@ define void @f(ptr noalias %p0, ptr noalias %p1, ptr noalias %p2) {
; CHECK-NEXT: call void @llvm.vp.store.nxv12i8.p0(<vscale x 12 x i8> [[INTERLEAVED_VEC]], ptr align 1 [[TMP21]], <vscale x 12 x i1> splat (i1 true), i32 [[INTERLEAVE_EVL]])
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP7]], [[EVL_BASED_IV]]
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT8]]
-; CHECK-NEXT: [[VEC_IND_NEXT11]] = add <vscale x 4 x i64> [[VEC_IND3]], [[BROADCAST_SPLAT6]]
-; CHECK-NEXT: [[VEC_IND_NEXT12]] = add <vscale x 4 x i64> [[VEC_IND4]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT: [[VEC_IND_NEXT7]] = add <vscale x 4 x i64> [[VEC_IND3]], [[BROADCAST_SPLAT8]]
+; CHECK-NEXT: [[VEC_IND_NEXT8]] = add <vscale x 4 x i64> [[VEC_IND4]], splat (i64 16)
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -77,19 +71,13 @@ define void @f(ptr noalias %p0, ptr noalias %p1, ptr noalias %p2) {
; NO-REG-PRESSURE-CHECK: [[VECTOR_BODY]]:
; NO-REG-PRESSURE-CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND3:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION1]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], %[[VECTOR_BODY]] ]
-; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND4:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION2]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT12:%.*]], %[[VECTOR_BODY]] ]
+; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND3:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION1]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], %[[VECTOR_BODY]] ]
+; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND4:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION2]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT8:%.*]], %[[VECTOR_BODY]] ]
; NO-REG-PRESSURE-CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; NO-REG-PRESSURE-CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
; NO-REG-PRESSURE-CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-; NO-REG-PRESSURE-CHECK-NEXT: [[TMP8:%.*]] = mul i64 4, [[TMP7]]
-; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP8]], i64 0
-; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; NO-REG-PRESSURE-CHECK-NEXT: [[TMP10:%.*]] = mul i64 3, [[TMP7]]
-; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP10]], i64 0
-; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT5]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
-; NO-REG-PRESSURE-CHECK-NEXT: [[TMP12:%.*]] = mul i64 2, [[TMP7]]
-; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
+; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP10]], i64 0
; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT7]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; NO-REG-PRESSURE-CHECK-NEXT: [[TMP13:%.*]] = sub <vscale x 8 x i64> [[VEC_IND]], splat (i64 1)
; NO-REG-PRESSURE-CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[P0]], <vscale x 8 x i64> [[TMP13]]
@@ -108,9 +96,9 @@ define void @f(ptr noalias %p0, ptr noalias %p1, ptr noalias %p2) {
; NO-REG-PRESSURE-CHECK-NEXT: call void @llvm.vp.store.nxv24i8.p0(<vscale x 24 x i8> [[INTERLEAVED_VEC]], ptr align 1 [[TMP21]], <vscale x 24 x i1> splat (i1 true), i32 [[INTERLEAVE_EVL]])
; NO-REG-PRESSURE-CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP7]], [[EVL_BASED_IV]]
; NO-REG-PRESSURE-CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]]
-; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT8]]
-; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND_NEXT11]] = add <vscale x 8 x i64> [[VEC_IND3]], [[BROADCAST_SPLAT6]]
-; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND_NEXT12]] = add <vscale x 8 x i64> [[VEC_IND4]], [[BROADCAST_SPLAT]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], splat (i64 4)
+; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND_NEXT7]] = add <vscale x 8 x i64> [[VEC_IND3]], [[BROADCAST_SPLAT8]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND_NEXT8]] = add <vscale x 8 x i64> [[VEC_IND4]], splat (i64 16)
; NO-REG-PRESSURE-CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; NO-REG-PRESSURE-CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; NO-REG-PRESSURE-CHECK: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
index f7340fee47eb8..e2d1aabddce09 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
@@ -105,8 +105,8 @@ define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) {
; RV64-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; RV64-UF2: [[VECTOR_PH]]:
; RV64-UF2-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; RV64-UF2-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
-; RV64-UF2-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2
+; RV64-UF2-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 2
+; RV64-UF2-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
; RV64-UF2-NEXT: [[N_VEC:%.*]] = urem i64 1023, [[TMP6]]
; RV64-UF2-NEXT: [[TMP7:%.*]] = sub i64 1023, [[N_VEC]]
; RV64-UF2-NEXT: [[TMP33:%.*]] = sub i64 1023, [[TMP7]]
@@ -339,8 +339,8 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
; RV64-UF2-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; RV64-UF2: [[VECTOR_PH]]:
; RV64-UF2-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
-; RV64-UF2-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 4
-; RV64-UF2-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 2
+; RV64-UF2-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP17]], 2
+; RV64-UF2-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 1
; RV64-UF2-NEXT: [[N_VEC:%.*]] = urem i64 [[TMP0]], [[TMP19]]
; RV64-UF2-NEXT: [[TMP20:%.*]] = sub i64 [[TMP0]], [[N_VEC]]
; RV64-UF2-NEXT: [[TMP48:%.*]] = sub i64 [[TMP0]], [[TMP20]]
@@ -590,8 +590,8 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
; RV64-UF2-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; RV64-UF2: [[VECTOR_PH]]:
; RV64-UF2-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
-; RV64-UF2-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 4
-; RV64-UF2-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 2
+; RV64-UF2-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP17]], 2
+; RV64-UF2-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 1
; RV64-UF2-NEXT: [[N_VEC:%.*]] = urem i64 [[TMP0]], [[TMP19]]
; RV64-UF2-NEXT: [[TMP20:%.*]] = sub i64 [[TMP0]], [[N_VEC]]
; RV64-UF2-NEXT: [[TMP48:%.*]] = sub i64 [[TMP0]], [[TMP20]]
@@ -768,8 +768,8 @@ define void @vector_reverse_f32_simplify(ptr noalias %A, ptr noalias %B) {
; RV64-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; RV64-UF2: [[VECTOR_PH]]:
; RV64-UF2-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; RV64-UF2-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
-; RV64-UF2-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2
+; RV64-UF2-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 2
+; RV64-UF2-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
; RV64-UF2-NEXT: [[N_VEC:%.*]] = urem i64 1023, [[TMP6]]
; RV64-UF2-NEXT: [[TMP7:%.*]] = sub i64 1023, [[N_VEC]]
; RV64-UF2-NEXT: [[TMP33:%.*]] = sub i64 1023, [[TMP7]]
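
In the RV64-UF2 hunks above, the VF and the unroll factor are folded separately, leaving a chain of two shifts rather than a single one. Spelled out with illustrative names:

  %vf = shl nuw i64 %vscale, 2        ; vscale * 4  (VF)
  %vf.x.uf = shl nuw i64 %vf, 1       ; (vscale * 4) * 2 == vscale << 3  (VF * UF)

The two shifts stay separate, presumably because the intermediate VF value has uses of its own elsewhere in the preheader.
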
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
index d156d0c501e1f..e655968d486b4 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
@@ -21,7 +21,7 @@ define void @single_constant_stride_int_scaled(ptr %p) {
; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP12]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = mul nuw nsw <vscale x 4 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT: [[TMP14:%.*]] = shl nuw nsw <vscale x 4 x i64> [[VEC_IND]], splat (i64 3)
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[P:%.*]], <vscale x 4 x i64> [[TMP14]]
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.vp.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
; CHECK-NEXT: [[TMP16:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], splat (i32 1)
@@ -43,10 +43,10 @@ define void @single_constant_stride_int_scaled(ptr %p) {
; CHECK-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-UF2: vector.ph:
; CHECK-UF2-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-UF2-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; CHECK-UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP3]], i64 0
; CHECK-UF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-UF2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
+; CHECK-UF2-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 1
; CHECK-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]]
; CHECK-UF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-UF2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP4]], i64 [[N_MOD_VF]]
@@ -59,8 +59,8 @@ define void @single_constant_stride_int_scaled(ptr %p) {
; CHECK-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-UF2-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-UF2-NEXT: [[STEP_ADD:%.*]] = add <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; CHECK-UF2-NEXT: [[TMP9:%.*]] = mul nuw nsw <vscale x 4 x i64> [[VEC_IND]], splat (i64 8)
-; CHECK-UF2-NEXT: [[TMP10:%.*]] = mul nuw nsw <vscale x 4 x i64> [[STEP_ADD]], splat (i64 8)
+; CHECK-UF2-NEXT: [[TMP9:%.*]] = shl nuw nsw <vscale x 4 x i64> [[VEC_IND]], splat (i64 3)
+; CHECK-UF2-NEXT: [[TMP10:%.*]] = shl nuw nsw <vscale x 4 x i64> [[STEP_ADD]], splat (i64 3)
; CHECK-UF2-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[P:%.*]], <vscale x 4 x i64> [[TMP9]]
; CHECK-UF2-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[P]], <vscale x 4 x i64> [[TMP10]]
; CHECK-UF2-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison)
@@ -123,15 +123,12 @@ define void @single_constant_stride_int_iv(ptr %p) {
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP7]] to i64
-; CHECK-NEXT: [[TMP9:%.*]] = mul nuw nsw i64 64, [[TMP11]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP9]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[P:%.*]], <vscale x 4 x i64> [[VEC_IND]]
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.vp.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP7]])
; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], splat (i32 1)
; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP13]], <vscale x 4 x ptr> align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP7]])
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <vscale x 4 x i64> [[VEC_IND]], splat (i64 4096)
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: middle.block:
@@ -147,14 +144,14 @@ define void @single_constant_stride_int_iv(ptr %p) {
; CHECK-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-UF2: vector.ph:
; CHECK-UF2-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-UF2-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; CHECK-UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP3]], i64 0
; CHECK-UF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-UF2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
+; CHECK-UF2-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 1
; CHECK-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]]
; CHECK-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-UF2-NEXT: [[TMP5:%.*]] = mul i64 [[N_VEC]], 64
-; CHECK-UF2-NEXT: [[TMP6:%.*]] = mul <vscale x 4 x i64> [[BROADCAST_SPLAT]], splat (i64 64)
+; CHECK-UF2-NEXT: [[TMP6:%.*]] = shl <vscale x 4 x i64> [[BROADCAST_SPLAT]], splat (i64 6)
; CHECK-UF2-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-UF2-NEXT: [[TMP8:%.*]] = mul nuw nsw <vscale x 4 x i64> [[TMP7]], splat (i64 64)
; CHECK-UF2-NEXT: [[INDUCTION:%.*]] = add nuw nsw <vscale x 4 x i64> zeroinitializer, [[TMP8]]
@@ -226,7 +223,7 @@ define void @single_constant_stride_ptr_iv(ptr %p) {
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P:%.*]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
-; CHECK-NEXT: [[TMP16:%.*]] = mul <vscale x 4 x i64> [[TMP14]], splat (i64 8)
+; CHECK-NEXT: [[TMP16:%.*]] = shl <vscale x 4 x i64> [[TMP14]], splat (i64 3)
; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 4 x i64> [[TMP16]]
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; CHECK-NEXT: [[TMP19:%.*]] = call <vscale x 4 x i32> @llvm.vp.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> align 4 [[VECTOR_GEP]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
@@ -234,8 +231,7 @@ define void @single_constant_stride_ptr_iv(ptr %p) {
; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP20]], <vscale x 4 x ptr> align 4 [[VECTOR_GEP]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP11]] to i64
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
-; CHECK-NEXT: [[TMP12:%.*]] = mul i64 8, [[TMP9]]
-; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP12]]
+; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 64
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
@@ -251,23 +247,23 @@ define void @single_constant_stride_ptr_iv(ptr %p) {
; CHECK-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-UF2: vector.ph:
; CHECK-UF2-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-UF2-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; CHECK-UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP3]], i64 0
; CHECK-UF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-UF2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
+; CHECK-UF2-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 1
; CHECK-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]]
; CHECK-UF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-UF2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP4]], i64 [[N_MOD_VF]]
; CHECK-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[TMP6]]
; CHECK-UF2-NEXT: [[TMP7:%.*]] = mul i64 [[N_VEC]], 8
; CHECK-UF2-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP7]]
-; CHECK-UF2-NEXT: [[TMP9:%.*]] = mul <vscale x 4 x i64> [[BROADCAST_SPLAT]], splat (i64 8)
+; CHECK-UF2-NEXT: [[TMP9:%.*]] = shl <vscale x 4 x i64> [[BROADCAST_SPLAT]], splat (i64 3)
; CHECK-UF2-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-UF2: vector.body:
; CHECK-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-UF2-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-UF2-NEXT: [[TMP10:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
-; CHECK-UF2-NEXT: [[TMP11:%.*]] = mul <vscale x 4 x i64> [[TMP10]], splat (i64 8)
+; CHECK-UF2-NEXT: [[TMP11:%.*]] = shl <vscale x 4 x i64> [[TMP10]], splat (i64 3)
; CHECK-UF2-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 4 x i64> [[TMP11]]
; CHECK-UF2-NEXT: [[TMP12:%.*]] = extractelement <vscale x 4 x ptr> [[VECTOR_GEP]], i32 0
; CHECK-UF2-NEXT: [[STEP_ADD:%.*]] = getelementptr i8, <vscale x 4 x ptr> [[VECTOR_GEP]], <vscale x 4 x i64> [[TMP9]]
@@ -283,8 +279,7 @@ define void @single_constant_stride_ptr_iv(ptr %p) {
; CHECK-UF2-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP16]], <vscale x 4 x ptr> align 4 [[VECTOR_GEP]], <vscale x 4 x i1> splat (i1 true))
; CHECK-UF2-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP17]], <vscale x 4 x ptr> align 4 [[STEP_ADD]], <vscale x 4 x i1> splat (i1 true))
; CHECK-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
-; CHECK-UF2-NEXT: [[TMP18:%.*]] = mul i64 8, [[TMP4]]
-; CHECK-UF2-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP18]]
+; CHECK-UF2-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 64
; CHECK-UF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-UF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK-UF2: middle.block:
@@ -375,8 +370,8 @@ define void @single_stride_int_scaled(ptr %p, i64 %stride) {
; NOSTRIDED-UF2-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; NOSTRIDED-UF2: vector.ph:
; NOSTRIDED-UF2-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NOSTRIDED-UF2-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 4
-; NOSTRIDED-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 2
+; NOSTRIDED-UF2-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP2]], 2
+; NOSTRIDED-UF2-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP5]], 1
; NOSTRIDED-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; NOSTRIDED-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; NOSTRIDED-UF2-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -497,8 +492,8 @@ define void @single_stride_int_iv(ptr %p, i64 %stride) {
; NOSTRIDED-UF2-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; NOSTRIDED-UF2: vector.ph:
; NOSTRIDED-UF2-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NOSTRIDED-UF2-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 4
-; NOSTRIDED-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 2
+; NOSTRIDED-UF2-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP2]], 2
+; NOSTRIDED-UF2-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP5]], 1
; NOSTRIDED-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; NOSTRIDED-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; NOSTRIDED-UF2-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -679,8 +674,8 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
; NOSTRIDED-UF2-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; NOSTRIDED-UF2: vector.ph:
; NOSTRIDED-UF2-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
-; NOSTRIDED-UF2-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP6]], 4
-; NOSTRIDED-UF2-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP9]], 2
+; NOSTRIDED-UF2-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP6]], 2
+; NOSTRIDED-UF2-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP9]], 1
; NOSTRIDED-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP7]]
; NOSTRIDED-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; NOSTRIDED-UF2-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -868,10 +863,10 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
; STRIDED-UF2-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; STRIDED-UF2: vector.ph:
; STRIDED-UF2-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64()
-; STRIDED-UF2-NEXT: [[TMP29:%.*]] = mul nuw i64 [[TMP28]], 4
+; STRIDED-UF2-NEXT: [[TMP29:%.*]] = shl nuw i64 [[TMP28]], 2
; STRIDED-UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP29]], i64 0
; STRIDED-UF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; STRIDED-UF2-NEXT: [[TMP30:%.*]] = mul nuw i64 [[TMP29]], 2
+; STRIDED-UF2-NEXT: [[TMP30:%.*]] = shl nuw i64 [[TMP29]], 1
; STRIDED-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP30]]
; STRIDED-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; STRIDED-UF2-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[STRIDE]], i64 0
@@ -991,8 +986,8 @@ define void @double_stride_int_iv(ptr %p, ptr %p2, i64 %stride) {
; NOSTRIDED-UF2-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; NOSTRIDED-UF2: vector.ph:
; NOSTRIDED-UF2-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NOSTRIDED-UF2-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 4
-; NOSTRIDED-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 2
+; NOSTRIDED-UF2-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP2]], 2
+; NOSTRIDED-UF2-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP5]], 1
; NOSTRIDED-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; NOSTRIDED-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; NOSTRIDED-UF2-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -1195,10 +1190,10 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) {
; STRIDED-UF2-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; STRIDED-UF2: vector.ph:
; STRIDED-UF2-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; STRIDED-UF2-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
+; STRIDED-UF2-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2
; STRIDED-UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP8]], i64 0
; STRIDED-UF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; STRIDED-UF2-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 2
+; STRIDED-UF2-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 1
; STRIDED-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP9]]
; STRIDED-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; STRIDED-UF2-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[STRIDE]], i64 0
@@ -1326,10 +1321,10 @@ define void @constant_stride_reinterpret(ptr noalias %in, ptr noalias %out) {
; NOSTRIDED-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NOSTRIDED-UF2: vector.ph:
; NOSTRIDED-UF2-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NOSTRIDED-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
+; NOSTRIDED-UF2-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1
; NOSTRIDED-UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP3]], i64 0
; NOSTRIDED-UF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; NOSTRIDED-UF2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
+; NOSTRIDED-UF2-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 1
; NOSTRIDED-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]]
; NOSTRIDED-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; NOSTRIDED-UF2-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
@@ -1408,10 +1403,10 @@ define void @constant_stride_reinterpret(ptr noalias %in, ptr noalias %out) {
; STRIDED-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; STRIDED-UF2: vector.ph:
; STRIDED-UF2-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; STRIDED-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
+; STRIDED-UF2-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1
; STRIDED-UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP3]], i64 0
; STRIDED-UF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; STRIDED-UF2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
+; STRIDED-UF2-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 1
; STRIDED-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]]
; STRIDED-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; STRIDED-UF2-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
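The hunks above all have the same shape: the vector-factor computation multiplies the result of llvm.vscale by a power-of-two constant, and the simplification rewrites that multiply as a left shift by the base-2 log of the constant (2 becomes a shift by 1, 4 by 2, 16 by 4), with the nuw flag carried over. A minimal standalone illustration in LLVM IR -- the function name is invented for the example and does not appear in the tests:

  declare i64 @llvm.vscale.i64()

  define i64 @vf_times_vscale() {
    %vscale = call i64 @llvm.vscale.i64()
    ; Before: multiply vscale by the constant element count 4.
    %vf = mul nuw i64 %vscale, 4
    ; After the simplification this is a shift, since 4 == 1 << 2:
    ;   %vf = shl nuw i64 %vscale, 2
    ret i64 %vf
  }
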
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll
index 0bcd027588f7d..9d78a7e6ff79f 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll
@@ -72,7 +72,7 @@ define void @test_and(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16
+; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -187,7 +187,7 @@ define void @test_or(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16
+; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -302,7 +302,7 @@ define void @test_xor(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16
+; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -417,7 +417,7 @@ define void @test_shl(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16
+; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -532,7 +532,7 @@ define void @test_lshr(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16
+; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -647,7 +647,7 @@ define void @test_ashr(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16
+; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -762,7 +762,7 @@ define void @test_add(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16
+; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -877,7 +877,7 @@ define void @test_sub(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16
+; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -992,7 +992,7 @@ define void @test_mul(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16
+; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -1107,7 +1107,7 @@ define void @test_sdiv(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16
+; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -1222,7 +1222,7 @@ define void @test_udiv(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16
+; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -1337,7 +1337,7 @@ define void @test_srem(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16
+; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -1452,7 +1452,7 @@ define void @test_urem(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16
+; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP6]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -1571,7 +1571,7 @@ define void @test_fadd(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
+; NO-VP-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP8]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -1688,7 +1688,7 @@ define void @test_fsub(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
+; NO-VP-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP8]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -1805,7 +1805,7 @@ define void @test_fmul(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
+; NO-VP-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP8]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -1922,7 +1922,7 @@ define void @test_fdiv(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
+; NO-VP-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP8]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -2092,7 +2092,7 @@ define void @test_fneg(ptr nocapture %a, ptr nocapture readonly %b) {
; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
+; NO-VP-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP8]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll
index f5ff512b94123..f670d2abbcacc 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll
@@ -87,7 +87,7 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) {
; NO-VP-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
+; NO-VP-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP10]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -225,7 +225,7 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) {
; NO-VP-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
+; NO-VP-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP10]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -363,7 +363,7 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) {
; NO-VP-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
+; NO-VP-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP10]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -501,7 +501,7 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) {
; NO-VP-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
+; NO-VP-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP10]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -626,7 +626,7 @@ define void @vp_ctlz(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
+; NO-VP-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP8]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -744,7 +744,7 @@ define void @vp_cttz(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
+; NO-VP-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP8]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -866,7 +866,7 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
+; NO-VP-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP8]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -994,7 +994,7 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
+; NO-VP-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP8]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -1118,7 +1118,7 @@ define void @vp_abs(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
+; NO-VP-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP8]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll
index 317bcde2f4670..bfacd6bd59f50 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll
@@ -73,7 +73,7 @@ define void @vp_sext(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2
+; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -190,7 +190,7 @@ define void @vp_zext(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2
+; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -307,7 +307,7 @@ define void @vp_trunc(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2
+; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -424,7 +424,7 @@ define void @vp_fpext(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2
+; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -541,7 +541,7 @@ define void @vp_fptrunc(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2
+; NO-VP-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -658,7 +658,7 @@ define void @vp_sitofp(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
+; NO-VP-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP8]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -775,7 +775,7 @@ define void @vp_uitofp(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
+; NO-VP-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP8]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -892,7 +892,7 @@ define void @vp_fptosi(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
+; NO-VP-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP8]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -1009,7 +1009,7 @@ define void @vp_fptoui(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
+; NO-VP-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP8]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -1126,7 +1126,7 @@ define void @vp_inttoptr(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
+; NO-VP-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 1
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP8]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -1218,7 +1218,7 @@ define void @vp_ptrtoint(ptr %a, ptr %b, i64 %N) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll
index a44acd3d9e48d..02788541d5d52 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll
@@ -83,7 +83,7 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP-OUTLOOP: vector.ph:
; NO-VP-OUTLOOP-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-OUTLOOP-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4
+; NO-VP-OUTLOOP-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 2
; NO-VP-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP7]]
; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
@@ -131,7 +131,7 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) {
; NO-VP-INLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP-INLOOP: vector.ph:
; NO-VP-INLOOP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-INLOOP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-INLOOP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -253,7 +253,7 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP-OUTLOOP: vector.ph:
; NO-VP-OUTLOOP-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-OUTLOOP-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4
+; NO-VP-OUTLOOP-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 2
; NO-VP-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP7]]
; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
@@ -305,7 +305,7 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) {
; NO-VP-INLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP-INLOOP: vector.ph:
; NO-VP-INLOOP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-INLOOP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-INLOOP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -452,7 +452,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP-OUTLOOP: vector.ph:
; NO-VP-OUTLOOP-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-OUTLOOP-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4
+; NO-VP-OUTLOOP-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 2
; NO-VP-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP7]]
; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
@@ -509,7 +509,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
; NO-VP-INLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP-INLOOP: vector.ph:
; NO-VP-INLOOP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-INLOOP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-INLOOP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-INLOOP-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
@@ -655,7 +655,7 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP-OUTLOOP: vector.ph:
; NO-VP-OUTLOOP-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-OUTLOOP-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4
+; NO-VP-OUTLOOP-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 2
; NO-VP-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP7]]
; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
@@ -716,7 +716,7 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
; NO-VP-INLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP-INLOOP: vector.ph:
; NO-VP-INLOOP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-INLOOP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-INLOOP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-INLOOP-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll
index 8cd540c3888db..2e9795609bc00 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll
@@ -45,7 +45,7 @@ define void @test_sdiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP13]], 2
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP13]], 1
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -141,7 +141,7 @@ define void @test_udiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP13]], 2
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP13]], 1
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -236,7 +236,7 @@ define void @test_srem(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP13]], 2
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP13]], 1
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -331,7 +331,7 @@ define void @test_urem(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP13]], 2
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP13]], 1
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
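In the tail-folding-div.ll hunks above only the trip-count arithmetic changes; the divisions under test take loaded operands rather than constant powers of two, so they are untouched. The unsigned half of the simplification rests on the analogous identity that an unsigned divide by 2^k is a logical right shift by k. A hedged sketch of that case, with an invented function name, assuming a plain i64 operand:

  define i64 @udiv_pow2(i64 %x) {
    ; Before: unsigned divide by the power-of-two constant 8.
    %q = udiv i64 %x, 8
    ; After: a logical right shift, since 8 == 1 << 3:
    ;   %q = lshr i64 %x, 3
    ret i64 %q
  }
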
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll
index b95691f6e7c04..ebaf11b430307 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll
@@ -16,7 +16,7 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; IF-EVL-NEXT: br label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
+; IF-EVL-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2
; IF-EVL-NEXT: [[TMP25:%.*]] = trunc i64 [[TMP8]] to i32
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32()
; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i32 [[TMP9]], 4
@@ -54,7 +54,7 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TC]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[TC]], [[N_MOD_VF]]
; NO-VP-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32()
@@ -125,7 +125,7 @@ define void @second_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; IF-EVL-NEXT: br label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
+; IF-EVL-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2
; IF-EVL-NEXT: [[TMP32:%.*]] = trunc i64 [[TMP8]] to i32
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32()
; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i32 [[TMP9]], 4
@@ -169,7 +169,7 @@ define void @second_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TC]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[TC]], [[N_MOD_VF]]
; NO-VP-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32()
@@ -253,7 +253,7 @@ define void @third_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; IF-EVL-NEXT: br label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
+; IF-EVL-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2
; IF-EVL-NEXT: [[TMP39:%.*]] = trunc i64 [[TMP8]] to i32
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32()
; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i32 [[TMP9]], 4
@@ -304,7 +304,7 @@ define void @third_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TC]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[TC]], [[N_MOD_VF]]
; NO-VP-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32()
@@ -404,7 +404,7 @@ define i32 @FOR_reduction(ptr noalias %A, ptr noalias %B, i64 %TC) {
; IF-EVL-NEXT: br label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
; IF-EVL-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; IF-EVL-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; IF-EVL-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32()
; IF-EVL-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 4
@@ -454,7 +454,7 @@ define i32 @FOR_reduction(ptr noalias %A, ptr noalias %B, i64 %TC) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TC]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[TC]], [[N_MOD_VF]]
; NO-VP-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32()
@@ -530,7 +530,7 @@ define void @first_order_recurrence_indvar(ptr noalias %A, i64 %TC) {
; IF-EVL-NEXT: br label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
; IF-EVL-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP18]], 2
+; IF-EVL-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP18]], 1
; IF-EVL-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
; IF-EVL-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; IF-EVL-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i64> [[TMP6]], splat (i64 1)
@@ -573,7 +573,7 @@ define void @first_order_recurrence_indvar(ptr noalias %A, i64 %TC) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TC]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[TC]], [[N_MOD_VF]]
; NO-VP-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll
index cc1b2380bc532..77818580ccf50 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll
@@ -42,7 +42,7 @@ define i32 @add(ptr %a, i64 %n, i32 %start) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -233,7 +233,7 @@ define i32 @or(ptr %a, i64 %n, i32 %start) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -317,7 +317,7 @@ define i32 @and(ptr %a, i64 %n, i32 %start) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -401,7 +401,7 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -485,7 +485,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -571,7 +571,7 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -657,7 +657,7 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -743,7 +743,7 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -829,7 +829,7 @@ define float @fadd(ptr %a, i64 %n, float %start) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -1021,7 +1021,7 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -1109,7 +1109,7 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -1415,7 +1415,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -1509,7 +1509,7 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -1601,7 +1601,7 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll
index b5662b0bd8d3b..98e353707886f 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll
@@ -44,7 +44,7 @@ define void @interleave(ptr noalias %a, ptr noalias %b, i64 %N) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -157,7 +157,7 @@ define i32 @load_factor_4_with_gap(i64 %n, ptr noalias %a) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[TMP4:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; NO-VP-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 [[N_MOD_VF]]
@@ -281,7 +281,7 @@ define void @store_factor_4_with_gap(i32 %n, ptr noalias %a) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
-; NO-VP-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP9:%.*]] = shl nuw i32 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], [[TMP9]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
@@ -397,7 +397,7 @@ define i32 @load_factor_4_with_tail_gap(i64 %n, ptr noalias %a) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[TMP4:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; NO-VP-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 [[N_MOD_VF]]
@@ -522,7 +522,7 @@ define void @store_factor_4_with_tail_gap(i32 %n, ptr noalias %a) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
-; NO-VP-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP9:%.*]] = shl nuw i32 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], [[TMP9]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
@@ -639,7 +639,7 @@ define i32 @load_factor_4_reverse(i64 %n, ptr noalias %a) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
+; NO-VP-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP5]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
; NO-VP-NEXT: [[TMP6:%.*]] = sub i64 [[N]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-intermediate-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-intermediate-store.ll
index 9e89cde3bc24a..b063514370319 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-intermediate-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-intermediate-store.ll
@@ -129,7 +129,7 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr)
; NO-VP-OUTLOOP-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; NO-VP-OUTLOOP: vector.ph:
; NO-VP-OUTLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-OUTLOOP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
+; NO-VP-OUTLOOP-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 2
; NO-VP-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP5]]
; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-OUTLOOP-NEXT: [[TMP8:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
@@ -182,7 +182,7 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr)
; NO-VP-INLOOP-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; NO-VP-INLOOP: vector.ph:
; NO-VP-INLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-INLOOP-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
+; NO-VP-INLOOP-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 2
; NO-VP-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP5]]
; NO-VP-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-iv32.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-iv32.ll
index 1aea6aaff66a3..ec131491c4bde 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-iv32.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-iv32.ll
@@ -38,7 +38,7 @@ define void @iv32(ptr noalias %a, ptr noalias %b, i32 %N) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
-; NO-VP-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4
+; NO-VP-NEXT: [[TMP11:%.*]] = shl nuw i32 [[TMP1]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], [[TMP11]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll
index e9dcf8f0f0395..c7950ee402a9d 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll
@@ -42,7 +42,7 @@ define void @masked_loadstore(ptr noalias %a, ptr noalias %b, i64 %n) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[INC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label [[FOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll
index dcb7bf484f4ae..e9f4ffda9b822 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll
@@ -41,7 +41,7 @@ define float @fadd(ptr noalias nocapture readonly %a, i64 %n) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N1]], [[TMP3]]
; NO-VP-NEXT: [[N:%.*]] = sub i64 [[N1]], [[N_MOD_VF]]
; NO-VP-NEXT: br label [[FOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll
index d1a2303e35e68..89d04f6423c58 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll
@@ -42,7 +42,7 @@ define i32 @add(ptr %a, i64 %n, i32 %start) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: [[TMP6:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START:%.*]], i32 0
@@ -236,7 +236,7 @@ define i32 @or(ptr %a, i64 %n, i32 %start) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: [[TMP6:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START:%.*]], i32 0
@@ -323,7 +323,7 @@ define i32 @and(ptr %a, i64 %n, i32 %start) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: [[TMP6:%.*]] = insertelement <vscale x 4 x i32> splat (i32 -1), i32 [[START:%.*]], i32 0
@@ -410,7 +410,7 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: [[TMP6:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START:%.*]], i32 0
@@ -499,7 +499,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[START:%.*]], i64 0
@@ -592,7 +592,7 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[START:%.*]], i64 0
@@ -685,7 +685,7 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[START:%.*]], i64 0
@@ -778,7 +778,7 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[START:%.*]], i64 0
@@ -869,7 +869,7 @@ define float @fadd(ptr %a, i64 %n, float %start) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: [[TMP6:%.*]] = insertelement <vscale x 4 x float> splat (float -0.000000e+00), float [[START:%.*]], i32 0
@@ -1065,7 +1065,7 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[START:%.*]], i64 0
@@ -1158,7 +1158,7 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[START:%.*]], i64 0
@@ -1467,7 +1467,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: [[TMP6:%.*]] = insertelement <vscale x 4 x float> splat (float -0.000000e+00), float [[START:%.*]], i32 0
@@ -1561,7 +1561,7 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -1653,7 +1653,7 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
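
Every hunk in this file is the same one-line change: the VF computation mul nuw i64 vscale, 4 becomes shl nuw i64 vscale, 2. A minimal IR sketch of the equivalence (value names are illustrative, not from the patch):

  %vscale = call i64 @llvm.vscale.i64()
  %vf.mul = mul nuw i64 %vscale, 4   ; before: multiply by a power of two
  %vf.shl = shl nuw i64 %vscale, 2   ; after: shift left by Log2(4)

The nuw (and, in later hunks, nsw) flag carries over unchanged: shifting left by Log2(C) loses exactly the bits that multiplying by C would, so the overflow assertions are equivalent.
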
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll
index 13990000585ea..36e4e41bdf800 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll
@@ -55,7 +55,7 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; NO-VP-NEXT: [[TMP6:%.*]] = sub i64 [[STARTVAL:%.*]], [[N_VEC]]
@@ -178,7 +178,7 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[ENTRY:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; NO-VP-NEXT: [[TMP6:%.*]] = sub i64 [[STARTVAL1:%.*]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll
index e1c62fe2d043d..cd60817465689 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll
@@ -43,7 +43,7 @@ define void @test(ptr %p) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 200, [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 200, [[N_MOD_VF]]
; NO-VP-NEXT: br label [[LOOP:%.*]]
@@ -111,7 +111,7 @@ define void @test_may_clobber1(ptr %p) {
; IF-EVL-NEXT: store <4 x i64> [[WIDE_LOAD]], ptr [[TMP4]], align 32
; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; IF-EVL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
-; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br label [[LOOP:%.*]]
; IF-EVL: exit:
@@ -221,7 +221,7 @@ define void @test_may_clobber3(ptr %p) {
; IF-EVL-NEXT: store <2 x i64> [[WIDE_LOAD]], ptr [[TMP4]], align 32
; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; IF-EVL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
-; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br label [[LOOP:%.*]]
; IF-EVL: exit:
@@ -285,7 +285,7 @@ define void @trivial_due_max_vscale(ptr %p) {
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP13]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP5]], [[TMP13]]
; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br label [[LOOP:%.*]]
; IF-EVL: exit:
@@ -299,7 +299,7 @@ define void @trivial_due_max_vscale(ptr %p) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 200, [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 200, [[N_MOD_VF]]
; NO-VP-NEXT: br label [[LOOP:%.*]]
@@ -372,7 +372,7 @@ define void @no_high_lmul_or_interleave(ptr %p) {
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP11]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]]
; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br label [[LOOP:%.*]]
; IF-EVL: exit:
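
The IF-EVL hunks in this file only renumber the trailing !llvm.loop operand ([[LOOP4]] to [[LOOP3]], and so on). That operand is a FileCheck capture, so the branches themselves are unchanged; presumably the regenerated checks just bind one fewer metadata node earlier in the file. For reference, the capture names a loop metadata node of this shape (numbering illustrative):

  !3 = distinct !{!3, !4}
  !4 = !{!"llvm.loop.isvectorized", i32 1}
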
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/transform-narrow-interleave-to-widen-memory.ll b/llvm/test/Transforms/LoopVectorize/RISCV/transform-narrow-interleave-to-widen-memory.ll
index d4d7d398185a1..ea49a27b363db 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/transform-narrow-interleave-to-widen-memory.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/transform-narrow-interleave-to-widen-memory.ll
@@ -42,7 +42,7 @@ define void @load_store_interleave_group(ptr noalias %data) {
; EPILOGUE-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; EPILOGUE: [[VECTOR_PH]]:
; EPILOGUE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; EPILOGUE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
+; EPILOGUE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1
; EPILOGUE-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP3]]
; EPILOGUE-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]]
; EPILOGUE-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -141,7 +141,7 @@ define void @load_store_interleave_group_i32(ptr noalias %data) {
; EPILOGUE-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; EPILOGUE: [[VECTOR_PH]]:
; EPILOGUE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; EPILOGUE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; EPILOGUE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; EPILOGUE-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP3]]
; EPILOGUE-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]]
; EPILOGUE-NEXT: br label %[[VECTOR_BODY:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll
index 232c354764e1a..a1b8cbbabeece 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll
@@ -56,7 +56,7 @@ define void @truncate_i16_to_i8_cse(ptr noalias %src, ptr noalias %dst) {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 4294967296, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 4294967296, [[N_MOD_VF]]
; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[N_VEC]] to i32
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll
index 0a64723b6ff9d..2a63d4b22886e 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll
@@ -42,7 +42,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll
index cfb180594b0ec..5cc68753b7c1d 100644
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll
@@ -16,14 +16,10 @@ define void @test_scalar_iv_steps_used_by_replicate_and_first_lane_only_vpinst(p
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[TMP3:%.*]] = mul nsw i64 0, 4
-; CHECK-NEXT: [[TMP4:%.*]] = mul nsw i64 1, 4
-; CHECK-NEXT: [[TMP2:%.*]] = mul nsw i64 2, 4
-; CHECK-NEXT: [[TMP15:%.*]] = mul nsw i64 3, 4
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP2]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP15]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 0
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 4
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 8
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 12
; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP8]], align 1
; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP5]], align 1
; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP6]], align 1
@@ -33,8 +29,7 @@ define void @test_scalar_iv_steps_used_by_replicate_and_first_lane_only_vpinst(p
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i8> [[TMP13]], i8 [[TMP10]], i32 2
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i8> [[TMP14]], i8 [[TMP11]], i32 3
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq <4 x i8> [[TMP19]], zeroinitializer
-; CHECK-NEXT: [[TMP22:%.*]] = getelementptr [8 x i32], ptr @src, i64 0, i64 4
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr getelementptr inbounds nuw (i8, ptr @src, i64 16), align 4
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP20]], i32 0
; CHECK-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
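
This SystemZ hunk shows a knock-on benefit: once the multiplies become shifts, the all-constant ones fold away, so the scalar steps collapse into literal GEP offsets and the load from @src becomes a constant-expression GEP. A sketch of one lane (names illustrative):

  ; before: the offset is materialized, then fed to the GEP
  %off = mul nsw i64 2, 4
  %gep = getelementptr inbounds i8, ptr %src.1, i64 %off
  ; after: the shift of two constants folds to an immediate offset
  %gep = getelementptr inbounds i8, ptr %src.1, i64 8
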
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll
index e0dd3768ec111..a6bbf8683c592 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll
@@ -17,8 +17,8 @@ define i64 @test_foldable_live_in_via_scev() {
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ splat (i64 1), %[[VECTOR_PH]] ], [ [[TMP0:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ splat (i64 1), %[[VECTOR_PH]] ], [ [[TMP1:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0]] = mul <2 x i64> [[VEC_PHI]], splat (i64 2)
-; CHECK-NEXT: [[TMP1]] = mul <2 x i64> [[VEC_PHI1]], splat (i64 2)
+; CHECK-NEXT: [[TMP0]] = shl <2 x i64> [[VEC_PHI]], splat (i64 1)
+; CHECK-NEXT: [[TMP1]] = shl <2 x i64> [[VEC_PHI1]], splat (i64 1)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
; CHECK-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
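
The rewrite also applies elementwise when the multiplier is a vector splat of a power of two, as in the reduction update above. Minimal sketch (names illustrative):

  %dbl.mul = mul <2 x i64> %v, splat (i64 2)   ; before
  %dbl.shl = shl <2 x i64> %v, splat (i64 1)   ; after: splat of Log2(2)
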
diff --git a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
index a1b92e0658bd3..000064727f13e 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
@@ -195,7 +195,7 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l,
; CHECK-NEXT: [[DOTSPLAT15:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT14]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP22:%.*]] = mul <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, [[DOTSPLAT15]]
; CHECK-NEXT: [[INDUCTION17:%.*]] = add <8 x i16> [[DOTSPLAT16]], [[TMP22]]
-; CHECK-NEXT: [[TMP15:%.*]] = mul i16 [[TMP0]], 8
+; CHECK-NEXT: [[TMP15:%.*]] = shl i16 [[TMP0]], 3
; CHECK-NEXT: [[BROADCAST_SPLATINSERT22:%.*]] = insertelement <8 x i16> poison, i16 [[TMP15]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT23:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT22]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
index 04bff3c393f62..009d386f52c77 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
@@ -685,7 +685,7 @@ define void @wombat(i32 %arg, ptr %dst) #1 {
; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = mul <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[DOTSPLAT2]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP1]]
-; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[ARG]], 8
+; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[ARG]], 3
; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <8 x i32> poison, i32 [[TMP2]], i64 0
; CHECK-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT3]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -755,7 +755,7 @@ define void @wombat2(i32 %arg, ptr %dst) #1 {
; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = mul <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[DOTSPLAT2]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP1]]
-; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[ARG]], 8
+; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[ARG]], 3
; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <8 x i32> poison, i32 [[TMP2]], i64 0
; CHECK-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT3]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -828,7 +828,7 @@ define void @with_dead_use(i32 %arg, ptr %dst) #1 {
; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = mul <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[DOTSPLAT2]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP1]]
-; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[ARG]], 8
+; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[ARG]], 3
; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <8 x i32> poison, i32 [[TMP2]], i64 0
; CHECK-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT3]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll
index dbd7019188d07..2d3ab3e8b5c43 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll
@@ -345,10 +345,10 @@ define i32 @test_count_bits(ptr %test_base) {
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12
-; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 [[INDEX]], 8
-; CHECK-NEXT: [[TMP5:%.*]] = udiv i64 [[TMP1]], 8
-; CHECK-NEXT: [[TMP6:%.*]] = udiv i64 [[TMP2]], 8
-; CHECK-NEXT: [[TMP7:%.*]] = udiv i64 [[TMP3]], 8
+; CHECK-NEXT: [[TMP4:%.*]] = lshr i64 [[INDEX]], 3
+; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP1]], 3
+; CHECK-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP2]], 3
+; CHECK-NEXT: [[TMP7:%.*]] = lshr i64 [[TMP3]], 3
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TEST_BASE:%.*]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TEST_BASE]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TEST_BASE]], i64 [[TMP6]]
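
This is the udiv half of the patch: an unsigned divide by a power of two is a logical shift right by its Log2. Sketch (names illustrative):

  %q.udiv = udiv i64 %index, 8   ; before
  %q.lshr = lshr i64 %index, 3   ; after: 8 == 1 << 3

Note that this is only valid for udiv: sdiv by a power of two rounds toward zero, so it is not a plain ashr for negative operands.
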
diff --git a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll
index f9b512700f608..c8ec3bd8bcf0e 100644
--- a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll
+++ b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll
@@ -49,7 +49,7 @@ define void @test(i32 %arg, i32 %L1.limit, i32 %L2.switch, i1 %c, ptr %dst) {
; CHECK-NEXT: [[DOTSPLAT1:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT1]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> splat (i32 1), [[TMP4]]
-; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[INDUCTION_IV]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = shl i32 [[INDUCTION_IV]], 2
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll
index e80fa3edba0c7..39af9cf382c1d 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll
@@ -185,17 +185,17 @@ define i32 @test_chained_first_order_recurrences_4(ptr %base, i64 %x) {
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
-; CHECK-NEXT: CLONE ir<%for.x.next> = mul ir<%x>, ir<2>
+; CHECK-NEXT: EMIT vp<[[SHL:%.+]]> = shl ir<%x>, ir<1>
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
-; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.x> = phi ir<0>, ir<%for.x.next>
+; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.x> = phi ir<0>, vp<[[SHL]]>
; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.y> = phi ir<0>, ir<%for.x.prev>
; CHECK-NEXT: vp<[[SCALAR_STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%base>, vp<[[SCALAR_STEPS]]>
-; CHECK-NEXT: EMIT vp<[[SPLICE_X:%.]]> = first-order splice ir<%for.x>, ir<%for.x.next>
+; CHECK-NEXT: EMIT vp<[[SPLICE_X:%.]]> = first-order splice ir<%for.x>, vp<[[SHL]]>
; CHECK-NEXT: WIDEN-CAST ir<%for.x.prev> = trunc vp<[[SPLICE_X]]> to i32
; CHECK-NEXT: EMIT vp<[[SPLICE_Y:%.+]]> = first-order splice ir<%for.y>, ir<%for.x.prev>
; CHECK-NEXT: WIDEN-CAST ir<%for.y.i64> = sext vp<[[SPLICE_Y]]> to i64
@@ -208,7 +208,7 @@ define i32 @test_chained_first_order_recurrences_4(ptr %base, i64 %x) {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[EXT_X_PART:%.+]]> = extract-last-part ir<%for.x.next>
+; CHECK-NEXT: EMIT vp<[[EXT_X_PART:%.+]]> = extract-last-part vp<[[SHL]]>
; CHECK-NEXT: EMIT vp<[[EXT_X:%.+]]> = extract-last-lane vp<[[EXT_X_PART]]>
; CHECK-NEXT: EMIT vp<[[EXT_Y_PART:%.+]]> = extract-last-part ir<%for.x.prev>
; CHECK-NEXT: EMIT vp<[[EXT_Y:%.+]]>.1 = extract-last-lane vp<[[EXT_Y_PART]]>
@@ -269,14 +269,14 @@ define i32 @test_chained_first_order_recurrences_5_hoist_to_load(ptr %base) {
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
-; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.x> = phi ir<0>, ir<%for.x.next>
+; CHECK-NEXT:     FIRST-ORDER-RECURRENCE-PHI ir<%for.x> = phi ir<0>, vp<[[SHL:%.+]]>
; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.y> = phi ir<0>, ir<%for.x.prev>
; CHECK-NEXT: vp<[[SCALAR_STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%base>, vp<[[SCALAR_STEPS]]>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep>
; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]>
-; CHECK-NEXT: WIDEN ir<%for.x.next> = mul ir<%l>, ir<2>
-; CHECK-NEXT: EMIT vp<[[SPLICE_X:%.]]> = first-order splice ir<%for.x>, ir<%for.x.next>
+; CHECK-NEXT:     EMIT vp<[[SHL]]> = shl ir<%l>, ir<1>
+; CHECK-NEXT:     EMIT vp<[[SPLICE_X:%.]]> = first-order splice ir<%for.x>, vp<[[SHL]]>
; CHECK-NEXT: WIDEN-CAST ir<%for.x.prev> = trunc vp<[[SPLICE_X]]> to i32
; CHECK-NEXT: EMIT vp<[[SPLICE_Y:%.+]]> = first-order splice ir<%for.y>, ir<%for.x.prev>
; CHECK-NEXT: WIDEN-CAST ir<%for.y.i64> = sext vp<[[SPLICE_Y]]> to i64
@@ -289,7 +289,7 @@ define i32 @test_chained_first_order_recurrences_5_hoist_to_load(ptr %base) {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[EXT_X_PART:%.+]]> = extract-last-part ir<%for.x.next>
+; CHECK-NEXT:   EMIT vp<[[EXT_X_PART:%.+]]> = extract-last-part vp<[[SHL]]>
; CHECK-NEXT: EMIT vp<[[EXT_X:%.+]]> = extract-last-lane vp<[[EXT_X_PART]]>
; CHECK-NEXT: EMIT vp<[[EXT_Y_PART:%.+]]> = extract-last-part ir<%for.x.prev>
; CHECK-NEXT: EMIT vp<[[EXT_Y:%.+]]>.1 = extract-last-lane vp<[[EXT_Y_PART]]>
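
These VPlan-printing checks show the transform at the recipe level: the cloned (or widened) IR multiply is replaced by a freshly emitted VPInstruction, so the ir<%for.x.next> name disappears and the recurrence phi, splice, and extract users all pick up the new VPValue. In VPlan notation (numbering illustrative):

  CLONE ir<%for.x.next> = mul ir<%x>, ir<2>   ; before: recipe keeps its IR name
  EMIT vp<%3> = shl ir<%x>, ir<1>             ; after: fresh VPValue, no IR name
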
diff --git a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll
index f6dd8564c001b..44e77e0a01f45 100644
--- a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll
+++ b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll
@@ -829,8 +829,8 @@ define void @hoist_multiple_complementary_loads(ptr noalias %dst, ptr noalias %s
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <2 x i32> [[TMP23]], i32 [[TMP26]], i32 1
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE6]]
; CHECK: [[PRED_LOAD_CONTINUE6]]:
-; CHECK-NEXT: [[TMP18:%.*]] = phi <2 x i32> [ [[TMP23]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP27]], %[[PRED_LOAD_IF5]] ]
-; CHECK-NEXT: [[TMP22:%.*]] = mul <2 x i32> [[TMP18]], splat (i32 2)
+; CHECK-NEXT: [[TMP41:%.*]] = phi <2 x i32> [ [[TMP23]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP27]], %[[PRED_LOAD_IF5]] ]
+; CHECK-NEXT: [[TMP22:%.*]] = shl <2 x i32> [[TMP41]], splat (i32 1)
; CHECK-NEXT: [[TMP30:%.*]] = select <2 x i1> [[TMP7]], <2 x i1> [[TMP16]], <2 x i1> zeroinitializer
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <2 x i1> [[TMP30]], i32 0
; CHECK-NEXT: br i1 [[TMP31]], label %[[PRED_LOAD_IF7:.*]], label %[[PRED_LOAD_CONTINUE8:.*]]
@@ -1089,7 +1089,7 @@ define void @hoist_all_three_loads_at_same_address(ptr %dst, ptr %src, ptr noali
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE7]]
; CHECK: [[PRED_LOAD_CONTINUE7]]:
; CHECK-NEXT: [[TMP33:%.*]] = phi <2 x i32> [ [[TMP29]], %[[PRED_LOAD_CONTINUE5]] ], [ [[TMP32]], %[[PRED_LOAD_IF6]] ]
-; CHECK-NEXT: [[TMP34:%.*]] = mul <2 x i32> [[TMP33]], splat (i32 2)
+; CHECK-NEXT: [[TMP34:%.*]] = shl <2 x i32> [[TMP33]], splat (i32 1)
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0
; CHECK-NEXT: br i1 [[TMP35]], label %[[PRED_LOAD_IF8:.*]], label %[[PRED_LOAD_CONTINUE9:.*]]
; CHECK: [[PRED_LOAD_IF8]]:
diff --git a/llvm/test/Transforms/LoopVectorize/if-reduction.ll b/llvm/test/Transforms/LoopVectorize/if-reduction.ll
index eab9df558f608..018578b15d4b9 100644
--- a/llvm/test/Transforms/LoopVectorize/if-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-reduction.ll
@@ -1727,7 +1727,7 @@ define i32 @fcmp_0_mult_select1(ptr noalias %x, i32 %N) nounwind readonly {
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[VEC_PHI]], splat (i32 2)
+; CHECK-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[VEC_PHI]], splat (i32 1)
; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/induction-step.ll b/llvm/test/Transforms/LoopVectorize/induction-step.ll
index 5cc228f382983..76dc7a471c066 100644
--- a/llvm/test/Transforms/LoopVectorize/induction-step.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction-step.ll
@@ -39,7 +39,7 @@ define void @induction_with_global(i32 %init, ptr noalias nocapture %A, i32 %N)
; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = mul nsw <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[DOTSPLAT3]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <8 x i32> [[DOTSPLAT]], [[TMP6]]
-; CHECK-NEXT: [[TMP7:%.*]] = mul nsw i32 [[TMP0]], 8
+; CHECK-NEXT: [[TMP7:%.*]] = shl nsw i32 [[TMP0]], 3
; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> poison, i32 [[TMP7]], i64 0
; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT5]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -128,7 +128,7 @@ define i32 @induction_with_loop_inv(i32 %init, ptr noalias nocapture %A, i32 %N,
; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = mul nsw <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[DOTSPLAT3]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <8 x i32> [[DOTSPLAT]], [[TMP8]]
-; CHECK-NEXT: [[TMP5:%.*]] = mul nsw i32 [[J_012]], 8
+; CHECK-NEXT: [[TMP5:%.*]] = shl nsw i32 [[J_012]], 3
; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> poison, i32 [[TMP5]], i64 0
; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT5]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -221,7 +221,7 @@ define void @non_primary_iv_loop_inv_trunc(ptr %a, i64 %n, i64 %step) {
; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT5]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[DOTSPLAT6]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i32> zeroinitializer, [[TMP2]]
-; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP3]], 8
+; CHECK-NEXT: [[TMP5:%.*]] = shl i32 [[TMP3]], 3
; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <8 x i32> poison, i32 [[TMP5]], i64 0
; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT8]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -329,7 +329,7 @@ define void @wide_add_induction_step_live_in(ptr %dst, i64 %N, i16 %off) {
; CHECK-NEXT: [[DOTSPLAT1:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT1]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = mul <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, [[DOTSPLAT1]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i16> zeroinitializer, [[TMP1]]
-; CHECK-NEXT: [[TMP2:%.*]] = mul i16 [[O_1]], 8
+; CHECK-NEXT: [[TMP2:%.*]] = shl i16 [[O_1]], 3
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -399,7 +399,7 @@ define void @wide_sub_induction_step_live_in(ptr %dst, i64 %N, i16 %off) {
; CHECK-NEXT: [[DOTSPLAT1:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT1]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, [[DOTSPLAT1]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i16> zeroinitializer, [[TMP2]]
-; CHECK-NEXT: [[TMP3:%.*]] = mul i16 [[TMP0]], 8
+; CHECK-NEXT: [[TMP3:%.*]] = shl i16 [[TMP0]], 3
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP3]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
index b6fb378a042fd..454a4116a6322 100644
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -5902,7 +5902,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP19:%.*]] = mul nsw <2 x i32> <i32 0, i32 1>, [[DOTSPLAT]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <2 x i32> zeroinitializer, [[TMP19]]
-; CHECK-NEXT: [[TMP18:%.*]] = mul nsw i32 [[STEP]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = shl nsw i32 [[STEP]], 1
; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i64 0
; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT2]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
index 8d3d0ff7a6406..84e4aebd9425a 100644
--- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
+++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
@@ -478,10 +478,10 @@ define i16 @test_strided_access(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TEST_BASE:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP1]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i8> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = mul <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[TMP4:%.*]] = shl <2 x i64> [[VEC_IND]], splat (i64 1)
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[ALLOCA]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[ALLOCA]], i64 [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = load i16, ptr [[TMP6]], align 2
; CHECK-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP8]], align 2
@@ -549,10 +549,10 @@ define void @test_rev_loops_strided_deref_loads(ptr nocapture noundef writeonly
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <2 x i32> [[WIDE_LOAD]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i32> [[REVERSE]], splat (i32 3)
-; CHECK-NEXT: [[TMP6:%.*]] = mul <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[TMP6:%.*]] = shl <2 x i64> [[VEC_IND]], splat (i64 1)
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP6]], i32 0
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP6]], i32 1
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP6]], i32 1
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP8]], align 4
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar-widen-gep-scalable.ll b/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar-widen-gep-scalable.ll
index 6746e92cc1fd1..a76e337bdea59 100644
--- a/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar-widen-gep-scalable.ll
+++ b/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar-widen-gep-scalable.ll
@@ -13,7 +13,7 @@ define void @widengep_narrow(ptr %in, ptr noalias %p) {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[IN]], i64 8
diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
index 1a1c05187590e..518d589c6c7cc 100644
--- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
@@ -455,7 +455,7 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i8> <i8 0, i8 1, i8 2, i8 3>, [[DOTSPLAT]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i8> zeroinitializer, [[TMP2]]
-; CHECK-NEXT: [[TMP3:%.*]] = mul i8 [[INDUCTION_IV]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = shl i8 [[INDUCTION_IV]], 2
; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[TMP3]], i64 0
; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -484,7 +484,7 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst
; CHECK-NEXT: [[DOTSPLAT11:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT10]], <4 x i8> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i8> <i8 0, i8 1, i8 2, i8 3>, [[DOTSPLAT11]]
; CHECK-NEXT: [[INDUCTION12:%.*]] = add <4 x i8> [[DOTSPLAT9]], [[TMP8]]
-; CHECK-NEXT: [[TMP9:%.*]] = mul i8 [[INDUCTION_IV]], 4
+; CHECK-NEXT: [[TMP9:%.*]] = shl i8 [[INDUCTION_IV]], 2
; CHECK-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <4 x i8> poison, i8 [[TMP9]], i64 0
; CHECK-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT13]], <4 x i8> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
@@ -542,7 +542,7 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP2:%.*]] = mul <4 x i8> <i8 0, i8 1, i8 2, i8 3>, [[DOTSPLAT]]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDUCTION:%.*]] = add <4 x i8> zeroinitializer, [[TMP2]]
-; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP3:%.*]] = mul i8 [[INDUCTION_IV]], 4
+; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP3:%.*]] = shl i8 [[INDUCTION_IV]], 2
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[TMP3]], i64 0
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -571,7 +571,7 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT11:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT10]], <2 x i8> poison, <2 x i32> zeroinitializer
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP8:%.*]] = mul <2 x i8> <i8 0, i8 1>, [[DOTSPLAT11]]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDUCTION12:%.*]] = add <2 x i8> [[DOTSPLAT9]], [[TMP8]]
-; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP9:%.*]] = mul i8 [[INDUCTION_IV]], 2
+; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP9:%.*]] = shl i8 [[INDUCTION_IV]], 1
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <2 x i8> poison, i8 [[TMP9]], i64 0
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT13]], <2 x i8> poison, <2 x i32> zeroinitializer
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll
index b224a5a86b83d..ecbf24b06e845 100644
--- a/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll
+++ b/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll
@@ -21,7 +21,7 @@ define void @foo() {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
index d96134e8adf1d..995c2016339a4 100644
--- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
@@ -243,7 +243,7 @@ define void @non_constant_vector_expansion(i32 %0, ptr %call) {
; STRIDED-NEXT: [[TMP6:%.*]] = getelementptr ptr, ptr [[CALL:%.*]], i32 [[OFFSET_IDX]]
; STRIDED-NEXT: store <4 x ptr> [[VECTOR_GEP]], ptr [[TMP6]], align 4
; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; STRIDED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP1]], 4
+; STRIDED-NEXT: [[TMP3:%.*]] = shl i64 [[TMP1]], 2
; STRIDED-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP3]]
; STRIDED-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
; STRIDED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll b/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll
index 623a9435edec1..81e873e7ef5fd 100644
--- a/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll
@@ -73,7 +73,7 @@ define void @doit1(i32 %n, i32 %step) local_unnamed_addr {
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP19:%.*]] = mul nsw <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP19]]
-; CHECK-NEXT: [[TMP18:%.*]] = mul nsw i32 [[STEP]], 4
+; CHECK-NEXT: [[TMP18:%.*]] = shl nsw i32 [[STEP]], 2
; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP18]], i64 0
; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -194,7 +194,7 @@ define void @doit2(i32 %n, i32 %step) local_unnamed_addr {
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP18:%.*]] = mul nsw <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP18]]
-; CHECK-NEXT: [[TMP17:%.*]] = mul nsw i32 [[STEP]], 4
+; CHECK-NEXT: [[TMP17:%.*]] = shl nsw i32 [[STEP]], 2
; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP17]], i64 0
; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -387,7 +387,7 @@ define void @doit4(i32 %n, i8 signext %cstep) local_unnamed_addr {
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP16:%.*]] = mul nsw <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP16]]
-; CHECK-NEXT: [[TMP15:%.*]] = mul nsw i32 [[CONV]], 4
+; CHECK-NEXT: [[TMP15:%.*]] = shl nsw i32 [[CONV]], 2
; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP15]], i64 0
; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll b/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll
index 25cc5b22764e1..5896fc04431d2 100644
--- a/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll
@@ -29,7 +29,7 @@ define void @test1_pr58811(ptr %dst) {
; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i32> <i32 0, i32 1>, [[BROADCAST_SPLAT]]
; VF2-NEXT: [[INDUCTION:%.*]] = add <2 x i32> zeroinitializer, [[TMP3]]
-; VF2-NEXT: [[TMP4:%.*]] = mul i32 [[INDUCTION_IV]], 2
+; VF2-NEXT: [[TMP4:%.*]] = shl i32 [[INDUCTION_IV]], 1
; VF2-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i32> poison, i32 [[TMP4]], i64 0
; VF2-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT1]], <2 x i32> poison, <2 x i32> zeroinitializer
; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -92,7 +92,7 @@ define void @test1_pr58811(ptr %dst) {
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> zeroinitializer, [[TMP3]]
-; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[INDUCTION_IV]], 4
+; CHECK-NEXT: [[TMP4:%.*]] = shl i32 [[INDUCTION_IV]], 2
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -201,7 +201,7 @@ define void @test2_pr58811(ptr %dst) {
; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i32> <i32 0, i32 1>, [[BROADCAST_SPLAT]]
; VF2-NEXT: [[INDUCTION:%.*]] = add <2 x i32> zeroinitializer, [[TMP3]]
-; VF2-NEXT: [[TMP4:%.*]] = mul i32 [[INDUCTION_IV_LCSSA]], 2
+; VF2-NEXT: [[TMP4:%.*]] = shl i32 [[INDUCTION_IV_LCSSA]], 1
; VF2-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i32> poison, i32 [[TMP4]], i64 0
; VF2-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT1]], <2 x i32> poison, <2 x i32> zeroinitializer
; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -266,7 +266,7 @@ define void @test2_pr58811(ptr %dst) {
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> zeroinitializer, [[TMP3]]
-; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[INDUCTION_IV_LCSSA]], 4
+; CHECK-NEXT: [[TMP4:%.*]] = shl i32 [[INDUCTION_IV_LCSSA]], 2
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -357,7 +357,7 @@ define void @test3_pr58811(ptr %dst) {
; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; VF2-NEXT: [[TMP5:%.*]] = mul <2 x i32> <i32 0, i32 1>, [[BROADCAST_SPLAT]]
; VF2-NEXT: [[INDUCTION:%.*]] = add <2 x i32> zeroinitializer, [[TMP5]]
-; VF2-NEXT: [[TMP6:%.*]] = mul i32 [[TMP3]], 2
+; VF2-NEXT: [[TMP6:%.*]] = shl i32 [[TMP3]], 1
; VF2-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i32> poison, i32 [[TMP6]], i64 0
; VF2-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT1]], <2 x i32> poison, <2 x i32> zeroinitializer
; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -419,7 +419,7 @@ define void @test3_pr58811(ptr %dst) {
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> zeroinitializer, [[TMP5]]
-; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP3]], 4
+; CHECK-NEXT: [[TMP6:%.*]] = shl i32 [[TMP3]], 2
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-assume.ll b/llvm/test/Transforms/LoopVectorize/scalable-assume.ll
index f254b19bba173..32d1c3f38b26a 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-assume.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-assume.ll
@@ -11,8 +11,8 @@ define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b)
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 2
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP4]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP2]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP4]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1600, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1600, [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -82,8 +82,8 @@ define void @test2(ptr %a, ptr noalias %b) {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP6]], 2
-; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP3]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP6]], 1
+; CHECK-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP3]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1600, [[TMP7]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1600, [[N_MOD_VF]]
; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]])
@@ -148,10 +148,10 @@ define void @predicated_assume(ptr noalias nocapture readonly %a, ptr noalias no
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
+; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 1
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP5]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2
+; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
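
One detail visible in this file: VF and VF*UF are computed by two separate multiplies, and the simplification rewrites each recipe locally, so the result is two chained shl-by-1 instructions rather than one combined shl-by-2. Sketch (names illustrative):

  %vf = shl nuw i64 %vscale, 1    ; VF = vscale x 2
  %step = shl nuw i64 %vf, 1      ; VF * UF with UF = 2; rewritten per-recipe,
                                  ; not folded into a single shl by 2
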
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
index 49700dd880106..c9651cdf8bdf6 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
@@ -33,7 +33,7 @@ define i32 @recurrence_1(ptr nocapture readonly %a, ptr nocapture %b, i32 %n) {
; CHECK-VF4UF1-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; CHECK-VF4UF1: [[VECTOR_PH]]:
; CHECK-VF4UF1-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF4UF1-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4
+; CHECK-VF4UF1-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 2
; CHECK-VF4UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP11]]
; CHECK-VF4UF1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; CHECK-VF4UF1-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
@@ -96,8 +96,8 @@ define i32 @recurrence_1(ptr nocapture readonly %a, ptr nocapture %b, i32 %n) {
; CHECK-VF4UF2-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; CHECK-VF4UF2: [[VECTOR_PH]]:
; CHECK-VF4UF2-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF4UF2-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP10]], 4
-; CHECK-VF4UF2-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP12]], 2
+; CHECK-VF4UF2-NEXT: [[TMP12:%.*]] = shl nuw i64 [[TMP10]], 2
+; CHECK-VF4UF2-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP12]], 1
; CHECK-VF4UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP11]]
; CHECK-VF4UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; CHECK-VF4UF2-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
@@ -189,7 +189,7 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) {
; CHECK-VF4UF1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4UF1: [[VECTOR_PH]]:
; CHECK-VF4UF1-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF4UF1-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
+; CHECK-VF4UF1-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2
; CHECK-VF4UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
; CHECK-VF4UF1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-VF4UF1-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32()
@@ -248,8 +248,8 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) {
; CHECK-VF4UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4UF2: [[VECTOR_PH]]:
; CHECK-VF4UF2-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF4UF2-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP3]], 4
-; CHECK-VF4UF2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP11]], 2
+; CHECK-VF4UF2-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP3]], 2
+; CHECK-VF4UF2-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP11]], 1
; CHECK-VF4UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
; CHECK-VF4UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-VF4UF2-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32()
@@ -371,7 +371,7 @@ define void @recurrence_3(ptr nocapture readonly %a, ptr nocapture %b, i32 %n, f
; CHECK-VF4UF1-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; CHECK-VF4UF1: [[VECTOR_PH]]:
; CHECK-VF4UF1-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF4UF1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4
+; CHECK-VF4UF1-NEXT: [[TMP12:%.*]] = shl nuw i64 [[TMP11]], 2
; CHECK-VF4UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP12]]
; CHECK-VF4UF1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; CHECK-VF4UF1-NEXT: [[TMP15:%.*]] = add i64 1, [[N_VEC]]
@@ -447,8 +447,8 @@ define void @recurrence_3(ptr nocapture readonly %a, ptr nocapture %b, i32 %n, f
; CHECK-VF4UF2-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; CHECK-VF4UF2: [[VECTOR_PH]]:
; CHECK-VF4UF2-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF4UF2-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP11]], 4
-; CHECK-VF4UF2-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP13]], 2
+; CHECK-VF4UF2-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP11]], 2
+; CHECK-VF4UF2-NEXT: [[TMP12:%.*]] = shl nuw i64 [[TMP13]], 1
; CHECK-VF4UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP12]]
; CHECK-VF4UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; CHECK-VF4UF2-NEXT: [[TMP15:%.*]] = add i64 1, [[N_VEC]]
@@ -544,7 +544,7 @@ define i64 @constant_folded_previous_value() {
; CHECK-VF4UF1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4UF1: [[VECTOR_PH]]:
; CHECK-VF4UF1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF4UF1-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-VF4UF1-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; CHECK-VF4UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1000, [[TMP3]]
; CHECK-VF4UF1-NEXT: [[N_VEC:%.*]] = sub i64 1000, [[N_MOD_VF]]
; CHECK-VF4UF1-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -570,7 +570,7 @@ define i64 @constant_folded_previous_value() {
; CHECK-VF4UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4UF2: [[VECTOR_PH]]:
; CHECK-VF4UF2-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF4UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-VF4UF2-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
; CHECK-VF4UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1000, [[TMP3]]
; CHECK-VF4UF2-NEXT: [[N_VEC:%.*]] = sub i64 1000, [[N_MOD_VF]]
; CHECK-VF4UF2-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -618,7 +618,7 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) {
; CHECK-VF4UF1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4UF1: [[VECTOR_PH]]:
; CHECK-VF4UF1-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-VF4UF1-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP2]], 4
+; CHECK-VF4UF1-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 2
; CHECK-VF4UF1-NEXT: [[N_MOD_VF:%.*]] = urem i32 96, [[TMP3]]
; CHECK-VF4UF1-NEXT: [[N_VEC:%.*]] = sub i32 96, [[N_MOD_VF]]
; CHECK-VF4UF1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i64 0
@@ -663,10 +663,10 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) {
; CHECK-VF4UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4UF2: [[VECTOR_PH]]:
; CHECK-VF4UF2-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-VF4UF2-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP4]], 4
+; CHECK-VF4UF2-NEXT: [[TMP5:%.*]] = shl nuw i32 [[TMP4]], 2
; CHECK-VF4UF2-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP5]], i64 0
; CHECK-VF4UF2-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-VF4UF2-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP5]], 2
+; CHECK-VF4UF2-NEXT: [[TMP6:%.*]] = shl nuw i32 [[TMP5]], 1
; CHECK-VF4UF2-NEXT: [[N_MOD_VF:%.*]] = urem i32 96, [[TMP6]]
; CHECK-VF4UF2-NEXT: [[N_VEC:%.*]] = sub i32 96, [[N_MOD_VF]]
; CHECK-VF4UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i64 0
@@ -745,7 +745,7 @@ define void @sink_after(ptr %a, ptr %b, i64 %n) {
; CHECK-VF4UF1-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; CHECK-VF4UF1: [[VECTOR_PH]]:
; CHECK-VF4UF1-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF4UF1-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4
+; CHECK-VF4UF1-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
; CHECK-VF4UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]]
; CHECK-VF4UF1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4UF1-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32()
@@ -802,8 +802,8 @@ define void @sink_after(ptr %a, ptr %b, i64 %n) {
; CHECK-VF4UF2-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; CHECK-VF4UF2: [[VECTOR_PH]]:
; CHECK-VF4UF2-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF4UF2-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP5]], 4
-; CHECK-VF4UF2-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP7]], 2
+; CHECK-VF4UF2-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP5]], 2
+; CHECK-VF4UF2-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP7]], 1
; CHECK-VF4UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]]
; CHECK-VF4UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4UF2-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32()
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll
index 60dc0edbe3063..75528b32ec07f 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll
@@ -179,10 +179,6 @@ define void @add_unique_ind32(ptr noalias nocapture %a, i64 %n) {
; CHECK-NEXT: [[IND_END:%.*]] = shl i32 [[DOTCAST]], 1
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw <vscale x 4 x i32> [[TMP6]], splat (i32 1)
-; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP3]] to i32
-; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i32 [[TMP8]], 1
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP9]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -190,7 +186,7 @@ define void @add_unique_ind32(ptr noalias nocapture %a, i64 %n) {
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: store <vscale x 4 x i32> [[VEC_IND]], ptr [[TMP10]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <vscale x 4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll b/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll
index 64ec3ab2cf214..efc949ae53192 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll
@@ -13,8 +13,8 @@ define i32 @iv_live_out_wide(ptr %dst) {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP4]], 2
-; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP5]], 2
+; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i32 [[TMP4]], 1
+; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i32 [[TMP5]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 2000, [[TMP6]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 2000, [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll b/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll
index 850d6b299ca23..c1a004e67c4d9 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll
@@ -17,7 +17,7 @@ define void @test(ptr %d) {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 128, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 128, [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -85,7 +85,7 @@ define void @testloopvariant(ptr %d) {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 128, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 128, [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll b/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll
index db2d13af38d63..2f7a001f760b1 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll
@@ -14,7 +14,7 @@ define void @loop(i64 %N, ptr noalias %a, ptr noalias %b) {
; CHECKUF1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECKUF1: [[VECTOR_PH]]:
; CHECKUF1-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECKUF1-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4
+; CHECKUF1-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
; CHECKUF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]]
; CHECKUF1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECKUF1-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -42,8 +42,8 @@ define void @loop(i64 %N, ptr noalias %a, ptr noalias %b) {
; CHECKUF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECKUF2: [[VECTOR_PH]]:
; CHECKUF2-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECKUF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 4
-; CHECKUF2-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP3]], 2
+; CHECKUF2-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP5]], 2
+; CHECKUF2-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP3]], 1
; CHECKUF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]]
; CHECKUF2-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECKUF2-NEXT: br label %[[VECTOR_BODY:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-predication.ll b/llvm/test/Transforms/LoopVectorize/scalable-predication.ll
index b63ab8fe8e84e..f5b445641097c 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-predication.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-predication.ll
@@ -14,7 +14,7 @@ define void @foo(i32 %val, ptr dereferenceable(1024) %ptr) {
; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 256, [[TMP2]]
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
index bdd7bb8f3df98..a1594e20efd21 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
@@ -12,8 +12,8 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i32 [[TMP2]], 8
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP4]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i32 [[TMP2]], 3
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP4]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 256, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 256, [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll b/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll
index ae96fe58caa26..1acd48dbf25d2 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll
@@ -10,7 +10,7 @@ define void @trunc_minimal_bitwidth(ptr %bptr, ptr noalias %hptr, i32 %val, i64
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i64 0
@@ -68,7 +68,7 @@ define void @trunc_minimal_bitwidths_shufflevector (ptr %p, i32 %arg1, i64 %len)
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[LEN]], [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[LEN]], [[N_MOD_VF]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[ARG1:%.*]], i64 0
diff --git a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll
index 163faa2254700..e40e8bb10a3f8 100644
--- a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll
+++ b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll
@@ -90,7 +90,7 @@ define void @integer_induction_wraps_scev_predicate_known(i32 %x, ptr %call, ptr
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr ptr, ptr [[CALL]], i32 [[OFFSET_IDX]]
; CHECK-NEXT: store <4 x ptr> [[VECTOR_GEP]], ptr [[TMP4]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP0]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP0]], 2
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 992
; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll
index f80d7b695e2af..c64a333b990ba 100644
--- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll
+++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll
@@ -13,7 +13,7 @@ define void @ld_div1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP1]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42)
@@ -55,7 +55,7 @@ define void @ld_div2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP2]], 42
@@ -151,12 +151,12 @@ define void @ld_div1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
+; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1
@@ -206,7 +206,7 @@ define void @ld_div2_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 [[TMP0]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42)
@@ -311,12 +311,12 @@ define void @ld_div1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
-; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
+; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1
@@ -366,12 +366,12 @@ define void @ld_div2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
-; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1)
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1
@@ -465,7 +465,7 @@ define void @ld_div1_step1_start1_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
-; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 [[OFFSET_IDX]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP1]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42)
@@ -507,12 +507,12 @@ define void @ld_div2_step1_start1_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
-; CHECK-NEXT: [[TMP0:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[TMP0:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1)
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[TMP0]], i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
-; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP3]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> [[TMP7]], i64 [[TMP6]], i32 1
@@ -608,12 +608,12 @@ define void @ld_div1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
-; CHECK-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
+; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
-; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP5]], align 8
; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
@@ -663,7 +663,7 @@ define void @ld_div2_step2_start1_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
-; CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP1]], 1
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42)
@@ -768,12 +768,12 @@ define void @ld_div1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
-; CHECK-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
+; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
-; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP5]], align 8
; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
@@ -824,12 +824,12 @@ define void @ld_div2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
-; CHECK-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1)
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
-; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP5]], align 8
; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll
index 7ff10c544f72a..1512cd4986e36 100644
--- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll
+++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll
@@ -15,7 +15,7 @@ define void @ld_div2_urem3_1(ptr noalias %A, ptr noalias %B) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = udiv <8 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[TMP0:%.*]] = lshr <8 x i64> [[VEC_IND]], splat (i64 1)
; CHECK-NEXT: [[TMP1:%.*]] = urem <8 x i64> [[TMP0]], splat (i64 3)
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i64> [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i64> [[TMP1]], i32 1
@@ -91,7 +91,7 @@ define void @ld_div2_urem3_2(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add nsw <8 x i64> [[VEC_IND]], splat (i64 1)
-; CHECK-NEXT: [[TMP1:%.*]] = udiv <8 x i64> [[TMP0]], splat (i64 2)
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i64> [[TMP0]], splat (i64 1)
; CHECK-NEXT: [[TMP2:%.*]] = urem <8 x i64> [[TMP1]], splat (i64 3)
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i64> [[TMP2]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i64> [[TMP2]], i32 1
@@ -166,30 +166,30 @@ define void @ld_div4(ptr noalias %A, ptr noalias %B) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = udiv <8 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT: [[TMP0:%.*]] = lshr <8 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i64> [[TMP0]], i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i64> [[TMP0]], i32 1
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i64> [[TMP0]], i32 2
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i64> [[TMP0]], i32 3
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i64> [[TMP0]], i32 4
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i64> [[TMP0]], i32 5
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i64> [[TMP0]], i32 6
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i64> [[TMP0]], i32 7
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
-; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP9]], align 8
-; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP10]], align 8
-; CHECK-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP11]], align 8
-; CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP12]], align 8
-; CHECK-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP13]], align 8
-; CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP14]], align 8
-; CHECK-NEXT: [[TMP23:%.*]] = load i64, ptr [[TMP15]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i64> [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i64> [[TMP0]], i32 2
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i64> [[TMP0]], i32 3
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i64> [[TMP0]], i32 4
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i64> [[TMP0]], i32 5
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i64> [[TMP0]], i32 6
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP13]]
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i64> [[TMP0]], i32 7
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP15]]
+; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8
+; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8
+; CHECK-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP8]], align 8
+; CHECK-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP10]], align 8
+; CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[TMP23:%.*]] = load i64, ptr [[TMP14]], align 8
; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP16]], align 8
; CHECK-NEXT: [[TMP25:%.*]] = insertelement <8 x i64> poison, i64 [[TMP17]], i32 0
; CHECK-NEXT: [[TMP26:%.*]] = insertelement <8 x i64> [[TMP25]], i64 [[TMP18]], i32 1
@@ -239,7 +239,7 @@ define void @ld_div8_urem3(ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 [[INDEX]], 8
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP1:%.*]] = urem i64 [[TMP0]], 3
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8
@@ -315,30 +315,30 @@ define void @ld_div2_ld_scevunknown_nonuniform(ptr %src.a, ptr noalias %src.b, p
; CHECK-NEXT: [[TMP29:%.*]] = insertelement <8 x i64> [[TMP28]], i64 [[TMP21]], i32 5
; CHECK-NEXT: [[TMP30:%.*]] = insertelement <8 x i64> [[TMP29]], i64 [[TMP22]], i32 6
; CHECK-NEXT: [[TMP31:%.*]] = insertelement <8 x i64> [[TMP30]], i64 [[TMP23]], i32 7
-; CHECK-NEXT: [[TMP32:%.*]] = udiv <8 x i64> [[TMP31]], splat (i64 2)
+; CHECK-NEXT: [[TMP32:%.*]] = lshr <8 x i64> [[TMP31]], splat (i64 1)
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i64> [[TMP32]], i32 0
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i64> [[TMP32]], i32 1
-; CHECK-NEXT: [[TMP35:%.*]] = extractelement <8 x i64> [[TMP32]], i32 2
-; CHECK-NEXT: [[TMP36:%.*]] = extractelement <8 x i64> [[TMP32]], i32 3
-; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i64> [[TMP32]], i32 4
-; CHECK-NEXT: [[TMP38:%.*]] = extractelement <8 x i64> [[TMP32]], i32 5
-; CHECK-NEXT: [[TMP39:%.*]] = extractelement <8 x i64> [[TMP32]], i32 6
-; CHECK-NEXT: [[TMP40:%.*]] = extractelement <8 x i64> [[TMP32]], i32 7
-; CHECK-NEXT: [[TMP41:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP33]]
-; CHECK-NEXT: [[TMP42:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP34]]
-; CHECK-NEXT: [[TMP43:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP35]]
-; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP36]]
-; CHECK-NEXT: [[TMP45:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP37]]
-; CHECK-NEXT: [[TMP46:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP38]]
-; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP39]]
-; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP40]]
-; CHECK-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP41]], align 4
-; CHECK-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP42]], align 4
-; CHECK-NEXT: [[TMP51:%.*]] = load i32, ptr [[TMP43]], align 4
-; CHECK-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP44]], align 4
-; CHECK-NEXT: [[TMP53:%.*]] = load i32, ptr [[TMP45]], align 4
-; CHECK-NEXT: [[TMP54:%.*]] = load i32, ptr [[TMP46]], align 4
-; CHECK-NEXT: [[TMP55:%.*]] = load i32, ptr [[TMP47]], align 4
+; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP33]]
+; CHECK-NEXT: [[TMP35:%.*]] = extractelement <8 x i64> [[TMP32]], i32 1
+; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP35]]
+; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i64> [[TMP32]], i32 2
+; CHECK-NEXT: [[TMP38:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP37]]
+; CHECK-NEXT: [[TMP39:%.*]] = extractelement <8 x i64> [[TMP32]], i32 3
+; CHECK-NEXT: [[TMP40:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP39]]
+; CHECK-NEXT: [[TMP41:%.*]] = extractelement <8 x i64> [[TMP32]], i32 4
+; CHECK-NEXT: [[TMP42:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP41]]
+; CHECK-NEXT: [[TMP43:%.*]] = extractelement <8 x i64> [[TMP32]], i32 5
+; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP43]]
+; CHECK-NEXT: [[TMP45:%.*]] = extractelement <8 x i64> [[TMP32]], i32 6
+; CHECK-NEXT: [[TMP46:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP45]]
+; CHECK-NEXT: [[TMP47:%.*]] = extractelement <8 x i64> [[TMP32]], i32 7
+; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP47]]
+; CHECK-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP34]], align 4
+; CHECK-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP36]], align 4
+; CHECK-NEXT: [[TMP51:%.*]] = load i32, ptr [[TMP38]], align 4
+; CHECK-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP40]], align 4
+; CHECK-NEXT: [[TMP53:%.*]] = load i32, ptr [[TMP42]], align 4
+; CHECK-NEXT: [[TMP54:%.*]] = load i32, ptr [[TMP44]], align 4
+; CHECK-NEXT: [[TMP55:%.*]] = load i32, ptr [[TMP46]], align 4
; CHECK-NEXT: [[TMP56:%.*]] = load i32, ptr [[TMP48]], align 4
; CHECK-NEXT: [[TMP57:%.*]] = insertelement <8 x i32> poison, i32 [[TMP49]], i32 0
; CHECK-NEXT: [[TMP58:%.*]] = insertelement <8 x i32> [[TMP57]], i32 [[TMP50]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll
index fb962c017156d..6ce34ad58c753 100644
--- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll
+++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll
@@ -14,8 +14,8 @@ define void @ld_div1_step1_start0_ind2(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
-; VF2-NEXT: [[TMP0:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
-; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 1)
+; VF2-NEXT: [[TMP0:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer
+; VF2-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[VEC_IND1]], zeroinitializer
; VF2-NEXT: [[TMP2:%.*]] = add <2 x i64> [[TMP0]], [[TMP1]]
; VF2-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
@@ -48,8 +48,8 @@ define void @ld_div1_step1_start0_ind2(ptr noalias %A, ptr noalias %B) {
; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
-; VF4-NEXT: [[TMP0:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1)
-; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 1)
+; VF4-NEXT: [[TMP0:%.*]] = lshr <4 x i64> [[VEC_IND]], zeroinitializer
+; VF4-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[VEC_IND1]], zeroinitializer
; VF4-NEXT: [[TMP2:%.*]] = add <4 x i64> [[TMP0]], [[TMP1]]
; VF4-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[TMP2]], i32 0
; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP2]], i32 1
@@ -112,7 +112,7 @@ define void @ld_div2_step1_start0_ind2(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: br label [[VECTOR_BODY:%.*]]
; VF2: vector.body:
; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; VF2-NEXT: [[TMP0:%.*]] = udiv i64 [[INDEX]], 2
+; VF2-NEXT: [[TMP0:%.*]] = lshr i64 [[INDEX]], 1
; VF2-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], [[TMP0]]
; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; VF2-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8
@@ -139,8 +139,8 @@ define void @ld_div2_step1_start0_ind2(ptr noalias %A, ptr noalias %B) {
; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
-; VF4-NEXT: [[TMP0:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2)
-; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 2)
+; VF4-NEXT: [[TMP0:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 1)
+; VF4-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[VEC_IND1]], splat (i64 1)
; VF4-NEXT: [[TMP2:%.*]] = add <4 x i64> [[TMP0]], [[TMP1]]
; VF4-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[TMP2]], i32 0
; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP2]], i32 1
@@ -306,8 +306,8 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
-; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
-; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 1)
+; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer
+; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND1]], zeroinitializer
; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
@@ -349,8 +349,8 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
-; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1)
-; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 1)
+; VF4-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[VEC_IND]], zeroinitializer
+; VF4-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[VEC_IND1]], zeroinitializer
; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]]
; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0
; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1
@@ -427,8 +427,8 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
-; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2)
-; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 2)
+; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1)
+; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND1]], splat (i64 1)
; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
@@ -470,8 +470,8 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
-; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2)
-; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 2)
+; VF4-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 1)
+; VF4-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[VEC_IND1]], splat (i64 1)
; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]]
; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0
; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1
@@ -669,8 +669,8 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
-; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
-; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 1)
+; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer
+; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND1]], zeroinitializer
; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
@@ -711,8 +711,8 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6
; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9
-; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1)
-; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 1)
+; VF4-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[VEC_IND]], zeroinitializer
+; VF4-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[VEC_IND1]], zeroinitializer
; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]]
; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0
; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1
@@ -788,8 +788,8 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
-; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2)
-; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 2)
+; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1)
+; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND1]], splat (i64 1)
; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
@@ -830,8 +830,8 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6
; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9
-; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2)
-; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 2)
+; VF4-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 1)
+; VF4-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[VEC_IND1]], splat (i64 1)
; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]]
; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0
; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1
@@ -1024,8 +1024,8 @@ define void @ld_div1_step1_start1_ind2(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
-; VF2-NEXT: [[TMP0:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
-; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 1)
+; VF2-NEXT: [[TMP0:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer
+; VF2-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[VEC_IND1]], zeroinitializer
; VF2-NEXT: [[TMP2:%.*]] = add <2 x i64> [[TMP0]], [[TMP1]]
; VF2-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
@@ -1058,8 +1058,8 @@ define void @ld_div1_step1_start1_ind2(ptr noalias %A, ptr noalias %B) {
; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 1, i64 2, i64 3, i64 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
-; VF4-NEXT: [[TMP0:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1)
-; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 1)
+; VF4-NEXT: [[TMP0:%.*]] = lshr <4 x i64> [[VEC_IND]], zeroinitializer
+; VF4-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[VEC_IND1]], zeroinitializer
; VF4-NEXT: [[TMP2:%.*]] = add <4 x i64> [[TMP0]], [[TMP1]]
; VF4-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[TMP2]], i32 0
; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP2]], i32 1
@@ -1123,8 +1123,8 @@ define void @ld_div2_step1_start1_ind2(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
-; VF2-NEXT: [[TMP0:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2)
-; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 2)
+; VF2-NEXT: [[TMP0:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1)
+; VF2-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[VEC_IND1]], splat (i64 1)
; VF2-NEXT: [[TMP2:%.*]] = add <2 x i64> [[TMP0]], [[TMP1]]
; VF2-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
@@ -1157,8 +1157,8 @@ define void @ld_div2_step1_start1_ind2(ptr noalias %A, ptr noalias %B) {
; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 1, i64 2, i64 3, i64 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
-; VF4-NEXT: [[TMP0:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2)
-; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 2)
+; VF4-NEXT: [[TMP0:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 1)
+; VF4-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[VEC_IND1]], splat (i64 1)
; VF4-NEXT: [[TMP2:%.*]] = add <4 x i64> [[TMP0]], [[TMP1]]
; VF4-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[TMP2]], i32 0
; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP2]], i32 1
@@ -1324,8 +1324,8 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
-; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
-; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 1)
+; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer
+; VF2-NEXT: [[TMP4:%.*]] = lshr <2 x i64> [[VEC_IND1]], zeroinitializer
; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]]
; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
; VF2-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
@@ -1367,8 +1367,8 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 4
; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 6
-; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1)
-; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 1)
+; VF4-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[VEC_IND]], zeroinitializer
+; VF4-NEXT: [[TMP6:%.*]] = lshr <4 x i64> [[VEC_IND1]], zeroinitializer
; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]]
; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0
; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP7]], i32 1
@@ -1445,8 +1445,8 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
-; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2)
-; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 2)
+; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1)
+; VF2-NEXT: [[TMP4:%.*]] = lshr <2 x i64> [[VEC_IND1]], splat (i64 1)
; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]]
; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
; VF2-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
@@ -1488,8 +1488,8 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 4
; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 6
-; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2)
-; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 2)
+; VF4-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 1)
+; VF4-NEXT: [[TMP6:%.*]] = lshr <4 x i64> [[VEC_IND1]], splat (i64 1)
; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]]
; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0
; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP7]], i32 1
@@ -1687,8 +1687,8 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
-; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
-; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 1)
+; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer
+; VF2-NEXT: [[TMP4:%.*]] = lshr <2 x i64> [[VEC_IND1]], zeroinitializer
; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]]
; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
; VF2-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
@@ -1730,8 +1730,8 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9
-; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1)
-; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 1)
+; VF4-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[VEC_IND]], zeroinitializer
+; VF4-NEXT: [[TMP6:%.*]] = lshr <4 x i64> [[VEC_IND1]], zeroinitializer
; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]]
; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0
; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP7]], i32 1
@@ -1808,8 +1808,8 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
-; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2)
-; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 2)
+; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1)
+; VF2-NEXT: [[TMP4:%.*]] = lshr <2 x i64> [[VEC_IND1]], splat (i64 1)
; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]]
; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
; VF2-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
@@ -1851,8 +1851,8 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9
-; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2)
-; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 2)
+; VF4-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 1)
+; VF4-NEXT: [[TMP6:%.*]] = lshr <4 x i64> [[VEC_IND1]], splat (i64 1)
; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]]
; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0
; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP7]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll b/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll
index 199bd5b31c70d..e30ebb59a8198 100644
--- a/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll
+++ b/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll
@@ -42,7 +42,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP: vector.ph:
; NO-VP-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; NO-VP-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP1]], 4
+; NO-VP-NEXT: [[TMP14:%.*]] = shl nuw i64 [[TMP1]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP14]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
index 0fde76477b9f9..66bf7e212a14e 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
@@ -1049,8 +1049,8 @@ define i64 @print_ext_mulacc_not_extended_const(ptr %start, ptr %end) {
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer vp<%next.gep>
; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: WIDEN-CAST ir<%l.ext> = sext ir<%l> to i32
-; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%l.ext>, ir<128>
-; CHECK-NEXT: EXPRESSION vp<[[RDX_NEXT]]> = ir<[[RDX]]> + reduce.add (ir<%mul> sext to i64)
+; CHECK-NEXT: EMIT vp<[[SHL:%.+]]> = shl ir<%l.ext>, ir<7>
+; CHECK-NEXT: EXPRESSION vp<[[RDX_NEXT]]> = ir<[[RDX]]> + reduce.add (vp<[[SHL]]> sext to i64)
; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]>
; CHECK-NEXT: No successors
@@ -1058,7 +1058,7 @@ define i64 @print_ext_mulacc_not_extended_const(ptr %start, ptr %end) {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result ir<[[RDX]]>, vp<[[RDX_NEXT]]>
+; CHECK-NEXT: EMIT vp<[[RES:%.+]]> = compute-reduction-result ir<[[RDX]]>, vp<[[RDX_NEXT]]>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%3>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
entry:
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
index 88dead4418628..f49cf5630bca0 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
@@ -39,20 +39,16 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
; CHECK: pred.store.if:
-; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
-; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr inbounds ir<@b>, ir<0>, vp<[[STEPS]]>
-; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b>
-; CHECK-NEXT: REPLICATE ir<%add> = add ir<%lv.b>, ir<10>
+; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr inbounds ir<@a>, ir<0>, vp<[[STEPS]]
-; CHECK-NEXT: REPLICATE ir<%mul> = mul ir<2>, ir<%add>
-; CHECK-NEXT: REPLICATE store ir<%mul>, ir<%gep.a>
+; CHECK-NEXT: REPLICATE store ir<4>, ir<%gep.a>
; CHECK-NEXT: Successor(s): pred.store.continue
; CHECK: pred.store.continue:
; CHECK-NEXT: No successors
; CHECK-NEXT: }
-; CHECK: if.1:
+; CHECK: if.0:
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
; CHECK-NEXT: No successors
@@ -126,7 +122,7 @@ exit:
; CHECK-NEXT: }
; CHECK: loop.0:
-; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%iv>, ir<2>
+; CHECK-NEXT: EMIT vp<%9> = shl ir<%iv>, ir<1>
; CHECK-NEXT: Successor(s): pred.store
; CHECK: <xVFxUF> pred.store: {
@@ -135,7 +131,7 @@ exit:
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
; CHECK: pred.store.if:
-; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr inbounds ir<@a>, ir<0>, ir<%mul>
+; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr inbounds ir<@a>, ir<0>, vp<%9>
; CHECK-NEXT: REPLICATE ir<%add> = add vp<[[PRED]]>, ir<10>
; CHECK-NEXT: REPLICATE store ir<%add>, ir<%gep.a>
; CHECK-NEXT: Successor(s): pred.store.continue
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/sve-interleave-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/sve-interleave-vectorization.ll
index cd7ca1e22b499..6e8384420875f 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/sve-interleave-vectorization.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/sve-interleave-vectorization.ll
@@ -21,7 +21,7 @@ define void @interleave_deinterleave(ptr noalias %dst, ptr %a, ptr %b) {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
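
For readers skimming the test churn: every check-line change above is the same rewrite. The transform turns a multiply or an unsigned divide by a power-of-2 constant into the equivalent shift, preserving the nuw flag where present. A minimal hand-written IR sketch of the before/after pattern (illustrative only, not taken from the patch):

define i64 @pow2_examples(i64 %idx) {
  ; mul nuw i64 %vscale, 4 simplifies to shl nuw i64 %vscale, 2 (4 == 1 << 2)
  %vscale = call i64 @llvm.vscale.i64()
  %vf = shl nuw i64 %vscale, 2
  ; udiv i64 %idx, 8 simplifies to lshr i64 %idx, 3 (8 == 1 << 3)
  %q = lshr i64 %idx, 3
  %r = add i64 %vf, %q
  ret i64 %r
}

declare i64 @llvm.vscale.i64()

Note the degenerate case visible in the ld_div1 tests above: a udiv by 1 becomes an lshr by 0, since log2(1) == 0.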