[llvm] [VPlan] Simplify pow-of-2 (mul|udiv) -> (shl|lshr) (PR #172477)

Tue Dec 16 05:01:26 PST 2025

llvmbot wrote:




@llvm/pr-subscribers-backend-systemz

Author: Ramkumar Ramachandra (artagnon)

<details>
<summary>Changes</summary>



---

Patch is 460.87 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/172477.diff


128 Files Affected:

- (modified) llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h (+7-5) 
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+15) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll (+6-6) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll (+4-4) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll (+4-4) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll (+3-3) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll (+3-3) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll (+24-24) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/masked_ldst_sme.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll (+3-3) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll (+18-18) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-constant-ops.ll (+3-3) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll (+6-6) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll (+4-4) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll (+27-27) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-incomplete-chains.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll (+4-4) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll (+32-32) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll (+11-14) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll (+8-8) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll (+40-40) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/scalable-struct-return.ll (+3-3) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll (+3-3) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll (+7-7) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll (+24-24) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vscale-fixed.ll (+5-5) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll (+4-4) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll (+5-20) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll (+4-4) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-predicated-costs.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll (+6-6) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll (+6-6) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll (+11-14) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll (+12-12) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll (+12-12) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll (+2-4) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll (+5-5) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll (+9-9) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops-chained.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll (+6-6) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/widen-gep-all-indices-invariant.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll (+3-3) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll (+2-8) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll (+2-3) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll (+8-8) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll (+3-6) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll (+6-6) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll (+23-23) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll (+2-8) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll (+8-8) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll (+12-24) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll (+8-8) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll (+32-37) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll (+18-18) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll (+9-9) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll (+11-11) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll (+8-8) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll (+4-4) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll (+10-10) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll (+14-14) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll (+6-6) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-intermediate-store.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-iv32.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll (+14-14) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll (+6-6) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/transform-narrow-interleave-to-widen-memory.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/SystemZ/scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll (+5-10) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll (+3-3) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll (+4-4) 
- (modified) llvm/test/Transforms/LoopVectorize/create-induction-resume.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll (+8-8) 
- (modified) llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll (+3-3) 
- (modified) llvm/test/Transforms/LoopVectorize/if-reduction.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/induction-step.ll (+5-5) 
- (modified) llvm/test/Transforms/LoopVectorize/induction.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll (+4-4) 
- (modified) llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar-widen-gep-scalable.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll (+4-4) 
- (modified) llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/pointer-induction.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll (+3-3) 
- (modified) llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll (+6-6) 
- (modified) llvm/test/Transforms/LoopVectorize/scalable-assume.ll (+6-6) 
- (modified) llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll (+17-17) 
- (modified) llvm/test/Transforms/LoopVectorize/scalable-inductions.ll (+1-5) 
- (modified) llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll (+3-3) 
- (modified) llvm/test/Transforms/LoopVectorize/scalable-predication.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll (+40-40) 
- (modified) llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll (+49-49) 
- (modified) llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll (+47-47) 
- (modified) llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll (+3-3) 
- (modified) llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll (+5-9) 
- (modified) llvm/test/Transforms/PhaseOrdering/AArch64/sve-interleave-vectorization.ll (+1-1) 


``````````diff

diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index f082b970c7762..bb3c94218ce6b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -290,12 +290,8 @@ struct Recipe_match {
     if ((!matchRecipeAndOpcode<RecipeTys>(R) && ...))
       return false;
 
-    if (R->getNumOperands() != std::tuple_size<Ops_t>::value) {
-      assert(Opcode == Instruction::PHI &&
-             "non-variadic recipe with matched opcode does not have the "
-             "expected number of operands");
+    if (R->getNumOperands() != std::tuple_size<Ops_t>::value)
       return false;
-    }
 
     auto IdxSeq = std::make_index_sequence<std::tuple_size<Ops_t>::value>();
     if (all_of_tuple_elements(IdxSeq, [R](auto Op, unsigned Idx) {
@@ -547,6 +543,12 @@ m_c_Mul(const Op0_t &Op0, const Op1_t &Op1) {
   return m_c_Binary<Instruction::Mul, Op0_t, Op1_t>(Op0, Op1);
 }
 
+template <typename Op0_t, typename Op1_t>
+inline AllRecipe_match<Instruction::UDiv, Op0_t, Op1_t>
+m_UDiv(const Op0_t &Op0, const Op1_t &Op1) {
+  return m_Binary<Instruction::UDiv, Op0_t, Op1_t>(Op0, Op1);
+}
+
 /// Match a binary AND operation.
 template <typename Op0_t, typename Op1_t>
 inline AllRecipe_commutative_match<Instruction::And, Op0_t, Op1_t>
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 0bcf131d5ea86..7f7842371af1d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1343,6 +1343,21 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
     return Def->replaceAllUsesWith(
         Def->getOperand(0) == A ? Def->getOperand(1) : Def->getOperand(0));
 
+  const APInt *APC;
+  if (match(Def, m_c_Mul(m_VPValue(), m_APInt(APC))) && APC->isPowerOf2())
+    return Def->replaceAllUsesWith(Builder.createNaryOp(
+        Instruction::Shl,
+        {Def->getOperand(0),
+         Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
+        *cast<VPRecipeWithIRFlags>(Def), Def->getDebugLoc()));
+
+  if (match(Def, m_UDiv(m_VPValue(), m_APInt(APC))) && APC->isPowerOf2())
+    return Def->replaceAllUsesWith(Builder.createNaryOp(
+        Instruction::LShr,
+        {Def->getOperand(0),
+         Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
+        *cast<VPRecipeWithIRFlags>(Def), Def->getDebugLoc()));
+
   if (match(Def, m_Not(m_VPValue(A)))) {
     if (match(A, m_Not(m_VPValue(A))))
       return Def->replaceAllUsesWith(A);
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
index ac8095ae5c3e7..077bde70a537b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
@@ -9,7 +9,7 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
 ; CHECK-NEXT:    br label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
+; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 8)
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -71,7 +71,7 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
 ; CHECK-NEXT:    br label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
+; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
index 4f2933ad2f85c..d7d77cb4325d4 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
@@ -528,8 +528,8 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
 ; DEFAULT-NEXT:    br i1 [[MIN_ITERS_CHECK1]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; DEFAULT:       [[VECTOR_PH]]:
 ; DEFAULT-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; DEFAULT-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP4]], 4
-; DEFAULT-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP11]], 4
+; DEFAULT-NEXT:    [[TMP11:%.*]] = shl nuw i64 [[TMP4]], 2
+; DEFAULT-NEXT:    [[TMP5:%.*]] = shl nuw i64 [[TMP11]], 2
 ; DEFAULT-NEXT:    [[N_MOD_VF:%.*]] = urem i64 257, [[TMP5]]
 ; DEFAULT-NEXT:    [[N_VEC:%.*]] = sub i64 257, [[N_MOD_VF]]
 ; DEFAULT-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[N_VEC]], 8
@@ -545,7 +545,7 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
 ; DEFAULT-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 [[TMP8]], i64 0
 ; DEFAULT-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i16> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
 ; DEFAULT-NEXT:    [[TMP9:%.*]] = uitofp <vscale x 4 x i16> [[BROADCAST_SPLAT]] to <vscale x 4 x double>
-; DEFAULT-NEXT:    [[TMP14:%.*]] = mul nuw nsw i64 [[TMP11]], 2
+; DEFAULT-NEXT:    [[TMP14:%.*]] = shl nuw nsw i64 [[TMP11]], 1
 ; DEFAULT-NEXT:    [[TMP17:%.*]] = mul nuw nsw i64 [[TMP11]], 3
 ; DEFAULT-NEXT:    [[TMP12:%.*]] = getelementptr double, ptr [[NEXT_GEP1]], i64 [[TMP11]]
 ; DEFAULT-NEXT:    [[TMP15:%.*]] = getelementptr double, ptr [[NEXT_GEP1]], i64 [[TMP14]]
@@ -568,7 +568,7 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
 ; PRED-NEXT:    br label %[[VECTOR_PH:.*]]
 ; PRED:       [[VECTOR_PH]]:
 ; PRED-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; PRED-NEXT:    [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; PRED-NEXT:    [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
 ; PRED-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
 ; PRED-NEXT:    [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 2
 ; PRED-NEXT:    [[TMP8:%.*]] = sub i64 257, [[TMP7]]
@@ -1219,7 +1219,7 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 {
 ; DEFAULT-NEXT:    br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; DEFAULT:       [[VECTOR_PH]]:
 ; DEFAULT-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; DEFAULT-NEXT:    [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4
+; DEFAULT-NEXT:    [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 2
 ; DEFAULT-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP9]]
 ; DEFAULT-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
 ; DEFAULT-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i8> poison, i8 [[Y]], i64 0
@@ -1273,7 +1273,7 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 {
 ; PRED-NEXT:    br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; PRED:       [[VECTOR_PH]]:
 ; PRED-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; PRED-NEXT:    [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16
+; PRED-NEXT:    [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
 ; PRED-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
 ; PRED-NEXT:    [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 4
 ; PRED-NEXT:    [[TMP9:%.*]] = sub i64 [[TMP0]], [[TMP8]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
index 2b294696ebe89..99bbcad95b6de 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
@@ -21,8 +21,8 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) {
 ; CHECK-NEXT:    br i1 [[TMP7]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
 ; CHECK-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP8]], 2
-; CHECK-NEXT:    [[TMP9:%.*]] = mul nuw i64 [[TMP11]], 2
+; CHECK-NEXT:    [[TMP11:%.*]] = shl nuw i64 [[TMP8]], 1
+; CHECK-NEXT:    [[TMP9:%.*]] = shl nuw i64 [[TMP11]], 1
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP9]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = sdiv i64 [[M]], [[CONV6]]
@@ -106,7 +106,7 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
 ; CHECK-NEXT:    br i1 [[TMP4]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
 ; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2
+; CHECK-NEXT:    [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
 ; CHECK-NEXT:    [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 1
 ; CHECK-NEXT:    [[TMP12:%.*]] = sub i64 [[N]], [[TMP11]]
@@ -220,7 +220,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
 ; CHECK-NEXT:    br i1 [[TMP4]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
 ; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2
+; CHECK-NEXT:    [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
 ; CHECK-NEXT:    [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 1
 ; CHECK-NEXT:    [[TMP12:%.*]] = sub i64 [[TMP0]], [[TMP11]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll
index 14c53cd89c922..74598e2063e48 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll
@@ -12,7 +12,7 @@ define void @f1(ptr %A) #0 {
 ; CHECK-NEXT:    br label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll b/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll
index 26a9545764091..d18283f831799 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll
@@ -77,7 +77,7 @@ define dso_local double @test(ptr nocapture noundef readonly %data, ptr nocaptur
 ; SVE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; SVE:       vector.ph:
 ; SVE-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; SVE-NEXT:    [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
+; SVE-NEXT:    [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1
 ; SVE-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]]
 ; SVE-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
 ; SVE-NEXT:    br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
index 4cacc30a714b6..aa6e4df26d71b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
@@ -30,8 +30,8 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
 ; DEFAULT-NEXT:    br i1 [[MIN_ITERS_CHECK3]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
 ; DEFAULT:       [[VECTOR_PH]]:
 ; DEFAULT-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; DEFAULT-NEXT:    [[TMP13:%.*]] = mul nuw i64 [[TMP9]], 8
-; DEFAULT-NEXT:    [[TMP10:%.*]] = mul nuw i64 [[TMP13]], 2
+; DEFAULT-NEXT:    [[TMP13:%.*]] = shl nuw i64 [[TMP9]], 3
+; DEFAULT-NEXT:    [[TMP10:%.*]] = shl nuw i64 [[TMP13]], 1
 ; DEFAULT-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP10]]
 ; DEFAULT-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
 ; DEFAULT-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i32> poison, i32 [[X]], i64 0
@@ -70,7 +70,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
 ; DEFAULT:       [[VEC_EPILOG_PH]]:
 ; DEFAULT-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; DEFAULT-NEXT:    [[TMP33:%.*]] = call i64 @llvm.vscale.i64()
-; DEFAULT-NEXT:    [[TMP34:%.*]] = mul nuw i64 [[TMP33]], 4
+; DEFAULT-NEXT:    [[TMP34:%.*]] = shl nuw i64 [[TMP33]], 2
 ; DEFAULT-NEXT:    [[N_MOD_VF5:%.*]] = urem i64 [[TMP0]], [[TMP34]]
 ; DEFAULT-NEXT:    [[N_VEC6:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF5]]
 ; DEFAULT-NEXT:    [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i64 0
@@ -130,7 +130,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
 ; PRED-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; PRED:       [[VECTOR_PH]]:
 ; PRED-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; PRED-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 16
+; PRED-NEXT:    [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 4
 ; PRED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[X]], i64 0
 ; PRED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
 ; PRED-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll
index 040acb494a42e..fff9365baccb4 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll
@@ -144,8 +144,8 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) {
 ; INTERLEAVE-4-VLA-NEXT:    br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; INTERLEAVE-4-VLA:       vector.ph:
 ; INTERLEAVE-4-VLA-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; INTERLEAVE-4-VLA-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 4
-; INTERLEAVE-4-VLA-NEXT:    [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 4
+; INTERLEAVE-4-VLA-NEXT:    [[TMP5:%.*]] = shl nuw i64 [[TMP2]], 2
+; INTERLEAVE-4-VLA-NEXT:    [[TMP3:%.*]] = shl nuw i64 [[TMP5]], 2
 ; INTERLEAVE-4-VLA-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
 ; INTERLEAVE-4-VLA-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; INTERLEAVE-4-VLA-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -156,7 +156,7 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) {
 ; INTERLEAVE-4-VLA-NEXT:    [[VEC_PHI3:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
 ; INTERLEAVE-4-VLA-NEXT:    [[VEC_PHI4:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
 ; INTERLEAVE-4-VLA-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[INDEX]]
-; INTERLEAVE-4-VLA-NEXT:    [[TMP9:%.*]] = mul nuw nsw i64 [[TMP5]], 2
+; INTERLEAVE-4-VLA-NEXT:    [[TMP9:%.*]] = shl nuw nsw i64 [[TMP5]], 1
 ; INTERLEAVE-4-VLA-NEXT:    [[TMP12:%.*]] = mul nuw nsw i64 [[TMP5]], 3
 ; INTERLEAVE-4-VLA-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP5]]
 ; INTERLEAVE-4-VLA-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP9]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll
index cc3b1c9c9db8a..de5a24666626c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll
@@ -16,7 +16,7 @@ define void @replicate_sdiv_conditional(ptr noalias %a, ptr noalias %b, ptr noal
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 64, [[TMP3]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]]
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll
index cf45f3a88f37e..c340cfc9ad6cc 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll
@@ -67,7 +67,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef
 ; CHECK-VS1-NEXT:    br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK-VS1:       [[VECTOR_PH]]:
 ; CHECK-VS1-NEXT:    [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VS1-NEXT:    [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 16
+; CHECK-VS1-NEXT:    [[TMP17:%.*]] = shl nuw i64 [[TMP16]], 4
 ; CHECK-VS1-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], [[TMP17]]
 ; CHECK-VS1-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
 ; CHECK-VS1-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[CONV]], i64 0
@@ -160,7 +160,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef
 ; CHECK-VS2-NEXT:    br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK-VS2:       [[VECTOR_PH]]:
 ; CHECK-VS2-NEXT:    [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VS2-NEXT:    [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 8
+; CHECK-VS2-NEXT:    [[TMP17:%.*]] = shl nuw i64 [[TMP16]], 3
 ; CHECK-VS2-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], [[TMP17]]
 ; CHECK-VS2-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
 ; CHECK-VS2-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i8> poison, i8 [[CONV]], i64 0
@@ -393,7 +393,7 @@ define void @overflow_indvar_known_false(ptr nocapture noundef %p, i32 noundef %
 ; CHECK-NEXT:    br i1 [[TMP28]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
+; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 [[TMP1]])
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[CONV]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
index 0a11e8e4390cb..00c7e6eecfb2c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
@@ -15,7 +15,7 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 {
 ; TFNONE-NEXT:    br label %[[VECTOR_PH:.*]]
 ; TFNONE:       [[VECTOR_PH]]:
 ; TFNONE-NEXT:    [[TMP2:%...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/172477