[llvm] [RISCV] Add `2^N + 2^M` expanding pattern for mul (PR #137195)

Iris Shi via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 24 08:32:02 PDT 2025


https://github.com/el-ev created https://github.com/llvm/llvm-project/pull/137195

Closes #137023

>From cba1d49f453eea87f23227de0f839fc05c3edd92 Mon Sep 17 00:00:00 2001
From: Iris Shi <0.0 at owo.li>
Date: Thu, 24 Apr 2025 23:21:45 +0800
Subject: [PATCH 1/2] pre-commit tests

---
 llvm/test/CodeGen/RISCV/mul.ll | 171 ++++++++++++++++++++++++++++++++-
 1 file changed, 167 insertions(+), 4 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll
index 548c7e1c6ea8c..8dd691946ae5a 100644
--- a/llvm/test/CodeGen/RISCV/mul.ll
+++ b/llvm/test/CodeGen/RISCV/mul.ll
@@ -464,6 +464,37 @@ define i32 @mulhu_constant(i32 %a) nounwind {
   ret i32 %4
 }
 
+define i32 @muli32_p10(i32 %a) nounwind {
+; RV32I-LABEL: muli32_p10:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    li a1, 10
+; RV32I-NEXT:    tail __mulsi3
+;
+; RV32IM-LABEL: muli32_p10:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    li a1, 10
+; RV32IM-NEXT:    mul a0, a0, a1
+; RV32IM-NEXT:    ret
+;
+; RV64I-LABEL: muli32_p10:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 10
+; RV64I-NEXT:    call __muldi3
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: muli32_p10:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    li a1, 10
+; RV64IM-NEXT:    mulw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = mul i32 %a, 10
+  ret i32 %1
+}
+
 define i32 @muli32_p14(i32 %a) nounwind {
 ; RV32I-LABEL: muli32_p14:
 ; RV32I:       # %bb.0:
@@ -494,6 +525,37 @@ define i32 @muli32_p14(i32 %a) nounwind {
   ret i32 %1
 }
 
+define i32 @muli32_p20(i32 %a) nounwind {
+; RV32I-LABEL: muli32_p20:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    li a1, 20
+; RV32I-NEXT:    tail __mulsi3
+;
+; RV32IM-LABEL: muli32_p20:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    li a1, 20
+; RV32IM-NEXT:    mul a0, a0, a1
+; RV32IM-NEXT:    ret
+;
+; RV64I-LABEL: muli32_p20:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 20
+; RV64I-NEXT:    call __muldi3
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: muli32_p20:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    li a1, 20
+; RV64IM-NEXT:    mulw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = mul i32 %a, 20
+  ret i32 %1
+}
+
 define i32 @muli32_p28(i32 %a) nounwind {
 ; RV32I-LABEL: muli32_p28:
 ; RV32I:       # %bb.0:
@@ -672,6 +734,34 @@ define i32 @muli32_p65(i32 %a) nounwind {
   ret i32 %1
 }
 
+define i32 @muli32_p65_2(i32 %a) nounwind {
+; RV32I-LABEL: muli32_p65_2:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a1, a0, 6
+; RV32I-NEXT:    add a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32IM-LABEL: muli32_p65_2:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    slli a1, a0, 6
+; RV32IM-NEXT:    add a0, a1, a0
+; RV32IM-NEXT:    ret
+;
+; RV64I-LABEL: muli32_p65_2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 6
+; RV64I-NEXT:    addw a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: muli32_p65_2:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a1, a0, 6
+; RV64IM-NEXT:    addw a0, a1, a0
+; RV64IM-NEXT:    ret
+  %1 = mul i32 %a, 65
+  ret i32 %1
+}
+
 define i32 @muli32_p63(i32 %a) nounwind {
 ; RV32I-LABEL: muli32_p63:
 ; RV32I:       # %bb.0:
@@ -778,7 +868,80 @@ define i64 @muli64_p63(i64 %a) nounwind {
   ret i64 %1
 }
 
+define i64 @muli64_p60(i64 %a) nounwind {
+; RV32I-LABEL: muli64_p60:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a2, 60
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    call __muldi3
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IM-LABEL: muli64_p60:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    li a2, 60
+; RV32IM-NEXT:    slli a3, a1, 2
+; RV32IM-NEXT:    slli a1, a1, 6
+; RV32IM-NEXT:    sub a1, a1, a3
+; RV32IM-NEXT:    slli a3, a0, 2
+; RV32IM-NEXT:    mulhu a2, a0, a2
+; RV32IM-NEXT:    slli a0, a0, 6
+; RV32IM-NEXT:    add a1, a2, a1
+; RV32IM-NEXT:    sub a0, a0, a3
+; RV32IM-NEXT:    ret
+;
+; RV64I-LABEL: muli64_p60:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 60
+; RV64I-NEXT:    tail __muldi3
+;
+; RV64IM-LABEL: muli64_p60:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    slli a1, a0, 2
+; RV64IM-NEXT:    slli a0, a0, 6
+; RV64IM-NEXT:    sub a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = mul i64 %a, 60
+  ret i64 %1
+}
 
+define i64 @muli64_p68(i64 %a) nounwind {
+; RV32I-LABEL: muli64_p68:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a2, 68
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    call __muldi3
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IM-LABEL: muli64_p68:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    li a2, 68
+; RV32IM-NEXT:    mul a1, a1, a2
+; RV32IM-NEXT:    mulhu a3, a0, a2
+; RV32IM-NEXT:    add a1, a3, a1
+; RV32IM-NEXT:    mul a0, a0, a2
+; RV32IM-NEXT:    ret
+;
+; RV64I-LABEL: muli64_p68:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 68
+; RV64I-NEXT:    tail __muldi3
+;
+; RV64IM-LABEL: muli64_p68:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    li a1, 68
+; RV64IM-NEXT:    mul a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = mul i64 %a, 68
+  ret i64 %1
+}
 
 define i32 @muli32_m63(i32 %a) nounwind {
 ; RV32I-LABEL: muli32_m63:
@@ -1327,10 +1490,10 @@ define i128 @muli128_m3840(i128 %a) nounwind {
 ; RV32I-NEXT:    sltu a7, a5, a4
 ; RV32I-NEXT:    sub a6, a6, t2
 ; RV32I-NEXT:    mv t1, a7
-; RV32I-NEXT:    beq t0, a3, .LBB36_2
+; RV32I-NEXT:    beq t0, a3, .LBB41_2
 ; RV32I-NEXT:  # %bb.1:
 ; RV32I-NEXT:    sltu t1, t0, a3
-; RV32I-NEXT:  .LBB36_2:
+; RV32I-NEXT:  .LBB41_2:
 ; RV32I-NEXT:    sub a2, a2, a1
 ; RV32I-NEXT:    sub a1, t0, a3
 ; RV32I-NEXT:    sub a5, a5, a4
@@ -1441,10 +1604,10 @@ define i128 @muli128_m63(i128 %a) nounwind {
 ; RV32I-NEXT:    sltu a7, a3, a6
 ; RV32I-NEXT:    or t0, t0, a5
 ; RV32I-NEXT:    mv a5, a7
-; RV32I-NEXT:    beq a4, t0, .LBB37_2
+; RV32I-NEXT:    beq a4, t0, .LBB42_2
 ; RV32I-NEXT:  # %bb.1:
 ; RV32I-NEXT:    sltu a5, a4, t0
-; RV32I-NEXT:  .LBB37_2:
+; RV32I-NEXT:  .LBB42_2:
 ; RV32I-NEXT:    srli t1, a4, 26
 ; RV32I-NEXT:    slli t2, a2, 6
 ; RV32I-NEXT:    srli t3, a2, 26

>From 85e63fb45249e5240239ff931295d0c74284a7e9 Mon Sep 17 00:00:00 2001
From: Iris Shi <0.0 at owo.li>
Date: Thu, 24 Apr 2025 23:23:49 +0800
Subject: [PATCH 2/2] [RISCV] Add `2^N + 2^M` expanding pattern for mul

---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  60 ++++---
 llvm/test/CodeGen/RISCV/mul.ll                | 156 +++++++++++-------
 llvm/test/CodeGen/RISCV/rv32xtheadba.ll       |  55 +++---
 llvm/test/CodeGen/RISCV/rv32zba.ll            |  55 +++---
 llvm/test/CodeGen/RISCV/rv64xtheadba.ll       |  65 +++++---
 llvm/test/CodeGen/RISCV/rv64zba.ll            | 106 ++++++------
 .../CodeGen/RISCV/rvv/calling-conv-fastcc.ll  |  99 ++++++-----
 .../fixed-vectors-strided-load-store-asm.ll   | 140 +++++++++-------
 .../RISCV/rvv/vreductions-fp-sdnode.ll        |  12 +-
 9 files changed, 432 insertions(+), 316 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 02451ee716865..34d789d1ff5c8 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -15436,6 +15436,31 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
 }
 
+// 2^N +/- 2^M -> (add/sub (shl X, C1), (shl X, C2))
+static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG) {
+  ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  if (!CNode)
+    return SDValue();
+  uint64_t MulAmt = CNode->getZExtValue();
+  uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
+  ISD::NodeType Op;
+  if (isPowerOf2_64(MulAmt + MulAmtLowBit))
+    Op = ISD::SUB;
+  else if (isPowerOf2_64(MulAmt - MulAmtLowBit))
+    Op = ISD::ADD;
+  else
+    return SDValue();
+  uint64_t ShiftAmt1 = MulAmt + MulAmtLowBit;
+  SDLoc DL(N);
+  SDValue Shift1 =
+      DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(0),
+                  DAG.getConstant(Log2_64(ShiftAmt1), DL, N->getValueType(0)));
+  SDValue Shift2 = DAG.getNode(
+      ISD::SHL, DL, N->getValueType(0), N->getOperand(0),
+      DAG.getConstant(Log2_64(MulAmtLowBit), DL, N->getValueType(0)));
+  return DAG.getNode(Op, DL, N->getValueType(0), Shift1, Shift2);
+}
+
 // Try to expand a scalar multiply to a faster sequence.
 static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
                          TargetLowering::DAGCombinerInfo &DCI,
@@ -15443,18 +15468,24 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
 
   EVT VT = N->getValueType(0);
 
+  const bool HasShlAdd =
+      Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa();
+
   // LI + MUL is usually smaller than the alternative sequence.
   if (DAG.getMachineFunction().getFunction().hasMinSize())
     return SDValue();
 
-  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
-    return SDValue();
-
   if (VT != Subtarget.getXLenVT())
     return SDValue();
 
-  const bool HasShlAdd =
-      Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa();
+  // Expanding to shifts here can block better ShlAdd lowerings, so only do
+  // it now when ShlAdd is unavailable; otherwise retry after the ShlAdd combines.
+  if (!HasShlAdd)
+    if (SDValue V = expandMulToAddOrSubOfShl(N, DAG))
+      return V;
+
+  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+    return SDValue();
 
   ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
   if (!CNode)
@@ -15569,22 +15600,7 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
         return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
       }
     }
-  }
 
-  // 2^N - 2^M -> (sub (shl X, C1), (shl X, C2))
-  uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
-  if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
-    uint64_t ShiftAmt1 = MulAmt + MulAmtLowBit;
-    SDLoc DL(N);
-    SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
-                                 DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
-    SDValue Shift2 =
-        DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
-                    DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
-    return DAG.getNode(ISD::SUB, DL, VT, Shift1, Shift2);
-  }
-
-  if (HasShlAdd) {
     for (uint64_t Divisor : {3, 5, 9}) {
       if (MulAmt % Divisor != 0)
         continue;
@@ -15608,6 +15624,10 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
         }
       }
     }
+
+    // Delayed from above: the 2^N +/- 2^M expansion was skipped when ShlAdd is available.
+    if (SDValue V = expandMulToAddOrSubOfShl(N, DAG))
+      return V;
   }
 
   return SDValue();
diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll
index 8dd691946ae5a..15aa522815605 100644
--- a/llvm/test/CodeGen/RISCV/mul.ll
+++ b/llvm/test/CodeGen/RISCV/mul.ll
@@ -467,29 +467,30 @@ define i32 @mulhu_constant(i32 %a) nounwind {
 define i32 @muli32_p10(i32 %a) nounwind {
 ; RV32I-LABEL: muli32_p10:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a1, 10
-; RV32I-NEXT:    tail __mulsi3
+; RV32I-NEXT:    slli a1, a0, 1
+; RV32I-NEXT:    slli a0, a0, 3
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    ret
 ;
 ; RV32IM-LABEL: muli32_p10:
 ; RV32IM:       # %bb.0:
-; RV32IM-NEXT:    li a1, 10
-; RV32IM-NEXT:    mul a0, a0, a1
+; RV32IM-NEXT:    slli a1, a0, 1
+; RV32IM-NEXT:    slli a0, a0, 3
+; RV32IM-NEXT:    add a0, a0, a1
 ; RV32IM-NEXT:    ret
 ;
 ; RV64I-LABEL: muli32_p10:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addi sp, sp, -16
-; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    li a1, 10
-; RV64I-NEXT:    call __muldi3
-; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    slli a1, a0, 1
+; RV64I-NEXT:    slli a0, a0, 3
+; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64IM-LABEL: muli32_p10:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    li a1, 10
-; RV64IM-NEXT:    mulw a0, a0, a1
+; RV64IM-NEXT:    slli a1, a0, 1
+; RV64IM-NEXT:    slli a0, a0, 3
+; RV64IM-NEXT:    addw a0, a0, a1
 ; RV64IM-NEXT:    ret
   %1 = mul i32 %a, 10
   ret i32 %1
@@ -498,8 +499,10 @@ define i32 @muli32_p10(i32 %a) nounwind {
 define i32 @muli32_p14(i32 %a) nounwind {
 ; RV32I-LABEL: muli32_p14:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a1, 14
-; RV32I-NEXT:    tail __mulsi3
+; RV32I-NEXT:    slli a1, a0, 1
+; RV32I-NEXT:    slli a0, a0, 4
+; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    ret
 ;
 ; RV32IM-LABEL: muli32_p14:
 ; RV32IM:       # %bb.0:
@@ -528,29 +531,30 @@ define i32 @muli32_p14(i32 %a) nounwind {
 define i32 @muli32_p20(i32 %a) nounwind {
 ; RV32I-LABEL: muli32_p20:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a1, 20
-; RV32I-NEXT:    tail __mulsi3
+; RV32I-NEXT:    slli a1, a0, 2
+; RV32I-NEXT:    slli a0, a0, 4
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    ret
 ;
 ; RV32IM-LABEL: muli32_p20:
 ; RV32IM:       # %bb.0:
-; RV32IM-NEXT:    li a1, 20
-; RV32IM-NEXT:    mul a0, a0, a1
+; RV32IM-NEXT:    slli a1, a0, 2
+; RV32IM-NEXT:    slli a0, a0, 4
+; RV32IM-NEXT:    add a0, a0, a1
 ; RV32IM-NEXT:    ret
 ;
 ; RV64I-LABEL: muli32_p20:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addi sp, sp, -16
-; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    li a1, 20
-; RV64I-NEXT:    call __muldi3
-; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    slli a1, a0, 2
+; RV64I-NEXT:    slli a0, a0, 4
+; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64IM-LABEL: muli32_p20:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    li a1, 20
-; RV64IM-NEXT:    mulw a0, a0, a1
+; RV64IM-NEXT:    slli a1, a0, 2
+; RV64IM-NEXT:    slli a0, a0, 4
+; RV64IM-NEXT:    addw a0, a0, a1
 ; RV64IM-NEXT:    ret
   %1 = mul i32 %a, 20
   ret i32 %1
@@ -559,8 +563,10 @@ define i32 @muli32_p20(i32 %a) nounwind {
 define i32 @muli32_p28(i32 %a) nounwind {
 ; RV32I-LABEL: muli32_p28:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a1, 28
-; RV32I-NEXT:    tail __mulsi3
+; RV32I-NEXT:    slli a1, a0, 2
+; RV32I-NEXT:    slli a0, a0, 5
+; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    ret
 ;
 ; RV32IM-LABEL: muli32_p28:
 ; RV32IM:       # %bb.0:
@@ -589,8 +595,10 @@ define i32 @muli32_p28(i32 %a) nounwind {
 define i32 @muli32_p30(i32 %a) nounwind {
 ; RV32I-LABEL: muli32_p30:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a1, 30
-; RV32I-NEXT:    tail __mulsi3
+; RV32I-NEXT:    slli a1, a0, 1
+; RV32I-NEXT:    slli a0, a0, 5
+; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    ret
 ;
 ; RV32IM-LABEL: muli32_p30:
 ; RV32IM:       # %bb.0:
@@ -619,8 +627,10 @@ define i32 @muli32_p30(i32 %a) nounwind {
 define i32 @muli32_p56(i32 %a) nounwind {
 ; RV32I-LABEL: muli32_p56:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a1, 56
-; RV32I-NEXT:    tail __mulsi3
+; RV32I-NEXT:    slli a1, a0, 3
+; RV32I-NEXT:    slli a0, a0, 6
+; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    ret
 ;
 ; RV32IM-LABEL: muli32_p56:
 ; RV32IM:       # %bb.0:
@@ -649,8 +659,10 @@ define i32 @muli32_p56(i32 %a) nounwind {
 define i32 @muli32_p60(i32 %a) nounwind {
 ; RV32I-LABEL: muli32_p60:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a1, 60
-; RV32I-NEXT:    tail __mulsi3
+; RV32I-NEXT:    slli a1, a0, 2
+; RV32I-NEXT:    slli a0, a0, 6
+; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    ret
 ;
 ; RV32IM-LABEL: muli32_p60:
 ; RV32IM:       # %bb.0:
@@ -679,8 +691,10 @@ define i32 @muli32_p60(i32 %a) nounwind {
 define i32 @muli32_p62(i32 %a) nounwind {
 ; RV32I-LABEL: muli32_p62:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a1, 62
-; RV32I-NEXT:    tail __mulsi3
+; RV32I-NEXT:    slli a1, a0, 1
+; RV32I-NEXT:    slli a0, a0, 6
+; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    ret
 ;
 ; RV32IM-LABEL: muli32_p62:
 ; RV32IM:       # %bb.0:
@@ -895,8 +909,10 @@ define i64 @muli64_p60(i64 %a) nounwind {
 ;
 ; RV64I-LABEL: muli64_p60:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a1, 60
-; RV64I-NEXT:    tail __muldi3
+; RV64I-NEXT:    slli a1, a0, 2
+; RV64I-NEXT:    slli a0, a0, 6
+; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    ret
 ;
 ; RV64IM-LABEL: muli64_p60:
 ; RV64IM:       # %bb.0:
@@ -923,21 +939,28 @@ define i64 @muli64_p68(i64 %a) nounwind {
 ; RV32IM-LABEL: muli64_p68:
 ; RV32IM:       # %bb.0:
 ; RV32IM-NEXT:    li a2, 68
-; RV32IM-NEXT:    mul a1, a1, a2
-; RV32IM-NEXT:    mulhu a3, a0, a2
-; RV32IM-NEXT:    add a1, a3, a1
-; RV32IM-NEXT:    mul a0, a0, a2
+; RV32IM-NEXT:    slli a3, a1, 2
+; RV32IM-NEXT:    slli a1, a1, 6
+; RV32IM-NEXT:    add a1, a1, a3
+; RV32IM-NEXT:    slli a3, a0, 2
+; RV32IM-NEXT:    mulhu a2, a0, a2
+; RV32IM-NEXT:    slli a0, a0, 6
+; RV32IM-NEXT:    add a1, a2, a1
+; RV32IM-NEXT:    add a0, a0, a3
 ; RV32IM-NEXT:    ret
 ;
 ; RV64I-LABEL: muli64_p68:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a1, 68
-; RV64I-NEXT:    tail __muldi3
+; RV64I-NEXT:    slli a1, a0, 2
+; RV64I-NEXT:    slli a0, a0, 6
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
 ;
 ; RV64IM-LABEL: muli64_p68:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    li a1, 68
-; RV64IM-NEXT:    mul a0, a0, a1
+; RV64IM-NEXT:    slli a1, a0, 2
+; RV64IM-NEXT:    slli a0, a0, 6
+; RV64IM-NEXT:    add a0, a0, a1
 ; RV64IM-NEXT:    ret
   %1 = mul i64 %a, 68
   ret i64 %1
@@ -1093,8 +1116,10 @@ define i64 @muli64_m65(i64 %a) nounwind {
 define i32 @muli32_p384(i32 %a) nounwind {
 ; RV32I-LABEL: muli32_p384:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a1, 384
-; RV32I-NEXT:    tail __mulsi3
+; RV32I-NEXT:    slli a1, a0, 7
+; RV32I-NEXT:    slli a0, a0, 9
+; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    ret
 ;
 ; RV32IM-LABEL: muli32_p384:
 ; RV32IM:       # %bb.0:
@@ -1123,8 +1148,10 @@ define i32 @muli32_p384(i32 %a) nounwind {
 define i32 @muli32_p12288(i32 %a) nounwind {
 ; RV32I-LABEL: muli32_p12288:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    lui a1, 3
-; RV32I-NEXT:    tail __mulsi3
+; RV32I-NEXT:    slli a1, a0, 12
+; RV32I-NEXT:    slli a0, a0, 14
+; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    ret
 ;
 ; RV32IM-LABEL: muli32_p12288:
 ; RV32IM:       # %bb.0:
@@ -1300,12 +1327,16 @@ define i64 @muli64_p4352(i64 %a) nounwind {
 ;
 ; RV32IM-LABEL: muli64_p4352:
 ; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    slli a2, a1, 8
+; RV32IM-NEXT:    slli a1, a1, 12
+; RV32IM-NEXT:    add a1, a1, a2
 ; RV32IM-NEXT:    li a2, 17
 ; RV32IM-NEXT:    slli a2, a2, 8
-; RV32IM-NEXT:    mul a1, a1, a2
-; RV32IM-NEXT:    mulhu a3, a0, a2
-; RV32IM-NEXT:    add a1, a3, a1
-; RV32IM-NEXT:    mul a0, a0, a2
+; RV32IM-NEXT:    mulhu a2, a0, a2
+; RV32IM-NEXT:    add a1, a2, a1
+; RV32IM-NEXT:    slli a2, a0, 8
+; RV32IM-NEXT:    slli a0, a0, 12
+; RV32IM-NEXT:    add a0, a0, a2
 ; RV32IM-NEXT:    ret
 ;
 ; RV64I-LABEL: muli64_p4352:
@@ -2032,12 +2063,16 @@ define i64 @muland_demand(i64 %x) nounwind {
 ; RV64I-NEXT:    li a1, -29
 ; RV64I-NEXT:    srli a1, a1, 2
 ; RV64I-NEXT:    and a0, a0, a1
-; RV64I-NEXT:    li a1, 12
-; RV64I-NEXT:    tail __muldi3
+; RV64I-NEXT:    slli a1, a0, 2
+; RV64I-NEXT:    slli a0, a0, 4
+; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    ret
 ;
 ; RV64IM-LABEL: muland_demand:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    andi a0, a0, -8
+; RV64IM-NEXT:    li a1, -29
+; RV64IM-NEXT:    srli a1, a1, 2
+; RV64IM-NEXT:    and a0, a0, a1
 ; RV64IM-NEXT:    slli a1, a0, 2
 ; RV64IM-NEXT:    slli a0, a0, 4
 ; RV64IM-NEXT:    sub a0, a0, a1
@@ -2068,9 +2103,10 @@ define i64 @mulzext_demand(i32 signext %x) nounwind {
 ;
 ; RV64I-LABEL: mulzext_demand:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a1, 3
-; RV64I-NEXT:    slli a1, a1, 32
-; RV64I-NEXT:    tail __muldi3
+; RV64I-NEXT:    slli a1, a0, 32
+; RV64I-NEXT:    slli a0, a0, 34
+; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    ret
 ;
 ; RV64IM-LABEL: mulzext_demand:
 ; RV64IM:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rv32xtheadba.ll b/llvm/test/CodeGen/RISCV/rv32xtheadba.ll
index 44ab0e1fef6c1..0fc0adbfa83d9 100644
--- a/llvm/test/CodeGen/RISCV/rv32xtheadba.ll
+++ b/llvm/test/CodeGen/RISCV/rv32xtheadba.ll
@@ -116,8 +116,9 @@ define i32 @addmul6(i32 %a, i32 %b) {
 define i32 @addmul10(i32 %a, i32 %b) {
 ; RV32I-LABEL: addmul10:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a2, 10
-; RV32I-NEXT:    mul a0, a0, a2
+; RV32I-NEXT:    slli a2, a0, 1
+; RV32I-NEXT:    slli a0, a0, 3
+; RV32I-NEXT:    add a0, a0, a2
 ; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
@@ -153,8 +154,9 @@ define i32 @addmul12(i32 %a, i32 %b) {
 define i32 @addmul18(i32 %a, i32 %b) {
 ; RV32I-LABEL: addmul18:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a2, 18
-; RV32I-NEXT:    mul a0, a0, a2
+; RV32I-NEXT:    slli a2, a0, 1
+; RV32I-NEXT:    slli a0, a0, 4
+; RV32I-NEXT:    add a0, a0, a2
 ; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
@@ -171,8 +173,9 @@ define i32 @addmul18(i32 %a, i32 %b) {
 define i32 @addmul20(i32 %a, i32 %b) {
 ; RV32I-LABEL: addmul20:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a2, 20
-; RV32I-NEXT:    mul a0, a0, a2
+; RV32I-NEXT:    slli a2, a0, 2
+; RV32I-NEXT:    slli a0, a0, 4
+; RV32I-NEXT:    add a0, a0, a2
 ; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
@@ -208,8 +211,9 @@ define i32 @addmul24(i32 %a, i32 %b) {
 define i32 @addmul36(i32 %a, i32 %b) {
 ; RV32I-LABEL: addmul36:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a2, 36
-; RV32I-NEXT:    mul a0, a0, a2
+; RV32I-NEXT:    slli a2, a0, 2
+; RV32I-NEXT:    slli a0, a0, 5
+; RV32I-NEXT:    add a0, a0, a2
 ; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
@@ -226,8 +230,9 @@ define i32 @addmul36(i32 %a, i32 %b) {
 define i32 @addmul40(i32 %a, i32 %b) {
 ; RV32I-LABEL: addmul40:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a2, 40
-; RV32I-NEXT:    mul a0, a0, a2
+; RV32I-NEXT:    slli a2, a0, 3
+; RV32I-NEXT:    slli a0, a0, 5
+; RV32I-NEXT:    add a0, a0, a2
 ; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
@@ -244,8 +249,9 @@ define i32 @addmul40(i32 %a, i32 %b) {
 define i32 @addmul72(i32 %a, i32 %b) {
 ; RV32I-LABEL: addmul72:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a2, 72
-; RV32I-NEXT:    mul a0, a0, a2
+; RV32I-NEXT:    slli a2, a0, 3
+; RV32I-NEXT:    slli a0, a0, 6
+; RV32I-NEXT:    add a0, a0, a2
 ; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
@@ -279,8 +285,9 @@ define i32 @mul96(i32 %a) {
 define i32 @mul160(i32 %a) {
 ; RV32I-LABEL: mul160:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a1, 160
-; RV32I-NEXT:    mul a0, a0, a1
+; RV32I-NEXT:    slli a1, a0, 5
+; RV32I-NEXT:    slli a0, a0, 7
+; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV32XTHEADBA-LABEL: mul160:
@@ -312,8 +319,9 @@ define i32 @mul200(i32 %a) {
 define i32 @mul288(i32 %a) {
 ; RV32I-LABEL: mul288:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a1, 288
-; RV32I-NEXT:    mul a0, a0, a1
+; RV32I-NEXT:    slli a1, a0, 5
+; RV32I-NEXT:    slli a0, a0, 8
+; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV32XTHEADBA-LABEL: mul288:
@@ -328,8 +336,9 @@ define i32 @mul288(i32 %a) {
 define i32 @mul258(i32 %a) {
 ; RV32I-LABEL: mul258:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a1, 258
-; RV32I-NEXT:    mul a0, a0, a1
+; RV32I-NEXT:    slli a1, a0, 1
+; RV32I-NEXT:    slli a0, a0, 8
+; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV32XTHEADBA-LABEL: mul258:
@@ -344,8 +353,9 @@ define i32 @mul258(i32 %a) {
 define i32 @mul260(i32 %a) {
 ; RV32I-LABEL: mul260:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a1, 260
-; RV32I-NEXT:    mul a0, a0, a1
+; RV32I-NEXT:    slli a1, a0, 2
+; RV32I-NEXT:    slli a0, a0, 8
+; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV32XTHEADBA-LABEL: mul260:
@@ -360,8 +370,9 @@ define i32 @mul260(i32 %a) {
 define i32 @mul264(i32 %a) {
 ; RV32I-LABEL: mul264:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a1, 264
-; RV32I-NEXT:    mul a0, a0, a1
+; RV32I-NEXT:    slli a1, a0, 3
+; RV32I-NEXT:    slli a0, a0, 8
+; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV32XTHEADBA-LABEL: mul264:
diff --git a/llvm/test/CodeGen/RISCV/rv32zba.ll b/llvm/test/CodeGen/RISCV/rv32zba.ll
index fec156ac2be27..f8ca41782c6e1 100644
--- a/llvm/test/CodeGen/RISCV/rv32zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zba.ll
@@ -82,8 +82,9 @@ define i32 @addmul6(i32 %a, i32 %b) {
 define i32 @addmul10(i32 %a, i32 %b) {
 ; RV32I-LABEL: addmul10:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a2, 10
-; RV32I-NEXT:    mul a0, a0, a2
+; RV32I-NEXT:    slli a2, a0, 1
+; RV32I-NEXT:    slli a0, a0, 3
+; RV32I-NEXT:    add a0, a0, a2
 ; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
@@ -119,8 +120,9 @@ define i32 @addmul12(i32 %a, i32 %b) {
 define i32 @addmul18(i32 %a, i32 %b) {
 ; RV32I-LABEL: addmul18:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a2, 18
-; RV32I-NEXT:    mul a0, a0, a2
+; RV32I-NEXT:    slli a2, a0, 1
+; RV32I-NEXT:    slli a0, a0, 4
+; RV32I-NEXT:    add a0, a0, a2
 ; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
@@ -137,8 +139,9 @@ define i32 @addmul18(i32 %a, i32 %b) {
 define i32 @addmul20(i32 %a, i32 %b) {
 ; RV32I-LABEL: addmul20:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a2, 20
-; RV32I-NEXT:    mul a0, a0, a2
+; RV32I-NEXT:    slli a2, a0, 2
+; RV32I-NEXT:    slli a0, a0, 4
+; RV32I-NEXT:    add a0, a0, a2
 ; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
@@ -174,8 +177,9 @@ define i32 @addmul24(i32 %a, i32 %b) {
 define i32 @addmul36(i32 %a, i32 %b) {
 ; RV32I-LABEL: addmul36:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a2, 36
-; RV32I-NEXT:    mul a0, a0, a2
+; RV32I-NEXT:    slli a2, a0, 2
+; RV32I-NEXT:    slli a0, a0, 5
+; RV32I-NEXT:    add a0, a0, a2
 ; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
@@ -192,8 +196,9 @@ define i32 @addmul36(i32 %a, i32 %b) {
 define i32 @addmul40(i32 %a, i32 %b) {
 ; RV32I-LABEL: addmul40:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a2, 40
-; RV32I-NEXT:    mul a0, a0, a2
+; RV32I-NEXT:    slli a2, a0, 3
+; RV32I-NEXT:    slli a0, a0, 5
+; RV32I-NEXT:    add a0, a0, a2
 ; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
@@ -210,8 +215,9 @@ define i32 @addmul40(i32 %a, i32 %b) {
 define i32 @addmul72(i32 %a, i32 %b) {
 ; RV32I-LABEL: addmul72:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a2, 72
-; RV32I-NEXT:    mul a0, a0, a2
+; RV32I-NEXT:    slli a2, a0, 3
+; RV32I-NEXT:    slli a0, a0, 6
+; RV32I-NEXT:    add a0, a0, a2
 ; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
@@ -245,8 +251,9 @@ define i32 @mul96(i32 %a) {
 define i32 @mul160(i32 %a) {
 ; RV32I-LABEL: mul160:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a1, 160
-; RV32I-NEXT:    mul a0, a0, a1
+; RV32I-NEXT:    slli a1, a0, 5
+; RV32I-NEXT:    slli a0, a0, 7
+; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBA-LABEL: mul160:
@@ -261,8 +268,9 @@ define i32 @mul160(i32 %a) {
 define i32 @mul288(i32 %a) {
 ; RV32I-LABEL: mul288:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a1, 288
-; RV32I-NEXT:    mul a0, a0, a1
+; RV32I-NEXT:    slli a1, a0, 5
+; RV32I-NEXT:    slli a0, a0, 8
+; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBA-LABEL: mul288:
@@ -277,8 +285,9 @@ define i32 @mul288(i32 %a) {
 define i32 @mul258(i32 %a) {
 ; RV32I-LABEL: mul258:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a1, 258
-; RV32I-NEXT:    mul a0, a0, a1
+; RV32I-NEXT:    slli a1, a0, 1
+; RV32I-NEXT:    slli a0, a0, 8
+; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBA-LABEL: mul258:
@@ -293,8 +302,9 @@ define i32 @mul258(i32 %a) {
 define i32 @mul260(i32 %a) {
 ; RV32I-LABEL: mul260:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a1, 260
-; RV32I-NEXT:    mul a0, a0, a1
+; RV32I-NEXT:    slli a1, a0, 2
+; RV32I-NEXT:    slli a0, a0, 8
+; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBA-LABEL: mul260:
@@ -309,8 +319,9 @@ define i32 @mul260(i32 %a) {
 define i32 @mul264(i32 %a) {
 ; RV32I-LABEL: mul264:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a1, 264
-; RV32I-NEXT:    mul a0, a0, a1
+; RV32I-NEXT:    slli a1, a0, 3
+; RV32I-NEXT:    slli a0, a0, 8
+; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBA-LABEL: mul264:
diff --git a/llvm/test/CodeGen/RISCV/rv64xtheadba.ll b/llvm/test/CodeGen/RISCV/rv64xtheadba.ll
index 2272c17bcef03..05396e3355ff6 100644
--- a/llvm/test/CodeGen/RISCV/rv64xtheadba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64xtheadba.ll
@@ -131,8 +131,9 @@ define i64 @disjointormul6(i64 %a, i64 %b) {
 define i64 @addmul10(i64 %a, i64 %b) {
 ; RV64I-LABEL: addmul10:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a2, 10
-; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    slli a2, a0, 1
+; RV64I-NEXT:    slli a0, a0, 3
+; RV64I-NEXT:    add a0, a0, a2
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
@@ -168,8 +169,9 @@ define i64 @addmul12(i64 %a, i64 %b) {
 define i64 @addmul18(i64 %a, i64 %b) {
 ; RV64I-LABEL: addmul18:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a2, 18
-; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    slli a2, a0, 1
+; RV64I-NEXT:    slli a0, a0, 4
+; RV64I-NEXT:    add a0, a0, a2
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
@@ -186,8 +188,9 @@ define i64 @addmul18(i64 %a, i64 %b) {
 define i64 @addmul20(i64 %a, i64 %b) {
 ; RV64I-LABEL: addmul20:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a2, 20
-; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    slli a2, a0, 2
+; RV64I-NEXT:    slli a0, a0, 4
+; RV64I-NEXT:    add a0, a0, a2
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
@@ -235,8 +238,9 @@ define i64 @addmul24(i64 %a, i64 %b) {
 define i64 @addmul36(i64 %a, i64 %b) {
 ; RV64I-LABEL: addmul36:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a2, 36
-; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    slli a2, a0, 2
+; RV64I-NEXT:    slli a0, a0, 5
+; RV64I-NEXT:    add a0, a0, a2
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
@@ -253,8 +257,9 @@ define i64 @addmul36(i64 %a, i64 %b) {
 define i64 @addmul40(i64 %a, i64 %b) {
 ; RV64I-LABEL: addmul40:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a2, 40
-; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    slli a2, a0, 3
+; RV64I-NEXT:    slli a0, a0, 5
+; RV64I-NEXT:    add a0, a0, a2
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
@@ -271,8 +276,9 @@ define i64 @addmul40(i64 %a, i64 %b) {
 define i64 @addmul72(i64 %a, i64 %b) {
 ; RV64I-LABEL: addmul72:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a2, 72
-; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    slli a2, a0, 3
+; RV64I-NEXT:    slli a0, a0, 6
+; RV64I-NEXT:    add a0, a0, a2
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
@@ -632,8 +638,9 @@ define i64 @mul137(i64 %a) {
 define i64 @mul160(i64 %a) {
 ; RV64I-LABEL: mul160:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a1, 160
-; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    slli a1, a0, 5
+; RV64I-NEXT:    slli a0, a0, 7
+; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64XTHEADBA-LABEL: mul160:
@@ -648,8 +655,9 @@ define i64 @mul160(i64 %a) {
 define i64 @mul288(i64 %a) {
 ; RV64I-LABEL: mul288:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a1, 288
-; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    slli a1, a0, 5
+; RV64I-NEXT:    slli a0, a0, 8
+; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64XTHEADBA-LABEL: mul288:
@@ -697,8 +705,9 @@ define i64 @sh3add_imm(i64 %0) {
 define i64 @mul258(i64 %a) {
 ; RV64I-LABEL: mul258:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a1, 258
-; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    slli a1, a0, 1
+; RV64I-NEXT:    slli a0, a0, 8
+; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64XTHEADBA-LABEL: mul258:
@@ -713,8 +722,9 @@ define i64 @mul258(i64 %a) {
 define i64 @mul260(i64 %a) {
 ; RV64I-LABEL: mul260:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a1, 260
-; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    slli a1, a0, 2
+; RV64I-NEXT:    slli a0, a0, 8
+; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64XTHEADBA-LABEL: mul260:
@@ -729,8 +739,9 @@ define i64 @mul260(i64 %a) {
 define i64 @mul264(i64 %a) {
 ; RV64I-LABEL: mul264:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a1, 264
-; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    slli a1, a0, 3
+; RV64I-NEXT:    slli a0, a0, 8
+; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64XTHEADBA-LABEL: mul264:
@@ -988,8 +999,9 @@ define signext i32 @mulw192(i32 signext %a) {
 define signext i32 @mulw320(i32 signext %a) {
 ; RV64I-LABEL: mulw320:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a1, 320
-; RV64I-NEXT:    mulw a0, a0, a1
+; RV64I-NEXT:    slli a1, a0, 6
+; RV64I-NEXT:    slli a0, a0, 8
+; RV64I-NEXT:    addw a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64XTHEADBA-LABEL: mulw320:
@@ -1004,8 +1016,9 @@ define signext i32 @mulw320(i32 signext %a) {
 define signext i32 @mulw576(i32 signext %a) {
 ; RV64I-LABEL: mulw576:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a1, 576
-; RV64I-NEXT:    mulw a0, a0, a1
+; RV64I-NEXT:    slli a1, a0, 6
+; RV64I-NEXT:    slli a0, a0, 9
+; RV64I-NEXT:    addw a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64XTHEADBA-LABEL: mulw576:
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index 9760821832b37..e362e5ebd8192 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -414,8 +414,9 @@ define i64 @disjointormul6(i64 %a, i64 %b) {
 define i64 @addmul10(i64 %a, i64 %b) {
 ; RV64I-LABEL: addmul10:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a2, 10
-; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    slli a2, a0, 1
+; RV64I-NEXT:    slli a0, a0, 3
+; RV64I-NEXT:    add a0, a0, a2
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
@@ -451,8 +452,9 @@ define i64 @addmul12(i64 %a, i64 %b) {
 define i64 @addmul18(i64 %a, i64 %b) {
 ; RV64I-LABEL: addmul18:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a2, 18
-; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    slli a2, a0, 1
+; RV64I-NEXT:    slli a0, a0, 4
+; RV64I-NEXT:    add a0, a0, a2
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
@@ -469,8 +471,9 @@ define i64 @addmul18(i64 %a, i64 %b) {
 define i64 @addmul20(i64 %a, i64 %b) {
 ; RV64I-LABEL: addmul20:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a2, 20
-; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    slli a2, a0, 2
+; RV64I-NEXT:    slli a0, a0, 4
+; RV64I-NEXT:    add a0, a0, a2
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
@@ -518,8 +521,9 @@ define i64 @addmul24(i64 %a, i64 %b) {
 define i64 @addmul36(i64 %a, i64 %b) {
 ; RV64I-LABEL: addmul36:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a2, 36
-; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    slli a2, a0, 2
+; RV64I-NEXT:    slli a0, a0, 5
+; RV64I-NEXT:    add a0, a0, a2
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
@@ -536,8 +540,9 @@ define i64 @addmul36(i64 %a, i64 %b) {
 define i64 @addmul40(i64 %a, i64 %b) {
 ; RV64I-LABEL: addmul40:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a2, 40
-; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    slli a2, a0, 3
+; RV64I-NEXT:    slli a0, a0, 5
+; RV64I-NEXT:    add a0, a0, a2
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
@@ -554,8 +559,9 @@ define i64 @addmul40(i64 %a, i64 %b) {
 define i64 @addmul72(i64 %a, i64 %b) {
 ; RV64I-LABEL: addmul72:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a2, 72
-; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    slli a2, a0, 3
+; RV64I-NEXT:    slli a0, a0, 6
+; RV64I-NEXT:    add a0, a0, a2
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
@@ -915,8 +921,9 @@ define i64 @mul137(i64 %a) {
 define i64 @mul160(i64 %a) {
 ; RV64I-LABEL: mul160:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a1, 160
-; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    slli a1, a0, 5
+; RV64I-NEXT:    slli a0, a0, 7
+; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBA-LABEL: mul160:
@@ -931,8 +938,9 @@ define i64 @mul160(i64 %a) {
 define i64 @mul288(i64 %a) {
 ; RV64I-LABEL: mul288:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a1, 288
-; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    slli a1, a0, 5
+; RV64I-NEXT:    slli a0, a0, 8
+; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBA-LABEL: mul288:
@@ -947,10 +955,10 @@ define i64 @mul288(i64 %a) {
 define i64 @zext_mul68(i32 signext %a) {
 ; RV64I-LABEL: zext_mul68:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a1, 17
-; RV64I-NEXT:    slli a1, a1, 34
 ; RV64I-NEXT:    slli a0, a0, 32
-; RV64I-NEXT:    mulhu a0, a0, a1
+; RV64I-NEXT:    srli a1, a0, 30
+; RV64I-NEXT:    srli a0, a0, 26
+; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBA-LABEL: zext_mul68:
@@ -985,10 +993,10 @@ define i64 @zext_mul96(i32 signext %a) {
 define i64 @zext_mul160(i32 signext %a) {
 ; RV64I-LABEL: zext_mul160:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a1, 5
-; RV64I-NEXT:    slli a1, a1, 37
 ; RV64I-NEXT:    slli a0, a0, 32
-; RV64I-NEXT:    mulhu a0, a0, a1
+; RV64I-NEXT:    srli a1, a0, 27
+; RV64I-NEXT:    srli a0, a0, 25
+; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBA-LABEL: zext_mul160:
@@ -1004,10 +1012,10 @@ define i64 @zext_mul160(i32 signext %a) {
 define i64 @zext_mul288(i32 signext %a) {
 ; RV64I-LABEL: zext_mul288:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a1, 9
-; RV64I-NEXT:    slli a1, a1, 37
 ; RV64I-NEXT:    slli a0, a0, 32
-; RV64I-NEXT:    mulhu a0, a0, a1
+; RV64I-NEXT:    srli a1, a0, 27
+; RV64I-NEXT:    srli a0, a0, 24
+; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBA-LABEL: zext_mul288:
@@ -1043,9 +1051,9 @@ define i64 @zext_mul12884901888(i32 signext %a) {
 define i64 @zext_mul21474836480(i32 signext %a) {
 ; RV64I-LABEL: zext_mul21474836480:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a1, 5
-; RV64I-NEXT:    slli a1, a1, 32
-; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    slli a1, a0, 32
+; RV64I-NEXT:    slli a0, a0, 34
+; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBA-LABEL: zext_mul21474836480:
@@ -1062,9 +1070,9 @@ define i64 @zext_mul21474836480(i32 signext %a) {
 define i64 @zext_mul38654705664(i32 signext %a) {
 ; RV64I-LABEL: zext_mul38654705664:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a1, 9
-; RV64I-NEXT:    slli a1, a1, 32
-; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    slli a1, a0, 32
+; RV64I-NEXT:    slli a0, a0, 35
+; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBA-LABEL: zext_mul38654705664:
@@ -1188,8 +1196,9 @@ define i64 @adduw_imm(i32 signext %0) nounwind {
 define i64 @mul258(i64 %a) {
 ; RV64I-LABEL: mul258:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a1, 258
-; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    slli a1, a0, 1
+; RV64I-NEXT:    slli a0, a0, 8
+; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBA-LABEL: mul258:
@@ -1204,8 +1213,9 @@ define i64 @mul258(i64 %a) {
 define i64 @mul260(i64 %a) {
 ; RV64I-LABEL: mul260:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a1, 260
-; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    slli a1, a0, 2
+; RV64I-NEXT:    slli a0, a0, 8
+; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBA-LABEL: mul260:
@@ -1220,8 +1230,9 @@ define i64 @mul260(i64 %a) {
 define i64 @mul264(i64 %a) {
 ; RV64I-LABEL: mul264:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a1, 264
-; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    slli a1, a0, 3
+; RV64I-NEXT:    slli a0, a0, 8
+; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBA-LABEL: mul264:
@@ -1496,8 +1507,9 @@ define signext i32 @mulw192(i32 signext %a) {
 define signext i32 @mulw320(i32 signext %a) {
 ; RV64I-LABEL: mulw320:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a1, 320
-; RV64I-NEXT:    mulw a0, a0, a1
+; RV64I-NEXT:    slli a1, a0, 6
+; RV64I-NEXT:    slli a0, a0, 8
+; RV64I-NEXT:    addw a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBA-LABEL: mulw320:
@@ -1512,8 +1524,9 @@ define signext i32 @mulw320(i32 signext %a) {
 define signext i32 @mulw576(i32 signext %a) {
 ; RV64I-LABEL: mulw576:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a1, 576
-; RV64I-NEXT:    mulw a0, a0, a1
+; RV64I-NEXT:    slli a1, a0, 6
+; RV64I-NEXT:    slli a0, a0, 9
+; RV64I-NEXT:    addw a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBA-LABEL: mulw576:
@@ -2977,8 +2990,9 @@ define i64 @bext_mul132(i32 %1, i32 %2) {
 ; RV64I:       # %bb.0: # %entry
 ; RV64I-NEXT:    srlw a0, a0, a1
 ; RV64I-NEXT:    andi a0, a0, 1
-; RV64I-NEXT:    li a1, 132
-; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    slli a1, a0, 2
+; RV64I-NEXT:    slli a0, a0, 7
+; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBANOZBB-LABEL: bext_mul132:
@@ -3015,10 +3029,10 @@ define ptr @gep_lshr_i32(ptr %0, i64 %1) {
 ; RV64I-LABEL: gep_lshr_i32:
 ; RV64I:       # %bb.0: # %entry
 ; RV64I-NEXT:    srli a1, a1, 2
-; RV64I-NEXT:    li a2, 5
-; RV64I-NEXT:    slli a2, a2, 36
 ; RV64I-NEXT:    slli a1, a1, 32
-; RV64I-NEXT:    mulhu a1, a1, a2
+; RV64I-NEXT:    srli a2, a1, 28
+; RV64I-NEXT:    srli a1, a1, 26
+; RV64I-NEXT:    add a1, a1, a2
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
index 530f9bf19fce7..bd912193c4fed 100644
--- a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
@@ -105,87 +105,86 @@ define fastcc <vscale x 128 x i32> @ret_split_nxv128i32(ptr %x) {
 ; CHECK-NEXT:    sub sp, sp, a2
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
 ; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    li a3, 40
 ; CHECK-NEXT:    vl8re32.v v8, (a1)
-; CHECK-NEXT:    csrr a4, vlenb
-; CHECK-NEXT:    slli a4, a4, 5
-; CHECK-NEXT:    add a4, sp, a4
-; CHECK-NEXT:    addi a4, a4, 16
-; CHECK-NEXT:    vs8r.v v8, (a4) # vscale x 64-byte Folded Spill
-; CHECK-NEXT:    slli a4, a2, 3
-; CHECK-NEXT:    slli a5, a2, 5
-; CHECK-NEXT:    slli a6, a2, 4
-; CHECK-NEXT:    slli a7, a2, 6
-; CHECK-NEXT:    mul a2, a2, a3
-; CHECK-NEXT:    sub a3, a5, a4
-; CHECK-NEXT:    sub t0, a7, a6
-; CHECK-NEXT:    sub a7, a7, a4
-; CHECK-NEXT:    add t1, a1, a4
-; CHECK-NEXT:    add t2, a1, a6
-; CHECK-NEXT:    add t3, a1, a5
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 5
+; CHECK-NEXT:    add a3, sp, a3
+; CHECK-NEXT:    addi a3, a3, 16
+; CHECK-NEXT:    vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    slli a3, a2, 3
+; CHECK-NEXT:    slli a4, a2, 5
+; CHECK-NEXT:    slli a5, a2, 4
+; CHECK-NEXT:    slli a2, a2, 6
+; CHECK-NEXT:    sub a6, a4, a3
+; CHECK-NEXT:    add a7, a4, a3
+; CHECK-NEXT:    sub t0, a2, a5
+; CHECK-NEXT:    sub a2, a2, a3
+; CHECK-NEXT:    add t1, a1, a3
+; CHECK-NEXT:    add t2, a1, a5
+; CHECK-NEXT:    add t3, a1, a4
 ; CHECK-NEXT:    vl8re32.v v8, (t1)
 ; CHECK-NEXT:    csrr t1, vlenb
-; CHECK-NEXT:    li t4, 24
-; CHECK-NEXT:    mul t1, t1, t4
+; CHECK-NEXT:    slli t1, t1, 4
 ; CHECK-NEXT:    add t1, sp, t1
 ; CHECK-NEXT:    addi t1, t1, 16
 ; CHECK-NEXT:    vs8r.v v8, (t1) # vscale x 64-byte Folded Spill
-; CHECK-NEXT:    add t1, a1, a2
+; CHECK-NEXT:    add t1, a1, a6
 ; CHECK-NEXT:    vl8re32.v v8, (t2)
-; CHECK-NEXT:    csrr t2, vlenb
-; CHECK-NEXT:    slli t2, t2, 3
-; CHECK-NEXT:    add t2, sp, t2
-; CHECK-NEXT:    addi t2, t2, 16
+; CHECK-NEXT:    addi t2, sp, 16
 ; CHECK-NEXT:    vs8r.v v8, (t2) # vscale x 64-byte Folded Spill
-; CHECK-NEXT:    add t2, a1, a3
+; CHECK-NEXT:    add t2, a1, a7
 ; CHECK-NEXT:    vl8re32.v v16, (t3)
 ; CHECK-NEXT:    add t3, a1, t0
-; CHECK-NEXT:    add a1, a1, a7
+; CHECK-NEXT:    add a1, a1, a2
 ; CHECK-NEXT:    vl8re32.v v8, (t1)
-; CHECK-NEXT:    vl8re32.v v24, (t2)
 ; CHECK-NEXT:    csrr t1, vlenb
-; CHECK-NEXT:    slli t1, t1, 4
+; CHECK-NEXT:    li t4, 24
+; CHECK-NEXT:    mul t1, t1, t4
 ; CHECK-NEXT:    add t1, sp, t1
 ; CHECK-NEXT:    addi t1, t1, 16
-; CHECK-NEXT:    vs8r.v v24, (t1) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vs8r.v v8, (t1) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vl8re32.v v8, (t2)
+; CHECK-NEXT:    csrr t1, vlenb
+; CHECK-NEXT:    slli t1, t1, 3
+; CHECK-NEXT:    add t1, sp, t1
+; CHECK-NEXT:    addi t1, t1, 16
+; CHECK-NEXT:    vs8r.v v8, (t1) # vscale x 64-byte Folded Spill
 ; CHECK-NEXT:    vl8re32.v v24, (t3)
-; CHECK-NEXT:    addi t1, sp, 16
-; CHECK-NEXT:    vs8r.v v24, (t1) # vscale x 64-byte Folded Spill
-; CHECK-NEXT:    vl8re32.v v24, (a1)
+; CHECK-NEXT:    vl8re32.v v8, (a1)
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    slli a1, a1, 5
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
 ; CHECK-NEXT:    vl8r.v v0, (a1) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vs8r.v v0, (a0)
-; CHECK-NEXT:    add a2, a0, a2
-; CHECK-NEXT:    vs8r.v v8, (a2)
+; CHECK-NEXT:    add a4, a0, a4
+; CHECK-NEXT:    vs8r.v v16, (a4)
 ; CHECK-NEXT:    add a5, a0, a5
+; CHECK-NEXT:    addi a1, sp, 16
+; CHECK-NEXT:    vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
 ; CHECK-NEXT:    vs8r.v v16, (a5)
-; CHECK-NEXT:    add a6, a0, a6
+; CHECK-NEXT:    add a3, a0, a3
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    slli a1, a1, 4
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
-; CHECK-NEXT:    vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vs8r.v v8, (a6)
-; CHECK-NEXT:    add a4, a0, a4
+; CHECK-NEXT:    vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vs8r.v v16, (a3)
+; CHECK-NEXT:    add a2, a0, a2
+; CHECK-NEXT:    vs8r.v v8, (a2)
+; CHECK-NEXT:    add t0, a0, t0
+; CHECK-NEXT:    vs8r.v v24, (t0)
+; CHECK-NEXT:    add a7, a0, a7
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    li a2, 24
-; CHECK-NEXT:    mul a1, a1, a2
+; CHECK-NEXT:    slli a1, a1, 3
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
 ; CHECK-NEXT:    vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vs8r.v v8, (a4)
-; CHECK-NEXT:    add a7, a0, a7
-; CHECK-NEXT:    vs8r.v v24, (a7)
-; CHECK-NEXT:    add t0, a0, t0
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
-; CHECK-NEXT:    vs8r.v v8, (t0)
-; CHECK-NEXT:    add a0, a0, a3
+; CHECK-NEXT:    vs8r.v v8, (a7)
+; CHECK-NEXT:    add a0, a0, a6
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
+; CHECK-NEXT:    li a2, 24
+; CHECK-NEXT:    mul a1, a1, a2
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
 ; CHECK-NEXT:    vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
index 29d9a8a9b060c..07aa05f609c40 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
@@ -653,28 +653,31 @@ define void @gather_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptur
 ; ZVE32F-LABEL: gather_of_pointers:
 ; ZVE32F:       # %bb.0: # %bb
 ; ZVE32F-NEXT:    li a2, 0
-; ZVE32F-NEXT:    lui a4, 2
-; ZVE32F-NEXT:    li a3, 1
-; ZVE32F-NEXT:    add a4, a0, a4
-; ZVE32F-NEXT:    li a5, 40
+; ZVE32F-NEXT:    lui a3, 2
+; ZVE32F-NEXT:    add a3, a0, a3
+; ZVE32F-NEXT:    li a4, 1
 ; ZVE32F-NEXT:  .LBB12_1: # %bb2
 ; ZVE32F-NEXT:    # =>This Inner Loop Header: Depth=1
-; ZVE32F-NEXT:    mul a6, a3, a5
-; ZVE32F-NEXT:    mul a7, a2, a5
+; ZVE32F-NEXT:    slli a5, a4, 3
+; ZVE32F-NEXT:    slli a6, a4, 5
+; ZVE32F-NEXT:    slli a7, a2, 3
+; ZVE32F-NEXT:    slli t0, a2, 5
 ; ZVE32F-NEXT:    addi a2, a2, 4
-; ZVE32F-NEXT:    add a6, a1, a6
+; ZVE32F-NEXT:    add a5, a6, a5
+; ZVE32F-NEXT:    add a7, t0, a7
+; ZVE32F-NEXT:    add a5, a1, a5
 ; ZVE32F-NEXT:    add a7, a1, a7
-; ZVE32F-NEXT:    ld t0, 0(a7)
-; ZVE32F-NEXT:    ld t1, 0(a6)
+; ZVE32F-NEXT:    ld a6, 0(a7)
+; ZVE32F-NEXT:    ld t0, 0(a5)
 ; ZVE32F-NEXT:    ld a7, 80(a7)
-; ZVE32F-NEXT:    ld a6, 80(a6)
-; ZVE32F-NEXT:    sd t0, 0(a0)
-; ZVE32F-NEXT:    sd t1, 8(a0)
+; ZVE32F-NEXT:    ld a5, 80(a5)
+; ZVE32F-NEXT:    sd a6, 0(a0)
+; ZVE32F-NEXT:    sd t0, 8(a0)
 ; ZVE32F-NEXT:    sd a7, 16(a0)
-; ZVE32F-NEXT:    sd a6, 24(a0)
+; ZVE32F-NEXT:    sd a5, 24(a0)
 ; ZVE32F-NEXT:    addi a0, a0, 32
-; ZVE32F-NEXT:    addi a3, a3, 4
-; ZVE32F-NEXT:    bne a0, a4, .LBB12_1
+; ZVE32F-NEXT:    addi a4, a4, 4
+; ZVE32F-NEXT:    bne a0, a3, .LBB12_1
 ; ZVE32F-NEXT:  # %bb.2: # %bb18
 ; ZVE32F-NEXT:    ret
 ;
@@ -701,28 +704,31 @@ define void @gather_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptur
 ; OPTV-LABEL: gather_of_pointers:
 ; OPTV:       # %bb.0: # %bb
 ; OPTV-NEXT:    li a2, 0
-; OPTV-NEXT:    lui a4, 2
-; OPTV-NEXT:    li a3, 1
-; OPTV-NEXT:    add a4, a0, a4
-; OPTV-NEXT:    li a5, 40
+; OPTV-NEXT:    lui a3, 2
+; OPTV-NEXT:    add a3, a0, a3
+; OPTV-NEXT:    li a4, 1
 ; OPTV-NEXT:  .LBB12_1: # %bb2
 ; OPTV-NEXT:    # =>This Inner Loop Header: Depth=1
-; OPTV-NEXT:    mul a6, a3, a5
-; OPTV-NEXT:    mul a7, a2, a5
+; OPTV-NEXT:    slli a5, a4, 3
+; OPTV-NEXT:    slli a6, a4, 5
+; OPTV-NEXT:    slli a7, a2, 3
+; OPTV-NEXT:    slli t0, a2, 5
 ; OPTV-NEXT:    addi a2, a2, 4
-; OPTV-NEXT:    add a6, a1, a6
+; OPTV-NEXT:    add a5, a6, a5
+; OPTV-NEXT:    add a7, t0, a7
+; OPTV-NEXT:    add a5, a1, a5
 ; OPTV-NEXT:    add a7, a1, a7
-; OPTV-NEXT:    ld t0, 0(a7)
-; OPTV-NEXT:    ld t1, 0(a6)
+; OPTV-NEXT:    ld a6, 0(a7)
+; OPTV-NEXT:    ld t0, 0(a5)
 ; OPTV-NEXT:    ld a7, 80(a7)
-; OPTV-NEXT:    ld a6, 80(a6)
-; OPTV-NEXT:    sd t0, 0(a0)
-; OPTV-NEXT:    sd t1, 8(a0)
+; OPTV-NEXT:    ld a5, 80(a5)
+; OPTV-NEXT:    sd a6, 0(a0)
+; OPTV-NEXT:    sd t0, 8(a0)
 ; OPTV-NEXT:    sd a7, 16(a0)
-; OPTV-NEXT:    sd a6, 24(a0)
+; OPTV-NEXT:    sd a5, 24(a0)
 ; OPTV-NEXT:    addi a0, a0, 32
-; OPTV-NEXT:    addi a3, a3, 4
-; OPTV-NEXT:    bne a0, a4, .LBB12_1
+; OPTV-NEXT:    addi a4, a4, 4
+; OPTV-NEXT:    bne a0, a3, .LBB12_1
 ; OPTV-NEXT:  # %bb.2: # %bb18
 ; OPTV-NEXT:    ret
 bb:
@@ -778,28 +784,31 @@ define void @scatter_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptu
 ; ZVE32F-LABEL: scatter_of_pointers:
 ; ZVE32F:       # %bb.0: # %bb
 ; ZVE32F-NEXT:    li a2, 0
-; ZVE32F-NEXT:    lui a4, 2
-; ZVE32F-NEXT:    li a3, 1
-; ZVE32F-NEXT:    add a4, a1, a4
-; ZVE32F-NEXT:    li a5, 40
+; ZVE32F-NEXT:    lui a3, 2
+; ZVE32F-NEXT:    add a3, a1, a3
+; ZVE32F-NEXT:    li a4, 1
 ; ZVE32F-NEXT:  .LBB13_1: # %bb2
 ; ZVE32F-NEXT:    # =>This Inner Loop Header: Depth=1
-; ZVE32F-NEXT:    ld a6, 0(a1)
-; ZVE32F-NEXT:    ld a7, 8(a1)
-; ZVE32F-NEXT:    ld t0, 16(a1)
-; ZVE32F-NEXT:    ld t1, 24(a1)
-; ZVE32F-NEXT:    mul t2, a3, a5
-; ZVE32F-NEXT:    mul t3, a2, a5
+; ZVE32F-NEXT:    ld a5, 0(a1)
+; ZVE32F-NEXT:    ld a6, 8(a1)
+; ZVE32F-NEXT:    ld a7, 16(a1)
+; ZVE32F-NEXT:    ld t0, 24(a1)
+; ZVE32F-NEXT:    slli t1, a4, 3
+; ZVE32F-NEXT:    slli t2, a4, 5
+; ZVE32F-NEXT:    slli t3, a2, 3
+; ZVE32F-NEXT:    add t1, t2, t1
+; ZVE32F-NEXT:    slli t2, a2, 5
 ; ZVE32F-NEXT:    addi a2, a2, 4
 ; ZVE32F-NEXT:    addi a1, a1, 32
+; ZVE32F-NEXT:    add t2, t2, t3
+; ZVE32F-NEXT:    add t1, a0, t1
 ; ZVE32F-NEXT:    add t2, a0, t2
-; ZVE32F-NEXT:    add t3, a0, t3
-; ZVE32F-NEXT:    sd a6, 0(t3)
-; ZVE32F-NEXT:    sd a7, 0(t2)
-; ZVE32F-NEXT:    sd t0, 80(t3)
-; ZVE32F-NEXT:    sd t1, 80(t2)
-; ZVE32F-NEXT:    addi a3, a3, 4
-; ZVE32F-NEXT:    bne a1, a4, .LBB13_1
+; ZVE32F-NEXT:    sd a5, 0(t2)
+; ZVE32F-NEXT:    sd a6, 0(t1)
+; ZVE32F-NEXT:    sd a7, 80(t2)
+; ZVE32F-NEXT:    sd t0, 80(t1)
+; ZVE32F-NEXT:    addi a4, a4, 4
+; ZVE32F-NEXT:    bne a1, a3, .LBB13_1
 ; ZVE32F-NEXT:  # %bb.2: # %bb18
 ; ZVE32F-NEXT:    ret
 ;
@@ -826,28 +835,31 @@ define void @scatter_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptu
 ; OPTV-LABEL: scatter_of_pointers:
 ; OPTV:       # %bb.0: # %bb
 ; OPTV-NEXT:    li a2, 0
-; OPTV-NEXT:    lui a4, 2
-; OPTV-NEXT:    li a3, 1
-; OPTV-NEXT:    add a4, a1, a4
-; OPTV-NEXT:    li a5, 40
+; OPTV-NEXT:    lui a3, 2
+; OPTV-NEXT:    add a3, a1, a3
+; OPTV-NEXT:    li a4, 1
 ; OPTV-NEXT:  .LBB13_1: # %bb2
 ; OPTV-NEXT:    # =>This Inner Loop Header: Depth=1
-; OPTV-NEXT:    ld a6, 0(a1)
-; OPTV-NEXT:    ld a7, 8(a1)
-; OPTV-NEXT:    ld t0, 16(a1)
-; OPTV-NEXT:    ld t1, 24(a1)
-; OPTV-NEXT:    mul t2, a3, a5
-; OPTV-NEXT:    mul t3, a2, a5
+; OPTV-NEXT:    ld a5, 0(a1)
+; OPTV-NEXT:    ld a6, 8(a1)
+; OPTV-NEXT:    ld a7, 16(a1)
+; OPTV-NEXT:    ld t0, 24(a1)
+; OPTV-NEXT:    slli t1, a4, 3
+; OPTV-NEXT:    slli t2, a4, 5
+; OPTV-NEXT:    slli t3, a2, 3
+; OPTV-NEXT:    add t1, t2, t1
+; OPTV-NEXT:    slli t2, a2, 5
 ; OPTV-NEXT:    addi a2, a2, 4
 ; OPTV-NEXT:    addi a1, a1, 32
+; OPTV-NEXT:    add t2, t2, t3
+; OPTV-NEXT:    add t1, a0, t1
 ; OPTV-NEXT:    add t2, a0, t2
-; OPTV-NEXT:    add t3, a0, t3
-; OPTV-NEXT:    sd a6, 0(t3)
-; OPTV-NEXT:    sd a7, 0(t2)
-; OPTV-NEXT:    sd t0, 80(t3)
-; OPTV-NEXT:    sd t1, 80(t2)
-; OPTV-NEXT:    addi a3, a3, 4
-; OPTV-NEXT:    bne a1, a4, .LBB13_1
+; OPTV-NEXT:    sd a5, 0(t2)
+; OPTV-NEXT:    sd a6, 0(t1)
+; OPTV-NEXT:    sd a7, 80(t2)
+; OPTV-NEXT:    sd t0, 80(t1)
+; OPTV-NEXT:    addi a4, a4, 4
+; OPTV-NEXT:    bne a1, a3, .LBB13_1
 ; OPTV-NEXT:  # %bb.2: # %bb18
 ; OPTV-NEXT:    ret
 bb:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
index a78130e8f102f..3da04eb7e6abe 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
@@ -925,9 +925,9 @@ define half @vreduce_ord_fadd_nxv10f16(<vscale x 10 x half> %v, half %s) {
 ; CHECK-LABEL: vreduce_ord_fadd_nxv10f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    li a1, 10
-; CHECK-NEXT:    srli a0, a0, 3
-; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    srli a1, a0, 3
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    add a0, a0, a1
 ; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NEXT:    vfmv.s.f v12, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
@@ -1007,9 +1007,9 @@ define half @vreduce_fmin_nxv10f16(<vscale x 10 x half> %v) {
 ; CHECK-NEXT:    addi a1, a1, %lo(.LCPI73_0)
 ; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NEXT:    vle16.v v12, (a1)
-; CHECK-NEXT:    srli a0, a0, 3
-; CHECK-NEXT:    li a1, 10
-; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    srli a1, a0, 3
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    add a0, a0, a1
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vfredmin.vs v12, v8, v12
 ; CHECK-NEXT:    vfmv.f.s fa0, v12



More information about the llvm-commits mailing list