[llvm] [RISCV] Expand constant multiplication for targets without M extension (PR #137195)
Iris Shi via llvm-commits
llvm-commits at lists.llvm.org
Thu May 8 21:20:11 PDT 2025
https://github.com/el-ev updated https://github.com/llvm/llvm-project/pull/137195
>From 6d7a4858f80f723fb980cd8286244f7f751b46db Mon Sep 17 00:00:00 2001
From: Iris Shi <0.0 at owo.li>
Date: Fri, 9 May 2025 12:19:40 +0800
Subject: [PATCH] [RISCV] Expand constant multiplication for targets without M
extension
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 115 +++++-
llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll | 58 ++-
.../CodeGen/RISCV/ctz_zero_return_test.ll | 360 ++++++++++-------
llvm/test/CodeGen/RISCV/mul-expand.ll | 371 +++++++++++++-----
llvm/test/CodeGen/RISCV/mul.ll | 355 ++++++++++++-----
llvm/test/CodeGen/RISCV/rv64xtheadbb.ll | 160 +++++---
llvm/test/CodeGen/RISCV/rv64zbb.ll | 160 +++++---
.../CodeGen/RISCV/rvv/known-never-zero.ll | 33 +-
.../CodeGen/RISCV/srem-seteq-illegal-types.ll | 227 +++++++----
.../CodeGen/RISCV/urem-seteq-illegal-types.ll | 342 +++++++++-------
.../CodeGen/RISCV/xqccmp-additional-stack.ll | 6 +-
.../CodeGen/RISCV/zcmp-additional-stack.ll | 7 +-
12 files changed, 1486 insertions(+), 708 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8403b51483323..53d07f31bbbf2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -20,6 +20,7 @@
#include "RISCVSelectionDAGInfo.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -15456,6 +15457,105 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
}
+// Lower N, a multiply of its first operand by the constant MulAmt, into a
+// chain of shifts combined with ADD/SUB. The terms come from the non-adjacent
+// form (NAF) of MulAmt: a signed-digit encoding with digits in {-1, 0, +1}
+// in which no two neighbouring digits are both non-zero.
+static SDValue expandMulToNAFSequence(SDNode *N, SelectionDAG &DAG,
+                                      const SDLoc &DL, uint64_t MulAmt) {
+  EVT VT = N->getValueType(0);
+  const uint64_t BitWidth = VT.getFixedSizeInBits();
+
+  SDValue Acc = DAG.getConstant(0, DL, VT);
+  SDValue X = N->getOperand(0);
+
+  // Scan the multiplier from the least significant bit upwards and emit one
+  // term per non-zero NAF digit as soon as it is discovered.
+  uint64_t Rest = MulAmt;
+  for (uint64_t Bit = 0; Rest != 0 && Bit < BitWidth; ++Bit, Rest >>= 1) {
+    if ((Rest & 1) == 0)
+      continue;
+
+    // Low bits "01" give a +1 digit. Low bits "11" give a -1 digit, and the
+    // compensating +1 is carried into the higher bits (wrapping modulo 2^64).
+    const bool IsAdd = (Rest & 3) == 1;
+    if (IsAdd)
+      Rest -= 1;
+    else
+      Rest += 1;
+
+    // The digit at bit 0 uses the operand as is; no shift node is created.
+    SDValue Term = X;
+    if (Bit > 0)
+      Term = DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(Bit, DL, VT));
+
+    Acc = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, VT, Acc, Term);
+  }
+  return Acc;
+}
+// Lower a multiply by the constant MulAmt into shifts, adds and subs only,
+// for a machine without a native mul instruction.
+//
+// MulAmt is first written as 2^TrailingZeros * Odd. Factors of the form
+// (2^M + 1) or (2^M - 1), M >= 2, are then divided out of Odd greedily, with
+// every sweep starting from the largest M. If between one and four factors
+// were found, the result is a chain of (shl, add/sub) pairs built on top of
+// the NAF expansion of whatever is left of Odd; otherwise the whole constant
+// is expanded through its NAF.
+static SDValue expandMulToBasicOps(SDNode *N, SelectionDAG &DAG,
+                                   uint64_t MulAmt) {
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+  const uint64_t BitWidth = VT.getFixedSizeInBits();
+
+  if (MulAmt == 0)
+    return DAG.getConstant(0, DL, VT);
+
+  const uint64_t TrailingZeros = llvm::countr_zero(MulAmt);
+  uint64_t Odd = MulAmt >> TrailingZeros;
+
+  // Each entry is {true, M} for a factor 2^M + 1 or {false, M} for 2^M - 1.
+  llvm::SmallVector<std::pair<bool, uint64_t>> Factors;
+
+  // Divide Divisor out of Odd and record it, provided it divides evenly.
+  auto TryDivide = [&](uint64_t Divisor, bool IsPlusOne, uint64_t M) {
+    if (Odd % Divisor != 0)
+      return false;
+    Factors.push_back({IsPlusOne, M});
+    Odd /= Divisor;
+    return true;
+  };
+
+  // Keep peeling factors until Odd is exhausted or a full sweep over M finds
+  // nothing. For every M the 2^M + 1 candidate is tried before 2^M - 1.
+  bool Found = true;
+  while (Odd > 1 && Found) {
+    Found = false;
+    for (int64_t M = BitWidth - 1; M >= 2 && !Found; --M) {
+      const uint64_t Pow2 = 1ULL << M;
+      Found = TryDivide(Pow2 + 1, true, M) || TryDivide(Pow2 - 1, false, M);
+    }
+  }
+
+  // No usable factorization: expand the untouched constant directly.
+  if (Factors.empty() || Factors.size() >= 5)
+    return expandMulToNAFSequence(N, DAG, DL, MulAmt);
+
+  // Seed the chain with the operand itself or, when a cofactor is left over,
+  // with the NAF expansion of that cofactor.
+  SDValue Acc = N->getOperand(0);
+  if (Odd != 1)
+    Acc = expandMulToNAFSequence(N, DAG, DL, Odd);
+
+  // Multiply by each recorded factor: X * (2^M +/- 1) == (X << M) +/- X.
+  for (const auto &Factor : Factors) {
+    const bool IsPlusOne = Factor.first;
+    const uint64_t M = Factor.second;
+    SDValue Shifted =
+        DAG.getNode(ISD::SHL, DL, VT, Acc, DAG.getConstant(M, DL, VT));
+    Acc = DAG.getNode(IsPlusOne ? ISD::ADD : ISD::SUB, DL, VT, Shifted, Acc);
+  }
+
+  // Re-apply the power-of-two part that was stripped at the start.
+  if (TrailingZeros > 0)
+    Acc = DAG.getNode(ISD::SHL, DL, VT, Acc,
+                      DAG.getConstant(TrailingZeros, DL, VT));
+
+  return Acc;
+}
+
+
// Try to expand a scalar multiply to a faster sequence.
static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
@@ -15467,20 +15567,23 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
if (DAG.getMachineFunction().getFunction().hasMinSize())
return SDValue();
- if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
- return SDValue();
-
if (VT != Subtarget.getXLenVT())
return SDValue();
- const bool HasShlAdd =
- Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa();
-
ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!CNode)
return SDValue();
uint64_t MulAmt = CNode->getZExtValue();
+ if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul())
+ return expandMulToBasicOps(N, DAG, MulAmt);
+
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+
+ const bool HasShlAdd =
+ Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa();
+
// WARNING: The code below is knowingly incorrect with regards to undef semantics.
// We're adding additional uses of X here, and in principle, we should be freezing
// X before doing so. However, adding freeze here causes real regressions, and no
diff --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
index 3a7d31253b05d..8b9d602dcde83 100644
--- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
@@ -262,20 +262,33 @@ define i32 @test_cttz_i32(i32 %a) nounwind {
; RV64I-NEXT: sext.w a1, a0
; RV64I-NEXT: beqz a1, .LBB2_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: neg a1, a0
+; RV64I-NEXT: negw a1, a0
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 30667
-; RV64I-NEXT: addiw a1, a1, 1329
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 6
+; RV64I-NEXT: slli a2, a0, 8
+; RV64I-NEXT: slli a3, a0, 10
+; RV64I-NEXT: slli a4, a0, 12
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: slli a2, a0, 16
+; RV64I-NEXT: subw a3, a3, a4
+; RV64I-NEXT: slli a4, a0, 18
+; RV64I-NEXT: subw a2, a2, a4
+; RV64I-NEXT: slli a4, a0, 4
+; RV64I-NEXT: subw a4, a0, a4
+; RV64I-NEXT: add a1, a4, a1
+; RV64I-NEXT: slli a4, a0, 14
+; RV64I-NEXT: subw a3, a3, a4
+; RV64I-NEXT: slli a4, a0, 23
+; RV64I-NEXT: subw a2, a2, a4
+; RV64I-NEXT: slli a0, a0, 27
+; RV64I-NEXT: add a1, a1, a3
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: srliw a0, a0, 27
; RV64I-NEXT: lui a1, %hi(.LCPI2_0)
; RV64I-NEXT: addi a1, a1, %lo(.LCPI2_0)
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB2_2:
; RV64I-NEXT: li a0, 32
@@ -730,20 +743,33 @@ define i32 @test_cttz_i32_zero_undef(i32 %a) nounwind {
;
; RV64I-LABEL: test_cttz_i32_zero_undef:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: neg a1, a0
+; RV64I-NEXT: negw a1, a0
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 30667
-; RV64I-NEXT: addiw a1, a1, 1329
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 6
+; RV64I-NEXT: slli a2, a0, 8
+; RV64I-NEXT: slli a3, a0, 10
+; RV64I-NEXT: slli a4, a0, 12
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: slli a2, a0, 16
+; RV64I-NEXT: subw a3, a3, a4
+; RV64I-NEXT: slli a4, a0, 18
+; RV64I-NEXT: subw a2, a2, a4
+; RV64I-NEXT: slli a4, a0, 4
+; RV64I-NEXT: subw a4, a0, a4
+; RV64I-NEXT: add a1, a4, a1
+; RV64I-NEXT: slli a4, a0, 14
+; RV64I-NEXT: subw a3, a3, a4
+; RV64I-NEXT: slli a4, a0, 23
+; RV64I-NEXT: subw a2, a2, a4
+; RV64I-NEXT: slli a0, a0, 27
+; RV64I-NEXT: add a1, a1, a3
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: srliw a0, a0, 27
; RV64I-NEXT: lui a1, %hi(.LCPI6_0)
; RV64I-NEXT: addi a1, a1, %lo(.LCPI6_0)
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV32M-LABEL: test_cttz_i32_zero_undef:
diff --git a/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll b/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll
index 03a6a6b1c4b7d..33907e10730a7 100644
--- a/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll
+++ b/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll
@@ -162,27 +162,38 @@ define i64 @ctz_dereferencing_pointer_zext(ptr %b) nounwind {
;
; RV64I-LABEL: ctz_dereferencing_pointer_zext:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lw s0, 0(a0)
-; RV64I-NEXT: neg a0, s0
-; RV64I-NEXT: and a0, s0, a0
-; RV64I-NEXT: lui a1, 30667
-; RV64I-NEXT: addiw a1, a1, 1329
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: srliw a0, a0, 27
-; RV64I-NEXT: lui a1, %hi(.LCPI1_0)
-; RV64I-NEXT: addi a1, a1, %lo(.LCPI1_0)
-; RV64I-NEXT: add a0, a1, a0
-; RV64I-NEXT: lbu a0, 0(a0)
-; RV64I-NEXT: seqz a1, s0
-; RV64I-NEXT: addi a1, a1, -1
-; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: lw a0, 0(a0)
+; RV64I-NEXT: negw a1, a0
+; RV64I-NEXT: and a1, a0, a1
+; RV64I-NEXT: slli a2, a1, 6
+; RV64I-NEXT: slli a3, a1, 8
+; RV64I-NEXT: slli a4, a1, 10
+; RV64I-NEXT: slli a5, a1, 12
+; RV64I-NEXT: add a2, a2, a3
+; RV64I-NEXT: slli a3, a1, 16
+; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: slli a5, a1, 18
+; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: slli a5, a1, 4
+; RV64I-NEXT: subw a5, a1, a5
+; RV64I-NEXT: add a2, a5, a2
+; RV64I-NEXT: slli a5, a1, 14
+; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: slli a5, a1, 23
+; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: slli a1, a1, 27
+; RV64I-NEXT: add a2, a2, a4
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: add a1, a2, a1
+; RV64I-NEXT: srliw a1, a1, 27
+; RV64I-NEXT: lui a2, %hi(.LCPI1_0)
+; RV64I-NEXT: addi a2, a2, %lo(.LCPI1_0)
+; RV64I-NEXT: add a1, a2, a1
+; RV64I-NEXT: lbu a1, 0(a1)
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a1, a0
; RV64I-NEXT: andi a0, a0, 31
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -237,27 +248,37 @@ define signext i32 @ctz1(i32 signext %x) nounwind {
;
; RV64I-LABEL: ctz1:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: neg a0, a0
-; RV64I-NEXT: and a0, s0, a0
-; RV64I-NEXT: lui a1, 30667
-; RV64I-NEXT: addiw a1, a1, 1329
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: srliw a0, a0, 27
-; RV64I-NEXT: lui a1, %hi(.LCPI2_0)
-; RV64I-NEXT: addi a1, a1, %lo(.LCPI2_0)
-; RV64I-NEXT: add a0, a1, a0
-; RV64I-NEXT: lbu a0, 0(a0)
-; RV64I-NEXT: seqz a1, s0
-; RV64I-NEXT: addi a1, a1, -1
-; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: negw a1, a0
+; RV64I-NEXT: and a1, a0, a1
+; RV64I-NEXT: slli a2, a1, 6
+; RV64I-NEXT: slli a3, a1, 8
+; RV64I-NEXT: slli a4, a1, 10
+; RV64I-NEXT: slli a5, a1, 12
+; RV64I-NEXT: add a2, a2, a3
+; RV64I-NEXT: slli a3, a1, 16
+; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: slli a5, a1, 18
+; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: slli a5, a1, 4
+; RV64I-NEXT: subw a5, a1, a5
+; RV64I-NEXT: add a2, a5, a2
+; RV64I-NEXT: slli a5, a1, 14
+; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: slli a5, a1, 23
+; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: slli a1, a1, 27
+; RV64I-NEXT: add a2, a2, a4
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: add a1, a2, a1
+; RV64I-NEXT: srliw a1, a1, 27
+; RV64I-NEXT: lui a2, %hi(.LCPI2_0)
+; RV64I-NEXT: addi a2, a2, %lo(.LCPI2_0)
+; RV64I-NEXT: add a1, a2, a1
+; RV64I-NEXT: lbu a1, 0(a1)
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a1, a0
; RV64I-NEXT: andi a0, a0, 31
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -310,27 +331,37 @@ define signext i32 @ctz1_flipped(i32 signext %x) nounwind {
;
; RV64I-LABEL: ctz1_flipped:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: neg a0, a0
-; RV64I-NEXT: and a0, s0, a0
-; RV64I-NEXT: lui a1, 30667
-; RV64I-NEXT: addiw a1, a1, 1329
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: srliw a0, a0, 27
-; RV64I-NEXT: lui a1, %hi(.LCPI3_0)
-; RV64I-NEXT: addi a1, a1, %lo(.LCPI3_0)
-; RV64I-NEXT: add a0, a1, a0
-; RV64I-NEXT: lbu a0, 0(a0)
-; RV64I-NEXT: seqz a1, s0
-; RV64I-NEXT: addi a1, a1, -1
-; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: negw a1, a0
+; RV64I-NEXT: and a1, a0, a1
+; RV64I-NEXT: slli a2, a1, 6
+; RV64I-NEXT: slli a3, a1, 8
+; RV64I-NEXT: slli a4, a1, 10
+; RV64I-NEXT: slli a5, a1, 12
+; RV64I-NEXT: add a2, a2, a3
+; RV64I-NEXT: slli a3, a1, 16
+; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: slli a5, a1, 18
+; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: slli a5, a1, 4
+; RV64I-NEXT: subw a5, a1, a5
+; RV64I-NEXT: add a2, a5, a2
+; RV64I-NEXT: slli a5, a1, 14
+; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: slli a5, a1, 23
+; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: slli a1, a1, 27
+; RV64I-NEXT: add a2, a2, a4
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: add a1, a2, a1
+; RV64I-NEXT: srliw a1, a1, 27
+; RV64I-NEXT: lui a2, %hi(.LCPI3_0)
+; RV64I-NEXT: addi a2, a2, %lo(.LCPI3_0)
+; RV64I-NEXT: add a1, a2, a1
+; RV64I-NEXT: lbu a1, 0(a1)
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a1, a0
; RV64I-NEXT: andi a0, a0, 31
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -381,20 +412,33 @@ define signext i32 @ctz2(i32 signext %x) nounwind {
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: beqz a0, .LBB4_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: neg a1, a0
+; RV64I-NEXT: negw a1, a0
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 30667
-; RV64I-NEXT: addiw a1, a1, 1329
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 6
+; RV64I-NEXT: slli a2, a0, 8
+; RV64I-NEXT: slli a3, a0, 10
+; RV64I-NEXT: slli a4, a0, 12
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: slli a2, a0, 16
+; RV64I-NEXT: subw a3, a3, a4
+; RV64I-NEXT: slli a4, a0, 18
+; RV64I-NEXT: subw a2, a2, a4
+; RV64I-NEXT: slli a4, a0, 4
+; RV64I-NEXT: subw a4, a0, a4
+; RV64I-NEXT: add a1, a4, a1
+; RV64I-NEXT: slli a4, a0, 14
+; RV64I-NEXT: subw a3, a3, a4
+; RV64I-NEXT: slli a4, a0, 23
+; RV64I-NEXT: subw a2, a2, a4
+; RV64I-NEXT: slli a0, a0, 27
+; RV64I-NEXT: add a1, a1, a3
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: srliw a0, a0, 27
; RV64I-NEXT: lui a1, %hi(.LCPI4_0)
; RV64I-NEXT: addi a1, a1, %lo(.LCPI4_0)
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB4_2:
; RV64I-NEXT: li a0, 32
@@ -446,20 +490,33 @@ define signext i32 @ctz3(i32 signext %x) nounwind {
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: beqz a0, .LBB5_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: neg a1, a0
+; RV64I-NEXT: negw a1, a0
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 30667
-; RV64I-NEXT: addiw a1, a1, 1329
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 6
+; RV64I-NEXT: slli a2, a0, 8
+; RV64I-NEXT: slli a3, a0, 10
+; RV64I-NEXT: slli a4, a0, 12
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: slli a2, a0, 16
+; RV64I-NEXT: subw a3, a3, a4
+; RV64I-NEXT: slli a4, a0, 18
+; RV64I-NEXT: subw a2, a2, a4
+; RV64I-NEXT: slli a4, a0, 4
+; RV64I-NEXT: subw a4, a0, a4
+; RV64I-NEXT: add a1, a4, a1
+; RV64I-NEXT: slli a4, a0, 14
+; RV64I-NEXT: subw a3, a3, a4
+; RV64I-NEXT: slli a4, a0, 23
+; RV64I-NEXT: subw a2, a2, a4
+; RV64I-NEXT: slli a0, a0, 27
+; RV64I-NEXT: add a1, a1, a3
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: srliw a0, a0, 27
; RV64I-NEXT: lui a1, %hi(.LCPI5_0)
; RV64I-NEXT: addi a1, a1, %lo(.LCPI5_0)
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB5_2:
; RV64I-NEXT: li a0, 32
@@ -767,27 +824,37 @@ define signext i32 @ctz5(i32 signext %x) nounwind {
;
; RV64I-LABEL: ctz5:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: neg a0, a0
-; RV64I-NEXT: and a0, s0, a0
-; RV64I-NEXT: lui a1, 30667
-; RV64I-NEXT: addiw a1, a1, 1329
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: srliw a0, a0, 27
-; RV64I-NEXT: lui a1, %hi(.LCPI8_0)
-; RV64I-NEXT: addi a1, a1, %lo(.LCPI8_0)
-; RV64I-NEXT: add a0, a1, a0
-; RV64I-NEXT: lbu a0, 0(a0)
-; RV64I-NEXT: seqz a1, s0
-; RV64I-NEXT: addi a1, a1, -1
-; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: negw a1, a0
+; RV64I-NEXT: and a1, a0, a1
+; RV64I-NEXT: slli a2, a1, 6
+; RV64I-NEXT: slli a3, a1, 8
+; RV64I-NEXT: slli a4, a1, 10
+; RV64I-NEXT: slli a5, a1, 12
+; RV64I-NEXT: add a2, a2, a3
+; RV64I-NEXT: slli a3, a1, 16
+; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: slli a5, a1, 18
+; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: slli a5, a1, 4
+; RV64I-NEXT: subw a5, a1, a5
+; RV64I-NEXT: add a2, a5, a2
+; RV64I-NEXT: slli a5, a1, 14
+; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: slli a5, a1, 23
+; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: slli a1, a1, 27
+; RV64I-NEXT: add a2, a2, a4
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: add a1, a2, a1
+; RV64I-NEXT: srliw a1, a1, 27
+; RV64I-NEXT: lui a2, %hi(.LCPI8_0)
+; RV64I-NEXT: addi a2, a2, %lo(.LCPI8_0)
+; RV64I-NEXT: add a1, a2, a1
+; RV64I-NEXT: lbu a1, 0(a1)
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a1, a0
; RV64I-NEXT: andi a0, a0, 31
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -840,27 +907,37 @@ define signext i32 @ctz6(i32 signext %x) nounwind {
;
; RV64I-LABEL: ctz6:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: neg a0, a0
-; RV64I-NEXT: and a0, s0, a0
-; RV64I-NEXT: lui a1, 30667
-; RV64I-NEXT: addiw a1, a1, 1329
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: srliw a0, a0, 27
-; RV64I-NEXT: lui a1, %hi(.LCPI9_0)
-; RV64I-NEXT: addi a1, a1, %lo(.LCPI9_0)
-; RV64I-NEXT: add a0, a1, a0
-; RV64I-NEXT: lbu a0, 0(a0)
-; RV64I-NEXT: seqz a1, s0
-; RV64I-NEXT: addi a1, a1, -1
-; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: negw a1, a0
+; RV64I-NEXT: and a1, a0, a1
+; RV64I-NEXT: slli a2, a1, 6
+; RV64I-NEXT: slli a3, a1, 8
+; RV64I-NEXT: slli a4, a1, 10
+; RV64I-NEXT: slli a5, a1, 12
+; RV64I-NEXT: add a2, a2, a3
+; RV64I-NEXT: slli a3, a1, 16
+; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: slli a5, a1, 18
+; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: slli a5, a1, 4
+; RV64I-NEXT: subw a5, a1, a5
+; RV64I-NEXT: add a2, a5, a2
+; RV64I-NEXT: slli a5, a1, 14
+; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: slli a5, a1, 23
+; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: slli a1, a1, 27
+; RV64I-NEXT: add a2, a2, a4
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: add a1, a2, a1
+; RV64I-NEXT: srliw a1, a1, 27
+; RV64I-NEXT: lui a2, %hi(.LCPI9_0)
+; RV64I-NEXT: addi a2, a2, %lo(.LCPI9_0)
+; RV64I-NEXT: add a1, a2, a1
+; RV64I-NEXT: lbu a1, 0(a1)
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a1, a0
; RV64I-NEXT: andi a0, a0, 31
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
@@ -918,28 +995,39 @@ define signext i32 @globalVar() nounwind {
;
; RV64I-LABEL: globalVar:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT: lui a0, %hi(global_x)
-; RV64I-NEXT: lw s0, %lo(global_x)(a0)
-; RV64I-NEXT: neg a0, s0
-; RV64I-NEXT: and a0, s0, a0
-; RV64I-NEXT: lui a1, 30667
-; RV64I-NEXT: addiw a1, a1, 1329
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: srliw a0, a0, 27
-; RV64I-NEXT: lui a1, %hi(.LCPI10_0)
-; RV64I-NEXT: addi a1, a1, %lo(.LCPI10_0)
-; RV64I-NEXT: add a0, a1, a0
-; RV64I-NEXT: lbu a0, 0(a0)
-; RV64I-NEXT: seqz a1, s0
-; RV64I-NEXT: addi a1, a1, -1
-; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: lw a0, %lo(global_x)(a0)
+; RV64I-NEXT: negw a1, a0
+; RV64I-NEXT: and a1, a0, a1
+; RV64I-NEXT: slli a2, a1, 6
+; RV64I-NEXT: slli a3, a1, 8
+; RV64I-NEXT: slli a4, a1, 10
+; RV64I-NEXT: slli a5, a1, 12
+; RV64I-NEXT: add a2, a2, a3
+; RV64I-NEXT: slli a3, a1, 16
+; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: slli a5, a1, 18
+; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: slli a5, a1, 4
+; RV64I-NEXT: subw a5, a1, a5
+; RV64I-NEXT: add a2, a5, a2
+; RV64I-NEXT: slli a5, a1, 14
+; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: slli a5, a1, 23
+; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: slli a1, a1, 27
+; RV64I-NEXT: add a2, a2, a4
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: add a1, a2, a1
+; RV64I-NEXT: srliw a1, a1, 27
+; RV64I-NEXT: lui a2, %hi(.LCPI10_0)
+; RV64I-NEXT: addi a2, a2, %lo(.LCPI10_0)
+; RV64I-NEXT: add a1, a2, a1
+; RV64I-NEXT: lbu a1, 0(a1)
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a1, a0
; RV64I-NEXT: andi a0, a0, 31
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/mul-expand.ll b/llvm/test/CodeGen/RISCV/mul-expand.ll
index 5bb74bc184d8b..a33c5d6cf3db0 100644
--- a/llvm/test/CodeGen/RISCV/mul-expand.ll
+++ b/llvm/test/CodeGen/RISCV/mul-expand.ll
@@ -7,17 +7,22 @@
define i32 @muli32_0x555(i32 %a) nounwind {
; RV32I-LABEL: muli32_0x555:
; RV32I: # %bb.0:
-; RV32I-NEXT: li a1, 1365
-; RV32I-NEXT: tail __mulsi3
+; RV32I-NEXT: slli a1, a0, 6
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: slli a1, a0, 3
+; RV32I-NEXT: sub a1, a1, a0
+; RV32I-NEXT: slli a0, a1, 2
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: ret
;
; RV64I-LABEL: muli32_0x555:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: li a1, 1365
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: slli a1, a0, 6
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: slli a1, a0, 3
+; RV64I-NEXT: sub a1, a1, a0
+; RV64I-NEXT: slli a0, a1, 2
+; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: ret
%a1 = mul i32 %a, 1365
ret i32 %a1
@@ -37,8 +42,13 @@ define i64 @muli64_0x555(i64 %a) nounwind {
;
; RV64I-LABEL: muli64_0x555:
; RV64I: # %bb.0:
-; RV64I-NEXT: li a1, 1365
-; RV64I-NEXT: tail __muldi3
+; RV64I-NEXT: slli a1, a0, 6
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: slli a1, a0, 3
+; RV64I-NEXT: sub a1, a1, a0
+; RV64I-NEXT: slli a0, a1, 2
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: ret
%a1 = mul i64 %a, 1365
ret i64 %a1
}
@@ -46,19 +56,26 @@ define i64 @muli64_0x555(i64 %a) nounwind {
define i32 @muli32_0x33333333(i32 %a) nounwind {
; RV32I-LABEL: muli32_0x33333333:
; RV32I: # %bb.0:
-; RV32I-NEXT: lui a1, 209715
-; RV32I-NEXT: addi a1, a1, 819
-; RV32I-NEXT: tail __mulsi3
+; RV32I-NEXT: slli a1, a0, 16
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: slli a1, a0, 4
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: slli a1, a0, 2
+; RV32I-NEXT: sub a0, a1, a0
+; RV32I-NEXT: ret
;
; RV64I-LABEL: muli32_0x33333333:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lui a1, 209715
-; RV64I-NEXT: addiw a1, a1, 819
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: slli a1, a0, 16
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: slli a1, a0, 4
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: slli a1, a0, 2
+; RV64I-NEXT: sub a0, a1, a0
; RV64I-NEXT: ret
%a1 = mul i32 %a, 858993459
ret i32 %a1
@@ -79,9 +96,15 @@ define i64 @muli64_0x33333333(i64 %a) nounwind {
;
; RV64I-LABEL: muli64_0x33333333:
; RV64I: # %bb.0:
-; RV64I-NEXT: lui a1, 209715
-; RV64I-NEXT: addiw a1, a1, 819
-; RV64I-NEXT: tail __muldi3
+; RV64I-NEXT: slli a1, a0, 16
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: slli a1, a0, 4
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: slli a1, a0, 2
+; RV64I-NEXT: sub a0, a1, a0
+; RV64I-NEXT: ret
%a1 = mul i64 %a, 858993459
ret i64 %a1
}
@@ -89,19 +112,50 @@ define i64 @muli64_0x33333333(i64 %a) nounwind {
define i32 @muli32_0xaaaaaaaa(i32 %a) nounwind {
; RV32I-LABEL: muli32_0xaaaaaaaa:
; RV32I: # %bb.0:
-; RV32I-NEXT: lui a1, 699051
-; RV32I-NEXT: addi a1, a1, -1366
-; RV32I-NEXT: tail __mulsi3
+; RV32I-NEXT: slli a1, a0, 16
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: slli a1, a0, 4
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: slli a1, a0, 2
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: slli a0, a0, 1
+; RV32I-NEXT: ret
;
; RV64I-LABEL: muli32_0xaaaaaaaa:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lui a1, 699051
-; RV64I-NEXT: addiw a1, a1, -1366
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: slli a1, a0, 3
+; RV64I-NEXT: slli a2, a0, 1
+; RV64I-NEXT: slli a3, a0, 5
+; RV64I-NEXT: slli a4, a0, 7
+; RV64I-NEXT: slli a5, a0, 9
+; RV64I-NEXT: slli a6, a0, 11
+; RV64I-NEXT: add a1, a2, a1
+; RV64I-NEXT: slli a2, a0, 15
+; RV64I-NEXT: add a3, a3, a4
+; RV64I-NEXT: slli a4, a0, 17
+; RV64I-NEXT: add a5, a5, a6
+; RV64I-NEXT: slli a6, a0, 23
+; RV64I-NEXT: add a2, a2, a4
+; RV64I-NEXT: slli a4, a0, 25
+; RV64I-NEXT: add a4, a6, a4
+; RV64I-NEXT: add a1, a1, a3
+; RV64I-NEXT: slli a3, a0, 13
+; RV64I-NEXT: add a3, a5, a3
+; RV64I-NEXT: slli a5, a0, 19
+; RV64I-NEXT: add a2, a2, a5
+; RV64I-NEXT: slli a5, a0, 27
+; RV64I-NEXT: add a4, a4, a5
+; RV64I-NEXT: add a1, a1, a3
+; RV64I-NEXT: slli a3, a0, 21
+; RV64I-NEXT: add a2, a2, a3
+; RV64I-NEXT: slli a3, a0, 29
+; RV64I-NEXT: add a3, a4, a3
+; RV64I-NEXT: slli a0, a0, 31
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: sub a0, a3, a0
+; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: ret
%a1 = mul i32 %a, -1431655766
ret i32 %a1
@@ -122,10 +176,16 @@ define i64 @muli64_0xaaaaaaaa(i64 %a) nounwind {
;
; RV64I-LABEL: muli64_0xaaaaaaaa:
; RV64I: # %bb.0:
-; RV64I-NEXT: lui a1, 349525
-; RV64I-NEXT: addiw a1, a1, 1365
-; RV64I-NEXT: slli a1, a1, 1
-; RV64I-NEXT: tail __muldi3
+; RV64I-NEXT: slli a1, a0, 16
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: slli a1, a0, 4
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: slli a1, a0, 2
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: slli a0, a0, 1
+; RV64I-NEXT: ret
%a1 = mul i64 %a, 2863311530
ret i64 %a1
}
@@ -171,19 +231,32 @@ define i64 @muli64_0x0fffffff(i64 %a) nounwind {
define i32 @muli32_0xf0f0f0f0(i32 %a) nounwind {
; RV32I-LABEL: muli32_0xf0f0f0f0:
; RV32I: # %bb.0:
-; RV32I-NEXT: lui a1, 986895
-; RV32I-NEXT: addi a1, a1, 240
-; RV32I-NEXT: tail __mulsi3
+; RV32I-NEXT: slli a1, a0, 16
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: slli a1, a0, 4
+; RV32I-NEXT: sub a0, a1, a0
+; RV32I-NEXT: slli a0, a0, 4
+; RV32I-NEXT: ret
;
; RV64I-LABEL: muli32_0xf0f0f0f0:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lui a1, 986895
-; RV64I-NEXT: addiw a1, a1, 240
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: slli a1, a0, 4
+; RV64I-NEXT: slli a2, a0, 12
+; RV64I-NEXT: slli a3, a0, 16
+; RV64I-NEXT: sub a1, a0, a1
+; RV64I-NEXT: sub a2, a2, a3
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: slli a0, a0, 24
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: slli a1, a0, 16
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: slli a1, a0, 12
+; RV64I-NEXT: sub a1, a1, a0
+; RV64I-NEXT: slli a0, a1, 8
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a0, a0, 4
; RV64I-NEXT: ret
%a1 = mul i32 %a, -252645136
ret i32 %a1
@@ -204,10 +277,14 @@ define i64 @muli64_0xf0f0f0f0(i64 %a) nounwind {
;
; RV64I-LABEL: muli64_0xf0f0f0f0:
; RV64I: # %bb.0:
-; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw a1, a1, -241
-; RV64I-NEXT: slli a1, a1, 4
-; RV64I-NEXT: tail __muldi3
+; RV64I-NEXT: slli a1, a0, 16
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: slli a1, a0, 4
+; RV64I-NEXT: sub a0, a1, a0
+; RV64I-NEXT: slli a0, a0, 4
+; RV64I-NEXT: ret
%a1 = mul i64 %a, 4042322160
ret i64 %a1
}
@@ -215,19 +292,27 @@ define i64 @muli64_0xf0f0f0f0(i64 %a) nounwind {
define i32 @muli32_0xf7f7f7f7(i32 %a) nounwind {
; RV32I-LABEL: muli32_0xf7f7f7f7:
; RV32I: # %bb.0:
-; RV32I-NEXT: lui a1, 1015679
-; RV32I-NEXT: addi a1, a1, 2039
-; RV32I-NEXT: tail __mulsi3
+; RV32I-NEXT: slli a1, a0, 3
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a0, a0, 8
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: slli a1, a0, 16
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: ret
;
; RV64I-LABEL: muli32_0xf7f7f7f7:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lui a1, 1015679
-; RV64I-NEXT: addiw a1, a1, 2039
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: slli a1, a0, 3
+; RV64I-NEXT: slli a2, a0, 11
+; RV64I-NEXT: slli a3, a0, 19
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a0, a0, 27
+; RV64I-NEXT: add a2, a2, a3
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: neg a0, a0
+; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: ret
%a1 = mul i32 %a, -134744073
ret i32 %a1
@@ -248,11 +333,15 @@ define i64 @muli64_0xf7f7f7f7(i64 %a) nounwind {
;
; RV64I-LABEL: muli64_0xf7f7f7f7:
; RV64I: # %bb.0:
-; RV64I-NEXT: lui a1, 248
-; RV64I-NEXT: addiw a1, a1, -129
-; RV64I-NEXT: slli a1, a1, 12
-; RV64I-NEXT: addi a1, a1, 2039
-; RV64I-NEXT: tail __muldi3
+; RV64I-NEXT: slli a1, a0, 3
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 16
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: ret
%a1 = mul i64 %a, 4160223223
ret i64 %a1
}
@@ -405,19 +494,46 @@ define i64 @muli64_0x7fffffff(i64 %a) nounwind {
define i32 @muli32_0xdeadbeef(i32 %a) nounwind {
; RV32I-LABEL: muli32_0xdeadbeef:
; RV32I: # %bb.0:
-; RV32I-NEXT: lui a1, 912092
-; RV32I-NEXT: addi a1, a1, -273
-; RV32I-NEXT: tail __mulsi3
+; RV32I-NEXT: slli a1, a0, 12
+; RV32I-NEXT: slli a2, a0, 14
+; RV32I-NEXT: slli a3, a0, 17
+; RV32I-NEXT: sub a1, a1, a2
+; RV32I-NEXT: slli a2, a0, 21
+; RV32I-NEXT: add a2, a3, a2
+; RV32I-NEXT: slli a3, a0, 4
+; RV32I-NEXT: add a3, a0, a3
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: sub a1, a1, a3
+; RV32I-NEXT: sub a2, a2, a0
+; RV32I-NEXT: sub a0, a1, a2
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: ret
;
; RV64I-LABEL: muli32_0xdeadbeef:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lui a1, 912092
-; RV64I-NEXT: addiw a1, a1, -273
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: slli a1, a0, 12
+; RV64I-NEXT: slli a2, a0, 14
+; RV64I-NEXT: slli a3, a0, 17
+; RV64I-NEXT: slli a4, a0, 21
+; RV64I-NEXT: sub a1, a1, a2
+; RV64I-NEXT: slli a2, a0, 32
+; RV64I-NEXT: add a3, a3, a4
+; RV64I-NEXT: slli a4, a0, 40
+; RV64I-NEXT: sub a2, a2, a4
+; RV64I-NEXT: slli a4, a0, 4
+; RV64I-NEXT: add a4, a0, a4
+; RV64I-NEXT: sub a1, a1, a4
+; RV64I-NEXT: slli a4, a0, 24
+; RV64I-NEXT: sub a3, a3, a4
+; RV64I-NEXT: slli a4, a0, 48
+; RV64I-NEXT: add a2, a2, a4
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: sub a1, a1, a3
+; RV64I-NEXT: sub a2, a2, a0
+; RV64I-NEXT: sub a0, a1, a2
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: ret
%a1 = mul i32 %a, -559038737
ret i32 %a1
@@ -438,10 +554,21 @@ define i64 @muli64_0xdeadbeef(i64 %a) nounwind {
;
; RV64I-LABEL: muli64_0xdeadbeef:
; RV64I: # %bb.0:
-; RV64I-NEXT: lui a1, 228023
-; RV64I-NEXT: slli a1, a1, 2
-; RV64I-NEXT: addi a1, a1, -273
-; RV64I-NEXT: tail __muldi3
+; RV64I-NEXT: slli a1, a0, 12
+; RV64I-NEXT: slli a2, a0, 14
+; RV64I-NEXT: slli a3, a0, 17
+; RV64I-NEXT: sub a1, a1, a2
+; RV64I-NEXT: slli a2, a0, 21
+; RV64I-NEXT: add a2, a3, a2
+; RV64I-NEXT: slli a3, a0, 4
+; RV64I-NEXT: add a3, a0, a3
+; RV64I-NEXT: slli a0, a0, 24
+; RV64I-NEXT: sub a1, a1, a3
+; RV64I-NEXT: sub a2, a2, a0
+; RV64I-NEXT: sub a0, a1, a2
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: ret
%a1 = mul i64 %a, 3735928559
ret i64 %a1
}
@@ -449,19 +576,56 @@ define i64 @muli64_0xdeadbeef(i64 %a) nounwind {
define i32 @muli32_0x12345678(i32 %a) nounwind {
; RV32I-LABEL: muli32_0x12345678:
; RV32I: # %bb.0:
-; RV32I-NEXT: lui a1, 74565
-; RV32I-NEXT: addi a1, a1, 1656
-; RV32I-NEXT: tail __mulsi3
+; RV32I-NEXT: slli a1, a0, 4
+; RV32I-NEXT: slli a2, a0, 6
+; RV32I-NEXT: slli a3, a0, 9
+; RV32I-NEXT: add a1, a1, a2
+; RV32I-NEXT: slli a2, a0, 11
+; RV32I-NEXT: sub a3, a3, a2
+; RV32I-NEXT: slli a2, a0, 2
+; RV32I-NEXT: add a2, a0, a2
+; RV32I-NEXT: sub a2, a2, a1
+; RV32I-NEXT: slli a1, a0, 13
+; RV32I-NEXT: add a1, a3, a1
+; RV32I-NEXT: slli a3, a0, 15
+; RV32I-NEXT: slli a0, a0, 17
+; RV32I-NEXT: sub a2, a2, a1
+; RV32I-NEXT: add a0, a3, a0
+; RV32I-NEXT: add a0, a2, a0
+; RV32I-NEXT: slli a1, a0, 3
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: slli a1, a0, 3
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: slli a1, a0, 2
+; RV32I-NEXT: sub a0, a1, a0
+; RV32I-NEXT: slli a0, a0, 3
+; RV32I-NEXT: ret
;
; RV64I-LABEL: muli32_0x12345678:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lui a1, 74565
-; RV64I-NEXT: addiw a1, a1, 1656
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: slli a1, a0, 4
+; RV64I-NEXT: slli a2, a0, 6
+; RV64I-NEXT: slli a3, a0, 9
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: slli a2, a0, 11
+; RV64I-NEXT: sub a3, a3, a2
+; RV64I-NEXT: slli a2, a0, 2
+; RV64I-NEXT: add a2, a0, a2
+; RV64I-NEXT: sub a2, a2, a1
+; RV64I-NEXT: slli a1, a0, 13
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: slli a3, a0, 15
+; RV64I-NEXT: slli a0, a0, 17
+; RV64I-NEXT: sub a2, a2, a1
+; RV64I-NEXT: add a0, a3, a0
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: slli a1, a0, 3
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: slli a1, a0, 3
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: slli a1, a0, 2
+; RV64I-NEXT: sub a0, a1, a0
+; RV64I-NEXT: slli a0, a0, 3
; RV64I-NEXT: ret
%a1 = mul i32 %a, 305419896
ret i32 %a1
@@ -482,9 +646,30 @@ define i64 @muli64_0x12345678(i64 %a) nounwind {
;
; RV64I-LABEL: muli64_0x12345678:
; RV64I: # %bb.0:
-; RV64I-NEXT: lui a1, 74565
-; RV64I-NEXT: addiw a1, a1, 1656
-; RV64I-NEXT: tail __muldi3
+; RV64I-NEXT: slli a1, a0, 4
+; RV64I-NEXT: slli a2, a0, 6
+; RV64I-NEXT: slli a3, a0, 9
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: slli a2, a0, 11
+; RV64I-NEXT: sub a3, a3, a2
+; RV64I-NEXT: slli a2, a0, 2
+; RV64I-NEXT: add a2, a0, a2
+; RV64I-NEXT: sub a2, a2, a1
+; RV64I-NEXT: slli a1, a0, 13
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: slli a3, a0, 15
+; RV64I-NEXT: slli a0, a0, 17
+; RV64I-NEXT: sub a2, a2, a1
+; RV64I-NEXT: add a0, a3, a0
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: slli a1, a0, 3
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: slli a1, a0, 3
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: slli a1, a0, 2
+; RV64I-NEXT: sub a0, a1, a0
+; RV64I-NEXT: slli a0, a0, 3
+; RV64I-NEXT: ret
%a1 = mul i64 %a, 305419896
ret i64 %a1
}
diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll
index 9447dcaf72373..4400536066a6c 100644
--- a/llvm/test/CodeGen/RISCV/mul.ll
+++ b/llvm/test/CodeGen/RISCV/mul.ll
@@ -464,11 +464,43 @@ define i32 @mulhu_constant(i32 %a) nounwind {
ret i32 %4
}
+define i32 @muli32_p10(i32 %a) nounwind {
+; RV32I-LABEL: muli32_p10:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a1, a0, 2
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: slli a0, a0, 1
+; RV32I-NEXT: ret
+;
+; RV32IM-LABEL: muli32_p10:
+; RV32IM: # %bb.0:
+; RV32IM-NEXT: li a1, 10
+; RV32IM-NEXT: mul a0, a0, a1
+; RV32IM-NEXT: ret
+;
+; RV64I-LABEL: muli32_p10:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a1, a0, 2
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: slli a0, a0, 1
+; RV64I-NEXT: ret
+;
+; RV64IM-LABEL: muli32_p10:
+; RV64IM: # %bb.0:
+; RV64IM-NEXT: li a1, 10
+; RV64IM-NEXT: mulw a0, a0, a1
+; RV64IM-NEXT: ret
+ %1 = mul i32 %a, 10
+ ret i32 %1
+}
+
define i32 @muli32_p14(i32 %a) nounwind {
; RV32I-LABEL: muli32_p14:
; RV32I: # %bb.0:
-; RV32I-NEXT: li a1, 14
-; RV32I-NEXT: tail __mulsi3
+; RV32I-NEXT: slli a1, a0, 3
+; RV32I-NEXT: sub a0, a1, a0
+; RV32I-NEXT: slli a0, a0, 1
+; RV32I-NEXT: ret
;
; RV32IM-LABEL: muli32_p14:
; RV32IM: # %bb.0:
@@ -479,9 +511,9 @@ define i32 @muli32_p14(i32 %a) nounwind {
;
; RV64I-LABEL: muli32_p14:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a1, a0, 1
-; RV64I-NEXT: slli a0, a0, 4
-; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 3
+; RV64I-NEXT: sub a0, a1, a0
+; RV64I-NEXT: slli a0, a0, 1
; RV64I-NEXT: ret
;
; RV64IM-LABEL: muli32_p14:
@@ -497,8 +529,10 @@ define i32 @muli32_p14(i32 %a) nounwind {
define i32 @muli32_p18(i32 %a) nounwind {
; RV32I-LABEL: muli32_p18:
; RV32I: # %bb.0:
-; RV32I-NEXT: li a1, 18
-; RV32I-NEXT: tail __mulsi3
+; RV32I-NEXT: slli a1, a0, 3
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: slli a0, a0, 1
+; RV32I-NEXT: ret
;
; RV32IM-LABEL: muli32_p18:
; RV32IM: # %bb.0:
@@ -508,12 +542,9 @@ define i32 @muli32_p18(i32 %a) nounwind {
;
; RV64I-LABEL: muli32_p18:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: li a1, 18
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: slli a1, a0, 3
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: slli a0, a0, 1
; RV64I-NEXT: ret
;
; RV64IM-LABEL: muli32_p18:
@@ -528,8 +559,10 @@ define i32 @muli32_p18(i32 %a) nounwind {
define i32 @muli32_p28(i32 %a) nounwind {
; RV32I-LABEL: muli32_p28:
; RV32I: # %bb.0:
-; RV32I-NEXT: li a1, 28
-; RV32I-NEXT: tail __mulsi3
+; RV32I-NEXT: slli a1, a0, 3
+; RV32I-NEXT: sub a0, a1, a0
+; RV32I-NEXT: slli a0, a0, 2
+; RV32I-NEXT: ret
;
; RV32IM-LABEL: muli32_p28:
; RV32IM: # %bb.0:
@@ -540,9 +573,9 @@ define i32 @muli32_p28(i32 %a) nounwind {
;
; RV64I-LABEL: muli32_p28:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a1, a0, 2
-; RV64I-NEXT: slli a0, a0, 5
-; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 3
+; RV64I-NEXT: sub a0, a1, a0
+; RV64I-NEXT: slli a0, a0, 2
; RV64I-NEXT: ret
;
; RV64IM-LABEL: muli32_p28:
@@ -558,8 +591,10 @@ define i32 @muli32_p28(i32 %a) nounwind {
define i32 @muli32_p30(i32 %a) nounwind {
; RV32I-LABEL: muli32_p30:
; RV32I: # %bb.0:
-; RV32I-NEXT: li a1, 30
-; RV32I-NEXT: tail __mulsi3
+; RV32I-NEXT: slli a1, a0, 4
+; RV32I-NEXT: sub a0, a1, a0
+; RV32I-NEXT: slli a0, a0, 1
+; RV32I-NEXT: ret
;
; RV32IM-LABEL: muli32_p30:
; RV32IM: # %bb.0:
@@ -570,9 +605,9 @@ define i32 @muli32_p30(i32 %a) nounwind {
;
; RV64I-LABEL: muli32_p30:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a1, a0, 1
-; RV64I-NEXT: slli a0, a0, 5
-; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 4
+; RV64I-NEXT: sub a0, a1, a0
+; RV64I-NEXT: slli a0, a0, 1
; RV64I-NEXT: ret
;
; RV64IM-LABEL: muli32_p30:
@@ -588,8 +623,10 @@ define i32 @muli32_p30(i32 %a) nounwind {
define i32 @muli32_p34(i32 %a) nounwind {
; RV32I-LABEL: muli32_p34:
; RV32I: # %bb.0:
-; RV32I-NEXT: li a1, 34
-; RV32I-NEXT: tail __mulsi3
+; RV32I-NEXT: slli a1, a0, 4
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: slli a0, a0, 1
+; RV32I-NEXT: ret
;
; RV32IM-LABEL: muli32_p34:
; RV32IM: # %bb.0:
@@ -599,12 +636,9 @@ define i32 @muli32_p34(i32 %a) nounwind {
;
; RV64I-LABEL: muli32_p34:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: li a1, 34
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: slli a1, a0, 4
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: slli a0, a0, 1
; RV64I-NEXT: ret
;
; RV64IM-LABEL: muli32_p34:
@@ -619,8 +653,10 @@ define i32 @muli32_p34(i32 %a) nounwind {
define i32 @muli32_p36(i32 %a) nounwind {
; RV32I-LABEL: muli32_p36:
; RV32I: # %bb.0:
-; RV32I-NEXT: li a1, 36
-; RV32I-NEXT: tail __mulsi3
+; RV32I-NEXT: slli a1, a0, 3
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: slli a0, a0, 2
+; RV32I-NEXT: ret
;
; RV32IM-LABEL: muli32_p36:
; RV32IM: # %bb.0:
@@ -630,12 +666,9 @@ define i32 @muli32_p36(i32 %a) nounwind {
;
; RV64I-LABEL: muli32_p36:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: li a1, 36
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: slli a1, a0, 3
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: slli a0, a0, 2
; RV64I-NEXT: ret
;
; RV64IM-LABEL: muli32_p36:
@@ -650,8 +683,10 @@ define i32 @muli32_p36(i32 %a) nounwind {
define i32 @muli32_p56(i32 %a) nounwind {
; RV32I-LABEL: muli32_p56:
; RV32I: # %bb.0:
-; RV32I-NEXT: li a1, 56
-; RV32I-NEXT: tail __mulsi3
+; RV32I-NEXT: slli a1, a0, 3
+; RV32I-NEXT: sub a0, a1, a0
+; RV32I-NEXT: slli a0, a0, 3
+; RV32I-NEXT: ret
;
; RV32IM-LABEL: muli32_p56:
; RV32IM: # %bb.0:
@@ -663,8 +698,8 @@ define i32 @muli32_p56(i32 %a) nounwind {
; RV64I-LABEL: muli32_p56:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a0, 3
-; RV64I-NEXT: slli a0, a0, 6
-; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: sub a0, a1, a0
+; RV64I-NEXT: slli a0, a0, 3
; RV64I-NEXT: ret
;
; RV64IM-LABEL: muli32_p56:
@@ -680,8 +715,10 @@ define i32 @muli32_p56(i32 %a) nounwind {
define i32 @muli32_p60(i32 %a) nounwind {
; RV32I-LABEL: muli32_p60:
; RV32I: # %bb.0:
-; RV32I-NEXT: li a1, 60
-; RV32I-NEXT: tail __mulsi3
+; RV32I-NEXT: slli a1, a0, 4
+; RV32I-NEXT: sub a0, a1, a0
+; RV32I-NEXT: slli a0, a0, 2
+; RV32I-NEXT: ret
;
; RV32IM-LABEL: muli32_p60:
; RV32IM: # %bb.0:
@@ -692,9 +729,9 @@ define i32 @muli32_p60(i32 %a) nounwind {
;
; RV64I-LABEL: muli32_p60:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a1, a0, 2
-; RV64I-NEXT: slli a0, a0, 6
-; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 4
+; RV64I-NEXT: sub a0, a1, a0
+; RV64I-NEXT: slli a0, a0, 2
; RV64I-NEXT: ret
;
; RV64IM-LABEL: muli32_p60:
@@ -710,8 +747,10 @@ define i32 @muli32_p60(i32 %a) nounwind {
define i32 @muli32_p62(i32 %a) nounwind {
; RV32I-LABEL: muli32_p62:
; RV32I: # %bb.0:
-; RV32I-NEXT: li a1, 62
-; RV32I-NEXT: tail __mulsi3
+; RV32I-NEXT: slli a1, a0, 5
+; RV32I-NEXT: sub a0, a1, a0
+; RV32I-NEXT: slli a0, a0, 1
+; RV32I-NEXT: ret
;
; RV32IM-LABEL: muli32_p62:
; RV32IM: # %bb.0:
@@ -722,9 +761,9 @@ define i32 @muli32_p62(i32 %a) nounwind {
;
; RV64I-LABEL: muli32_p62:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a1, a0, 1
-; RV64I-NEXT: slli a0, a0, 6
-; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 5
+; RV64I-NEXT: sub a0, a1, a0
+; RV64I-NEXT: slli a0, a0, 1
; RV64I-NEXT: ret
;
; RV64IM-LABEL: muli32_p62:
@@ -765,6 +804,36 @@ define i32 @muli32_p65(i32 %a) nounwind {
ret i32 %1
}
+define i32 @muli32_p66(i32 %a) nounwind {
+; RV32I-LABEL: muli32_p66:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a1, a0, 5
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: slli a0, a0, 1
+; RV32I-NEXT: ret
+;
+; RV32IM-LABEL: muli32_p66:
+; RV32IM: # %bb.0:
+; RV32IM-NEXT: li a1, 66
+; RV32IM-NEXT: mul a0, a0, a1
+; RV32IM-NEXT: ret
+;
+; RV64I-LABEL: muli32_p66:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a1, a0, 5
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: slli a0, a0, 1
+; RV64I-NEXT: ret
+;
+; RV64IM-LABEL: muli32_p66:
+; RV64IM: # %bb.0:
+; RV64IM-NEXT: li a1, 66
+; RV64IM-NEXT: mulw a0, a0, a1
+; RV64IM-NEXT: ret
+ %1 = mul i32 %a, 66
+ ret i32 %1
+}
+
define i32 @muli32_p63(i32 %a) nounwind {
; RV32I-LABEL: muli32_p63:
; RV32I: # %bb.0:
@@ -876,7 +945,7 @@ define i64 @muli64_p72(i64 %a) nounwind {
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: li a2, 72
+; RV32I-NEXT: li a2, 60
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: call __muldi3
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -885,6 +954,48 @@ define i64 @muli64_p72(i64 %a) nounwind {
;
; RV32IM-LABEL: muli64_p72:
; RV32IM: # %bb.0:
+; RV32IM-NEXT: li a2, 60
+; RV32IM-NEXT: slli a3, a1, 2
+; RV32IM-NEXT: slli a1, a1, 6
+; RV32IM-NEXT: sub a1, a1, a3
+; RV32IM-NEXT: slli a3, a0, 2
+; RV32IM-NEXT: mulhu a2, a0, a2
+; RV32IM-NEXT: slli a0, a0, 6
+; RV32IM-NEXT: add a1, a2, a1
+; RV32IM-NEXT: sub a0, a0, a3
+; RV32IM-NEXT: ret
+;
+; RV64I-LABEL: muli64_p72:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a1, a0, 4
+; RV64I-NEXT: sub a0, a1, a0
+; RV64I-NEXT: slli a0, a0, 2
+; RV64I-NEXT: ret
+;
+; RV64IM-LABEL: muli64_p72:
+; RV64IM: # %bb.0:
+; RV64IM-NEXT: slli a1, a0, 2
+; RV64IM-NEXT: slli a0, a0, 6
+; RV64IM-NEXT: sub a0, a0, a1
+; RV64IM-NEXT: ret
+ %1 = mul i64 %a, 60
+ ret i64 %1
+}
+
+define i64 @muli64_p68(i64 %a) nounwind {
+; RV32I-LABEL: muli64_p68:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a2, 72
+; RV32I-NEXT: li a3, 0
+; RV32I-NEXT: call __muldi3
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IM-LABEL: muli64_p68:
+; RV32IM: # %bb.0:
; RV32IM-NEXT: li a2, 72
; RV32IM-NEXT: mul a1, a1, a2
; RV32IM-NEXT: mulhu a3, a0, a2
@@ -892,12 +1003,14 @@ define i64 @muli64_p72(i64 %a) nounwind {
; RV32IM-NEXT: mul a0, a0, a2
; RV32IM-NEXT: ret
;
-; RV64I-LABEL: muli64_p72:
+; RV64I-LABEL: muli64_p68:
; RV64I: # %bb.0:
-; RV64I-NEXT: li a1, 72
-; RV64I-NEXT: tail __muldi3
+; RV64I-NEXT: slli a1, a0, 3
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: slli a0, a0, 3
+; RV64I-NEXT: ret
;
-; RV64IM-LABEL: muli64_p72:
+; RV64IM-LABEL: muli64_p68:
; RV64IM: # %bb.0:
; RV64IM-NEXT: li a1, 72
; RV64IM-NEXT: mul a0, a0, a1
@@ -1056,8 +1169,10 @@ define i64 @muli64_m65(i64 %a) nounwind {
define i32 @muli32_p384(i32 %a) nounwind {
; RV32I-LABEL: muli32_p384:
; RV32I: # %bb.0:
-; RV32I-NEXT: li a1, 384
-; RV32I-NEXT: tail __mulsi3
+; RV32I-NEXT: slli a1, a0, 2
+; RV32I-NEXT: sub a0, a1, a0
+; RV32I-NEXT: slli a0, a0, 7
+; RV32I-NEXT: ret
;
; RV32IM-LABEL: muli32_p384:
; RV32IM: # %bb.0:
@@ -1068,9 +1183,9 @@ define i32 @muli32_p384(i32 %a) nounwind {
;
; RV64I-LABEL: muli32_p384:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a1, a0, 7
-; RV64I-NEXT: slli a0, a0, 9
-; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 2
+; RV64I-NEXT: sub a0, a1, a0
+; RV64I-NEXT: slli a0, a0, 7
; RV64I-NEXT: ret
;
; RV64IM-LABEL: muli32_p384:
@@ -1086,8 +1201,10 @@ define i32 @muli32_p384(i32 %a) nounwind {
define i32 @muli32_p12288(i32 %a) nounwind {
; RV32I-LABEL: muli32_p12288:
; RV32I: # %bb.0:
-; RV32I-NEXT: lui a1, 3
-; RV32I-NEXT: tail __mulsi3
+; RV32I-NEXT: slli a1, a0, 2
+; RV32I-NEXT: sub a0, a1, a0
+; RV32I-NEXT: slli a0, a0, 12
+; RV32I-NEXT: ret
;
; RV32IM-LABEL: muli32_p12288:
; RV32IM: # %bb.0:
@@ -1098,9 +1215,9 @@ define i32 @muli32_p12288(i32 %a) nounwind {
;
; RV64I-LABEL: muli32_p12288:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a1, a0, 12
-; RV64I-NEXT: slli a0, a0, 14
-; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 2
+; RV64I-NEXT: sub a0, a1, a0
+; RV64I-NEXT: slli a0, a0, 12
; RV64I-NEXT: ret
;
; RV64IM-LABEL: muli32_p12288:
@@ -1212,9 +1329,11 @@ define i32 @muli32_m3840(i32 %a) nounwind {
define i32 @muli32_m4352(i32 %a) nounwind {
; RV32I-LABEL: muli32_m4352:
; RV32I: # %bb.0:
-; RV32I-NEXT: li a1, -17
-; RV32I-NEXT: slli a1, a1, 8
-; RV32I-NEXT: tail __mulsi3
+; RV32I-NEXT: slli a1, a0, 12
+; RV32I-NEXT: slli a0, a0, 8
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: neg a0, a0
+; RV32I-NEXT: ret
;
; RV32IM-LABEL: muli32_m4352:
; RV32IM: # %bb.0:
@@ -1225,13 +1344,10 @@ define i32 @muli32_m4352(i32 %a) nounwind {
;
; RV64I-LABEL: muli32_m4352:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: li a1, -17
-; RV64I-NEXT: slli a1, a1, 8
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: slli a1, a0, 12
+; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: neg a0, a0
; RV64I-NEXT: ret
;
; RV64IM-LABEL: muli32_m4352:
@@ -1362,9 +1478,11 @@ define i64 @muli64_m4352(i64 %a) nounwind {
;
; RV64I-LABEL: muli64_m4352:
; RV64I: # %bb.0:
-; RV64I-NEXT: li a1, -17
-; RV64I-NEXT: slli a1, a1, 8
-; RV64I-NEXT: tail __muldi3
+; RV64I-NEXT: slli a1, a0, 12
+; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: neg a0, a0
+; RV64I-NEXT: ret
;
; RV64IM-LABEL: muli64_m4352:
; RV64IM: # %bb.0:
@@ -1453,10 +1571,10 @@ define i128 @muli128_m3840(i128 %a) nounwind {
; RV32I-NEXT: sltu a7, a5, a4
; RV32I-NEXT: sub a6, a6, t2
; RV32I-NEXT: mv t1, a7
-; RV32I-NEXT: beq t0, a3, .LBB40_2
+; RV32I-NEXT: beq t0, a3, .LBB43_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sltu t1, t0, a3
-; RV32I-NEXT: .LBB40_2:
+; RV32I-NEXT: .LBB43_2:
; RV32I-NEXT: sub a2, a2, a1
; RV32I-NEXT: sub a1, t0, a3
; RV32I-NEXT: sub a5, a5, a4
@@ -1567,10 +1685,10 @@ define i128 @muli128_m63(i128 %a) nounwind {
; RV32I-NEXT: sltu a7, a3, a6
; RV32I-NEXT: or t0, t0, a5
; RV32I-NEXT: mv a5, a7
-; RV32I-NEXT: beq a4, t0, .LBB41_2
+; RV32I-NEXT: beq a4, t0, .LBB44_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sltu a5, a4, t0
-; RV32I-NEXT: .LBB41_2:
+; RV32I-NEXT: .LBB44_2:
; RV32I-NEXT: srli t1, a4, 26
; RV32I-NEXT: slli t2, a2, 6
; RV32I-NEXT: srli t3, a2, 26
@@ -1995,8 +2113,10 @@ define i64 @muland_demand(i64 %x) nounwind {
; RV64I-NEXT: li a1, -29
; RV64I-NEXT: srli a1, a1, 2
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: li a1, 12
-; RV64I-NEXT: tail __muldi3
+; RV64I-NEXT: slli a1, a0, 2
+; RV64I-NEXT: sub a0, a1, a0
+; RV64I-NEXT: slli a0, a0, 2
+; RV64I-NEXT: ret
;
; RV64IM-LABEL: muland_demand:
; RV64IM: # %bb.0:
@@ -2031,9 +2151,10 @@ define i64 @mulzext_demand(i32 signext %x) nounwind {
;
; RV64I-LABEL: mulzext_demand:
; RV64I: # %bb.0:
-; RV64I-NEXT: li a1, 3
-; RV64I-NEXT: slli a1, a1, 32
-; RV64I-NEXT: tail __muldi3
+; RV64I-NEXT: slli a1, a0, 2
+; RV64I-NEXT: subw a0, a1, a0
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: ret
;
; RV64IM-LABEL: mulzext_demand:
; RV64IM: # %bb.0:
@@ -2050,8 +2171,16 @@ define i32 @mulfshl_demand(i32 signext %x) nounwind {
; RV32I-LABEL: mulfshl_demand:
; RV32I: # %bb.0:
; RV32I-NEXT: srli a0, a0, 11
-; RV32I-NEXT: lui a1, 92808
-; RV32I-NEXT: tail __mulsi3
+; RV32I-NEXT: slli a1, a0, 3
+; RV32I-NEXT: slli a2, a0, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a0, a0, 10
+; RV32I-NEXT: add a0, a2, a0
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: slli a1, a0, 3
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: slli a0, a0, 15
+; RV32I-NEXT: ret
;
; RV32IM-LABEL: mulfshl_demand:
; RV32IM: # %bb.0:
@@ -2062,13 +2191,16 @@ define i32 @mulfshl_demand(i32 signext %x) nounwind {
;
; RV64I-LABEL: mulfshl_demand:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srliw a0, a0, 11
-; RV64I-NEXT: lui a1, 92808
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: slli a1, a0, 3
+; RV64I-NEXT: slli a2, a0, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a0, a0, 10
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: slli a1, a0, 3
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: slli a0, a0, 15
; RV64I-NEXT: ret
;
; RV64IM-LABEL: mulfshl_demand:
@@ -2085,8 +2217,16 @@ define i32 @mulfshl_demand(i32 signext %x) nounwind {
define i32 @mulor_demand(i32 signext %x, i32 signext %y) nounwind {
; RV32I-LABEL: mulor_demand:
; RV32I: # %bb.0:
-; RV32I-NEXT: lui a1, 92808
-; RV32I-NEXT: tail __mulsi3
+; RV32I-NEXT: slli a1, a0, 3
+; RV32I-NEXT: slli a2, a0, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a0, a0, 10
+; RV32I-NEXT: add a0, a2, a0
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: slli a1, a0, 3
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: slli a0, a0, 15
+; RV32I-NEXT: ret
;
; RV32IM-LABEL: mulor_demand:
; RV32IM: # %bb.0:
@@ -2096,12 +2236,15 @@ define i32 @mulor_demand(i32 signext %x, i32 signext %y) nounwind {
;
; RV64I-LABEL: mulor_demand:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lui a1, 92808
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: slli a1, a0, 3
+; RV64I-NEXT: slli a2, a0, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a0, a0, 10
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: slli a1, a0, 3
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: slli a0, a0, 15
; RV64I-NEXT: ret
;
; RV64IM-LABEL: mulor_demand:
diff --git a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
index d9f7d36127293..10ef3357d4783 100644
--- a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
@@ -357,20 +357,33 @@ define signext i32 @cttz_i32(i32 signext %a) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: beqz a0, .LBB6_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: neg a1, a0
+; RV64I-NEXT: negw a1, a0
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 30667
-; RV64I-NEXT: addiw a1, a1, 1329
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 6
+; RV64I-NEXT: slli a2, a0, 8
+; RV64I-NEXT: slli a3, a0, 10
+; RV64I-NEXT: slli a4, a0, 12
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: slli a2, a0, 16
+; RV64I-NEXT: subw a3, a3, a4
+; RV64I-NEXT: slli a4, a0, 18
+; RV64I-NEXT: subw a2, a2, a4
+; RV64I-NEXT: slli a4, a0, 4
+; RV64I-NEXT: subw a4, a0, a4
+; RV64I-NEXT: add a1, a4, a1
+; RV64I-NEXT: slli a4, a0, 14
+; RV64I-NEXT: subw a3, a3, a4
+; RV64I-NEXT: slli a4, a0, 23
+; RV64I-NEXT: subw a2, a2, a4
+; RV64I-NEXT: slli a0, a0, 27
+; RV64I-NEXT: add a1, a1, a3
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: srliw a0, a0, 27
; RV64I-NEXT: lui a1, %hi(.LCPI6_0)
; RV64I-NEXT: addi a1, a1, %lo(.LCPI6_0)
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB6_2:
; RV64I-NEXT: li a0, 32
@@ -397,20 +410,33 @@ define signext i32 @cttz_i32(i32 signext %a) nounwind {
define signext i32 @cttz_zero_undef_i32(i32 signext %a) nounwind {
; RV64I-LABEL: cttz_zero_undef_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: neg a1, a0
+; RV64I-NEXT: negw a1, a0
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 30667
-; RV64I-NEXT: addiw a1, a1, 1329
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 6
+; RV64I-NEXT: slli a2, a0, 8
+; RV64I-NEXT: slli a3, a0, 10
+; RV64I-NEXT: slli a4, a0, 12
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: slli a2, a0, 16
+; RV64I-NEXT: subw a3, a3, a4
+; RV64I-NEXT: slli a4, a0, 18
+; RV64I-NEXT: subw a2, a2, a4
+; RV64I-NEXT: slli a4, a0, 4
+; RV64I-NEXT: subw a4, a0, a4
+; RV64I-NEXT: add a1, a4, a1
+; RV64I-NEXT: slli a4, a0, 14
+; RV64I-NEXT: subw a3, a3, a4
+; RV64I-NEXT: slli a4, a0, 23
+; RV64I-NEXT: subw a2, a2, a4
+; RV64I-NEXT: slli a0, a0, 27
+; RV64I-NEXT: add a1, a1, a3
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: srliw a0, a0, 27
; RV64I-NEXT: lui a1, %hi(.LCPI7_0)
; RV64I-NEXT: addi a1, a1, %lo(.LCPI7_0)
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64XTHEADBB-LABEL: cttz_zero_undef_i32:
@@ -429,26 +455,36 @@ define signext i32 @cttz_zero_undef_i32(i32 signext %a) nounwind {
define signext i32 @findFirstSet_i32(i32 signext %a) nounwind {
; RV64I-LABEL: findFirstSet_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: neg a0, a0
-; RV64I-NEXT: and a0, s0, a0
-; RV64I-NEXT: lui a1, 30667
-; RV64I-NEXT: addiw a1, a1, 1329
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: srliw a0, a0, 27
-; RV64I-NEXT: lui a1, %hi(.LCPI8_0)
-; RV64I-NEXT: addi a1, a1, %lo(.LCPI8_0)
-; RV64I-NEXT: add a0, a1, a0
-; RV64I-NEXT: lbu a0, 0(a0)
-; RV64I-NEXT: snez a1, s0
-; RV64I-NEXT: addi a1, a1, -1
-; RV64I-NEXT: or a0, a1, a0
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: negw a1, a0
+; RV64I-NEXT: and a1, a0, a1
+; RV64I-NEXT: slli a2, a1, 6
+; RV64I-NEXT: slli a3, a1, 8
+; RV64I-NEXT: slli a4, a1, 10
+; RV64I-NEXT: slli a5, a1, 12
+; RV64I-NEXT: add a2, a2, a3
+; RV64I-NEXT: slli a3, a1, 16
+; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: slli a5, a1, 18
+; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: slli a5, a1, 4
+; RV64I-NEXT: subw a5, a1, a5
+; RV64I-NEXT: add a2, a5, a2
+; RV64I-NEXT: slli a5, a1, 14
+; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: slli a5, a1, 23
+; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: slli a1, a1, 27
+; RV64I-NEXT: add a2, a2, a4
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: add a1, a2, a1
+; RV64I-NEXT: srliw a1, a1, 27
+; RV64I-NEXT: lui a2, %hi(.LCPI8_0)
+; RV64I-NEXT: addi a2, a2, %lo(.LCPI8_0)
+; RV64I-NEXT: add a1, a2, a1
+; RV64I-NEXT: lbu a1, 0(a1)
+; RV64I-NEXT: snez a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: ret
;
; RV64XTHEADBB-LABEL: findFirstSet_i32:
@@ -472,27 +508,37 @@ define signext i32 @findFirstSet_i32(i32 signext %a) nounwind {
define signext i32 @ffs_i32(i32 signext %a) nounwind {
; RV64I-LABEL: ffs_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: neg a0, a0
-; RV64I-NEXT: and a0, s0, a0
-; RV64I-NEXT: lui a1, 30667
-; RV64I-NEXT: addiw a1, a1, 1329
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: srliw a0, a0, 27
-; RV64I-NEXT: lui a1, %hi(.LCPI9_0)
-; RV64I-NEXT: addi a1, a1, %lo(.LCPI9_0)
-; RV64I-NEXT: add a0, a1, a0
-; RV64I-NEXT: lbu a0, 0(a0)
-; RV64I-NEXT: seqz a1, s0
-; RV64I-NEXT: addi a0, a0, 1
-; RV64I-NEXT: addi a1, a1, -1
-; RV64I-NEXT: and a0, a1, a0
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: negw a1, a0
+; RV64I-NEXT: and a1, a0, a1
+; RV64I-NEXT: slli a2, a1, 6
+; RV64I-NEXT: slli a3, a1, 8
+; RV64I-NEXT: slli a4, a1, 10
+; RV64I-NEXT: slli a5, a1, 12
+; RV64I-NEXT: add a2, a2, a3
+; RV64I-NEXT: slli a3, a1, 16
+; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: slli a5, a1, 18
+; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: slli a5, a1, 4
+; RV64I-NEXT: subw a5, a1, a5
+; RV64I-NEXT: add a2, a5, a2
+; RV64I-NEXT: slli a5, a1, 14
+; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: slli a5, a1, 23
+; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: add a2, a2, a4
+; RV64I-NEXT: lui a4, %hi(.LCPI9_0)
+; RV64I-NEXT: addi a4, a4, %lo(.LCPI9_0)
+; RV64I-NEXT: slli a1, a1, 27
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: add a1, a2, a1
+; RV64I-NEXT: srliw a1, a1, 27
+; RV64I-NEXT: add a1, a4, a1
+; RV64I-NEXT: lbu a1, 0(a1)
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a1, a1, 1
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: ret
;
; RV64XTHEADBB-LABEL: ffs_i32:
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index 17eb0817d548a..3cd1931b6ae4c 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -347,20 +347,33 @@ define signext i32 @cttz_i32(i32 signext %a) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: beqz a0, .LBB6_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: neg a1, a0
+; RV64I-NEXT: negw a1, a0
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 30667
-; RV64I-NEXT: addiw a1, a1, 1329
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 6
+; RV64I-NEXT: slli a2, a0, 8
+; RV64I-NEXT: slli a3, a0, 10
+; RV64I-NEXT: slli a4, a0, 12
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: slli a2, a0, 16
+; RV64I-NEXT: subw a3, a3, a4
+; RV64I-NEXT: slli a4, a0, 18
+; RV64I-NEXT: subw a2, a2, a4
+; RV64I-NEXT: slli a4, a0, 4
+; RV64I-NEXT: subw a4, a0, a4
+; RV64I-NEXT: add a1, a4, a1
+; RV64I-NEXT: slli a4, a0, 14
+; RV64I-NEXT: subw a3, a3, a4
+; RV64I-NEXT: slli a4, a0, 23
+; RV64I-NEXT: subw a2, a2, a4
+; RV64I-NEXT: slli a0, a0, 27
+; RV64I-NEXT: add a1, a1, a3
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: srliw a0, a0, 27
; RV64I-NEXT: lui a1, %hi(.LCPI6_0)
; RV64I-NEXT: addi a1, a1, %lo(.LCPI6_0)
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB6_2:
; RV64I-NEXT: li a0, 32
@@ -377,20 +390,33 @@ define signext i32 @cttz_i32(i32 signext %a) nounwind {
define signext i32 @cttz_zero_undef_i32(i32 signext %a) nounwind {
; RV64I-LABEL: cttz_zero_undef_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: neg a1, a0
+; RV64I-NEXT: negw a1, a0
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 30667
-; RV64I-NEXT: addiw a1, a1, 1329
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 6
+; RV64I-NEXT: slli a2, a0, 8
+; RV64I-NEXT: slli a3, a0, 10
+; RV64I-NEXT: slli a4, a0, 12
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: slli a2, a0, 16
+; RV64I-NEXT: subw a3, a3, a4
+; RV64I-NEXT: slli a4, a0, 18
+; RV64I-NEXT: subw a2, a2, a4
+; RV64I-NEXT: slli a4, a0, 4
+; RV64I-NEXT: subw a4, a0, a4
+; RV64I-NEXT: add a1, a4, a1
+; RV64I-NEXT: slli a4, a0, 14
+; RV64I-NEXT: subw a3, a3, a4
+; RV64I-NEXT: slli a4, a0, 23
+; RV64I-NEXT: subw a2, a2, a4
+; RV64I-NEXT: slli a0, a0, 27
+; RV64I-NEXT: add a1, a1, a3
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: srliw a0, a0, 27
; RV64I-NEXT: lui a1, %hi(.LCPI7_0)
; RV64I-NEXT: addi a1, a1, %lo(.LCPI7_0)
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: cttz_zero_undef_i32:
@@ -404,26 +430,36 @@ define signext i32 @cttz_zero_undef_i32(i32 signext %a) nounwind {
define signext i32 @findFirstSet_i32(i32 signext %a) nounwind {
; RV64I-LABEL: findFirstSet_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: neg a0, a0
-; RV64I-NEXT: and a0, s0, a0
-; RV64I-NEXT: lui a1, 30667
-; RV64I-NEXT: addiw a1, a1, 1329
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: srliw a0, a0, 27
-; RV64I-NEXT: lui a1, %hi(.LCPI8_0)
-; RV64I-NEXT: addi a1, a1, %lo(.LCPI8_0)
-; RV64I-NEXT: add a0, a1, a0
-; RV64I-NEXT: lbu a0, 0(a0)
-; RV64I-NEXT: snez a1, s0
-; RV64I-NEXT: addi a1, a1, -1
-; RV64I-NEXT: or a0, a1, a0
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: negw a1, a0
+; RV64I-NEXT: and a1, a0, a1
+; RV64I-NEXT: slli a2, a1, 6
+; RV64I-NEXT: slli a3, a1, 8
+; RV64I-NEXT: slli a4, a1, 10
+; RV64I-NEXT: slli a5, a1, 12
+; RV64I-NEXT: add a2, a2, a3
+; RV64I-NEXT: slli a3, a1, 16
+; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: slli a5, a1, 18
+; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: slli a5, a1, 4
+; RV64I-NEXT: subw a5, a1, a5
+; RV64I-NEXT: add a2, a5, a2
+; RV64I-NEXT: slli a5, a1, 14
+; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: slli a5, a1, 23
+; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: slli a1, a1, 27
+; RV64I-NEXT: add a2, a2, a4
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: add a1, a2, a1
+; RV64I-NEXT: srliw a1, a1, 27
+; RV64I-NEXT: lui a2, %hi(.LCPI8_0)
+; RV64I-NEXT: addi a2, a2, %lo(.LCPI8_0)
+; RV64I-NEXT: add a1, a2, a1
+; RV64I-NEXT: lbu a1, 0(a1)
+; RV64I-NEXT: snez a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: findFirstSet_i32:
@@ -442,27 +478,37 @@ define signext i32 @findFirstSet_i32(i32 signext %a) nounwind {
define signext i32 @ffs_i32(i32 signext %a) nounwind {
; RV64I-LABEL: ffs_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: neg a0, a0
-; RV64I-NEXT: and a0, s0, a0
-; RV64I-NEXT: lui a1, 30667
-; RV64I-NEXT: addiw a1, a1, 1329
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: srliw a0, a0, 27
-; RV64I-NEXT: lui a1, %hi(.LCPI9_0)
-; RV64I-NEXT: addi a1, a1, %lo(.LCPI9_0)
-; RV64I-NEXT: add a0, a1, a0
-; RV64I-NEXT: lbu a0, 0(a0)
-; RV64I-NEXT: seqz a1, s0
-; RV64I-NEXT: addi a0, a0, 1
-; RV64I-NEXT: addi a1, a1, -1
-; RV64I-NEXT: and a0, a1, a0
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: negw a1, a0
+; RV64I-NEXT: and a1, a0, a1
+; RV64I-NEXT: slli a2, a1, 6
+; RV64I-NEXT: slli a3, a1, 8
+; RV64I-NEXT: slli a4, a1, 10
+; RV64I-NEXT: slli a5, a1, 12
+; RV64I-NEXT: add a2, a2, a3
+; RV64I-NEXT: slli a3, a1, 16
+; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: slli a5, a1, 18
+; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: slli a5, a1, 4
+; RV64I-NEXT: subw a5, a1, a5
+; RV64I-NEXT: add a2, a5, a2
+; RV64I-NEXT: slli a5, a1, 14
+; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: slli a5, a1, 23
+; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: add a2, a2, a4
+; RV64I-NEXT: lui a4, %hi(.LCPI9_0)
+; RV64I-NEXT: addi a4, a4, %lo(.LCPI9_0)
+; RV64I-NEXT: slli a1, a1, 27
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: add a1, a2, a1
+; RV64I-NEXT: srliw a1, a1, 27
+; RV64I-NEXT: add a1, a4, a1
+; RV64I-NEXT: lbu a1, 0(a1)
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: addi a1, a1, 1
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: ffs_i32:
diff --git a/llvm/test/CodeGen/RISCV/rvv/known-never-zero.ll b/llvm/test/CodeGen/RISCV/rvv/known-never-zero.ll
index 7990c1c1eabc2..4d9a6aeaad2ef 100644
--- a/llvm/test/CodeGen/RISCV/rvv/known-never-zero.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/known-never-zero.ll
@@ -9,26 +9,35 @@
define i32 @vscale_known_nonzero() {
; CHECK-LABEL: vscale_known_nonzero:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset ra, -8
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: neg a1, a0
+; CHECK-NEXT: negw a1, a0
; CHECK-NEXT: and a0, a0, a1
-; CHECK-NEXT: lui a1, 30667
-; CHECK-NEXT: addiw a1, a1, 1329
-; CHECK-NEXT: call __muldi3
+; CHECK-NEXT: slli a1, a0, 6
+; CHECK-NEXT: slli a2, a0, 8
+; CHECK-NEXT: slli a3, a0, 10
+; CHECK-NEXT: slli a4, a0, 12
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: slli a2, a0, 16
+; CHECK-NEXT: subw a3, a3, a4
+; CHECK-NEXT: slli a4, a0, 18
+; CHECK-NEXT: subw a2, a2, a4
+; CHECK-NEXT: slli a4, a0, 4
+; CHECK-NEXT: subw a4, a0, a4
+; CHECK-NEXT: add a1, a4, a1
+; CHECK-NEXT: slli a4, a0, 14
+; CHECK-NEXT: subw a3, a3, a4
+; CHECK-NEXT: slli a4, a0, 23
+; CHECK-NEXT: subw a2, a2, a4
+; CHECK-NEXT: slli a0, a0, 27
+; CHECK-NEXT: add a1, a1, a3
+; CHECK-NEXT: add a0, a2, a0
+; CHECK-NEXT: add a0, a1, a0
; CHECK-NEXT: srliw a0, a0, 27
; CHECK-NEXT: lui a1, %hi(.LCPI0_0)
; CHECK-NEXT: addi a1, a1, %lo(.LCPI0_0)
; CHECK-NEXT: add a0, a1, a0
; CHECK-NEXT: lbu a0, 0(a0)
-; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT: .cfi_restore ra
-; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: ret
%x = call i32 @llvm.vscale()
%r = call i32 @llvm.cttz.i32(i32 %x, i1 false)
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index c6503813aeed2..ece92035efbae 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -9,40 +9,70 @@
define i1 @test_srem_odd(i29 %X) nounwind {
; RV32-LABEL: test_srem_odd:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: lui a1, 128424
-; RV32-NEXT: addi a1, a1, 331
-; RV32-NEXT: call __mulsi3
-; RV32-NEXT: lui a1, 662
-; RV32-NEXT: addi a1, a1, -83
-; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: lui a1, 1324
+; RV32-NEXT: slli a1, a0, 8
+; RV32-NEXT: slli a2, a0, 10
+; RV32-NEXT: slli a3, a0, 12
+; RV32-NEXT: slli a4, a0, 14
+; RV32-NEXT: sub a1, a1, a2
+; RV32-NEXT: slli a2, a0, 18
+; RV32-NEXT: add a3, a3, a4
+; RV32-NEXT: slli a4, a0, 22
+; RV32-NEXT: add a2, a2, a4
+; RV32-NEXT: slli a4, a0, 4
+; RV32-NEXT: sub a4, a4, a0
+; RV32-NEXT: add a1, a4, a1
+; RV32-NEXT: slli a4, a0, 16
+; RV32-NEXT: add a3, a3, a4
+; RV32-NEXT: slli a4, a0, 25
+; RV32-NEXT: sub a2, a2, a4
+; RV32-NEXT: slli a0, a0, 27
+; RV32-NEXT: add a1, a1, a3
+; RV32-NEXT: lui a3, 662
+; RV32-NEXT: add a0, a2, a0
+; RV32-NEXT: lui a2, 1324
+; RV32-NEXT: addi a3, a3, -83
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a0, a3
+; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: srli a0, a0, 3
-; RV32-NEXT: addi a1, a1, -165
+; RV32-NEXT: addi a1, a2, -165
; RV32-NEXT: sltu a0, a0, a1
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: test_srem_odd:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: lui a1, 128424
-; RV64-NEXT: addiw a1, a1, 331
-; RV64-NEXT: call __muldi3
-; RV64-NEXT: lui a1, 662
-; RV64-NEXT: addi a1, a1, -83
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: lui a1, 1324
+; RV64-NEXT: slli a1, a0, 8
+; RV64-NEXT: slli a2, a0, 10
+; RV64-NEXT: slli a3, a0, 12
+; RV64-NEXT: slli a4, a0, 14
+; RV64-NEXT: subw a1, a1, a2
+; RV64-NEXT: slli a2, a0, 18
+; RV64-NEXT: add a3, a3, a4
+; RV64-NEXT: slli a4, a0, 22
+; RV64-NEXT: add a2, a2, a4
+; RV64-NEXT: slli a4, a0, 4
+; RV64-NEXT: subw a4, a4, a0
+; RV64-NEXT: add a1, a4, a1
+; RV64-NEXT: slli a4, a0, 16
+; RV64-NEXT: add a3, a3, a4
+; RV64-NEXT: slli a4, a0, 25
+; RV64-NEXT: subw a2, a2, a4
+; RV64-NEXT: slli a0, a0, 27
+; RV64-NEXT: add a1, a1, a3
+; RV64-NEXT: lui a3, 662
+; RV64-NEXT: add a0, a2, a0
+; RV64-NEXT: lui a2, 1324
+; RV64-NEXT: addi a3, a3, -83
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a0, a3
+; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: slli a0, a0, 35
; RV64-NEXT: srli a0, a0, 35
-; RV64-NEXT: addiw a1, a1, -165
+; RV64-NEXT: addiw a1, a2, -165
; RV64-NEXT: sltu a0, a0, a1
-; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
;
; RV32M-LABEL: test_srem_odd:
@@ -382,65 +412,122 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV64-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s4, 0(sp) # 8-byte Folded Spill
; RV64-NEXT: mv s0, a0
; RV64-NEXT: lbu a0, 12(a0)
-; RV64-NEXT: ld a1, 0(s0)
-; RV64-NEXT: lwu a2, 8(s0)
+; RV64-NEXT: ld s3, 0(s0)
+; RV64-NEXT: lwu a1, 8(s0)
; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: srli a3, a1, 2
-; RV64-NEXT: or a0, a2, a0
-; RV64-NEXT: slli a2, a2, 62
-; RV64-NEXT: slli a1, a1, 31
-; RV64-NEXT: or a2, a2, a3
-; RV64-NEXT: slli s1, a0, 29
-; RV64-NEXT: srai a0, a2, 31
-; RV64-NEXT: srai s1, s1, 31
-; RV64-NEXT: srai s2, a1, 31
+; RV64-NEXT: srli a2, s3, 2
+; RV64-NEXT: or a0, a1, a0
+; RV64-NEXT: slli a1, a1, 62
+; RV64-NEXT: or a1, a1, a2
+; RV64-NEXT: slli a2, s3, 31
+; RV64-NEXT: slli a3, a0, 29
+; RV64-NEXT: srai a0, a1, 31
+; RV64-NEXT: srai s2, a3, 31
+; RV64-NEXT: srai s4, a2, 31
; RV64-NEXT: li a1, 7
; RV64-NEXT: call __moddi3
-; RV64-NEXT: mv s3, a0
-; RV64-NEXT: li a1, -5
-; RV64-NEXT: mv a0, s1
-; RV64-NEXT: call __moddi3
; RV64-NEXT: mv s1, a0
-; RV64-NEXT: lui a0, 699051
-; RV64-NEXT: addiw a1, a0, -1365
-; RV64-NEXT: slli a0, a1, 32
-; RV64-NEXT: add a1, a1, a0
+; RV64-NEXT: li a1, -5
; RV64-NEXT: mv a0, s2
-; RV64-NEXT: call __muldi3
-; RV64-NEXT: lui a1, %hi(.LCPI3_0)
-; RV64-NEXT: addi s1, s1, -2
-; RV64-NEXT: addi s3, s3, -1
-; RV64-NEXT: ld a1, %lo(.LCPI3_0)(a1)
-; RV64-NEXT: seqz a2, s1
-; RV64-NEXT: seqz a3, s3
-; RV64-NEXT: addi a3, a3, -1
-; RV64-NEXT: addi a2, a2, -1
-; RV64-NEXT: slli a4, a2, 2
-; RV64-NEXT: slli a5, a3, 31
-; RV64-NEXT: srli a5, a5, 62
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: or a4, a5, a4
-; RV64-NEXT: slli a5, a0, 63
-; RV64-NEXT: srli a0, a0, 1
-; RV64-NEXT: or a0, a0, a5
-; RV64-NEXT: slli a2, a2, 29
-; RV64-NEXT: slli a3, a3, 33
-; RV64-NEXT: srli a2, a2, 61
-; RV64-NEXT: sltu a0, a1, a0
-; RV64-NEXT: neg a0, a0
-; RV64-NEXT: slli a0, a0, 31
-; RV64-NEXT: srli a0, a0, 31
-; RV64-NEXT: or a0, a0, a3
-; RV64-NEXT: sd a0, 0(s0)
+; RV64-NEXT: call __moddi3
+; RV64-NEXT: slli a1, s4, 4
+; RV64-NEXT: slli a2, s4, 6
+; RV64-NEXT: slli a3, s4, 8
+; RV64-NEXT: slli a4, s4, 10
+; RV64-NEXT: slli a5, s4, 14
+; RV64-NEXT: slli a6, s4, 16
+; RV64-NEXT: slli a7, s4, 22
+; RV64-NEXT: add a1, a1, a2
+; RV64-NEXT: slli a2, s4, 24
+; RV64-NEXT: add a3, a3, a4
+; RV64-NEXT: slli a4, s3, 32
+; RV64-NEXT: add a5, a5, a6
+; RV64-NEXT: slli a6, s3, 34
+; RV64-NEXT: add a2, a7, a2
+; RV64-NEXT: slli a7, s3, 48
+; RV64-NEXT: add a4, a4, a6
+; RV64-NEXT: slli a6, s3, 50
+; RV64-NEXT: add a6, a7, a6
+; RV64-NEXT: slli a7, s4, 2
+; RV64-NEXT: add a7, s4, a7
+; RV64-NEXT: add a1, a7, a1
+; RV64-NEXT: slli a7, s4, 12
+; RV64-NEXT: add a3, a3, a7
+; RV64-NEXT: slli a7, s4, 18
+; RV64-NEXT: add a5, a5, a7
+; RV64-NEXT: slli a7, s4, 26
+; RV64-NEXT: add a2, a2, a7
+; RV64-NEXT: slli a7, s3, 36
+; RV64-NEXT: add a4, a4, a7
+; RV64-NEXT: slli a7, s3, 52
+; RV64-NEXT: add a6, a6, a7
+; RV64-NEXT: add a1, a1, a3
+; RV64-NEXT: slli a3, s4, 20
+; RV64-NEXT: add a3, a5, a3
+; RV64-NEXT: slli a5, s4, 28
+; RV64-NEXT: add a2, a2, a5
+; RV64-NEXT: slli a5, s3, 38
+; RV64-NEXT: add a4, a4, a5
+; RV64-NEXT: slli a5, s3, 54
+; RV64-NEXT: add a5, a6, a5
+; RV64-NEXT: add a1, a1, a3
+; RV64-NEXT: slli s4, s4, 30
+; RV64-NEXT: add a2, a2, s4
+; RV64-NEXT: slli a3, s3, 40
+; RV64-NEXT: add a3, a4, a3
+; RV64-NEXT: slli a4, s3, 56
+; RV64-NEXT: add a4, a5, a4
+; RV64-NEXT: slli a5, s3, 42
+; RV64-NEXT: add a1, a1, a2
+; RV64-NEXT: slli a2, s3, 58
+; RV64-NEXT: addi a0, a0, -2
+; RV64-NEXT: addi s1, s1, -1
+; RV64-NEXT: seqz a0, a0
+; RV64-NEXT: seqz a6, s1
+; RV64-NEXT: addi a6, a6, -1
+; RV64-NEXT: addi a0, a0, -1
+; RV64-NEXT: add a3, a3, a5
+; RV64-NEXT: slli a5, a0, 2
+; RV64-NEXT: add a2, a4, a2
+; RV64-NEXT: slli a4, a6, 31
+; RV64-NEXT: srli a4, a4, 62
+; RV64-NEXT: or a4, a4, a5
+; RV64-NEXT: slli a5, s3, 44
+; RV64-NEXT: add a3, a3, a5
+; RV64-NEXT: slli a5, s3, 60
+; RV64-NEXT: add a2, a2, a5
+; RV64-NEXT: slli a5, s3, 46
+; RV64-NEXT: add a3, a3, a5
+; RV64-NEXT: slli s3, s3, 62
+; RV64-NEXT: add a2, a2, s3
+; RV64-NEXT: lui a5, %hi(.LCPI3_0)
+; RV64-NEXT: ld a5, %lo(.LCPI3_0)(a5)
+; RV64-NEXT: slli a0, a0, 29
+; RV64-NEXT: slli a6, a6, 33
+; RV64-NEXT: srli a0, a0, 61
+; RV64-NEXT: add a1, a1, a3
+; RV64-NEXT: sub a2, a5, a2
+; RV64-NEXT: sub a2, a2, a1
+; RV64-NEXT: slli a1, a2, 63
+; RV64-NEXT: srli a2, a2, 1
+; RV64-NEXT: or a1, a2, a1
+; RV64-NEXT: sltu a1, a5, a1
+; RV64-NEXT: neg a1, a1
+; RV64-NEXT: slli a1, a1, 31
+; RV64-NEXT: srli a1, a1, 31
+; RV64-NEXT: or a1, a1, a6
+; RV64-NEXT: sd a1, 0(s0)
; RV64-NEXT: sw a4, 8(s0)
-; RV64-NEXT: sb a2, 12(s0)
+; RV64-NEXT: sb a0, 12(s0)
; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s4, 0(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 48
; RV64-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
index c73a18c8869d5..d4abb7f294cf9 100644
--- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
@@ -9,30 +9,40 @@
define i1 @test_urem_odd(i13 %X) nounwind {
; RV32-LABEL: test_urem_odd:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: lui a1, 1
-; RV32-NEXT: addi a1, a1, -819
-; RV32-NEXT: call __mulsi3
+; RV32-NEXT: slli a1, a0, 4
+; RV32-NEXT: slli a2, a0, 6
+; RV32-NEXT: slli a3, a0, 8
+; RV32-NEXT: sub a1, a1, a2
+; RV32-NEXT: slli a2, a0, 10
+; RV32-NEXT: sub a3, a3, a2
+; RV32-NEXT: slli a2, a0, 2
+; RV32-NEXT: sub a2, a0, a2
+; RV32-NEXT: slli a0, a0, 12
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a0, a3, a0
+; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: slli a0, a0, 19
; RV32-NEXT: srli a0, a0, 19
; RV32-NEXT: sltiu a0, a0, 1639
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: test_urem_odd:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: lui a1, 1
-; RV64-NEXT: addiw a1, a1, -819
-; RV64-NEXT: call __muldi3
+; RV64-NEXT: slli a1, a0, 4
+; RV64-NEXT: slli a2, a0, 6
+; RV64-NEXT: slli a3, a0, 8
+; RV64-NEXT: subw a1, a1, a2
+; RV64-NEXT: slli a2, a0, 10
+; RV64-NEXT: subw a3, a3, a2
+; RV64-NEXT: slli a2, a0, 2
+; RV64-NEXT: subw a2, a0, a2
+; RV64-NEXT: slli a0, a0, 12
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a0, a3, a0
+; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: slli a0, a0, 51
; RV64-NEXT: srli a0, a0, 51
; RV64-NEXT: sltiu a0, a0, 1639
-; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
;
; RV32M-LABEL: test_urem_odd:
@@ -82,42 +92,64 @@ define i1 @test_urem_odd(i13 %X) nounwind {
define i1 @test_urem_even(i27 %X) nounwind {
; RV32-LABEL: test_urem_even:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: lui a1, 28087
-; RV32-NEXT: addi a1, a1, -585
-; RV32-NEXT: call __mulsi3
+; RV32-NEXT: slli a1, a0, 6
+; RV32-NEXT: slli a2, a0, 9
+; RV32-NEXT: slli a3, a0, 12
+; RV32-NEXT: slli a4, a0, 15
+; RV32-NEXT: add a1, a1, a2
+; RV32-NEXT: slli a2, a0, 21
+; RV32-NEXT: add a3, a3, a4
+; RV32-NEXT: slli a4, a0, 24
+; RV32-NEXT: add a2, a2, a4
+; RV32-NEXT: slli a4, a0, 3
+; RV32-NEXT: add a4, a0, a4
+; RV32-NEXT: add a1, a4, a1
+; RV32-NEXT: slli a4, a0, 18
+; RV32-NEXT: add a3, a3, a4
+; RV32-NEXT: slli a0, a0, 27
+; RV32-NEXT: sub a0, a0, a2
+; RV32-NEXT: lui a2, 2341
+; RV32-NEXT: add a1, a1, a3
+; RV32-NEXT: sub a0, a0, a1
; RV32-NEXT: slli a1, a0, 26
; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: srli a0, a0, 6
; RV32-NEXT: or a0, a0, a1
-; RV32-NEXT: lui a1, 2341
; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: srli a0, a0, 5
-; RV32-NEXT: addi a1, a1, -1755
+; RV32-NEXT: addi a1, a2, -1755
; RV32-NEXT: sltu a0, a0, a1
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: test_urem_even:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: lui a1, 28087
-; RV64-NEXT: addiw a1, a1, -585
-; RV64-NEXT: call __muldi3
+; RV64-NEXT: slli a1, a0, 6
+; RV64-NEXT: slli a2, a0, 9
+; RV64-NEXT: slli a3, a0, 12
+; RV64-NEXT: slli a4, a0, 15
+; RV64-NEXT: add a1, a1, a2
+; RV64-NEXT: slli a2, a0, 21
+; RV64-NEXT: add a3, a3, a4
+; RV64-NEXT: slli a4, a0, 24
+; RV64-NEXT: add a2, a2, a4
+; RV64-NEXT: slli a4, a0, 3
+; RV64-NEXT: add a4, a0, a4
+; RV64-NEXT: add a1, a4, a1
+; RV64-NEXT: slli a4, a0, 18
+; RV64-NEXT: add a3, a3, a4
+; RV64-NEXT: slli a0, a0, 27
+; RV64-NEXT: subw a0, a0, a2
+; RV64-NEXT: lui a2, 2341
+; RV64-NEXT: add a1, a1, a3
+; RV64-NEXT: subw a0, a0, a1
; RV64-NEXT: slli a1, a0, 26
; RV64-NEXT: slli a0, a0, 37
; RV64-NEXT: srli a0, a0, 38
; RV64-NEXT: or a0, a0, a1
-; RV64-NEXT: lui a1, 2341
; RV64-NEXT: slli a0, a0, 37
; RV64-NEXT: srli a0, a0, 37
-; RV64-NEXT: addiw a1, a1, -1755
+; RV64-NEXT: addiw a1, a2, -1755
; RV64-NEXT: sltu a0, a0, a1
-; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
;
; RV32M-LABEL: test_urem_even:
@@ -256,28 +288,32 @@ define i1 @test_urem_odd_setne(i4 %X) nounwind {
define i1 @test_urem_negative_odd(i9 %X) nounwind {
; RV32-LABEL: test_urem_negative_odd:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: li a1, 307
-; RV32-NEXT: call __mulsi3
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: slli a2, a0, 4
+; RV32-NEXT: slli a3, a0, 6
+; RV32-NEXT: sub a1, a1, a0
+; RV32-NEXT: sub a2, a2, a3
+; RV32-NEXT: sub a1, a1, a2
+; RV32-NEXT: slli a0, a0, 8
+; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: andi a0, a0, 511
; RV32-NEXT: sltiu a0, a0, 2
; RV32-NEXT: xori a0, a0, 1
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: test_urem_negative_odd:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: li a1, 307
-; RV64-NEXT: call __muldi3
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: slli a2, a0, 4
+; RV64-NEXT: slli a3, a0, 6
+; RV64-NEXT: subw a1, a1, a0
+; RV64-NEXT: subw a2, a2, a3
+; RV64-NEXT: subw a1, a1, a2
+; RV64-NEXT: slli a0, a0, 8
+; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: andi a0, a0, 511
; RV64-NEXT: sltiu a0, a0, 2
; RV64-NEXT: xori a0, a0, 1
-; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
;
; RV32M-LABEL: test_urem_negative_odd:
@@ -323,117 +359,121 @@ define i1 @test_urem_negative_odd(i9 %X) nounwind {
define void @test_urem_vec(ptr %X) nounwind {
; RV32-LABEL: test_urem_vec:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32-NEXT: mv s0, a0
-; RV32-NEXT: lbu a0, 4(a0)
-; RV32-NEXT: lw a1, 0(s0)
-; RV32-NEXT: slli a0, a0, 10
-; RV32-NEXT: srli s1, a1, 22
-; RV32-NEXT: or s1, s1, a0
-; RV32-NEXT: srli s2, a1, 11
-; RV32-NEXT: andi a0, a1, 2047
-; RV32-NEXT: li a1, 683
-; RV32-NEXT: call __mulsi3
-; RV32-NEXT: slli a1, a0, 10
-; RV32-NEXT: slli a0, a0, 21
-; RV32-NEXT: srli a0, a0, 22
-; RV32-NEXT: or a0, a0, a1
-; RV32-NEXT: andi a0, a0, 2047
-; RV32-NEXT: sltiu s3, a0, 342
-; RV32-NEXT: li a1, 819
-; RV32-NEXT: mv a0, s1
-; RV32-NEXT: call __mulsi3
-; RV32-NEXT: addi a0, a0, -1638
-; RV32-NEXT: andi a0, a0, 2047
-; RV32-NEXT: sltiu s1, a0, 2
-; RV32-NEXT: xori s4, s1, 1
-; RV32-NEXT: li a1, 1463
-; RV32-NEXT: mv a0, s2
-; RV32-NEXT: call __mulsi3
-; RV32-NEXT: addi a0, a0, -1463
-; RV32-NEXT: addi s3, s3, -1
-; RV32-NEXT: addi s1, s1, -1
-; RV32-NEXT: andi a0, a0, 2047
-; RV32-NEXT: andi a1, s3, 2047
-; RV32-NEXT: slli s1, s1, 22
-; RV32-NEXT: sltiu a0, a0, 293
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: andi a0, a0, 2047
-; RV32-NEXT: slli a0, a0, 11
-; RV32-NEXT: or a0, a0, s1
-; RV32-NEXT: or a0, a1, a0
-; RV32-NEXT: sw a0, 0(s0)
-; RV32-NEXT: sb s4, 4(s0)
-; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: lbu a1, 4(a0)
+; RV32-NEXT: lw a2, 0(a0)
+; RV32-NEXT: slli a1, a1, 10
+; RV32-NEXT: srli a3, a2, 22
+; RV32-NEXT: andi a4, a2, 2047
+; RV32-NEXT: srli a2, a2, 11
+; RV32-NEXT: or a1, a3, a1
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: slli a3, a4, 2
+; RV32-NEXT: slli a5, a4, 4
+; RV32-NEXT: slli a6, a4, 6
+; RV32-NEXT: slli a7, a4, 8
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: slli a4, a4, 10
+; RV32-NEXT: addi a1, a1, -2
+; RV32-NEXT: add a5, a5, a6
+; RV32-NEXT: slli a6, a2, 4
+; RV32-NEXT: sub a4, a4, a7
+; RV32-NEXT: slli a7, a2, 6
+; RV32-NEXT: add a6, a2, a6
+; RV32-NEXT: slli a2, a2, 8
+; RV32-NEXT: sub a2, a7, a2
+; RV32-NEXT: slli a7, a1, 2
+; RV32-NEXT: add a3, a3, a5
+; RV32-NEXT: slli a5, a1, 4
+; RV32-NEXT: sub a1, a1, a7
+; RV32-NEXT: sub a2, a6, a2
+; RV32-NEXT: sub a4, a4, a3
+; RV32-NEXT: add a1, a1, a5
+; RV32-NEXT: slli a3, a2, 3
+; RV32-NEXT: slli a5, a4, 10
+; RV32-NEXT: slli a4, a4, 21
+; RV32-NEXT: slli a6, a1, 6
+; RV32-NEXT: sub a3, a3, a2
+; RV32-NEXT: srli a4, a4, 22
+; RV32-NEXT: sub a1, a6, a1
+; RV32-NEXT: andi a2, a3, 2047
+; RV32-NEXT: or a4, a4, a5
+; RV32-NEXT: andi a1, a1, 2047
+; RV32-NEXT: sltiu a2, a2, 293
+; RV32-NEXT: andi a3, a4, 2047
+; RV32-NEXT: sltiu a1, a1, 2
+; RV32-NEXT: sltiu a3, a3, 342
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: xori a4, a1, 1
+; RV32-NEXT: addi a1, a1, -1
+; RV32-NEXT: addi a3, a3, -1
+; RV32-NEXT: andi a2, a2, 2047
+; RV32-NEXT: slli a2, a2, 11
+; RV32-NEXT: andi a3, a3, 2047
+; RV32-NEXT: slli a1, a1, 22
+; RV32-NEXT: or a2, a3, a2
+; RV32-NEXT: or a1, a2, a1
+; RV32-NEXT: sw a1, 0(a0)
+; RV32-NEXT: sb a4, 4(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: test_urem_vec:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -48
-; RV64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: mv s0, a0
-; RV64-NEXT: lbu a0, 4(a0)
-; RV64-NEXT: lwu a1, 0(s0)
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: or a0, a1, a0
-; RV64-NEXT: srli s1, a0, 22
-; RV64-NEXT: srli s2, a0, 11
-; RV64-NEXT: andi a0, a0, 2047
-; RV64-NEXT: li a1, 683
-; RV64-NEXT: call __muldi3
-; RV64-NEXT: slli a1, a0, 10
-; RV64-NEXT: slli a0, a0, 53
-; RV64-NEXT: srli a0, a0, 54
-; RV64-NEXT: or a0, a0, a1
-; RV64-NEXT: andi a0, a0, 2047
-; RV64-NEXT: sltiu s3, a0, 342
-; RV64-NEXT: li a1, 1463
-; RV64-NEXT: mv a0, s2
-; RV64-NEXT: call __muldi3
-; RV64-NEXT: addi a0, a0, -1463
-; RV64-NEXT: andi a0, a0, 2047
-; RV64-NEXT: sltiu s2, a0, 293
-; RV64-NEXT: li a1, 819
-; RV64-NEXT: mv a0, s1
-; RV64-NEXT: call __muldi3
-; RV64-NEXT: addi a0, a0, -1638
-; RV64-NEXT: addi s3, s3, -1
-; RV64-NEXT: addi s2, s2, -1
-; RV64-NEXT: andi a0, a0, 2047
-; RV64-NEXT: andi a1, s3, 2047
-; RV64-NEXT: andi a2, s2, 2047
-; RV64-NEXT: sltiu a0, a0, 2
-; RV64-NEXT: slli a2, a2, 11
-; RV64-NEXT: addi a0, a0, -1
-; RV64-NEXT: slli a0, a0, 22
-; RV64-NEXT: or a0, a2, a0
-; RV64-NEXT: or a0, a1, a0
-; RV64-NEXT: slli a1, a0, 31
-; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: sw a0, 0(s0)
-; RV64-NEXT: sb a1, 4(s0)
-; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 48
+; RV64-NEXT: lbu a1, 4(a0)
+; RV64-NEXT: lwu a2, 0(a0)
+; RV64-NEXT: slli a1, a1, 32
+; RV64-NEXT: or a1, a2, a1
+; RV64-NEXT: andi a2, a1, 2047
+; RV64-NEXT: srli a3, a1, 11
+; RV64-NEXT: addi a3, a3, -1
+; RV64-NEXT: slli a4, a2, 2
+; RV64-NEXT: slli a5, a2, 4
+; RV64-NEXT: slli a6, a2, 6
+; RV64-NEXT: slli a7, a2, 8
+; RV64-NEXT: add a4, a2, a4
+; RV64-NEXT: slli a2, a2, 10
+; RV64-NEXT: add a5, a5, a6
+; RV64-NEXT: slli a6, a3, 4
+; RV64-NEXT: subw a2, a2, a7
+; RV64-NEXT: slli a7, a3, 6
+; RV64-NEXT: add a6, a3, a6
+; RV64-NEXT: slli a3, a3, 8
+; RV64-NEXT: subw a3, a7, a3
+; RV64-NEXT: srli a1, a1, 22
+; RV64-NEXT: addi a1, a1, -2
+; RV64-NEXT: add a4, a4, a5
+; RV64-NEXT: slli a5, a1, 2
+; RV64-NEXT: subw a5, a1, a5
+; RV64-NEXT: slli a1, a1, 4
+; RV64-NEXT: subw a3, a6, a3
+; RV64-NEXT: subw a2, a2, a4
+; RV64-NEXT: add a1, a5, a1
+; RV64-NEXT: slli a4, a3, 3
+; RV64-NEXT: slli a5, a2, 10
+; RV64-NEXT: slli a2, a2, 53
+; RV64-NEXT: slli a6, a1, 6
+; RV64-NEXT: subw a4, a4, a3
+; RV64-NEXT: srli a2, a2, 54
+; RV64-NEXT: subw a1, a6, a1
+; RV64-NEXT: andi a3, a4, 2047
+; RV64-NEXT: or a2, a2, a5
+; RV64-NEXT: andi a1, a1, 2047
+; RV64-NEXT: sltiu a3, a3, 293
+; RV64-NEXT: andi a2, a2, 2047
+; RV64-NEXT: sltiu a1, a1, 2
+; RV64-NEXT: sltiu a2, a2, 342
+; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: addi a3, a3, -1
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: andi a3, a3, 2047
+; RV64-NEXT: slli a1, a1, 22
+; RV64-NEXT: slli a3, a3, 11
+; RV64-NEXT: andi a2, a2, 2047
+; RV64-NEXT: or a2, a2, a3
+; RV64-NEXT: or a1, a2, a1
+; RV64-NEXT: slli a2, a1, 31
+; RV64-NEXT: srli a2, a2, 63
+; RV64-NEXT: sw a1, 0(a0)
+; RV64-NEXT: sb a2, 4(a0)
; RV64-NEXT: ret
;
; RV32M-LABEL: test_urem_vec:
diff --git a/llvm/test/CodeGen/RISCV/xqccmp-additional-stack.ll b/llvm/test/CodeGen/RISCV/xqccmp-additional-stack.ll
index 14e6b9bddd0a0..c73d836c45ca3 100644
--- a/llvm/test/CodeGen/RISCV/xqccmp-additional-stack.ll
+++ b/llvm/test/CodeGen/RISCV/xqccmp-additional-stack.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqccmp,+e -target-abi ilp32e -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
+declare i32 @__mulsi3(i32, i32)
+
define ptr @func(ptr %s, i32 %_c, ptr %incdec.ptr, i1 %0, i8 %conv14) #0 {
; RV32-LABEL: func:
; RV32: # %bb.0: # %entry
@@ -45,8 +47,8 @@ while.body: ; preds = %while.body, %entry
br i1 %0, label %while.body, label %while.end
while.end: ; preds = %while.body
- %or5 = mul i32 %_c, 16843009
- store i32 %or5, ptr null, align 4
+ %mul_result = call i32 @__mulsi3(i32 %_c, i32 16843009)
+ store i32 %mul_result, ptr null, align 4
%1 = and i32 %n.addr.042, 1
%scevgep = getelementptr i8, ptr %incdec.ptr, i32 %1
store i8 %conv14, ptr %scevgep, align 1
diff --git a/llvm/test/CodeGen/RISCV/zcmp-additional-stack.ll b/llvm/test/CodeGen/RISCV/zcmp-additional-stack.ll
index c98b9b80378fd..601780e346a0a 100644
--- a/llvm/test/CodeGen/RISCV/zcmp-additional-stack.ll
+++ b/llvm/test/CodeGen/RISCV/zcmp-additional-stack.ll
@@ -1,5 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=riscv32 -mattr=+zcmp,+e -target-abi ilp32e -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
+
+declare i32 @__mulsi3(i32, i32)
+
define ptr @func(ptr %s, i32 %_c, ptr %incdec.ptr, i1 %0, i8 %conv14) #0 {
; RV32-LABEL: func:
; RV32: # %bb.0: # %entry
@@ -44,8 +47,8 @@ while.body: ; preds = %while.body, %entry
br i1 %0, label %while.body, label %while.end
while.end: ; preds = %while.body
- %or5 = mul i32 %_c, 16843009
- store i32 %or5, ptr null, align 4
+ %mul_result = call i32 @__mulsi3(i32 %_c, i32 16843009)
+ store i32 %mul_result, ptr null, align 4
%1 = and i32 %n.addr.042, 1
%scevgep = getelementptr i8, ptr %incdec.ptr, i32 %1
store i8 %conv14, ptr %scevgep, align 1
More information about the llvm-commits
mailing list