[llvm] [SDAG] Use shifts if ISD::MUL is illegal when lowering ISD::CTPOP (PR #86505)
Wang Pengcheng via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 25 06:50:38 PDT 2024
https://github.com/wangpc-pp created https://github.com/llvm/llvm-project/pull/86505
Expand the final byte-sum with shifts and adds instead of a multiply when ISD::MUL is illegal, so we avoid libcalls such as __mulsi3/__muldi3.
Fixes #86205
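For reference, a minimal sketch (plain C++, outside SDAG; the function names are illustrative, not from the patch) of the equivalence the new expansion relies on. After the usual popcount bit tricks, every byte of v holds a partial count, and multiplying by 0x01010101 merely sums those bytes into the top byte. The same sum can be built from Len/8 - 1 shift-and-add steps, which is what the new else-branch emits:

#include <cstdint>

// Byte-sum via multiply: needs a legal 32-bit MUL (or a __mulsi3 libcall).
uint32_t sum_bytes_mul(uint32_t v) {
  return (v * 0x01010101u) >> 24;
}

// Byte-sum via shifts: acc ends up as v + (v << 8) + (v << 16) + (v << 24),
// the same value v * 0x01010101 computes, with no multiply at all.
uint32_t sum_bytes_shift(uint32_t v) {
  uint32_t acc = v;
  for (unsigned i = 8; i < 32; i += 8) // Len/8 - 1 = 3 shift+add steps
    acc = v + (acc << 8);
  return acc >> 24;
}

The two functions are bit-identical for every v; for popcount the top byte also equals the true byte sum, since the count of a 32-bit value fits in a byte. For 64-bit popcount the same scheme takes 7 shift+add steps, as the RV64I test diffs below show.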
From cc6f8c358dcfa8fc7f3e55ad8a704e676145382c Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Mon, 25 Mar 2024 21:37:42 +0800
Subject: [PATCH] [SDAG] Use shifts if ISD::MUL is illegal when lowering
ISD::CTPOP
We can avoid libcalls.
Fixes #86205
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 20 +-
.../test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll | 9 +-
llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll | 838 ++++++++----------
.../CodeGen/RISCV/ctz_zero_return_test.ll | 114 ++-
llvm/test/CodeGen/RISCV/rv32xtheadbb.ll | 102 +--
llvm/test/CodeGen/RISCV/rv32zbb.ll | 471 +++++-----
.../RISCV/rv64-legal-i32/rv64xtheadbb.ll | 23 +-
.../CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll | 46 +-
llvm/test/CodeGen/RISCV/rv64xtheadbb.ll | 237 ++---
llvm/test/CodeGen/RISCV/rv64zbb.ll | 507 +++++------
llvm/test/CodeGen/RISCV/sextw-removal.ll | 13 +-
11 files changed, 1106 insertions(+), 1274 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 8be03b66e155f6..e0662d57bb4bba 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8709,11 +8709,21 @@ SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
DAG.getConstant(0xFF, dl, VT));
}
- // v = (v * 0x01010101...) >> (Len - 8)
- SDValue Mask01 =
- DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
- return DAG.getNode(ISD::SRL, dl, VT,
- DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
+ SDValue V;
+ if (isOperationLegalOrCustomOrPromote(ISD::MUL, VT)) {
+ // v = (v * 0x01010101...) >> (Len - 8)
+ SDValue Mask01 =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
+ V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
+ } else {
+ V = Op;
+ SDValue ShiftC = DAG.getConstant(8, dl, VT);
+ for (unsigned I = 8; I < Len; I += 8) {
+ V = DAG.getNode(ISD::ADD, dl, VT, Op,
+ DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
+ }
+ }
+ return DAG.getNode(ISD::SRL, dl, VT, V,
DAG.getConstant(Len - 8, dl, ShVT));
}
diff --git a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
index 9fa3f5076bb221..c3731fc4f2e19f 100644
--- a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
@@ -285,9 +285,12 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
; LA64-NEXT: lu12i.w $a1, 61680
; LA64-NEXT: ori $a1, $a1, 3855
; LA64-NEXT: and $a0, $a0, $a1
-; LA64-NEXT: lu12i.w $a1, 4112
-; LA64-NEXT: ori $a1, $a1, 257
-; LA64-NEXT: mul.d $a0, $a0, $a1
+; LA64-NEXT: slli.d $a1, $a0, 8
+; LA64-NEXT: add.d $a1, $a0, $a1
+; LA64-NEXT: slli.d $a1, $a1, 8
+; LA64-NEXT: add.d $a1, $a0, $a1
+; LA64-NEXT: slli.d $a1, $a1, 8
+; LA64-NEXT: add.d $a0, $a0, $a1
; LA64-NEXT: bstrpick.d $a0, $a0, 31, 24
; LA64-NEXT: ret
%1 = call i32 @llvm.ctpop.i32(i32 %a)
diff --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
index 455e6e54c9b396..8533a1d73544f3 100644
--- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
@@ -1160,8 +1160,6 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
; RV32I: # %bb.0:
; RV32I-NEXT: beqz a0, .LBB10_2
; RV32I-NEXT: # %bb.1: # %cond.false
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: srli a1, a0, 1
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 2
@@ -1189,61 +1187,63 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
; RV32I-NEXT: lui a1, 61681
; RV32I-NEXT: addi a1, a1, -241
; RV32I-NEXT: and a0, a0, a1
-; RV32I-NEXT: lui a1, 4112
-; RV32I-NEXT: addi a1, a1, 257
-; RV32I-NEXT: call __mulsi3
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srli a0, a0, 24
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB10_2:
; RV32I-NEXT: li a0, 32
; RV32I-NEXT: ret
;
-; RV64I-LABEL: test_ctlz_i32:
-; RV64I: # %bb.0:
-; RV64I-NEXT: sext.w a1, a0
-; RV64I-NEXT: beqz a1, .LBB10_2
-; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: srliw a1, a0, 1
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 2
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 4
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 8
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 16
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: not a0, a0
-; RV64I-NEXT: srli a1, a0, 1
-; RV64I-NEXT: lui a2, 349525
-; RV64I-NEXT: addiw a2, a2, 1365
-; RV64I-NEXT: and a1, a1, a2
-; RV64I-NEXT: sub a0, a0, a1
-; RV64I-NEXT: lui a1, 209715
-; RV64I-NEXT: addiw a1, a1, 819
-; RV64I-NEXT: and a2, a0, a1
-; RV64I-NEXT: srli a0, a0, 2
-; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: add a0, a2, a0
-; RV64I-NEXT: srli a1, a0, 4
-; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw a1, a1, -241
-; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: srliw a0, a0, 24
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
-; RV64I-NEXT: ret
-; RV64I-NEXT: .LBB10_2:
-; RV64I-NEXT: li a0, 32
-; RV64I-NEXT: ret
+; RV64NOZBB-LABEL: test_ctlz_i32:
+; RV64NOZBB: # %bb.0:
+; RV64NOZBB-NEXT: sext.w a1, a0
+; RV64NOZBB-NEXT: beqz a1, .LBB10_2
+; RV64NOZBB-NEXT: # %bb.1: # %cond.false
+; RV64NOZBB-NEXT: srliw a1, a0, 1
+; RV64NOZBB-NEXT: or a0, a0, a1
+; RV64NOZBB-NEXT: srliw a1, a0, 2
+; RV64NOZBB-NEXT: or a0, a0, a1
+; RV64NOZBB-NEXT: srliw a1, a0, 4
+; RV64NOZBB-NEXT: or a0, a0, a1
+; RV64NOZBB-NEXT: srliw a1, a0, 8
+; RV64NOZBB-NEXT: or a0, a0, a1
+; RV64NOZBB-NEXT: srliw a1, a0, 16
+; RV64NOZBB-NEXT: or a0, a0, a1
+; RV64NOZBB-NEXT: not a0, a0
+; RV64NOZBB-NEXT: srli a1, a0, 1
+; RV64NOZBB-NEXT: lui a2, 349525
+; RV64NOZBB-NEXT: addiw a2, a2, 1365
+; RV64NOZBB-NEXT: and a1, a1, a2
+; RV64NOZBB-NEXT: sub a0, a0, a1
+; RV64NOZBB-NEXT: lui a1, 209715
+; RV64NOZBB-NEXT: addiw a1, a1, 819
+; RV64NOZBB-NEXT: and a2, a0, a1
+; RV64NOZBB-NEXT: srli a0, a0, 2
+; RV64NOZBB-NEXT: and a0, a0, a1
+; RV64NOZBB-NEXT: add a0, a2, a0
+; RV64NOZBB-NEXT: srli a1, a0, 4
+; RV64NOZBB-NEXT: add a0, a0, a1
+; RV64NOZBB-NEXT: lui a1, 61681
+; RV64NOZBB-NEXT: addi a2, a1, -241
+; RV64NOZBB-NEXT: and a2, a0, a2
+; RV64NOZBB-NEXT: slli a0, a0, 8
+; RV64NOZBB-NEXT: addi a1, a1, -256
+; RV64NOZBB-NEXT: and a0, a0, a1
+; RV64NOZBB-NEXT: add a0, a2, a0
+; RV64NOZBB-NEXT: slli a0, a0, 8
+; RV64NOZBB-NEXT: add a0, a2, a0
+; RV64NOZBB-NEXT: slli a0, a0, 8
+; RV64NOZBB-NEXT: add a0, a2, a0
+; RV64NOZBB-NEXT: srliw a0, a0, 24
+; RV64NOZBB-NEXT: ret
+; RV64NOZBB-NEXT: .LBB10_2:
+; RV64NOZBB-NEXT: li a0, 32
+; RV64NOZBB-NEXT: ret
;
; RV32M-LABEL: test_ctlz_i32:
; RV32M: # %bb.0:
@@ -1285,47 +1285,6 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
; RV32M-NEXT: li a0, 32
; RV32M-NEXT: ret
;
-; RV64M-LABEL: test_ctlz_i32:
-; RV64M: # %bb.0:
-; RV64M-NEXT: sext.w a1, a0
-; RV64M-NEXT: beqz a1, .LBB10_2
-; RV64M-NEXT: # %bb.1: # %cond.false
-; RV64M-NEXT: srliw a1, a0, 1
-; RV64M-NEXT: or a0, a0, a1
-; RV64M-NEXT: srliw a1, a0, 2
-; RV64M-NEXT: or a0, a0, a1
-; RV64M-NEXT: srliw a1, a0, 4
-; RV64M-NEXT: or a0, a0, a1
-; RV64M-NEXT: srliw a1, a0, 8
-; RV64M-NEXT: or a0, a0, a1
-; RV64M-NEXT: srliw a1, a0, 16
-; RV64M-NEXT: or a0, a0, a1
-; RV64M-NEXT: not a0, a0
-; RV64M-NEXT: srli a1, a0, 1
-; RV64M-NEXT: lui a2, 349525
-; RV64M-NEXT: addiw a2, a2, 1365
-; RV64M-NEXT: and a1, a1, a2
-; RV64M-NEXT: sub a0, a0, a1
-; RV64M-NEXT: lui a1, 209715
-; RV64M-NEXT: addiw a1, a1, 819
-; RV64M-NEXT: and a2, a0, a1
-; RV64M-NEXT: srli a0, a0, 2
-; RV64M-NEXT: and a0, a0, a1
-; RV64M-NEXT: add a0, a2, a0
-; RV64M-NEXT: srli a1, a0, 4
-; RV64M-NEXT: add a0, a0, a1
-; RV64M-NEXT: lui a1, 61681
-; RV64M-NEXT: addi a1, a1, -241
-; RV64M-NEXT: and a0, a0, a1
-; RV64M-NEXT: lui a1, 4112
-; RV64M-NEXT: addi a1, a1, 257
-; RV64M-NEXT: mul a0, a0, a1
-; RV64M-NEXT: srliw a0, a0, 24
-; RV64M-NEXT: ret
-; RV64M-NEXT: .LBB10_2:
-; RV64M-NEXT: li a0, 32
-; RV64M-NEXT: ret
-;
; RV32ZBB-LABEL: test_ctlz_i32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: clz a0, a0
@@ -1354,19 +1313,16 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
define i64 @test_ctlz_i64(i64 %a) nounwind {
; RV32I-LABEL: test_ctlz_i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a1
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: srli a0, a1, 1
-; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: lui a2, 349525
+; RV32I-NEXT: addi a4, a2, 1365
+; RV32I-NEXT: lui a2, 209715
+; RV32I-NEXT: addi a3, a2, 819
+; RV32I-NEXT: lui a2, 61681
+; RV32I-NEXT: addi a2, a2, -241
+; RV32I-NEXT: bnez a1, .LBB11_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 2
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 4
@@ -1377,28 +1333,28 @@ define i64 @test_ctlz_i64(i64 %a) nounwind {
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: not a0, a0
; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: lui a2, 349525
-; RV32I-NEXT: addi s4, a2, 1365
-; RV32I-NEXT: and a1, a1, s4
+; RV32I-NEXT: and a1, a1, a4
; RV32I-NEXT: sub a0, a0, a1
-; RV32I-NEXT: lui a1, 209715
-; RV32I-NEXT: addi s5, a1, 819
-; RV32I-NEXT: and a1, a0, s5
+; RV32I-NEXT: and a1, a0, a3
; RV32I-NEXT: srli a0, a0, 2
-; RV32I-NEXT: and a0, a0, s5
+; RV32I-NEXT: and a0, a0, a3
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: lui a1, 61681
-; RV32I-NEXT: addi s6, a1, -241
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: lui a1, 4112
-; RV32I-NEXT: addi s3, a1, 257
-; RV32I-NEXT: mv a1, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: srli a0, s2, 1
-; RV32I-NEXT: or a0, s2, a0
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: srli a0, a0, 24
+; RV32I-NEXT: addi a0, a0, 32
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB11_2:
+; RV32I-NEXT: srli a0, a1, 1
+; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: srli a1, a0, 2
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 4
@@ -1409,43 +1365,29 @@ define i64 @test_ctlz_i64(i64 %a) nounwind {
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: not a0, a0
; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: and a1, a1, s4
+; RV32I-NEXT: and a1, a1, a4
; RV32I-NEXT: sub a0, a0, a1
-; RV32I-NEXT: and a1, a0, s5
+; RV32I-NEXT: and a1, a0, a3
; RV32I-NEXT: srli a0, a0, 2
-; RV32I-NEXT: and a0, a0, s5
+; RV32I-NEXT: and a0, a0, a3
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: mv a1, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: bnez s0, .LBB11_2
-; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srli a0, a0, 24
-; RV32I-NEXT: addi a0, a0, 32
-; RV32I-NEXT: j .LBB11_3
-; RV32I-NEXT: .LBB11_2:
-; RV32I-NEXT: srli a0, s1, 24
-; RV32I-NEXT: .LBB11_3:
; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_ctlz_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: beqz a0, .LBB11_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srli a1, a0, 2
@@ -1481,14 +1423,21 @@ define i64 @test_ctlz_i64(i64 %a) nounwind {
; RV64I-NEXT: slli a2, a1, 32
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: slli a2, a1, 32
-; RV64I-NEXT: add a1, a1, a2
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srli a0, a0, 56
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB11_2:
; RV64I-NEXT: li a0, 64
@@ -1831,8 +1780,6 @@ define i16 @test_ctlz_i16_zero_undef(i16 %a) nounwind {
define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind {
; RV32I-LABEL: test_ctlz_i32_zero_undef:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: srli a1, a0, 1
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 2
@@ -1860,52 +1807,54 @@ define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind {
; RV32I-NEXT: lui a1, 61681
; RV32I-NEXT: addi a1, a1, -241
; RV32I-NEXT: and a0, a0, a1
-; RV32I-NEXT: lui a1, 4112
-; RV32I-NEXT: addi a1, a1, 257
-; RV32I-NEXT: call __mulsi3
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srli a0, a0, 24
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV64I-LABEL: test_ctlz_i32_zero_undef:
-; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: srliw a1, a0, 1
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 2
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 4
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 8
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 16
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: not a0, a0
-; RV64I-NEXT: srli a1, a0, 1
-; RV64I-NEXT: lui a2, 349525
-; RV64I-NEXT: addiw a2, a2, 1365
-; RV64I-NEXT: and a1, a1, a2
-; RV64I-NEXT: sub a0, a0, a1
-; RV64I-NEXT: lui a1, 209715
-; RV64I-NEXT: addiw a1, a1, 819
-; RV64I-NEXT: and a2, a0, a1
-; RV64I-NEXT: srli a0, a0, 2
-; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: add a0, a2, a0
-; RV64I-NEXT: srli a1, a0, 4
-; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw a1, a1, -241
-; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: srliw a0, a0, 24
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
-; RV64I-NEXT: ret
+; RV64NOZBB-LABEL: test_ctlz_i32_zero_undef:
+; RV64NOZBB: # %bb.0:
+; RV64NOZBB-NEXT: srliw a1, a0, 1
+; RV64NOZBB-NEXT: or a0, a0, a1
+; RV64NOZBB-NEXT: srliw a1, a0, 2
+; RV64NOZBB-NEXT: or a0, a0, a1
+; RV64NOZBB-NEXT: srliw a1, a0, 4
+; RV64NOZBB-NEXT: or a0, a0, a1
+; RV64NOZBB-NEXT: srliw a1, a0, 8
+; RV64NOZBB-NEXT: or a0, a0, a1
+; RV64NOZBB-NEXT: srliw a1, a0, 16
+; RV64NOZBB-NEXT: or a0, a0, a1
+; RV64NOZBB-NEXT: not a0, a0
+; RV64NOZBB-NEXT: srli a1, a0, 1
+; RV64NOZBB-NEXT: lui a2, 349525
+; RV64NOZBB-NEXT: addiw a2, a2, 1365
+; RV64NOZBB-NEXT: and a1, a1, a2
+; RV64NOZBB-NEXT: sub a0, a0, a1
+; RV64NOZBB-NEXT: lui a1, 209715
+; RV64NOZBB-NEXT: addiw a1, a1, 819
+; RV64NOZBB-NEXT: and a2, a0, a1
+; RV64NOZBB-NEXT: srli a0, a0, 2
+; RV64NOZBB-NEXT: and a0, a0, a1
+; RV64NOZBB-NEXT: add a0, a2, a0
+; RV64NOZBB-NEXT: srli a1, a0, 4
+; RV64NOZBB-NEXT: add a0, a0, a1
+; RV64NOZBB-NEXT: lui a1, 61681
+; RV64NOZBB-NEXT: addi a2, a1, -241
+; RV64NOZBB-NEXT: and a2, a0, a2
+; RV64NOZBB-NEXT: slli a0, a0, 8
+; RV64NOZBB-NEXT: addi a1, a1, -256
+; RV64NOZBB-NEXT: and a0, a0, a1
+; RV64NOZBB-NEXT: add a0, a2, a0
+; RV64NOZBB-NEXT: slli a0, a0, 8
+; RV64NOZBB-NEXT: add a0, a2, a0
+; RV64NOZBB-NEXT: slli a0, a0, 8
+; RV64NOZBB-NEXT: add a0, a2, a0
+; RV64NOZBB-NEXT: srliw a0, a0, 24
+; RV64NOZBB-NEXT: ret
;
; RV32M-LABEL: test_ctlz_i32_zero_undef:
; RV32M: # %bb.0:
@@ -1942,41 +1891,6 @@ define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind {
; RV32M-NEXT: srli a0, a0, 24
; RV32M-NEXT: ret
;
-; RV64M-LABEL: test_ctlz_i32_zero_undef:
-; RV64M: # %bb.0:
-; RV64M-NEXT: srliw a1, a0, 1
-; RV64M-NEXT: or a0, a0, a1
-; RV64M-NEXT: srliw a1, a0, 2
-; RV64M-NEXT: or a0, a0, a1
-; RV64M-NEXT: srliw a1, a0, 4
-; RV64M-NEXT: or a0, a0, a1
-; RV64M-NEXT: srliw a1, a0, 8
-; RV64M-NEXT: or a0, a0, a1
-; RV64M-NEXT: srliw a1, a0, 16
-; RV64M-NEXT: or a0, a0, a1
-; RV64M-NEXT: not a0, a0
-; RV64M-NEXT: srli a1, a0, 1
-; RV64M-NEXT: lui a2, 349525
-; RV64M-NEXT: addiw a2, a2, 1365
-; RV64M-NEXT: and a1, a1, a2
-; RV64M-NEXT: sub a0, a0, a1
-; RV64M-NEXT: lui a1, 209715
-; RV64M-NEXT: addiw a1, a1, 819
-; RV64M-NEXT: and a2, a0, a1
-; RV64M-NEXT: srli a0, a0, 2
-; RV64M-NEXT: and a0, a0, a1
-; RV64M-NEXT: add a0, a2, a0
-; RV64M-NEXT: srli a1, a0, 4
-; RV64M-NEXT: add a0, a0, a1
-; RV64M-NEXT: lui a1, 61681
-; RV64M-NEXT: addi a1, a1, -241
-; RV64M-NEXT: and a0, a0, a1
-; RV64M-NEXT: lui a1, 4112
-; RV64M-NEXT: addi a1, a1, 257
-; RV64M-NEXT: mul a0, a0, a1
-; RV64M-NEXT: srliw a0, a0, 24
-; RV64M-NEXT: ret
-;
; RV32ZBB-LABEL: test_ctlz_i32_zero_undef:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: clz a0, a0
@@ -2005,19 +1919,16 @@ define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind {
define i64 @test_ctlz_i64_zero_undef(i64 %a) nounwind {
; RV32I-LABEL: test_ctlz_i64_zero_undef:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a1
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: srli a0, a1, 1
-; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: lui a2, 349525
+; RV32I-NEXT: addi a4, a2, 1365
+; RV32I-NEXT: lui a2, 209715
+; RV32I-NEXT: addi a3, a2, 819
+; RV32I-NEXT: lui a2, 61681
+; RV32I-NEXT: addi a2, a2, -241
+; RV32I-NEXT: bnez a1, .LBB15_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 2
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 4
@@ -2028,28 +1939,28 @@ define i64 @test_ctlz_i64_zero_undef(i64 %a) nounwind {
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: not a0, a0
; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: lui a2, 349525
-; RV32I-NEXT: addi s4, a2, 1365
-; RV32I-NEXT: and a1, a1, s4
+; RV32I-NEXT: and a1, a1, a4
; RV32I-NEXT: sub a0, a0, a1
-; RV32I-NEXT: lui a1, 209715
-; RV32I-NEXT: addi s5, a1, 819
-; RV32I-NEXT: and a1, a0, s5
+; RV32I-NEXT: and a1, a0, a3
; RV32I-NEXT: srli a0, a0, 2
-; RV32I-NEXT: and a0, a0, s5
+; RV32I-NEXT: and a0, a0, a3
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: lui a1, 61681
-; RV32I-NEXT: addi s6, a1, -241
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: lui a1, 4112
-; RV32I-NEXT: addi s3, a1, 257
-; RV32I-NEXT: mv a1, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: srli a0, s2, 1
-; RV32I-NEXT: or a0, s2, a0
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: srli a0, a0, 24
+; RV32I-NEXT: addi a0, a0, 32
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB15_2:
+; RV32I-NEXT: srli a0, a1, 1
+; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: srli a1, a0, 2
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 4
@@ -2060,41 +1971,27 @@ define i64 @test_ctlz_i64_zero_undef(i64 %a) nounwind {
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: not a0, a0
; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: and a1, a1, s4
+; RV32I-NEXT: and a1, a1, a4
; RV32I-NEXT: sub a0, a0, a1
-; RV32I-NEXT: and a1, a0, s5
+; RV32I-NEXT: and a1, a0, a3
; RV32I-NEXT: srli a0, a0, 2
-; RV32I-NEXT: and a0, a0, s5
+; RV32I-NEXT: and a0, a0, a3
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: mv a1, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: bnez s0, .LBB15_2
-; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srli a0, a0, 24
-; RV32I-NEXT: addi a0, a0, 32
-; RV32I-NEXT: j .LBB15_3
-; RV32I-NEXT: .LBB15_2:
-; RV32I-NEXT: srli a0, s1, 24
-; RV32I-NEXT: .LBB15_3:
; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_ctlz_i64_zero_undef:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srli a1, a0, 2
@@ -2130,14 +2027,21 @@ define i64 @test_ctlz_i64_zero_undef(i64 %a) nounwind {
; RV64I-NEXT: slli a2, a1, 32
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: slli a2, a1, 32
-; RV64I-NEXT: add a1, a1, a2
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srli a0, a0, 56
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV32M-LABEL: test_ctlz_i64_zero_undef:
@@ -2464,8 +2368,6 @@ define i16 @test_ctpop_i16(i16 %a) nounwind {
define i32 @test_ctpop_i32(i32 %a) nounwind {
; RV32I-LABEL: test_ctpop_i32:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: srli a1, a0, 1
; RV32I-NEXT: lui a2, 349525
; RV32I-NEXT: addi a2, a2, 1365
@@ -2482,41 +2384,43 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
; RV32I-NEXT: lui a1, 61681
; RV32I-NEXT: addi a1, a1, -241
; RV32I-NEXT: and a0, a0, a1
-; RV32I-NEXT: lui a1, 4112
-; RV32I-NEXT: addi a1, a1, 257
-; RV32I-NEXT: call __mulsi3
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srli a0, a0, 24
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV64I-LABEL: test_ctpop_i32:
-; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: srli a1, a0, 1
-; RV64I-NEXT: lui a2, 349525
-; RV64I-NEXT: addiw a2, a2, 1365
-; RV64I-NEXT: and a1, a1, a2
-; RV64I-NEXT: sub a0, a0, a1
-; RV64I-NEXT: lui a1, 209715
-; RV64I-NEXT: addiw a1, a1, 819
-; RV64I-NEXT: and a2, a0, a1
-; RV64I-NEXT: srli a0, a0, 2
-; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: add a0, a2, a0
-; RV64I-NEXT: srli a1, a0, 4
-; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw a1, a1, -241
-; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: srliw a0, a0, 24
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
-; RV64I-NEXT: ret
+; RV64NOZBB-LABEL: test_ctpop_i32:
+; RV64NOZBB: # %bb.0:
+; RV64NOZBB-NEXT: srli a1, a0, 1
+; RV64NOZBB-NEXT: lui a2, 349525
+; RV64NOZBB-NEXT: addiw a2, a2, 1365
+; RV64NOZBB-NEXT: and a1, a1, a2
+; RV64NOZBB-NEXT: sub a0, a0, a1
+; RV64NOZBB-NEXT: lui a1, 209715
+; RV64NOZBB-NEXT: addiw a1, a1, 819
+; RV64NOZBB-NEXT: and a2, a0, a1
+; RV64NOZBB-NEXT: srli a0, a0, 2
+; RV64NOZBB-NEXT: and a0, a0, a1
+; RV64NOZBB-NEXT: add a0, a2, a0
+; RV64NOZBB-NEXT: srli a1, a0, 4
+; RV64NOZBB-NEXT: add a0, a0, a1
+; RV64NOZBB-NEXT: lui a1, 61681
+; RV64NOZBB-NEXT: addi a2, a1, -241
+; RV64NOZBB-NEXT: and a2, a0, a2
+; RV64NOZBB-NEXT: slli a0, a0, 8
+; RV64NOZBB-NEXT: addi a1, a1, -256
+; RV64NOZBB-NEXT: and a0, a0, a1
+; RV64NOZBB-NEXT: add a0, a2, a0
+; RV64NOZBB-NEXT: slli a0, a0, 8
+; RV64NOZBB-NEXT: add a0, a2, a0
+; RV64NOZBB-NEXT: slli a0, a0, 8
+; RV64NOZBB-NEXT: add a0, a2, a0
+; RV64NOZBB-NEXT: srliw a0, a0, 24
+; RV64NOZBB-NEXT: ret
;
; RV32M-LABEL: test_ctpop_i32:
; RV32M: # %bb.0:
@@ -2542,30 +2446,6 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
; RV32M-NEXT: srli a0, a0, 24
; RV32M-NEXT: ret
;
-; RV64M-LABEL: test_ctpop_i32:
-; RV64M: # %bb.0:
-; RV64M-NEXT: srli a1, a0, 1
-; RV64M-NEXT: lui a2, 349525
-; RV64M-NEXT: addiw a2, a2, 1365
-; RV64M-NEXT: and a1, a1, a2
-; RV64M-NEXT: sub a0, a0, a1
-; RV64M-NEXT: lui a1, 209715
-; RV64M-NEXT: addiw a1, a1, 819
-; RV64M-NEXT: and a2, a0, a1
-; RV64M-NEXT: srli a0, a0, 2
-; RV64M-NEXT: and a0, a0, a1
-; RV64M-NEXT: add a0, a2, a0
-; RV64M-NEXT: srli a1, a0, 4
-; RV64M-NEXT: add a0, a0, a1
-; RV64M-NEXT: lui a1, 61681
-; RV64M-NEXT: addi a1, a1, -241
-; RV64M-NEXT: and a0, a0, a1
-; RV64M-NEXT: lui a1, 4112
-; RV64M-NEXT: addi a1, a1, 257
-; RV64M-NEXT: mul a0, a0, a1
-; RV64M-NEXT: srliw a0, a0, 24
-; RV64M-NEXT: ret
-;
; RV32ZBB-LABEL: test_ctpop_i32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: cpop a0, a0
@@ -2578,8 +2458,6 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
;
; RV32XTHEADBB-LABEL: test_ctpop_i32:
; RV32XTHEADBB: # %bb.0:
-; RV32XTHEADBB-NEXT: addi sp, sp, -16
-; RV32XTHEADBB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32XTHEADBB-NEXT: srli a1, a0, 1
; RV32XTHEADBB-NEXT: lui a2, 349525
; RV32XTHEADBB-NEXT: addi a2, a2, 1365
@@ -2596,18 +2474,17 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
; RV32XTHEADBB-NEXT: lui a1, 61681
; RV32XTHEADBB-NEXT: addi a1, a1, -241
; RV32XTHEADBB-NEXT: and a0, a0, a1
-; RV32XTHEADBB-NEXT: lui a1, 4112
-; RV32XTHEADBB-NEXT: addi a1, a1, 257
-; RV32XTHEADBB-NEXT: call __mulsi3
+; RV32XTHEADBB-NEXT: slli a1, a0, 8
+; RV32XTHEADBB-NEXT: add a1, a0, a1
+; RV32XTHEADBB-NEXT: slli a1, a1, 8
+; RV32XTHEADBB-NEXT: add a1, a0, a1
+; RV32XTHEADBB-NEXT: slli a1, a1, 8
+; RV32XTHEADBB-NEXT: add a0, a0, a1
; RV32XTHEADBB-NEXT: srli a0, a0, 24
-; RV32XTHEADBB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32XTHEADBB-NEXT: addi sp, sp, 16
; RV32XTHEADBB-NEXT: ret
;
; RV64XTHEADBB-LABEL: test_ctpop_i32:
; RV64XTHEADBB: # %bb.0:
-; RV64XTHEADBB-NEXT: addi sp, sp, -16
-; RV64XTHEADBB-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64XTHEADBB-NEXT: srli a1, a0, 1
; RV64XTHEADBB-NEXT: lui a2, 349525
; RV64XTHEADBB-NEXT: addiw a2, a2, 1365
@@ -2622,14 +2499,17 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
; RV64XTHEADBB-NEXT: srli a1, a0, 4
; RV64XTHEADBB-NEXT: add a0, a0, a1
; RV64XTHEADBB-NEXT: lui a1, 61681
-; RV64XTHEADBB-NEXT: addiw a1, a1, -241
+; RV64XTHEADBB-NEXT: addi a2, a1, -241
+; RV64XTHEADBB-NEXT: and a2, a0, a2
+; RV64XTHEADBB-NEXT: slli a0, a0, 8
+; RV64XTHEADBB-NEXT: addi a1, a1, -256
; RV64XTHEADBB-NEXT: and a0, a0, a1
-; RV64XTHEADBB-NEXT: lui a1, 4112
-; RV64XTHEADBB-NEXT: addiw a1, a1, 257
-; RV64XTHEADBB-NEXT: call __muldi3
+; RV64XTHEADBB-NEXT: add a0, a2, a0
+; RV64XTHEADBB-NEXT: slli a0, a0, 8
+; RV64XTHEADBB-NEXT: add a0, a2, a0
+; RV64XTHEADBB-NEXT: slli a0, a0, 8
+; RV64XTHEADBB-NEXT: add a0, a2, a0
; RV64XTHEADBB-NEXT: srliw a0, a0, 24
-; RV64XTHEADBB-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64XTHEADBB-NEXT: addi sp, sp, 16
; RV64XTHEADBB-NEXT: ret
%1 = call i32 @llvm.ctpop.i32(i32 %a)
ret i32 %1
@@ -2638,65 +2518,52 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
define i64 @test_ctpop_i64(i64 %a) nounwind {
; RV32I-LABEL: test_ctpop_i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: srli a0, a1, 1
-; RV32I-NEXT: lui a2, 349525
-; RV32I-NEXT: addi s2, a2, 1365
-; RV32I-NEXT: and a0, a0, s2
-; RV32I-NEXT: sub a1, a1, a0
-; RV32I-NEXT: lui a0, 209715
-; RV32I-NEXT: addi s3, a0, 819
-; RV32I-NEXT: and a0, a1, s3
+; RV32I-NEXT: srli a2, a1, 1
+; RV32I-NEXT: lui a3, 349525
+; RV32I-NEXT: addi a3, a3, 1365
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sub a1, a1, a2
+; RV32I-NEXT: lui a2, 209715
+; RV32I-NEXT: addi a2, a2, 819
+; RV32I-NEXT: and a4, a1, a2
; RV32I-NEXT: srli a1, a1, 2
-; RV32I-NEXT: and a1, a1, s3
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: lui a1, 61681
-; RV32I-NEXT: addi s4, a1, -241
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: lui a1, 4112
-; RV32I-NEXT: addi s1, a1, 257
-; RV32I-NEXT: mv a1, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: srli s5, a0, 24
-; RV32I-NEXT: srli a0, s0, 1
-; RV32I-NEXT: and a0, a0, s2
-; RV32I-NEXT: sub s0, s0, a0
-; RV32I-NEXT: and a0, s0, s3
-; RV32I-NEXT: srli s0, s0, 2
-; RV32I-NEXT: and a1, s0, s3
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: mv a1, s1
-; RV32I-NEXT: call __mulsi3
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: add a1, a4, a1
+; RV32I-NEXT: srli a4, a1, 4
+; RV32I-NEXT: add a1, a1, a4
+; RV32I-NEXT: lui a4, 61681
+; RV32I-NEXT: addi a4, a4, -241
+; RV32I-NEXT: and a1, a1, a4
+; RV32I-NEXT: slli a5, a1, 8
+; RV32I-NEXT: add a5, a1, a5
+; RV32I-NEXT: slli a5, a5, 8
+; RV32I-NEXT: add a5, a1, a5
+; RV32I-NEXT: slli a5, a5, 8
+; RV32I-NEXT: add a1, a1, a5
+; RV32I-NEXT: srli a1, a1, 24
+; RV32I-NEXT: srli a5, a0, 1
+; RV32I-NEXT: and a3, a5, a3
+; RV32I-NEXT: sub a0, a0, a3
+; RV32I-NEXT: and a3, a0, a2
+; RV32I-NEXT: srli a0, a0, 2
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: add a0, a3, a0
+; RV32I-NEXT: srli a2, a0, 4
+; RV32I-NEXT: add a0, a0, a2
+; RV32I-NEXT: and a0, a0, a4
+; RV32I-NEXT: slli a2, a0, 8
+; RV32I-NEXT: add a2, a0, a2
+; RV32I-NEXT: slli a2, a2, 8
+; RV32I-NEXT: add a2, a0, a2
+; RV32I-NEXT: slli a2, a2, 8
+; RV32I-NEXT: add a0, a0, a2
; RV32I-NEXT: srli a0, a0, 24
-; RV32I-NEXT: add a0, a0, s5
+; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_ctpop_i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
@@ -2719,14 +2586,21 @@ define i64 @test_ctpop_i64(i64 %a) nounwind {
; RV64I-NEXT: slli a2, a1, 32
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: slli a2, a1, 32
-; RV64I-NEXT: add a1, a1, a2
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srli a0, a0, 56
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV32M-LABEL: test_ctpop_i64:
@@ -2814,65 +2688,52 @@ define i64 @test_ctpop_i64(i64 %a) nounwind {
;
; RV32XTHEADBB-LABEL: test_ctpop_i64:
; RV32XTHEADBB: # %bb.0:
-; RV32XTHEADBB-NEXT: addi sp, sp, -32
-; RV32XTHEADBB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32XTHEADBB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32XTHEADBB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32XTHEADBB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32XTHEADBB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32XTHEADBB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32XTHEADBB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32XTHEADBB-NEXT: mv s0, a0
-; RV32XTHEADBB-NEXT: srli a0, a1, 1
-; RV32XTHEADBB-NEXT: lui a2, 349525
-; RV32XTHEADBB-NEXT: addi s2, a2, 1365
-; RV32XTHEADBB-NEXT: and a0, a0, s2
-; RV32XTHEADBB-NEXT: sub a1, a1, a0
-; RV32XTHEADBB-NEXT: lui a0, 209715
-; RV32XTHEADBB-NEXT: addi s3, a0, 819
-; RV32XTHEADBB-NEXT: and a0, a1, s3
+; RV32XTHEADBB-NEXT: srli a2, a1, 1
+; RV32XTHEADBB-NEXT: lui a3, 349525
+; RV32XTHEADBB-NEXT: addi a3, a3, 1365
+; RV32XTHEADBB-NEXT: and a2, a2, a3
+; RV32XTHEADBB-NEXT: sub a1, a1, a2
+; RV32XTHEADBB-NEXT: lui a2, 209715
+; RV32XTHEADBB-NEXT: addi a2, a2, 819
+; RV32XTHEADBB-NEXT: and a4, a1, a2
; RV32XTHEADBB-NEXT: srli a1, a1, 2
-; RV32XTHEADBB-NEXT: and a1, a1, s3
-; RV32XTHEADBB-NEXT: add a0, a0, a1
-; RV32XTHEADBB-NEXT: srli a1, a0, 4
-; RV32XTHEADBB-NEXT: add a0, a0, a1
-; RV32XTHEADBB-NEXT: lui a1, 61681
-; RV32XTHEADBB-NEXT: addi s4, a1, -241
-; RV32XTHEADBB-NEXT: and a0, a0, s4
-; RV32XTHEADBB-NEXT: lui a1, 4112
-; RV32XTHEADBB-NEXT: addi s1, a1, 257
-; RV32XTHEADBB-NEXT: mv a1, s1
-; RV32XTHEADBB-NEXT: call __mulsi3
-; RV32XTHEADBB-NEXT: srli s5, a0, 24
-; RV32XTHEADBB-NEXT: srli a0, s0, 1
-; RV32XTHEADBB-NEXT: and a0, a0, s2
-; RV32XTHEADBB-NEXT: sub s0, s0, a0
-; RV32XTHEADBB-NEXT: and a0, s0, s3
-; RV32XTHEADBB-NEXT: srli s0, s0, 2
-; RV32XTHEADBB-NEXT: and a1, s0, s3
-; RV32XTHEADBB-NEXT: add a0, a0, a1
-; RV32XTHEADBB-NEXT: srli a1, a0, 4
-; RV32XTHEADBB-NEXT: add a0, a0, a1
-; RV32XTHEADBB-NEXT: and a0, a0, s4
-; RV32XTHEADBB-NEXT: mv a1, s1
-; RV32XTHEADBB-NEXT: call __mulsi3
+; RV32XTHEADBB-NEXT: and a1, a1, a2
+; RV32XTHEADBB-NEXT: add a1, a4, a1
+; RV32XTHEADBB-NEXT: srli a4, a1, 4
+; RV32XTHEADBB-NEXT: add a1, a1, a4
+; RV32XTHEADBB-NEXT: lui a4, 61681
+; RV32XTHEADBB-NEXT: addi a4, a4, -241
+; RV32XTHEADBB-NEXT: and a1, a1, a4
+; RV32XTHEADBB-NEXT: slli a5, a1, 8
+; RV32XTHEADBB-NEXT: add a5, a1, a5
+; RV32XTHEADBB-NEXT: slli a5, a5, 8
+; RV32XTHEADBB-NEXT: add a5, a1, a5
+; RV32XTHEADBB-NEXT: slli a5, a5, 8
+; RV32XTHEADBB-NEXT: add a1, a1, a5
+; RV32XTHEADBB-NEXT: srli a1, a1, 24
+; RV32XTHEADBB-NEXT: srli a5, a0, 1
+; RV32XTHEADBB-NEXT: and a3, a5, a3
+; RV32XTHEADBB-NEXT: sub a0, a0, a3
+; RV32XTHEADBB-NEXT: and a3, a0, a2
+; RV32XTHEADBB-NEXT: srli a0, a0, 2
+; RV32XTHEADBB-NEXT: and a0, a0, a2
+; RV32XTHEADBB-NEXT: add a0, a3, a0
+; RV32XTHEADBB-NEXT: srli a2, a0, 4
+; RV32XTHEADBB-NEXT: add a0, a0, a2
+; RV32XTHEADBB-NEXT: and a0, a0, a4
+; RV32XTHEADBB-NEXT: slli a2, a0, 8
+; RV32XTHEADBB-NEXT: add a2, a0, a2
+; RV32XTHEADBB-NEXT: slli a2, a2, 8
+; RV32XTHEADBB-NEXT: add a2, a0, a2
+; RV32XTHEADBB-NEXT: slli a2, a2, 8
+; RV32XTHEADBB-NEXT: add a0, a0, a2
; RV32XTHEADBB-NEXT: srli a0, a0, 24
-; RV32XTHEADBB-NEXT: add a0, a0, s5
+; RV32XTHEADBB-NEXT: add a0, a0, a1
; RV32XTHEADBB-NEXT: li a1, 0
-; RV32XTHEADBB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32XTHEADBB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32XTHEADBB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32XTHEADBB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32XTHEADBB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32XTHEADBB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32XTHEADBB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32XTHEADBB-NEXT: addi sp, sp, 32
; RV32XTHEADBB-NEXT: ret
;
; RV64XTHEADBB-LABEL: test_ctpop_i64:
; RV64XTHEADBB: # %bb.0:
-; RV64XTHEADBB-NEXT: addi sp, sp, -16
-; RV64XTHEADBB-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64XTHEADBB-NEXT: srli a1, a0, 1
; RV64XTHEADBB-NEXT: lui a2, 349525
; RV64XTHEADBB-NEXT: addiw a2, a2, 1365
@@ -2895,14 +2756,21 @@ define i64 @test_ctpop_i64(i64 %a) nounwind {
; RV64XTHEADBB-NEXT: slli a2, a1, 32
; RV64XTHEADBB-NEXT: add a1, a1, a2
; RV64XTHEADBB-NEXT: and a0, a0, a1
-; RV64XTHEADBB-NEXT: lui a1, 4112
-; RV64XTHEADBB-NEXT: addiw a1, a1, 257
-; RV64XTHEADBB-NEXT: slli a2, a1, 32
-; RV64XTHEADBB-NEXT: add a1, a1, a2
-; RV64XTHEADBB-NEXT: call __muldi3
+; RV64XTHEADBB-NEXT: slli a1, a0, 8
+; RV64XTHEADBB-NEXT: add a1, a0, a1
+; RV64XTHEADBB-NEXT: slli a1, a1, 8
+; RV64XTHEADBB-NEXT: add a1, a0, a1
+; RV64XTHEADBB-NEXT: slli a1, a1, 8
+; RV64XTHEADBB-NEXT: add a1, a0, a1
+; RV64XTHEADBB-NEXT: slli a1, a1, 8
+; RV64XTHEADBB-NEXT: add a1, a0, a1
+; RV64XTHEADBB-NEXT: slli a1, a1, 8
+; RV64XTHEADBB-NEXT: add a1, a0, a1
+; RV64XTHEADBB-NEXT: slli a1, a1, 8
+; RV64XTHEADBB-NEXT: add a1, a0, a1
+; RV64XTHEADBB-NEXT: slli a1, a1, 8
+; RV64XTHEADBB-NEXT: add a0, a0, a1
; RV64XTHEADBB-NEXT: srli a0, a0, 56
-; RV64XTHEADBB-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64XTHEADBB-NEXT: addi sp, sp, 16
; RV64XTHEADBB-NEXT: ret
%1 = call i64 @llvm.ctpop.i64(i64 %a)
ret i64 %1
diff --git a/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll b/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll
index adf614435b31d7..60da0cc242a765 100644
--- a/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll
+++ b/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll
@@ -602,19 +602,16 @@ define signext i32 @ctlz(i64 %b) nounwind {
;
; RV32I-LABEL: ctlz:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a1
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: srli a0, a1, 1
-; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: lui a2, 349525
+; RV32I-NEXT: addi a4, a2, 1365
+; RV32I-NEXT: lui a2, 209715
+; RV32I-NEXT: addi a3, a2, 819
+; RV32I-NEXT: lui a2, 61681
+; RV32I-NEXT: addi a2, a2, -241
+; RV32I-NEXT: bnez a1, .LBB7_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 2
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 4
@@ -625,28 +622,28 @@ define signext i32 @ctlz(i64 %b) nounwind {
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: not a0, a0
; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: lui a2, 349525
-; RV32I-NEXT: addi s4, a2, 1365
-; RV32I-NEXT: and a1, a1, s4
+; RV32I-NEXT: and a1, a1, a4
; RV32I-NEXT: sub a0, a0, a1
-; RV32I-NEXT: lui a1, 209715
-; RV32I-NEXT: addi s5, a1, 819
-; RV32I-NEXT: and a1, a0, s5
+; RV32I-NEXT: and a1, a0, a3
; RV32I-NEXT: srli a0, a0, 2
-; RV32I-NEXT: and a0, a0, s5
+; RV32I-NEXT: and a0, a0, a3
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: lui a1, 61681
-; RV32I-NEXT: addi s6, a1, -241
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: lui a1, 4112
-; RV32I-NEXT: addi s3, a1, 257
-; RV32I-NEXT: mv a1, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: srli a0, s2, 1
-; RV32I-NEXT: or a0, s2, a0
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: srli a0, a0, 24
+; RV32I-NEXT: addi a0, a0, 32
+; RV32I-NEXT: andi a0, a0, 63
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB7_2:
+; RV32I-NEXT: srli a0, a1, 1
+; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: srli a1, a0, 2
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 4
@@ -657,41 +654,27 @@ define signext i32 @ctlz(i64 %b) nounwind {
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: not a0, a0
; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: and a1, a1, s4
+; RV32I-NEXT: and a1, a1, a4
; RV32I-NEXT: sub a0, a0, a1
-; RV32I-NEXT: and a1, a0, s5
+; RV32I-NEXT: and a1, a0, a3
; RV32I-NEXT: srli a0, a0, 2
-; RV32I-NEXT: and a0, a0, s5
+; RV32I-NEXT: and a0, a0, a3
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: mv a1, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: bnez s0, .LBB7_2
-; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srli a0, a0, 24
-; RV32I-NEXT: addi s1, a0, 32
-; RV32I-NEXT: j .LBB7_3
-; RV32I-NEXT: .LBB7_2:
-; RV32I-NEXT: srli s1, s1, 24
-; RV32I-NEXT: .LBB7_3: # %entry
-; RV32I-NEXT: andi a0, s1, 63
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: andi a0, a0, 63
; RV32I-NEXT: ret
;
; RV64I-LABEL: ctlz:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srli a1, a0, 2
@@ -727,15 +710,22 @@ define signext i32 @ctlz(i64 %b) nounwind {
; RV64I-NEXT: slli a2, a1, 32
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: slli a2, a1, 32
-; RV64I-NEXT: add a1, a1, a2
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: slli a0, a0, 2
; RV64I-NEXT: srli a0, a0, 58
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
index 3731b9719445ea..a90caf2e431f69 100644
--- a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
@@ -11,8 +11,6 @@ define i32 @ctlz_i32(i32 %a) nounwind {
; RV32I: # %bb.0:
; RV32I-NEXT: beqz a0, .LBB0_2
; RV32I-NEXT: # %bb.1: # %cond.false
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: srli a1, a0, 1
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 2
@@ -40,12 +38,13 @@ define i32 @ctlz_i32(i32 %a) nounwind {
; RV32I-NEXT: lui a1, 61681
; RV32I-NEXT: addi a1, a1, -241
; RV32I-NEXT: and a0, a0, a1
-; RV32I-NEXT: lui a1, 4112
-; RV32I-NEXT: addi a1, a1, 257
-; RV32I-NEXT: call __mulsi3
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srli a0, a0, 24
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB0_2:
; RV32I-NEXT: li a0, 32
@@ -64,19 +63,16 @@ declare i64 @llvm.ctlz.i64(i64, i1)
define i64 @ctlz_i64(i64 %a) nounwind {
; RV32I-LABEL: ctlz_i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a1
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: srli a0, a1, 1
-; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: lui a2, 349525
+; RV32I-NEXT: addi a4, a2, 1365
+; RV32I-NEXT: lui a2, 209715
+; RV32I-NEXT: addi a3, a2, 819
+; RV32I-NEXT: lui a2, 61681
+; RV32I-NEXT: addi a2, a2, -241
+; RV32I-NEXT: bnez a1, .LBB1_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 2
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 4
@@ -87,28 +83,28 @@ define i64 @ctlz_i64(i64 %a) nounwind {
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: not a0, a0
; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: lui a2, 349525
-; RV32I-NEXT: addi s4, a2, 1365
-; RV32I-NEXT: and a1, a1, s4
+; RV32I-NEXT: and a1, a1, a4
; RV32I-NEXT: sub a0, a0, a1
-; RV32I-NEXT: lui a1, 209715
-; RV32I-NEXT: addi s5, a1, 819
-; RV32I-NEXT: and a1, a0, s5
+; RV32I-NEXT: and a1, a0, a3
; RV32I-NEXT: srli a0, a0, 2
-; RV32I-NEXT: and a0, a0, s5
+; RV32I-NEXT: and a0, a0, a3
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: lui a1, 61681
-; RV32I-NEXT: addi s6, a1, -241
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: lui a1, 4112
-; RV32I-NEXT: addi s3, a1, 257
-; RV32I-NEXT: mv a1, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: srli a0, s2, 1
-; RV32I-NEXT: or a0, s2, a0
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: srli a0, a0, 24
+; RV32I-NEXT: addi a0, a0, 32
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB1_2:
+; RV32I-NEXT: srli a0, a1, 1
+; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: srli a1, a0, 2
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 4
@@ -119,35 +115,23 @@ define i64 @ctlz_i64(i64 %a) nounwind {
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: not a0, a0
; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: and a1, a1, s4
+; RV32I-NEXT: and a1, a1, a4
; RV32I-NEXT: sub a0, a0, a1
-; RV32I-NEXT: and a1, a0, s5
+; RV32I-NEXT: and a1, a0, a3
; RV32I-NEXT: srli a0, a0, 2
-; RV32I-NEXT: and a0, a0, s5
+; RV32I-NEXT: and a0, a0, a3
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: mv a1, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: bnez s0, .LBB1_2
-; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srli a0, a0, 24
-; RV32I-NEXT: addi a0, a0, 32
-; RV32I-NEXT: j .LBB1_3
-; RV32I-NEXT: .LBB1_2:
-; RV32I-NEXT: srli a0, s1, 24
-; RV32I-NEXT: .LBB1_3:
; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32XTHEADBB-LABEL: ctlz_i64:
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
index 36c107061795c4..97e7344e994d1f 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -11,8 +11,6 @@ define i32 @ctlz_i32(i32 %a) nounwind {
; RV32I: # %bb.0:
; RV32I-NEXT: beqz a0, .LBB0_2
; RV32I-NEXT: # %bb.1: # %cond.false
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: srli a1, a0, 1
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 2
@@ -40,12 +38,13 @@ define i32 @ctlz_i32(i32 %a) nounwind {
; RV32I-NEXT: lui a1, 61681
; RV32I-NEXT: addi a1, a1, -241
; RV32I-NEXT: and a0, a0, a1
-; RV32I-NEXT: lui a1, 4112
-; RV32I-NEXT: addi a1, a1, 257
-; RV32I-NEXT: call __mulsi3
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srli a0, a0, 24
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB0_2:
; RV32I-NEXT: li a0, 32
@@ -64,19 +63,16 @@ declare i64 @llvm.ctlz.i64(i64, i1)
define i64 @ctlz_i64(i64 %a) nounwind {
; RV32I-LABEL: ctlz_i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a1
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: srli a0, a1, 1
-; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: lui a2, 349525
+; RV32I-NEXT: addi a4, a2, 1365
+; RV32I-NEXT: lui a2, 209715
+; RV32I-NEXT: addi a3, a2, 819
+; RV32I-NEXT: lui a2, 61681
+; RV32I-NEXT: addi a2, a2, -241
+; RV32I-NEXT: bnez a1, .LBB1_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 2
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 4
@@ -87,28 +83,28 @@ define i64 @ctlz_i64(i64 %a) nounwind {
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: not a0, a0
; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: lui a2, 349525
-; RV32I-NEXT: addi s4, a2, 1365
-; RV32I-NEXT: and a1, a1, s4
+; RV32I-NEXT: and a1, a1, a4
; RV32I-NEXT: sub a0, a0, a1
-; RV32I-NEXT: lui a1, 209715
-; RV32I-NEXT: addi s5, a1, 819
-; RV32I-NEXT: and a1, a0, s5
+; RV32I-NEXT: and a1, a0, a3
; RV32I-NEXT: srli a0, a0, 2
-; RV32I-NEXT: and a0, a0, s5
+; RV32I-NEXT: and a0, a0, a3
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: lui a1, 61681
-; RV32I-NEXT: addi s6, a1, -241
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: lui a1, 4112
-; RV32I-NEXT: addi s3, a1, 257
-; RV32I-NEXT: mv a1, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: srli a0, s2, 1
-; RV32I-NEXT: or a0, s2, a0
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: srli a0, a0, 24
+; RV32I-NEXT: addi a0, a0, 32
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB1_2:
+; RV32I-NEXT: srli a0, a1, 1
+; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: srli a1, a0, 2
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 4
@@ -119,35 +115,23 @@ define i64 @ctlz_i64(i64 %a) nounwind {
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: not a0, a0
; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: and a1, a1, s4
+; RV32I-NEXT: and a1, a1, a4
; RV32I-NEXT: sub a0, a0, a1
-; RV32I-NEXT: and a1, a0, s5
+; RV32I-NEXT: and a1, a0, a3
; RV32I-NEXT: srli a0, a0, 2
-; RV32I-NEXT: and a0, a0, s5
+; RV32I-NEXT: and a0, a0, a3
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: mv a1, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: bnez s0, .LBB1_2
-; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srli a0, a0, 24
-; RV32I-NEXT: addi a0, a0, 32
-; RV32I-NEXT: j .LBB1_3
-; RV32I-NEXT: .LBB1_2:
-; RV32I-NEXT: srli a0, s1, 24
-; RV32I-NEXT: .LBB1_3:
; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctlz_i64:
@@ -275,8 +259,6 @@ declare i32 @llvm.ctpop.i32(i32)
define i32 @ctpop_i32(i32 %a) nounwind {
; RV32I-LABEL: ctpop_i32:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: srli a1, a0, 1
; RV32I-NEXT: lui a2, 349525
; RV32I-NEXT: addi a2, a2, 1365
@@ -293,12 +275,13 @@ define i32 @ctpop_i32(i32 %a) nounwind {
; RV32I-NEXT: lui a1, 61681
; RV32I-NEXT: addi a1, a1, -241
; RV32I-NEXT: and a0, a0, a1
-; RV32I-NEXT: lui a1, 4112
-; RV32I-NEXT: addi a1, a1, 257
-; RV32I-NEXT: call __mulsi3
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srli a0, a0, 24
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_i32:
@@ -390,58 +373,46 @@ declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
define <2 x i32> @ctpop_v2i32(<2 x i32> %a) nounwind {
; RV32I-LABEL: ctpop_v2i32:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a1
-; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: lui a2, 349525
-; RV32I-NEXT: addi s3, a2, 1365
-; RV32I-NEXT: and a1, a1, s3
-; RV32I-NEXT: sub a0, a0, a1
-; RV32I-NEXT: lui a1, 209715
-; RV32I-NEXT: addi s4, a1, 819
-; RV32I-NEXT: and a1, a0, s4
+; RV32I-NEXT: srli a2, a0, 1
+; RV32I-NEXT: lui a3, 349525
+; RV32I-NEXT: addi a3, a3, 1365
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sub a0, a0, a2
+; RV32I-NEXT: lui a2, 209715
+; RV32I-NEXT: addi a2, a2, 819
+; RV32I-NEXT: and a4, a0, a2
; RV32I-NEXT: srli a0, a0, 2
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: add a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: lui a1, 61681
-; RV32I-NEXT: addi s5, a1, -241
-; RV32I-NEXT: and a0, a0, s5
-; RV32I-NEXT: lui a1, 4112
-; RV32I-NEXT: addi s1, a1, 257
-; RV32I-NEXT: mv a1, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: srli s2, a0, 24
-; RV32I-NEXT: srli a0, s0, 1
-; RV32I-NEXT: and a0, a0, s3
-; RV32I-NEXT: sub s0, s0, a0
-; RV32I-NEXT: and a0, s0, s4
-; RV32I-NEXT: srli s0, s0, 2
-; RV32I-NEXT: and a1, s0, s4
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: and a0, a0, s5
-; RV32I-NEXT: mv a1, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: srli a1, a0, 24
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: add a0, a4, a0
+; RV32I-NEXT: srli a4, a0, 4
+; RV32I-NEXT: add a0, a0, a4
+; RV32I-NEXT: lui a4, 61681
+; RV32I-NEXT: addi a4, a4, -241
+; RV32I-NEXT: and a0, a0, a4
+; RV32I-NEXT: slli a5, a0, 8
+; RV32I-NEXT: add a5, a0, a5
+; RV32I-NEXT: slli a5, a5, 8
+; RV32I-NEXT: add a5, a0, a5
+; RV32I-NEXT: slli a5, a5, 8
+; RV32I-NEXT: add a0, a0, a5
+; RV32I-NEXT: srli a0, a0, 24
+; RV32I-NEXT: srli a5, a1, 1
+; RV32I-NEXT: and a3, a5, a3
+; RV32I-NEXT: sub a1, a1, a3
+; RV32I-NEXT: and a3, a1, a2
+; RV32I-NEXT: srli a1, a1, 2
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: add a1, a3, a1
+; RV32I-NEXT: srli a2, a1, 4
+; RV32I-NEXT: add a1, a1, a2
+; RV32I-NEXT: and a1, a1, a4
+; RV32I-NEXT: slli a2, a1, 8
+; RV32I-NEXT: add a2, a1, a2
+; RV32I-NEXT: slli a2, a2, 8
+; RV32I-NEXT: add a2, a1, a2
+; RV32I-NEXT: slli a2, a2, 8
+; RV32I-NEXT: add a1, a1, a2
+; RV32I-NEXT: srli a1, a1, 24
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_v2i32:
@@ -558,59 +529,48 @@ declare i64 @llvm.ctpop.i64(i64)
define i64 @ctpop_i64(i64 %a) nounwind {
; RV32I-LABEL: ctpop_i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: srli a0, a1, 1
-; RV32I-NEXT: lui a2, 349525
-; RV32I-NEXT: addi s2, a2, 1365
-; RV32I-NEXT: and a0, a0, s2
-; RV32I-NEXT: sub a1, a1, a0
-; RV32I-NEXT: lui a0, 209715
-; RV32I-NEXT: addi s3, a0, 819
-; RV32I-NEXT: and a0, a1, s3
+; RV32I-NEXT: srli a2, a1, 1
+; RV32I-NEXT: lui a3, 349525
+; RV32I-NEXT: addi a3, a3, 1365
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: sub a1, a1, a2
+; RV32I-NEXT: lui a2, 209715
+; RV32I-NEXT: addi a2, a2, 819
+; RV32I-NEXT: and a4, a1, a2
; RV32I-NEXT: srli a1, a1, 2
-; RV32I-NEXT: and a1, a1, s3
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: lui a1, 61681
-; RV32I-NEXT: addi s4, a1, -241
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: lui a1, 4112
-; RV32I-NEXT: addi s1, a1, 257
-; RV32I-NEXT: mv a1, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: srli s5, a0, 24
-; RV32I-NEXT: srli a0, s0, 1
-; RV32I-NEXT: and a0, a0, s2
-; RV32I-NEXT: sub s0, s0, a0
-; RV32I-NEXT: and a0, s0, s3
-; RV32I-NEXT: srli s0, s0, 2
-; RV32I-NEXT: and a1, s0, s3
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: mv a1, s1
-; RV32I-NEXT: call __mulsi3
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: add a1, a4, a1
+; RV32I-NEXT: srli a4, a1, 4
+; RV32I-NEXT: add a1, a1, a4
+; RV32I-NEXT: lui a4, 61681
+; RV32I-NEXT: addi a4, a4, -241
+; RV32I-NEXT: and a1, a1, a4
+; RV32I-NEXT: slli a5, a1, 8
+; RV32I-NEXT: add a5, a1, a5
+; RV32I-NEXT: slli a5, a5, 8
+; RV32I-NEXT: add a5, a1, a5
+; RV32I-NEXT: slli a5, a5, 8
+; RV32I-NEXT: add a1, a1, a5
+; RV32I-NEXT: srli a1, a1, 24
+; RV32I-NEXT: srli a5, a0, 1
+; RV32I-NEXT: and a3, a5, a3
+; RV32I-NEXT: sub a0, a0, a3
+; RV32I-NEXT: and a3, a0, a2
+; RV32I-NEXT: srli a0, a0, 2
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: add a0, a3, a0
+; RV32I-NEXT: srli a2, a0, 4
+; RV32I-NEXT: add a0, a0, a2
+; RV32I-NEXT: and a0, a0, a4
+; RV32I-NEXT: slli a2, a0, 8
+; RV32I-NEXT: add a2, a0, a2
+; RV32I-NEXT: slli a2, a2, 8
+; RV32I-NEXT: add a2, a0, a2
+; RV32I-NEXT: slli a2, a2, 8
+; RV32I-NEXT: add a0, a0, a2
; RV32I-NEXT: srli a0, a0, 24
-; RV32I-NEXT: add a0, a0, s5
+; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_i64:
@@ -738,99 +698,90 @@ declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
define <2 x i64> @ctpop_v2i64(<2 x i64> %a) nounwind {
; RV32I-LABEL: ctpop_v2i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -48
-; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a0, 4(a1)
-; RV32I-NEXT: lw s2, 8(a1)
-; RV32I-NEXT: lw s5, 12(a1)
-; RV32I-NEXT: lw s6, 0(a1)
-; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: lui a2, 349525
-; RV32I-NEXT: addi s3, a2, 1365
-; RV32I-NEXT: and a1, a1, s3
-; RV32I-NEXT: sub a0, a0, a1
-; RV32I-NEXT: lui a1, 209715
-; RV32I-NEXT: addi s4, a1, 819
-; RV32I-NEXT: and a1, a0, s4
-; RV32I-NEXT: srli a0, a0, 2
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: add a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: lui a1, 61681
-; RV32I-NEXT: addi s7, a1, -241
-; RV32I-NEXT: and a0, a0, s7
-; RV32I-NEXT: lui a1, 4112
-; RV32I-NEXT: addi s1, a1, 257
-; RV32I-NEXT: mv a1, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: srli s8, a0, 24
-; RV32I-NEXT: srli a0, s6, 1
-; RV32I-NEXT: and a0, a0, s3
-; RV32I-NEXT: sub a0, s6, a0
-; RV32I-NEXT: and a1, a0, s4
-; RV32I-NEXT: srli a0, a0, 2
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: add a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: and a0, a0, s7
-; RV32I-NEXT: mv a1, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: srli a0, a0, 24
-; RV32I-NEXT: add s8, a0, s8
-; RV32I-NEXT: srli a0, s5, 1
-; RV32I-NEXT: and a0, a0, s3
-; RV32I-NEXT: sub a0, s5, a0
-; RV32I-NEXT: and a1, a0, s4
-; RV32I-NEXT: srli a0, a0, 2
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: add a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: and a0, a0, s7
-; RV32I-NEXT: mv a1, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: srli s5, a0, 24
-; RV32I-NEXT: srli a0, s2, 1
-; RV32I-NEXT: and a0, a0, s3
-; RV32I-NEXT: sub a0, s2, a0
-; RV32I-NEXT: and a1, a0, s4
-; RV32I-NEXT: srli a0, a0, 2
-; RV32I-NEXT: and a0, a0, s4
-; RV32I-NEXT: add a0, a1, a0
-; RV32I-NEXT: srli a1, a0, 4
-; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: and a0, a0, s7
-; RV32I-NEXT: mv a1, s1
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: srli a0, a0, 24
-; RV32I-NEXT: add a0, a0, s5
-; RV32I-NEXT: sw zero, 12(s0)
-; RV32I-NEXT: sw zero, 4(s0)
-; RV32I-NEXT: sw a0, 8(s0)
-; RV32I-NEXT: sw s8, 0(s0)
-; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s8, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 48
+; RV32I-NEXT: lw a3, 4(a1)
+; RV32I-NEXT: lw a2, 8(a1)
+; RV32I-NEXT: lw a4, 12(a1)
+; RV32I-NEXT: lw a5, 0(a1)
+; RV32I-NEXT: srli a6, a3, 1
+; RV32I-NEXT: lui a1, 349525
+; RV32I-NEXT: addi a1, a1, 1365
+; RV32I-NEXT: and a6, a6, a1
+; RV32I-NEXT: sub a6, a3, a6
+; RV32I-NEXT: lui a3, 209715
+; RV32I-NEXT: addi a3, a3, 819
+; RV32I-NEXT: and a7, a6, a3
+; RV32I-NEXT: srli a6, a6, 2
+; RV32I-NEXT: and a6, a6, a3
+; RV32I-NEXT: add a6, a7, a6
+; RV32I-NEXT: srli a7, a6, 4
+; RV32I-NEXT: add a6, a6, a7
+; RV32I-NEXT: lui a7, 61681
+; RV32I-NEXT: addi a7, a7, -241
+; RV32I-NEXT: and a6, a6, a7
+; RV32I-NEXT: slli t0, a6, 8
+; RV32I-NEXT: add t0, a6, t0
+; RV32I-NEXT: slli t0, t0, 8
+; RV32I-NEXT: add t0, a6, t0
+; RV32I-NEXT: slli t0, t0, 8
+; RV32I-NEXT: add a6, a6, t0
+; RV32I-NEXT: srli a6, a6, 24
+; RV32I-NEXT: srli t0, a5, 1
+; RV32I-NEXT: and t0, t0, a1
+; RV32I-NEXT: sub a5, a5, t0
+; RV32I-NEXT: and t0, a5, a3
+; RV32I-NEXT: srli a5, a5, 2
+; RV32I-NEXT: and a5, a5, a3
+; RV32I-NEXT: add a5, t0, a5
+; RV32I-NEXT: srli t0, a5, 4
+; RV32I-NEXT: add a5, a5, t0
+; RV32I-NEXT: and a5, a5, a7
+; RV32I-NEXT: slli t0, a5, 8
+; RV32I-NEXT: add t0, a5, t0
+; RV32I-NEXT: slli t0, t0, 8
+; RV32I-NEXT: add t0, a5, t0
+; RV32I-NEXT: slli t0, t0, 8
+; RV32I-NEXT: add a5, a5, t0
+; RV32I-NEXT: srli a5, a5, 24
+; RV32I-NEXT: add a5, a5, a6
+; RV32I-NEXT: srli a6, a4, 1
+; RV32I-NEXT: and a6, a6, a1
+; RV32I-NEXT: sub a4, a4, a6
+; RV32I-NEXT: and a6, a4, a3
+; RV32I-NEXT: srli a4, a4, 2
+; RV32I-NEXT: and a4, a4, a3
+; RV32I-NEXT: add a4, a6, a4
+; RV32I-NEXT: srli a6, a4, 4
+; RV32I-NEXT: add a4, a4, a6
+; RV32I-NEXT: and a4, a4, a7
+; RV32I-NEXT: slli a6, a4, 8
+; RV32I-NEXT: add a6, a4, a6
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: add a6, a4, a6
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: add a4, a4, a6
+; RV32I-NEXT: srli a4, a4, 24
+; RV32I-NEXT: srli a6, a2, 1
+; RV32I-NEXT: and a1, a6, a1
+; RV32I-NEXT: sub a2, a2, a1
+; RV32I-NEXT: and a1, a2, a3
+; RV32I-NEXT: srli a2, a2, 2
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: add a1, a1, a2
+; RV32I-NEXT: srli a2, a1, 4
+; RV32I-NEXT: add a1, a1, a2
+; RV32I-NEXT: and a1, a1, a7
+; RV32I-NEXT: slli a2, a1, 8
+; RV32I-NEXT: add a2, a1, a2
+; RV32I-NEXT: slli a2, a2, 8
+; RV32I-NEXT: add a2, a1, a2
+; RV32I-NEXT: slli a2, a2, 8
+; RV32I-NEXT: add a1, a1, a2
+; RV32I-NEXT: srli a1, a1, 24
+; RV32I-NEXT: add a1, a1, a4
+; RV32I-NEXT: sw zero, 12(a0)
+; RV32I-NEXT: sw zero, 4(a0)
+; RV32I-NEXT: sw a1, 8(a0)
+; RV32I-NEXT: sw a5, 0(a0)
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_v2i64:
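
A note on the transformation itself (illustrative C, not part of the patch):
the slli/add chains in the hunks above compute exactly the value the old
multiply did, since v + (v << 8) + (v << 16) + (v << 24) == v * 0x01010101
modulo 2^32. A minimal standalone sketch that checks the i32 identity:

  #include <assert.h>
  #include <stdint.h>

  /* After the usual popcount bit tricks, v holds per-byte partial counts.
     Both forms below accumulate the four bytes into bits 31..24. */
  static uint32_t sum_bytes_mul(uint32_t v) {
    return (v * 0x01010101u) >> 24; /* previous lowering: one multiply */
  }

  static uint32_t sum_bytes_shift(uint32_t v) {
    uint32_t t = v;
    t = v + (t << 8);  /* v * 0x00000101 */
    t = v + (t << 8);  /* v * 0x00010101 */
    t = v + (t << 8);  /* v * 0x01010101 */
    return t >> 24;    /* new lowering when MUL would become a libcall */
  }

  int main(void) {
    for (uint32_t v = 0; v < (1u << 20); ++v)
      assert(sum_bytes_mul(v) == sum_bytes_shift(v));
    return 0;
  }

Both sides wrap modulo 2^32, so the equality holds for every input, not
just for in-range partial counts.
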
diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll
index 73bfc6480b4d75..d5d6060a7f7990 100644
--- a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll
@@ -317,8 +317,6 @@ define i64 @ctlz_i64(i64 %a) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: beqz a0, .LBB5_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srli a1, a0, 2
@@ -354,14 +352,21 @@ define i64 @ctlz_i64(i64 %a) nounwind {
; RV64I-NEXT: slli a2, a1, 32
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: slli a2, a1, 32
-; RV64I-NEXT: add a1, a1, a2
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srli a0, a0, 56
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB5_2:
; RV64I-NEXT: li a0, 64
diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll
index 7feef4dad4116a..b3dc59d0a59248 100644
--- a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll
@@ -307,8 +307,6 @@ define i64 @ctlz_i64(i64 %a) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: beqz a0, .LBB5_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srli a1, a0, 2
@@ -344,14 +342,21 @@ define i64 @ctlz_i64(i64 %a) nounwind {
; RV64I-NEXT: slli a2, a1, 32
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: slli a2, a1, 32
-; RV64I-NEXT: add a1, a1, a2
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srli a0, a0, 56
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB5_2:
; RV64I-NEXT: li a0, 64
@@ -623,8 +628,6 @@ declare i64 @llvm.ctpop.i64(i64)
define i64 @ctpop_i64(i64 %a) nounwind {
; RV64I-LABEL: ctpop_i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
@@ -647,14 +650,21 @@ define i64 @ctpop_i64(i64 %a) nounwind {
; RV64I-NEXT: slli a2, a1, 32
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: slli a2, a1, 32
-; RV64I-NEXT: add a1, a1, a2
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srli a0, a0, 56
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: ctpop_i64:
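
The i64 hunks above (ctlz_i64, ctpop_i64) widen the same idea to eight
bytes: seven slli/add pairs replace the multiply by the splat constant
0x0101010101010101, followed by a shift right by 56. A quick sanity check
of that identity (again illustrative C, not patch code):

  #include <assert.h>
  #include <stdint.h>

  int main(void) {
    uint64_t v = 0x0102030405060708ull; /* arbitrary per-byte counts */
    uint64_t by_mul = (v * 0x0101010101010101ull) >> 56;
    uint64_t t = v;
    for (int i = 8; i < 64; i += 8) /* seven shift-add steps */
      t = v + (t << 8);
    assert(by_mul == (t >> 56));
    return 0;
  }
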
diff --git a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
index 1f62ea9f568191..b5bdaacadd357c 100644
--- a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
@@ -11,8 +11,6 @@ define signext i32 @ctlz_i32(i32 signext %a) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: beqz a0, .LBB0_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srliw a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
@@ -38,14 +36,17 @@ define signext i32 @ctlz_i32(i32 signext %a) nounwind {
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw a1, a1, -241
+; RV64I-NEXT: addi a2, a1, -241
+; RV64I-NEXT: and a2, a0, a2
+; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: addi a1, a1, -256
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: add a0, a2, a0
; RV64I-NEXT: srliw a0, a0, 24
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB0_2:
; RV64I-NEXT: li a0, 32
@@ -66,8 +67,6 @@ define signext i32 @log2_i32(i32 signext %a) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: beqz a0, .LBB1_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srliw a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
@@ -93,14 +92,17 @@ define signext i32 @log2_i32(i32 signext %a) nounwind {
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw a1, a1, -241
+; RV64I-NEXT: addi a2, a1, -241
+; RV64I-NEXT: and a2, a0, a2
+; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: addi a1, a1, -256
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: add a0, a2, a0
; RV64I-NEXT: srliw a0, a0, 24
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: j .LBB1_3
; RV64I-NEXT: .LBB1_2:
; RV64I-NEXT: li a0, 32
@@ -125,50 +127,49 @@ define signext i32 @log2_i32(i32 signext %a) nounwind {
define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
; RV64I-LABEL: log2_ceil_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: addiw a0, a0, -1
-; RV64I-NEXT: li s0, 32
-; RV64I-NEXT: li a1, 32
-; RV64I-NEXT: beqz a0, .LBB2_2
+; RV64I-NEXT: addiw a1, a0, -1
+; RV64I-NEXT: li a0, 32
+; RV64I-NEXT: li a2, 32
+; RV64I-NEXT: beqz a1, .LBB2_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: srliw a1, a0, 1
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 2
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 4
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 8
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 16
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: not a0, a0
-; RV64I-NEXT: srli a1, a0, 1
-; RV64I-NEXT: lui a2, 349525
-; RV64I-NEXT: addiw a2, a2, 1365
+; RV64I-NEXT: srliw a2, a1, 1
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: srliw a2, a1, 2
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: srliw a2, a1, 4
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: srliw a2, a1, 8
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: srliw a2, a1, 16
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: not a1, a1
+; RV64I-NEXT: srli a2, a1, 1
+; RV64I-NEXT: lui a3, 349525
+; RV64I-NEXT: addiw a3, a3, 1365
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sub a1, a1, a2
+; RV64I-NEXT: lui a2, 209715
+; RV64I-NEXT: addiw a2, a2, 819
+; RV64I-NEXT: and a3, a1, a2
+; RV64I-NEXT: srli a1, a1, 2
; RV64I-NEXT: and a1, a1, a2
-; RV64I-NEXT: sub a0, a0, a1
-; RV64I-NEXT: lui a1, 209715
-; RV64I-NEXT: addiw a1, a1, 819
-; RV64I-NEXT: and a2, a0, a1
-; RV64I-NEXT: srli a0, a0, 2
-; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: add a0, a2, a0
-; RV64I-NEXT: srli a1, a0, 4
-; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw a1, a1, -241
-; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: srliw a1, a0, 24
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: srli a2, a1, 4
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: lui a2, 61681
+; RV64I-NEXT: addi a3, a2, -241
+; RV64I-NEXT: and a3, a1, a3
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: addi a2, a2, -256
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: srliw a2, a1, 24
; RV64I-NEXT: .LBB2_2: # %cond.end
-; RV64I-NEXT: sub a0, s0, a1
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: sub a0, a0, a2
; RV64I-NEXT: ret
;
; RV64XTHEADBB-LABEL: log2_ceil_i32:
@@ -189,48 +190,46 @@ define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
define signext i32 @findLastSet_i32(i32 signext %a) nounwind {
; RV64I-LABEL: findLastSet_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: srliw a0, a0, 1
-; RV64I-NEXT: or a0, s0, a0
-; RV64I-NEXT: srliw a1, a0, 2
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 4
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 8
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 16
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: not a0, a0
-; RV64I-NEXT: srli a1, a0, 1
-; RV64I-NEXT: lui a2, 349525
-; RV64I-NEXT: addiw a2, a2, 1365
+; RV64I-NEXT: srliw a1, a0, 1
+; RV64I-NEXT: or a1, a0, a1
+; RV64I-NEXT: srliw a2, a1, 2
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: srliw a2, a1, 4
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: srliw a2, a1, 8
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: srliw a2, a1, 16
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: not a1, a1
+; RV64I-NEXT: srli a2, a1, 1
+; RV64I-NEXT: lui a3, 349525
+; RV64I-NEXT: addiw a3, a3, 1365
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sub a1, a1, a2
+; RV64I-NEXT: lui a2, 209715
+; RV64I-NEXT: addiw a2, a2, 819
+; RV64I-NEXT: and a3, a1, a2
+; RV64I-NEXT: srli a1, a1, 2
; RV64I-NEXT: and a1, a1, a2
-; RV64I-NEXT: sub a0, a0, a1
-; RV64I-NEXT: lui a1, 209715
-; RV64I-NEXT: addiw a1, a1, 819
-; RV64I-NEXT: and a2, a0, a1
-; RV64I-NEXT: srli a0, a0, 2
-; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: add a0, a2, a0
-; RV64I-NEXT: srli a1, a0, 4
-; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw a1, a1, -241
-; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: srliw a0, a0, 24
-; RV64I-NEXT: xori a0, a0, 31
-; RV64I-NEXT: snez a1, s0
-; RV64I-NEXT: addi a1, a1, -1
-; RV64I-NEXT: or a0, a1, a0
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: srli a2, a1, 4
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: lui a2, 61681
+; RV64I-NEXT: addi a3, a2, -241
+; RV64I-NEXT: and a3, a1, a3
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: addi a2, a2, -256
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: srliw a1, a1, 24
+; RV64I-NEXT: xori a1, a1, 31
+; RV64I-NEXT: snez a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: ret
;
; RV64XTHEADBB-LABEL: findLastSet_i32:
@@ -256,10 +255,6 @@ define i32 @ctlz_lshr_i32(i32 signext %a) {
; RV64I-NEXT: srliw a0, a0, 1
; RV64I-NEXT: beqz a0, .LBB4_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: .cfi_def_cfa_offset 16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
; RV64I-NEXT: srliw a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
@@ -285,14 +280,17 @@ define i32 @ctlz_lshr_i32(i32 signext %a) {
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw a1, a1, -241
+; RV64I-NEXT: addi a2, a1, -241
+; RV64I-NEXT: and a2, a0, a2
+; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: addi a1, a1, -256
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: add a0, a2, a0
; RV64I-NEXT: srliw a0, a0, 24
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB4_2:
; RV64I-NEXT: li a0, 32
@@ -317,8 +315,6 @@ define i64 @ctlz_i64(i64 %a) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: beqz a0, .LBB5_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srli a1, a0, 2
@@ -354,14 +350,21 @@ define i64 @ctlz_i64(i64 %a) nounwind {
; RV64I-NEXT: slli a2, a1, 32
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: slli a2, a1, 32
-; RV64I-NEXT: add a1, a1, a2
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srli a0, a0, 56
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB5_2:
; RV64I-NEXT: li a0, 64
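
One more detail in the i32-on-RV64 hunks above: the single AND with
0x0F0F0F0F is split into two masks, 0x0F0F0F0F for the unshifted value and
0x0F0F0F00 for the value shifted left by 8. The two forms are
interchangeable because shifting the masked value equals masking the
shifted value in 32-bit arithmetic; presumably keeping the two ANDs
independent of each other shortens the critical path. A sketch of the
identity (illustrative C only):

  #include <assert.h>
  #include <stdint.h>

  int main(void) {
    for (uint32_t v = 0; v < (1u << 20); ++v) {
      uint32_t masked_then_shifted = (v & 0x0F0F0F0Fu) << 8;
      uint32_t shifted_then_masked = (v << 8) & 0x0F0F0F00u;
      assert(masked_then_shifted == shifted_then_masked);
    }
    return 0;
  }
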
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index 2269d8d04c9cb0..1a3ea9a284bcc0 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -11,8 +11,6 @@ define signext i32 @ctlz_i32(i32 signext %a) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: beqz a0, .LBB0_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srliw a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
@@ -38,14 +36,17 @@ define signext i32 @ctlz_i32(i32 signext %a) nounwind {
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw a1, a1, -241
+; RV64I-NEXT: addi a2, a1, -241
+; RV64I-NEXT: and a2, a0, a2
+; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: addi a1, a1, -256
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: add a0, a2, a0
; RV64I-NEXT: srliw a0, a0, 24
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB0_2:
; RV64I-NEXT: li a0, 32
@@ -64,8 +65,6 @@ define signext i32 @log2_i32(i32 signext %a) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: beqz a0, .LBB1_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srliw a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
@@ -91,14 +90,17 @@ define signext i32 @log2_i32(i32 signext %a) nounwind {
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw a1, a1, -241
+; RV64I-NEXT: addi a2, a1, -241
+; RV64I-NEXT: and a2, a0, a2
+; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: addi a1, a1, -256
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: add a0, a2, a0
; RV64I-NEXT: srliw a0, a0, 24
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: j .LBB1_3
; RV64I-NEXT: .LBB1_2:
; RV64I-NEXT: li a0, 32
@@ -121,50 +123,49 @@ define signext i32 @log2_i32(i32 signext %a) nounwind {
define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
; RV64I-LABEL: log2_ceil_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: addiw a0, a0, -1
-; RV64I-NEXT: li s0, 32
-; RV64I-NEXT: li a1, 32
-; RV64I-NEXT: beqz a0, .LBB2_2
+; RV64I-NEXT: addiw a1, a0, -1
+; RV64I-NEXT: li a0, 32
+; RV64I-NEXT: li a2, 32
+; RV64I-NEXT: beqz a1, .LBB2_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: srliw a1, a0, 1
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 2
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 4
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 8
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 16
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: not a0, a0
-; RV64I-NEXT: srli a1, a0, 1
-; RV64I-NEXT: lui a2, 349525
-; RV64I-NEXT: addiw a2, a2, 1365
+; RV64I-NEXT: srliw a2, a1, 1
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: srliw a2, a1, 2
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: srliw a2, a1, 4
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: srliw a2, a1, 8
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: srliw a2, a1, 16
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: not a1, a1
+; RV64I-NEXT: srli a2, a1, 1
+; RV64I-NEXT: lui a3, 349525
+; RV64I-NEXT: addiw a3, a3, 1365
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sub a1, a1, a2
+; RV64I-NEXT: lui a2, 209715
+; RV64I-NEXT: addiw a2, a2, 819
+; RV64I-NEXT: and a3, a1, a2
+; RV64I-NEXT: srli a1, a1, 2
; RV64I-NEXT: and a1, a1, a2
-; RV64I-NEXT: sub a0, a0, a1
-; RV64I-NEXT: lui a1, 209715
-; RV64I-NEXT: addiw a1, a1, 819
-; RV64I-NEXT: and a2, a0, a1
-; RV64I-NEXT: srli a0, a0, 2
-; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: add a0, a2, a0
-; RV64I-NEXT: srli a1, a0, 4
-; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw a1, a1, -241
-; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: srliw a1, a0, 24
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: srli a2, a1, 4
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: lui a2, 61681
+; RV64I-NEXT: addi a3, a2, -241
+; RV64I-NEXT: and a3, a1, a3
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: addi a2, a2, -256
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: srliw a2, a1, 24
; RV64I-NEXT: .LBB2_2: # %cond.end
-; RV64I-NEXT: sub a0, s0, a1
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: sub a0, a0, a2
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: log2_ceil_i32:
@@ -183,48 +184,46 @@ define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
define signext i32 @findLastSet_i32(i32 signext %a) nounwind {
; RV64I-LABEL: findLastSet_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: srliw a0, a0, 1
-; RV64I-NEXT: or a0, s0, a0
-; RV64I-NEXT: srliw a1, a0, 2
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 4
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 8
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 16
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: not a0, a0
-; RV64I-NEXT: srli a1, a0, 1
-; RV64I-NEXT: lui a2, 349525
-; RV64I-NEXT: addiw a2, a2, 1365
+; RV64I-NEXT: srliw a1, a0, 1
+; RV64I-NEXT: or a1, a0, a1
+; RV64I-NEXT: srliw a2, a1, 2
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: srliw a2, a1, 4
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: srliw a2, a1, 8
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: srliw a2, a1, 16
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: not a1, a1
+; RV64I-NEXT: srli a2, a1, 1
+; RV64I-NEXT: lui a3, 349525
+; RV64I-NEXT: addiw a3, a3, 1365
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sub a1, a1, a2
+; RV64I-NEXT: lui a2, 209715
+; RV64I-NEXT: addiw a2, a2, 819
+; RV64I-NEXT: and a3, a1, a2
+; RV64I-NEXT: srli a1, a1, 2
; RV64I-NEXT: and a1, a1, a2
-; RV64I-NEXT: sub a0, a0, a1
-; RV64I-NEXT: lui a1, 209715
-; RV64I-NEXT: addiw a1, a1, 819
-; RV64I-NEXT: and a2, a0, a1
-; RV64I-NEXT: srli a0, a0, 2
-; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: add a0, a2, a0
-; RV64I-NEXT: srli a1, a0, 4
-; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw a1, a1, -241
-; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: srliw a0, a0, 24
-; RV64I-NEXT: xori a0, a0, 31
-; RV64I-NEXT: snez a1, s0
-; RV64I-NEXT: addi a1, a1, -1
-; RV64I-NEXT: or a0, a1, a0
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: srli a2, a1, 4
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: lui a2, 61681
+; RV64I-NEXT: addi a3, a2, -241
+; RV64I-NEXT: and a3, a1, a3
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: addi a2, a2, -256
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: srliw a1, a1, 24
+; RV64I-NEXT: xori a1, a1, 31
+; RV64I-NEXT: snez a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: findLastSet_i32:
@@ -248,10 +247,6 @@ define i32 @ctlz_lshr_i32(i32 signext %a) {
; RV64I-NEXT: srliw a0, a0, 1
; RV64I-NEXT: beqz a0, .LBB4_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: .cfi_def_cfa_offset 16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: .cfi_offset ra, -8
; RV64I-NEXT: srliw a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
@@ -277,14 +272,17 @@ define i32 @ctlz_lshr_i32(i32 signext %a) {
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw a1, a1, -241
+; RV64I-NEXT: addi a2, a1, -241
+; RV64I-NEXT: and a2, a0, a2
+; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: addi a1, a1, -256
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: add a0, a2, a0
; RV64I-NEXT: srliw a0, a0, 24
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB4_2:
; RV64I-NEXT: li a0, 32
@@ -307,8 +305,6 @@ define i64 @ctlz_i64(i64 %a) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: beqz a0, .LBB5_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srli a1, a0, 2
@@ -344,14 +340,21 @@ define i64 @ctlz_i64(i64 %a) nounwind {
; RV64I-NEXT: slli a2, a1, 32
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: slli a2, a1, 32
-; RV64I-NEXT: add a1, a1, a2
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srli a0, a0, 56
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB5_2:
; RV64I-NEXT: li a0, 64
@@ -544,8 +547,6 @@ declare i32 @llvm.ctpop.i32(i32)
define signext i32 @ctpop_i32(i32 signext %a) nounwind {
; RV64I-LABEL: ctpop_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
@@ -560,14 +561,17 @@ define signext i32 @ctpop_i32(i32 signext %a) nounwind {
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw a1, a1, -241
+; RV64I-NEXT: addi a2, a1, -241
+; RV64I-NEXT: and a2, a0, a2
+; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: addi a1, a1, -256
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: add a0, a2, a0
; RV64I-NEXT: srliw a0, a0, 24
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: ctpop_i32:
@@ -657,8 +661,6 @@ define i1 @ctpop_i32_ne_one(i32 signext %a) nounwind {
define signext i32 @ctpop_i32_load(ptr %p) nounwind {
; RV64I-LABEL: ctpop_i32_load:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: lw a0, 0(a0)
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: lui a2, 349525
@@ -674,14 +676,17 @@ define signext i32 @ctpop_i32_load(ptr %p) nounwind {
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw a1, a1, -241
+; RV64I-NEXT: addi a2, a1, -241
+; RV64I-NEXT: and a2, a0, a2
+; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: addi a1, a1, -256
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: add a0, a2, a0
; RV64I-NEXT: srliw a0, a0, 24
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: ctpop_i32_load:
@@ -699,58 +704,49 @@ declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
define <2 x i32> @ctpop_v2i32(<2 x i32> %a) nounwind {
; RV64I-LABEL: ctpop_v2i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -64
-; RV64I-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s4, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s5, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a1
-; RV64I-NEXT: srli a1, a0, 1
-; RV64I-NEXT: lui a2, 349525
-; RV64I-NEXT: addiw s3, a2, 1365
-; RV64I-NEXT: and a1, a1, s3
-; RV64I-NEXT: sub a0, a0, a1
-; RV64I-NEXT: lui a1, 209715
-; RV64I-NEXT: addiw s4, a1, 819
-; RV64I-NEXT: and a1, a0, s4
+; RV64I-NEXT: srli a2, a0, 1
+; RV64I-NEXT: lui a3, 349525
+; RV64I-NEXT: addiw a3, a3, 1365
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: lui a2, 209715
+; RV64I-NEXT: addiw a2, a2, 819
+; RV64I-NEXT: and a4, a0, a2
; RV64I-NEXT: srli a0, a0, 2
-; RV64I-NEXT: and a0, a0, s4
-; RV64I-NEXT: add a0, a1, a0
-; RV64I-NEXT: srli a1, a0, 4
-; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw s5, a1, -241
-; RV64I-NEXT: and a0, a0, s5
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw s1, a1, 257
-; RV64I-NEXT: mv a1, s1
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: srliw s2, a0, 24
-; RV64I-NEXT: srli a0, s0, 1
-; RV64I-NEXT: and a0, a0, s3
-; RV64I-NEXT: sub s0, s0, a0
-; RV64I-NEXT: and a0, s0, s4
-; RV64I-NEXT: srli s0, s0, 2
-; RV64I-NEXT: and a1, s0, s4
-; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: srli a1, a0, 4
-; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: and a0, a0, s5
-; RV64I-NEXT: mv a1, s1
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: srliw a1, a0, 24
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s5, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 64
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: add a0, a4, a0
+; RV64I-NEXT: srli a4, a0, 4
+; RV64I-NEXT: add a0, a0, a4
+; RV64I-NEXT: lui a4, 61681
+; RV64I-NEXT: addi a5, a4, -241
+; RV64I-NEXT: and a6, a0, a5
+; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: addi a4, a4, -256
+; RV64I-NEXT: and a0, a0, a4
+; RV64I-NEXT: add a0, a6, a0
+; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: add a0, a6, a0
+; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: add a0, a6, a0
+; RV64I-NEXT: srliw a0, a0, 24
+; RV64I-NEXT: srli a6, a1, 1
+; RV64I-NEXT: and a3, a6, a3
+; RV64I-NEXT: sub a1, a1, a3
+; RV64I-NEXT: and a3, a1, a2
+; RV64I-NEXT: srli a1, a1, 2
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: srli a2, a1, 4
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: and a5, a1, a5
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: and a1, a1, a4
+; RV64I-NEXT: add a1, a5, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a5, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a5, a1
+; RV64I-NEXT: srliw a1, a1, 24
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: ctpop_v2i32:
@@ -875,8 +871,6 @@ declare i64 @llvm.ctpop.i64(i64)
define i64 @ctpop_i64(i64 %a) nounwind {
; RV64I-LABEL: ctpop_i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
@@ -899,14 +893,21 @@ define i64 @ctpop_i64(i64 %a) nounwind {
; RV64I-NEXT: slli a2, a1, 32
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: slli a2, a1, 32
-; RV64I-NEXT: add a1, a1, a2
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srli a0, a0, 56
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: ctpop_i64:
@@ -998,66 +999,68 @@ declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
define <2 x i64> @ctpop_v2i64(<2 x i64> %a) nounwind {
; RV64I-LABEL: ctpop_v2i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -64
-; RV64I-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s4, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s5, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a1
-; RV64I-NEXT: srli a1, a0, 1
-; RV64I-NEXT: lui a2, 349525
-; RV64I-NEXT: addiw a2, a2, 1365
-; RV64I-NEXT: slli a3, a2, 32
-; RV64I-NEXT: add s3, a2, a3
-; RV64I-NEXT: and a1, a1, s3
-; RV64I-NEXT: sub a0, a0, a1
-; RV64I-NEXT: lui a1, 209715
-; RV64I-NEXT: addiw a1, a1, 819
-; RV64I-NEXT: slli a2, a1, 32
-; RV64I-NEXT: add s4, a1, a2
-; RV64I-NEXT: and a1, a0, s4
+; RV64I-NEXT: srli a2, a0, 1
+; RV64I-NEXT: lui a3, 349525
+; RV64I-NEXT: addiw a3, a3, 1365
+; RV64I-NEXT: slli a4, a3, 32
+; RV64I-NEXT: add a3, a3, a4
+; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: lui a2, 209715
+; RV64I-NEXT: addiw a2, a2, 819
+; RV64I-NEXT: slli a4, a2, 32
+; RV64I-NEXT: add a2, a2, a4
+; RV64I-NEXT: and a4, a0, a2
; RV64I-NEXT: srli a0, a0, 2
-; RV64I-NEXT: and a0, a0, s4
-; RV64I-NEXT: add a0, a1, a0
-; RV64I-NEXT: srli a1, a0, 4
-; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw a1, a1, -241
-; RV64I-NEXT: slli a2, a1, 32
-; RV64I-NEXT: add s5, a1, a2
-; RV64I-NEXT: and a0, a0, s5
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw s1, a1, 257
-; RV64I-NEXT: slli a1, s1, 32
-; RV64I-NEXT: add s1, s1, a1
-; RV64I-NEXT: mv a1, s1
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: srli s2, a0, 56
-; RV64I-NEXT: srli a0, s0, 1
-; RV64I-NEXT: and a0, a0, s3
-; RV64I-NEXT: sub s0, s0, a0
-; RV64I-NEXT: and a0, s0, s4
-; RV64I-NEXT: srli s0, s0, 2
-; RV64I-NEXT: and a1, s0, s4
-; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: srli a1, a0, 4
-; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: and a0, a0, s5
-; RV64I-NEXT: mv a1, s1
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: srli a1, a0, 56
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s5, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 64
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: add a0, a4, a0
+; RV64I-NEXT: srli a4, a0, 4
+; RV64I-NEXT: add a0, a0, a4
+; RV64I-NEXT: lui a4, 61681
+; RV64I-NEXT: addiw a4, a4, -241
+; RV64I-NEXT: slli a5, a4, 32
+; RV64I-NEXT: add a4, a4, a5
+; RV64I-NEXT: and a0, a0, a4
+; RV64I-NEXT: slli a5, a0, 8
+; RV64I-NEXT: add a5, a0, a5
+; RV64I-NEXT: slli a5, a5, 8
+; RV64I-NEXT: add a5, a0, a5
+; RV64I-NEXT: slli a5, a5, 8
+; RV64I-NEXT: add a5, a0, a5
+; RV64I-NEXT: slli a5, a5, 8
+; RV64I-NEXT: add a5, a0, a5
+; RV64I-NEXT: slli a5, a5, 8
+; RV64I-NEXT: add a5, a0, a5
+; RV64I-NEXT: slli a5, a5, 8
+; RV64I-NEXT: add a5, a0, a5
+; RV64I-NEXT: slli a5, a5, 8
+; RV64I-NEXT: add a0, a0, a5
+; RV64I-NEXT: srli a0, a0, 56
+; RV64I-NEXT: srli a5, a1, 1
+; RV64I-NEXT: and a3, a5, a3
+; RV64I-NEXT: sub a1, a1, a3
+; RV64I-NEXT: and a3, a1, a2
+; RV64I-NEXT: srli a1, a1, 2
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: srli a2, a1, 4
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: and a1, a1, a4
+; RV64I-NEXT: slli a2, a1, 8
+; RV64I-NEXT: add a2, a1, a2
+; RV64I-NEXT: slli a2, a2, 8
+; RV64I-NEXT: add a2, a1, a2
+; RV64I-NEXT: slli a2, a2, 8
+; RV64I-NEXT: add a2, a1, a2
+; RV64I-NEXT: slli a2, a2, 8
+; RV64I-NEXT: add a2, a1, a2
+; RV64I-NEXT: slli a2, a2, 8
+; RV64I-NEXT: add a2, a1, a2
+; RV64I-NEXT: slli a2, a2, 8
+; RV64I-NEXT: add a2, a1, a2
+; RV64I-NEXT: slli a2, a2, 8
+; RV64I-NEXT: add a1, a1, a2
+; RV64I-NEXT: srli a1, a1, 56
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: ctpop_v2i64:
diff --git a/llvm/test/CodeGen/RISCV/sextw-removal.ll b/llvm/test/CodeGen/RISCV/sextw-removal.ll
index f707cb31e3eced..179aef19c5d48b 100644
--- a/llvm/test/CodeGen/RISCV/sextw-removal.ll
+++ b/llvm/test/CodeGen/RISCV/sextw-removal.ll
@@ -182,8 +182,7 @@ define void @test5(i32 signext %arg, i32 signext %arg1) nounwind {
; RV64I-NEXT: addiw s1, a1, 819
; RV64I-NEXT: lui a1, 61681
; RV64I-NEXT: addi s2, a1, -241
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addi s3, a1, 257
+; RV64I-NEXT: addi s3, a1, -256
; RV64I-NEXT: .LBB4_1: # %bb2
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: call bar
@@ -197,8 +196,14 @@ define void @test5(i32 signext %arg, i32 signext %arg1) nounwind {
; RV64I-NEXT: add a0, a2, a0
; RV64I-NEXT: srli a2, a0, 4
; RV64I-NEXT: add a0, a0, a2
-; RV64I-NEXT: and a0, a0, s2
-; RV64I-NEXT: mul a0, a0, s3
+; RV64I-NEXT: and a2, a0, s2
+; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: and a0, a0, s3
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: slli a0, a0, 8
+; RV64I-NEXT: add a0, a2, a0
; RV64I-NEXT: srliw a0, a0, 24
; RV64I-NEXT: bnez a1, .LBB4_1
; RV64I-NEXT: # %bb.2: # %bb7