[llvm] [SDAG] Use shifts if ISD::MUL is illegal when lowering ISD::CTPOP (PR #86505)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 25 06:51:07 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-llvm-selectiondag
Author: Wang Pengcheng (wangpc-pp)
Changes:
Expanding `ISD::CTPOP` currently ends with a multiply by the `0x01...01` splat constant, which turns into a libcall (e.g. `__mulsi3`/`__muldi3`) on targets where `ISD::MUL` is not legal for the type. Building the same byte sum out of shift-and-add pairs avoids those libcalls.

Fixes #86205
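To see why the replacement is equivalent: multiplying the per-byte counts by the `0x01...01` splat and shifting right by `Len - 8` sums all bytes into the top byte, and that multiply can be rebuilt as a chain of `acc = v + (acc << 8)` steps. A minimal standalone sketch of the identity for 32 bits (the function names here are illustrative, not from the patch):

```cpp
#include <cassert>
#include <cstdint>

// Sum of the four bytes of v, each assumed <= 8 (as after the preceding
// popcount steps, so no carries cross byte boundaries), computed two ways.
uint32_t byteSumByMul(uint32_t v) {
  return (v * 0x01010101u) >> 24; // one multiply, one shift
}

uint32_t byteSumByShifts(uint32_t v) {
  // Iteratively form v * 0x01010101 as v + (v << 8) + (v << 16) + (v << 24).
  uint32_t acc = v;
  for (unsigned i = 8; i < 32; i += 8)
    acc = v + (acc << 8);
  return acc >> 24;
}

int main() {
  for (uint32_t v : {0x01020304u, 0x08080808u, 0u})
    assert(byteSumByMul(v) == byteSumByShifts(v));
}
```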
---
Patch is 110.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/86505.diff
11 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+15-5)
- (modified) llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll (+6-3)
- (modified) llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll (+353-485)
- (modified) llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll (+52-62)
- (modified) llvm/test/CodeGen/RISCV/rv32xtheadbb.ll (+43-59)
- (modified) llvm/test/CodeGen/RISCV/rv32zbb.ll (+211-260)
- (modified) llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll (+14-9)
- (modified) llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll (+28-18)
- (modified) llvm/test/CodeGen/RISCV/rv64xtheadbb.ll (+120-117)
- (modified) llvm/test/CodeGen/RISCV/rv64zbb.ll (+255-252)
- (modified) llvm/test/CodeGen/RISCV/sextw-removal.ll (+9-4)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 8be03b66e155f6..e0662d57bb4bba 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8709,11 +8709,21 @@ SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
DAG.getConstant(0xFF, dl, VT));
}
- // v = (v * 0x01010101...) >> (Len - 8)
- SDValue Mask01 =
- DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
- return DAG.getNode(ISD::SRL, dl, VT,
- DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
+ SDValue V;
+ if (isOperationLegalOrCustomOrPromote(ISD::MUL, VT)) {
+ // v = (v * 0x01010101...) >> (Len - 8)
+ SDValue Mask01 =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
+ V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
+ } else {
+ V = Op;
+ SDValue ShiftC = DAG.getConstant(8, dl, VT);
+ for (unsigned I = 8; I < Len; I += 8) {
+ V = DAG.getNode(ISD::ADD, dl, VT, Op,
+ DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
+ }
+ }
+ return DAG.getNode(ISD::SRL, dl, VT, V,
DAG.getConstant(Len - 8, dl, ShVT));
}
diff --git a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
index 9fa3f5076bb221..c3731fc4f2e19f 100644
--- a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
@@ -285,9 +285,12 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
; LA64-NEXT: lu12i.w $a1, 61680
; LA64-NEXT: ori $a1, $a1, 3855
; LA64-NEXT: and $a0, $a0, $a1
-; LA64-NEXT: lu12i.w $a1, 4112
-; LA64-NEXT: ori $a1, $a1, 257
-; LA64-NEXT: mul.d $a0, $a0, $a1
+; LA64-NEXT: slli.d $a1, $a0, 8
+; LA64-NEXT: add.d $a1, $a0, $a1
+; LA64-NEXT: slli.d $a1, $a1, 8
+; LA64-NEXT: add.d $a1, $a0, $a1
+; LA64-NEXT: slli.d $a1, $a1, 8
+; LA64-NEXT: add.d $a0, $a0, $a1
; LA64-NEXT: bstrpick.d $a0, $a0, 31, 24
; LA64-NEXT: ret
%1 = call i32 @llvm.ctpop.i32(i32 %a)
diff --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
index 455e6e54c9b396..8533a1d73544f3 100644
--- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
@@ -1160,8 +1160,6 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
; RV32I: # %bb.0:
; RV32I-NEXT: beqz a0, .LBB10_2
; RV32I-NEXT: # %bb.1: # %cond.false
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: srli a1, a0, 1
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 2
@@ -1189,61 +1187,63 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
; RV32I-NEXT: lui a1, 61681
; RV32I-NEXT: addi a1, a1, -241
; RV32I-NEXT: and a0, a0, a1
-; RV32I-NEXT: lui a1, 4112
-; RV32I-NEXT: addi a1, a1, 257
-; RV32I-NEXT: call __mulsi3
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srli a0, a0, 24
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB10_2:
; RV32I-NEXT: li a0, 32
; RV32I-NEXT: ret
;
-; RV64I-LABEL: test_ctlz_i32:
-; RV64I: # %bb.0:
-; RV64I-NEXT: sext.w a1, a0
-; RV64I-NEXT: beqz a1, .LBB10_2
-; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: srliw a1, a0, 1
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 2
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 4
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 8
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 16
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: not a0, a0
-; RV64I-NEXT: srli a1, a0, 1
-; RV64I-NEXT: lui a2, 349525
-; RV64I-NEXT: addiw a2, a2, 1365
-; RV64I-NEXT: and a1, a1, a2
-; RV64I-NEXT: sub a0, a0, a1
-; RV64I-NEXT: lui a1, 209715
-; RV64I-NEXT: addiw a1, a1, 819
-; RV64I-NEXT: and a2, a0, a1
-; RV64I-NEXT: srli a0, a0, 2
-; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: add a0, a2, a0
-; RV64I-NEXT: srli a1, a0, 4
-; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw a1, a1, -241
-; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: srliw a0, a0, 24
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
-; RV64I-NEXT: ret
-; RV64I-NEXT: .LBB10_2:
-; RV64I-NEXT: li a0, 32
-; RV64I-NEXT: ret
+; RV64NOZBB-LABEL: test_ctlz_i32:
+; RV64NOZBB: # %bb.0:
+; RV64NOZBB-NEXT: sext.w a1, a0
+; RV64NOZBB-NEXT: beqz a1, .LBB10_2
+; RV64NOZBB-NEXT: # %bb.1: # %cond.false
+; RV64NOZBB-NEXT: srliw a1, a0, 1
+; RV64NOZBB-NEXT: or a0, a0, a1
+; RV64NOZBB-NEXT: srliw a1, a0, 2
+; RV64NOZBB-NEXT: or a0, a0, a1
+; RV64NOZBB-NEXT: srliw a1, a0, 4
+; RV64NOZBB-NEXT: or a0, a0, a1
+; RV64NOZBB-NEXT: srliw a1, a0, 8
+; RV64NOZBB-NEXT: or a0, a0, a1
+; RV64NOZBB-NEXT: srliw a1, a0, 16
+; RV64NOZBB-NEXT: or a0, a0, a1
+; RV64NOZBB-NEXT: not a0, a0
+; RV64NOZBB-NEXT: srli a1, a0, 1
+; RV64NOZBB-NEXT: lui a2, 349525
+; RV64NOZBB-NEXT: addiw a2, a2, 1365
+; RV64NOZBB-NEXT: and a1, a1, a2
+; RV64NOZBB-NEXT: sub a0, a0, a1
+; RV64NOZBB-NEXT: lui a1, 209715
+; RV64NOZBB-NEXT: addiw a1, a1, 819
+; RV64NOZBB-NEXT: and a2, a0, a1
+; RV64NOZBB-NEXT: srli a0, a0, 2
+; RV64NOZBB-NEXT: and a0, a0, a1
+; RV64NOZBB-NEXT: add a0, a2, a0
+; RV64NOZBB-NEXT: srli a1, a0, 4
+; RV64NOZBB-NEXT: add a0, a0, a1
+; RV64NOZBB-NEXT: lui a1, 61681
+; RV64NOZBB-NEXT: addi a2, a1, -241
+; RV64NOZBB-NEXT: and a2, a0, a2
+; RV64NOZBB-NEXT: slli a0, a0, 8
+; RV64NOZBB-NEXT: addi a1, a1, -256
+; RV64NOZBB-NEXT: and a0, a0, a1
+; RV64NOZBB-NEXT: add a0, a2, a0
+; RV64NOZBB-NEXT: slli a0, a0, 8
+; RV64NOZBB-NEXT: add a0, a2, a0
+; RV64NOZBB-NEXT: slli a0, a0, 8
+; RV64NOZBB-NEXT: add a0, a2, a0
+; RV64NOZBB-NEXT: srliw a0, a0, 24
+; RV64NOZBB-NEXT: ret
+; RV64NOZBB-NEXT: .LBB10_2:
+; RV64NOZBB-NEXT: li a0, 32
+; RV64NOZBB-NEXT: ret
;
; RV32M-LABEL: test_ctlz_i32:
; RV32M: # %bb.0:
@@ -1285,47 +1285,6 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
; RV32M-NEXT: li a0, 32
; RV32M-NEXT: ret
;
-; RV64M-LABEL: test_ctlz_i32:
-; RV64M: # %bb.0:
-; RV64M-NEXT: sext.w a1, a0
-; RV64M-NEXT: beqz a1, .LBB10_2
-; RV64M-NEXT: # %bb.1: # %cond.false
-; RV64M-NEXT: srliw a1, a0, 1
-; RV64M-NEXT: or a0, a0, a1
-; RV64M-NEXT: srliw a1, a0, 2
-; RV64M-NEXT: or a0, a0, a1
-; RV64M-NEXT: srliw a1, a0, 4
-; RV64M-NEXT: or a0, a0, a1
-; RV64M-NEXT: srliw a1, a0, 8
-; RV64M-NEXT: or a0, a0, a1
-; RV64M-NEXT: srliw a1, a0, 16
-; RV64M-NEXT: or a0, a0, a1
-; RV64M-NEXT: not a0, a0
-; RV64M-NEXT: srli a1, a0, 1
-; RV64M-NEXT: lui a2, 349525
-; RV64M-NEXT: addiw a2, a2, 1365
-; RV64M-NEXT: and a1, a1, a2
-; RV64M-NEXT: sub a0, a0, a1
-; RV64M-NEXT: lui a1, 209715
-; RV64M-NEXT: addiw a1, a1, 819
-; RV64M-NEXT: and a2, a0, a1
-; RV64M-NEXT: srli a0, a0, 2
-; RV64M-NEXT: and a0, a0, a1
-; RV64M-NEXT: add a0, a2, a0
-; RV64M-NEXT: srli a1, a0, 4
-; RV64M-NEXT: add a0, a0, a1
-; RV64M-NEXT: lui a1, 61681
-; RV64M-NEXT: addi a1, a1, -241
-; RV64M-NEXT: and a0, a0, a1
-; RV64M-NEXT: lui a1, 4112
-; RV64M-NEXT: addi a1, a1, 257
-; RV64M-NEXT: mul a0, a0, a1
-; RV64M-NEXT: srliw a0, a0, 24
-; RV64M-NEXT: ret
-; RV64M-NEXT: .LBB10_2:
-; RV64M-NEXT: li a0, 32
-; RV64M-NEXT: ret
-;
; RV32ZBB-LABEL: test_ctlz_i32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: clz a0, a0
@@ -1354,19 +1313,16 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
define i64 @test_ctlz_i64(i64 %a) nounwind {
; RV32I-LABEL: test_ctlz_i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a1
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: srli a0, a1, 1
-; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: lui a2, 349525
+; RV32I-NEXT: addi a4, a2, 1365
+; RV32I-NEXT: lui a2, 209715
+; RV32I-NEXT: addi a3, a2, 819
+; RV32I-NEXT: lui a2, 61681
+; RV32I-NEXT: addi a2, a2, -241
+; RV32I-NEXT: bnez a1, .LBB11_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 2
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 4
@@ -1377,28 +1333,28 @@ define i64 @test_ctlz_i64(i64 %a) nounwind {
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: not a0, a0
; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: lui a2, 349525
-; RV32I-NEXT: addi s4, a2, 1365
-; RV32I-NEXT: and a1, a1, s4
+; RV32I-NEXT: and a1, a1, a4
; RV32I-NEXT: sub a0, a0, a1
-; RV32I-NEXT: lui a1, 209715
-; RV32I-NEXT: addi s5, a1, 819
-; RV32I-NEXT: and a1, a0, s5
+; RV32I-NEXT: and a1, a0, a3
; RV32I-NEXT: srli a0, a0, 2
-; RV32I-NEXT: and a0, a0, s5
+; RV32I-NEXT: and a0, a0, a3
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: lui a1, 61681
-; RV32I-NEXT: addi s6, a1, -241
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: lui a1, 4112
-; RV32I-NEXT: addi s3, a1, 257
-; RV32I-NEXT: mv a1, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: srli a0, s2, 1
-; RV32I-NEXT: or a0, s2, a0
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: srli a0, a0, 24
+; RV32I-NEXT: addi a0, a0, 32
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB11_2:
+; RV32I-NEXT: srli a0, a1, 1
+; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: srli a1, a0, 2
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 4
@@ -1409,43 +1365,29 @@ define i64 @test_ctlz_i64(i64 %a) nounwind {
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: not a0, a0
; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: and a1, a1, s4
+; RV32I-NEXT: and a1, a1, a4
; RV32I-NEXT: sub a0, a0, a1
-; RV32I-NEXT: and a1, a0, s5
+; RV32I-NEXT: and a1, a0, a3
; RV32I-NEXT: srli a0, a0, 2
-; RV32I-NEXT: and a0, a0, s5
+; RV32I-NEXT: and a0, a0, a3
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
-; RV32I-NEXT: and a0, a0, s6
-; RV32I-NEXT: mv a1, s3
-; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: bnez s0, .LBB11_2
-; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srli a0, a0, 24
-; RV32I-NEXT: addi a0, a0, 32
-; RV32I-NEXT: j .LBB11_3
-; RV32I-NEXT: .LBB11_2:
-; RV32I-NEXT: srli a0, s1, 24
-; RV32I-NEXT: .LBB11_3:
; RV32I-NEXT: li a1, 0
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_ctlz_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: beqz a0, .LBB11_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: srli a1, a0, 2
@@ -1481,14 +1423,21 @@ define i64 @test_ctlz_i64(i64 %a) nounwind {
; RV64I-NEXT: slli a2, a1, 32
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: slli a2, a1, 32
-; RV64I-NEXT: add a1, a1, a2
-; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srli a0, a0, 56
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB11_2:
; RV64I-NEXT: li a0, 64
@@ -1831,8 +1780,6 @@ define i16 @test_ctlz_i16_zero_undef(i16 %a) nounwind {
define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind {
; RV32I-LABEL: test_ctlz_i32_zero_undef:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: srli a1, a0, 1
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 2
@@ -1860,52 +1807,54 @@ define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind {
; RV32I-NEXT: lui a1, 61681
; RV32I-NEXT: addi a1, a1, -241
; RV32I-NEXT: and a0, a0, a1
-; RV32I-NEXT: lui a1, 4112
-; RV32I-NEXT: addi a1, a1, 257
-; RV32I-NEXT: call __mulsi3
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srli a0, a0, 24
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV64I-LABEL: test_ctlz_i32_zero_undef:
-; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: srliw a1, a0, 1
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 2
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 4
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 8
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 16
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: not a0, a0
-; RV64I-NEXT: srli a1, a0, 1
-; RV64I-NEXT: lui a2, 349525
-; RV64I-NEXT: addiw a2, a2, 1365
-; RV64I-NEXT: and a1, a1, a2
-; RV64I-NEXT: sub a0, a0, a1
-; RV64I-NEXT: lui a1, 209715
-; RV64I-NEXT: addiw a1, a1, 819
-; RV64I-NEXT: and a2, a0, a1
-; RV64I-NEXT: srli a0, a0, 2
-; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: add a0, a2, a0
-; RV64I-NEXT: srli a1, a0, 4
-; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: lui a1, 61681
-; RV64I-NEXT: addiw a1, a1, -241
-; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: lui a1, 4112
-; RV64I-NEXT: addiw a1, a1, 257
-; RV64I-NEXT: call __muldi3
-; RV64I-NEXT: srliw a0, a0, 24
-; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 16
-; RV64I-NEXT: ret
+; RV64NOZBB-LABEL: test_ctlz_i32_zero_undef:
+; RV64NOZBB: # %bb.0:
+; RV64NOZBB-NEXT: srliw a1, a0, 1
+; RV64NOZBB-NEXT: or a0, a0, a1
+; RV64NOZBB-NEXT: srliw a1, a0, 2
+; RV64NOZBB-NEXT: or a0, a0, a1
+; RV64NOZBB-NEXT: srliw a1, a0, 4
+; RV64NOZBB-NEXT: or a0, a0, a1
+; RV64NOZBB-NEXT: srliw a1, a0, 8
+; RV64NOZBB-NEXT: or a0, a0, a1
+; RV64NOZBB-NEXT: srliw a1, a0, 16
+; RV64NOZBB-NEXT: or a0, a0, a1
+; RV64NOZBB-NEXT: not a0, a0
+; RV64NOZBB-NEXT: srli a1, a0, 1
+; RV64NOZBB-NEXT: lui a2, 349525
+; RV64NOZBB-NEXT: addiw a2, a2, 1365
+; RV64NOZBB-NEXT: and a1, a1, a2
+; RV64NOZBB-NEXT: sub a0, a0, a1
+; RV64NOZBB-NEXT: lui a1, 209715
+; RV64NOZBB-NEXT: addiw a1, a1, 819
+; RV64NOZBB-NEXT: and a2, a0, a1
+; RV64NOZBB-NEXT: srli a0, a0, 2
+; RV64NOZBB-NEXT: and a0, a0, a1
+; RV64NOZBB-NEXT: add a0, a2, a0
+; RV64NOZBB-NEXT: srli a1, a0, 4
+; RV64NOZBB-NEXT: add a0, a0, a1
+; RV64NOZBB-NEXT: lui a1, 61681
+; RV64NOZBB-NEXT: addi a2, a1, -241
+; RV64NOZBB-NEXT: and a2, a0, a2
+; RV64NOZBB-NEXT: slli a0, a0, 8
+; RV64NOZBB-NEXT: addi a1, a1, -256
+; RV64NOZBB-NEXT: and a0, a0, a1
+; RV64NOZBB-NEXT: add a0, a2, a0
+; RV64NOZBB-NEXT: slli a0, a0, 8
+; RV64NOZBB-NEXT: add a0, a2, a0
+; RV64NOZBB-NEXT: slli a0, a0, 8
+; RV64NOZBB-NEXT: add a0, a2, a0
+; RV64NOZBB-NEXT: srliw a0, a0, 24
+; RV64NOZBB-NEXT: ret
;
; RV32M-LABEL: test_ctlz_i32_zero_undef:
; RV32M: # %bb.0:
@@ -1942,41 +1891,6 @@ define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind {
; RV32M-NEXT: srli a0, a0, 24
; RV32M-NEXT: ret
;
-; RV64M-LABEL: test_ctlz_i32_zero_undef:
-; RV64M: # %bb.0:
-; RV64M-NEXT: srliw a1, a0, 1
-; RV64M-NEXT: or a0, a0, a1
-; RV64M-NEXT: srliw a1, a0, 2
-; RV64M-NEXT: or a0, a0, a1
-; RV64M-NEXT: srliw a1, a0, 4
-; RV64M-NEXT: or a0, a0, a1
-; RV64M-NEXT: srliw a1, a0, 8
-; RV64M-NEXT: or a0, a0, a1
-; RV64M-NEXT: srliw a1, a0, 16
-; RV64M-NEXT: or a0, a0, a1
-; RV64M-NEXT: not a0, a0
-; RV64M-NEXT: srli a1, a0, 1
-; RV64M-NEXT: lui a2, 349525
-; RV64M-NEXT: addiw a2, a2, 1365
-; RV64M-NEXT: and a1, a1, a2
-; RV64M-NEXT: sub a0, a0, a1
-; RV64M-NEXT: lui a1, 209715
-; RV64M-NEXT: addiw a1, a1, 819
-; RV64M-NEXT: and a2, a0, a1
-; RV64M-NEXT: srli a0, a0, 2
-; RV64M-NEXT: and a0, a0, a1
-; RV64M-NEXT: add a0, a2, a0
-; RV64M-NEXT: srli a1, a0, 4
-; RV64M-NEXT: add a0, a0, a1
-; RV64M-NEXT: lui a1, 61681
-; RV64M-NEXT: addi a1, a1, -241
-; RV64M-NEXT: and a0, a0, a1
-; RV64M-NEXT: lui a1, 4112
-; RV64M-NEXT: addi a1, a1, 257
-; RV64M-NEXT: mul a0, a0, a1
-; RV64M-NEXT: srliw a0, a0, 24
-; RV64M-NEXT: ret
-;
; RV32ZBB-LABEL: test_ctlz_i32_zero_undef:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: clz a0, a0
@@ -2005,19 +1919,16 @@ define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind {
define i64 @test_ctlz_i64_zero_undef(i64 %a) nounwind {
; RV32I-LABEL: test_ctlz_i64_zero_undef:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a1
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: srli a0, a1, 1
-; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: lui a2, 349525
+; RV32I-NEXT: addi a4, a2, 1365
+; RV32I-NEXT: lui a2, 209715
+; RV32I-NEXT: addi a3, a2, 819
+; RV32I-NEXT: lui a2, 61681
+; RV32I-NEXT: addi a2, a2, -241
+; RV32I-NEXT: bnez a1, .LBB15_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 2
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 4
@@ -2028,28 +1939,28 @@ define i64 @test_ctlz_i64_zero_undef(i64 %a) nounwind {
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: not a0, a0
; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: lui a2, 349525
-; RV32I-NEXT: addi s4, a2, 1365
-; RV32I-NEXT: and a1, a1, s4
+; RV32I-NEXT: and a1, a1, a4
; RV32I-NEXT: sub a...
[truncated]
``````````
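For readers mapping the new `slli`/`add` sequences in the checks above back to the algorithm, here is a hedged C++ sketch of the full i64 `CTPOP` expansion with the final multiply replaced by seven shift-and-add steps, as the patched `expandCTPOP` does when `ISD::MUL` would otherwise become a libcall (`countBits64` is an illustrative name, not LLVM API):

```cpp
#include <cassert>
#include <cstdint>

// Portable sketch of the i64 popcount expansion, with the final multiply
// replaced by seven shift-and-add steps matching the slli/add chains in
// the RV64I checks above.
uint64_t countBits64(uint64_t v) {
  v = v - ((v >> 1) & 0x5555555555555555ull);  // 2-bit field counts
  v = (v & 0x3333333333333333ull) +
      ((v >> 2) & 0x3333333333333333ull);      // 4-bit field counts
  v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0Full;  // per-byte counts
  // Build v * 0x0101010101010101 as iterated acc = v + (acc << 8).
  uint64_t acc = v;
  for (unsigned i = 8; i < 64; i += 8)
    acc = v + (acc << 8);
  return acc >> 56;                            // top byte holds the sum
}

int main() {
  assert(countBits64(0) == 0);
  assert(countBits64(0xFFull) == 8);
  assert(countBits64(~0ull) == 64);
}
```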
https://github.com/llvm/llvm-project/pull/86505
More information about the llvm-commits mailing list