[llvm] [ARM][RISCV] Partially revert #101786 (PR #137120)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 23 23:31:40 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-arm
Author: Sergei Barannikov (s-barannikov)
<details>
<summary>Changes</summary>
As pointed out in the comments, the change in #101786 as it stands breaks the Linux kernel build.
---
Patch is 83.81 KiB, truncated to 20.00 KiB below; the full version is at: https://github.com/llvm/llvm-project/pull/137120.diff
13 Files Affected:
- (modified) llvm/lib/Target/ARM/ARMISelLowering.cpp (+4-2)
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+5-3)
- (modified) llvm/test/CodeGen/ARM/popcnt.ll (+60-6)
- (modified) llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll (+886-201)
- (modified) llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll (+84-34)
- (modified) llvm/test/CodeGen/RISCV/pr56457.ll (+35-14)
- (modified) llvm/test/CodeGen/RISCV/pr95271.ll (+22-1)
- (modified) llvm/test/CodeGen/RISCV/rv32xtheadbb.ll (+73-31)
- (modified) llvm/test/CodeGen/RISCV/rv32zbb.ll (+245-85)
- (modified) llvm/test/CodeGen/RISCV/rv64xtheadbb.ll (+39-15)
- (modified) llvm/test/CodeGen/RISCV/rv64zbb.ll (+114-35)
- (modified) llvm/test/CodeGen/RISCV/sextw-removal.ll (+40-6)
- (modified) llvm/test/CodeGen/Thumb2/mve-ctpop.ll (+49-13)
``````````diff
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index bdebd842b011c..03364d9025208 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1221,8 +1221,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ROTR, VT, Expand);
}
setOperationAction(ISD::CTTZ, MVT::i32, Custom);
- setOperationAction(ISD::CTPOP, MVT::i32, LibCall);
- setOperationAction(ISD::CTPOP, MVT::i64, LibCall);
+ // TODO: These two should be set to LibCall, but this currently breaks
+ // the Linux kernel build. See #101786.
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64, Expand);
if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, LibCall);
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 099ba5c9943ac..02451ee716865 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -396,11 +396,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
} else {
setOperationAction(ISD::CTTZ, XLenVT, Expand);
+ // TODO: These should be set to LibCall, but this currently breaks
+ // the Linux kernel build. See #101786. Lacks i128 tests, too.
if (Subtarget.is64Bit())
- setOperationAction(ISD::CTPOP, MVT::i128, LibCall);
+ setOperationAction(ISD::CTPOP, MVT::i128, Expand);
else
- setOperationAction(ISD::CTPOP, MVT::i32, LibCall);
- setOperationAction(ISD::CTPOP, MVT::i64, LibCall);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64, Expand);
}
if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
diff --git a/llvm/test/CodeGen/ARM/popcnt.ll b/llvm/test/CodeGen/ARM/popcnt.ll
index fc4387320ef77..a70fdc580ca9b 100644
--- a/llvm/test/CodeGen/ARM/popcnt.ll
+++ b/llvm/test/CodeGen/ARM/popcnt.ll
@@ -324,7 +324,30 @@ define i32 @ctpop16(i16 %x) nounwind readnone {
define i32 @ctpop32(i32 %x) nounwind readnone {
; CHECK-LABEL: ctpop32:
; CHECK: @ %bb.0:
-; CHECK-NEXT: b __popcountsi2
+; CHECK-NEXT: ldr r1, .LCPI22_0
+; CHECK-NEXT: ldr r2, .LCPI22_3
+; CHECK-NEXT: and r1, r1, r0, lsr #1
+; CHECK-NEXT: ldr r12, .LCPI22_1
+; CHECK-NEXT: sub r0, r0, r1
+; CHECK-NEXT: ldr r3, .LCPI22_2
+; CHECK-NEXT: and r1, r0, r2
+; CHECK-NEXT: and r0, r2, r0, lsr #2
+; CHECK-NEXT: add r0, r1, r0
+; CHECK-NEXT: add r0, r0, r0, lsr #4
+; CHECK-NEXT: and r0, r0, r12
+; CHECK-NEXT: mul r1, r0, r3
+; CHECK-NEXT: lsr r0, r1, #24
+; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI22_0:
+; CHECK-NEXT: .long 1431655765 @ 0x55555555
+; CHECK-NEXT: .LCPI22_1:
+; CHECK-NEXT: .long 252645135 @ 0xf0f0f0f
+; CHECK-NEXT: .LCPI22_2:
+; CHECK-NEXT: .long 16843009 @ 0x1010101
+; CHECK-NEXT: .LCPI22_3:
+; CHECK-NEXT: .long 858993459 @ 0x33333333
%count = tail call i32 @llvm.ctpop.i32(i32 %x)
ret i32 %count
}
@@ -332,12 +355,43 @@ define i32 @ctpop32(i32 %x) nounwind readnone {
define i64 @ctpop64(i64 %x) nounwind readnone {
; CHECK-LABEL: ctpop64:
; CHECK: @ %bb.0:
-; CHECK-NEXT: .save {r11, lr}
-; CHECK-NEXT: push {r11, lr}
-; CHECK-NEXT: bl __popcountdi2
-; CHECK-NEXT: asr r1, r0, #31
-; CHECK-NEXT: pop {r11, lr}
+; CHECK-NEXT: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: ldr r2, .LCPI23_0
+; CHECK-NEXT: ldr r3, .LCPI23_3
+; CHECK-NEXT: and r4, r2, r0, lsr #1
+; CHECK-NEXT: and r2, r2, r1, lsr #1
+; CHECK-NEXT: sub r0, r0, r4
+; CHECK-NEXT: sub r1, r1, r2
+; CHECK-NEXT: and r4, r0, r3
+; CHECK-NEXT: and r2, r1, r3
+; CHECK-NEXT: and r0, r3, r0, lsr #2
+; CHECK-NEXT: and r1, r3, r1, lsr #2
+; CHECK-NEXT: add r0, r4, r0
+; CHECK-NEXT: ldr lr, .LCPI23_1
+; CHECK-NEXT: add r1, r2, r1
+; CHECK-NEXT: ldr r12, .LCPI23_2
+; CHECK-NEXT: add r0, r0, r0, lsr #4
+; CHECK-NEXT: and r0, r0, lr
+; CHECK-NEXT: add r1, r1, r1, lsr #4
+; CHECK-NEXT: mul r2, r0, r12
+; CHECK-NEXT: and r0, r1, lr
+; CHECK-NEXT: mul r1, r0, r12
+; CHECK-NEXT: lsr r0, r2, #24
+; CHECK-NEXT: add r0, r0, r1, lsr #24
+; CHECK-NEXT: mov r1, #0
+; CHECK-NEXT: pop {r4, lr}
; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI23_0:
+; CHECK-NEXT: .long 1431655765 @ 0x55555555
+; CHECK-NEXT: .LCPI23_1:
+; CHECK-NEXT: .long 252645135 @ 0xf0f0f0f
+; CHECK-NEXT: .LCPI23_2:
+; CHECK-NEXT: .long 16843009 @ 0x1010101
+; CHECK-NEXT: .LCPI23_3:
+; CHECK-NEXT: .long 858993459 @ 0x33333333
%count = tail call i64 @llvm.ctpop.i64(i64 %x)
ret i64 %count
}
diff --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
index f8c3a75f844db..a46168f114bb9 100644
--- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
@@ -1156,30 +1156,46 @@ define i16 @test_ctlz_i16(i16 %a) nounwind {
}
define i32 @test_ctlz_i32(i32 %a) nounwind {
-; RV32_NOZBB-LABEL: test_ctlz_i32:
-; RV32_NOZBB: # %bb.0:
-; RV32_NOZBB-NEXT: beqz a0, .LBB10_2
-; RV32_NOZBB-NEXT: # %bb.1: # %cond.false
-; RV32_NOZBB-NEXT: addi sp, sp, -16
-; RV32_NOZBB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32_NOZBB-NEXT: srli a1, a0, 1
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: srli a1, a0, 2
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: srli a1, a0, 4
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: srli a1, a0, 8
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: srli a1, a0, 16
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: not a0, a0
-; RV32_NOZBB-NEXT: call __popcountsi2
-; RV32_NOZBB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32_NOZBB-NEXT: addi sp, sp, 16
-; RV32_NOZBB-NEXT: ret
-; RV32_NOZBB-NEXT: .LBB10_2:
-; RV32_NOZBB-NEXT: li a0, 32
-; RV32_NOZBB-NEXT: ret
+; RV32I-LABEL: test_ctlz_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: beqz a0, .LBB10_2
+; RV32I-NEXT: # %bb.1: # %cond.false
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: lui a2, 349525
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: addi a1, a2, 1365
+; RV32I-NEXT: srli a2, a0, 2
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: srli a2, a0, 4
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: srli a2, a0, 8
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: srli a2, a0, 16
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: not a0, a0
+; RV32I-NEXT: srli a2, a0, 1
+; RV32I-NEXT: and a1, a2, a1
+; RV32I-NEXT: lui a2, 209715
+; RV32I-NEXT: addi a2, a2, 819
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: and a1, a0, a2
+; RV32I-NEXT: srli a0, a0, 2
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: lui a2, 61681
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: addi a1, a2, -241
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: slli a1, a0, 16
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: srli a0, a0, 24
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB10_2:
+; RV32I-NEXT: li a0, 32
+; RV32I-NEXT: ret
;
; RV64I-LABEL: test_ctlz_i32:
; RV64I: # %bb.0:
@@ -1223,6 +1239,46 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
; RV64I-NEXT: li a0, 32
; RV64I-NEXT: ret
;
+; RV32M-LABEL: test_ctlz_i32:
+; RV32M: # %bb.0:
+; RV32M-NEXT: beqz a0, .LBB10_2
+; RV32M-NEXT: # %bb.1: # %cond.false
+; RV32M-NEXT: srli a1, a0, 1
+; RV32M-NEXT: lui a2, 349525
+; RV32M-NEXT: or a0, a0, a1
+; RV32M-NEXT: addi a1, a2, 1365
+; RV32M-NEXT: srli a2, a0, 2
+; RV32M-NEXT: or a0, a0, a2
+; RV32M-NEXT: srli a2, a0, 4
+; RV32M-NEXT: or a0, a0, a2
+; RV32M-NEXT: srli a2, a0, 8
+; RV32M-NEXT: or a0, a0, a2
+; RV32M-NEXT: srli a2, a0, 16
+; RV32M-NEXT: or a0, a0, a2
+; RV32M-NEXT: not a0, a0
+; RV32M-NEXT: srli a2, a0, 1
+; RV32M-NEXT: and a1, a2, a1
+; RV32M-NEXT: lui a2, 209715
+; RV32M-NEXT: addi a2, a2, 819
+; RV32M-NEXT: sub a0, a0, a1
+; RV32M-NEXT: and a1, a0, a2
+; RV32M-NEXT: srli a0, a0, 2
+; RV32M-NEXT: and a0, a0, a2
+; RV32M-NEXT: lui a2, 61681
+; RV32M-NEXT: add a0, a1, a0
+; RV32M-NEXT: srli a1, a0, 4
+; RV32M-NEXT: add a0, a0, a1
+; RV32M-NEXT: lui a1, 4112
+; RV32M-NEXT: addi a2, a2, -241
+; RV32M-NEXT: and a0, a0, a2
+; RV32M-NEXT: addi a1, a1, 257
+; RV32M-NEXT: mul a0, a0, a1
+; RV32M-NEXT: srli a0, a0, 24
+; RV32M-NEXT: ret
+; RV32M-NEXT: .LBB10_2:
+; RV32M-NEXT: li a0, 32
+; RV32M-NEXT: ret
+;
; RV64M-LABEL: test_ctlz_i32:
; RV64M: # %bb.0:
; RV64M-NEXT: sext.w a1, a0
@@ -1290,75 +1346,240 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
}
define i64 @test_ctlz_i64(i64 %a) nounwind {
-; RV32_NOZBB-LABEL: test_ctlz_i64:
-; RV32_NOZBB: # %bb.0:
-; RV32_NOZBB-NEXT: addi sp, sp, -16
-; RV32_NOZBB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32_NOZBB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32_NOZBB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32_NOZBB-NEXT: mv s1, a1
-; RV32_NOZBB-NEXT: srli a1, a0, 1
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: srli a1, a0, 2
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: srli a1, a0, 4
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: srli a1, a0, 8
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: srli a1, a0, 16
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: not a0, a0
-; RV32_NOZBB-NEXT: call __popcountsi2
-; RV32_NOZBB-NEXT: mv s0, a0
-; RV32_NOZBB-NEXT: srli a0, s1, 1
-; RV32_NOZBB-NEXT: or a0, s1, a0
-; RV32_NOZBB-NEXT: srli a1, a0, 2
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: srli a1, a0, 4
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: srli a1, a0, 8
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: srli a1, a0, 16
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: not a0, a0
-; RV32_NOZBB-NEXT: call __popcountsi2
-; RV32_NOZBB-NEXT: bnez s1, .LBB11_2
-; RV32_NOZBB-NEXT: # %bb.1:
-; RV32_NOZBB-NEXT: addi a0, s0, 32
-; RV32_NOZBB-NEXT: .LBB11_2:
-; RV32_NOZBB-NEXT: li a1, 0
-; RV32_NOZBB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32_NOZBB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32_NOZBB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32_NOZBB-NEXT: addi sp, sp, 16
-; RV32_NOZBB-NEXT: ret
+; RV32I-LABEL: test_ctlz_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lui a2, 349525
+; RV32I-NEXT: lui a3, 209715
+; RV32I-NEXT: lui a5, 61681
+; RV32I-NEXT: addi a4, a2, 1365
+; RV32I-NEXT: addi a3, a3, 819
+; RV32I-NEXT: addi a2, a5, -241
+; RV32I-NEXT: bnez a1, .LBB11_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 2
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 8
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: not a0, a0
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: and a1, a1, a4
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: and a1, a0, a3
+; RV32I-NEXT: srli a0, a0, 2
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: slli a1, a0, 16
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: srli a0, a0, 24
+; RV32I-NEXT: addi a0, a0, 32
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB11_2:
+; RV32I-NEXT: srli a0, a1, 1
+; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: srli a1, a0, 2
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 8
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: not a0, a0
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: and a1, a1, a4
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: and a1, a0, a3
+; RV32I-NEXT: srli a0, a0, 2
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: slli a1, a0, 16
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: srli a0, a0, 24
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: ret
;
-; RV64NOZBB-LABEL: test_ctlz_i64:
-; RV64NOZBB: # %bb.0:
-; RV64NOZBB-NEXT: beqz a0, .LBB11_2
-; RV64NOZBB-NEXT: # %bb.1: # %cond.false
-; RV64NOZBB-NEXT: addi sp, sp, -16
-; RV64NOZBB-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64NOZBB-NEXT: srli a1, a0, 1
-; RV64NOZBB-NEXT: or a0, a0, a1
-; RV64NOZBB-NEXT: srli a1, a0, 2
-; RV64NOZBB-NEXT: or a0, a0, a1
-; RV64NOZBB-NEXT: srli a1, a0, 4
-; RV64NOZBB-NEXT: or a0, a0, a1
-; RV64NOZBB-NEXT: srli a1, a0, 8
-; RV64NOZBB-NEXT: or a0, a0, a1
-; RV64NOZBB-NEXT: srli a1, a0, 16
-; RV64NOZBB-NEXT: or a0, a0, a1
-; RV64NOZBB-NEXT: srli a1, a0, 32
-; RV64NOZBB-NEXT: or a0, a0, a1
-; RV64NOZBB-NEXT: not a0, a0
-; RV64NOZBB-NEXT: call __popcountdi2
-; RV64NOZBB-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64NOZBB-NEXT: addi sp, sp, 16
-; RV64NOZBB-NEXT: ret
-; RV64NOZBB-NEXT: .LBB11_2:
-; RV64NOZBB-NEXT: li a0, 64
-; RV64NOZBB-NEXT: ret
+; RV64I-LABEL: test_ctlz_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: beqz a0, .LBB11_2
+; RV64I-NEXT: # %bb.1: # %cond.false
+; RV64I-NEXT: srli a1, a0, 1
+; RV64I-NEXT: lui a2, 349525
+; RV64I-NEXT: lui a3, 209715
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: addiw a1, a2, 1365
+; RV64I-NEXT: addiw a2, a3, 819
+; RV64I-NEXT: srli a3, a0, 2
+; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: slli a3, a1, 32
+; RV64I-NEXT: add a1, a1, a3
+; RV64I-NEXT: slli a3, a2, 32
+; RV64I-NEXT: add a2, a2, a3
+; RV64I-NEXT: srli a3, a0, 4
+; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: srli a3, a0, 8
+; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: srli a3, a0, 16
+; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: srli a3, a0, 32
+; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: not a0, a0
+; RV64I-NEXT: srli a3, a0, 1
+; RV64I-NEXT: and a1, a3, a1
+; RV64I-NEXT: lui a3, 61681
+; RV64I-NEXT: addiw a3, a3, -241
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: and a1, a0, a2
+; RV64I-NEXT: srli a0, a0, 2
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: slli a2, a3, 32
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: add a2, a3, a2
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 16
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 32
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: srli a0, a0, 56
+; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB11_2:
+; RV64I-NEXT: li a0, 64
+; RV64I-NEXT: ret
+;
+; RV32M-LABEL: test_ctlz_i64:
+; RV32M: # %bb.0:
+; RV32M-NEXT: lui a2, 349525
+; RV32M-NEXT: lui a3, 209715
+; RV32M-NEXT: lui a6, 61681
+; RV32M-NEXT: lui a7, 4112
+; RV32M-NEXT: addi a5, a2, 1365
+; RV32M-NEXT: addi a4, a3, 819
+; RV32M-NEXT: addi a3, a6, -241
+; RV32M-NEXT: addi a2, a7, 257
+; RV32M-NEXT: bnez a1, .LBB11_2
+; RV32M-NEXT: # %bb.1:
+; RV32M-NEXT: srli a1, a0, 1
+; RV32M-NEXT: or a0, a0, a1
+; RV32M-NEXT: srli a1, a0, 2
+; RV32M-NEXT: or a0, a0, a1
+; RV32M-NEXT: srli a1, a0, 4
+; RV32M-NEXT: or a0, a0, a1
+; RV32M-NEXT: srli a1, a0, 8
+; RV32M-NEXT: or a0, a0, a1
+; RV32M-NEXT: srli a1, a0, 16
+; RV32M-NEXT: or a0, a0, a1
+; RV32M-NEXT: not a0, a0
+; RV32M-NEXT: srli a1, a0, 1
+; RV32M-NEXT: and a1, a1, a5
+; RV32M-NEXT: sub a0, a0, a1
+; RV32M-NEXT: and a1, a0, a4
+; RV32M-NEXT: srli a0, a0, 2
+; RV32M-NEXT: and a0, a0, a4
+; RV32M-NEXT: add a0, a1, a0
+; RV32M-NEXT: srli a1, a0, 4
+; RV32M-NEXT: add a0, a0, a1
+; RV32M-NEXT: and a0, a0, a3
+; RV32M-NEXT: mul a0, a0, a2
+; RV32M-NEXT: srli a0, a0, 24
+; RV32M-NEXT: addi a0, a0, 32
+; RV32M-NEXT: li a1, 0
+; RV32M-NEXT: ret
+; RV32M-NEXT: .LBB11_2:
+; RV32M-NEXT: srli a0, a1, 1
+; RV32M-NEXT: or a0, a1, a0
+; RV32M-NEXT: srli a1, a0, 2
+; RV32M-NEXT: or a0, a0, a1
+; RV32M-NEXT: srli a1, a0, 4
+; RV32M-NEXT: or a0, a0, a1
+; RV32M-NEXT: srli a1, a0, 8
+; RV32M-NEXT: or a0, a0, a1
+; RV32M-NEXT: srli a1, a0, 16
+; RV32M-NEXT: or a0, a0, a1
+; RV32M-NEXT: not a0, a0
+; RV32M-NEXT: srli a1, a0, 1
+; RV32M-NEXT: and a1, a1, a5
+; RV32M-NEXT: sub a0, a0, a1
+; RV32M-NEXT: and a1, a0, a4
+; RV32M-NEXT: srli a0, a0, 2
+; RV32M-NEXT: and a0, a0, a4
+; RV32M-NEXT: add a0, a1, a0
+; RV32M-NEXT: srli a1, a0, 4
+; RV32M-NEXT: add a0, a0, a1
+; RV32M-NEXT: and a0, a0, a3
+; RV32M-NEXT: mul a0, a0, a2
+; RV32M-NEXT: srli a0, a0, 24
+; RV32M-NEXT: li a1, 0
+; RV32M-NEXT: ret
+;
+; RV64M-LABEL: test_ctlz_i64:
+; RV64M: # %bb.0:
+; RV64M-NEXT: beqz a0, .LBB11_2
+; RV64M-NEXT: # %bb.1: # %cond.false
+; RV64M-NEXT: srli a1, a0, 1
+; RV64M-NEXT: lui a2, 349525
+; RV64M-NEXT: lui a3, 209715
+; RV64M-NEXT: lui a4, 61681
+; RV64M-NEXT: or a0, a0, a1
+; RV64M-NEXT: addiw a1, a2, 1365
+; RV64M-NEXT: addiw a2, a3, 819
+; RV64M-NEXT: addiw a3, a4, -241
+; RV64M-NEXT: srli a4, a0, 2
+; RV64M-NEXT: or a0, a0, a4
+; RV64M-NEXT: slli a4, a1, 32
+; RV64M-NEXT: add a1, a1, a4
+; RV64M-NEXT: slli a4, a2, 32
+; RV64M-NEXT: add a2, a2, a4
+; RV64M-NEXT: slli a4, a3, 32
+; RV64M-NEXT: add a3, a3, a4
+; RV64M-NEXT: srli a4, a0, 4
+; RV64M-NEXT: or a0, a0, a4
+; RV64M-NEXT: srli a4, a0, 8
+; RV64M-NEXT: or a0, a0, a4
+; RV64M-NEXT: srli a4, a0, 16
+; RV64M-NEXT: or a0, a0, a4
+; RV64M-NEXT: srli a4, a0, 32
+; RV64M-NEXT: or a0, a0, a4
+; RV64M-NEXT: not a0, a0
+; RV64M-NEXT: srli a4, a0, 1
+; RV64M-NEXT: and a1, a4, a1
+; RV64M-NEXT: sub a0, a0, a1
+; RV64M-NEXT: and a1, a0, a2
+; RV64M-NEXT: srli a0, a0, 2
+; RV64M-NEXT: and a0, a0, a2
+; RV64M-NEXT: lui a2, 4112
+; RV64M-NEXT: addiw a2, a2, 257
+; RV64M-NEXT: add a0, a1, a0
+; RV64M-NEXT: srli a1, a0, 4
+; RV64M-NEXT: add a0, a0, a1
+; RV64M-NEXT: slli a1, a2, 32
+; RV64M-NEXT: and a0, a0, a3
+; RV64M-NEXT: add a1, a2, a1
+; RV64M-NEXT: mul a0, a0, a1
+; RV64M-NEXT: srli a0, a0, 56
+; RV64M-NEXT: ret
+; RV64M-NEXT: .LBB11_2:
+; RV64M-NEXT: li a0, 64
+; RV64M-NEXT: ret
;
; RV32ZBB-LABEL: test_ctlz_i64:
; RV32ZBB: # %bb.0:
@@ -1572,20 +1793,41 @@ define i16 @test_ctlz_i16_zero_undef(i16 %a) nounwind {
}
define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind {
-; RV32_NOZBB-LABEL: test_ctlz_i32_zero_undef:
-; RV32_NOZBB: # %bb.0:
-; RV32_NOZBB-NEXT: srli a1, a0, 1
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: srli a1, a0, 2
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: srli a1, a0, 4
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: srli a1, a0, 8
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: srli a1, a0, 16
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: not a0, a0
-; RV32_NOZBB-NEXT: tail __popcountsi2
+; RV32I-LABEL: test_ctlz_i32_zero_undef:
+; RV32I: # %bb.0:
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: lui a2, 349525
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: addi a1, a2, 1365
+; RV32I-NEXT: srli a2, a0, 2
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: srli a2, a0, 4
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: srli a2, a0, 8
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEX...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/137120
More information about the llvm-commits mailing list