[llvm] [ARM][RISCV] Partially revert #101786 (PR #137120)

via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 23 23:31:40 PDT 2025


llvmbot wrote:



@llvm/pr-subscribers-backend-arm

Author: Sergei Barannikov (s-barannikov)

Changes

As-is, the change breaks the Linux kernel build, as pointed out in the comments.
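
For context, `LibCall` lowers `llvm.ctpop` to a call to `__popcountsi2`/`__popcountdi2` from the compiler runtime, which the kernel build in question does not provide, while `Expand` emits an inline bit-manipulation sequence, as the updated CHECK lines show. A minimal illustrative sketch of that expansion in C++ (not the exact SelectionDAG output; the function name is made up for illustration):

```cpp
#include <cstdint>

// Illustrative only: the classic bit-twiddling popcount that ISD::CTPOP
// "Expand" lowers to inline, instead of the __popcountsi2 libcall emitted
// by "LibCall" lowering. The same masks (0x55555555, 0x33333333,
// 0x0f0f0f0f, 0x01010101) appear in the updated CHECK lines below.
uint32_t popcount32_expand(uint32_t x) {
  x = x - ((x >> 1) & 0x55555555u);                 // 2-bit field counts
  x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u); // 4-bit field counts
  x = (x + (x >> 4)) & 0x0f0f0f0fu;                 // per-byte counts
  return (x * 0x01010101u) >> 24;                   // sum bytes via multiply
}
```

On targets without a cheap multiply (for example the RV32I configurations in the tests), the final horizontal sum is done with shifts and adds instead of the `0x01010101` multiply.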

---

Patch is 83.81 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/137120.diff


13 Files Affected:

- (modified) llvm/lib/Target/ARM/ARMISelLowering.cpp (+4-2) 
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+5-3) 
- (modified) llvm/test/CodeGen/ARM/popcnt.ll (+60-6) 
- (modified) llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll (+886-201) 
- (modified) llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll (+84-34) 
- (modified) llvm/test/CodeGen/RISCV/pr56457.ll (+35-14) 
- (modified) llvm/test/CodeGen/RISCV/pr95271.ll (+22-1) 
- (modified) llvm/test/CodeGen/RISCV/rv32xtheadbb.ll (+73-31) 
- (modified) llvm/test/CodeGen/RISCV/rv32zbb.ll (+245-85) 
- (modified) llvm/test/CodeGen/RISCV/rv64xtheadbb.ll (+39-15) 
- (modified) llvm/test/CodeGen/RISCV/rv64zbb.ll (+114-35) 
- (modified) llvm/test/CodeGen/RISCV/sextw-removal.ll (+40-6) 
- (modified) llvm/test/CodeGen/Thumb2/mve-ctpop.ll (+49-13) 


``````````diff
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index bdebd842b011c..03364d9025208 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1221,8 +1221,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::ROTR, VT, Expand);
   }
   setOperationAction(ISD::CTTZ,  MVT::i32, Custom);
-  setOperationAction(ISD::CTPOP, MVT::i32, LibCall);
-  setOperationAction(ISD::CTPOP, MVT::i64, LibCall);
+  // TODO: These two should be set to LibCall, but this currently breaks
+  //   the Linux kernel build. See #101786.
+  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+  setOperationAction(ISD::CTPOP, MVT::i64, Expand);
   if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
     setOperationAction(ISD::CTLZ, MVT::i32, Expand);
     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, LibCall);
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 099ba5c9943ac..02451ee716865 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -396,11 +396,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
   } else {
     setOperationAction(ISD::CTTZ, XLenVT, Expand);
+    // TODO: These should be set to LibCall, but this currently breaks
+    //   the Linux kernel build. See #101786. Lacks i128 tests, too.
     if (Subtarget.is64Bit())
-      setOperationAction(ISD::CTPOP, MVT::i128, LibCall);
+      setOperationAction(ISD::CTPOP, MVT::i128, Expand);
     else
-      setOperationAction(ISD::CTPOP, MVT::i32, LibCall);
-    setOperationAction(ISD::CTPOP, MVT::i64, LibCall);
+      setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+    setOperationAction(ISD::CTPOP, MVT::i64, Expand);
   }
 
   if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
diff --git a/llvm/test/CodeGen/ARM/popcnt.ll b/llvm/test/CodeGen/ARM/popcnt.ll
index fc4387320ef77..a70fdc580ca9b 100644
--- a/llvm/test/CodeGen/ARM/popcnt.ll
+++ b/llvm/test/CodeGen/ARM/popcnt.ll
@@ -324,7 +324,30 @@ define i32 @ctpop16(i16 %x) nounwind readnone {
 define i32 @ctpop32(i32 %x) nounwind readnone {
 ; CHECK-LABEL: ctpop32:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    b __popcountsi2
+; CHECK-NEXT:    ldr r1, .LCPI22_0
+; CHECK-NEXT:    ldr r2, .LCPI22_3
+; CHECK-NEXT:    and r1, r1, r0, lsr #1
+; CHECK-NEXT:    ldr r12, .LCPI22_1
+; CHECK-NEXT:    sub r0, r0, r1
+; CHECK-NEXT:    ldr r3, .LCPI22_2
+; CHECK-NEXT:    and r1, r0, r2
+; CHECK-NEXT:    and r0, r2, r0, lsr #2
+; CHECK-NEXT:    add r0, r1, r0
+; CHECK-NEXT:    add r0, r0, r0, lsr #4
+; CHECK-NEXT:    and r0, r0, r12
+; CHECK-NEXT:    mul r1, r0, r3
+; CHECK-NEXT:    lsr r0, r1, #24
+; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    .p2align 2
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI22_0:
+; CHECK-NEXT:    .long 1431655765 @ 0x55555555
+; CHECK-NEXT:  .LCPI22_1:
+; CHECK-NEXT:    .long 252645135 @ 0xf0f0f0f
+; CHECK-NEXT:  .LCPI22_2:
+; CHECK-NEXT:    .long 16843009 @ 0x1010101
+; CHECK-NEXT:  .LCPI22_3:
+; CHECK-NEXT:    .long 858993459 @ 0x33333333
   %count = tail call i32 @llvm.ctpop.i32(i32 %x)
   ret i32 %count
 }
@@ -332,12 +355,43 @@ define i32 @ctpop32(i32 %x) nounwind readnone {
 define i64 @ctpop64(i64 %x) nounwind readnone {
 ; CHECK-LABEL: ctpop64:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    .save {r11, lr}
-; CHECK-NEXT:    push {r11, lr}
-; CHECK-NEXT:    bl __popcountdi2
-; CHECK-NEXT:    asr r1, r0, #31
-; CHECK-NEXT:    pop {r11, lr}
+; CHECK-NEXT:    .save {r4, lr}
+; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    ldr r2, .LCPI23_0
+; CHECK-NEXT:    ldr r3, .LCPI23_3
+; CHECK-NEXT:    and r4, r2, r0, lsr #1
+; CHECK-NEXT:    and r2, r2, r1, lsr #1
+; CHECK-NEXT:    sub r0, r0, r4
+; CHECK-NEXT:    sub r1, r1, r2
+; CHECK-NEXT:    and r4, r0, r3
+; CHECK-NEXT:    and r2, r1, r3
+; CHECK-NEXT:    and r0, r3, r0, lsr #2
+; CHECK-NEXT:    and r1, r3, r1, lsr #2
+; CHECK-NEXT:    add r0, r4, r0
+; CHECK-NEXT:    ldr lr, .LCPI23_1
+; CHECK-NEXT:    add r1, r2, r1
+; CHECK-NEXT:    ldr r12, .LCPI23_2
+; CHECK-NEXT:    add r0, r0, r0, lsr #4
+; CHECK-NEXT:    and r0, r0, lr
+; CHECK-NEXT:    add r1, r1, r1, lsr #4
+; CHECK-NEXT:    mul r2, r0, r12
+; CHECK-NEXT:    and r0, r1, lr
+; CHECK-NEXT:    mul r1, r0, r12
+; CHECK-NEXT:    lsr r0, r2, #24
+; CHECK-NEXT:    add r0, r0, r1, lsr #24
+; CHECK-NEXT:    mov r1, #0
+; CHECK-NEXT:    pop {r4, lr}
 ; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    .p2align 2
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI23_0:
+; CHECK-NEXT:    .long 1431655765 @ 0x55555555
+; CHECK-NEXT:  .LCPI23_1:
+; CHECK-NEXT:    .long 252645135 @ 0xf0f0f0f
+; CHECK-NEXT:  .LCPI23_2:
+; CHECK-NEXT:    .long 16843009 @ 0x1010101
+; CHECK-NEXT:  .LCPI23_3:
+; CHECK-NEXT:    .long 858993459 @ 0x33333333
   %count = tail call i64 @llvm.ctpop.i64(i64 %x)
   ret i64 %count
 }
diff --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
index f8c3a75f844db..a46168f114bb9 100644
--- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
@@ -1156,30 +1156,46 @@ define i16 @test_ctlz_i16(i16 %a) nounwind {
 }
 
 define i32 @test_ctlz_i32(i32 %a) nounwind {
-; RV32_NOZBB-LABEL: test_ctlz_i32:
-; RV32_NOZBB:       # %bb.0:
-; RV32_NOZBB-NEXT:    beqz a0, .LBB10_2
-; RV32_NOZBB-NEXT:  # %bb.1: # %cond.false
-; RV32_NOZBB-NEXT:    addi sp, sp, -16
-; RV32_NOZBB-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32_NOZBB-NEXT:    srli a1, a0, 1
-; RV32_NOZBB-NEXT:    or a0, a0, a1
-; RV32_NOZBB-NEXT:    srli a1, a0, 2
-; RV32_NOZBB-NEXT:    or a0, a0, a1
-; RV32_NOZBB-NEXT:    srli a1, a0, 4
-; RV32_NOZBB-NEXT:    or a0, a0, a1
-; RV32_NOZBB-NEXT:    srli a1, a0, 8
-; RV32_NOZBB-NEXT:    or a0, a0, a1
-; RV32_NOZBB-NEXT:    srli a1, a0, 16
-; RV32_NOZBB-NEXT:    or a0, a0, a1
-; RV32_NOZBB-NEXT:    not a0, a0
-; RV32_NOZBB-NEXT:    call __popcountsi2
-; RV32_NOZBB-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32_NOZBB-NEXT:    addi sp, sp, 16
-; RV32_NOZBB-NEXT:    ret
-; RV32_NOZBB-NEXT:  .LBB10_2:
-; RV32_NOZBB-NEXT:    li a0, 32
-; RV32_NOZBB-NEXT:    ret
+; RV32I-LABEL: test_ctlz_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    beqz a0, .LBB10_2
+; RV32I-NEXT:  # %bb.1: # %cond.false
+; RV32I-NEXT:    srli a1, a0, 1
+; RV32I-NEXT:    lui a2, 349525
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    addi a1, a2, 1365
+; RV32I-NEXT:    srli a2, a0, 2
+; RV32I-NEXT:    or a0, a0, a2
+; RV32I-NEXT:    srli a2, a0, 4
+; RV32I-NEXT:    or a0, a0, a2
+; RV32I-NEXT:    srli a2, a0, 8
+; RV32I-NEXT:    or a0, a0, a2
+; RV32I-NEXT:    srli a2, a0, 16
+; RV32I-NEXT:    or a0, a0, a2
+; RV32I-NEXT:    not a0, a0
+; RV32I-NEXT:    srli a2, a0, 1
+; RV32I-NEXT:    and a1, a2, a1
+; RV32I-NEXT:    lui a2, 209715
+; RV32I-NEXT:    addi a2, a2, 819
+; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    and a1, a0, a2
+; RV32I-NEXT:    srli a0, a0, 2
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    lui a2, 61681
+; RV32I-NEXT:    add a0, a1, a0
+; RV32I-NEXT:    srli a1, a0, 4
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    addi a1, a2, -241
+; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    slli a1, a0, 8
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    slli a1, a0, 16
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    srli a0, a0, 24
+; RV32I-NEXT:    ret
+; RV32I-NEXT:  .LBB10_2:
+; RV32I-NEXT:    li a0, 32
+; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: test_ctlz_i32:
 ; RV64I:       # %bb.0:
@@ -1223,6 +1239,46 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
 ; RV64I-NEXT:    li a0, 32
 ; RV64I-NEXT:    ret
 ;
+; RV32M-LABEL: test_ctlz_i32:
+; RV32M:       # %bb.0:
+; RV32M-NEXT:    beqz a0, .LBB10_2
+; RV32M-NEXT:  # %bb.1: # %cond.false
+; RV32M-NEXT:    srli a1, a0, 1
+; RV32M-NEXT:    lui a2, 349525
+; RV32M-NEXT:    or a0, a0, a1
+; RV32M-NEXT:    addi a1, a2, 1365
+; RV32M-NEXT:    srli a2, a0, 2
+; RV32M-NEXT:    or a0, a0, a2
+; RV32M-NEXT:    srli a2, a0, 4
+; RV32M-NEXT:    or a0, a0, a2
+; RV32M-NEXT:    srli a2, a0, 8
+; RV32M-NEXT:    or a0, a0, a2
+; RV32M-NEXT:    srli a2, a0, 16
+; RV32M-NEXT:    or a0, a0, a2
+; RV32M-NEXT:    not a0, a0
+; RV32M-NEXT:    srli a2, a0, 1
+; RV32M-NEXT:    and a1, a2, a1
+; RV32M-NEXT:    lui a2, 209715
+; RV32M-NEXT:    addi a2, a2, 819
+; RV32M-NEXT:    sub a0, a0, a1
+; RV32M-NEXT:    and a1, a0, a2
+; RV32M-NEXT:    srli a0, a0, 2
+; RV32M-NEXT:    and a0, a0, a2
+; RV32M-NEXT:    lui a2, 61681
+; RV32M-NEXT:    add a0, a1, a0
+; RV32M-NEXT:    srli a1, a0, 4
+; RV32M-NEXT:    add a0, a0, a1
+; RV32M-NEXT:    lui a1, 4112
+; RV32M-NEXT:    addi a2, a2, -241
+; RV32M-NEXT:    and a0, a0, a2
+; RV32M-NEXT:    addi a1, a1, 257
+; RV32M-NEXT:    mul a0, a0, a1
+; RV32M-NEXT:    srli a0, a0, 24
+; RV32M-NEXT:    ret
+; RV32M-NEXT:  .LBB10_2:
+; RV32M-NEXT:    li a0, 32
+; RV32M-NEXT:    ret
+;
 ; RV64M-LABEL: test_ctlz_i32:
 ; RV64M:       # %bb.0:
 ; RV64M-NEXT:    sext.w a1, a0
@@ -1290,75 +1346,240 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
 }
 
 define i64 @test_ctlz_i64(i64 %a) nounwind {
-; RV32_NOZBB-LABEL: test_ctlz_i64:
-; RV32_NOZBB:       # %bb.0:
-; RV32_NOZBB-NEXT:    addi sp, sp, -16
-; RV32_NOZBB-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32_NOZBB-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
-; RV32_NOZBB-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
-; RV32_NOZBB-NEXT:    mv s1, a1
-; RV32_NOZBB-NEXT:    srli a1, a0, 1
-; RV32_NOZBB-NEXT:    or a0, a0, a1
-; RV32_NOZBB-NEXT:    srli a1, a0, 2
-; RV32_NOZBB-NEXT:    or a0, a0, a1
-; RV32_NOZBB-NEXT:    srli a1, a0, 4
-; RV32_NOZBB-NEXT:    or a0, a0, a1
-; RV32_NOZBB-NEXT:    srli a1, a0, 8
-; RV32_NOZBB-NEXT:    or a0, a0, a1
-; RV32_NOZBB-NEXT:    srli a1, a0, 16
-; RV32_NOZBB-NEXT:    or a0, a0, a1
-; RV32_NOZBB-NEXT:    not a0, a0
-; RV32_NOZBB-NEXT:    call __popcountsi2
-; RV32_NOZBB-NEXT:    mv s0, a0
-; RV32_NOZBB-NEXT:    srli a0, s1, 1
-; RV32_NOZBB-NEXT:    or a0, s1, a0
-; RV32_NOZBB-NEXT:    srli a1, a0, 2
-; RV32_NOZBB-NEXT:    or a0, a0, a1
-; RV32_NOZBB-NEXT:    srli a1, a0, 4
-; RV32_NOZBB-NEXT:    or a0, a0, a1
-; RV32_NOZBB-NEXT:    srli a1, a0, 8
-; RV32_NOZBB-NEXT:    or a0, a0, a1
-; RV32_NOZBB-NEXT:    srli a1, a0, 16
-; RV32_NOZBB-NEXT:    or a0, a0, a1
-; RV32_NOZBB-NEXT:    not a0, a0
-; RV32_NOZBB-NEXT:    call __popcountsi2
-; RV32_NOZBB-NEXT:    bnez s1, .LBB11_2
-; RV32_NOZBB-NEXT:  # %bb.1:
-; RV32_NOZBB-NEXT:    addi a0, s0, 32
-; RV32_NOZBB-NEXT:  .LBB11_2:
-; RV32_NOZBB-NEXT:    li a1, 0
-; RV32_NOZBB-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32_NOZBB-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
-; RV32_NOZBB-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32_NOZBB-NEXT:    addi sp, sp, 16
-; RV32_NOZBB-NEXT:    ret
+; RV32I-LABEL: test_ctlz_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lui a2, 349525
+; RV32I-NEXT:    lui a3, 209715
+; RV32I-NEXT:    lui a5, 61681
+; RV32I-NEXT:    addi a4, a2, 1365
+; RV32I-NEXT:    addi a3, a3, 819
+; RV32I-NEXT:    addi a2, a5, -241
+; RV32I-NEXT:    bnez a1, .LBB11_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    srli a1, a0, 1
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    srli a1, a0, 2
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    srli a1, a0, 4
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    srli a1, a0, 8
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    srli a1, a0, 16
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    not a0, a0
+; RV32I-NEXT:    srli a1, a0, 1
+; RV32I-NEXT:    and a1, a1, a4
+; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    and a1, a0, a3
+; RV32I-NEXT:    srli a0, a0, 2
+; RV32I-NEXT:    and a0, a0, a3
+; RV32I-NEXT:    add a0, a1, a0
+; RV32I-NEXT:    srli a1, a0, 4
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    slli a1, a0, 8
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    slli a1, a0, 16
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    srli a0, a0, 24
+; RV32I-NEXT:    addi a0, a0, 32
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    ret
+; RV32I-NEXT:  .LBB11_2:
+; RV32I-NEXT:    srli a0, a1, 1
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    srli a1, a0, 2
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    srli a1, a0, 4
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    srli a1, a0, 8
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    srli a1, a0, 16
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    not a0, a0
+; RV32I-NEXT:    srli a1, a0, 1
+; RV32I-NEXT:    and a1, a1, a4
+; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    and a1, a0, a3
+; RV32I-NEXT:    srli a0, a0, 2
+; RV32I-NEXT:    and a0, a0, a3
+; RV32I-NEXT:    add a0, a1, a0
+; RV32I-NEXT:    srli a1, a0, 4
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    slli a1, a0, 8
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    slli a1, a0, 16
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    srli a0, a0, 24
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    ret
 ;
-; RV64NOZBB-LABEL: test_ctlz_i64:
-; RV64NOZBB:       # %bb.0:
-; RV64NOZBB-NEXT:    beqz a0, .LBB11_2
-; RV64NOZBB-NEXT:  # %bb.1: # %cond.false
-; RV64NOZBB-NEXT:    addi sp, sp, -16
-; RV64NOZBB-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; RV64NOZBB-NEXT:    srli a1, a0, 1
-; RV64NOZBB-NEXT:    or a0, a0, a1
-; RV64NOZBB-NEXT:    srli a1, a0, 2
-; RV64NOZBB-NEXT:    or a0, a0, a1
-; RV64NOZBB-NEXT:    srli a1, a0, 4
-; RV64NOZBB-NEXT:    or a0, a0, a1
-; RV64NOZBB-NEXT:    srli a1, a0, 8
-; RV64NOZBB-NEXT:    or a0, a0, a1
-; RV64NOZBB-NEXT:    srli a1, a0, 16
-; RV64NOZBB-NEXT:    or a0, a0, a1
-; RV64NOZBB-NEXT:    srli a1, a0, 32
-; RV64NOZBB-NEXT:    or a0, a0, a1
-; RV64NOZBB-NEXT:    not a0, a0
-; RV64NOZBB-NEXT:    call __popcountdi2
-; RV64NOZBB-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; RV64NOZBB-NEXT:    addi sp, sp, 16
-; RV64NOZBB-NEXT:    ret
-; RV64NOZBB-NEXT:  .LBB11_2:
-; RV64NOZBB-NEXT:    li a0, 64
-; RV64NOZBB-NEXT:    ret
+; RV64I-LABEL: test_ctlz_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    beqz a0, .LBB11_2
+; RV64I-NEXT:  # %bb.1: # %cond.false
+; RV64I-NEXT:    srli a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    lui a3, 209715
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    addiw a1, a2, 1365
+; RV64I-NEXT:    addiw a2, a3, 819
+; RV64I-NEXT:    srli a3, a0, 2
+; RV64I-NEXT:    or a0, a0, a3
+; RV64I-NEXT:    slli a3, a1, 32
+; RV64I-NEXT:    add a1, a1, a3
+; RV64I-NEXT:    slli a3, a2, 32
+; RV64I-NEXT:    add a2, a2, a3
+; RV64I-NEXT:    srli a3, a0, 4
+; RV64I-NEXT:    or a0, a0, a3
+; RV64I-NEXT:    srli a3, a0, 8
+; RV64I-NEXT:    or a0, a0, a3
+; RV64I-NEXT:    srli a3, a0, 16
+; RV64I-NEXT:    or a0, a0, a3
+; RV64I-NEXT:    srli a3, a0, 32
+; RV64I-NEXT:    or a0, a0, a3
+; RV64I-NEXT:    not a0, a0
+; RV64I-NEXT:    srli a3, a0, 1
+; RV64I-NEXT:    and a1, a3, a1
+; RV64I-NEXT:    lui a3, 61681
+; RV64I-NEXT:    addiw a3, a3, -241
+; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    and a1, a0, a2
+; RV64I-NEXT:    srli a0, a0, 2
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    slli a2, a3, 32
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    srli a1, a0, 4
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    add a2, a3, a2
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    slli a1, a0, 8
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    slli a1, a0, 16
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    slli a1, a0, 32
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    srli a0, a0, 56
+; RV64I-NEXT:    ret
+; RV64I-NEXT:  .LBB11_2:
+; RV64I-NEXT:    li a0, 64
+; RV64I-NEXT:    ret
+;
+; RV32M-LABEL: test_ctlz_i64:
+; RV32M:       # %bb.0:
+; RV32M-NEXT:    lui a2, 349525
+; RV32M-NEXT:    lui a3, 209715
+; RV32M-NEXT:    lui a6, 61681
+; RV32M-NEXT:    lui a7, 4112
+; RV32M-NEXT:    addi a5, a2, 1365
+; RV32M-NEXT:    addi a4, a3, 819
+; RV32M-NEXT:    addi a3, a6, -241
+; RV32M-NEXT:    addi a2, a7, 257
+; RV32M-NEXT:    bnez a1, .LBB11_2
+; RV32M-NEXT:  # %bb.1:
+; RV32M-NEXT:    srli a1, a0, 1
+; RV32M-NEXT:    or a0, a0, a1
+; RV32M-NEXT:    srli a1, a0, 2
+; RV32M-NEXT:    or a0, a0, a1
+; RV32M-NEXT:    srli a1, a0, 4
+; RV32M-NEXT:    or a0, a0, a1
+; RV32M-NEXT:    srli a1, a0, 8
+; RV32M-NEXT:    or a0, a0, a1
+; RV32M-NEXT:    srli a1, a0, 16
+; RV32M-NEXT:    or a0, a0, a1
+; RV32M-NEXT:    not a0, a0
+; RV32M-NEXT:    srli a1, a0, 1
+; RV32M-NEXT:    and a1, a1, a5
+; RV32M-NEXT:    sub a0, a0, a1
+; RV32M-NEXT:    and a1, a0, a4
+; RV32M-NEXT:    srli a0, a0, 2
+; RV32M-NEXT:    and a0, a0, a4
+; RV32M-NEXT:    add a0, a1, a0
+; RV32M-NEXT:    srli a1, a0, 4
+; RV32M-NEXT:    add a0, a0, a1
+; RV32M-NEXT:    and a0, a0, a3
+; RV32M-NEXT:    mul a0, a0, a2
+; RV32M-NEXT:    srli a0, a0, 24
+; RV32M-NEXT:    addi a0, a0, 32
+; RV32M-NEXT:    li a1, 0
+; RV32M-NEXT:    ret
+; RV32M-NEXT:  .LBB11_2:
+; RV32M-NEXT:    srli a0, a1, 1
+; RV32M-NEXT:    or a0, a1, a0
+; RV32M-NEXT:    srli a1, a0, 2
+; RV32M-NEXT:    or a0, a0, a1
+; RV32M-NEXT:    srli a1, a0, 4
+; RV32M-NEXT:    or a0, a0, a1
+; RV32M-NEXT:    srli a1, a0, 8
+; RV32M-NEXT:    or a0, a0, a1
+; RV32M-NEXT:    srli a1, a0, 16
+; RV32M-NEXT:    or a0, a0, a1
+; RV32M-NEXT:    not a0, a0
+; RV32M-NEXT:    srli a1, a0, 1
+; RV32M-NEXT:    and a1, a1, a5
+; RV32M-NEXT:    sub a0, a0, a1
+; RV32M-NEXT:    and a1, a0, a4
+; RV32M-NEXT:    srli a0, a0, 2
+; RV32M-NEXT:    and a0, a0, a4
+; RV32M-NEXT:    add a0, a1, a0
+; RV32M-NEXT:    srli a1, a0, 4
+; RV32M-NEXT:    add a0, a0, a1
+; RV32M-NEXT:    and a0, a0, a3
+; RV32M-NEXT:    mul a0, a0, a2
+; RV32M-NEXT:    srli a0, a0, 24
+; RV32M-NEXT:    li a1, 0
+; RV32M-NEXT:    ret
+;
+; RV64M-LABEL: test_ctlz_i64:
+; RV64M:       # %bb.0:
+; RV64M-NEXT:    beqz a0, .LBB11_2
+; RV64M-NEXT:  # %bb.1: # %cond.false
+; RV64M-NEXT:    srli a1, a0, 1
+; RV64M-NEXT:    lui a2, 349525
+; RV64M-NEXT:    lui a3, 209715
+; RV64M-NEXT:    lui a4, 61681
+; RV64M-NEXT:    or a0, a0, a1
+; RV64M-NEXT:    addiw a1, a2, 1365
+; RV64M-NEXT:    addiw a2, a3, 819
+; RV64M-NEXT:    addiw a3, a4, -241
+; RV64M-NEXT:    srli a4, a0, 2
+; RV64M-NEXT:    or a0, a0, a4
+; RV64M-NEXT:    slli a4, a1, 32
+; RV64M-NEXT:    add a1, a1, a4
+; RV64M-NEXT:    slli a4, a2, 32
+; RV64M-NEXT:    add a2, a2, a4
+; RV64M-NEXT:    slli a4, a3, 32
+; RV64M-NEXT:    add a3, a3, a4
+; RV64M-NEXT:    srli a4, a0, 4
+; RV64M-NEXT:    or a0, a0, a4
+; RV64M-NEXT:    srli a4, a0, 8
+; RV64M-NEXT:    or a0, a0, a4
+; RV64M-NEXT:    srli a4, a0, 16
+; RV64M-NEXT:    or a0, a0, a4
+; RV64M-NEXT:    srli a4, a0, 32
+; RV64M-NEXT:    or a0, a0, a4
+; RV64M-NEXT:    not a0, a0
+; RV64M-NEXT:    srli a4, a0, 1
+; RV64M-NEXT:    and a1, a4, a1
+; RV64M-NEXT:    sub a0, a0, a1
+; RV64M-NEXT:    and a1, a0, a2
+; RV64M-NEXT:    srli a0, a0, 2
+; RV64M-NEXT:    and a0, a0, a2
+; RV64M-NEXT:    lui a2, 4112
+; RV64M-NEXT:    addiw a2, a2, 257
+; RV64M-NEXT:    add a0, a1, a0
+; RV64M-NEXT:    srli a1, a0, 4
+; RV64M-NEXT:    add a0, a0, a1
+; RV64M-NEXT:    slli a1, a2, 32
+; RV64M-NEXT:    and a0, a0, a3
+; RV64M-NEXT:    add a1, a2, a1
+; RV64M-NEXT:    mul a0, a0, a1
+; RV64M-NEXT:    srli a0, a0, 56
+; RV64M-NEXT:    ret
+; RV64M-NEXT:  .LBB11_2:
+; RV64M-NEXT:    li a0, 64
+; RV64M-NEXT:    ret
 ;
 ; RV32ZBB-LABEL: test_ctlz_i64:
 ; RV32ZBB:       # %bb.0:
@@ -1572,20 +1793,41 @@ define i16 @test_ctlz_i16_zero_undef(i16 %a) nounwind {
 }
 
 define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind {
-; RV32_NOZBB-LABEL: test_ctlz_i32_zero_undef:
-; RV32_NOZBB:       # %bb.0:
-; RV32_NOZBB-NEXT:    srli a1, a0, 1
-; RV32_NOZBB-NEXT:    or a0, a0, a1
-; RV32_NOZBB-NEXT:    srli a1, a0, 2
-; RV32_NOZBB-NEXT:    or a0, a0, a1
-; RV32_NOZBB-NEXT:    srli a1, a0, 4
-; RV32_NOZBB-NEXT:    or a0, a0, a1
-; RV32_NOZBB-NEXT:    srli a1, a0, 8
-; RV32_NOZBB-NEXT:    or a0, a0, a1
-; RV32_NOZBB-NEXT:    srli a1, a0, 16
-; RV32_NOZBB-NEXT:    or a0, a0, a1
-; RV32_NOZBB-NEXT:    not a0, a0
-; RV32_NOZBB-NEXT:    tail __popcountsi2
+; RV32I-LABEL: test_ctlz_i32_zero_undef:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    srli a1, a0, 1
+; RV32I-NEXT:    lui a2, 349525
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    addi a1, a2, 1365
+; RV32I-NEXT:    srli a2, a0, 2
+; RV32I-NEXT:    or a0, a0, a2
+; RV32I-NEXT:    srli a2, a0, 4
+; RV32I-NEXT:    or a0, a0, a2
+; RV32I-NEXT:    srli a2, a0, 8
+; RV32I-NEXT:    or a0, a0, a2
+; RV32I-NEX...
[truncated]

``````````



https://github.com/llvm/llvm-project/pull/137120


More information about the llvm-commits mailing list