[llvm] [CGP] Despeculate ctlz/cttz with "illegal" integer types (PR #137197)

via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 24 08:34:01 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-globalisel

Author: Sergei Barannikov (s-barannikov)

<details>
<summary>Changes</summary>

The code below the removed check looks generic enough to support arbitrary integer widths. This change helps 32-bit targets avoid expensive expansion/libcalls in the case of zero input.


---

Patch is 40.00 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/137197.diff


11 Files Affected:

- (modified) llvm/lib/CodeGen/CodeGenPrepare.cpp (+2-2) 
- (modified) llvm/test/CodeGen/ARM/cttz.ll (+76-64) 
- (modified) llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll (+20-6) 
- (modified) llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll (+78-52) 
- (modified) llvm/test/CodeGen/RISCV/rv32xtheadbb.ll (+51-34) 
- (modified) llvm/test/CodeGen/RISCV/rv32zbb.ll (+39-29) 
- (modified) llvm/test/CodeGen/SPARC/ctlz.ll (+34-76) 
- (modified) llvm/test/CodeGen/X86/ctlo.ll (+16-11) 
- (modified) llvm/test/CodeGen/X86/ctlz.ll (+19-13) 
- (modified) llvm/test/CodeGen/X86/cttz.ll (+16-17) 
- (modified) llvm/test/CodeGen/X86/lzcnt-cmp.ll (+74-12) 


``````````diff
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index e8dc7752b23c0..f9dcb472ed1d2 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2552,9 +2552,9 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
       (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz(Ty)))
     return false;
 
-  // Only handle legal scalar cases. Anything else requires too much work.
+  // Only handle scalar cases. Anything else requires too much work.
   unsigned SizeInBits = Ty->getScalarSizeInBits();
-  if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits())
+  if (Ty->isVectorTy())
     return false;
 
   // Bail if the value is never zero.
diff --git a/llvm/test/CodeGen/ARM/cttz.ll b/llvm/test/CodeGen/ARM/cttz.ll
index 76adc61c5971f..1146ad64ee709 100644
--- a/llvm/test/CodeGen/ARM/cttz.ll
+++ b/llvm/test/CodeGen/ARM/cttz.ll
@@ -221,43 +221,49 @@ define i64 @test_i64(i64 %a) {
 ;
 ; CHECK-6M-LABEL: test_i64:
 ; CHECK-6M:       @ %bb.0:
-; CHECK-6M-NEXT:    .save {r4, r5, r7, lr}
-; CHECK-6M-NEXT:    push {r4, r5, r7, lr}
+; CHECK-6M-NEXT:    .save {r4, r5, r6, lr}
+; CHECK-6M-NEXT:    push {r4, r5, r6, lr}
+; CHECK-6M-NEXT:    mov r3, r1
 ; CHECK-6M-NEXT:    mov r2, r0
-; CHECK-6M-NEXT:    ldr r5, .LCPI3_0
-; CHECK-6M-NEXT:    adr r3, .LCPI3_1
+; CHECK-6M-NEXT:    movs r1, #0
+; CHECK-6M-NEXT:    orrs r0, r3
+; CHECK-6M-NEXT:    beq .LBB3_6
+; CHECK-6M-NEXT:  @ %bb.1: @ %cond.false
+; CHECK-6M-NEXT:    ldr r6, .LCPI3_0
+; CHECK-6M-NEXT:    adr r4, .LCPI3_1
 ; CHECK-6M-NEXT:    movs r0, #32
-; CHECK-6M-NEXT:    cmp r1, #0
-; CHECK-6M-NEXT:    mov r4, r0
-; CHECK-6M-NEXT:    beq .LBB3_2
-; CHECK-6M-NEXT:  @ %bb.1:
-; CHECK-6M-NEXT:    rsbs r4, r1, #0
-; CHECK-6M-NEXT:    ands r4, r1
-; CHECK-6M-NEXT:    muls r4, r5, r4
-; CHECK-6M-NEXT:    lsrs r1, r4, #27
-; CHECK-6M-NEXT:    ldrb r4, [r3, r1]
-; CHECK-6M-NEXT:  .LBB3_2:
-; CHECK-6M-NEXT:    adds r4, #32
-; CHECK-6M-NEXT:    rsbs r1, r2, #0
-; CHECK-6M-NEXT:    ands r1, r2
-; CHECK-6M-NEXT:    muls r5, r1, r5
-; CHECK-6M-NEXT:    lsrs r1, r5, #27
+; CHECK-6M-NEXT:    cmp r3, #0
+; CHECK-6M-NEXT:    mov r5, r0
+; CHECK-6M-NEXT:    beq .LBB3_3
+; CHECK-6M-NEXT:  @ %bb.2: @ %cond.false
+; CHECK-6M-NEXT:    rsbs r5, r3, #0
+; CHECK-6M-NEXT:    ands r5, r3
+; CHECK-6M-NEXT:    muls r5, r6, r5
+; CHECK-6M-NEXT:    lsrs r3, r5, #27
+; CHECK-6M-NEXT:    ldrb r5, [r4, r3]
+; CHECK-6M-NEXT:  .LBB3_3: @ %cond.false
+; CHECK-6M-NEXT:    adds r5, #32
+; CHECK-6M-NEXT:    rsbs r3, r2, #0
+; CHECK-6M-NEXT:    ands r3, r2
+; CHECK-6M-NEXT:    muls r6, r3, r6
+; CHECK-6M-NEXT:    lsrs r3, r6, #27
 ; CHECK-6M-NEXT:    cmp r2, #0
-; CHECK-6M-NEXT:    bne .LBB3_5
-; CHECK-6M-NEXT:  @ %bb.3:
-; CHECK-6M-NEXT:    beq .LBB3_6
-; CHECK-6M-NEXT:  .LBB3_4:
-; CHECK-6M-NEXT:    movs r1, #0
-; CHECK-6M-NEXT:    pop {r4, r5, r7, pc}
-; CHECK-6M-NEXT:  .LBB3_5:
-; CHECK-6M-NEXT:    ldrb r0, [r3, r1]
-; CHECK-6M-NEXT:    bne .LBB3_4
+; CHECK-6M-NEXT:    bne .LBB3_7
+; CHECK-6M-NEXT:  @ %bb.4: @ %cond.false
+; CHECK-6M-NEXT:    beq .LBB3_8
+; CHECK-6M-NEXT:  .LBB3_5: @ %cond.end
+; CHECK-6M-NEXT:    pop {r4, r5, r6, pc}
 ; CHECK-6M-NEXT:  .LBB3_6:
-; CHECK-6M-NEXT:    mov r0, r4
-; CHECK-6M-NEXT:    movs r1, #0
-; CHECK-6M-NEXT:    pop {r4, r5, r7, pc}
+; CHECK-6M-NEXT:    movs r0, #64
+; CHECK-6M-NEXT:    pop {r4, r5, r6, pc}
+; CHECK-6M-NEXT:  .LBB3_7: @ %cond.false
+; CHECK-6M-NEXT:    ldrb r0, [r4, r3]
+; CHECK-6M-NEXT:    bne .LBB3_5
+; CHECK-6M-NEXT:  .LBB3_8: @ %cond.false
+; CHECK-6M-NEXT:    mov r0, r5
+; CHECK-6M-NEXT:    pop {r4, r5, r6, pc}
 ; CHECK-6M-NEXT:    .p2align 2
-; CHECK-6M-NEXT:  @ %bb.7:
+; CHECK-6M-NEXT:  @ %bb.9:
 ; CHECK-6M-NEXT:  .LCPI3_0:
 ; CHECK-6M-NEXT:    .long 125613361 @ 0x77cb531
 ; CHECK-6M-NEXT:  .LCPI3_1:
@@ -265,43 +271,49 @@ define i64 @test_i64(i64 %a) {
 ;
 ; CHECK-8MBASE-LABEL: test_i64:
 ; CHECK-8MBASE:       @ %bb.0:
-; CHECK-8MBASE-NEXT:    .save {r4, r5, r7, lr}
-; CHECK-8MBASE-NEXT:    push {r4, r5, r7, lr}
+; CHECK-8MBASE-NEXT:    .save {r4, r5, r6, lr}
+; CHECK-8MBASE-NEXT:    push {r4, r5, r6, lr}
+; CHECK-8MBASE-NEXT:    mov r3, r1
 ; CHECK-8MBASE-NEXT:    mov r2, r0
-; CHECK-8MBASE-NEXT:    movw r5, #46385
-; CHECK-8MBASE-NEXT:    movt r5, #1916
-; CHECK-8MBASE-NEXT:    adr r3, .LCPI3_0
+; CHECK-8MBASE-NEXT:    movs r1, #0
+; CHECK-8MBASE-NEXT:    orrs r0, r3
+; CHECK-8MBASE-NEXT:    beq .LBB3_6
+; CHECK-8MBASE-NEXT:  @ %bb.1: @ %cond.false
+; CHECK-8MBASE-NEXT:    movw r6, #46385
+; CHECK-8MBASE-NEXT:    movt r6, #1916
+; CHECK-8MBASE-NEXT:    adr r4, .LCPI3_0
 ; CHECK-8MBASE-NEXT:    movs r0, #32
-; CHECK-8MBASE-NEXT:    mov r4, r0
-; CHECK-8MBASE-NEXT:    cbz r1, .LBB3_2
-; CHECK-8MBASE-NEXT:  @ %bb.1:
-; CHECK-8MBASE-NEXT:    rsbs r4, r1, #0
-; CHECK-8MBASE-NEXT:    ands r4, r1
-; CHECK-8MBASE-NEXT:    muls r4, r5, r4
-; CHECK-8MBASE-NEXT:    lsrs r1, r4, #27
-; CHECK-8MBASE-NEXT:    ldrb r4, [r3, r1]
-; CHECK-8MBASE-NEXT:  .LBB3_2:
-; CHECK-8MBASE-NEXT:    adds r4, #32
-; CHECK-8MBASE-NEXT:    rsbs r1, r2, #0
-; CHECK-8MBASE-NEXT:    ands r1, r2
-; CHECK-8MBASE-NEXT:    muls r5, r1, r5
-; CHECK-8MBASE-NEXT:    lsrs r1, r5, #27
+; CHECK-8MBASE-NEXT:    mov r5, r0
+; CHECK-8MBASE-NEXT:    cbz r3, .LBB3_3
+; CHECK-8MBASE-NEXT:  @ %bb.2: @ %cond.false
+; CHECK-8MBASE-NEXT:    rsbs r5, r3, #0
+; CHECK-8MBASE-NEXT:    ands r5, r3
+; CHECK-8MBASE-NEXT:    muls r5, r6, r5
+; CHECK-8MBASE-NEXT:    lsrs r3, r5, #27
+; CHECK-8MBASE-NEXT:    ldrb r5, [r4, r3]
+; CHECK-8MBASE-NEXT:  .LBB3_3: @ %cond.false
+; CHECK-8MBASE-NEXT:    adds r5, #32
+; CHECK-8MBASE-NEXT:    rsbs r3, r2, #0
+; CHECK-8MBASE-NEXT:    ands r3, r2
+; CHECK-8MBASE-NEXT:    muls r6, r3, r6
+; CHECK-8MBASE-NEXT:    lsrs r3, r6, #27
 ; CHECK-8MBASE-NEXT:    cmp r2, #0
-; CHECK-8MBASE-NEXT:    bne .LBB3_5
-; CHECK-8MBASE-NEXT:  @ %bb.3:
-; CHECK-8MBASE-NEXT:    beq .LBB3_6
-; CHECK-8MBASE-NEXT:  .LBB3_4:
-; CHECK-8MBASE-NEXT:    movs r1, #0
-; CHECK-8MBASE-NEXT:    pop {r4, r5, r7, pc}
-; CHECK-8MBASE-NEXT:  .LBB3_5:
-; CHECK-8MBASE-NEXT:    ldrb r0, [r3, r1]
-; CHECK-8MBASE-NEXT:    bne .LBB3_4
+; CHECK-8MBASE-NEXT:    bne .LBB3_7
+; CHECK-8MBASE-NEXT:  @ %bb.4: @ %cond.false
+; CHECK-8MBASE-NEXT:    beq .LBB3_8
+; CHECK-8MBASE-NEXT:  .LBB3_5: @ %cond.end
+; CHECK-8MBASE-NEXT:    pop {r4, r5, r6, pc}
 ; CHECK-8MBASE-NEXT:  .LBB3_6:
-; CHECK-8MBASE-NEXT:    mov r0, r4
-; CHECK-8MBASE-NEXT:    movs r1, #0
-; CHECK-8MBASE-NEXT:    pop {r4, r5, r7, pc}
+; CHECK-8MBASE-NEXT:    movs r0, #64
+; CHECK-8MBASE-NEXT:    pop {r4, r5, r6, pc}
+; CHECK-8MBASE-NEXT:  .LBB3_7: @ %cond.false
+; CHECK-8MBASE-NEXT:    ldrb r0, [r4, r3]
+; CHECK-8MBASE-NEXT:    bne .LBB3_5
+; CHECK-8MBASE-NEXT:  .LBB3_8: @ %cond.false
+; CHECK-8MBASE-NEXT:    mov r0, r5
+; CHECK-8MBASE-NEXT:    pop {r4, r5, r6, pc}
 ; CHECK-8MBASE-NEXT:    .p2align 2
-; CHECK-8MBASE-NEXT:  @ %bb.7:
+; CHECK-8MBASE-NEXT:  @ %bb.9:
 ; CHECK-8MBASE-NEXT:  .LCPI3_0:
 ; CHECK-8MBASE-NEXT:    .ascii "\000\001\034\002\035\016\030\003\036\026\024\017\031\021\004\b\037\033\r\027\025\023\020\007\032\f\022\006\013\005\n\t"
   %tmp = call i64 @llvm.cttz.i64(i64 %a, i1 false)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll
index f9af74d6ec323..0632caecf8907 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll
@@ -62,6 +62,9 @@ declare i64 @llvm.ctlz.i64(i64, i1)
 define i64 @ctlz_i64(i64 %a) nounwind {
 ; RV32I-LABEL: ctlz_i64:
 ; RV32I:       # %bb.0:
+; RV32I-NEXT:    or a2, a0, a1
+; RV32I-NEXT:    beqz a2, .LBB1_3
+; RV32I-NEXT:  # %bb.1: # %cond.false
 ; RV32I-NEXT:    lui a2, 349525
 ; RV32I-NEXT:    lui a3, 209715
 ; RV32I-NEXT:    lui a6, 61681
@@ -69,8 +72,8 @@ define i64 @ctlz_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    addi a4, a3, 819
 ; RV32I-NEXT:    addi a3, a6, -241
 ; RV32I-NEXT:    li a2, 32
-; RV32I-NEXT:    beqz a1, .LBB1_2
-; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    beqz a1, .LBB1_4
+; RV32I-NEXT:  # %bb.2: # %cond.false
 ; RV32I-NEXT:    srli a0, a1, 1
 ; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 2
@@ -99,7 +102,11 @@ define i64 @ctlz_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    sub a0, a2, a0
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB1_2:
+; RV32I-NEXT:  .LBB1_3:
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    li a0, 64
+; RV32I-NEXT:    ret
+; RV32I-NEXT:  .LBB1_4:
 ; RV32I-NEXT:    srli a1, a0, 1
 ; RV32I-NEXT:    or a0, a0, a1
 ; RV32I-NEXT:    srli a1, a0, 2
@@ -195,14 +202,17 @@ declare i64 @llvm.cttz.i64(i64, i1)
 define i64 @cttz_i64(i64 %a) nounwind {
 ; RV32I-LABEL: cttz_i64:
 ; RV32I:       # %bb.0:
+; RV32I-NEXT:    or a2, a0, a1
+; RV32I-NEXT:    beqz a2, .LBB3_3
+; RV32I-NEXT:  # %bb.1: # %cond.false
 ; RV32I-NEXT:    lui a2, 349525
 ; RV32I-NEXT:    lui a3, 209715
 ; RV32I-NEXT:    lui a5, 61681
 ; RV32I-NEXT:    addi a4, a2, 1365
 ; RV32I-NEXT:    addi a3, a3, 819
 ; RV32I-NEXT:    addi a2, a5, -241
-; RV32I-NEXT:    beqz a0, .LBB3_2
-; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    beqz a0, .LBB3_4
+; RV32I-NEXT:  # %bb.2: # %cond.false
 ; RV32I-NEXT:    not a1, a0
 ; RV32I-NEXT:    addi a0, a0, -1
 ; RV32I-NEXT:    and a0, a1, a0
@@ -223,7 +233,11 @@ define i64 @cttz_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    srli a0, a0, 24
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB3_2:
+; RV32I-NEXT:  .LBB3_3:
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    li a0, 64
+; RV32I-NEXT:    ret
+; RV32I-NEXT:  .LBB3_4:
 ; RV32I-NEXT:    not a0, a1
 ; RV32I-NEXT:    addi a1, a1, -1
 ; RV32I-NEXT:    and a0, a0, a1
diff --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
index a46168f114bb9..3a7d31253b05d 100644
--- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
@@ -374,39 +374,42 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
-; RV32I-NEXT:    mv s0, a0
-; RV32I-NEXT:    neg a0, a0
-; RV32I-NEXT:    and a0, s0, a0
-; RV32I-NEXT:    lui a1, 30667
-; RV32I-NEXT:    addi s3, a1, 1329
-; RV32I-NEXT:    mv a1, s3
+; RV32I-NEXT:    mv s0, a1
+; RV32I-NEXT:    or a1, a0, a1
+; RV32I-NEXT:    beqz a1, .LBB3_3
+; RV32I-NEXT:  # %bb.1: # %cond.false
+; RV32I-NEXT:    neg a1, a0
+; RV32I-NEXT:    and a1, a0, a1
+; RV32I-NEXT:    lui a2, 30667
+; RV32I-NEXT:    addi s2, a2, 1329
+; RV32I-NEXT:    mv s4, a0
+; RV32I-NEXT:    mv a0, a1
+; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    call __mulsi3
 ; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    lui s4, %hi(.LCPI3_0)
-; RV32I-NEXT:    addi s4, s4, %lo(.LCPI3_0)
-; RV32I-NEXT:    neg a0, s2
-; RV32I-NEXT:    and a0, s2, a0
-; RV32I-NEXT:    mv a1, s3
+; RV32I-NEXT:    lui s3, %hi(.LCPI3_0)
+; RV32I-NEXT:    addi s3, s3, %lo(.LCPI3_0)
+; RV32I-NEXT:    neg a0, s0
+; RV32I-NEXT:    and a0, s0, a0
+; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    call __mulsi3
-; RV32I-NEXT:    bnez s2, .LBB3_3
-; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    li a0, 32
-; RV32I-NEXT:    beqz s0, .LBB3_4
-; RV32I-NEXT:  .LBB3_2:
-; RV32I-NEXT:    srli s1, s1, 27
-; RV32I-NEXT:    add s1, s4, s1
-; RV32I-NEXT:    lbu a0, 0(s1)
-; RV32I-NEXT:    j .LBB3_5
-; RV32I-NEXT:  .LBB3_3:
+; RV32I-NEXT:    bnez s4, .LBB3_4
+; RV32I-NEXT:  # %bb.2: # %cond.false
 ; RV32I-NEXT:    srli a0, a0, 27
-; RV32I-NEXT:    add a0, s4, a0
+; RV32I-NEXT:    add a0, s3, a0
 ; RV32I-NEXT:    lbu a0, 0(a0)
-; RV32I-NEXT:    bnez s0, .LBB3_2
-; RV32I-NEXT:  .LBB3_4:
 ; RV32I-NEXT:    addi a0, a0, 32
-; RV32I-NEXT:  .LBB3_5:
+; RV32I-NEXT:    j .LBB3_5
+; RV32I-NEXT:  .LBB3_3:
+; RV32I-NEXT:    li a0, 64
+; RV32I-NEXT:    j .LBB3_6
+; RV32I-NEXT:  .LBB3_4:
+; RV32I-NEXT:    srli s1, s1, 27
+; RV32I-NEXT:    add s1, s3, s1
+; RV32I-NEXT:    lbu a0, 0(s1)
+; RV32I-NEXT:  .LBB3_5: # %cond.false
 ; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:  .LBB3_6: # %cond.end
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
@@ -441,33 +444,35 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
 ;
 ; RV32M-LABEL: test_cttz_i64:
 ; RV32M:       # %bb.0:
+; RV32M-NEXT:    or a2, a0, a1
+; RV32M-NEXT:    beqz a2, .LBB3_3
+; RV32M-NEXT:  # %bb.1: # %cond.false
 ; RV32M-NEXT:    lui a2, 30667
 ; RV32M-NEXT:    addi a3, a2, 1329
 ; RV32M-NEXT:    lui a2, %hi(.LCPI3_0)
 ; RV32M-NEXT:    addi a2, a2, %lo(.LCPI3_0)
-; RV32M-NEXT:    bnez a1, .LBB3_3
-; RV32M-NEXT:  # %bb.1:
-; RV32M-NEXT:    li a1, 32
-; RV32M-NEXT:    beqz a0, .LBB3_4
-; RV32M-NEXT:  .LBB3_2:
-; RV32M-NEXT:    neg a1, a0
-; RV32M-NEXT:    and a0, a0, a1
+; RV32M-NEXT:    bnez a0, .LBB3_4
+; RV32M-NEXT:  # %bb.2: # %cond.false
+; RV32M-NEXT:    neg a0, a1
+; RV32M-NEXT:    and a0, a1, a0
 ; RV32M-NEXT:    mul a0, a0, a3
 ; RV32M-NEXT:    srli a0, a0, 27
 ; RV32M-NEXT:    add a0, a2, a0
 ; RV32M-NEXT:    lbu a0, 0(a0)
+; RV32M-NEXT:    addi a0, a0, 32
 ; RV32M-NEXT:    li a1, 0
 ; RV32M-NEXT:    ret
 ; RV32M-NEXT:  .LBB3_3:
-; RV32M-NEXT:    neg a4, a1
-; RV32M-NEXT:    and a1, a1, a4
-; RV32M-NEXT:    mul a1, a1, a3
-; RV32M-NEXT:    srli a1, a1, 27
-; RV32M-NEXT:    add a1, a2, a1
-; RV32M-NEXT:    lbu a1, 0(a1)
-; RV32M-NEXT:    bnez a0, .LBB3_2
+; RV32M-NEXT:    li a1, 0
+; RV32M-NEXT:    li a0, 64
+; RV32M-NEXT:    ret
 ; RV32M-NEXT:  .LBB3_4:
-; RV32M-NEXT:    addi a0, a1, 32
+; RV32M-NEXT:    neg a1, a0
+; RV32M-NEXT:    and a0, a0, a1
+; RV32M-NEXT:    mul a0, a0, a3
+; RV32M-NEXT:    srli a0, a0, 27
+; RV32M-NEXT:    add a0, a2, a0
+; RV32M-NEXT:    lbu a0, 0(a0)
 ; RV32M-NEXT:    li a1, 0
 ; RV32M-NEXT:    ret
 ;
@@ -510,21 +515,28 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
 ;
 ; RV32XTHEADBB-LABEL: test_cttz_i64:
 ; RV32XTHEADBB:       # %bb.0:
-; RV32XTHEADBB-NEXT:    bnez a0, .LBB3_2
-; RV32XTHEADBB-NEXT:  # %bb.1:
+; RV32XTHEADBB-NEXT:    or a2, a0, a1
+; RV32XTHEADBB-NEXT:    beqz a2, .LBB3_3
+; RV32XTHEADBB-NEXT:  # %bb.1: # %cond.false
+; RV32XTHEADBB-NEXT:    bnez a0, .LBB3_4
+; RV32XTHEADBB-NEXT:  # %bb.2: # %cond.false
 ; RV32XTHEADBB-NEXT:    addi a0, a1, -1
 ; RV32XTHEADBB-NEXT:    not a1, a1
 ; RV32XTHEADBB-NEXT:    and a0, a1, a0
 ; RV32XTHEADBB-NEXT:    th.ff1 a0, a0
 ; RV32XTHEADBB-NEXT:    li a1, 64
-; RV32XTHEADBB-NEXT:    j .LBB3_3
-; RV32XTHEADBB-NEXT:  .LBB3_2:
+; RV32XTHEADBB-NEXT:    j .LBB3_5
+; RV32XTHEADBB-NEXT:  .LBB3_3:
+; RV32XTHEADBB-NEXT:    li a1, 0
+; RV32XTHEADBB-NEXT:    li a0, 64
+; RV32XTHEADBB-NEXT:    ret
+; RV32XTHEADBB-NEXT:  .LBB3_4:
 ; RV32XTHEADBB-NEXT:    addi a1, a0, -1
 ; RV32XTHEADBB-NEXT:    not a0, a0
 ; RV32XTHEADBB-NEXT:    and a0, a0, a1
 ; RV32XTHEADBB-NEXT:    th.ff1 a0, a0
 ; RV32XTHEADBB-NEXT:    li a1, 32
-; RV32XTHEADBB-NEXT:  .LBB3_3:
+; RV32XTHEADBB-NEXT:  .LBB3_5: # %cond.false
 ; RV32XTHEADBB-NEXT:    sub a0, a1, a0
 ; RV32XTHEADBB-NEXT:    li a1, 0
 ; RV32XTHEADBB-NEXT:    ret
@@ -1348,14 +1360,17 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
 define i64 @test_ctlz_i64(i64 %a) nounwind {
 ; RV32I-LABEL: test_ctlz_i64:
 ; RV32I:       # %bb.0:
+; RV32I-NEXT:    or a2, a0, a1
+; RV32I-NEXT:    beqz a2, .LBB11_3
+; RV32I-NEXT:  # %bb.1: # %cond.false
 ; RV32I-NEXT:    lui a2, 349525
 ; RV32I-NEXT:    lui a3, 209715
 ; RV32I-NEXT:    lui a5, 61681
 ; RV32I-NEXT:    addi a4, a2, 1365
 ; RV32I-NEXT:    addi a3, a3, 819
 ; RV32I-NEXT:    addi a2, a5, -241
-; RV32I-NEXT:    bnez a1, .LBB11_2
-; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    bnez a1, .LBB11_4
+; RV32I-NEXT:  # %bb.2: # %cond.false
 ; RV32I-NEXT:    srli a1, a0, 1
 ; RV32I-NEXT:    or a0, a0, a1
 ; RV32I-NEXT:    srli a1, a0, 2
@@ -1385,7 +1400,11 @@ define i64 @test_ctlz_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    addi a0, a0, 32
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB11_2:
+; RV32I-NEXT:  .LBB11_3:
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    li a0, 64
+; RV32I-NEXT:    ret
+; RV32I-NEXT:  .LBB11_4:
 ; RV32I-NEXT:    srli a0, a1, 1
 ; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 2
@@ -1468,6 +1487,9 @@ define i64 @test_ctlz_i64(i64 %a) nounwind {
 ;
 ; RV32M-LABEL: test_ctlz_i64:
 ; RV32M:       # %bb.0:
+; RV32M-NEXT:    or a2, a0, a1
+; RV32M-NEXT:    beqz a2, .LBB11_3
+; RV32M-NEXT:  # %bb.1: # %cond.false
 ; RV32M-NEXT:    lui a2, 349525
 ; RV32M-NEXT:    lui a3, 209715
 ; RV32M-NEXT:    lui a6, 61681
@@ -1476,8 +1498,8 @@ define i64 @test_ctlz_i64(i64 %a) nounwind {
 ; RV32M-NEXT:    addi a4, a3, 819
 ; RV32M-NEXT:    addi a3, a6, -241
 ; RV32M-NEXT:    addi a2, a7, 257
-; RV32M-NEXT:    bnez a1, .LBB11_2
-; RV32M-NEXT:  # %bb.1:
+; RV32M-NEXT:    bnez a1, .LBB11_4
+; RV32M-NEXT:  # %bb.2: # %cond.false
 ; RV32M-NEXT:    srli a1, a0, 1
 ; RV32M-NEXT:    or a0, a0, a1
 ; RV32M-NEXT:    srli a1, a0, 2
@@ -1504,7 +1526,11 @@ define i64 @test_ctlz_i64(i64 %a) nounwind {
 ; RV32M-NEXT:    addi a0, a0, 32
 ; RV32M-NEXT:    li a1, 0
 ; RV32M-NEXT:    ret
-; RV32M-NEXT:  .LBB11_2:
+; RV32M-NEXT:  .LBB11_3:
+; RV32M-NEXT:    li a1, 0
+; RV32M-NEXT:    li a0, 64
+; RV32M-NEXT:    ret
+; RV32M-NEXT:  .LBB11_4:
 ; RV32M-NEXT:    srli a0, a1, 1
 ; RV32M-NEXT:    or a0, a1, a0
 ; RV32M-NEXT:    srli a1, a0, 2
diff --git a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
index 04a2f67c4942b..723437a610ff8 100644
--- a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
@@ -61,14 +61,17 @@ declare i64 @llvm.ctlz.i64(i64, i1)
 define i64 @ctlz_i64(i64 %a) nounwind {
 ; RV32I-LABEL: ctlz_i64:
 ; RV32I:       # %bb.0:
+; RV32I-NEXT:    or a2, a0, a1
+; RV32I-NEXT:    beqz a2, .LBB1_3
+; RV32I-NEXT:  # %bb.1: # %cond.false
 ; RV32I-NEXT:    lui a2, 349525
 ; RV32I-NEXT:    lui a3, 209715
 ; RV32I-NEXT:    lui a5, 61681
 ; RV32I-NEXT:    addi a4, a2, 1365
 ; RV32I-NEXT:    addi a3, a3, 819
 ; RV32I-NEXT:    addi a2, a5, -241
-; RV32I-NEXT:    bnez a1, .LBB1_2
-; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    bnez a1, .LBB1_4
+; RV32I-NEXT:  # %bb.2: # %cond.false
 ; RV32I-NEXT:    srli a1, a0, 1
 ; RV32I-NEXT:    or a0, a0, a1
 ; RV32I-NEXT:    srli a1, a0, 2
@@ -98,7 +101,11 @@ define i64 @ctlz_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    addi a0, a0, 32
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB1_2:
+; RV32I-NEXT:  .LBB1_3:
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    li a0, 64
+; RV32I-NEXT:    ret
+; RV32I-NEXT:  .LBB1_4:
 ; RV32I-NEXT:    srli a0, a1, 1
 ; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 2
@@ -200,39 +207,42 @@ define i64 @cttz_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
-; RV32I-NEXT:    mv s0, a0
-; RV32I-NEXT:    neg a0, a0
-; RV32I-NEXT:    and a0, s0, a0
-; RV32I-NEXT:    lui a1, 30667
-; RV32I-NEXT:    addi s3, a1, 1329
-; RV32I-NEXT:    mv a1, s3
+; RV32I-NEXT:    mv s0, a1
+; RV32I-NEXT:    or a1, a0, a1
+; RV32I-NEXT:    beqz a1, .LBB3_3
+; RV32I-NEXT:  # %bb.1: # %cond.false
+; RV32I-NEXT:    neg a1, a0
+; RV32I-NEXT:    and a1, a0, a1
+; RV32I-NEXT:    lui a2, 30667
+; RV32I-NEXT:    addi s2, a2, 1329
+; RV32I-NEXT:    mv s4, a0
+; RV32I-NEXT:    mv a0, a1
+; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    call __mulsi3
 ; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    lui s4, %hi(.LCPI3_0)
-; RV32I-NEXT:    addi s4, s4, %lo(.LCPI3_0)
-; RV32I-NEXT:    neg a0, s2
-; RV32I-NEXT:    and a0, s2, a0
-; RV32I-NEXT:    mv a1, s3
+; RV32I-NEXT:    lui s3, %hi(.LCPI3_0)
+; RV32I-NEXT:    addi s3, s3, %lo(.LCPI3_0)
+; RV32I-NEXT:    neg a0, s0
+; RV32I-NEXT:    and a0, s0, a0
+; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    call __mulsi3
-; RV32I-NEXT:    bnez s2, .LBB3_3
-; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    li a0, 32
-; RV32I-NEXT:    beqz s0, .LBB3_4
-; RV32I-NEXT:  .LBB3_2:
-; RV32I-NEXT:    srli s1, s1, 27
-; RV32I-NEXT:    add s1, s4, s1
-; RV32I-NEXT:    lbu a0, 0(s1)
-; RV32I-NEXT:    j .LBB3_5
-; RV32I-NEXT:  .LBB3_3:
+; RV32I-NEXT:    bnez s4, .LBB3_4
+; RV32I-NEXT:  # %bb.2: # %cond.false
 ; RV32I-NEXT:    srli a0, a0, 27
-; RV32I-NEXT:    add a0, s4, a0
+; RV32I-NEXT:    add a0, s3, a0
 ; RV32I-NEXT:    lbu a0, 0(a0)
-; RV32I-NEXT:    bnez s0, .LBB3_2
-; RV32I-NEXT:  .LBB3_4:
 ; RV32I-NEXT:    addi ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/137197


More information about the llvm-commits mailing list