[llvm] [CGP] Despeculate ctlz/cttz with "illegal" integer types (PR #137197)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 24 08:34:01 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-globalisel
Author: Sergei Barannikov (s-barannikov)
<details>
<summary>Changes</summary>
The code below the removed check (the bail-out for integer types wider than `DL->getLargestLegalIntTypeSizeInBits()`) looks generic enough to support arbitrary integer widths. This change helps 32-bit targets avoid expensive expansions/libcalls when the input is zero.
---
Patch is 40.00 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/137197.diff
11 Files Affected:
- (modified) llvm/lib/CodeGen/CodeGenPrepare.cpp (+2-2)
- (modified) llvm/test/CodeGen/ARM/cttz.ll (+76-64)
- (modified) llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll (+20-6)
- (modified) llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll (+78-52)
- (modified) llvm/test/CodeGen/RISCV/rv32xtheadbb.ll (+51-34)
- (modified) llvm/test/CodeGen/RISCV/rv32zbb.ll (+39-29)
- (modified) llvm/test/CodeGen/SPARC/ctlz.ll (+34-76)
- (modified) llvm/test/CodeGen/X86/ctlo.ll (+16-11)
- (modified) llvm/test/CodeGen/X86/ctlz.ll (+19-13)
- (modified) llvm/test/CodeGen/X86/cttz.ll (+16-17)
- (modified) llvm/test/CodeGen/X86/lzcnt-cmp.ll (+74-12)
``````````diff
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index e8dc7752b23c0..f9dcb472ed1d2 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2552,9 +2552,9 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
(IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz(Ty)))
return false;
- // Only handle legal scalar cases. Anything else requires too much work.
+ // Only handle scalar cases. Anything else requires too much work.
unsigned SizeInBits = Ty->getScalarSizeInBits();
- if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits())
+ if (Ty->isVectorTy())
return false;
// Bail if the value is never zero.
diff --git a/llvm/test/CodeGen/ARM/cttz.ll b/llvm/test/CodeGen/ARM/cttz.ll
index 76adc61c5971f..1146ad64ee709 100644
--- a/llvm/test/CodeGen/ARM/cttz.ll
+++ b/llvm/test/CodeGen/ARM/cttz.ll
@@ -221,43 +221,49 @@ define i64 @test_i64(i64 %a) {
;
; CHECK-6M-LABEL: test_i64:
; CHECK-6M: @ %bb.0:
-; CHECK-6M-NEXT: .save {r4, r5, r7, lr}
-; CHECK-6M-NEXT: push {r4, r5, r7, lr}
+; CHECK-6M-NEXT: .save {r4, r5, r6, lr}
+; CHECK-6M-NEXT: push {r4, r5, r6, lr}
+; CHECK-6M-NEXT: mov r3, r1
; CHECK-6M-NEXT: mov r2, r0
-; CHECK-6M-NEXT: ldr r5, .LCPI3_0
-; CHECK-6M-NEXT: adr r3, .LCPI3_1
+; CHECK-6M-NEXT: movs r1, #0
+; CHECK-6M-NEXT: orrs r0, r3
+; CHECK-6M-NEXT: beq .LBB3_6
+; CHECK-6M-NEXT: @ %bb.1: @ %cond.false
+; CHECK-6M-NEXT: ldr r6, .LCPI3_0
+; CHECK-6M-NEXT: adr r4, .LCPI3_1
; CHECK-6M-NEXT: movs r0, #32
-; CHECK-6M-NEXT: cmp r1, #0
-; CHECK-6M-NEXT: mov r4, r0
-; CHECK-6M-NEXT: beq .LBB3_2
-; CHECK-6M-NEXT: @ %bb.1:
-; CHECK-6M-NEXT: rsbs r4, r1, #0
-; CHECK-6M-NEXT: ands r4, r1
-; CHECK-6M-NEXT: muls r4, r5, r4
-; CHECK-6M-NEXT: lsrs r1, r4, #27
-; CHECK-6M-NEXT: ldrb r4, [r3, r1]
-; CHECK-6M-NEXT: .LBB3_2:
-; CHECK-6M-NEXT: adds r4, #32
-; CHECK-6M-NEXT: rsbs r1, r2, #0
-; CHECK-6M-NEXT: ands r1, r2
-; CHECK-6M-NEXT: muls r5, r1, r5
-; CHECK-6M-NEXT: lsrs r1, r5, #27
+; CHECK-6M-NEXT: cmp r3, #0
+; CHECK-6M-NEXT: mov r5, r0
+; CHECK-6M-NEXT: beq .LBB3_3
+; CHECK-6M-NEXT: @ %bb.2: @ %cond.false
+; CHECK-6M-NEXT: rsbs r5, r3, #0
+; CHECK-6M-NEXT: ands r5, r3
+; CHECK-6M-NEXT: muls r5, r6, r5
+; CHECK-6M-NEXT: lsrs r3, r5, #27
+; CHECK-6M-NEXT: ldrb r5, [r4, r3]
+; CHECK-6M-NEXT: .LBB3_3: @ %cond.false
+; CHECK-6M-NEXT: adds r5, #32
+; CHECK-6M-NEXT: rsbs r3, r2, #0
+; CHECK-6M-NEXT: ands r3, r2
+; CHECK-6M-NEXT: muls r6, r3, r6
+; CHECK-6M-NEXT: lsrs r3, r6, #27
; CHECK-6M-NEXT: cmp r2, #0
-; CHECK-6M-NEXT: bne .LBB3_5
-; CHECK-6M-NEXT: @ %bb.3:
-; CHECK-6M-NEXT: beq .LBB3_6
-; CHECK-6M-NEXT: .LBB3_4:
-; CHECK-6M-NEXT: movs r1, #0
-; CHECK-6M-NEXT: pop {r4, r5, r7, pc}
-; CHECK-6M-NEXT: .LBB3_5:
-; CHECK-6M-NEXT: ldrb r0, [r3, r1]
-; CHECK-6M-NEXT: bne .LBB3_4
+; CHECK-6M-NEXT: bne .LBB3_7
+; CHECK-6M-NEXT: @ %bb.4: @ %cond.false
+; CHECK-6M-NEXT: beq .LBB3_8
+; CHECK-6M-NEXT: .LBB3_5: @ %cond.end
+; CHECK-6M-NEXT: pop {r4, r5, r6, pc}
; CHECK-6M-NEXT: .LBB3_6:
-; CHECK-6M-NEXT: mov r0, r4
-; CHECK-6M-NEXT: movs r1, #0
-; CHECK-6M-NEXT: pop {r4, r5, r7, pc}
+; CHECK-6M-NEXT: movs r0, #64
+; CHECK-6M-NEXT: pop {r4, r5, r6, pc}
+; CHECK-6M-NEXT: .LBB3_7: @ %cond.false
+; CHECK-6M-NEXT: ldrb r0, [r4, r3]
+; CHECK-6M-NEXT: bne .LBB3_5
+; CHECK-6M-NEXT: .LBB3_8: @ %cond.false
+; CHECK-6M-NEXT: mov r0, r5
+; CHECK-6M-NEXT: pop {r4, r5, r6, pc}
; CHECK-6M-NEXT: .p2align 2
-; CHECK-6M-NEXT: @ %bb.7:
+; CHECK-6M-NEXT: @ %bb.9:
; CHECK-6M-NEXT: .LCPI3_0:
; CHECK-6M-NEXT: .long 125613361 @ 0x77cb531
; CHECK-6M-NEXT: .LCPI3_1:
@@ -265,43 +271,49 @@ define i64 @test_i64(i64 %a) {
;
; CHECK-8MBASE-LABEL: test_i64:
; CHECK-8MBASE: @ %bb.0:
-; CHECK-8MBASE-NEXT: .save {r4, r5, r7, lr}
-; CHECK-8MBASE-NEXT: push {r4, r5, r7, lr}
+; CHECK-8MBASE-NEXT: .save {r4, r5, r6, lr}
+; CHECK-8MBASE-NEXT: push {r4, r5, r6, lr}
+; CHECK-8MBASE-NEXT: mov r3, r1
; CHECK-8MBASE-NEXT: mov r2, r0
-; CHECK-8MBASE-NEXT: movw r5, #46385
-; CHECK-8MBASE-NEXT: movt r5, #1916
-; CHECK-8MBASE-NEXT: adr r3, .LCPI3_0
+; CHECK-8MBASE-NEXT: movs r1, #0
+; CHECK-8MBASE-NEXT: orrs r0, r3
+; CHECK-8MBASE-NEXT: beq .LBB3_6
+; CHECK-8MBASE-NEXT: @ %bb.1: @ %cond.false
+; CHECK-8MBASE-NEXT: movw r6, #46385
+; CHECK-8MBASE-NEXT: movt r6, #1916
+; CHECK-8MBASE-NEXT: adr r4, .LCPI3_0
; CHECK-8MBASE-NEXT: movs r0, #32
-; CHECK-8MBASE-NEXT: mov r4, r0
-; CHECK-8MBASE-NEXT: cbz r1, .LBB3_2
-; CHECK-8MBASE-NEXT: @ %bb.1:
-; CHECK-8MBASE-NEXT: rsbs r4, r1, #0
-; CHECK-8MBASE-NEXT: ands r4, r1
-; CHECK-8MBASE-NEXT: muls r4, r5, r4
-; CHECK-8MBASE-NEXT: lsrs r1, r4, #27
-; CHECK-8MBASE-NEXT: ldrb r4, [r3, r1]
-; CHECK-8MBASE-NEXT: .LBB3_2:
-; CHECK-8MBASE-NEXT: adds r4, #32
-; CHECK-8MBASE-NEXT: rsbs r1, r2, #0
-; CHECK-8MBASE-NEXT: ands r1, r2
-; CHECK-8MBASE-NEXT: muls r5, r1, r5
-; CHECK-8MBASE-NEXT: lsrs r1, r5, #27
+; CHECK-8MBASE-NEXT: mov r5, r0
+; CHECK-8MBASE-NEXT: cbz r3, .LBB3_3
+; CHECK-8MBASE-NEXT: @ %bb.2: @ %cond.false
+; CHECK-8MBASE-NEXT: rsbs r5, r3, #0
+; CHECK-8MBASE-NEXT: ands r5, r3
+; CHECK-8MBASE-NEXT: muls r5, r6, r5
+; CHECK-8MBASE-NEXT: lsrs r3, r5, #27
+; CHECK-8MBASE-NEXT: ldrb r5, [r4, r3]
+; CHECK-8MBASE-NEXT: .LBB3_3: @ %cond.false
+; CHECK-8MBASE-NEXT: adds r5, #32
+; CHECK-8MBASE-NEXT: rsbs r3, r2, #0
+; CHECK-8MBASE-NEXT: ands r3, r2
+; CHECK-8MBASE-NEXT: muls r6, r3, r6
+; CHECK-8MBASE-NEXT: lsrs r3, r6, #27
; CHECK-8MBASE-NEXT: cmp r2, #0
-; CHECK-8MBASE-NEXT: bne .LBB3_5
-; CHECK-8MBASE-NEXT: @ %bb.3:
-; CHECK-8MBASE-NEXT: beq .LBB3_6
-; CHECK-8MBASE-NEXT: .LBB3_4:
-; CHECK-8MBASE-NEXT: movs r1, #0
-; CHECK-8MBASE-NEXT: pop {r4, r5, r7, pc}
-; CHECK-8MBASE-NEXT: .LBB3_5:
-; CHECK-8MBASE-NEXT: ldrb r0, [r3, r1]
-; CHECK-8MBASE-NEXT: bne .LBB3_4
+; CHECK-8MBASE-NEXT: bne .LBB3_7
+; CHECK-8MBASE-NEXT: @ %bb.4: @ %cond.false
+; CHECK-8MBASE-NEXT: beq .LBB3_8
+; CHECK-8MBASE-NEXT: .LBB3_5: @ %cond.end
+; CHECK-8MBASE-NEXT: pop {r4, r5, r6, pc}
; CHECK-8MBASE-NEXT: .LBB3_6:
-; CHECK-8MBASE-NEXT: mov r0, r4
-; CHECK-8MBASE-NEXT: movs r1, #0
-; CHECK-8MBASE-NEXT: pop {r4, r5, r7, pc}
+; CHECK-8MBASE-NEXT: movs r0, #64
+; CHECK-8MBASE-NEXT: pop {r4, r5, r6, pc}
+; CHECK-8MBASE-NEXT: .LBB3_7: @ %cond.false
+; CHECK-8MBASE-NEXT: ldrb r0, [r4, r3]
+; CHECK-8MBASE-NEXT: bne .LBB3_5
+; CHECK-8MBASE-NEXT: .LBB3_8: @ %cond.false
+; CHECK-8MBASE-NEXT: mov r0, r5
+; CHECK-8MBASE-NEXT: pop {r4, r5, r6, pc}
; CHECK-8MBASE-NEXT: .p2align 2
-; CHECK-8MBASE-NEXT: @ %bb.7:
+; CHECK-8MBASE-NEXT: @ %bb.9:
; CHECK-8MBASE-NEXT: .LCPI3_0:
; CHECK-8MBASE-NEXT: .ascii "\000\001\034\002\035\016\030\003\036\026\024\017\031\021\004\b\037\033\r\027\025\023\020\007\032\f\022\006\013\005\n\t"
%tmp = call i64 @llvm.cttz.i64(i64 %a, i1 false)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll
index f9af74d6ec323..0632caecf8907 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll
@@ -62,6 +62,9 @@ declare i64 @llvm.ctlz.i64(i64, i1)
define i64 @ctlz_i64(i64 %a) nounwind {
; RV32I-LABEL: ctlz_i64:
; RV32I: # %bb.0:
+; RV32I-NEXT: or a2, a0, a1
+; RV32I-NEXT: beqz a2, .LBB1_3
+; RV32I-NEXT: # %bb.1: # %cond.false
; RV32I-NEXT: lui a2, 349525
; RV32I-NEXT: lui a3, 209715
; RV32I-NEXT: lui a6, 61681
@@ -69,8 +72,8 @@ define i64 @ctlz_i64(i64 %a) nounwind {
; RV32I-NEXT: addi a4, a3, 819
; RV32I-NEXT: addi a3, a6, -241
; RV32I-NEXT: li a2, 32
-; RV32I-NEXT: beqz a1, .LBB1_2
-; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: beqz a1, .LBB1_4
+; RV32I-NEXT: # %bb.2: # %cond.false
; RV32I-NEXT: srli a0, a1, 1
; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: srli a1, a0, 2
@@ -99,7 +102,11 @@ define i64 @ctlz_i64(i64 %a) nounwind {
; RV32I-NEXT: sub a0, a2, a0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB1_2:
+; RV32I-NEXT: .LBB1_3:
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: li a0, 64
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB1_4:
; RV32I-NEXT: srli a1, a0, 1
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 2
@@ -195,14 +202,17 @@ declare i64 @llvm.cttz.i64(i64, i1)
define i64 @cttz_i64(i64 %a) nounwind {
; RV32I-LABEL: cttz_i64:
; RV32I: # %bb.0:
+; RV32I-NEXT: or a2, a0, a1
+; RV32I-NEXT: beqz a2, .LBB3_3
+; RV32I-NEXT: # %bb.1: # %cond.false
; RV32I-NEXT: lui a2, 349525
; RV32I-NEXT: lui a3, 209715
; RV32I-NEXT: lui a5, 61681
; RV32I-NEXT: addi a4, a2, 1365
; RV32I-NEXT: addi a3, a3, 819
; RV32I-NEXT: addi a2, a5, -241
-; RV32I-NEXT: beqz a0, .LBB3_2
-; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: beqz a0, .LBB3_4
+; RV32I-NEXT: # %bb.2: # %cond.false
; RV32I-NEXT: not a1, a0
; RV32I-NEXT: addi a0, a0, -1
; RV32I-NEXT: and a0, a1, a0
@@ -223,7 +233,11 @@ define i64 @cttz_i64(i64 %a) nounwind {
; RV32I-NEXT: srli a0, a0, 24
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB3_2:
+; RV32I-NEXT: .LBB3_3:
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: li a0, 64
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB3_4:
; RV32I-NEXT: not a0, a1
; RV32I-NEXT: addi a1, a1, -1
; RV32I-NEXT: and a0, a0, a1
diff --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
index a46168f114bb9..3a7d31253b05d 100644
--- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
@@ -374,39 +374,42 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s2, a1
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: neg a0, a0
-; RV32I-NEXT: and a0, s0, a0
-; RV32I-NEXT: lui a1, 30667
-; RV32I-NEXT: addi s3, a1, 1329
-; RV32I-NEXT: mv a1, s3
+; RV32I-NEXT: mv s0, a1
+; RV32I-NEXT: or a1, a0, a1
+; RV32I-NEXT: beqz a1, .LBB3_3
+; RV32I-NEXT: # %bb.1: # %cond.false
+; RV32I-NEXT: neg a1, a0
+; RV32I-NEXT: and a1, a0, a1
+; RV32I-NEXT: lui a2, 30667
+; RV32I-NEXT: addi s2, a2, 1329
+; RV32I-NEXT: mv s4, a0
+; RV32I-NEXT: mv a0, a1
+; RV32I-NEXT: mv a1, s2
; RV32I-NEXT: call __mulsi3
; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui s4, %hi(.LCPI3_0)
-; RV32I-NEXT: addi s4, s4, %lo(.LCPI3_0)
-; RV32I-NEXT: neg a0, s2
-; RV32I-NEXT: and a0, s2, a0
-; RV32I-NEXT: mv a1, s3
+; RV32I-NEXT: lui s3, %hi(.LCPI3_0)
+; RV32I-NEXT: addi s3, s3, %lo(.LCPI3_0)
+; RV32I-NEXT: neg a0, s0
+; RV32I-NEXT: and a0, s0, a0
+; RV32I-NEXT: mv a1, s2
; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: bnez s2, .LBB3_3
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: li a0, 32
-; RV32I-NEXT: beqz s0, .LBB3_4
-; RV32I-NEXT: .LBB3_2:
-; RV32I-NEXT: srli s1, s1, 27
-; RV32I-NEXT: add s1, s4, s1
-; RV32I-NEXT: lbu a0, 0(s1)
-; RV32I-NEXT: j .LBB3_5
-; RV32I-NEXT: .LBB3_3:
+; RV32I-NEXT: bnez s4, .LBB3_4
+; RV32I-NEXT: # %bb.2: # %cond.false
; RV32I-NEXT: srli a0, a0, 27
-; RV32I-NEXT: add a0, s4, a0
+; RV32I-NEXT: add a0, s3, a0
; RV32I-NEXT: lbu a0, 0(a0)
-; RV32I-NEXT: bnez s0, .LBB3_2
-; RV32I-NEXT: .LBB3_4:
; RV32I-NEXT: addi a0, a0, 32
-; RV32I-NEXT: .LBB3_5:
+; RV32I-NEXT: j .LBB3_5
+; RV32I-NEXT: .LBB3_3:
+; RV32I-NEXT: li a0, 64
+; RV32I-NEXT: j .LBB3_6
+; RV32I-NEXT: .LBB3_4:
+; RV32I-NEXT: srli s1, s1, 27
+; RV32I-NEXT: add s1, s3, s1
+; RV32I-NEXT: lbu a0, 0(s1)
+; RV32I-NEXT: .LBB3_5: # %cond.false
; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: .LBB3_6: # %cond.end
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -441,33 +444,35 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
;
; RV32M-LABEL: test_cttz_i64:
; RV32M: # %bb.0:
+; RV32M-NEXT: or a2, a0, a1
+; RV32M-NEXT: beqz a2, .LBB3_3
+; RV32M-NEXT: # %bb.1: # %cond.false
; RV32M-NEXT: lui a2, 30667
; RV32M-NEXT: addi a3, a2, 1329
; RV32M-NEXT: lui a2, %hi(.LCPI3_0)
; RV32M-NEXT: addi a2, a2, %lo(.LCPI3_0)
-; RV32M-NEXT: bnez a1, .LBB3_3
-; RV32M-NEXT: # %bb.1:
-; RV32M-NEXT: li a1, 32
-; RV32M-NEXT: beqz a0, .LBB3_4
-; RV32M-NEXT: .LBB3_2:
-; RV32M-NEXT: neg a1, a0
-; RV32M-NEXT: and a0, a0, a1
+; RV32M-NEXT: bnez a0, .LBB3_4
+; RV32M-NEXT: # %bb.2: # %cond.false
+; RV32M-NEXT: neg a0, a1
+; RV32M-NEXT: and a0, a1, a0
; RV32M-NEXT: mul a0, a0, a3
; RV32M-NEXT: srli a0, a0, 27
; RV32M-NEXT: add a0, a2, a0
; RV32M-NEXT: lbu a0, 0(a0)
+; RV32M-NEXT: addi a0, a0, 32
; RV32M-NEXT: li a1, 0
; RV32M-NEXT: ret
; RV32M-NEXT: .LBB3_3:
-; RV32M-NEXT: neg a4, a1
-; RV32M-NEXT: and a1, a1, a4
-; RV32M-NEXT: mul a1, a1, a3
-; RV32M-NEXT: srli a1, a1, 27
-; RV32M-NEXT: add a1, a2, a1
-; RV32M-NEXT: lbu a1, 0(a1)
-; RV32M-NEXT: bnez a0, .LBB3_2
+; RV32M-NEXT: li a1, 0
+; RV32M-NEXT: li a0, 64
+; RV32M-NEXT: ret
; RV32M-NEXT: .LBB3_4:
-; RV32M-NEXT: addi a0, a1, 32
+; RV32M-NEXT: neg a1, a0
+; RV32M-NEXT: and a0, a0, a1
+; RV32M-NEXT: mul a0, a0, a3
+; RV32M-NEXT: srli a0, a0, 27
+; RV32M-NEXT: add a0, a2, a0
+; RV32M-NEXT: lbu a0, 0(a0)
; RV32M-NEXT: li a1, 0
; RV32M-NEXT: ret
;
@@ -510,21 +515,28 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
;
; RV32XTHEADBB-LABEL: test_cttz_i64:
; RV32XTHEADBB: # %bb.0:
-; RV32XTHEADBB-NEXT: bnez a0, .LBB3_2
-; RV32XTHEADBB-NEXT: # %bb.1:
+; RV32XTHEADBB-NEXT: or a2, a0, a1
+; RV32XTHEADBB-NEXT: beqz a2, .LBB3_3
+; RV32XTHEADBB-NEXT: # %bb.1: # %cond.false
+; RV32XTHEADBB-NEXT: bnez a0, .LBB3_4
+; RV32XTHEADBB-NEXT: # %bb.2: # %cond.false
; RV32XTHEADBB-NEXT: addi a0, a1, -1
; RV32XTHEADBB-NEXT: not a1, a1
; RV32XTHEADBB-NEXT: and a0, a1, a0
; RV32XTHEADBB-NEXT: th.ff1 a0, a0
; RV32XTHEADBB-NEXT: li a1, 64
-; RV32XTHEADBB-NEXT: j .LBB3_3
-; RV32XTHEADBB-NEXT: .LBB3_2:
+; RV32XTHEADBB-NEXT: j .LBB3_5
+; RV32XTHEADBB-NEXT: .LBB3_3:
+; RV32XTHEADBB-NEXT: li a1, 0
+; RV32XTHEADBB-NEXT: li a0, 64
+; RV32XTHEADBB-NEXT: ret
+; RV32XTHEADBB-NEXT: .LBB3_4:
; RV32XTHEADBB-NEXT: addi a1, a0, -1
; RV32XTHEADBB-NEXT: not a0, a0
; RV32XTHEADBB-NEXT: and a0, a0, a1
; RV32XTHEADBB-NEXT: th.ff1 a0, a0
; RV32XTHEADBB-NEXT: li a1, 32
-; RV32XTHEADBB-NEXT: .LBB3_3:
+; RV32XTHEADBB-NEXT: .LBB3_5: # %cond.false
; RV32XTHEADBB-NEXT: sub a0, a1, a0
; RV32XTHEADBB-NEXT: li a1, 0
; RV32XTHEADBB-NEXT: ret
@@ -1348,14 +1360,17 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
define i64 @test_ctlz_i64(i64 %a) nounwind {
; RV32I-LABEL: test_ctlz_i64:
; RV32I: # %bb.0:
+; RV32I-NEXT: or a2, a0, a1
+; RV32I-NEXT: beqz a2, .LBB11_3
+; RV32I-NEXT: # %bb.1: # %cond.false
; RV32I-NEXT: lui a2, 349525
; RV32I-NEXT: lui a3, 209715
; RV32I-NEXT: lui a5, 61681
; RV32I-NEXT: addi a4, a2, 1365
; RV32I-NEXT: addi a3, a3, 819
; RV32I-NEXT: addi a2, a5, -241
-; RV32I-NEXT: bnez a1, .LBB11_2
-; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: bnez a1, .LBB11_4
+; RV32I-NEXT: # %bb.2: # %cond.false
; RV32I-NEXT: srli a1, a0, 1
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 2
@@ -1385,7 +1400,11 @@ define i64 @test_ctlz_i64(i64 %a) nounwind {
; RV32I-NEXT: addi a0, a0, 32
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB11_2:
+; RV32I-NEXT: .LBB11_3:
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: li a0, 64
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB11_4:
; RV32I-NEXT: srli a0, a1, 1
; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: srli a1, a0, 2
@@ -1468,6 +1487,9 @@ define i64 @test_ctlz_i64(i64 %a) nounwind {
;
; RV32M-LABEL: test_ctlz_i64:
; RV32M: # %bb.0:
+; RV32M-NEXT: or a2, a0, a1
+; RV32M-NEXT: beqz a2, .LBB11_3
+; RV32M-NEXT: # %bb.1: # %cond.false
; RV32M-NEXT: lui a2, 349525
; RV32M-NEXT: lui a3, 209715
; RV32M-NEXT: lui a6, 61681
@@ -1476,8 +1498,8 @@ define i64 @test_ctlz_i64(i64 %a) nounwind {
; RV32M-NEXT: addi a4, a3, 819
; RV32M-NEXT: addi a3, a6, -241
; RV32M-NEXT: addi a2, a7, 257
-; RV32M-NEXT: bnez a1, .LBB11_2
-; RV32M-NEXT: # %bb.1:
+; RV32M-NEXT: bnez a1, .LBB11_4
+; RV32M-NEXT: # %bb.2: # %cond.false
; RV32M-NEXT: srli a1, a0, 1
; RV32M-NEXT: or a0, a0, a1
; RV32M-NEXT: srli a1, a0, 2
@@ -1504,7 +1526,11 @@ define i64 @test_ctlz_i64(i64 %a) nounwind {
; RV32M-NEXT: addi a0, a0, 32
; RV32M-NEXT: li a1, 0
; RV32M-NEXT: ret
-; RV32M-NEXT: .LBB11_2:
+; RV32M-NEXT: .LBB11_3:
+; RV32M-NEXT: li a1, 0
+; RV32M-NEXT: li a0, 64
+; RV32M-NEXT: ret
+; RV32M-NEXT: .LBB11_4:
; RV32M-NEXT: srli a0, a1, 1
; RV32M-NEXT: or a0, a1, a0
; RV32M-NEXT: srli a1, a0, 2
diff --git a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
index 04a2f67c4942b..723437a610ff8 100644
--- a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
@@ -61,14 +61,17 @@ declare i64 @llvm.ctlz.i64(i64, i1)
define i64 @ctlz_i64(i64 %a) nounwind {
; RV32I-LABEL: ctlz_i64:
; RV32I: # %bb.0:
+; RV32I-NEXT: or a2, a0, a1
+; RV32I-NEXT: beqz a2, .LBB1_3
+; RV32I-NEXT: # %bb.1: # %cond.false
; RV32I-NEXT: lui a2, 349525
; RV32I-NEXT: lui a3, 209715
; RV32I-NEXT: lui a5, 61681
; RV32I-NEXT: addi a4, a2, 1365
; RV32I-NEXT: addi a3, a3, 819
; RV32I-NEXT: addi a2, a5, -241
-; RV32I-NEXT: bnez a1, .LBB1_2
-; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: bnez a1, .LBB1_4
+; RV32I-NEXT: # %bb.2: # %cond.false
; RV32I-NEXT: srli a1, a0, 1
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 2
@@ -98,7 +101,11 @@ define i64 @ctlz_i64(i64 %a) nounwind {
; RV32I-NEXT: addi a0, a0, 32
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB1_2:
+; RV32I-NEXT: .LBB1_3:
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: li a0, 64
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB1_4:
; RV32I-NEXT: srli a0, a1, 1
; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: srli a1, a0, 2
@@ -200,39 +207,42 @@ define i64 @cttz_i64(i64 %a) nounwind {
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s2, a1
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: neg a0, a0
-; RV32I-NEXT: and a0, s0, a0
-; RV32I-NEXT: lui a1, 30667
-; RV32I-NEXT: addi s3, a1, 1329
-; RV32I-NEXT: mv a1, s3
+; RV32I-NEXT: mv s0, a1
+; RV32I-NEXT: or a1, a0, a1
+; RV32I-NEXT: beqz a1, .LBB3_3
+; RV32I-NEXT: # %bb.1: # %cond.false
+; RV32I-NEXT: neg a1, a0
+; RV32I-NEXT: and a1, a0, a1
+; RV32I-NEXT: lui a2, 30667
+; RV32I-NEXT: addi s2, a2, 1329
+; RV32I-NEXT: mv s4, a0
+; RV32I-NEXT: mv a0, a1
+; RV32I-NEXT: mv a1, s2
; RV32I-NEXT: call __mulsi3
; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: lui s4, %hi(.LCPI3_0)
-; RV32I-NEXT: addi s4, s4, %lo(.LCPI3_0)
-; RV32I-NEXT: neg a0, s2
-; RV32I-NEXT: and a0, s2, a0
-; RV32I-NEXT: mv a1, s3
+; RV32I-NEXT: lui s3, %hi(.LCPI3_0)
+; RV32I-NEXT: addi s3, s3, %lo(.LCPI3_0)
+; RV32I-NEXT: neg a0, s0
+; RV32I-NEXT: and a0, s0, a0
+; RV32I-NEXT: mv a1, s2
; RV32I-NEXT: call __mulsi3
-; RV32I-NEXT: bnez s2, .LBB3_3
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: li a0, 32
-; RV32I-NEXT: beqz s0, .LBB3_4
-; RV32I-NEXT: .LBB3_2:
-; RV32I-NEXT: srli s1, s1, 27
-; RV32I-NEXT: add s1, s4, s1
-; RV32I-NEXT: lbu a0, 0(s1)
-; RV32I-NEXT: j .LBB3_5
-; RV32I-NEXT: .LBB3_3:
+; RV32I-NEXT: bnez s4, .LBB3_4
+; RV32I-NEXT: # %bb.2: # %cond.false
; RV32I-NEXT: srli a0, a0, 27
-; RV32I-NEXT: add a0, s4, a0
+; RV32I-NEXT: add a0, s3, a0
; RV32I-NEXT: lbu a0, 0(a0)
-; RV32I-NEXT: bnez s0, .LBB3_2
-; RV32I-NEXT: .LBB3_4:
; RV32I-NEXT: addi ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/137197
More information about the llvm-commits mailing list