[llvm] becd418 - [CGP] Despeculate ctlz/cttz with "illegal" integer types (#137197)

via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 29 12:33:44 PDT 2025


Author: Sergei Barannikov
Date: 2025-04-29T22:33:40+03:00
New Revision: becd418626e1ebedde6d095f7bb020e554251b15

URL: https://github.com/llvm/llvm-project/commit/becd418626e1ebedde6d095f7bb020e554251b15
DIFF: https://github.com/llvm/llvm-project/commit/becd418626e1ebedde6d095f7bb020e554251b15.diff

LOG: [CGP] Despeculate ctlz/cttz with "illegal" integer types (#137197)

The code below the removed check looks generic enough to support
arbitrary integer widths. This change helps 32-bit targets avoid
expensive expansion/libcalls in the case of zero input.

Pull Request: https://github.com/llvm/llvm-project/pull/137197
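For readers skimming the test churn below: the despeculateCountZeros helper in CodeGenPrepare (see the hunk below) rewrites an unguarded cttz/ctlz call into an explicit zero test plus a call with the zero-is-poison flag set, so the expensive expansion or libcall only runs on the non-zero path. A rough sketch of the IR shape this produces for a 64-bit cttz (value names are illustrative; the %cond.false/%cond.end labels are the ones that show up in the updated CHECK lines):

  define i64 @cttz64_despeculated(i64 %a) {
  entry:
    ; branch around the count when the input is zero
    %is.zero = icmp eq i64 %a, 0
    br i1 %is.zero, label %cond.end, label %cond.false

  cond.false:
    ; only reached for non-zero %a, so zero-is-poison can be true here
    %ctz = call i64 @llvm.cttz.i64(i64 %a, i1 true)
    br label %cond.end

  cond.end:
    ; a zero input yields the bit width (64) directly via the phi
    %res = phi i64 [ 64, %entry ], [ %ctz, %cond.false ]
    ret i64 %res
  }

  declare i64 @llvm.cttz.i64(i64, i1)

With the legality check removed, this rewrite now also fires for i64 on the 32-bit targets exercised below (RISC-V rv32, ARM, SPARC, 32-bit x86), which is what the CHECK-line updates reflect.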

Added: 
    

Modified: 
    llvm/lib/CodeGen/CodeGenPrepare.cpp
    llvm/test/CodeGen/ARM/cttz.ll
    llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll
    llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
    llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
    llvm/test/CodeGen/RISCV/rv32zbb.ll
    llvm/test/CodeGen/SPARC/ctlz.ll
    llvm/test/CodeGen/SPARC/cttz.ll
    llvm/test/CodeGen/X86/ctlo.ll
    llvm/test/CodeGen/X86/ctlz.ll
    llvm/test/CodeGen/X86/cttz.ll

Removed: 
    llvm/test/CodeGen/X86/lzcnt-cmp.ll


################################################################################
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index e8dc7752b23c0..f9dcb472ed1d2 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2552,9 +2552,9 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
       (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz(Ty)))
     return false;
 
-  // Only handle legal scalar cases. Anything else requires too much work.
+  // Only handle scalar cases. Anything else requires too much work.
   unsigned SizeInBits = Ty->getScalarSizeInBits();
-  if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits())
+  if (Ty->isVectorTy())
     return false;
 
   // Bail if the value is never zero.

diff --git a/llvm/test/CodeGen/ARM/cttz.ll b/llvm/test/CodeGen/ARM/cttz.ll
index 76adc61c5971f..1146ad64ee709 100644
--- a/llvm/test/CodeGen/ARM/cttz.ll
+++ b/llvm/test/CodeGen/ARM/cttz.ll
@@ -221,43 +221,49 @@ define i64 @test_i64(i64 %a) {
 ;
 ; CHECK-6M-LABEL: test_i64:
 ; CHECK-6M:       @ %bb.0:
-; CHECK-6M-NEXT:    .save {r4, r5, r7, lr}
-; CHECK-6M-NEXT:    push {r4, r5, r7, lr}
+; CHECK-6M-NEXT:    .save {r4, r5, r6, lr}
+; CHECK-6M-NEXT:    push {r4, r5, r6, lr}
+; CHECK-6M-NEXT:    mov r3, r1
 ; CHECK-6M-NEXT:    mov r2, r0
-; CHECK-6M-NEXT:    ldr r5, .LCPI3_0
-; CHECK-6M-NEXT:    adr r3, .LCPI3_1
+; CHECK-6M-NEXT:    movs r1, #0
+; CHECK-6M-NEXT:    orrs r0, r3
+; CHECK-6M-NEXT:    beq .LBB3_6
+; CHECK-6M-NEXT:  @ %bb.1: @ %cond.false
+; CHECK-6M-NEXT:    ldr r6, .LCPI3_0
+; CHECK-6M-NEXT:    adr r4, .LCPI3_1
 ; CHECK-6M-NEXT:    movs r0, #32
-; CHECK-6M-NEXT:    cmp r1, #0
-; CHECK-6M-NEXT:    mov r4, r0
-; CHECK-6M-NEXT:    beq .LBB3_2
-; CHECK-6M-NEXT:  @ %bb.1:
-; CHECK-6M-NEXT:    rsbs r4, r1, #0
-; CHECK-6M-NEXT:    ands r4, r1
-; CHECK-6M-NEXT:    muls r4, r5, r4
-; CHECK-6M-NEXT:    lsrs r1, r4, #27
-; CHECK-6M-NEXT:    ldrb r4, [r3, r1]
-; CHECK-6M-NEXT:  .LBB3_2:
-; CHECK-6M-NEXT:    adds r4, #32
-; CHECK-6M-NEXT:    rsbs r1, r2, #0
-; CHECK-6M-NEXT:    ands r1, r2
-; CHECK-6M-NEXT:    muls r5, r1, r5
-; CHECK-6M-NEXT:    lsrs r1, r5, #27
+; CHECK-6M-NEXT:    cmp r3, #0
+; CHECK-6M-NEXT:    mov r5, r0
+; CHECK-6M-NEXT:    beq .LBB3_3
+; CHECK-6M-NEXT:  @ %bb.2: @ %cond.false
+; CHECK-6M-NEXT:    rsbs r5, r3, #0
+; CHECK-6M-NEXT:    ands r5, r3
+; CHECK-6M-NEXT:    muls r5, r6, r5
+; CHECK-6M-NEXT:    lsrs r3, r5, #27
+; CHECK-6M-NEXT:    ldrb r5, [r4, r3]
+; CHECK-6M-NEXT:  .LBB3_3: @ %cond.false
+; CHECK-6M-NEXT:    adds r5, #32
+; CHECK-6M-NEXT:    rsbs r3, r2, #0
+; CHECK-6M-NEXT:    ands r3, r2
+; CHECK-6M-NEXT:    muls r6, r3, r6
+; CHECK-6M-NEXT:    lsrs r3, r6, #27
 ; CHECK-6M-NEXT:    cmp r2, #0
-; CHECK-6M-NEXT:    bne .LBB3_5
-; CHECK-6M-NEXT:  @ %bb.3:
-; CHECK-6M-NEXT:    beq .LBB3_6
-; CHECK-6M-NEXT:  .LBB3_4:
-; CHECK-6M-NEXT:    movs r1, #0
-; CHECK-6M-NEXT:    pop {r4, r5, r7, pc}
-; CHECK-6M-NEXT:  .LBB3_5:
-; CHECK-6M-NEXT:    ldrb r0, [r3, r1]
-; CHECK-6M-NEXT:    bne .LBB3_4
+; CHECK-6M-NEXT:    bne .LBB3_7
+; CHECK-6M-NEXT:  @ %bb.4: @ %cond.false
+; CHECK-6M-NEXT:    beq .LBB3_8
+; CHECK-6M-NEXT:  .LBB3_5: @ %cond.end
+; CHECK-6M-NEXT:    pop {r4, r5, r6, pc}
 ; CHECK-6M-NEXT:  .LBB3_6:
-; CHECK-6M-NEXT:    mov r0, r4
-; CHECK-6M-NEXT:    movs r1, #0
-; CHECK-6M-NEXT:    pop {r4, r5, r7, pc}
+; CHECK-6M-NEXT:    movs r0, #64
+; CHECK-6M-NEXT:    pop {r4, r5, r6, pc}
+; CHECK-6M-NEXT:  .LBB3_7: @ %cond.false
+; CHECK-6M-NEXT:    ldrb r0, [r4, r3]
+; CHECK-6M-NEXT:    bne .LBB3_5
+; CHECK-6M-NEXT:  .LBB3_8: @ %cond.false
+; CHECK-6M-NEXT:    mov r0, r5
+; CHECK-6M-NEXT:    pop {r4, r5, r6, pc}
 ; CHECK-6M-NEXT:    .p2align 2
-; CHECK-6M-NEXT:  @ %bb.7:
+; CHECK-6M-NEXT:  @ %bb.9:
 ; CHECK-6M-NEXT:  .LCPI3_0:
 ; CHECK-6M-NEXT:    .long 125613361 @ 0x77cb531
 ; CHECK-6M-NEXT:  .LCPI3_1:
@@ -265,43 +271,49 @@ define i64 @test_i64(i64 %a) {
 ;
 ; CHECK-8MBASE-LABEL: test_i64:
 ; CHECK-8MBASE:       @ %bb.0:
-; CHECK-8MBASE-NEXT:    .save {r4, r5, r7, lr}
-; CHECK-8MBASE-NEXT:    push {r4, r5, r7, lr}
+; CHECK-8MBASE-NEXT:    .save {r4, r5, r6, lr}
+; CHECK-8MBASE-NEXT:    push {r4, r5, r6, lr}
+; CHECK-8MBASE-NEXT:    mov r3, r1
 ; CHECK-8MBASE-NEXT:    mov r2, r0
-; CHECK-8MBASE-NEXT:    movw r5, #46385
-; CHECK-8MBASE-NEXT:    movt r5, #1916
-; CHECK-8MBASE-NEXT:    adr r3, .LCPI3_0
+; CHECK-8MBASE-NEXT:    movs r1, #0
+; CHECK-8MBASE-NEXT:    orrs r0, r3
+; CHECK-8MBASE-NEXT:    beq .LBB3_6
+; CHECK-8MBASE-NEXT:  @ %bb.1: @ %cond.false
+; CHECK-8MBASE-NEXT:    movw r6, #46385
+; CHECK-8MBASE-NEXT:    movt r6, #1916
+; CHECK-8MBASE-NEXT:    adr r4, .LCPI3_0
 ; CHECK-8MBASE-NEXT:    movs r0, #32
-; CHECK-8MBASE-NEXT:    mov r4, r0
-; CHECK-8MBASE-NEXT:    cbz r1, .LBB3_2
-; CHECK-8MBASE-NEXT:  @ %bb.1:
-; CHECK-8MBASE-NEXT:    rsbs r4, r1, #0
-; CHECK-8MBASE-NEXT:    ands r4, r1
-; CHECK-8MBASE-NEXT:    muls r4, r5, r4
-; CHECK-8MBASE-NEXT:    lsrs r1, r4, #27
-; CHECK-8MBASE-NEXT:    ldrb r4, [r3, r1]
-; CHECK-8MBASE-NEXT:  .LBB3_2:
-; CHECK-8MBASE-NEXT:    adds r4, #32
-; CHECK-8MBASE-NEXT:    rsbs r1, r2, #0
-; CHECK-8MBASE-NEXT:    ands r1, r2
-; CHECK-8MBASE-NEXT:    muls r5, r1, r5
-; CHECK-8MBASE-NEXT:    lsrs r1, r5, #27
+; CHECK-8MBASE-NEXT:    mov r5, r0
+; CHECK-8MBASE-NEXT:    cbz r3, .LBB3_3
+; CHECK-8MBASE-NEXT:  @ %bb.2: @ %cond.false
+; CHECK-8MBASE-NEXT:    rsbs r5, r3, #0
+; CHECK-8MBASE-NEXT:    ands r5, r3
+; CHECK-8MBASE-NEXT:    muls r5, r6, r5
+; CHECK-8MBASE-NEXT:    lsrs r3, r5, #27
+; CHECK-8MBASE-NEXT:    ldrb r5, [r4, r3]
+; CHECK-8MBASE-NEXT:  .LBB3_3: @ %cond.false
+; CHECK-8MBASE-NEXT:    adds r5, #32
+; CHECK-8MBASE-NEXT:    rsbs r3, r2, #0
+; CHECK-8MBASE-NEXT:    ands r3, r2
+; CHECK-8MBASE-NEXT:    muls r6, r3, r6
+; CHECK-8MBASE-NEXT:    lsrs r3, r6, #27
 ; CHECK-8MBASE-NEXT:    cmp r2, #0
-; CHECK-8MBASE-NEXT:    bne .LBB3_5
-; CHECK-8MBASE-NEXT:  @ %bb.3:
-; CHECK-8MBASE-NEXT:    beq .LBB3_6
-; CHECK-8MBASE-NEXT:  .LBB3_4:
-; CHECK-8MBASE-NEXT:    movs r1, #0
-; CHECK-8MBASE-NEXT:    pop {r4, r5, r7, pc}
-; CHECK-8MBASE-NEXT:  .LBB3_5:
-; CHECK-8MBASE-NEXT:    ldrb r0, [r3, r1]
-; CHECK-8MBASE-NEXT:    bne .LBB3_4
+; CHECK-8MBASE-NEXT:    bne .LBB3_7
+; CHECK-8MBASE-NEXT:  @ %bb.4: @ %cond.false
+; CHECK-8MBASE-NEXT:    beq .LBB3_8
+; CHECK-8MBASE-NEXT:  .LBB3_5: @ %cond.end
+; CHECK-8MBASE-NEXT:    pop {r4, r5, r6, pc}
 ; CHECK-8MBASE-NEXT:  .LBB3_6:
-; CHECK-8MBASE-NEXT:    mov r0, r4
-; CHECK-8MBASE-NEXT:    movs r1, #0
-; CHECK-8MBASE-NEXT:    pop {r4, r5, r7, pc}
+; CHECK-8MBASE-NEXT:    movs r0, #64
+; CHECK-8MBASE-NEXT:    pop {r4, r5, r6, pc}
+; CHECK-8MBASE-NEXT:  .LBB3_7: @ %cond.false
+; CHECK-8MBASE-NEXT:    ldrb r0, [r4, r3]
+; CHECK-8MBASE-NEXT:    bne .LBB3_5
+; CHECK-8MBASE-NEXT:  .LBB3_8: @ %cond.false
+; CHECK-8MBASE-NEXT:    mov r0, r5
+; CHECK-8MBASE-NEXT:    pop {r4, r5, r6, pc}
 ; CHECK-8MBASE-NEXT:    .p2align 2
-; CHECK-8MBASE-NEXT:  @ %bb.7:
+; CHECK-8MBASE-NEXT:  @ %bb.9:
 ; CHECK-8MBASE-NEXT:  .LCPI3_0:
 ; CHECK-8MBASE-NEXT:    .ascii "\000\001\034\002\035\016\030\003\036\026\024\017\031\021\004\b\037\033\r\027\025\023\020\007\032\f\022\006\013\005\n\t"
   %tmp = call i64 @llvm.cttz.i64(i64 %a, i1 false)

diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll
index f9af74d6ec323..0632caecf8907 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll
@@ -62,6 +62,9 @@ declare i64 @llvm.ctlz.i64(i64, i1)
 define i64 @ctlz_i64(i64 %a) nounwind {
 ; RV32I-LABEL: ctlz_i64:
 ; RV32I:       # %bb.0:
+; RV32I-NEXT:    or a2, a0, a1
+; RV32I-NEXT:    beqz a2, .LBB1_3
+; RV32I-NEXT:  # %bb.1: # %cond.false
 ; RV32I-NEXT:    lui a2, 349525
 ; RV32I-NEXT:    lui a3, 209715
 ; RV32I-NEXT:    lui a6, 61681
@@ -69,8 +72,8 @@ define i64 @ctlz_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    addi a4, a3, 819
 ; RV32I-NEXT:    addi a3, a6, -241
 ; RV32I-NEXT:    li a2, 32
-; RV32I-NEXT:    beqz a1, .LBB1_2
-; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    beqz a1, .LBB1_4
+; RV32I-NEXT:  # %bb.2: # %cond.false
 ; RV32I-NEXT:    srli a0, a1, 1
 ; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 2
@@ -99,7 +102,11 @@ define i64 @ctlz_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    sub a0, a2, a0
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB1_2:
+; RV32I-NEXT:  .LBB1_3:
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    li a0, 64
+; RV32I-NEXT:    ret
+; RV32I-NEXT:  .LBB1_4:
 ; RV32I-NEXT:    srli a1, a0, 1
 ; RV32I-NEXT:    or a0, a0, a1
 ; RV32I-NEXT:    srli a1, a0, 2
@@ -195,14 +202,17 @@ declare i64 @llvm.cttz.i64(i64, i1)
 define i64 @cttz_i64(i64 %a) nounwind {
 ; RV32I-LABEL: cttz_i64:
 ; RV32I:       # %bb.0:
+; RV32I-NEXT:    or a2, a0, a1
+; RV32I-NEXT:    beqz a2, .LBB3_3
+; RV32I-NEXT:  # %bb.1: # %cond.false
 ; RV32I-NEXT:    lui a2, 349525
 ; RV32I-NEXT:    lui a3, 209715
 ; RV32I-NEXT:    lui a5, 61681
 ; RV32I-NEXT:    addi a4, a2, 1365
 ; RV32I-NEXT:    addi a3, a3, 819
 ; RV32I-NEXT:    addi a2, a5, -241
-; RV32I-NEXT:    beqz a0, .LBB3_2
-; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    beqz a0, .LBB3_4
+; RV32I-NEXT:  # %bb.2: # %cond.false
 ; RV32I-NEXT:    not a1, a0
 ; RV32I-NEXT:    addi a0, a0, -1
 ; RV32I-NEXT:    and a0, a1, a0
@@ -223,7 +233,11 @@ define i64 @cttz_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    srli a0, a0, 24
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB3_2:
+; RV32I-NEXT:  .LBB3_3:
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    li a0, 64
+; RV32I-NEXT:    ret
+; RV32I-NEXT:  .LBB3_4:
 ; RV32I-NEXT:    not a0, a1
 ; RV32I-NEXT:    addi a1, a1, -1
 ; RV32I-NEXT:    and a0, a0, a1

diff --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
index a46168f114bb9..3a7d31253b05d 100644
--- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
@@ -374,39 +374,42 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
-; RV32I-NEXT:    mv s0, a0
-; RV32I-NEXT:    neg a0, a0
-; RV32I-NEXT:    and a0, s0, a0
-; RV32I-NEXT:    lui a1, 30667
-; RV32I-NEXT:    addi s3, a1, 1329
-; RV32I-NEXT:    mv a1, s3
+; RV32I-NEXT:    mv s0, a1
+; RV32I-NEXT:    or a1, a0, a1
+; RV32I-NEXT:    beqz a1, .LBB3_3
+; RV32I-NEXT:  # %bb.1: # %cond.false
+; RV32I-NEXT:    neg a1, a0
+; RV32I-NEXT:    and a1, a0, a1
+; RV32I-NEXT:    lui a2, 30667
+; RV32I-NEXT:    addi s2, a2, 1329
+; RV32I-NEXT:    mv s4, a0
+; RV32I-NEXT:    mv a0, a1
+; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    call __mulsi3
 ; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    lui s4, %hi(.LCPI3_0)
-; RV32I-NEXT:    addi s4, s4, %lo(.LCPI3_0)
-; RV32I-NEXT:    neg a0, s2
-; RV32I-NEXT:    and a0, s2, a0
-; RV32I-NEXT:    mv a1, s3
+; RV32I-NEXT:    lui s3, %hi(.LCPI3_0)
+; RV32I-NEXT:    addi s3, s3, %lo(.LCPI3_0)
+; RV32I-NEXT:    neg a0, s0
+; RV32I-NEXT:    and a0, s0, a0
+; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    call __mulsi3
-; RV32I-NEXT:    bnez s2, .LBB3_3
-; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    li a0, 32
-; RV32I-NEXT:    beqz s0, .LBB3_4
-; RV32I-NEXT:  .LBB3_2:
-; RV32I-NEXT:    srli s1, s1, 27
-; RV32I-NEXT:    add s1, s4, s1
-; RV32I-NEXT:    lbu a0, 0(s1)
-; RV32I-NEXT:    j .LBB3_5
-; RV32I-NEXT:  .LBB3_3:
+; RV32I-NEXT:    bnez s4, .LBB3_4
+; RV32I-NEXT:  # %bb.2: # %cond.false
 ; RV32I-NEXT:    srli a0, a0, 27
-; RV32I-NEXT:    add a0, s4, a0
+; RV32I-NEXT:    add a0, s3, a0
 ; RV32I-NEXT:    lbu a0, 0(a0)
-; RV32I-NEXT:    bnez s0, .LBB3_2
-; RV32I-NEXT:  .LBB3_4:
 ; RV32I-NEXT:    addi a0, a0, 32
-; RV32I-NEXT:  .LBB3_5:
+; RV32I-NEXT:    j .LBB3_5
+; RV32I-NEXT:  .LBB3_3:
+; RV32I-NEXT:    li a0, 64
+; RV32I-NEXT:    j .LBB3_6
+; RV32I-NEXT:  .LBB3_4:
+; RV32I-NEXT:    srli s1, s1, 27
+; RV32I-NEXT:    add s1, s3, s1
+; RV32I-NEXT:    lbu a0, 0(s1)
+; RV32I-NEXT:  .LBB3_5: # %cond.false
 ; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:  .LBB3_6: # %cond.end
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
@@ -441,33 +444,35 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
 ;
 ; RV32M-LABEL: test_cttz_i64:
 ; RV32M:       # %bb.0:
+; RV32M-NEXT:    or a2, a0, a1
+; RV32M-NEXT:    beqz a2, .LBB3_3
+; RV32M-NEXT:  # %bb.1: # %cond.false
 ; RV32M-NEXT:    lui a2, 30667
 ; RV32M-NEXT:    addi a3, a2, 1329
 ; RV32M-NEXT:    lui a2, %hi(.LCPI3_0)
 ; RV32M-NEXT:    addi a2, a2, %lo(.LCPI3_0)
-; RV32M-NEXT:    bnez a1, .LBB3_3
-; RV32M-NEXT:  # %bb.1:
-; RV32M-NEXT:    li a1, 32
-; RV32M-NEXT:    beqz a0, .LBB3_4
-; RV32M-NEXT:  .LBB3_2:
-; RV32M-NEXT:    neg a1, a0
-; RV32M-NEXT:    and a0, a0, a1
+; RV32M-NEXT:    bnez a0, .LBB3_4
+; RV32M-NEXT:  # %bb.2: # %cond.false
+; RV32M-NEXT:    neg a0, a1
+; RV32M-NEXT:    and a0, a1, a0
 ; RV32M-NEXT:    mul a0, a0, a3
 ; RV32M-NEXT:    srli a0, a0, 27
 ; RV32M-NEXT:    add a0, a2, a0
 ; RV32M-NEXT:    lbu a0, 0(a0)
+; RV32M-NEXT:    addi a0, a0, 32
 ; RV32M-NEXT:    li a1, 0
 ; RV32M-NEXT:    ret
 ; RV32M-NEXT:  .LBB3_3:
-; RV32M-NEXT:    neg a4, a1
-; RV32M-NEXT:    and a1, a1, a4
-; RV32M-NEXT:    mul a1, a1, a3
-; RV32M-NEXT:    srli a1, a1, 27
-; RV32M-NEXT:    add a1, a2, a1
-; RV32M-NEXT:    lbu a1, 0(a1)
-; RV32M-NEXT:    bnez a0, .LBB3_2
+; RV32M-NEXT:    li a1, 0
+; RV32M-NEXT:    li a0, 64
+; RV32M-NEXT:    ret
 ; RV32M-NEXT:  .LBB3_4:
-; RV32M-NEXT:    addi a0, a1, 32
+; RV32M-NEXT:    neg a1, a0
+; RV32M-NEXT:    and a0, a0, a1
+; RV32M-NEXT:    mul a0, a0, a3
+; RV32M-NEXT:    srli a0, a0, 27
+; RV32M-NEXT:    add a0, a2, a0
+; RV32M-NEXT:    lbu a0, 0(a0)
 ; RV32M-NEXT:    li a1, 0
 ; RV32M-NEXT:    ret
 ;
@@ -510,21 +515,28 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
 ;
 ; RV32XTHEADBB-LABEL: test_cttz_i64:
 ; RV32XTHEADBB:       # %bb.0:
-; RV32XTHEADBB-NEXT:    bnez a0, .LBB3_2
-; RV32XTHEADBB-NEXT:  # %bb.1:
+; RV32XTHEADBB-NEXT:    or a2, a0, a1
+; RV32XTHEADBB-NEXT:    beqz a2, .LBB3_3
+; RV32XTHEADBB-NEXT:  # %bb.1: # %cond.false
+; RV32XTHEADBB-NEXT:    bnez a0, .LBB3_4
+; RV32XTHEADBB-NEXT:  # %bb.2: # %cond.false
 ; RV32XTHEADBB-NEXT:    addi a0, a1, -1
 ; RV32XTHEADBB-NEXT:    not a1, a1
 ; RV32XTHEADBB-NEXT:    and a0, a1, a0
 ; RV32XTHEADBB-NEXT:    th.ff1 a0, a0
 ; RV32XTHEADBB-NEXT:    li a1, 64
-; RV32XTHEADBB-NEXT:    j .LBB3_3
-; RV32XTHEADBB-NEXT:  .LBB3_2:
+; RV32XTHEADBB-NEXT:    j .LBB3_5
+; RV32XTHEADBB-NEXT:  .LBB3_3:
+; RV32XTHEADBB-NEXT:    li a1, 0
+; RV32XTHEADBB-NEXT:    li a0, 64
+; RV32XTHEADBB-NEXT:    ret
+; RV32XTHEADBB-NEXT:  .LBB3_4:
 ; RV32XTHEADBB-NEXT:    addi a1, a0, -1
 ; RV32XTHEADBB-NEXT:    not a0, a0
 ; RV32XTHEADBB-NEXT:    and a0, a0, a1
 ; RV32XTHEADBB-NEXT:    th.ff1 a0, a0
 ; RV32XTHEADBB-NEXT:    li a1, 32
-; RV32XTHEADBB-NEXT:  .LBB3_3:
+; RV32XTHEADBB-NEXT:  .LBB3_5: # %cond.false
 ; RV32XTHEADBB-NEXT:    sub a0, a1, a0
 ; RV32XTHEADBB-NEXT:    li a1, 0
 ; RV32XTHEADBB-NEXT:    ret
@@ -1348,14 +1360,17 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
 define i64 @test_ctlz_i64(i64 %a) nounwind {
 ; RV32I-LABEL: test_ctlz_i64:
 ; RV32I:       # %bb.0:
+; RV32I-NEXT:    or a2, a0, a1
+; RV32I-NEXT:    beqz a2, .LBB11_3
+; RV32I-NEXT:  # %bb.1: # %cond.false
 ; RV32I-NEXT:    lui a2, 349525
 ; RV32I-NEXT:    lui a3, 209715
 ; RV32I-NEXT:    lui a5, 61681
 ; RV32I-NEXT:    addi a4, a2, 1365
 ; RV32I-NEXT:    addi a3, a3, 819
 ; RV32I-NEXT:    addi a2, a5, -241
-; RV32I-NEXT:    bnez a1, .LBB11_2
-; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    bnez a1, .LBB11_4
+; RV32I-NEXT:  # %bb.2: # %cond.false
 ; RV32I-NEXT:    srli a1, a0, 1
 ; RV32I-NEXT:    or a0, a0, a1
 ; RV32I-NEXT:    srli a1, a0, 2
@@ -1385,7 +1400,11 @@ define i64 @test_ctlz_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    addi a0, a0, 32
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB11_2:
+; RV32I-NEXT:  .LBB11_3:
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    li a0, 64
+; RV32I-NEXT:    ret
+; RV32I-NEXT:  .LBB11_4:
 ; RV32I-NEXT:    srli a0, a1, 1
 ; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 2
@@ -1468,6 +1487,9 @@ define i64 @test_ctlz_i64(i64 %a) nounwind {
 ;
 ; RV32M-LABEL: test_ctlz_i64:
 ; RV32M:       # %bb.0:
+; RV32M-NEXT:    or a2, a0, a1
+; RV32M-NEXT:    beqz a2, .LBB11_3
+; RV32M-NEXT:  # %bb.1: # %cond.false
 ; RV32M-NEXT:    lui a2, 349525
 ; RV32M-NEXT:    lui a3, 209715
 ; RV32M-NEXT:    lui a6, 61681
@@ -1476,8 +1498,8 @@ define i64 @test_ctlz_i64(i64 %a) nounwind {
 ; RV32M-NEXT:    addi a4, a3, 819
 ; RV32M-NEXT:    addi a3, a6, -241
 ; RV32M-NEXT:    addi a2, a7, 257
-; RV32M-NEXT:    bnez a1, .LBB11_2
-; RV32M-NEXT:  # %bb.1:
+; RV32M-NEXT:    bnez a1, .LBB11_4
+; RV32M-NEXT:  # %bb.2: # %cond.false
 ; RV32M-NEXT:    srli a1, a0, 1
 ; RV32M-NEXT:    or a0, a0, a1
 ; RV32M-NEXT:    srli a1, a0, 2
@@ -1504,7 +1526,11 @@ define i64 @test_ctlz_i64(i64 %a) nounwind {
 ; RV32M-NEXT:    addi a0, a0, 32
 ; RV32M-NEXT:    li a1, 0
 ; RV32M-NEXT:    ret
-; RV32M-NEXT:  .LBB11_2:
+; RV32M-NEXT:  .LBB11_3:
+; RV32M-NEXT:    li a1, 0
+; RV32M-NEXT:    li a0, 64
+; RV32M-NEXT:    ret
+; RV32M-NEXT:  .LBB11_4:
 ; RV32M-NEXT:    srli a0, a1, 1
 ; RV32M-NEXT:    or a0, a1, a0
 ; RV32M-NEXT:    srli a1, a0, 2

diff --git a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
index 04a2f67c4942b..723437a610ff8 100644
--- a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
@@ -61,14 +61,17 @@ declare i64 @llvm.ctlz.i64(i64, i1)
 define i64 @ctlz_i64(i64 %a) nounwind {
 ; RV32I-LABEL: ctlz_i64:
 ; RV32I:       # %bb.0:
+; RV32I-NEXT:    or a2, a0, a1
+; RV32I-NEXT:    beqz a2, .LBB1_3
+; RV32I-NEXT:  # %bb.1: # %cond.false
 ; RV32I-NEXT:    lui a2, 349525
 ; RV32I-NEXT:    lui a3, 209715
 ; RV32I-NEXT:    lui a5, 61681
 ; RV32I-NEXT:    addi a4, a2, 1365
 ; RV32I-NEXT:    addi a3, a3, 819
 ; RV32I-NEXT:    addi a2, a5, -241
-; RV32I-NEXT:    bnez a1, .LBB1_2
-; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    bnez a1, .LBB1_4
+; RV32I-NEXT:  # %bb.2: # %cond.false
 ; RV32I-NEXT:    srli a1, a0, 1
 ; RV32I-NEXT:    or a0, a0, a1
 ; RV32I-NEXT:    srli a1, a0, 2
@@ -98,7 +101,11 @@ define i64 @ctlz_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    addi a0, a0, 32
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB1_2:
+; RV32I-NEXT:  .LBB1_3:
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    li a0, 64
+; RV32I-NEXT:    ret
+; RV32I-NEXT:  .LBB1_4:
 ; RV32I-NEXT:    srli a0, a1, 1
 ; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 2
@@ -200,39 +207,42 @@ define i64 @cttz_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
-; RV32I-NEXT:    mv s0, a0
-; RV32I-NEXT:    neg a0, a0
-; RV32I-NEXT:    and a0, s0, a0
-; RV32I-NEXT:    lui a1, 30667
-; RV32I-NEXT:    addi s3, a1, 1329
-; RV32I-NEXT:    mv a1, s3
+; RV32I-NEXT:    mv s0, a1
+; RV32I-NEXT:    or a1, a0, a1
+; RV32I-NEXT:    beqz a1, .LBB3_3
+; RV32I-NEXT:  # %bb.1: # %cond.false
+; RV32I-NEXT:    neg a1, a0
+; RV32I-NEXT:    and a1, a0, a1
+; RV32I-NEXT:    lui a2, 30667
+; RV32I-NEXT:    addi s2, a2, 1329
+; RV32I-NEXT:    mv s4, a0
+; RV32I-NEXT:    mv a0, a1
+; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    call __mulsi3
 ; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    lui s4, %hi(.LCPI3_0)
-; RV32I-NEXT:    addi s4, s4, %lo(.LCPI3_0)
-; RV32I-NEXT:    neg a0, s2
-; RV32I-NEXT:    and a0, s2, a0
-; RV32I-NEXT:    mv a1, s3
+; RV32I-NEXT:    lui s3, %hi(.LCPI3_0)
+; RV32I-NEXT:    addi s3, s3, %lo(.LCPI3_0)
+; RV32I-NEXT:    neg a0, s0
+; RV32I-NEXT:    and a0, s0, a0
+; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    call __mulsi3
-; RV32I-NEXT:    bnez s2, .LBB3_3
-; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    li a0, 32
-; RV32I-NEXT:    beqz s0, .LBB3_4
-; RV32I-NEXT:  .LBB3_2:
-; RV32I-NEXT:    srli s1, s1, 27
-; RV32I-NEXT:    add s1, s4, s1
-; RV32I-NEXT:    lbu a0, 0(s1)
-; RV32I-NEXT:    j .LBB3_5
-; RV32I-NEXT:  .LBB3_3:
+; RV32I-NEXT:    bnez s4, .LBB3_4
+; RV32I-NEXT:  # %bb.2: # %cond.false
 ; RV32I-NEXT:    srli a0, a0, 27
-; RV32I-NEXT:    add a0, s4, a0
+; RV32I-NEXT:    add a0, s3, a0
 ; RV32I-NEXT:    lbu a0, 0(a0)
-; RV32I-NEXT:    bnez s0, .LBB3_2
-; RV32I-NEXT:  .LBB3_4:
 ; RV32I-NEXT:    addi a0, a0, 32
-; RV32I-NEXT:  .LBB3_5:
+; RV32I-NEXT:    j .LBB3_5
+; RV32I-NEXT:  .LBB3_3:
+; RV32I-NEXT:    li a0, 64
+; RV32I-NEXT:    j .LBB3_6
+; RV32I-NEXT:  .LBB3_4:
+; RV32I-NEXT:    srli s1, s1, 27
+; RV32I-NEXT:    add s1, s3, s1
+; RV32I-NEXT:    lbu a0, 0(s1)
+; RV32I-NEXT:  .LBB3_5: # %cond.false
 ; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:  .LBB3_6: # %cond.end
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
@@ -244,21 +254,28 @@ define i64 @cttz_i64(i64 %a) nounwind {
 ;
 ; RV32XTHEADBB-LABEL: cttz_i64:
 ; RV32XTHEADBB:       # %bb.0:
-; RV32XTHEADBB-NEXT:    bnez a0, .LBB3_2
-; RV32XTHEADBB-NEXT:  # %bb.1:
+; RV32XTHEADBB-NEXT:    or a2, a0, a1
+; RV32XTHEADBB-NEXT:    beqz a2, .LBB3_3
+; RV32XTHEADBB-NEXT:  # %bb.1: # %cond.false
+; RV32XTHEADBB-NEXT:    bnez a0, .LBB3_4
+; RV32XTHEADBB-NEXT:  # %bb.2: # %cond.false
 ; RV32XTHEADBB-NEXT:    addi a0, a1, -1
 ; RV32XTHEADBB-NEXT:    not a1, a1
 ; RV32XTHEADBB-NEXT:    and a0, a1, a0
 ; RV32XTHEADBB-NEXT:    th.ff1 a0, a0
 ; RV32XTHEADBB-NEXT:    li a1, 64
-; RV32XTHEADBB-NEXT:    j .LBB3_3
-; RV32XTHEADBB-NEXT:  .LBB3_2:
+; RV32XTHEADBB-NEXT:    j .LBB3_5
+; RV32XTHEADBB-NEXT:  .LBB3_3:
+; RV32XTHEADBB-NEXT:    li a1, 0
+; RV32XTHEADBB-NEXT:    li a0, 64
+; RV32XTHEADBB-NEXT:    ret
+; RV32XTHEADBB-NEXT:  .LBB3_4:
 ; RV32XTHEADBB-NEXT:    addi a1, a0, -1
 ; RV32XTHEADBB-NEXT:    not a0, a0
 ; RV32XTHEADBB-NEXT:    and a0, a0, a1
 ; RV32XTHEADBB-NEXT:    th.ff1 a0, a0
 ; RV32XTHEADBB-NEXT:    li a1, 32
-; RV32XTHEADBB-NEXT:  .LBB3_3:
+; RV32XTHEADBB-NEXT:  .LBB3_5: # %cond.false
 ; RV32XTHEADBB-NEXT:    sub a0, a1, a0
 ; RV32XTHEADBB-NEXT:    li a1, 0
 ; RV32XTHEADBB-NEXT:    ret

diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
index 98c86da41afa1..0f2284637ca6a 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -61,14 +61,17 @@ declare i64 @llvm.ctlz.i64(i64, i1)
 define i64 @ctlz_i64(i64 %a) nounwind {
 ; RV32I-LABEL: ctlz_i64:
 ; RV32I:       # %bb.0:
+; RV32I-NEXT:    or a2, a0, a1
+; RV32I-NEXT:    beqz a2, .LBB1_3
+; RV32I-NEXT:  # %bb.1: # %cond.false
 ; RV32I-NEXT:    lui a2, 349525
 ; RV32I-NEXT:    lui a3, 209715
 ; RV32I-NEXT:    lui a5, 61681
 ; RV32I-NEXT:    addi a4, a2, 1365
 ; RV32I-NEXT:    addi a3, a3, 819
 ; RV32I-NEXT:    addi a2, a5, -241
-; RV32I-NEXT:    bnez a1, .LBB1_2
-; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    bnez a1, .LBB1_4
+; RV32I-NEXT:  # %bb.2: # %cond.false
 ; RV32I-NEXT:    srli a1, a0, 1
 ; RV32I-NEXT:    or a0, a0, a1
 ; RV32I-NEXT:    srli a1, a0, 2
@@ -98,7 +101,11 @@ define i64 @ctlz_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    addi a0, a0, 32
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB1_2:
+; RV32I-NEXT:  .LBB1_3:
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    li a0, 64
+; RV32I-NEXT:    ret
+; RV32I-NEXT:  .LBB1_4:
 ; RV32I-NEXT:    srli a0, a1, 1
 ; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 2
@@ -190,39 +197,42 @@ define i64 @cttz_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
-; RV32I-NEXT:    mv s0, a0
-; RV32I-NEXT:    neg a0, a0
-; RV32I-NEXT:    and a0, s0, a0
-; RV32I-NEXT:    lui a1, 30667
-; RV32I-NEXT:    addi s3, a1, 1329
-; RV32I-NEXT:    mv a1, s3
+; RV32I-NEXT:    mv s0, a1
+; RV32I-NEXT:    or a1, a0, a1
+; RV32I-NEXT:    beqz a1, .LBB3_3
+; RV32I-NEXT:  # %bb.1: # %cond.false
+; RV32I-NEXT:    neg a1, a0
+; RV32I-NEXT:    and a1, a0, a1
+; RV32I-NEXT:    lui a2, 30667
+; RV32I-NEXT:    addi s2, a2, 1329
+; RV32I-NEXT:    mv s4, a0
+; RV32I-NEXT:    mv a0, a1
+; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    call __mulsi3
 ; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    lui s4, %hi(.LCPI3_0)
-; RV32I-NEXT:    addi s4, s4, %lo(.LCPI3_0)
-; RV32I-NEXT:    neg a0, s2
-; RV32I-NEXT:    and a0, s2, a0
-; RV32I-NEXT:    mv a1, s3
+; RV32I-NEXT:    lui s3, %hi(.LCPI3_0)
+; RV32I-NEXT:    addi s3, s3, %lo(.LCPI3_0)
+; RV32I-NEXT:    neg a0, s0
+; RV32I-NEXT:    and a0, s0, a0
+; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    call __mulsi3
-; RV32I-NEXT:    bnez s2, .LBB3_3
-; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    li a0, 32
-; RV32I-NEXT:    beqz s0, .LBB3_4
-; RV32I-NEXT:  .LBB3_2:
-; RV32I-NEXT:    srli s1, s1, 27
-; RV32I-NEXT:    add s1, s4, s1
-; RV32I-NEXT:    lbu a0, 0(s1)
-; RV32I-NEXT:    j .LBB3_5
-; RV32I-NEXT:  .LBB3_3:
+; RV32I-NEXT:    bnez s4, .LBB3_4
+; RV32I-NEXT:  # %bb.2: # %cond.false
 ; RV32I-NEXT:    srli a0, a0, 27
-; RV32I-NEXT:    add a0, s4, a0
+; RV32I-NEXT:    add a0, s3, a0
 ; RV32I-NEXT:    lbu a0, 0(a0)
-; RV32I-NEXT:    bnez s0, .LBB3_2
-; RV32I-NEXT:  .LBB3_4:
 ; RV32I-NEXT:    addi a0, a0, 32
-; RV32I-NEXT:  .LBB3_5:
+; RV32I-NEXT:    j .LBB3_5
+; RV32I-NEXT:  .LBB3_3:
+; RV32I-NEXT:    li a0, 64
+; RV32I-NEXT:    j .LBB3_6
+; RV32I-NEXT:  .LBB3_4:
+; RV32I-NEXT:    srli s1, s1, 27
+; RV32I-NEXT:    add s1, s3, s1
+; RV32I-NEXT:    lbu a0, 0(s1)
+; RV32I-NEXT:  .LBB3_5: # %cond.false
 ; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:  .LBB3_6: # %cond.end
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload

diff --git a/llvm/test/CodeGen/SPARC/ctlz.ll b/llvm/test/CodeGen/SPARC/ctlz.ll
index 72505f221469e..75930190f5166 100644
--- a/llvm/test/CodeGen/SPARC/ctlz.ll
+++ b/llvm/test/CodeGen/SPARC/ctlz.ll
@@ -156,96 +156,54 @@ define i64 @i64_nopoison(i64 %x) nounwind {
 ; SPARC-LABEL: i64_nopoison:
 ; SPARC:       ! %bb.0:
 ; SPARC-NEXT:    save %sp, -96, %sp
+; SPARC-NEXT:    or %i1, %i0, %i2
+; SPARC-NEXT:    cmp %i2, 0
+; SPARC-NEXT:    be .LBB2_4
+; SPARC-NEXT:    nop
+; SPARC-NEXT:  ! %bb.1: ! %cond.false
+; SPARC-NEXT:    call __clzsi2
+; SPARC-NEXT:    mov %i1, %o0
+; SPARC-NEXT:    mov %o0, %i2
 ; SPARC-NEXT:    call __clzsi2
 ; SPARC-NEXT:    mov %i0, %o0
 ; SPARC-NEXT:    cmp %i0, 0
-; SPARC-NEXT:    bne .LBB2_2
-; SPARC-NEXT:    nop
-; SPARC-NEXT:  ! %bb.1:
-; SPARC-NEXT:    srl %i1, 1, %i0
-; SPARC-NEXT:    or %i1, %i0, %i0
-; SPARC-NEXT:    srl %i0, 2, %i1
-; SPARC-NEXT:    or %i0, %i1, %i0
-; SPARC-NEXT:    srl %i0, 4, %i1
-; SPARC-NEXT:    or %i0, %i1, %i0
-; SPARC-NEXT:    srl %i0, 8, %i1
-; SPARC-NEXT:    or %i0, %i1, %i0
-; SPARC-NEXT:    srl %i0, 16, %i1
-; SPARC-NEXT:    or %i0, %i1, %i0
-; SPARC-NEXT:    xor %i0, -1, %i0
-; SPARC-NEXT:    srl %i0, 1, %i1
-; SPARC-NEXT:    sethi 1398101, %i2
-; SPARC-NEXT:    or %i2, 341, %i2
-; SPARC-NEXT:    and %i1, %i2, %i1
-; SPARC-NEXT:    sub %i0, %i1, %i0
-; SPARC-NEXT:    sethi 838860, %i1
-; SPARC-NEXT:    or %i1, 819, %i1
-; SPARC-NEXT:    and %i0, %i1, %i2
-; SPARC-NEXT:    srl %i0, 2, %i0
-; SPARC-NEXT:    and %i0, %i1, %i0
-; SPARC-NEXT:    add %i2, %i0, %i0
-; SPARC-NEXT:    srl %i0, 4, %i1
-; SPARC-NEXT:    add %i0, %i1, %i0
-; SPARC-NEXT:    sethi 246723, %i1
-; SPARC-NEXT:    or %i1, 783, %i1
-; SPARC-NEXT:    and %i0, %i1, %i0
-; SPARC-NEXT:    sll %i0, 8, %i1
-; SPARC-NEXT:    add %i0, %i1, %i0
-; SPARC-NEXT:    sll %i0, 16, %i1
-; SPARC-NEXT:    add %i0, %i1, %i0
-; SPARC-NEXT:    srl %i0, 24, %i0
-; SPARC-NEXT:    add %i0, 32, %o0
-; SPARC-NEXT:  .LBB2_2:
+; SPARC-NEXT:    bne .LBB2_3
+; SPARC-NEXT:    mov %o0, %i1
+; SPARC-NEXT:  ! %bb.2: ! %cond.false
+; SPARC-NEXT:    add %i2, 32, %i1
+; SPARC-NEXT:  .LBB2_3: ! %cond.false
+; SPARC-NEXT:    ret
+; SPARC-NEXT:    restore %g0, %g0, %o0
+; SPARC-NEXT:  .LBB2_4:
 ; SPARC-NEXT:    mov %g0, %i0
 ; SPARC-NEXT:    ret
-; SPARC-NEXT:    restore %g0, %o0, %o1
+; SPARC-NEXT:    restore %g0, 64, %o1
 ;
 ; SPARC-POPC-LABEL: i64_nopoison:
 ; SPARC-POPC:       ! %bb.0:
 ; SPARC-POPC-NEXT:    save %sp, -96, %sp
+; SPARC-POPC-NEXT:    or %i1, %i0, %i2
+; SPARC-POPC-NEXT:    cmp %i2, 0
+; SPARC-POPC-NEXT:    be .LBB2_4
+; SPARC-POPC-NEXT:    nop
+; SPARC-POPC-NEXT:  ! %bb.1: ! %cond.false
+; SPARC-POPC-NEXT:    call __clzsi2
+; SPARC-POPC-NEXT:    mov %i1, %o0
+; SPARC-POPC-NEXT:    mov %o0, %i2
 ; SPARC-POPC-NEXT:    call __clzsi2
 ; SPARC-POPC-NEXT:    mov %i0, %o0
 ; SPARC-POPC-NEXT:    cmp %i0, 0
-; SPARC-POPC-NEXT:    bne .LBB2_2
-; SPARC-POPC-NEXT:    nop
-; SPARC-POPC-NEXT:  ! %bb.1:
-; SPARC-POPC-NEXT:    srl %i1, 1, %i0
-; SPARC-POPC-NEXT:    or %i1, %i0, %i0
-; SPARC-POPC-NEXT:    srl %i0, 2, %i1
-; SPARC-POPC-NEXT:    or %i0, %i1, %i0
-; SPARC-POPC-NEXT:    srl %i0, 4, %i1
-; SPARC-POPC-NEXT:    or %i0, %i1, %i0
-; SPARC-POPC-NEXT:    srl %i0, 8, %i1
-; SPARC-POPC-NEXT:    or %i0, %i1, %i0
-; SPARC-POPC-NEXT:    srl %i0, 16, %i1
-; SPARC-POPC-NEXT:    or %i0, %i1, %i0
-; SPARC-POPC-NEXT:    xor %i0, -1, %i0
-; SPARC-POPC-NEXT:    srl %i0, 1, %i1
-; SPARC-POPC-NEXT:    sethi 1398101, %i2
-; SPARC-POPC-NEXT:    or %i2, 341, %i2
-; SPARC-POPC-NEXT:    and %i1, %i2, %i1
-; SPARC-POPC-NEXT:    sub %i0, %i1, %i0
-; SPARC-POPC-NEXT:    sethi 838860, %i1
-; SPARC-POPC-NEXT:    or %i1, 819, %i1
-; SPARC-POPC-NEXT:    and %i0, %i1, %i2
-; SPARC-POPC-NEXT:    srl %i0, 2, %i0
-; SPARC-POPC-NEXT:    and %i0, %i1, %i0
-; SPARC-POPC-NEXT:    add %i2, %i0, %i0
-; SPARC-POPC-NEXT:    srl %i0, 4, %i1
-; SPARC-POPC-NEXT:    add %i0, %i1, %i0
-; SPARC-POPC-NEXT:    sethi 246723, %i1
-; SPARC-POPC-NEXT:    or %i1, 783, %i1
-; SPARC-POPC-NEXT:    and %i0, %i1, %i0
-; SPARC-POPC-NEXT:    sll %i0, 8, %i1
-; SPARC-POPC-NEXT:    add %i0, %i1, %i0
-; SPARC-POPC-NEXT:    sll %i0, 16, %i1
-; SPARC-POPC-NEXT:    add %i0, %i1, %i0
-; SPARC-POPC-NEXT:    srl %i0, 24, %i0
-; SPARC-POPC-NEXT:    add %i0, 32, %o0
-; SPARC-POPC-NEXT:  .LBB2_2:
+; SPARC-POPC-NEXT:    bne .LBB2_3
+; SPARC-POPC-NEXT:    mov %o0, %i1
+; SPARC-POPC-NEXT:  ! %bb.2: ! %cond.false
+; SPARC-POPC-NEXT:    add %i2, 32, %i1
+; SPARC-POPC-NEXT:  .LBB2_3: ! %cond.false
+; SPARC-POPC-NEXT:    ret
+; SPARC-POPC-NEXT:    restore %g0, %g0, %o0
+; SPARC-POPC-NEXT:  .LBB2_4:
 ; SPARC-POPC-NEXT:    mov %g0, %i0
 ; SPARC-POPC-NEXT:    ret
-; SPARC-POPC-NEXT:    restore %g0, %o0, %o1
+; SPARC-POPC-NEXT:    restore %g0, 64, %o1
 ;
 ; SPARC-VIS3-LABEL: i64_nopoison:
 ; SPARC-VIS3:       ! %bb.0:

diff --git a/llvm/test/CodeGen/SPARC/cttz.ll b/llvm/test/CodeGen/SPARC/cttz.ll
index 05c47b868c830..edabd7d560eda 100644
--- a/llvm/test/CodeGen/SPARC/cttz.ll
+++ b/llvm/test/CodeGen/SPARC/cttz.ll
@@ -184,21 +184,31 @@ define i32 @i32_poison(i32 %x) nounwind {
 define i64 @i64_nopoison(i64 %x) nounwind {
 ; SPARC-LABEL: i64_nopoison:
 ; SPARC:       ! %bb.0:
+; SPARC-NEXT:    or %o1, %o0, %o2
+; SPARC-NEXT:    cmp %o2, 0
+; SPARC-NEXT:    be .LBB2_3
+; SPARC-NEXT:    nop
+; SPARC-NEXT:  ! %bb.1: ! %cond.false
 ; SPARC-NEXT:    sethi 122669, %o2
 ; SPARC-NEXT:    or %o2, 305, %o2
 ; SPARC-NEXT:    sethi %hi(.LCPI2_0), %o3
-; SPARC-NEXT:    cmp %o0, 0
-; SPARC-NEXT:    be .LBB2_3
+; SPARC-NEXT:    cmp %o1, 0
+; SPARC-NEXT:    bne .LBB2_4
 ; SPARC-NEXT:    add %o3, %lo(.LCPI2_0), %o3
-; SPARC-NEXT:  ! %bb.1:
-; SPARC-NEXT:    sub %g0, %o0, %o4
-; SPARC-NEXT:    and %o0, %o4, %o0
+; SPARC-NEXT:  ! %bb.2: ! %cond.false
+; SPARC-NEXT:    sub %g0, %o0, %o1
+; SPARC-NEXT:    and %o0, %o1, %o0
 ; SPARC-NEXT:    smul %o0, %o2, %o0
 ; SPARC-NEXT:    srl %o0, 27, %o0
-; SPARC-NEXT:    cmp %o1, 0
-; SPARC-NEXT:    be .LBB2_4
 ; SPARC-NEXT:    ldub [%o3+%o0], %o0
-; SPARC-NEXT:  .LBB2_2:
+; SPARC-NEXT:    add %o0, 32, %o1
+; SPARC-NEXT:    retl
+; SPARC-NEXT:    mov %g0, %o0
+; SPARC-NEXT:  .LBB2_3:
+; SPARC-NEXT:    mov %g0, %o0
+; SPARC-NEXT:    retl
+; SPARC-NEXT:    mov 64, %o1
+; SPARC-NEXT:  .LBB2_4:
 ; SPARC-NEXT:    sub %g0, %o1, %o0
 ; SPARC-NEXT:    and %o1, %o0, %o0
 ; SPARC-NEXT:    smul %o0, %o2, %o0
@@ -206,33 +216,34 @@ define i64 @i64_nopoison(i64 %x) nounwind {
 ; SPARC-NEXT:    ldub [%o3+%o0], %o1
 ; SPARC-NEXT:    retl
 ; SPARC-NEXT:    mov %g0, %o0
-; SPARC-NEXT:  .LBB2_3:
-; SPARC-NEXT:    mov 32, %o0
-; SPARC-NEXT:    cmp %o1, 0
-; SPARC-NEXT:    bne .LBB2_2
-; SPARC-NEXT:    nop
-; SPARC-NEXT:  .LBB2_4:
-; SPARC-NEXT:    add %o0, 32, %o1
-; SPARC-NEXT:    retl
-; SPARC-NEXT:    mov %g0, %o0
 ;
 ; SPARC-POPC-LABEL: i64_nopoison:
 ; SPARC-POPC:       ! %bb.0:
+; SPARC-POPC-NEXT:    or %o1, %o0, %o2
+; SPARC-POPC-NEXT:    cmp %o2, 0
+; SPARC-POPC-NEXT:    be .LBB2_3
+; SPARC-POPC-NEXT:    nop
+; SPARC-POPC-NEXT:  ! %bb.1: ! %cond.false
 ; SPARC-POPC-NEXT:    sethi 122669, %o2
 ; SPARC-POPC-NEXT:    or %o2, 305, %o2
 ; SPARC-POPC-NEXT:    sethi %hi(.LCPI2_0), %o3
-; SPARC-POPC-NEXT:    cmp %o0, 0
-; SPARC-POPC-NEXT:    be .LBB2_3
+; SPARC-POPC-NEXT:    cmp %o1, 0
+; SPARC-POPC-NEXT:    bne .LBB2_4
 ; SPARC-POPC-NEXT:    add %o3, %lo(.LCPI2_0), %o3
-; SPARC-POPC-NEXT:  ! %bb.1:
-; SPARC-POPC-NEXT:    sub %g0, %o0, %o4
-; SPARC-POPC-NEXT:    and %o0, %o4, %o0
+; SPARC-POPC-NEXT:  ! %bb.2: ! %cond.false
+; SPARC-POPC-NEXT:    sub %g0, %o0, %o1
+; SPARC-POPC-NEXT:    and %o0, %o1, %o0
 ; SPARC-POPC-NEXT:    smul %o0, %o2, %o0
 ; SPARC-POPC-NEXT:    srl %o0, 27, %o0
-; SPARC-POPC-NEXT:    cmp %o1, 0
-; SPARC-POPC-NEXT:    be .LBB2_4
 ; SPARC-POPC-NEXT:    ldub [%o3+%o0], %o0
-; SPARC-POPC-NEXT:  .LBB2_2:
+; SPARC-POPC-NEXT:    add %o0, 32, %o1
+; SPARC-POPC-NEXT:    retl
+; SPARC-POPC-NEXT:    mov %g0, %o0
+; SPARC-POPC-NEXT:  .LBB2_3:
+; SPARC-POPC-NEXT:    mov %g0, %o0
+; SPARC-POPC-NEXT:    retl
+; SPARC-POPC-NEXT:    mov 64, %o1
+; SPARC-POPC-NEXT:  .LBB2_4:
 ; SPARC-POPC-NEXT:    sub %g0, %o1, %o0
 ; SPARC-POPC-NEXT:    and %o1, %o0, %o0
 ; SPARC-POPC-NEXT:    smul %o0, %o2, %o0
@@ -240,15 +251,6 @@ define i64 @i64_nopoison(i64 %x) nounwind {
 ; SPARC-POPC-NEXT:    ldub [%o3+%o0], %o1
 ; SPARC-POPC-NEXT:    retl
 ; SPARC-POPC-NEXT:    mov %g0, %o0
-; SPARC-POPC-NEXT:  .LBB2_3:
-; SPARC-POPC-NEXT:    mov 32, %o0
-; SPARC-POPC-NEXT:    cmp %o1, 0
-; SPARC-POPC-NEXT:    bne .LBB2_2
-; SPARC-POPC-NEXT:    nop
-; SPARC-POPC-NEXT:  .LBB2_4:
-; SPARC-POPC-NEXT:    add %o0, 32, %o1
-; SPARC-POPC-NEXT:    retl
-; SPARC-POPC-NEXT:    mov %g0, %o0
 ;
 ; SPARC-VIS3-LABEL: i64_nopoison:
 ; SPARC-VIS3:       ! %bb.0:

diff --git a/llvm/test/CodeGen/X86/ctlo.ll b/llvm/test/CodeGen/X86/ctlo.ll
index fecb62fbc5aea..752f6659948e6 100644
--- a/llvm/test/CodeGen/X86/ctlo.ll
+++ b/llvm/test/CodeGen/X86/ctlo.ll
@@ -285,30 +285,35 @@ define i32 @ctlo_i32_undef(i32 %x) {
   ret i32 %tmp2
 }
 
-define i64 @ctlo_i64(i64 %x) {
+define i64 @ctlo_i64(i64 %x) nounwind {
 ; X86-NOCMOV-LABEL: ctlo_i64:
 ; X86-NOCMOV:       # %bb.0:
+; X86-NOCMOV-NEXT:    pushl %esi
 ; X86-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NOCMOV-NEXT:    notl %ecx
 ; X86-NOCMOV-NEXT:    notl %eax
-; X86-NOCMOV-NEXT:    bsrl %eax, %edx
-; X86-NOCMOV-NEXT:    movl $63, %eax
-; X86-NOCMOV-NEXT:    je .LBB6_2
-; X86-NOCMOV-NEXT:  # %bb.1:
-; X86-NOCMOV-NEXT:    movl %edx, %eax
-; X86-NOCMOV-NEXT:  .LBB6_2:
+; X86-NOCMOV-NEXT:    xorl %edx, %edx
+; X86-NOCMOV-NEXT:    movl %eax, %esi
+; X86-NOCMOV-NEXT:    orl %ecx, %esi
+; X86-NOCMOV-NEXT:    je .LBB6_1
+; X86-NOCMOV-NEXT:  # %bb.2: # %cond.false
 ; X86-NOCMOV-NEXT:    testl %ecx, %ecx
 ; X86-NOCMOV-NEXT:    jne .LBB6_3
-; X86-NOCMOV-NEXT:  # %bb.4:
+; X86-NOCMOV-NEXT:  # %bb.4: # %cond.false
+; X86-NOCMOV-NEXT:    bsrl %eax, %eax
 ; X86-NOCMOV-NEXT:    xorl $31, %eax
-; X86-NOCMOV-NEXT:    addl $32, %eax
-; X86-NOCMOV-NEXT:    xorl %edx, %edx
+; X86-NOCMOV-NEXT:    orl $32, %eax
+; X86-NOCMOV-NEXT:    popl %esi
+; X86-NOCMOV-NEXT:    retl
+; X86-NOCMOV-NEXT:  .LBB6_1:
+; X86-NOCMOV-NEXT:    movl $64, %eax
+; X86-NOCMOV-NEXT:    popl %esi
 ; X86-NOCMOV-NEXT:    retl
 ; X86-NOCMOV-NEXT:  .LBB6_3:
 ; X86-NOCMOV-NEXT:    bsrl %ecx, %eax
 ; X86-NOCMOV-NEXT:    xorl $31, %eax
-; X86-NOCMOV-NEXT:    xorl %edx, %edx
+; X86-NOCMOV-NEXT:    popl %esi
 ; X86-NOCMOV-NEXT:    retl
 ;
 ; X86-CMOV-LABEL: ctlo_i64:

diff --git a/llvm/test/CodeGen/X86/ctlz.ll b/llvm/test/CodeGen/X86/ctlz.ll
index 0eabfeae853f7..1267fe9033454 100644
--- a/llvm/test/CodeGen/X86/ctlz.ll
+++ b/llvm/test/CodeGen/X86/ctlz.ll
@@ -399,27 +399,33 @@ define i32 @ctlz_i32_zero_test(i32 %n) {
 }
 
 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
-define i64 @ctlz_i64_zero_test(i64 %n) {
+define i64 @ctlz_i64_zero_test(i64 %n) nounwind {
 ; X86-NOCMOV-LABEL: ctlz_i64_zero_test:
 ; X86-NOCMOV:       # %bb.0:
+; X86-NOCMOV-NEXT:    pushl %esi
+; X86-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NOCMOV-NEXT:    bsrl {{[0-9]+}}(%esp), %edx
-; X86-NOCMOV-NEXT:    movl $63, %eax
-; X86-NOCMOV-NEXT:    je .LBB7_2
-; X86-NOCMOV-NEXT:  # %bb.1:
-; X86-NOCMOV-NEXT:    movl %edx, %eax
-; X86-NOCMOV-NEXT:  .LBB7_2:
-; X86-NOCMOV-NEXT:    testl %ecx, %ecx
+; X86-NOCMOV-NEXT:    xorl %edx, %edx
+; X86-NOCMOV-NEXT:    movl %ecx, %esi
+; X86-NOCMOV-NEXT:    orl %eax, %esi
+; X86-NOCMOV-NEXT:    je .LBB7_1
+; X86-NOCMOV-NEXT:  # %bb.2: # %cond.false
+; X86-NOCMOV-NEXT:    testl %eax, %eax
 ; X86-NOCMOV-NEXT:    jne .LBB7_3
-; X86-NOCMOV-NEXT:  # %bb.4:
+; X86-NOCMOV-NEXT:  # %bb.4: # %cond.false
+; X86-NOCMOV-NEXT:    bsrl %ecx, %eax
 ; X86-NOCMOV-NEXT:    xorl $31, %eax
-; X86-NOCMOV-NEXT:    addl $32, %eax
-; X86-NOCMOV-NEXT:    xorl %edx, %edx
+; X86-NOCMOV-NEXT:    orl $32, %eax
+; X86-NOCMOV-NEXT:    popl %esi
+; X86-NOCMOV-NEXT:    retl
+; X86-NOCMOV-NEXT:  .LBB7_1:
+; X86-NOCMOV-NEXT:    movl $64, %eax
+; X86-NOCMOV-NEXT:    popl %esi
 ; X86-NOCMOV-NEXT:    retl
 ; X86-NOCMOV-NEXT:  .LBB7_3:
-; X86-NOCMOV-NEXT:    bsrl %ecx, %eax
+; X86-NOCMOV-NEXT:    bsrl %eax, %eax
 ; X86-NOCMOV-NEXT:    xorl $31, %eax
-; X86-NOCMOV-NEXT:    xorl %edx, %edx
+; X86-NOCMOV-NEXT:    popl %esi
 ; X86-NOCMOV-NEXT:    retl
 ;
 ; X86-CMOV-LABEL: ctlz_i64_zero_test:

diff --git a/llvm/test/CodeGen/X86/cttz.ll b/llvm/test/CodeGen/X86/cttz.ll
index db949827af007..c95b7bd7f131a 100644
--- a/llvm/test/CodeGen/X86/cttz.ll
+++ b/llvm/test/CodeGen/X86/cttz.ll
@@ -352,26 +352,31 @@ define i32 @cttz_i32_zero_test(i32 %n) {
 }
 
 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
-define i64 @cttz_i64_zero_test(i64 %n) {
+define i64 @cttz_i64_zero_test(i64 %n) nounwind {
 ; X86-NOCMOV-LABEL: cttz_i64_zero_test:
 ; X86-NOCMOV:       # %bb.0:
+; X86-NOCMOV-NEXT:    pushl %esi
+; X86-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NOCMOV-NOT:     rep
-; X86-NOCMOV-NEXT:    bsfl {{[0-9]+}}(%esp), %edx
-; X86-NOCMOV-NEXT:    movl $32, %eax
-; X86-NOCMOV-NEXT:    je .LBB7_2
-; X86-NOCMOV-NEXT:  # %bb.1:
-; X86-NOCMOV-NEXT:    movl %edx, %eax
-; X86-NOCMOV-NEXT:  .LBB7_2:
+; X86-NOCMOV-NEXT:    xorl %edx, %edx
+; X86-NOCMOV-NEXT:    movl %ecx, %esi
+; X86-NOCMOV-NEXT:    orl %eax, %esi
+; X86-NOCMOV-NEXT:    je .LBB7_1
+; X86-NOCMOV-NEXT:  # %bb.2: # %cond.false
 ; X86-NOCMOV-NEXT:    testl %ecx, %ecx
 ; X86-NOCMOV-NEXT:    jne .LBB7_3
-; X86-NOCMOV-NEXT:  # %bb.4:
+; X86-NOCMOV-NEXT:  # %bb.4: # %cond.false
+; X86-NOCMOV-NEXT:    rep bsfl %eax, %eax
 ; X86-NOCMOV-NEXT:    addl $32, %eax
-; X86-NOCMOV-NEXT:    xorl %edx, %edx
+; X86-NOCMOV-NEXT:    popl %esi
+; X86-NOCMOV-NEXT:    retl
+; X86-NOCMOV-NEXT:  .LBB7_1:
+; X86-NOCMOV-NEXT:    movl $64, %eax
+; X86-NOCMOV-NEXT:    popl %esi
 ; X86-NOCMOV-NEXT:    retl
 ; X86-NOCMOV-NEXT:  .LBB7_3:
 ; X86-NOCMOV-NEXT:    rep bsfl %ecx, %eax
-; X86-NOCMOV-NEXT:    xorl %edx, %edx
+; X86-NOCMOV-NEXT:    popl %esi
 ; X86-NOCMOV-NEXT:    retl
 ;
 ; X86-CMOV-LABEL: cttz_i64_zero_test:

diff --git a/llvm/test/CodeGen/X86/lzcnt-cmp.ll b/llvm/test/CodeGen/X86/lzcnt-cmp.ll
deleted file mode 100644
index 4f65739cc70dd..0000000000000
--- a/llvm/test/CodeGen/X86/lzcnt-cmp.ll
+++ /dev/null
@@ -1,188 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=X86,X86-BSR
-; RUN: llc < %s -mtriple=i686-- -mattr=+lzcnt,+cmov | FileCheck %s --check-prefixes=X86,X86-LZCNT
-; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=X64,X64-BSR
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+lzcnt | FileCheck %s --check-prefixes=X64,X64-LZCNT
-
-define i1 @lshr_ctlz_cmpeq_one_i64(i64 %in) nounwind {
-; X86-LABEL: lshr_ctlz_cmpeq_one_i64:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    sete %al
-; X86-NEXT:    retl
-;
-; X64-LABEL: lshr_ctlz_cmpeq_one_i64:
-; X64:       # %bb.0:
-; X64-NEXT:    testq %rdi, %rdi
-; X64-NEXT:    sete %al
-; X64-NEXT:    retq
-  %ctlz = call i64 @llvm.ctlz.i64(i64 %in, i1 0)
-  %lshr = lshr i64 %ctlz, 6
-  %icmp = icmp eq i64 %lshr, 1
-  ret i1 %icmp
-}
-
-define i1 @lshr_ctlz_undef_cmpeq_one_i64(i64 %in) nounwind {
-; X86-BSR-LABEL: lshr_ctlz_undef_cmpeq_one_i64:
-; X86-BSR:       # %bb.0:
-; X86-BSR-NEXT:    xorl %eax, %eax
-; X86-BSR-NEXT:    retl
-;
-; X86-LZCNT-LABEL: lshr_ctlz_undef_cmpeq_one_i64:
-; X86-LZCNT:       # %bb.0:
-; X86-LZCNT-NEXT:    lzcntl {{[0-9]+}}(%esp), %ecx
-; X86-LZCNT-NEXT:    addl $32, %ecx
-; X86-LZCNT-NEXT:    xorl %eax, %eax
-; X86-LZCNT-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
-; X86-LZCNT-NEXT:    cmovel %ecx, %eax
-; X86-LZCNT-NEXT:    shrl $6, %eax
-; X86-LZCNT-NEXT:    # kill: def $al killed $al killed $eax
-; X86-LZCNT-NEXT:    retl
-;
-; X64-BSR-LABEL: lshr_ctlz_undef_cmpeq_one_i64:
-; X64-BSR:       # %bb.0:
-; X64-BSR-NEXT:    xorl %eax, %eax
-; X64-BSR-NEXT:    retq
-;
-; X64-LZCNT-LABEL: lshr_ctlz_undef_cmpeq_one_i64:
-; X64-LZCNT:       # %bb.0:
-; X64-LZCNT-NEXT:    lzcntq %rdi, %rax
-; X64-LZCNT-NEXT:    shrl $6, %eax
-; X64-LZCNT-NEXT:    # kill: def $al killed $al killed $rax
-; X64-LZCNT-NEXT:    retq
-  %ctlz = call i64 @llvm.ctlz.i64(i64 %in, i1 -1)
-  %lshr = lshr i64 %ctlz, 6
-  %icmp = icmp eq i64 %lshr, 1
-  ret i1 %icmp
-}
-
-define i1 @lshr_ctlz_cmpne_zero_i64(i64 %in) nounwind {
-; X86-LABEL: lshr_ctlz_cmpne_zero_i64:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    sete %al
-; X86-NEXT:    retl
-;
-; X64-LABEL: lshr_ctlz_cmpne_zero_i64:
-; X64:       # %bb.0:
-; X64-NEXT:    testq %rdi, %rdi
-; X64-NEXT:    sete %al
-; X64-NEXT:    retq
-  %ctlz = call i64 @llvm.ctlz.i64(i64 %in, i1 0)
-  %lshr = lshr i64 %ctlz, 6
-  %icmp = icmp ne i64 %lshr, 0
-  ret i1 %icmp
-}
-
-define i1 @lshr_ctlz_undef_cmpne_zero_i64(i64 %in) nounwind {
-; X86-BSR-LABEL: lshr_ctlz_undef_cmpne_zero_i64:
-; X86-BSR:       # %bb.0:
-; X86-BSR-NEXT:    xorl %eax, %eax
-; X86-BSR-NEXT:    retl
-;
-; X86-LZCNT-LABEL: lshr_ctlz_undef_cmpne_zero_i64:
-; X86-LZCNT:       # %bb.0:
-; X86-LZCNT-NEXT:    lzcntl {{[0-9]+}}(%esp), %ecx
-; X86-LZCNT-NEXT:    addl $32, %ecx
-; X86-LZCNT-NEXT:    xorl %eax, %eax
-; X86-LZCNT-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
-; X86-LZCNT-NEXT:    cmovel %ecx, %eax
-; X86-LZCNT-NEXT:    shrl $6, %eax
-; X86-LZCNT-NEXT:    # kill: def $al killed $al killed $eax
-; X86-LZCNT-NEXT:    retl
-;
-; X64-BSR-LABEL: lshr_ctlz_undef_cmpne_zero_i64:
-; X64-BSR:       # %bb.0:
-; X64-BSR-NEXT:    xorl %eax, %eax
-; X64-BSR-NEXT:    retq
-;
-; X64-LZCNT-LABEL: lshr_ctlz_undef_cmpne_zero_i64:
-; X64-LZCNT:       # %bb.0:
-; X64-LZCNT-NEXT:    lzcntq %rdi, %rax
-; X64-LZCNT-NEXT:    shrl $6, %eax
-; X64-LZCNT-NEXT:    # kill: def $al killed $al killed $rax
-; X64-LZCNT-NEXT:    retq
-  %ctlz = call i64 @llvm.ctlz.i64(i64 %in, i1 -1)
-  %lshr = lshr i64 %ctlz, 6
-  %icmp = icmp ne i64 %lshr, 0
-  ret i1 %icmp
-}
-
-define <2 x i64> @lshr_ctlz_cmpeq_zero_v2i64(<2 x i64> %in) nounwind {
-; X86-LABEL: lshr_ctlz_cmpeq_zero_v2i64:
-; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    xorl %ecx, %ecx
-; X86-NEXT:    orl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    setne %cl
-; X86-NEXT:    negl %ecx
-; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    orl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    setne %dl
-; X86-NEXT:    negl %edx
-; X86-NEXT:    movl %edx, 12(%eax)
-; X86-NEXT:    movl %edx, 8(%eax)
-; X86-NEXT:    movl %ecx, 4(%eax)
-; X86-NEXT:    movl %ecx, (%eax)
-; X86-NEXT:    popl %esi
-; X86-NEXT:    retl $4
-;
-; X64-LABEL: lshr_ctlz_cmpeq_zero_v2i64:
-; X64:       # %bb.0:
-; X64-NEXT:    pxor %xmm1, %xmm1
-; X64-NEXT:    pcmpeqd %xmm1, %xmm0
-; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
-; X64-NEXT:    pand %xmm1, %xmm0
-; X64-NEXT:    pcmpeqd %xmm1, %xmm1
-; X64-NEXT:    pxor %xmm1, %xmm0
-; X64-NEXT:    retq
-  %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %in, i1 0)
-  %lshr = lshr <2 x i64> %ctlz, <i64 6, i64 6>
-  %icmp = icmp eq <2 x i64> %lshr, zeroinitializer
-  %sext = sext <2 x i1> %icmp to <2 x i64>
-  ret <2 x i64> %sext
-}
-
-define <2 x i64> @lshr_ctlz_cmpne_zero_v2i64(<2 x i64> %in) nounwind {
-; X86-LABEL: lshr_ctlz_cmpne_zero_v2i64:
-; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    xorl %ecx, %ecx
-; X86-NEXT:    orl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    sete %cl
-; X86-NEXT:    negl %ecx
-; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    orl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    sete %dl
-; X86-NEXT:    negl %edx
-; X86-NEXT:    movl %edx, 12(%eax)
-; X86-NEXT:    movl %edx, 8(%eax)
-; X86-NEXT:    movl %ecx, 4(%eax)
-; X86-NEXT:    movl %ecx, (%eax)
-; X86-NEXT:    popl %esi
-; X86-NEXT:    retl $4
-;
-; X64-LABEL: lshr_ctlz_cmpne_zero_v2i64:
-; X64:       # %bb.0:
-; X64-NEXT:    pxor %xmm1, %xmm1
-; X64-NEXT:    pcmpeqd %xmm1, %xmm0
-; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
-; X64-NEXT:    pand %xmm1, %xmm0
-; X64-NEXT:    retq
-  %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %in, i1 0)
-  %lshr = lshr <2 x i64> %ctlz, <i64 6, i64 6>
-  %icmp = icmp ne <2 x i64> %lshr, zeroinitializer
-  %sext = sext <2 x i1> %icmp to <2 x i64>
-  ret <2 x i64> %sext
-}
-
-declare i64 @llvm.ctlz.i64(i64, i1)
-declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1)


        

