[llvm] [RFC][X86] Allow speculative BSR/BSF instructions on targets with CMOV (PR #102885)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 12 04:57:11 PDT 2024
https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/102885
Currently, targets without LZCNT/TZCNT won't speculate BSR/BSF instructions, because those instructions leave the destination undefined for a zero input - so we always insert a test+branch to handle the zero-input case.
This RFC patch proposes that we allow speculation if the target has CMOV, and perform a branchless select instead to handle the zero-input case. This will predominantly help generic x86-64 targets, where we have the triple set but aren't tuning codegen for any particular CPU.
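For reference, here is what the change does to the cttz_i32 zero-test case (taken from the test diffs below) - the test+branch lowering becomes a branchless select keyed on the ZF flag that BSF sets for a zero input:

  Before (test + branch):
    testl    %edi, %edi
    je       .LBB6_1
    rep bsfl %edi, %eax
    retq
  .LBB6_1:
    movl     $32, %eax
    retq

  After (branchless, CMOV-capable targets):
    bsfl     %edi, %ecx
    movl     $32, %eax
    cmovnel  %ecx, %eax
    retq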
I can't recall the entire history of why we don't already do this. BSR/BSF only set the ZF bit and leave the other flags in an undefined state (at least on AMD CPUs), which can cause false dependencies before the CMOV can consume the result - minor stalls that a well-predicted branch would avoid. Can anyone recall any other reasons?
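For reference, the architectural flag behaviour that makes the CMOVNE fallback legal (and the false-dependency concern plausible), per the Intel SDM:

  BSF/BSR semantics:
    if src == 0:  ZF = 1, dst undefined
    else:         ZF = 0, dst = index of lowest (BSF) / highest (BSR) set bit
    CF, OF, SF, AF and PF are always undefined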
A more restricted version of this patch would be to handle just the isCheapToSpeculateCttz case and rely on the "REP BSF" expansion we already do (which effectively performs a hidden TZCNT followed by a CMOV).
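A rough sketch of one possible shape of that restricted lowering (hypothetical - the exact flag handling would depend on how the existing expansion is wired up). The F3 prefix shares its encoding with TZCNT, so BMI-capable CPUs execute TZCNT (which defines the zero-input result as 32), while older CPUs ignore the REP prefix and execute a plain BSF, leaving the CMOV to cover only the pre-BMI case:

    rep bsfl %edi, %ecx    # TZCNT on BMI CPUs, plain BSF elsewhere
    testl    %edi, %edi    # set ZF from the input, not from BSF/TZCNT flags
    movl     $32, %eax
    cmovnel  %ecx, %eax    # only needed for the plain-BSF zero-input case
    retq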
Any comments?
From 7c648b198b4f489e79493368b301db7b018be193 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Fri, 9 Aug 2024 18:21:51 +0100
Subject: [PATCH] [X86] Allow speculative BSR/BSF instructions on targets with
CMOV
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 4 +-
llvm/test/CodeGen/X86/atomic-bit-test.ll | 1 -
llvm/test/CodeGen/X86/bit_ceil.ll | 53 +--
llvm/test/CodeGen/X86/combine-or.ll | 47 +-
llvm/test/CodeGen/X86/ctlo.ll | 161 ++++---
llvm/test/CodeGen/X86/ctlz.ll | 304 ++++++-------
llvm/test/CodeGen/X86/cttz.ll | 45 +-
llvm/test/CodeGen/X86/known-never-zero.ll | 507 ++++++----------------
llvm/test/CodeGen/X86/lzcnt-cmp.ll | 52 +--
llvm/test/CodeGen/X86/pr57673.ll | 50 +--
llvm/test/CodeGen/X86/pr89877.ll | 16 +-
llvm/test/CodeGen/X86/pr90847.ll | 18 +-
llvm/test/CodeGen/X86/pr92569.ll | 16 +-
13 files changed, 483 insertions(+), 791 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b604e85b46e788..8a5b31bbc1f471 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3238,13 +3238,13 @@ bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT,
bool X86TargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
// Speculate cttz only if we can directly use TZCNT or can promote to i32.
- return Subtarget.hasBMI() ||
+ return Subtarget.hasBMI() || Subtarget.canUseCMOV() ||
(!Ty->isVectorTy() && Ty->getScalarSizeInBits() < 32);
}
bool X86TargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
// Speculate ctlz only if we can directly use LZCNT.
- return Subtarget.hasLZCNT();
+ return Subtarget.hasLZCNT() || Subtarget.canUseCMOV();
}
bool X86TargetLowering::ShouldShrinkFPConstant(EVT VT) const {
diff --git a/llvm/test/CodeGen/X86/atomic-bit-test.ll b/llvm/test/CodeGen/X86/atomic-bit-test.ll
index f39c4b5e620d0e..10b6605c3fb05e 100644
--- a/llvm/test/CodeGen/X86/atomic-bit-test.ll
+++ b/llvm/test/CodeGen/X86/atomic-bit-test.ll
@@ -582,7 +582,6 @@ define i32 @split_hoist_and(i32 %0) nounwind {
; X64-NEXT: lock btsl $3, v32(%rip)
; X64-NEXT: setb %al
; X64-NEXT: shll $3, %eax
-; X64-NEXT: testl %edi, %edi
; X64-NEXT: retq
%2 = atomicrmw or ptr @v32, i32 8 monotonic, align 4
%3 = tail call i32 @llvm.ctlz.i32(i32 %0, i1 false)
diff --git a/llvm/test/CodeGen/X86/bit_ceil.ll b/llvm/test/CodeGen/X86/bit_ceil.ll
index 4641c114238f8f..823453087f6180 100644
--- a/llvm/test/CodeGen/X86/bit_ceil.ll
+++ b/llvm/test/CodeGen/X86/bit_ceil.ll
@@ -8,16 +8,12 @@
define i32 @bit_ceil_i32(i32 %x) {
; NOBMI-LABEL: bit_ceil_i32:
; NOBMI: # %bb.0:
-; NOBMI-NEXT: movl %edi, %eax
-; NOBMI-NEXT: decl %eax
-; NOBMI-NEXT: je .LBB0_1
-; NOBMI-NEXT: # %bb.2: # %cond.false
-; NOBMI-NEXT: bsrl %eax, %ecx
+; NOBMI-NEXT: # kill: def $edi killed $edi def $rdi
+; NOBMI-NEXT: leal -1(%rdi), %eax
+; NOBMI-NEXT: bsrl %eax, %eax
+; NOBMI-NEXT: movl $63, %ecx
+; NOBMI-NEXT: cmovnel %eax, %ecx
; NOBMI-NEXT: xorl $31, %ecx
-; NOBMI-NEXT: jmp .LBB0_3
-; NOBMI-NEXT: .LBB0_1:
-; NOBMI-NEXT: movl $32, %ecx
-; NOBMI-NEXT: .LBB0_3: # %cond.end
; NOBMI-NEXT: negb %cl
; NOBMI-NEXT: movl $1, %edx
; NOBMI-NEXT: movl $1, %eax
@@ -51,15 +47,10 @@ define i32 @bit_ceil_i32(i32 %x) {
define i32 @bit_ceil_i32_plus1(i32 noundef %x) {
; NOBMI-LABEL: bit_ceil_i32_plus1:
; NOBMI: # %bb.0: # %entry
-; NOBMI-NEXT: testl %edi, %edi
-; NOBMI-NEXT: je .LBB1_1
-; NOBMI-NEXT: # %bb.2: # %cond.false
-; NOBMI-NEXT: bsrl %edi, %ecx
+; NOBMI-NEXT: bsrl %edi, %eax
+; NOBMI-NEXT: movl $63, %ecx
+; NOBMI-NEXT: cmovnel %eax, %ecx
; NOBMI-NEXT: xorl $31, %ecx
-; NOBMI-NEXT: jmp .LBB1_3
-; NOBMI-NEXT: .LBB1_1:
-; NOBMI-NEXT: movl $32, %ecx
-; NOBMI-NEXT: .LBB1_3: # %cond.end
; NOBMI-NEXT: negb %cl
; NOBMI-NEXT: movl $1, %edx
; NOBMI-NEXT: movl $1, %eax
@@ -94,16 +85,11 @@ entry:
define i64 @bit_ceil_i64(i64 %x) {
; NOBMI-LABEL: bit_ceil_i64:
; NOBMI: # %bb.0:
-; NOBMI-NEXT: movq %rdi, %rax
-; NOBMI-NEXT: decq %rax
-; NOBMI-NEXT: je .LBB2_1
-; NOBMI-NEXT: # %bb.2: # %cond.false
-; NOBMI-NEXT: bsrq %rax, %rcx
-; NOBMI-NEXT: xorq $63, %rcx
-; NOBMI-NEXT: jmp .LBB2_3
-; NOBMI-NEXT: .LBB2_1:
-; NOBMI-NEXT: movl $64, %ecx
-; NOBMI-NEXT: .LBB2_3: # %cond.end
+; NOBMI-NEXT: leaq -1(%rdi), %rax
+; NOBMI-NEXT: bsrq %rax, %rax
+; NOBMI-NEXT: movl $127, %ecx
+; NOBMI-NEXT: cmovneq %rax, %rcx
+; NOBMI-NEXT: xorl $63, %ecx
; NOBMI-NEXT: negb %cl
; NOBMI-NEXT: movl $1, %edx
; NOBMI-NEXT: movl $1, %eax
@@ -136,15 +122,10 @@ define i64 @bit_ceil_i64(i64 %x) {
define i64 @bit_ceil_i64_plus1(i64 noundef %x) {
; NOBMI-LABEL: bit_ceil_i64_plus1:
; NOBMI: # %bb.0: # %entry
-; NOBMI-NEXT: testq %rdi, %rdi
-; NOBMI-NEXT: je .LBB3_1
-; NOBMI-NEXT: # %bb.2: # %cond.false
-; NOBMI-NEXT: bsrq %rdi, %rcx
-; NOBMI-NEXT: xorq $63, %rcx
-; NOBMI-NEXT: jmp .LBB3_3
-; NOBMI-NEXT: .LBB3_1:
-; NOBMI-NEXT: movl $64, %ecx
-; NOBMI-NEXT: .LBB3_3: # %cond.end
+; NOBMI-NEXT: bsrq %rdi, %rax
+; NOBMI-NEXT: movl $127, %ecx
+; NOBMI-NEXT: cmovneq %rax, %rcx
+; NOBMI-NEXT: xorl $63, %ecx
; NOBMI-NEXT: negb %cl
; NOBMI-NEXT: movl $1, %edx
; NOBMI-NEXT: movl $1, %eax
diff --git a/llvm/test/CodeGen/X86/combine-or.ll b/llvm/test/CodeGen/X86/combine-or.ll
index 3b2102f46a297a..4060355495eb3b 100644
--- a/llvm/test/CodeGen/X86/combine-or.ll
+++ b/llvm/test/CodeGen/X86/combine-or.ll
@@ -213,21 +213,18 @@ define i64 @PR89533(<64 x i8> %a0) {
; SSE-NEXT: shll $16, %ecx
; SSE-NEXT: orl %eax, %ecx
; SSE-NEXT: pcmpeqb %xmm4, %xmm2
-; SSE-NEXT: pmovmskb %xmm2, %edx
-; SSE-NEXT: xorl $65535, %edx # imm = 0xFFFF
+; SSE-NEXT: pmovmskb %xmm2, %eax
+; SSE-NEXT: xorl $65535, %eax # imm = 0xFFFF
; SSE-NEXT: pcmpeqb %xmm4, %xmm3
-; SSE-NEXT: pmovmskb %xmm3, %eax
-; SSE-NEXT: notl %eax
-; SSE-NEXT: shll $16, %eax
-; SSE-NEXT: orl %edx, %eax
-; SSE-NEXT: shlq $32, %rax
-; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: je .LBB11_2
-; SSE-NEXT: # %bb.1: # %cond.false
-; SSE-NEXT: rep bsfq %rax, %rax
-; SSE-NEXT: retq
-; SSE-NEXT: .LBB11_2: # %cond.end
+; SSE-NEXT: pmovmskb %xmm3, %edx
+; SSE-NEXT: notl %edx
+; SSE-NEXT: shll $16, %edx
+; SSE-NEXT: orl %eax, %edx
+; SSE-NEXT: shlq $32, %rdx
+; SSE-NEXT: orq %rcx, %rdx
+; SSE-NEXT: bsfq %rdx, %rcx
; SSE-NEXT: movl $64, %eax
+; SSE-NEXT: cmovneq %rcx, %rax
; SSE-NEXT: retq
;
; AVX1-LABEL: PR89533:
@@ -243,23 +240,19 @@ define i64 @PR89533(<64 x i8> %a0) {
; AVX1-NEXT: shll $16, %ecx
; AVX1-NEXT: orl %eax, %ecx
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm0
-; AVX1-NEXT: vpmovmskb %xmm0, %edx
-; AVX1-NEXT: xorl $65535, %edx # imm = 0xFFFF
+; AVX1-NEXT: vpmovmskb %xmm0, %eax
+; AVX1-NEXT: xorl $65535, %eax # imm = 0xFFFF
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: notl %eax
-; AVX1-NEXT: shll $16, %eax
-; AVX1-NEXT: orl %edx, %eax
-; AVX1-NEXT: shlq $32, %rax
-; AVX1-NEXT: orq %rcx, %rax
-; AVX1-NEXT: je .LBB11_2
-; AVX1-NEXT: # %bb.1: # %cond.false
-; AVX1-NEXT: rep bsfq %rax, %rax
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-; AVX1-NEXT: .LBB11_2: # %cond.end
+; AVX1-NEXT: vpmovmskb %xmm0, %edx
+; AVX1-NEXT: notl %edx
+; AVX1-NEXT: shll $16, %edx
+; AVX1-NEXT: orl %eax, %edx
+; AVX1-NEXT: shlq $32, %rdx
+; AVX1-NEXT: orq %rcx, %rdx
+; AVX1-NEXT: bsfq %rdx, %rcx
; AVX1-NEXT: movl $64, %eax
+; AVX1-NEXT: cmovneq %rcx, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/ctlo.ll b/llvm/test/CodeGen/X86/ctlo.ll
index bb80279e28f3d3..f383c9a2544fca 100644
--- a/llvm/test/CodeGen/X86/ctlo.ll
+++ b/llvm/test/CodeGen/X86/ctlo.ll
@@ -13,36 +13,44 @@ declare i32 @llvm.ctlz.i32(i32, i1)
declare i64 @llvm.ctlz.i64(i64, i1)
define i8 @ctlo_i8(i8 %x) {
-; X86-LABEL: ctlo_i8:
-; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorb $-1, %al
-; X86-NEXT: je .LBB0_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: movzbl %al, %eax
-; X86-NEXT: bsrl %eax, %eax
-; X86-NEXT: xorl $7, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB0_1:
-; X86-NEXT: movb $8, %al
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
+; X86-NOCMOV-LABEL: ctlo_i8:
+; X86-NOCMOV: # %bb.0:
+; X86-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NOCMOV-NEXT: xorb $-1, %al
+; X86-NOCMOV-NEXT: je .LBB0_1
+; X86-NOCMOV-NEXT: # %bb.2: # %cond.false
+; X86-NOCMOV-NEXT: movzbl %al, %eax
+; X86-NOCMOV-NEXT: bsrl %eax, %eax
+; X86-NOCMOV-NEXT: xorl $7, %eax
+; X86-NOCMOV-NEXT: # kill: def $al killed $al killed $eax
+; X86-NOCMOV-NEXT: retl
+; X86-NOCMOV-NEXT: .LBB0_1:
+; X86-NOCMOV-NEXT: movb $8, %al
+; X86-NOCMOV-NEXT: # kill: def $al killed $al killed $eax
+; X86-NOCMOV-NEXT: retl
+;
+; X86-CMOV-LABEL: ctlo_i8:
+; X86-CMOV: # %bb.0:
+; X86-CMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-CMOV-NEXT: notb %al
+; X86-CMOV-NEXT: movzbl %al, %eax
+; X86-CMOV-NEXT: bsrl %eax, %ecx
+; X86-CMOV-NEXT: movl $15, %eax
+; X86-CMOV-NEXT: cmovnel %ecx, %eax
+; X86-CMOV-NEXT: xorl $7, %eax
+; X86-CMOV-NEXT: # kill: def $al killed $al killed $eax
+; X86-CMOV-NEXT: retl
;
; X64-LABEL: ctlo_i8:
; X64: # %bb.0:
-; X64-NEXT: xorb $-1, %dil
-; X64-NEXT: je .LBB0_1
-; X64-NEXT: # %bb.2: # %cond.false
+; X64-NEXT: notb %dil
; X64-NEXT: movzbl %dil, %eax
-; X64-NEXT: bsrl %eax, %eax
+; X64-NEXT: bsrl %eax, %ecx
+; X64-NEXT: movl $15, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: xorl $7, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
-; X64-NEXT: .LBB0_1:
-; X64-NEXT: movb $8, %al
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlo_i8:
; X86-CLZ: # %bb.0:
@@ -111,34 +119,41 @@ define i8 @ctlo_i8_undef(i8 %x) {
}
define i16 @ctlo_i16(i16 %x) {
-; X86-LABEL: ctlo_i16:
-; X86: # %bb.0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorw $-1, %ax
-; X86-NEXT: je .LBB2_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: bsrw %ax, %ax
-; X86-NEXT: xorl $15, %eax
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB2_1:
-; X86-NEXT: movw $16, %ax
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: retl
+; X86-NOCMOV-LABEL: ctlo_i16:
+; X86-NOCMOV: # %bb.0:
+; X86-NOCMOV-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NOCMOV-NEXT: xorw $-1, %ax
+; X86-NOCMOV-NEXT: je .LBB2_1
+; X86-NOCMOV-NEXT: # %bb.2: # %cond.false
+; X86-NOCMOV-NEXT: bsrw %ax, %ax
+; X86-NOCMOV-NEXT: xorl $15, %eax
+; X86-NOCMOV-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NOCMOV-NEXT: retl
+; X86-NOCMOV-NEXT: .LBB2_1:
+; X86-NOCMOV-NEXT: movw $16, %ax
+; X86-NOCMOV-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NOCMOV-NEXT: retl
+;
+; X86-CMOV-LABEL: ctlo_i16:
+; X86-CMOV: # %bb.0:
+; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-CMOV-NEXT: notl %eax
+; X86-CMOV-NEXT: bsrw %ax, %cx
+; X86-CMOV-NEXT: movw $31, %ax
+; X86-CMOV-NEXT: cmovnew %cx, %ax
+; X86-CMOV-NEXT: xorl $15, %eax
+; X86-CMOV-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-CMOV-NEXT: retl
;
; X64-LABEL: ctlo_i16:
; X64: # %bb.0:
-; X64-NEXT: xorw $-1, %di
-; X64-NEXT: je .LBB2_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: bsrw %di, %ax
+; X64-NEXT: notl %edi
+; X64-NEXT: bsrw %di, %cx
+; X64-NEXT: movw $31, %ax
+; X64-NEXT: cmovnew %cx, %ax
; X64-NEXT: xorl $15, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
-; X64-NEXT: .LBB2_1:
-; X64-NEXT: movw $16, %ax
-; X64-NEXT: # kill: def $ax killed $ax killed $eax
-; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlo_i16:
; X86-CLZ: # %bb.0:
@@ -193,30 +208,37 @@ define i16 @ctlo_i16_undef(i16 %x) {
}
define i32 @ctlo_i32(i32 %x) {
-; X86-LABEL: ctlo_i32:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl $-1, %eax
-; X86-NEXT: je .LBB4_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: bsrl %eax, %eax
-; X86-NEXT: xorl $31, %eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB4_1:
-; X86-NEXT: movl $32, %eax
-; X86-NEXT: retl
+; X86-NOCMOV-LABEL: ctlo_i32:
+; X86-NOCMOV: # %bb.0:
+; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOCMOV-NEXT: xorl $-1, %eax
+; X86-NOCMOV-NEXT: je .LBB4_1
+; X86-NOCMOV-NEXT: # %bb.2: # %cond.false
+; X86-NOCMOV-NEXT: bsrl %eax, %eax
+; X86-NOCMOV-NEXT: xorl $31, %eax
+; X86-NOCMOV-NEXT: retl
+; X86-NOCMOV-NEXT: .LBB4_1:
+; X86-NOCMOV-NEXT: movl $32, %eax
+; X86-NOCMOV-NEXT: retl
+;
+; X86-CMOV-LABEL: ctlo_i32:
+; X86-CMOV: # %bb.0:
+; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-CMOV-NEXT: notl %eax
+; X86-CMOV-NEXT: bsrl %eax, %ecx
+; X86-CMOV-NEXT: movl $63, %eax
+; X86-CMOV-NEXT: cmovnel %ecx, %eax
+; X86-CMOV-NEXT: xorl $31, %eax
+; X86-CMOV-NEXT: retl
;
; X64-LABEL: ctlo_i32:
; X64: # %bb.0:
-; X64-NEXT: xorl $-1, %edi
-; X64-NEXT: je .LBB4_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: bsrl %edi, %eax
+; X64-NEXT: notl %edi
+; X64-NEXT: bsrl %edi, %ecx
+; X64-NEXT: movl $63, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: xorl $31, %eax
; X64-NEXT: retq
-; X64-NEXT: .LBB4_1:
-; X64-NEXT: movl $32, %eax
-; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlo_i32:
; X86-CLZ: # %bb.0:
@@ -314,15 +336,12 @@ define i64 @ctlo_i64(i64 %x) {
;
; X64-LABEL: ctlo_i64:
; X64: # %bb.0:
-; X64-NEXT: xorq $-1, %rdi
-; X64-NEXT: je .LBB6_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: bsrq %rdi, %rax
+; X64-NEXT: notq %rdi
+; X64-NEXT: bsrq %rdi, %rcx
+; X64-NEXT: movl $127, %eax
+; X64-NEXT: cmovneq %rcx, %rax
; X64-NEXT: xorq $63, %rax
; X64-NEXT: retq
-; X64-NEXT: .LBB6_1:
-; X64-NEXT: movl $64, %eax
-; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlo_i64:
; X86-CLZ: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/ctlz.ll b/llvm/test/CodeGen/X86/ctlz.ll
index d8f83502bd849a..6635be18b0f7a7 100644
--- a/llvm/test/CodeGen/X86/ctlz.ll
+++ b/llvm/test/CodeGen/X86/ctlz.ll
@@ -218,36 +218,41 @@ define i64 @ctlz_i64(i64 %x) {
; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
define i8 @ctlz_i8_zero_test(i8 %n) {
-; X86-LABEL: ctlz_i8_zero_test:
-; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: testb %al, %al
-; X86-NEXT: je .LBB4_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: movzbl %al, %eax
-; X86-NEXT: bsrl %eax, %eax
-; X86-NEXT: xorl $7, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB4_1:
-; X86-NEXT: movb $8, %al
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
+; X86-NOCMOV-LABEL: ctlz_i8_zero_test:
+; X86-NOCMOV: # %bb.0:
+; X86-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NOCMOV-NEXT: testb %al, %al
+; X86-NOCMOV-NEXT: je .LBB4_1
+; X86-NOCMOV-NEXT: # %bb.2: # %cond.false
+; X86-NOCMOV-NEXT: movzbl %al, %eax
+; X86-NOCMOV-NEXT: bsrl %eax, %eax
+; X86-NOCMOV-NEXT: xorl $7, %eax
+; X86-NOCMOV-NEXT: # kill: def $al killed $al killed $eax
+; X86-NOCMOV-NEXT: retl
+; X86-NOCMOV-NEXT: .LBB4_1:
+; X86-NOCMOV-NEXT: movb $8, %al
+; X86-NOCMOV-NEXT: # kill: def $al killed $al killed $eax
+; X86-NOCMOV-NEXT: retl
+;
+; X86-CMOV-LABEL: ctlz_i8_zero_test:
+; X86-CMOV: # %bb.0:
+; X86-CMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-CMOV-NEXT: bsrl %eax, %ecx
+; X86-CMOV-NEXT: movl $15, %eax
+; X86-CMOV-NEXT: cmovnel %ecx, %eax
+; X86-CMOV-NEXT: xorl $7, %eax
+; X86-CMOV-NEXT: # kill: def $al killed $al killed $eax
+; X86-CMOV-NEXT: retl
;
; X64-LABEL: ctlz_i8_zero_test:
; X64: # %bb.0:
-; X64-NEXT: testb %dil, %dil
-; X64-NEXT: je .LBB4_1
-; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: movzbl %dil, %eax
-; X64-NEXT: bsrl %eax, %eax
+; X64-NEXT: bsrl %eax, %ecx
+; X64-NEXT: movl $15, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: xorl $7, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
-; X64-NEXT: .LBB4_1:
-; X64-NEXT: movb $8, %al
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlz_i8_zero_test:
; X86-CLZ: # %bb.0:
@@ -286,34 +291,38 @@ define i8 @ctlz_i8_zero_test(i8 %n) {
; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
define i16 @ctlz_i16_zero_test(i16 %n) {
-; X86-LABEL: ctlz_i16_zero_test:
-; X86: # %bb.0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: testw %ax, %ax
-; X86-NEXT: je .LBB5_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: bsrw %ax, %ax
-; X86-NEXT: xorl $15, %eax
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB5_1:
-; X86-NEXT: movw $16, %ax
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: retl
+; X86-NOCMOV-LABEL: ctlz_i16_zero_test:
+; X86-NOCMOV: # %bb.0:
+; X86-NOCMOV-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NOCMOV-NEXT: testw %ax, %ax
+; X86-NOCMOV-NEXT: je .LBB5_1
+; X86-NOCMOV-NEXT: # %bb.2: # %cond.false
+; X86-NOCMOV-NEXT: bsrw %ax, %ax
+; X86-NOCMOV-NEXT: xorl $15, %eax
+; X86-NOCMOV-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NOCMOV-NEXT: retl
+; X86-NOCMOV-NEXT: .LBB5_1:
+; X86-NOCMOV-NEXT: movw $16, %ax
+; X86-NOCMOV-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NOCMOV-NEXT: retl
+;
+; X86-CMOV-LABEL: ctlz_i16_zero_test:
+; X86-CMOV: # %bb.0:
+; X86-CMOV-NEXT: bsrw {{[0-9]+}}(%esp), %cx
+; X86-CMOV-NEXT: movw $31, %ax
+; X86-CMOV-NEXT: cmovnew %cx, %ax
+; X86-CMOV-NEXT: xorl $15, %eax
+; X86-CMOV-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-CMOV-NEXT: retl
;
; X64-LABEL: ctlz_i16_zero_test:
; X64: # %bb.0:
-; X64-NEXT: testw %di, %di
-; X64-NEXT: je .LBB5_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: bsrw %di, %ax
+; X64-NEXT: bsrw %di, %cx
+; X64-NEXT: movw $31, %ax
+; X64-NEXT: cmovnew %cx, %ax
; X64-NEXT: xorl $15, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
-; X64-NEXT: .LBB5_1:
-; X64-NEXT: movw $16, %ax
-; X64-NEXT: # kill: def $ax killed $ax killed $eax
-; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlz_i16_zero_test:
; X86-CLZ: # %bb.0:
@@ -340,30 +349,34 @@ define i16 @ctlz_i16_zero_test(i16 %n) {
; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
define i32 @ctlz_i32_zero_test(i32 %n) {
-; X86-LABEL: ctlz_i32_zero_test:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB6_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: bsrl %eax, %eax
-; X86-NEXT: xorl $31, %eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB6_1:
-; X86-NEXT: movl $32, %eax
-; X86-NEXT: retl
+; X86-NOCMOV-LABEL: ctlz_i32_zero_test:
+; X86-NOCMOV: # %bb.0:
+; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOCMOV-NEXT: testl %eax, %eax
+; X86-NOCMOV-NEXT: je .LBB6_1
+; X86-NOCMOV-NEXT: # %bb.2: # %cond.false
+; X86-NOCMOV-NEXT: bsrl %eax, %eax
+; X86-NOCMOV-NEXT: xorl $31, %eax
+; X86-NOCMOV-NEXT: retl
+; X86-NOCMOV-NEXT: .LBB6_1:
+; X86-NOCMOV-NEXT: movl $32, %eax
+; X86-NOCMOV-NEXT: retl
+;
+; X86-CMOV-LABEL: ctlz_i32_zero_test:
+; X86-CMOV: # %bb.0:
+; X86-CMOV-NEXT: bsrl {{[0-9]+}}(%esp), %ecx
+; X86-CMOV-NEXT: movl $63, %eax
+; X86-CMOV-NEXT: cmovnel %ecx, %eax
+; X86-CMOV-NEXT: xorl $31, %eax
+; X86-CMOV-NEXT: retl
;
; X64-LABEL: ctlz_i32_zero_test:
; X64: # %bb.0:
-; X64-NEXT: testl %edi, %edi
-; X64-NEXT: je .LBB6_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: bsrl %edi, %eax
+; X64-NEXT: bsrl %edi, %ecx
+; X64-NEXT: movl $63, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: xorl $31, %eax
; X64-NEXT: retq
-; X64-NEXT: .LBB6_1:
-; X64-NEXT: movl $32, %eax
-; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlz_i32_zero_test:
; X86-CLZ: # %bb.0:
@@ -429,15 +442,11 @@ define i64 @ctlz_i64_zero_test(i64 %n) {
;
; X64-LABEL: ctlz_i64_zero_test:
; X64: # %bb.0:
-; X64-NEXT: testq %rdi, %rdi
-; X64-NEXT: je .LBB7_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: bsrq %rdi, %rax
+; X64-NEXT: bsrq %rdi, %rcx
+; X64-NEXT: movl $127, %eax
+; X64-NEXT: cmovneq %rcx, %rax
; X64-NEXT: xorq $63, %rax
; X64-NEXT: retq
-; X64-NEXT: .LBB7_1:
-; X64-NEXT: movl $64, %eax
-; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlz_i64_zero_test:
; X86-CLZ: # %bb.0:
@@ -580,33 +589,33 @@ define i32 @ctlz_bsr(i32 %n) {
; FIXME: The compare and branch are produced late in IR (by CodeGenPrepare), and
; codegen doesn't know how to combine the $32 and $31 into $63.
define i32 @ctlz_bsr_zero_test(i32 %n) {
-; X86-LABEL: ctlz_bsr_zero_test:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB10_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: bsrl %eax, %eax
-; X86-NEXT: xorl $31, %eax
-; X86-NEXT: xorl $31, %eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB10_1:
-; X86-NEXT: movl $32, %eax
-; X86-NEXT: xorl $31, %eax
-; X86-NEXT: retl
+; X86-NOCMOV-LABEL: ctlz_bsr_zero_test:
+; X86-NOCMOV: # %bb.0:
+; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOCMOV-NEXT: testl %eax, %eax
+; X86-NOCMOV-NEXT: je .LBB10_1
+; X86-NOCMOV-NEXT: # %bb.2: # %cond.false
+; X86-NOCMOV-NEXT: bsrl %eax, %eax
+; X86-NOCMOV-NEXT: xorl $31, %eax
+; X86-NOCMOV-NEXT: xorl $31, %eax
+; X86-NOCMOV-NEXT: retl
+; X86-NOCMOV-NEXT: .LBB10_1:
+; X86-NOCMOV-NEXT: movl $32, %eax
+; X86-NOCMOV-NEXT: xorl $31, %eax
+; X86-NOCMOV-NEXT: retl
+;
+; X86-CMOV-LABEL: ctlz_bsr_zero_test:
+; X86-CMOV: # %bb.0:
+; X86-CMOV-NEXT: bsrl {{[0-9]+}}(%esp), %ecx
+; X86-CMOV-NEXT: movl $63, %eax
+; X86-CMOV-NEXT: cmovnel %ecx, %eax
+; X86-CMOV-NEXT: retl
;
; X64-LABEL: ctlz_bsr_zero_test:
; X64: # %bb.0:
-; X64-NEXT: testl %edi, %edi
-; X64-NEXT: je .LBB10_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: bsrl %edi, %eax
-; X64-NEXT: xorl $31, %eax
-; X64-NEXT: xorl $31, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB10_1:
-; X64-NEXT: movl $32, %eax
-; X64-NEXT: xorl $31, %eax
+; X64-NEXT: bsrl %edi, %ecx
+; X64-NEXT: movl $63, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlz_bsr_zero_test:
@@ -945,38 +954,39 @@ define i8 @ctlz_xor7_i8_true(i8 %x) {
}
define i8 @ctlz_xor7_i8_false(i8 %x) {
-; X86-LABEL: ctlz_xor7_i8_false:
-; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: testb %al, %al
-; X86-NEXT: je .LBB16_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: movzbl %al, %eax
-; X86-NEXT: bsrl %eax, %eax
-; X86-NEXT: xorl $7, %eax
-; X86-NEXT: xorb $7, %al
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB16_1:
-; X86-NEXT: movb $8, %al
-; X86-NEXT: xorb $7, %al
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: retl
+; X86-NOCMOV-LABEL: ctlz_xor7_i8_false:
+; X86-NOCMOV: # %bb.0:
+; X86-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NOCMOV-NEXT: testb %al, %al
+; X86-NOCMOV-NEXT: je .LBB16_1
+; X86-NOCMOV-NEXT: # %bb.2: # %cond.false
+; X86-NOCMOV-NEXT: movzbl %al, %eax
+; X86-NOCMOV-NEXT: bsrl %eax, %eax
+; X86-NOCMOV-NEXT: xorl $7, %eax
+; X86-NOCMOV-NEXT: xorb $7, %al
+; X86-NOCMOV-NEXT: # kill: def $al killed $al killed $eax
+; X86-NOCMOV-NEXT: retl
+; X86-NOCMOV-NEXT: .LBB16_1:
+; X86-NOCMOV-NEXT: movb $8, %al
+; X86-NOCMOV-NEXT: xorb $7, %al
+; X86-NOCMOV-NEXT: # kill: def $al killed $al killed $eax
+; X86-NOCMOV-NEXT: retl
+;
+; X86-CMOV-LABEL: ctlz_xor7_i8_false:
+; X86-CMOV: # %bb.0:
+; X86-CMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-CMOV-NEXT: bsrl %eax, %ecx
+; X86-CMOV-NEXT: movl $15, %eax
+; X86-CMOV-NEXT: cmovnel %ecx, %eax
+; X86-CMOV-NEXT: # kill: def $al killed $al killed $eax
+; X86-CMOV-NEXT: retl
;
; X64-LABEL: ctlz_xor7_i8_false:
; X64: # %bb.0:
-; X64-NEXT: testb %dil, %dil
-; X64-NEXT: je .LBB16_1
-; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: movzbl %dil, %eax
-; X64-NEXT: bsrl %eax, %eax
-; X64-NEXT: xorl $7, %eax
-; X64-NEXT: xorb $7, %al
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB16_1:
-; X64-NEXT: movb $8, %al
-; X64-NEXT: xorb $7, %al
+; X64-NEXT: bsrl %eax, %ecx
+; X64-NEXT: movl $15, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
@@ -1060,33 +1070,33 @@ define i16 @ctlz_xor15_i16_true(i16 %x) {
}
define i32 @ctlz_xor31_i32_false(i32 %x) {
-; X86-LABEL: ctlz_xor31_i32_false:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB18_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: bsrl %eax, %eax
-; X86-NEXT: xorl $31, %eax
-; X86-NEXT: xorl $31, %eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB18_1:
-; X86-NEXT: movl $32, %eax
-; X86-NEXT: xorl $31, %eax
-; X86-NEXT: retl
+; X86-NOCMOV-LABEL: ctlz_xor31_i32_false:
+; X86-NOCMOV: # %bb.0:
+; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOCMOV-NEXT: testl %eax, %eax
+; X86-NOCMOV-NEXT: je .LBB18_1
+; X86-NOCMOV-NEXT: # %bb.2: # %cond.false
+; X86-NOCMOV-NEXT: bsrl %eax, %eax
+; X86-NOCMOV-NEXT: xorl $31, %eax
+; X86-NOCMOV-NEXT: xorl $31, %eax
+; X86-NOCMOV-NEXT: retl
+; X86-NOCMOV-NEXT: .LBB18_1:
+; X86-NOCMOV-NEXT: movl $32, %eax
+; X86-NOCMOV-NEXT: xorl $31, %eax
+; X86-NOCMOV-NEXT: retl
+;
+; X86-CMOV-LABEL: ctlz_xor31_i32_false:
+; X86-CMOV: # %bb.0:
+; X86-CMOV-NEXT: bsrl {{[0-9]+}}(%esp), %ecx
+; X86-CMOV-NEXT: movl $63, %eax
+; X86-CMOV-NEXT: cmovnel %ecx, %eax
+; X86-CMOV-NEXT: retl
;
; X64-LABEL: ctlz_xor31_i32_false:
; X64: # %bb.0:
-; X64-NEXT: testl %edi, %edi
-; X64-NEXT: je .LBB18_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: bsrl %edi, %eax
-; X64-NEXT: xorl $31, %eax
-; X64-NEXT: xorl $31, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB18_1:
-; X64-NEXT: movl $32, %eax
-; X64-NEXT: xorl $31, %eax
+; X64-NEXT: bsrl %edi, %ecx
+; X64-NEXT: movl $63, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlz_xor31_i32_false:
diff --git a/llvm/test/CodeGen/X86/cttz.ll b/llvm/test/CodeGen/X86/cttz.ll
index 6eb748a1afbab5..daa0e1e89c0cd7 100644
--- a/llvm/test/CodeGen/X86/cttz.ll
+++ b/llvm/test/CodeGen/X86/cttz.ll
@@ -303,27 +303,30 @@ define i16 @cttz_i16_zero_test(i16 %n) {
; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
define i32 @cttz_i32_zero_test(i32 %n) {
-; X86-LABEL: cttz_i32_zero_test:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB6_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: rep bsfl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB6_1:
-; X86-NEXT: movl $32, %eax
-; X86-NEXT: retl
+; X86-NOCMOV-LABEL: cttz_i32_zero_test:
+; X86-NOCMOV: # %bb.0:
+; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOCMOV-NEXT: testl %eax, %eax
+; X86-NOCMOV-NEXT: je .LBB6_1
+; X86-NOCMOV-NEXT: # %bb.2: # %cond.false
+; X86-NOCMOV-NEXT: rep bsfl %eax, %eax
+; X86-NOCMOV-NEXT: retl
+; X86-NOCMOV-NEXT: .LBB6_1:
+; X86-NOCMOV-NEXT: movl $32, %eax
+; X86-NOCMOV-NEXT: retl
+;
+; X86-CMOV-LABEL: cttz_i32_zero_test:
+; X86-CMOV: # %bb.0:
+; X86-CMOV-NEXT: bsfl {{[0-9]+}}(%esp), %ecx
+; X86-CMOV-NEXT: movl $32, %eax
+; X86-CMOV-NEXT: cmovnel %ecx, %eax
+; X86-CMOV-NEXT: retl
;
; X64-LABEL: cttz_i32_zero_test:
; X64: # %bb.0:
-; X64-NEXT: testl %edi, %edi
-; X64-NEXT: je .LBB6_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %edi, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB6_1:
+; X64-NEXT: bsfl %edi, %ecx
; X64-NEXT: movl $32, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: cttz_i32_zero_test:
@@ -388,13 +391,9 @@ define i64 @cttz_i64_zero_test(i64 %n) {
;
; X64-LABEL: cttz_i64_zero_test:
; X64: # %bb.0:
-; X64-NEXT: testq %rdi, %rdi
-; X64-NEXT: je .LBB7_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfq %rdi, %rax
-; X64-NEXT: retq
-; X64-NEXT: .LBB7_1:
+; X64-NEXT: bsfq %rdi, %rcx
; X64-NEXT: movl $64, %eax
+; X64-NEXT: cmovneq %rcx, %rax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: cttz_i64_zero_test:
diff --git a/llvm/test/CodeGen/X86/known-never-zero.ll b/llvm/test/CodeGen/X86/known-never-zero.ll
index df11a44626e381..52a8853a00bf54 100644
--- a/llvm/test/CodeGen/X86/known-never-zero.ll
+++ b/llvm/test/CodeGen/X86/known-never-zero.ll
@@ -44,23 +44,17 @@ define i32 @or_maybe_zero(i32 %x, i32 %y) {
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: je .LBB1_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: rep bsfl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB1_1:
+; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: or_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: orl %esi, %edi
-; X64-NEXT: je .LBB1_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %edi, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB1_1:
+; X64-NEXT: bsfl %edi, %ecx
; X64-NEXT: movl $32, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: retq
%z = or i32 %x, %y
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -95,18 +89,14 @@ define i32 @select_known_nonzero(i1 %c, i32 %x) {
define i32 @select_maybe_zero(i1 %c, i32 %x) {
; X86-LABEL: select_maybe_zero:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: orl $1, %ecx
-; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl $1, %eax
+; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT: cmovnel %ecx, %eax
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB3_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: rep bsfl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB3_1:
+; X86-NEXT: cmovnel %eax, %ecx
+; X86-NEXT: bsfl %ecx, %ecx
; X86-NEXT: movl $32, %eax
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: select_maybe_zero:
@@ -115,13 +105,9 @@ define i32 @select_maybe_zero(i1 %c, i32 %x) {
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: testb $1, %dil
; X64-NEXT: cmovnel %esi, %eax
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: je .LBB3_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB3_1:
+; X64-NEXT: bsfl %eax, %ecx
; X64-NEXT: movl $32, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: retq
%y = or i32 %x, 1
%z = select i1 %c, i32 %y, i32 0
@@ -205,13 +191,9 @@ define i32 @shl_maybe_zero(i32 %x, i32 %y) {
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shll %cl, %eax
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB7_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: rep bsfl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB7_1:
+; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: shl_maybe_zero:
@@ -219,13 +201,9 @@ define i32 @shl_maybe_zero(i32 %x, i32 %y) {
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %esi
-; X64-NEXT: testl %esi, %esi
-; X64-NEXT: je .LBB7_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %esi, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB7_1:
+; X64-NEXT: bsfl %esi, %ecx
; X64-NEXT: movl $32, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: retq
%z = shl nuw nsw i32 %y, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -257,17 +235,13 @@ define i32 @uaddsat_known_nonzero(i32 %x) {
define i32 @uaddsat_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: uaddsat_maybe_zero:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl $-1, %eax
-; X86-NEXT: cmovael %ecx, %eax
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB9_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: rep bsfl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB9_1:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl $-1, %ecx
+; X86-NEXT: cmovael %eax, %ecx
+; X86-NEXT: bsfl %ecx, %ecx
; X86-NEXT: movl $32, %eax
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: uaddsat_maybe_zero:
@@ -275,13 +249,9 @@ define i32 @uaddsat_maybe_zero(i32 %x, i32 %y) {
; X64-NEXT: addl %esi, %edi
; X64-NEXT: movl $-1, %eax
; X64-NEXT: cmovael %edi, %eax
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: je .LBB9_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB9_1:
+; X64-NEXT: bsfl %eax, %ecx
; X64-NEXT: movl $32, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: retq
%z = call i32 @llvm.uadd.sat.i32(i32 %x, i32 %y)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -323,26 +293,18 @@ define i32 @umax_maybe_zero(i32 %x, i32 %y) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpl %eax, %ecx
; X86-NEXT: cmoval %ecx, %eax
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB11_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: rep bsfl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB11_1:
+; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: umax_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: cmpl %esi, %edi
; X64-NEXT: cmoval %edi, %esi
-; X64-NEXT: testl %esi, %esi
-; X64-NEXT: je .LBB11_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %esi, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB11_1:
+; X64-NEXT: bsfl %esi, %ecx
; X64-NEXT: movl $32, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: retq
%z = call i32 @llvm.umax.i32(i32 %x, i32 %y)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -383,17 +345,13 @@ define i32 @umin_known_nonzero(i32 %xx, i32 %yy) {
define i32 @umin_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: umin_maybe_zero:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmpl $54, %ecx
-; X86-NEXT: movl $54, %eax
-; X86-NEXT: cmovbl %ecx, %eax
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB13_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: rep bsfl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB13_1:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl $54, %eax
+; X86-NEXT: movl $54, %ecx
+; X86-NEXT: cmovbl %eax, %ecx
+; X86-NEXT: bsfl %ecx, %ecx
; X86-NEXT: movl $32, %eax
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: umin_maybe_zero:
@@ -401,13 +359,9 @@ define i32 @umin_maybe_zero(i32 %x, i32 %y) {
; X64-NEXT: cmpl $54, %edi
; X64-NEXT: movl $54, %eax
; X64-NEXT: cmovbl %edi, %eax
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: je .LBB13_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB13_1:
+; X64-NEXT: bsfl %eax, %ecx
; X64-NEXT: movl $32, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: retq
%z = call i32 @llvm.umin.i32(i32 %x, i32 54)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -504,17 +458,13 @@ define <4 x i32> @smin_known_zero_vec(<4 x i32> %x, <4 x i32> %y) {
define i32 @smin_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: smin_maybe_zero:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmpl $54, %ecx
-; X86-NEXT: movl $54, %eax
-; X86-NEXT: cmovll %ecx, %eax
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB17_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: rep bsfl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB17_1:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl $54, %eax
+; X86-NEXT: movl $54, %ecx
+; X86-NEXT: cmovll %eax, %ecx
+; X86-NEXT: bsfl %ecx, %ecx
; X86-NEXT: movl $32, %eax
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: smin_maybe_zero:
@@ -522,13 +472,9 @@ define i32 @smin_maybe_zero(i32 %x, i32 %y) {
; X64-NEXT: cmpl $54, %edi
; X64-NEXT: movl $54, %eax
; X64-NEXT: cmovll %edi, %eax
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: je .LBB17_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB17_1:
+; X64-NEXT: bsfl %eax, %ecx
; X64-NEXT: movl $32, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: retq
%z = call i32 @llvm.smin.i32(i32 %x, i32 54)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -625,17 +571,13 @@ define <4 x i32> @smax_known_zero_vec(<4 x i32> %x, <4 x i32> %y) {
define i32 @smax_known_zero(i32 %x, i32 %y) {
; X86-LABEL: smax_known_zero:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: testl %ecx, %ecx
-; X86-NEXT: movl $-1, %eax
-; X86-NEXT: cmovnsl %ecx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB21_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: rep bsfl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB21_1:
+; X86-NEXT: movl $-1, %ecx
+; X86-NEXT: cmovnsl %eax, %ecx
+; X86-NEXT: bsfl %ecx, %ecx
; X86-NEXT: movl $32, %eax
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: smax_known_zero:
@@ -643,13 +585,9 @@ define i32 @smax_known_zero(i32 %x, i32 %y) {
; X64-NEXT: testl %edi, %edi
; X64-NEXT: movl $-1, %eax
; X64-NEXT: cmovnsl %edi, %eax
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: je .LBB21_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB21_1:
+; X64-NEXT: bsfl %eax, %ecx
; X64-NEXT: movl $32, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: retq
%z = call i32 @llvm.smax.i32(i32 %x, i32 -1)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -663,29 +601,16 @@ define i32 @rotr_known_nonzero(i32 %xx, i32 %y) {
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rorl %cl, %eax
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB22_1
-; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
-; X86-NEXT: .LBB22_1:
-; X86-NEXT: movl $32, %eax
-; X86-NEXT: retl
;
; X64-LABEL: rotr_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: orl $256, %edi # imm = 0x100
-; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: rorl %cl, %eax
-; X64-NEXT: testl %edi, %edi
-; X64-NEXT: je .LBB22_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB22_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: rorl %cl, %edi
+; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
%x = or i32 %xx, 256
%shr = lshr i32 %x, %y
@@ -702,28 +627,19 @@ define i32 @rotr_maybe_zero(i32 %x, i32 %y) {
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rorl %cl, %eax
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB23_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: rep bsfl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB23_1:
+; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: rotr_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: rorl %cl, %eax
-; X64-NEXT: testl %edi, %edi
-; X64-NEXT: je .LBB23_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB23_1:
+; X64-NEXT: rorl %cl, %edi
+; X64-NEXT: bsfl %edi, %ecx
; X64-NEXT: movl $32, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: retq
%shr = lshr i32 %x, %y
%sub = sub i32 32, %y
@@ -763,28 +679,19 @@ define i32 @rotr_with_fshr_maybe_zero(i32 %x, i32 %y) {
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rorl %cl, %eax
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB25_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: rep bsfl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB25_1:
+; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: rotr_with_fshr_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: rorl %cl, %eax
-; X64-NEXT: testl %edi, %edi
-; X64-NEXT: je .LBB25_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB25_1:
+; X64-NEXT: rorl %cl, %edi
+; X64-NEXT: bsfl %edi, %ecx
; X64-NEXT: movl $32, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: retq
%z = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %y)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -798,29 +705,16 @@ define i32 @rotl_known_nonzero(i32 %xx, i32 %y) {
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: roll %cl, %eax
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB26_1
-; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
-; X86-NEXT: .LBB26_1:
-; X86-NEXT: movl $32, %eax
-; X86-NEXT: retl
;
; X64-LABEL: rotl_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: orl $256, %edi # imm = 0x100
-; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: roll %cl, %eax
-; X64-NEXT: testl %edi, %edi
-; X64-NEXT: je .LBB26_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB26_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: roll %cl, %edi
+; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
%x = or i32 %xx, 256
%shl = shl i32 %x, %y
@@ -837,28 +731,19 @@ define i32 @rotl_maybe_zero(i32 %x, i32 %y) {
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: roll %cl, %eax
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB27_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: rep bsfl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB27_1:
+; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: rotl_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: roll %cl, %eax
-; X64-NEXT: testl %edi, %edi
-; X64-NEXT: je .LBB27_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB27_1:
+; X64-NEXT: roll %cl, %edi
+; X64-NEXT: bsfl %edi, %ecx
; X64-NEXT: movl $32, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: retq
%shl = shl i32 %x, %y
%sub = sub i32 32, %y
@@ -898,28 +783,19 @@ define i32 @rotl_with_fshl_maybe_zero(i32 %x, i32 %y) {
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: roll %cl, %eax
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB29_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: rep bsfl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB29_1:
+; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: rotl_with_fshl_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: roll %cl, %eax
-; X64-NEXT: testl %edi, %edi
-; X64-NEXT: je .LBB29_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB29_1:
+; X64-NEXT: roll %cl, %edi
+; X64-NEXT: bsfl %edi, %ecx
; X64-NEXT: movl $32, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: retq
%z = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %y)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -978,13 +854,9 @@ define i32 @sra_maybe_zero(i32 %x, i32 %y) {
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sarl %cl, %eax
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB32_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: rep bsfl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB32_1:
+; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: sra_maybe_zero:
@@ -992,13 +864,9 @@ define i32 @sra_maybe_zero(i32 %x, i32 %y) {
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: sarl %cl, %esi
-; X64-NEXT: testl %esi, %esi
-; X64-NEXT: je .LBB32_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %esi, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB32_1:
+; X64-NEXT: bsfl %esi, %ecx
; X64-NEXT: movl $32, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: retq
%z = ashr exact i32 %y, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -1057,13 +925,9 @@ define i32 @srl_maybe_zero(i32 %x, i32 %y) {
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrl %cl, %eax
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB35_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: rep bsfl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB35_1:
+; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: srl_maybe_zero:
@@ -1071,13 +935,9 @@ define i32 @srl_maybe_zero(i32 %x, i32 %y) {
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shrl %cl, %esi
-; X64-NEXT: testl %esi, %esi
-; X64-NEXT: je .LBB35_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %esi, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB35_1:
+; X64-NEXT: bsfl %esi, %ecx
; X64-NEXT: movl $32, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: retq
%z = lshr exact i32 %y, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -1114,13 +974,9 @@ define i32 @udiv_maybe_zero(i32 %x, i32 %y) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: divl {{[0-9]+}}(%esp)
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB37_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: rep bsfl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB37_1:
+; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: udiv_maybe_zero:
@@ -1128,13 +984,9 @@ define i32 @udiv_maybe_zero(i32 %x, i32 %y) {
; X64-NEXT: movl %edi, %eax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divl %esi
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: je .LBB37_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB37_1:
+; X64-NEXT: bsfl %eax, %ecx
; X64-NEXT: movl $32, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: retq
%z = udiv exact i32 %x, %y
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -1171,13 +1023,9 @@ define i32 @sdiv_maybe_zero(i32 %x, i32 %y) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cltd
; X86-NEXT: idivl {{[0-9]+}}(%esp)
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB39_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: rep bsfl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB39_1:
+; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: sdiv_maybe_zero:
@@ -1185,13 +1033,9 @@ define i32 @sdiv_maybe_zero(i32 %x, i32 %y) {
; X64-NEXT: movl %edi, %eax
; X64-NEXT: cltd
; X64-NEXT: idivl %esi
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: je .LBB39_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB39_1:
+; X64-NEXT: bsfl %eax, %ecx
; X64-NEXT: movl $32, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: retq
%z = sdiv exact i32 %x, %y
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -1225,24 +1069,18 @@ define i32 @add_maybe_zero(i32 %xx, i32 %y) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $1, %eax
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: je .LBB41_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: rep bsfl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB41_1:
+; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: add_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: orl $1, %edi
; X64-NEXT: addl %esi, %edi
-; X64-NEXT: je .LBB41_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %edi, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB41_1:
+; X64-NEXT: bsfl %edi, %ecx
; X64-NEXT: movl $32, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: retq
%x = or i32 %xx, 1
%z = add nsw i32 %x, %y
@@ -1304,16 +1142,13 @@ define i32 @sub_known_nonzero_ne_case(i32 %xx, i32 %yy) {
define i32 @sub_maybe_zero(i32 %x) {
; X86-LABEL: sub_maybe_zero:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: orl $64, %eax
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: je .LBB44_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: rep bsfl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB44_1:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: orl $64, %ecx
+; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: bsfl %ecx, %ecx
; X86-NEXT: movl $32, %eax
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: sub_maybe_zero:
@@ -1321,12 +1156,9 @@ define i32 @sub_maybe_zero(i32 %x) {
; X64-NEXT: movl %edi, %eax
; X64-NEXT: orl $64, %eax
; X64-NEXT: subl %edi, %eax
-; X64-NEXT: je .LBB44_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB44_1:
+; X64-NEXT: bsfl %eax, %ecx
; X64-NEXT: movl $32, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: retq
%y = or i32 %x, 64
%z = sub i32 %y, %x
@@ -1337,25 +1169,19 @@ define i32 @sub_maybe_zero(i32 %x) {
define i32 @sub_maybe_zero2(i32 %x) {
; X86-LABEL: sub_maybe_zero2:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: negl %eax
-; X86-NEXT: je .LBB45_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: rep bsfl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB45_1:
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: sub_maybe_zero2:
; X64: # %bb.0:
; X64-NEXT: negl %edi
-; X64-NEXT: je .LBB45_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %edi, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB45_1:
+; X64-NEXT: bsfl %edi, %ecx
; X64-NEXT: movl $32, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: retq
%z = sub i32 0, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -1368,26 +1194,18 @@ define i32 @mul_known_nonzero_nsw(i32 %x, i32 %yy) {
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: imull {{[0-9]+}}(%esp), %eax
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB46_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: rep bsfl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB46_1:
+; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: mul_known_nonzero_nsw:
; X64: # %bb.0:
; X64-NEXT: orl $256, %esi # imm = 0x100
; X64-NEXT: imull %edi, %esi
-; X64-NEXT: testl %esi, %esi
-; X64-NEXT: je .LBB46_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %esi, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB46_1:
+; X64-NEXT: bsfl %esi, %ecx
; X64-NEXT: movl $32, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: retq
%y = or i32 %yy, 256
%z = mul nsw i32 %y, %x
@@ -1401,26 +1219,18 @@ define i32 @mul_known_nonzero_nuw(i32 %x, i32 %yy) {
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: imull {{[0-9]+}}(%esp), %eax
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB47_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: rep bsfl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB47_1:
+; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: mul_known_nonzero_nuw:
; X64: # %bb.0:
; X64-NEXT: orl $256, %esi # imm = 0x100
; X64-NEXT: imull %edi, %esi
-; X64-NEXT: testl %esi, %esi
-; X64-NEXT: je .LBB47_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %esi, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB47_1:
+; X64-NEXT: bsfl %esi, %ecx
; X64-NEXT: movl $32, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: retq
%y = or i32 %yy, 256
%z = mul nuw i32 %y, %x
@@ -1433,25 +1243,17 @@ define i32 @mul_maybe_zero(i32 %x, i32 %y) {
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: imull {{[0-9]+}}(%esp), %eax
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB48_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: rep bsfl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB48_1:
+; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: mul_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: imull %esi, %edi
-; X64-NEXT: testl %edi, %edi
-; X64-NEXT: je .LBB48_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %edi, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB48_1:
+; X64-NEXT: bsfl %edi, %ecx
; X64-NEXT: movl $32, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: retq
%z = mul nuw nsw i32 %y, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -1496,25 +1298,17 @@ define i32 @bitcast_maybe_zero(<2 x i16> %x) {
; X86-LABEL: bitcast_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movd %xmm0, %eax
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB50_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: rep bsfl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB50_1:
+; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: bitcast_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: vmovd %xmm0, %eax
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: je .LBB50_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB50_1:
+; X64-NEXT: bsfl %eax, %ecx
; X64-NEXT: movl $32, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: retq
%z = bitcast <2 x i16> %x to i32
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -1524,27 +1318,17 @@ define i32 @bitcast_maybe_zero(<2 x i16> %x) {
define i32 @bitcast_from_float(float %x) {
; X86-LABEL: bitcast_from_float:
; X86: # %bb.0:
-; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-NEXT: movd %xmm0, %eax
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB51_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: rep bsfl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB51_1:
+; X86-NEXT: bsfl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $32, %eax
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: bitcast_from_float:
; X64: # %bb.0:
; X64-NEXT: vmovd %xmm0, %eax
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: je .LBB51_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB51_1:
+; X64-NEXT: bsfl %eax, %ecx
; X64-NEXT: movl $32, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: retq
%z = bitcast float %x to i32
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -1580,26 +1364,17 @@ define i32 @zext_maybe_zero(i16 %x) {
; X86-LABEL: zext_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: testw %ax, %ax
-; X86-NEXT: je .LBB53_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: movzwl %ax, %eax
-; X86-NEXT: rep bsfl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB53_1:
+; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: zext_maybe_zero:
; X64: # %bb.0:
-; X64-NEXT: testw %di, %di
-; X64-NEXT: je .LBB53_1
-; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: movzwl %di, %eax
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB53_1:
+; X64-NEXT: bsfl %eax, %ecx
; X64-NEXT: movl $32, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: retq
%z = zext i16 %x to i32
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -1635,25 +1410,17 @@ define i32 @sext_maybe_zero(i16 %x) {
; X86-LABEL: sext_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB55_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: rep bsfl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB55_1:
+; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: sext_maybe_zero:
; X64: # %bb.0:
-; X64-NEXT: testw %di, %di
-; X64-NEXT: je .LBB55_1
-; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: movswl %di, %eax
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB55_1:
+; X64-NEXT: bsfl %eax, %ecx
; X64-NEXT: movl $32, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: retq
%z = sext i16 %x to i32
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
diff --git a/llvm/test/CodeGen/X86/lzcnt-cmp.ll b/llvm/test/CodeGen/X86/lzcnt-cmp.ll
index a9513a373661f4..4f65739cc70dd1 100644
--- a/llvm/test/CodeGen/X86/lzcnt-cmp.ll
+++ b/llvm/test/CodeGen/X86/lzcnt-cmp.ll
@@ -12,27 +12,11 @@ define i1 @lshr_ctlz_cmpeq_one_i64(i64 %in) nounwind {
; X86-NEXT: sete %al
; X86-NEXT: retl
;
-; X64-BSR-LABEL: lshr_ctlz_cmpeq_one_i64:
-; X64-BSR: # %bb.0:
-; X64-BSR-NEXT: testq %rdi, %rdi
-; X64-BSR-NEXT: je .LBB0_1
-; X64-BSR-NEXT: # %bb.2: # %cond.false
-; X64-BSR-NEXT: bsrq %rdi, %rax
-; X64-BSR-NEXT: xorq $63, %rax
-; X64-BSR-NEXT: shrl $6, %eax
-; X64-BSR-NEXT: # kill: def $al killed $al killed $rax
-; X64-BSR-NEXT: retq
-; X64-BSR-NEXT: .LBB0_1:
-; X64-BSR-NEXT: movl $64, %eax
-; X64-BSR-NEXT: shrl $6, %eax
-; X64-BSR-NEXT: # kill: def $al killed $al killed $rax
-; X64-BSR-NEXT: retq
-;
-; X64-LZCNT-LABEL: lshr_ctlz_cmpeq_one_i64:
-; X64-LZCNT: # %bb.0:
-; X64-LZCNT-NEXT: testq %rdi, %rdi
-; X64-LZCNT-NEXT: sete %al
-; X64-LZCNT-NEXT: retq
+; X64-LABEL: lshr_ctlz_cmpeq_one_i64:
+; X64: # %bb.0:
+; X64-NEXT: testq %rdi, %rdi
+; X64-NEXT: sete %al
+; X64-NEXT: retq
%ctlz = call i64 @llvm.ctlz.i64(i64 %in, i1 0)
%lshr = lshr i64 %ctlz, 6
%icmp = icmp eq i64 %lshr, 1
@@ -81,27 +65,11 @@ define i1 @lshr_ctlz_cmpne_zero_i64(i64 %in) nounwind {
; X86-NEXT: sete %al
; X86-NEXT: retl
;
-; X64-BSR-LABEL: lshr_ctlz_cmpne_zero_i64:
-; X64-BSR: # %bb.0:
-; X64-BSR-NEXT: testq %rdi, %rdi
-; X64-BSR-NEXT: je .LBB2_1
-; X64-BSR-NEXT: # %bb.2: # %cond.false
-; X64-BSR-NEXT: bsrq %rdi, %rax
-; X64-BSR-NEXT: xorq $63, %rax
-; X64-BSR-NEXT: shrl $6, %eax
-; X64-BSR-NEXT: # kill: def $al killed $al killed $rax
-; X64-BSR-NEXT: retq
-; X64-BSR-NEXT: .LBB2_1:
-; X64-BSR-NEXT: movl $64, %eax
-; X64-BSR-NEXT: shrl $6, %eax
-; X64-BSR-NEXT: # kill: def $al killed $al killed $rax
-; X64-BSR-NEXT: retq
-;
-; X64-LZCNT-LABEL: lshr_ctlz_cmpne_zero_i64:
-; X64-LZCNT: # %bb.0:
-; X64-LZCNT-NEXT: testq %rdi, %rdi
-; X64-LZCNT-NEXT: sete %al
-; X64-LZCNT-NEXT: retq
+; X64-LABEL: lshr_ctlz_cmpne_zero_i64:
+; X64: # %bb.0:
+; X64-NEXT: testq %rdi, %rdi
+; X64-NEXT: sete %al
+; X64-NEXT: retq
%ctlz = call i64 @llvm.ctlz.i64(i64 %in, i1 0)
%lshr = lshr i64 %ctlz, 6
%icmp = icmp ne i64 %lshr, 0
diff --git a/llvm/test/CodeGen/X86/pr57673.ll b/llvm/test/CodeGen/X86/pr57673.ll
index d0ae6cea068dc0..cf7717f420480b 100644
--- a/llvm/test/CodeGen/X86/pr57673.ll
+++ b/llvm/test/CodeGen/X86/pr57673.ll
@@ -24,35 +24,24 @@ define void @foo() {
; NORMAL-NEXT: [[COPY:%[0-9]+]]:gr8 = COPY [[MOV32r0_]].sub_8bit
; NORMAL-NEXT: [[LEA64r:%[0-9]+]]:gr64 = LEA64r %stack.1.i, 1, $noreg, 0, $noreg
; NORMAL-NEXT: [[DEF:%[0-9]+]]:gr64 = IMPLICIT_DEF
- ; NORMAL-NEXT: [[DEF1:%[0-9]+]]:gr64 = IMPLICIT_DEF
; NORMAL-NEXT: {{ $}}
; NORMAL-NEXT: bb.1.bb_8:
- ; NORMAL-NEXT: successors: %bb.5(0x40000000), %bb.2(0x40000000)
+ ; NORMAL-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; NORMAL-NEXT: {{ $}}
; NORMAL-NEXT: TEST8rr [[COPY]], [[COPY]], implicit-def $eflags
- ; NORMAL-NEXT: JCC_1 %bb.5, 5, implicit $eflags
+ ; NORMAL-NEXT: JCC_1 %bb.3, 5, implicit $eflags
; NORMAL-NEXT: JMP_1 %bb.2
; NORMAL-NEXT: {{ $}}
; NORMAL-NEXT: bb.2.bb_mid:
- ; NORMAL-NEXT: successors: %bb.4(0x30000000), %bb.3(0x50000000)
+ ; NORMAL-NEXT: successors: %bb.3(0x80000000)
; NORMAL-NEXT: {{ $}}
- ; NORMAL-NEXT: TEST64rr [[DEF1]], [[DEF1]], implicit-def $eflags
- ; NORMAL-NEXT: JCC_1 %bb.4, 4, implicit $eflags
- ; NORMAL-NEXT: JMP_1 %bb.3
- ; NORMAL-NEXT: {{ $}}
- ; NORMAL-NEXT: bb.3.cond.false:
- ; NORMAL-NEXT: successors: %bb.4(0x80000000)
- ; NORMAL-NEXT: {{ $}}
- ; NORMAL-NEXT: bb.4.cond.end:
- ; NORMAL-NEXT: successors: %bb.5(0x80000000)
- ; NORMAL-NEXT: {{ $}}
- ; NORMAL-NEXT: [[MOVUPSrm:%[0-9]+]]:vr128 = MOVUPSrm [[LEA64r]], 1, $noreg, 40, $noreg :: (load (s128) from %ir.i4, align 8)
+ ; NORMAL-NEXT: [[MOVUPSrm:%[0-9]+]]:vr128 = MOVUPSrm %stack.1.i, 1, $noreg, 40, $noreg :: (load (s128) from %ir.i4, align 8)
; NORMAL-NEXT: MOVUPSmr $noreg, 1, $noreg, 0, $noreg, killed [[MOVUPSrm]] :: (store (s128) into `ptr null`, align 8)
- ; NORMAL-NEXT: DBG_VALUE_LIST !3, !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_plus_uconst, 40), [[LEA64r]], [[LEA64r]], debug-location !8
- ; NORMAL-NEXT: [[MOVUPSrm1:%[0-9]+]]:vr128 = MOVUPSrm [[LEA64r]], 1, $noreg, 40, $noreg :: (load (s128) from %ir.i6, align 8)
+ ; NORMAL-NEXT: DBG_VALUE $noreg, $noreg, !3, !DIExpression(), debug-location !8
+ ; NORMAL-NEXT: [[MOVUPSrm1:%[0-9]+]]:vr128 = MOVUPSrm %stack.1.i, 1, $noreg, 40, $noreg :: (load (s128) from %ir.i6, align 8)
; NORMAL-NEXT: MOVUPSmr $noreg, 1, $noreg, 0, $noreg, killed [[MOVUPSrm1]] :: (store (s128) into `ptr null`, align 8)
; NORMAL-NEXT: {{ $}}
- ; NORMAL-NEXT: bb.5.bb_last:
+ ; NORMAL-NEXT: bb.3.bb_last:
; NORMAL-NEXT: successors: %bb.1(0x80000000)
; NORMAL-NEXT: {{ $}}
; NORMAL-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
@@ -74,35 +63,24 @@ define void @foo() {
; INSTRREF-NEXT: [[COPY:%[0-9]+]]:gr8 = COPY [[MOV32r0_]].sub_8bit
; INSTRREF-NEXT: [[LEA64r:%[0-9]+]]:gr64 = LEA64r %stack.1.i, 1, $noreg, 0, $noreg
; INSTRREF-NEXT: [[DEF:%[0-9]+]]:gr64 = IMPLICIT_DEF
- ; INSTRREF-NEXT: [[DEF1:%[0-9]+]]:gr64 = IMPLICIT_DEF
; INSTRREF-NEXT: {{ $}}
; INSTRREF-NEXT: bb.1.bb_8:
- ; INSTRREF-NEXT: successors: %bb.5(0x40000000), %bb.2(0x40000000)
+ ; INSTRREF-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; INSTRREF-NEXT: {{ $}}
; INSTRREF-NEXT: TEST8rr [[COPY]], [[COPY]], implicit-def $eflags
- ; INSTRREF-NEXT: JCC_1 %bb.5, 5, implicit $eflags
+ ; INSTRREF-NEXT: JCC_1 %bb.3, 5, implicit $eflags
; INSTRREF-NEXT: JMP_1 %bb.2
; INSTRREF-NEXT: {{ $}}
; INSTRREF-NEXT: bb.2.bb_mid:
- ; INSTRREF-NEXT: successors: %bb.4(0x30000000), %bb.3(0x50000000)
- ; INSTRREF-NEXT: {{ $}}
- ; INSTRREF-NEXT: TEST64rr [[DEF1]], [[DEF1]], implicit-def $eflags
- ; INSTRREF-NEXT: JCC_1 %bb.4, 4, implicit $eflags
- ; INSTRREF-NEXT: JMP_1 %bb.3
- ; INSTRREF-NEXT: {{ $}}
- ; INSTRREF-NEXT: bb.3.cond.false:
- ; INSTRREF-NEXT: successors: %bb.4(0x80000000)
- ; INSTRREF-NEXT: {{ $}}
- ; INSTRREF-NEXT: bb.4.cond.end:
- ; INSTRREF-NEXT: successors: %bb.5(0x80000000)
+ ; INSTRREF-NEXT: successors: %bb.3(0x80000000)
; INSTRREF-NEXT: {{ $}}
- ; INSTRREF-NEXT: [[MOVUPSrm:%[0-9]+]]:vr128 = MOVUPSrm [[LEA64r]], 1, $noreg, 40, $noreg :: (load (s128) from %ir.i4, align 8)
+ ; INSTRREF-NEXT: [[MOVUPSrm:%[0-9]+]]:vr128 = MOVUPSrm %stack.1.i, 1, $noreg, 40, $noreg :: (load (s128) from %ir.i4, align 8)
; INSTRREF-NEXT: MOVUPSmr $noreg, 1, $noreg, 0, $noreg, killed [[MOVUPSrm]] :: (store (s128) into `ptr null`, align 8)
- ; INSTRREF-NEXT: DBG_INSTR_REF !3, !DIExpression(DW_OP_LLVM_arg, 0), dbg-instr-ref(1, 0), dbg-instr-ref(1, 0), debug-location !8
- ; INSTRREF-NEXT: [[MOVUPSrm1:%[0-9]+]]:vr128 = MOVUPSrm [[LEA64r]], 1, $noreg, 40, $noreg :: (load (s128) from %ir.i6, align 8)
+ ; INSTRREF-NEXT: DBG_VALUE $noreg, $noreg, !3, !DIExpression(), debug-location !8
+ ; INSTRREF-NEXT: [[MOVUPSrm1:%[0-9]+]]:vr128 = MOVUPSrm %stack.1.i, 1, $noreg, 40, $noreg :: (load (s128) from %ir.i6, align 8)
; INSTRREF-NEXT: MOVUPSmr $noreg, 1, $noreg, 0, $noreg, killed [[MOVUPSrm1]] :: (store (s128) into `ptr null`, align 8)
; INSTRREF-NEXT: {{ $}}
- ; INSTRREF-NEXT: bb.5.bb_last:
+ ; INSTRREF-NEXT: bb.3.bb_last:
; INSTRREF-NEXT: successors: %bb.1(0x80000000)
; INSTRREF-NEXT: {{ $}}
; INSTRREF-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
diff --git a/llvm/test/CodeGen/X86/pr89877.ll b/llvm/test/CodeGen/X86/pr89877.ll
index 9820ec42f5b8cc..1bf5fc11090afa 100644
--- a/llvm/test/CodeGen/X86/pr89877.ll
+++ b/llvm/test/CodeGen/X86/pr89877.ll
@@ -9,13 +9,9 @@ define i32 @sext_known_nonzero(i16 %xx) {
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: shll %cl, %eax
; X86-NEXT: cwtl
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB0_1
-; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: rep bsfl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: .LBB0_1:
+; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: sext_known_nonzero:
@@ -25,13 +21,9 @@ define i32 @sext_known_nonzero(i16 %xx) {
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %eax
; X64-NEXT: cwtl
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: je .LBB0_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB0_1:
+; X64-NEXT: bsfl %eax, %ecx
; X64-NEXT: movl $32, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: retq
%x = shl i16 256, %xx
%z = sext i16 %x to i32
diff --git a/llvm/test/CodeGen/X86/pr90847.ll b/llvm/test/CodeGen/X86/pr90847.ll
index 7aa0ceb26e1acb..d7ecdc37479c51 100644
--- a/llvm/test/CodeGen/X86/pr90847.ll
+++ b/llvm/test/CodeGen/X86/pr90847.ll
@@ -15,14 +15,9 @@ define i32 @PR90847(<8 x float> %x) nounwind {
; AVX1-NEXT: vminps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vcmpeqps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovmskps %ymm0, %eax
-; AVX1-NEXT: testl %eax, %eax
-; AVX1-NEXT: je .LBB0_1
-; AVX1-NEXT: # %bb.2: # %cond.false
-; AVX1-NEXT: rep bsfl %eax, %eax
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-; AVX1-NEXT: .LBB0_1:
+; AVX1-NEXT: bsfl %eax, %ecx
; AVX1-NEXT: movl $32, %eax
+; AVX1-NEXT: cmovnel %ecx, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@@ -36,14 +31,9 @@ define i32 @PR90847(<8 x float> %x) nounwind {
; AVX2-NEXT: vminps %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vcmpeqps %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vmovmskps %ymm0, %eax
-; AVX2-NEXT: testl %eax, %eax
-; AVX2-NEXT: je .LBB0_1
-; AVX2-NEXT: # %bb.2: # %cond.false
-; AVX2-NEXT: rep bsfl %eax, %eax
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
-; AVX2-NEXT: .LBB0_1:
+; AVX2-NEXT: bsfl %eax, %ecx
; AVX2-NEXT: movl $32, %eax
+; AVX2-NEXT: cmovnel %ecx, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
entry:
diff --git a/llvm/test/CodeGen/X86/pr92569.ll b/llvm/test/CodeGen/X86/pr92569.ll
index f91063089e3a90..0fb4ed7905287c 100644
--- a/llvm/test/CodeGen/X86/pr92569.ll
+++ b/llvm/test/CodeGen/X86/pr92569.ll
@@ -4,17 +4,13 @@
define void @PR92569(i64 %arg, <8 x i8> %arg1) {
; CHECK-LABEL: PR92569:
; CHECK: # %bb.0:
-; CHECK-NEXT: testq %rdi, %rdi
-; CHECK-NEXT: je .LBB0_1
-; CHECK-NEXT: # %bb.2: # %cond.false
-; CHECK-NEXT: rep bsfq %rdi, %rax
-; CHECK-NEXT: jmp .LBB0_3
-; CHECK-NEXT: .LBB0_1:
-; CHECK-NEXT: movl $64, %eax
-; CHECK-NEXT: .LBB0_3: # %cond.end
-; CHECK-NEXT: shrb $3, %al
+; CHECK-NEXT: bsfq %rdi, %rax
+; CHECK-NEXT: movl $64, %ecx
+; CHECK-NEXT: cmovneq %rax, %rcx
+; CHECK-NEXT: shrb $3, %cl
; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: movzbl %cl, %eax
+; CHECK-NEXT: andl $15, %eax
; CHECK-NEXT: movzbl -24(%rsp,%rax), %eax
; CHECK-NEXT: movl %eax, 0
; CHECK-NEXT: retq