[llvm] ed7ad0a - [X86] Promote cttz_i32(x) -> cttz_i64((i64)x | (1 << 32)) (#102900) (REAPPLIED)

Tue Aug 13 04:48:47 PDT 2024

Author: Simon Pilgrim
Date: 2024-08-13T12:48:27+01:00
New Revision: ed7ad0a1a0584f90a211ca5a87bc46968e169e5d

URL: https://github.com/llvm/llvm-project/commit/ed7ad0a1a0584f90a211ca5a87bc46968e169e5d
DIFF: https://github.com/llvm/llvm-project/commit/ed7ad0a1a0584f90a211ca5a87bc46968e169e5d.diff

LOG: [X86] Promote cttz_i32(x) -> cttz_i64((i64)x | (1 << 32)) (#102900) (REAPPLIED)

On 64bit targets we can promote i32 CTTZ nodes to i64 CTTZ_ZERO_UNDEF by setting the 32nd bit.

#57811 also queried about whether we should use BTS instead of MOVABS+OR to avoid a i64 immediate - I'm willing to tweak the DAGToDAG isel peephole for these cases if reviewers think it worthwhile. But most recent CPUs can actually handle MOVABS faster than BTS/C/R.......

Reapplied with missing costmodel changes - the cost tables can probably be improved in a follow up patch.

Fixes #57811

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/Analysis/CostModel/X86/cttz-codesize.ll
    llvm/test/Analysis/CostModel/X86/cttz-latency.ll
    llvm/test/Analysis/CostModel/X86/cttz-sizelatency.ll
    llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
    llvm/test/CodeGen/X86/cttz.ll
    llvm/test/CodeGen/X86/known-never-zero.ll
    llvm/test/CodeGen/X86/pr89877.ll
    llvm/test/CodeGen/X86/pr90847.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2074fac8578914..04dfd0ea0d893a 100644

--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -412,6 +412,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::CTTZ           , MVT::i32  , Custom);
     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32  , Legal);
     if (Subtarget.is64Bit()) {
+      setOperationPromotedToType(ISD::CTTZ , MVT::i32, MVT::i64);
       setOperationAction(ISD::CTTZ         , MVT::i64  , Custom);
       setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
     }
@@ -3237,9 +3238,10 @@ bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT,
 }
 
 bool X86TargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
-  // Speculate cttz only if we can directly use TZCNT or can promote to i32.
+  // Speculate cttz only if we can directly use TZCNT or can promote to i32/i64.
   return Subtarget.hasBMI() ||
-         (!Ty->isVectorTy() && Ty->getScalarSizeInBits() < 32);
+         (!Ty->isVectorTy() &&
+          Ty->getScalarSizeInBits() < (Subtarget.is64Bit() ? 64u : 32u));
 }
 
 bool X86TargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {

diff  --git a/llvm/test/Analysis/CostModel/X86/cttz-codesize.ll b/llvm/test/Analysis/CostModel/X86/cttz-codesize.ll
index 25b169e08a7231..a485bad2a477e9 100644
--- a/llvm/test/Analysis/CostModel/X86/cttz-codesize.ll
+++ b/llvm/test/Analysis/CostModel/X86/cttz-codesize.ll
@@ -39,13 +39,9 @@ define i64 @var_cttz_i64u(i64 %a) {
 }
 
 define i32 @var_cttz_i32(i32 %a) {
-; NOBMI-LABEL: 'var_cttz_i32'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 false)
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %cttz
-;
-; BMI-LABEL: 'var_cttz_i32'
-; BMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 false)
-; BMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %cttz
+; CHECK-LABEL: 'var_cttz_i32'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 false)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %cttz
 ;
   %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 0)
   ret i32 %cttz

diff  --git a/llvm/test/Analysis/CostModel/X86/cttz-latency.ll b/llvm/test/Analysis/CostModel/X86/cttz-latency.ll
index a8abba56ba49fc..066e3232612f25 100644
--- a/llvm/test/Analysis/CostModel/X86/cttz-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/cttz-latency.ll
@@ -39,13 +39,9 @@ define i64 @var_cttz_i64u(i64 %a) {
 }
 
 define i32 @var_cttz_i32(i32 %a) {
-; NOBMI-LABEL: 'var_cttz_i32'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 false)
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %cttz
-;
-; BMI-LABEL: 'var_cttz_i32'
-; BMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 false)
-; BMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %cttz
+; CHECK-LABEL: 'var_cttz_i32'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 false)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %cttz
 ;
   %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 0)
   ret i32 %cttz

diff  --git a/llvm/test/Analysis/CostModel/X86/cttz-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/cttz-sizelatency.ll
index 294f101f571bf2..cc71bb5c908839 100644
--- a/llvm/test/Analysis/CostModel/X86/cttz-sizelatency.ll
+++ b/llvm/test/Analysis/CostModel/X86/cttz-sizelatency.ll
@@ -39,13 +39,9 @@ define i64 @var_cttz_i64u(i64 %a) {
 }
 
 define i32 @var_cttz_i32(i32 %a) {
-; NOBMI-LABEL: 'var_cttz_i32'
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 false)
-; NOBMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %cttz
-;
-; BMI-LABEL: 'var_cttz_i32'
-; BMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 false)
-; BMI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %cttz
+; CHECK-LABEL: 'var_cttz_i32'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 false)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %cttz
 ;
   %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 0)
   ret i32 %cttz

diff  --git a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
index 88a22f98feb256..1de3e2a853dd86 100644
--- a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
+++ b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
@@ -237,17 +237,17 @@ define void @cttz(i32 %a, <16 x i32> %va) {
 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; LATE-LABEL: 'cttz'
-; LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
+; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
 ; LATE-NEXT:  Cost Model: Found an estimated cost of 124 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SIZE-LABEL: 'cttz'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SIZE_LATE-LABEL: 'cttz'
-; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
+; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;

diff  --git a/llvm/test/CodeGen/X86/cttz.ll b/llvm/test/CodeGen/X86/cttz.ll
index 6eb748a1afbab5..b35a1b72fcb6f1 100644
--- a/llvm/test/CodeGen/X86/cttz.ll
+++ b/llvm/test/CodeGen/X86/cttz.ll
@@ -317,13 +317,11 @@ define i32 @cttz_i32_zero_test(i32 %n) {
 ;
 ; X64-LABEL: cttz_i32_zero_test:
 ; X64:       # %bb.0:
-; X64-NEXT:    testl %edi, %edi
-; X64-NEXT:    je .LBB6_1
-; X64-NEXT:  # %bb.2: # %cond.false
-; X64-NEXT:    rep bsfl %edi, %eax
-; X64-NEXT:    retq
-; X64-NEXT:  .LBB6_1:
-; X64-NEXT:    movl $32, %eax
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT:    orq %rdi, %rax
+; X64-NEXT:    rep bsfq %rax, %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
 ;
 ; X86-CLZ-LABEL: cttz_i32_zero_test:

diff  --git a/llvm/test/CodeGen/X86/known-never-zero.ll b/llvm/test/CodeGen/X86/known-never-zero.ll
index df11a44626e381..d5d604a138a719 100644
--- a/llvm/test/CodeGen/X86/known-never-zero.ll
+++ b/llvm/test/CodeGen/X86/known-never-zero.ll
@@ -54,13 +54,12 @@ define i32 @or_maybe_zero(i32 %x, i32 %y) {
 ;
 ; X64-LABEL: or_maybe_zero:
 ; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    orl %esi, %edi
-; X64-NEXT:    je .LBB1_1
-; X64-NEXT:  # %bb.2: # %cond.false
-; X64-NEXT:    rep bsfl %edi, %eax
-; X64-NEXT:    retq
-; X64-NEXT:  .LBB1_1:
-; X64-NEXT:    movl $32, %eax
+; X64-NEXT:    movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT:    orq %rdi, %rax
+; X64-NEXT:    rep bsfq %rax, %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %z = or i32 %x, %y
   %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -115,13 +114,10 @@ define i32 @select_maybe_zero(i1 %c, i32 %x) {
 ; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    testb $1, %dil
 ; X64-NEXT:    cmovnel %esi, %eax
-; X64-NEXT:    testl %eax, %eax
-; X64-NEXT:    je .LBB3_1
-; X64-NEXT:  # %bb.2: # %cond.false
-; X64-NEXT:    rep bsfl %eax, %eax
-; X64-NEXT:    retq
-; X64-NEXT:  .LBB3_1:
-; X64-NEXT:    movl $32, %eax
+; X64-NEXT:    movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    rep bsfq %rcx, %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %y = or i32 %x, 1
   %z = select i1 %c, i32 %y, i32 0
@@ -216,16 +212,14 @@ define i32 @shl_maybe_zero(i32 %x, i32 %y) {
 ;
 ; X64-LABEL: shl_maybe_zero:
 ; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-NEXT:    movl %edi, %ecx
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shll %cl, %esi
-; X64-NEXT:    testl %esi, %esi
-; X64-NEXT:    je .LBB7_1
-; X64-NEXT:  # %bb.2: # %cond.false
-; X64-NEXT:    rep bsfl %esi, %eax
-; X64-NEXT:    retq
-; X64-NEXT:  .LBB7_1:
-; X64-NEXT:    movl $32, %eax
+; X64-NEXT:    movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT:    orq %rsi, %rax
+; X64-NEXT:    rep bsfq %rax, %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %z = shl nuw nsw i32 %y, %x
   %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -275,13 +269,10 @@ define i32 @uaddsat_maybe_zero(i32 %x, i32 %y) {
 ; X64-NEXT:    addl %esi, %edi
 ; X64-NEXT:    movl $-1, %eax
 ; X64-NEXT:    cmovael %edi, %eax
-; X64-NEXT:    testl %eax, %eax
-; X64-NEXT:    je .LBB9_1
-; X64-NEXT:  # %bb.2: # %cond.false
-; X64-NEXT:    rep bsfl %eax, %eax
-; X64-NEXT:    retq
-; X64-NEXT:  .LBB9_1:
-; X64-NEXT:    movl $32, %eax
+; X64-NEXT:    movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    rep bsfq %rcx, %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %z = call i32 @llvm.uadd.sat.i32(i32 %x, i32 %y)
   %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -334,15 +325,13 @@ define i32 @umax_maybe_zero(i32 %x, i32 %y) {
 ;
 ; X64-LABEL: umax_maybe_zero:
 ; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-NEXT:    cmpl %esi, %edi
 ; X64-NEXT:    cmoval %edi, %esi
-; X64-NEXT:    testl %esi, %esi
-; X64-NEXT:    je .LBB11_1
-; X64-NEXT:  # %bb.2: # %cond.false
-; X64-NEXT:    rep bsfl %esi, %eax
-; X64-NEXT:    retq
-; X64-NEXT:  .LBB11_1:
-; X64-NEXT:    movl $32, %eax
+; X64-NEXT:    movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT:    orq %rsi, %rax
+; X64-NEXT:    rep bsfq %rax, %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %z = call i32 @llvm.umax.i32(i32 %x, i32 %y)
   %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -401,13 +390,10 @@ define i32 @umin_maybe_zero(i32 %x, i32 %y) {
 ; X64-NEXT:    cmpl $54, %edi
 ; X64-NEXT:    movl $54, %eax
 ; X64-NEXT:    cmovbl %edi, %eax
-; X64-NEXT:    testl %eax, %eax
-; X64-NEXT:    je .LBB13_1
-; X64-NEXT:  # %bb.2: # %cond.false
-; X64-NEXT:    rep bsfl %eax, %eax
-; X64-NEXT:    retq
-; X64-NEXT:  .LBB13_1:
-; X64-NEXT:    movl $32, %eax
+; X64-NEXT:    movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    rep bsfq %rcx, %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %z = call i32 @llvm.umin.i32(i32 %x, i32 54)
   %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -522,13 +508,10 @@ define i32 @smin_maybe_zero(i32 %x, i32 %y) {
 ; X64-NEXT:    cmpl $54, %edi
 ; X64-NEXT:    movl $54, %eax
 ; X64-NEXT:    cmovll %edi, %eax
-; X64-NEXT:    testl %eax, %eax
-; X64-NEXT:    je .LBB17_1
-; X64-NEXT:  # %bb.2: # %cond.false
-; X64-NEXT:    rep bsfl %eax, %eax
-; X64-NEXT:    retq
-; X64-NEXT:  .LBB17_1:
-; X64-NEXT:    movl $32, %eax
+; X64-NEXT:    movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    rep bsfq %rcx, %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %z = call i32 @llvm.smin.i32(i32 %x, i32 54)
   %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -643,13 +626,10 @@ define i32 @smax_known_zero(i32 %x, i32 %y) {
 ; X64-NEXT:    testl %edi, %edi
 ; X64-NEXT:    movl $-1, %eax
 ; X64-NEXT:    cmovnsl %edi, %eax
-; X64-NEXT:    testl %eax, %eax
-; X64-NEXT:    je .LBB21_1
-; X64-NEXT:  # %bb.2: # %cond.false
-; X64-NEXT:    rep bsfl %eax, %eax
-; X64-NEXT:    retq
-; X64-NEXT:  .LBB21_1:
-; X64-NEXT:    movl $32, %eax
+; X64-NEXT:    movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    rep bsfq %rcx, %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %z = call i32 @llvm.smax.i32(i32 %x, i32 -1)
   %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -676,16 +656,9 @@ define i32 @rotr_known_nonzero(i32 %xx, i32 %y) {
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
 ; X64-NEXT:    orl $256, %edi # imm = 0x100
-; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    rorl %cl, %eax
-; X64-NEXT:    testl %edi, %edi
-; X64-NEXT:    je .LBB22_1
-; X64-NEXT:  # %bb.2: # %cond.false
-; X64-NEXT:    rep bsfl %eax, %eax
-; X64-NEXT:    retq
-; X64-NEXT:  .LBB22_1:
-; X64-NEXT:    movl $32, %eax
+; X64-NEXT:    rorl %cl, %edi
+; X64-NEXT:    rep bsfl %edi, %eax
 ; X64-NEXT:    retq
   %x = or i32 %xx, 256
   %shr = lshr i32 %x, %y
@@ -714,16 +687,13 @@ define i32 @rotr_maybe_zero(i32 %x, i32 %y) {
 ; X64-LABEL: rotr_maybe_zero:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    rorl %cl, %eax
-; X64-NEXT:    testl %edi, %edi
-; X64-NEXT:    je .LBB23_1
-; X64-NEXT:  # %bb.2: # %cond.false
-; X64-NEXT:    rep bsfl %eax, %eax
-; X64-NEXT:    retq
-; X64-NEXT:  .LBB23_1:
-; X64-NEXT:    movl $32, %eax
+; X64-NEXT:    rorl %cl, %edi
+; X64-NEXT:    movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT:    orq %rdi, %rax
+; X64-NEXT:    rep bsfq %rax, %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %shr = lshr i32 %x, %y
   %sub = sub i32 32, %y
@@ -775,16 +745,13 @@ define i32 @rotr_with_fshr_maybe_zero(i32 %x, i32 %y) {
 ; X64-LABEL: rotr_with_fshr_maybe_zero:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    rorl %cl, %eax
-; X64-NEXT:    testl %edi, %edi
-; X64-NEXT:    je .LBB25_1
-; X64-NEXT:  # %bb.2: # %cond.false
-; X64-NEXT:    rep bsfl %eax, %eax
-; X64-NEXT:    retq
-; X64-NEXT:  .LBB25_1:
-; X64-NEXT:    movl $32, %eax
+; X64-NEXT:    rorl %cl, %edi
+; X64-NEXT:    movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT:    orq %rdi, %rax
+; X64-NEXT:    rep bsfq %rax, %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %z = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %y)
   %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -811,16 +778,9 @@ define i32 @rotl_known_nonzero(i32 %xx, i32 %y) {
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
 ; X64-NEXT:    orl $256, %edi # imm = 0x100
-; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    roll %cl, %eax
-; X64-NEXT:    testl %edi, %edi
-; X64-NEXT:    je .LBB26_1
-; X64-NEXT:  # %bb.2: # %cond.false
-; X64-NEXT:    rep bsfl %eax, %eax
-; X64-NEXT:    retq
-; X64-NEXT:  .LBB26_1:
-; X64-NEXT:    movl $32, %eax
+; X64-NEXT:    roll %cl, %edi
+; X64-NEXT:    rep bsfl %edi, %eax
 ; X64-NEXT:    retq
   %x = or i32 %xx, 256
   %shl = shl i32 %x, %y
@@ -849,16 +809,13 @@ define i32 @rotl_maybe_zero(i32 %x, i32 %y) {
 ; X64-LABEL: rotl_maybe_zero:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    roll %cl, %eax
-; X64-NEXT:    testl %edi, %edi
-; X64-NEXT:    je .LBB27_1
-; X64-NEXT:  # %bb.2: # %cond.false
-; X64-NEXT:    rep bsfl %eax, %eax
-; X64-NEXT:    retq
-; X64-NEXT:  .LBB27_1:
-; X64-NEXT:    movl $32, %eax
+; X64-NEXT:    roll %cl, %edi
+; X64-NEXT:    movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT:    orq %rdi, %rax
+; X64-NEXT:    rep bsfq %rax, %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %shl = shl i32 %x, %y
   %sub = sub i32 32, %y
@@ -910,16 +867,13 @@ define i32 @rotl_with_fshl_maybe_zero(i32 %x, i32 %y) {
 ; X64-LABEL: rotl_with_fshl_maybe_zero:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    roll %cl, %eax
-; X64-NEXT:    testl %edi, %edi
-; X64-NEXT:    je .LBB29_1
-; X64-NEXT:  # %bb.2: # %cond.false
-; X64-NEXT:    rep bsfl %eax, %eax
-; X64-NEXT:    retq
-; X64-NEXT:  .LBB29_1:
-; X64-NEXT:    movl $32, %eax
+; X64-NEXT:    roll %cl, %edi
+; X64-NEXT:    movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT:    orq %rdi, %rax
+; X64-NEXT:    rep bsfq %rax, %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %z = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %y)
   %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -989,16 +943,14 @@ define i32 @sra_maybe_zero(i32 %x, i32 %y) {
 ;
 ; X64-LABEL: sra_maybe_zero:
 ; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-NEXT:    movl %edi, %ecx
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    sarl %cl, %esi
-; X64-NEXT:    testl %esi, %esi
-; X64-NEXT:    je .LBB32_1
-; X64-NEXT:  # %bb.2: # %cond.false
-; X64-NEXT:    rep bsfl %esi, %eax
-; X64-NEXT:    retq
-; X64-NEXT:  .LBB32_1:
-; X64-NEXT:    movl $32, %eax
+; X64-NEXT:    movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT:    orq %rsi, %rax
+; X64-NEXT:    rep bsfq %rax, %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %z = ashr exact i32 %y, %x
   %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -1068,16 +1020,14 @@ define i32 @srl_maybe_zero(i32 %x, i32 %y) {
 ;
 ; X64-LABEL: srl_maybe_zero:
 ; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-NEXT:    movl %edi, %ecx
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrl %cl, %esi
-; X64-NEXT:    testl %esi, %esi
-; X64-NEXT:    je .LBB35_1
-; X64-NEXT:  # %bb.2: # %cond.false
-; X64-NEXT:    rep bsfl %esi, %eax
-; X64-NEXT:    retq
-; X64-NEXT:  .LBB35_1:
-; X64-NEXT:    movl $32, %eax
+; X64-NEXT:    movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT:    orq %rsi, %rax
+; X64-NEXT:    rep bsfq %rax, %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %z = lshr exact i32 %y, %x
   %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -1128,13 +1078,11 @@ define i32 @udiv_maybe_zero(i32 %x, i32 %y) {
 ; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    divl %esi
-; X64-NEXT:    testl %eax, %eax
-; X64-NEXT:    je .LBB37_1
-; X64-NEXT:  # %bb.2: # %cond.false
-; X64-NEXT:    rep bsfl %eax, %eax
-; X64-NEXT:    retq
-; X64-NEXT:  .LBB37_1:
-; X64-NEXT:    movl $32, %eax
+; X64-NEXT:    # kill: def $eax killed $eax def $rax
+; X64-NEXT:    movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    rep bsfq %rcx, %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %z = udiv exact i32 %x, %y
   %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -1185,13 +1133,11 @@ define i32 @sdiv_maybe_zero(i32 %x, i32 %y) {
 ; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    cltd
 ; X64-NEXT:    idivl %esi
-; X64-NEXT:    testl %eax, %eax
-; X64-NEXT:    je .LBB39_1
-; X64-NEXT:  # %bb.2: # %cond.false
-; X64-NEXT:    rep bsfl %eax, %eax
-; X64-NEXT:    retq
-; X64-NEXT:  .LBB39_1:
-; X64-NEXT:    movl $32, %eax
+; X64-NEXT:    # kill: def $eax killed $eax def $rax
+; X64-NEXT:    movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    rep bsfq %rcx, %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %z = sdiv exact i32 %x, %y
   %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -1235,14 +1181,13 @@ define i32 @add_maybe_zero(i32 %xx, i32 %y) {
 ;
 ; X64-LABEL: add_maybe_zero:
 ; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    orl $1, %edi
 ; X64-NEXT:    addl %esi, %edi
-; X64-NEXT:    je .LBB41_1
-; X64-NEXT:  # %bb.2: # %cond.false
-; X64-NEXT:    rep bsfl %edi, %eax
-; X64-NEXT:    retq
-; X64-NEXT:  .LBB41_1:
-; X64-NEXT:    movl $32, %eax
+; X64-NEXT:    movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT:    orq %rdi, %rax
+; X64-NEXT:    rep bsfq %rax, %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %x = or i32 %xx, 1
   %z = add nsw i32 %x, %y
@@ -1321,12 +1266,10 @@ define i32 @sub_maybe_zero(i32 %x) {
 ; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    orl $64, %eax
 ; X64-NEXT:    subl %edi, %eax
-; X64-NEXT:    je .LBB44_1
-; X64-NEXT:  # %bb.2: # %cond.false
-; X64-NEXT:    rep bsfl %eax, %eax
-; X64-NEXT:    retq
-; X64-NEXT:  .LBB44_1:
-; X64-NEXT:    movl $32, %eax
+; X64-NEXT:    movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    rep bsfq %rcx, %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %y = or i32 %x, 64
   %z = sub i32 %y, %x
@@ -1349,13 +1292,12 @@ define i32 @sub_maybe_zero2(i32 %x) {
 ;
 ; X64-LABEL: sub_maybe_zero2:
 ; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    negl %edi
-; X64-NEXT:    je .LBB45_1
-; X64-NEXT:  # %bb.2: # %cond.false
-; X64-NEXT:    rep bsfl %edi, %eax
-; X64-NEXT:    retq
-; X64-NEXT:  .LBB45_1:
-; X64-NEXT:    movl $32, %eax
+; X64-NEXT:    movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT:    orq %rdi, %rax
+; X64-NEXT:    rep bsfq %rax, %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %z = sub i32 0, %x
   %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -1379,15 +1321,13 @@ define i32 @mul_known_nonzero_nsw(i32 %x, i32 %yy) {
 ;
 ; X64-LABEL: mul_known_nonzero_nsw:
 ; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-NEXT:    orl $256, %esi # imm = 0x100
 ; X64-NEXT:    imull %edi, %esi
-; X64-NEXT:    testl %esi, %esi
-; X64-NEXT:    je .LBB46_1
-; X64-NEXT:  # %bb.2: # %cond.false
-; X64-NEXT:    rep bsfl %esi, %eax
-; X64-NEXT:    retq
-; X64-NEXT:  .LBB46_1:
-; X64-NEXT:    movl $32, %eax
+; X64-NEXT:    movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT:    orq %rsi, %rax
+; X64-NEXT:    rep bsfq %rax, %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %y = or i32 %yy, 256
   %z = mul nsw i32 %y, %x
@@ -1412,15 +1352,13 @@ define i32 @mul_known_nonzero_nuw(i32 %x, i32 %yy) {
 ;
 ; X64-LABEL: mul_known_nonzero_nuw:
 ; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-NEXT:    orl $256, %esi # imm = 0x100
 ; X64-NEXT:    imull %edi, %esi
-; X64-NEXT:    testl %esi, %esi
-; X64-NEXT:    je .LBB47_1
-; X64-NEXT:  # %bb.2: # %cond.false
-; X64-NEXT:    rep bsfl %esi, %eax
-; X64-NEXT:    retq
-; X64-NEXT:  .LBB47_1:
-; X64-NEXT:    movl $32, %eax
+; X64-NEXT:    movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT:    orq %rsi, %rax
+; X64-NEXT:    rep bsfq %rax, %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %y = or i32 %yy, 256
   %z = mul nuw i32 %y, %x
@@ -1444,14 +1382,12 @@ define i32 @mul_maybe_zero(i32 %x, i32 %y) {
 ;
 ; X64-LABEL: mul_maybe_zero:
 ; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    imull %esi, %edi
-; X64-NEXT:    testl %edi, %edi
-; X64-NEXT:    je .LBB48_1
-; X64-NEXT:  # %bb.2: # %cond.false
-; X64-NEXT:    rep bsfl %edi, %eax
-; X64-NEXT:    retq
-; X64-NEXT:  .LBB48_1:
-; X64-NEXT:    movl $32, %eax
+; X64-NEXT:    movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT:    orq %rdi, %rax
+; X64-NEXT:    rep bsfq %rax, %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %z = mul nuw nsw i32 %y, %x
   %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -1482,9 +1418,10 @@ define i32 @bitcast_known_nonzero(<2 x i16> %xx) {
 ; X64-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
 ; X64-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [256,256,u,u,u,u,u,u]
 ; X64-NEXT:    vmovd %xmm0, %eax
-; X64-NEXT:    bsfl %eax, %ecx
-; X64-NEXT:    movl $32, %eax
-; X64-NEXT:    cmovnel %ecx, %eax
+; X64-NEXT:    movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    rep bsfq %rcx, %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %x = shl nuw nsw <2 x i16> <i16 256, i16 256>, %xx
   %z = bitcast <2 x i16> %x to i32
@@ -1508,13 +1445,10 @@ define i32 @bitcast_maybe_zero(<2 x i16> %x) {
 ; X64-LABEL: bitcast_maybe_zero:
 ; X64:       # %bb.0:
 ; X64-NEXT:    vmovd %xmm0, %eax
-; X64-NEXT:    testl %eax, %eax
-; X64-NEXT:    je .LBB50_1
-; X64-NEXT:  # %bb.2: # %cond.false
-; X64-NEXT:    rep bsfl %eax, %eax
-; X64-NEXT:    retq
-; X64-NEXT:  .LBB50_1:
-; X64-NEXT:    movl $32, %eax
+; X64-NEXT:    movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    rep bsfq %rcx, %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %z = bitcast <2 x i16> %x to i32
   %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -1538,13 +1472,10 @@ define i32 @bitcast_from_float(float %x) {
 ; X64-LABEL: bitcast_from_float:
 ; X64:       # %bb.0:
 ; X64-NEXT:    vmovd %xmm0, %eax
-; X64-NEXT:    testl %eax, %eax
-; X64-NEXT:    je .LBB51_1
-; X64-NEXT:  # %bb.2: # %cond.false
-; X64-NEXT:    rep bsfl %eax, %eax
-; X64-NEXT:    retq
-; X64-NEXT:  .LBB51_1:
-; X64-NEXT:    movl $32, %eax
+; X64-NEXT:    movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    rep bsfq %rcx, %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %z = bitcast float %x to i32
   %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -1592,14 +1523,11 @@ define i32 @zext_maybe_zero(i16 %x) {
 ;
 ; X64-LABEL: zext_maybe_zero:
 ; X64:       # %bb.0:
-; X64-NEXT:    testw %di, %di
-; X64-NEXT:    je .LBB53_1
-; X64-NEXT:  # %bb.2: # %cond.false
 ; X64-NEXT:    movzwl %di, %eax
-; X64-NEXT:    rep bsfl %eax, %eax
-; X64-NEXT:    retq
-; X64-NEXT:  .LBB53_1:
-; X64-NEXT:    movl $32, %eax
+; X64-NEXT:    movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    rep bsfq %rcx, %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %z = zext i16 %x to i32
   %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -1646,14 +1574,11 @@ define i32 @sext_maybe_zero(i16 %x) {
 ;
 ; X64-LABEL: sext_maybe_zero:
 ; X64:       # %bb.0:
-; X64-NEXT:    testw %di, %di
-; X64-NEXT:    je .LBB55_1
-; X64-NEXT:  # %bb.2: # %cond.false
 ; X64-NEXT:    movswl %di, %eax
-; X64-NEXT:    rep bsfl %eax, %eax
-; X64-NEXT:    retq
-; X64-NEXT:  .LBB55_1:
-; X64-NEXT:    movl $32, %eax
+; X64-NEXT:    movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    rep bsfq %rcx, %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %z = sext i16 %x to i32
   %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)

diff  --git a/llvm/test/CodeGen/X86/pr89877.ll b/llvm/test/CodeGen/X86/pr89877.ll
index 9820ec42f5b8cc..fdbe75b467d992 100644
--- a/llvm/test/CodeGen/X86/pr89877.ll
+++ b/llvm/test/CodeGen/X86/pr89877.ll
@@ -24,14 +24,11 @@ define i32 @sext_known_nonzero(i16 %xx) {
 ; X64-NEXT:    movl $256, %eax # imm = 0x100
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shll %cl, %eax
-; X64-NEXT:    cwtl
-; X64-NEXT:    testl %eax, %eax
-; X64-NEXT:    je .LBB0_1
-; X64-NEXT:  # %bb.2: # %cond.false
-; X64-NEXT:    rep bsfl %eax, %eax
-; X64-NEXT:    retq
-; X64-NEXT:  .LBB0_1:
-; X64-NEXT:    movl $32, %eax
+; X64-NEXT:    movswq %ax, %rax
+; X64-NEXT:    movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    rep bsfq %rcx, %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %x = shl i16 256, %xx
   %z = sext i16 %x to i32

diff  --git a/llvm/test/CodeGen/X86/pr90847.ll b/llvm/test/CodeGen/X86/pr90847.ll
index 7aa0ceb26e1acb..f2d43c3ed8d5bd 100644
--- a/llvm/test/CodeGen/X86/pr90847.ll
+++ b/llvm/test/CodeGen/X86/pr90847.ll
@@ -15,14 +15,10 @@ define i32 @PR90847(<8 x float> %x) nounwind {
 ; AVX1-NEXT:    vminps %ymm2, %ymm1, %ymm1
 ; AVX1-NEXT:    vcmpeqps %ymm0, %ymm1, %ymm0
 ; AVX1-NEXT:    vmovmskps %ymm0, %eax
-; AVX1-NEXT:    testl %eax, %eax
-; AVX1-NEXT:    je .LBB0_1
-; AVX1-NEXT:  # %bb.2: # %cond.false
-; AVX1-NEXT:    rep bsfl %eax, %eax
-; AVX1-NEXT:    vzeroupper
-; AVX1-NEXT:    retq
-; AVX1-NEXT:  .LBB0_1:
-; AVX1-NEXT:    movl $32, %eax
+; AVX1-NEXT:    movabsq $4294967296, %rcx # imm = 0x100000000
+; AVX1-NEXT:    orq %rax, %rcx
+; AVX1-NEXT:    rep bsfq %rcx, %rax
+; AVX1-NEXT:    # kill: def $eax killed $eax killed $rax
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -36,14 +32,10 @@ define i32 @PR90847(<8 x float> %x) nounwind {
 ; AVX2-NEXT:    vminps %ymm2, %ymm1, %ymm1
 ; AVX2-NEXT:    vcmpeqps %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT:    vmovmskps %ymm0, %eax
-; AVX2-NEXT:    testl %eax, %eax
-; AVX2-NEXT:    je .LBB0_1
-; AVX2-NEXT:  # %bb.2: # %cond.false
-; AVX2-NEXT:    rep bsfl %eax, %eax
-; AVX2-NEXT:    vzeroupper
-; AVX2-NEXT:    retq
-; AVX2-NEXT:  .LBB0_1:
-; AVX2-NEXT:    movl $32, %eax
+; AVX2-NEXT:    movabsq $4294967296, %rcx # imm = 0x100000000
+; AVX2-NEXT:    orq %rax, %rcx
+; AVX2-NEXT:    rep bsfq %rcx, %rax
+; AVX2-NEXT:    # kill: def $eax killed $eax killed $rax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 entry: