[llvm] c4d3eed - [X86] Fold nested select_cc to (select (cmp*ge/le Cond0, Cond1), LHS, Y)
Simon Pilgrim via llvm-commits
llvm-commits@lists.llvm.org
Thu Jun 24 03:42:59 PDT 2021
Author: Simon Pilgrim
Date: 2021-06-24T11:27:57+01:00
New Revision: c4d3eedc7f1a954ba3a21af5fc9d4f8ecb37a6ac
URL: https://github.com/llvm/llvm-project/commit/c4d3eedc7f1a954ba3a21af5fc9d4f8ecb37a6ac
DIFF: https://github.com/llvm/llvm-project/commit/c4d3eedc7f1a954ba3a21af5fc9d4f8ecb37a6ac.diff
LOG: [X86] Fold nested select_cc to (select (cmp*ge/le Cond0, Cond1), LHS, Y)
select (cmpeq Cond0, Cond1), LHS, (select (cmpugt Cond0, Cond1), LHS, Y) --> (select (cmpuge Cond0, Cond1), LHS, Y)
etc.
We already perform this fold in DAGCombiner for MVT::i1 comparison results, but these patterns can still appear after legalization (in the x86 case, with MVT::i8 results), where we need to be more careful about generating new condition codes.
Pulled out of D101074 to help address the remaining regressions.
Differential Revision: https://reviews.llvm.org/D104707
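As a side note for readers, the scalar equivalence behind the fold can be checked directly. Below is a minimal standalone C++ sketch (illustrative only, not part of the commit; the names nestedSelect/foldedSelect are made up for the example):

#include <cassert>
#include <cstdint>

// Nested form: select (cmpeq a, b), x, (select (cmpugt a, b), x, y)
static uint64_t nestedSelect(uint64_t a, uint64_t b, uint64_t x, uint64_t y) {
  return (a == b) ? x : ((a > b) ? x : y);
}

// Folded form: select (cmpuge a, b), x, y
static uint64_t foldedSelect(uint64_t a, uint64_t b, uint64_t x, uint64_t y) {
  return (a >= b) ? x : y;
}

int main() {
  // Exhaustively check a small range: both forms agree for every (a, b).
  for (uint64_t a = 0; a < 8; ++a)
    for (uint64_t b = 0; b < 8; ++b)
      assert(nestedSelect(a, b, 10, 20) == foldedSelect(a, b, 10, 20));
  return 0;
}

The signed variants (cmpsgt/cmpslt --> cmpsge/cmpsle) follow the same reasoning, which is why the new switch in combineSelect only has to relax each strict comparison to its non-strict form.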
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/sdiv_fix_sat.ll
llvm/test/CodeGen/X86/udiv_fix_sat.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7a3a9f352aec..ccf90a2e5541 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -41836,6 +41836,36 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
return DAG.getSelect(DL, VT, Cond, LHS, RHS);
}
}
+
+ // Similar to DAGCombine's select(or(CC0,CC1),X,Y) fold but for legal types.
+ // fold eq + gt/lt nested selects into ge/le selects
+ // select (cmpeq Cond0, Cond1), LHS, (select (cmpugt Cond0, Cond1), LHS, Y)
+ // --> (select (cmpuge Cond0, Cond1), LHS, Y)
+ // select (cmpslt Cond0, Cond1), LHS, (select (cmpeq Cond0, Cond1), LHS, Y)
+ // --> (select (cmpsle Cond0, Cond1), LHS, Y)
+ // .. etc ..
+ if (RHS.getOpcode() == ISD::SELECT && RHS.getOperand(1) == LHS &&
+ RHS.getOperand(0).getOpcode() == ISD::SETCC) {
+ SDValue InnerSetCC = RHS.getOperand(0);
+ ISD::CondCode InnerCC =
+ cast<CondCodeSDNode>(InnerSetCC.getOperand(2))->get();
+ if ((CC == ISD::SETEQ || InnerCC == ISD::SETEQ) &&
+ Cond0 == InnerSetCC.getOperand(0) &&
+ Cond1 == InnerSetCC.getOperand(1)) {
+ ISD::CondCode NewCC;
+ switch (CC == ISD::SETEQ ? InnerCC : CC) {
+ case ISD::SETGT: NewCC = ISD::SETGE; break;
+ case ISD::SETLT: NewCC = ISD::SETLE; break;
+ case ISD::SETUGT: NewCC = ISD::SETUGE; break;
+ case ISD::SETULT: NewCC = ISD::SETULE; break;
+ default: NewCC = ISD::SETCC_INVALID; break;
+ }
+ if (NewCC != ISD::SETCC_INVALID) {
+ Cond = DAG.getSetCC(DL, CondVT, Cond0, Cond1, NewCC);
+ return DAG.getSelect(DL, VT, Cond, LHS, RHS.getOperand(2));
+ }
+ }
+ }
}
// Check if the first operand is all zeros and Cond type is vXi1.
diff --git a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
index 9801cb4018b9..f22812c14835 100644
--- a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
@@ -313,50 +313,48 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X64-NEXT: movq %rsi, (%rsp) # 8-byte Spill
; X64-NEXT: movq %rdi, %r15
; X64-NEXT: leaq (%rdi,%rdi), %rax
-; X64-NEXT: movq %rdi, %rbx
-; X64-NEXT: sarq $63, %rbx
-; X64-NEXT: shldq $31, %rax, %rbx
-; X64-NEXT: shlq $32, %r15
-; X64-NEXT: movq %rsi, %r12
+; X64-NEXT: movq %rdi, %r12
; X64-NEXT: sarq $63, %r12
+; X64-NEXT: shldq $31, %rax, %r12
+; X64-NEXT: shlq $32, %r15
+; X64-NEXT: movq %rsi, %r13
+; X64-NEXT: sarq $63, %r13
; X64-NEXT: movq %r15, %rdi
-; X64-NEXT: movq %rbx, %rsi
-; X64-NEXT: movq %r12, %rcx
+; X64-NEXT: movq %r12, %rsi
+; X64-NEXT: movq %r13, %rcx
; X64-NEXT: callq __divti3@PLT
-; X64-NEXT: movq %rax, %r13
+; X64-NEXT: movq %rax, %rbx
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %rbp
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: subq $1, %r13
+; X64-NEXT: subq $1, %rbx
; X64-NEXT: sbbq $0, %rbp
-; X64-NEXT: testq %rbx, %rbx
-; X64-NEXT: sets %al
; X64-NEXT: testq %r12, %r12
+; X64-NEXT: sets %al
+; X64-NEXT: testq %r13, %r13
; X64-NEXT: sets %r14b
; X64-NEXT: xorb %al, %r14b
; X64-NEXT: movq %r15, %rdi
-; X64-NEXT: movq %rbx, %rsi
+; X64-NEXT: movq %r12, %rsi
; X64-NEXT: movq (%rsp), %rdx # 8-byte Reload
-; X64-NEXT: movq %r12, %rcx
+; X64-NEXT: movq %r13, %rcx
; X64-NEXT: callq __modti3@PLT
; X64-NEXT: orq %rax, %rdx
; X64-NEXT: setne %al
; X64-NEXT: testb %r14b, %al
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
-; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
+; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: testq %rbp, %rbp
-; X64-NEXT: movq $-1, %rcx
-; X64-NEXT: movq $-1, %rdx
-; X64-NEXT: cmovsq %r13, %rdx
-; X64-NEXT: cmoveq %r13, %rdx
; X64-NEXT: cmovnsq %rax, %rbp
+; X64-NEXT: movq $-1, %rcx
+; X64-NEXT: cmovgq %rcx, %rbx
; X64-NEXT: testq %rbp, %rbp
; X64-NEXT: cmovnsq %rbp, %rcx
-; X64-NEXT: cmovnsq %rdx, %rax
-; X64-NEXT: cmpq $-1, %rbp
-; X64-NEXT: cmoveq %rdx, %rax
-; X64-NEXT: shrdq $1, %rcx, %rax
+; X64-NEXT: cmpq $-2, %rbp
+; X64-NEXT: cmovleq %rax, %rbx
+; X64-NEXT: shrdq $1, %rcx, %rbx
+; X64-NEXT: movq %rbx, %rax
; X64-NEXT: addq $24, %rsp
; X64-NEXT: popq %rbx
; X64-NEXT: popq %r12
@@ -402,18 +400,19 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X86-NEXT: pushl %eax
; X86-NEXT: calll __divti3
; X86-NEXT: addl $32, %esp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: subl $1, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl $0, %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: subl $1, %esi
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: sbbl $0, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: sbbl $0, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sbbl $0, %ebx
@@ -446,51 +445,50 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X86-NEXT: testl %ebx, %ebx
-; X86-NEXT: movl $0, %edx
-; X86-NEXT: cmovsl %ebx, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF
-; X86-NEXT: cmovsl %esi, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl $-1, %edx
-; X86-NEXT: cmovsl %eax, %edx
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: cmovsl %ebx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-NEXT: cmovsl %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $-1, %eax
+; X86-NEXT: cmovsl %esi, %eax
; X86-NEXT: movl %ebx, %edi
; X86-NEXT: sarl $31, %edi
; X86-NEXT: andl %ecx, %edi
; X86-NEXT: testl %ebx, %ebx
; X86-NEXT: cmovel %ebx, %edi
-; X86-NEXT: cmpl $2147483647, %esi # imm = 0x7FFFFFFF
-; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-NEXT: cmovael %eax, %esi
-; X86-NEXT: movl $-1, %eax
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: cmovbl %ecx, %eax
-; X86-NEXT: cmovel %ecx, %eax
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: cmpl $2147483647, %edx # imm = 0x7FFFFFFF
+; X86-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF
+; X86-NEXT: cmovbl %ecx, %edx
+; X86-NEXT: testl %ecx, %ecx
+; X86-NEXT: movl $-1, %ecx
+; X86-NEXT: cmovsl %ecx, %esi
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X86-NEXT: cmovnel %edx, %eax
-; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-NEXT: cmpl $-2147483648, %esi # imm = 0x80000000
-; X86-NEXT: movl $0, %ecx
-; X86-NEXT: cmoval %eax, %ecx
-; X86-NEXT: cmovel %eax, %ecx
-; X86-NEXT: movl $-2147483648, %edx # imm = 0x80000000
-; X86-NEXT: cmoval %esi, %edx
+; X86-NEXT: cmovnel %eax, %esi
+; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: cmpl $-2147483648, %edx # imm = 0x80000000
+; X86-NEXT: movl $-2147483648, %eax # imm = 0x80000000
+; X86-NEXT: cmoval %edx, %eax
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: sarl $31, %ecx
+; X86-NEXT: andl %esi, %ecx
; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT: movl $0, %ebx
-; X86-NEXT: cmovsl %ebx, %eax
; X86-NEXT: movl $-2147483648, %ebx # imm = 0x80000000
+; X86-NEXT: cmovsl %ebx, %edx
+; X86-NEXT: movl $0, %ebx
; X86-NEXT: cmovsl %ebx, %esi
; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X86-NEXT: cmpl $-1, %edi
-; X86-NEXT: cmovel %edx, %esi
-; X86-NEXT: cmovel %ecx, %eax
-; X86-NEXT: movl %esi, %edx
+; X86-NEXT: cmovel %ecx, %esi
+; X86-NEXT: cmovel %eax, %edx
+; X86-NEXT: movl %esi, %eax
; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
diff --git a/llvm/test/CodeGen/X86/udiv_fix_sat.ll b/llvm/test/CodeGen/X86/udiv_fix_sat.ll
index 67b0ea303848..91fa594f6554 100644
--- a/llvm/test/CodeGen/X86/udiv_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/udiv_fix_sat.ll
@@ -170,27 +170,23 @@ define i4 @func4(i4 %x, i4 %y) nounwind {
define i64 @func5(i64 %x, i64 %y) nounwind {
; X64-LABEL: func5:
; X64: # %bb.0:
-; X64-NEXT: pushq %rbx
+; X64-NEXT: pushq %rax
; X64-NEXT: movq %rsi, %rdx
; X64-NEXT: leaq (%rdi,%rdi), %rsi
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: shrq $63, %rax
; X64-NEXT: shrdq $33, %rax, %rsi
; X64-NEXT: shlq $32, %rdi
-; X64-NEXT: xorl %ebx, %ebx
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: callq __udivti3@PLT
+; X64-NEXT: cmpq $2, %rdx
+; X64-NEXT: movq $-1, %rcx
+; X64-NEXT: cmovbq %rax, %rcx
; X64-NEXT: cmpq $1, %rdx
-; X64-NEXT: sbbq %rbx, %rbx
-; X64-NEXT: notq %rbx
-; X64-NEXT: orq %rax, %rbx
-; X64-NEXT: cmpq $1, %rdx
-; X64-NEXT: movl $1, %ecx
-; X64-NEXT: cmovbq %rdx, %rcx
-; X64-NEXT: cmoveq %rax, %rbx
-; X64-NEXT: shrdq $1, %rcx, %rbx
-; X64-NEXT: movq %rbx, %rax
-; X64-NEXT: popq %rbx
+; X64-NEXT: movl $1, %eax
+; X64-NEXT: cmovbq %rdx, %rax
+; X64-NEXT: shldq $63, %rcx, %rax
+; X64-NEXT: popq %rcx
; X64-NEXT: retq
;
; X86-LABEL: func5:
@@ -218,17 +214,15 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X86-NEXT: pushl %esi
; X86-NEXT: calll __udivti3
; X86-NEXT: addl $32, %esp
-; X86-NEXT: movl (%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl $-1, %eax
+; X86-NEXT: movl $-1, %edx
+; X86-NEXT: jne .LBB4_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: cmpl $-1, %edx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: cmovel %edx, %eax
-; X86-NEXT: cmovel %ecx, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl $-1, %ecx
-; X86-NEXT: cmovnel %ecx, %edx
-; X86-NEXT: cmovnel %ecx, %eax
+; X86-NEXT: .LBB4_2:
; X86-NEXT: leal -4(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %ebp
@@ -394,7 +388,10 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: addl %ecx, %ecx
@@ -407,91 +404,72 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: pushl %ecx
; X86-NEXT: calll __udivdi3
; X86-NEXT: addl $16, %esp
-; X86-NEXT: cmpl $1, %edx
-; X86-NEXT: movl $0, %ecx
-; X86-NEXT: sbbl %ecx, %ecx
-; X86-NEXT: notl %ecx
-; X86-NEXT: orl %eax, %ecx
-; X86-NEXT: movl %ecx, %esi
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: addl %edi, %edi
-; X86-NEXT: setb %cl
-; X86-NEXT: cmpl $1, %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl $1, %ebp
-; X86-NEXT: cmovael %ebp, %edx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl $1, %ebp
-; X86-NEXT: cmovel %eax, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: shldl $31, %edi, %ecx
-; X86-NEXT: shll $31, %edi
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: addl %ebp, %ebp
+; X86-NEXT: setb %al
+; X86-NEXT: shldl $31, %ebp, %eax
+; X86-NEXT: shll $31, %ebp
; X86-NEXT: pushl $0
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %eax
+; X86-NEXT: pushl %ebp
; X86-NEXT: calll __udivdi3
; X86-NEXT: addl $16, %esp
-; X86-NEXT: cmpl $1, %edx
-; X86-NEXT: movl $0, %edi
-; X86-NEXT: sbbl %edi, %edi
-; X86-NEXT: notl %edi
-; X86-NEXT: orl %eax, %edi
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: addl %ebx, %ebx
-; X86-NEXT: setb %cl
-; X86-NEXT: cmpl $1, %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovael %ebp, %edx
-; X86-NEXT: movl %edx, (%esp) # 4-byte Spill
-; X86-NEXT: cmovel %eax, %edi
-; X86-NEXT: shldl $31, %ebx, %ecx
-; X86-NEXT: shll $31, %ebx
+; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: addl %edi, %edi
+; X86-NEXT: setb %al
+; X86-NEXT: shldl $31, %edi, %eax
+; X86-NEXT: shll $31, %edi
; X86-NEXT: pushl $0
; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %eax
+; X86-NEXT: pushl %edi
; X86-NEXT: calll __udivdi3
; X86-NEXT: addl $16, %esp
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: cmpl $1, %edx
-; X86-NEXT: movl $0, %ebp
-; X86-NEXT: sbbl %ebp, %ebp
-; X86-NEXT: notl %ebp
-; X86-NEXT: orl %eax, %ebp
-; X86-NEXT: xorl %ecx, %ecx
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: xorl %eax, %eax
; X86-NEXT: addl %esi, %esi
-; X86-NEXT: setb %cl
-; X86-NEXT: cmpl $1, %edx
-; X86-NEXT: movl $1, %edx
-; X86-NEXT: cmovael %edx, %ebx
-; X86-NEXT: cmovel %eax, %ebp
-; X86-NEXT: shldl $31, %esi, %ecx
+; X86-NEXT: setb %al
+; X86-NEXT: shldl $31, %esi, %eax
; X86-NEXT: shll $31, %esi
; X86-NEXT: pushl $0
; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %ecx
+; X86-NEXT: pushl %eax
; X86-NEXT: pushl %esi
; X86-NEXT: calll __udivdi3
; X86-NEXT: addl $16, %esp
-; X86-NEXT: cmpl $1, %edx
-; X86-NEXT: movl $0, %ecx
-; X86-NEXT: sbbl %ecx, %ecx
-; X86-NEXT: notl %ecx
-; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: cmpl $2, %edx
+; X86-NEXT: movl $-1, %ecx
+; X86-NEXT: cmovael %ecx, %eax
; X86-NEXT: cmpl $1, %edx
; X86-NEXT: movl $1, %esi
-; X86-NEXT: cmovbl %edx, %esi
-; X86-NEXT: cmovel %eax, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: shrdl $1, %eax, %edx
+; X86-NEXT: cmovael %esi, %edx
+; X86-NEXT: shldl $31, %eax, %edx
+; X86-NEXT: cmpl $2, %edi
+; X86-NEXT: cmovael %ecx, %ebx
+; X86-NEXT: cmpl $1, %edi
+; X86-NEXT: cmovael %esi, %edi
+; X86-NEXT: shldl $31, %ebx, %edi
+; X86-NEXT: cmpl $2, %ebp
; X86-NEXT: movl (%esp), %eax # 4-byte Reload
-; X86-NEXT: shrdl $1, %eax, %edi
-; X86-NEXT: shrdl $1, %ebx, %ebp
-; X86-NEXT: shrdl $1, %esi, %ecx
+; X86-NEXT: cmovael %ecx, %eax
+; X86-NEXT: cmpl $1, %ebp
+; X86-NEXT: cmovael %esi, %ebp
+; X86-NEXT: shldl $31, %eax, %ebp
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: cmpl $2, %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: cmovael %ecx, %eax
+; X86-NEXT: cmpl $1, %ebx
+; X86-NEXT: cmovbl %ebx, %esi
+; X86-NEXT: shldl $31, %eax, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: movl %esi, 12(%eax)
; X86-NEXT: movl %ebp, 8(%eax)
; X86-NEXT: movl %edi, 4(%eax)
; X86-NEXT: movl %edx, (%eax)