[llvm] c4d3eed - [X86] Fold nested select_cc to (select (cmp*ge/le Cond0, Cond1), LHS, Y)
Simon Pilgrim via llvm-commits
llvm-commits@lists.llvm.org
Thu Jun 24 03:42:59 PDT 2021
Author: Simon Pilgrim
Date: 2021-06-24T11:27:57+01:00
New Revision: c4d3eedc7f1a954ba3a21af5fc9d4f8ecb37a6ac
URL: https://github.com/llvm/llvm-project/commit/c4d3eedc7f1a954ba3a21af5fc9d4f8ecb37a6ac
DIFF: https://github.com/llvm/llvm-project/commit/c4d3eedc7f1a954ba3a21af5fc9d4f8ecb37a6ac.diff
LOG: [X86] Fold nested select_cc to (select (cmp*ge/le Cond0, Cond1), LHS, Y)
select (cmpeq Cond0, Cond1), LHS, (select (cmpugt Cond0, Cond1), LHS, Y) --> (select (cmpuge Cond0, Cond1), LHS, Y)
etc.
We already perform this fold in DAGCombiner for MVT::i1 comparison results, but these patterns can still appear after legalization (in the x86 case, with MVT::i8 results), where we need to be more careful about generating new condition codes.
Pulled out of D101074 to help address the remaining regressions.
Differential Revision: https://reviews.llvm.org/D104707
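As a side note for readers, the scalar equivalence behind the fold can be checked directly. Below is a minimal standalone C++ sketch (illustrative only, not part of the commit; the names nestedSelect/foldedSelect are made up for the example):

#include <cassert>
#include <cstdint>

// Nested form: select (cmpeq a, b), x, (select (cmpugt a, b), x, y)
static uint64_t nestedSelect(uint64_t a, uint64_t b, uint64_t x, uint64_t y) {
  return (a == b) ? x : ((a > b) ? x : y);
}

// Folded form: select (cmpuge a, b), x, y
static uint64_t foldedSelect(uint64_t a, uint64_t b, uint64_t x, uint64_t y) {
  return (a >= b) ? x : y;
}

int main() {
  // Exhaustively check a small range: both forms agree for every (a, b).
  for (uint64_t a = 0; a < 8; ++a)
    for (uint64_t b = 0; b < 8; ++b)
      assert(nestedSelect(a, b, 10, 20) == foldedSelect(a, b, 10, 20));
  return 0;
}

The signed variants (cmpsgt/cmpslt --> cmpsge/cmpsle) follow the same reasoning, which is why the new switch in combineSelect only has to relax each strict comparison to its non-strict form.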
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/sdiv_fix_sat.ll
llvm/test/CodeGen/X86/udiv_fix_sat.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7a3a9f352aec..ccf90a2e5541 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -41836,6 +41836,36 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
return DAG.getSelect(DL, VT, Cond, LHS, RHS);
}
}
+
+ // Similar to DAGCombine's select(or(CC0,CC1),X,Y) fold but for legal types.
+ // fold eq + gt/lt nested selects into ge/le selects
+ // select (cmpeq Cond0, Cond1), LHS, (select (cmpugt Cond0, Cond1), LHS, Y)
+ // --> (select (cmpuge Cond0, Cond1), LHS, Y)
+ // select (cmpslt Cond0, Cond1), LHS, (select (cmpeq Cond0, Cond1), LHS, Y)
+ // --> (select (cmpsle Cond0, Cond1), LHS, Y)
+ // .. etc ..
+ if (RHS.getOpcode() == ISD::SELECT && RHS.getOperand(1) == LHS &&
+ RHS.getOperand(0).getOpcode() == ISD::SETCC) {
+ SDValue InnerSetCC = RHS.getOperand(0);
+ ISD::CondCode InnerCC =
+ cast<CondCodeSDNode>(InnerSetCC.getOperand(2))->get();
+ if ((CC == ISD::SETEQ || InnerCC == ISD::SETEQ) &&
+ Cond0 == InnerSetCC.getOperand(0) &&
+ Cond1 == InnerSetCC.getOperand(1)) {
+ ISD::CondCode NewCC;
+ switch (CC == ISD::SETEQ ? InnerCC : CC) {
+ case ISD::SETGT: NewCC = ISD::SETGE; break;
+ case ISD::SETLT: NewCC = ISD::SETLE; break;
+ case ISD::SETUGT: NewCC = ISD::SETUGE; break;
+ case ISD::SETULT: NewCC = ISD::SETULE; break;
+ default: NewCC = ISD::SETCC_INVALID; break;
+ }
+ if (NewCC != ISD::SETCC_INVALID) {
+ Cond = DAG.getSetCC(DL, CondVT, Cond0, Cond1, NewCC);
+ return DAG.getSelect(DL, VT, Cond, LHS, RHS.getOperand(2));
+ }
+ }
+ }
}
// Check if the first operand is all zeros and Cond type is vXi1.
diff --git a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
index 9801cb4018b9..f22812c14835 100644
--- a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
@@ -313,50 +313,48 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X64-NEXT: movq %rsi, (%rsp) # 8-byte Spill
; X64-NEXT: movq %rdi, %r15
; X64-NEXT: leaq (%rdi,%rdi), %rax
-; X64-NEXT: movq %rdi, %rbx
-; X64-NEXT: sarq $63, %rbx
-; X64-NEXT: shldq $31, %rax, %rbx
-; X64-NEXT: shlq $32, %r15
-; X64-NEXT: movq %rsi, %r12
+; X64-NEXT: movq %rdi, %r12
; X64-NEXT: sarq $63, %r12
+; X64-NEXT: shldq $31, %rax, %r12
+; X64-NEXT: shlq $32, %r15
+; X64-NEXT: movq %rsi, %r13
+; X64-NEXT: sarq $63, %r13
; X64-NEXT: movq %r15, %rdi
-; X64-NEXT: movq %rbx, %rsi
-; X64-NEXT: movq %r12, %rcx
+; X64-NEXT: movq %r12, %rsi
+; X64-NEXT: movq %r13, %rcx
; X64-NEXT: callq __divti3@PLT
-; X64-NEXT: movq %rax, %r13
+; X64-NEXT: movq %rax, %rbx
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %rbp
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: subq $1, %r13
+; X64-NEXT: subq $1, %rbx
; X64-NEXT: sbbq $0, %rbp
-; X64-NEXT: testq %rbx, %rbx
-; X64-NEXT: sets %al
; X64-NEXT: testq %r12, %r12
+; X64-NEXT: sets %al
+; X64-NEXT: testq %r13, %r13
; X64-NEXT: sets %r14b
; X64-NEXT: xorb %al, %r14b
; X64-NEXT: movq %r15, %rdi
-; X64-NEXT: movq %rbx, %rsi
+; X64-NEXT: movq %r12, %rsi
; X64-NEXT: movq (%rsp), %rdx # 8-byte Reload
-; X64-NEXT: movq %r12, %rcx
+; X64-NEXT: movq %r13, %rcx
; X64-NEXT: callq __modti3@PLT
; X64-NEXT: orq %rax, %rdx
; X64-NEXT: setne %al
; X64-NEXT: testb %r14b, %al
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
-; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
+; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: testq %rbp, %rbp
-; X64-NEXT: movq $-1, %rcx
-; X64-NEXT: movq $-1, %rdx
-; X64-NEXT: cmovsq %r13, %rdx
-; X64-NEXT: cmoveq %r13, %rdx
; X64-NEXT: cmovnsq %rax, %rbp
+; X64-NEXT: movq $-1, %rcx
+; X64-NEXT: cmovgq %rcx, %rbx
; X64-NEXT: testq %rbp, %rbp
; X64-NEXT: cmovnsq %rbp, %rcx
-; X64-NEXT: cmovnsq %rdx, %rax
-; X64-NEXT: cmpq $-1, %rbp
-; X64-NEXT: cmoveq %rdx, %rax
-; X64-NEXT: shrdq $1, %rcx, %rax
+; X64-NEXT: cmpq $-2, %rbp
+; X64-NEXT: cmovleq %rax, %rbx
+; X64-NEXT: shrdq $1, %rcx, %rbx
+; X64-NEXT: movq %rbx, %rax
; X64-NEXT: addq $24, %rsp
; X64-NEXT: popq %rbx
; X64-NEXT: popq %r12
@@ -402,18 +400,19 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X86-NEXT: pushl %eax
; X86-NEXT: calll __divti3
; X86-NEXT: addl $32, %esp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: subl $1, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl $0, %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: subl $1, %esi
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: sbbl $0, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: sbbl $0, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sbbl $0, %ebx
@@ -446,51 +445,50 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X86-NEXT: testl %ebx, %ebx
-; X86-NEXT: movl $0, %edx
-; X86-NEXT: cmovsl %ebx, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF
-; X86-NEXT: cmovsl %esi, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl $-1, %edx
-; X86-NEXT: cmovsl %eax, %edx
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: cmovsl %ebx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-NEXT: cmovsl %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $-1, %eax
+; X86-NEXT: cmovsl %esi, %eax
; X86-NEXT: movl %ebx, %edi
; X86-NEXT: sarl $31, %edi
; X86-NEXT: andl %ecx, %edi
; X86-NEXT: testl %ebx, %ebx
; X86-NEXT: cmovel %ebx, %edi
-; X86-NEXT: cmpl $2147483647, %esi # imm = 0x7FFFFFFF
-; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-NEXT: cmovael %eax, %esi
-; X86-NEXT: movl $-1, %eax
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: cmovbl %ecx, %eax
-; X86-NEXT: cmovel %ecx, %eax
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: cmpl $2147483647, %edx # imm = 0x7FFFFFFF
+; X86-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF
+; X86-NEXT: cmovbl %ecx, %edx
+; X86-NEXT: testl %ecx, %ecx
+; X86-NEXT: movl $-1, %ecx
+; X86-NEXT: cmovsl %ecx, %esi
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X86-NEXT: cmovnel %edx, %eax
-; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-NEXT: cmpl $-2147483648, %esi # imm = 0x80000000
-; X86-NEXT: movl $0, %ecx
-; X86-NEXT: cmoval %eax, %ecx
-; X86-NEXT: cmovel %eax, %ecx
-; X86-NEXT: movl $-2147483648, %edx # imm = 0x80000000
-; X86-NEXT: cmoval %esi, %edx
+; X86-NEXT: cmovnel %eax, %esi
+; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: cmpl $-2147483648, %edx # imm = 0x80000000
+; X86-NEXT: movl $-2147483648, %eax # imm = 0x80000000
+; X86-NEXT: cmoval %edx, %eax
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: sarl $31, %ecx
+; X86-NEXT: andl %esi, %ecx
; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT: movl $0, %ebx
-; X86-NEXT: cmovsl %ebx, %eax
; X86-NEXT: movl $-2147483648, %ebx # imm = 0x80000000
+; X86-NEXT: cmovsl %ebx, %edx
+; X86-NEXT: movl $0, %ebx
; X86-NEXT: cmovsl %ebx, %esi
; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X86-NEXT: cmpl $-1, %edi
-; X86-NEXT: cmovel %edx, %esi
-; X86-NEXT: cmovel %ecx, %eax
-; X86-NEXT: movl %esi, %edx
+; X86-NEXT: cmovel %ecx, %esi
+; X86-NEXT: cmovel %eax, %edx
+; X86-NEXT: movl %esi, %eax
; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
diff --git a/llvm/test/CodeGen/X86/udiv_fix_sat.ll b/llvm/test/CodeGen/X86/udiv_fix_sat.ll
index 67b0ea303848..91fa594f6554 100644
--- a/llvm/test/CodeGen/X86/udiv_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/udiv_fix_sat.ll
@@ -170,27 +170,23 @@ define i4 @func4(i4 %x, i4 %y) nounwind {
define i64 @func5(i64 %x, i64 %y) nounwind {
; X64-LABEL: func5:
; X64: # %bb.0:
-; X64-NEXT: pushq %rbx
+; X64-NEXT: pushq %rax
; X64-NEXT: movq %rsi, %rdx
; X64-NEXT: leaq (%rdi,%rdi), %rsi
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: shrq $63, %rax
; X64-NEXT: shrdq $33, %rax, %rsi
; X64-NEXT: shlq $32, %rdi
-; X64-NEXT: xorl %ebx, %ebx
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: callq __udivti3@PLT
+; X64-NEXT: cmpq $2, %rdx
+; X64-NEXT: movq $-1, %rcx
+; X64-NEXT: cmovbq %rax, %rcx
; X64-NEXT: cmpq $1, %rdx
-; X64-NEXT: sbbq %rbx, %rbx
-; X64-NEXT: notq %rbx
-; X64-NEXT: orq %rax, %rbx
-; X64-NEXT: cmpq $1, %rdx
-; X64-NEXT: movl $1, %ecx
-; X64-NEXT: cmovbq %rdx, %rcx
-; X64-NEXT: cmoveq %rax, %rbx
-; X64-NEXT: shrdq $1, %rcx, %rbx
-; X64-NEXT: movq %rbx, %rax
-; X64-NEXT: popq %rbx
+; X64-NEXT: movl $1, %eax
+; X64-NEXT: cmovbq %rdx, %rax
+; X64-NEXT: shldq $63, %rcx, %rax
+; X64-NEXT: popq %rcx
; X64-NEXT: retq
;
; X86-LABEL: func5:
@@ -218,17 +214,15 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X86-NEXT: pushl %esi
; X86-NEXT: calll __udivti3
; X86-NEXT: addl $32, %esp
-; X86-NEXT: movl (%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl $-1, %eax
+; X86-NEXT: movl $-1, %edx
+; X86-NEXT: jne .LBB4_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: cmpl $-1, %edx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: cmovel %edx, %eax
-; X86-NEXT: cmovel %ecx, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl $-1, %ecx
-; X86-NEXT: cmovnel %ecx, %edx
-; X86-NEXT: cmovnel %ecx, %eax
+; X86-NEXT: .LBB4_2:
; X86-NEXT: leal -4(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %ebp
@@ -394,7 +388,10 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: addl %ecx, %ecx
@@ -407,91 +404,72 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: pushl %ecx
; X86-NEXT: calll __udivdi3
; X86-NEXT: addl $16, %esp
-; X86-NEXT: cmpl $1, %edx
-; X86-NEXT: movl $0, %ecx
-; X86-NEXT: sbbl %ecx, %ecx
-; X86-NEXT: notl %ecx
-; X86-NEXT: orl %eax, %ecx
-; X86-NEXT: movl %ecx, %esi
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: addl %edi, %edi
-; X86-NEXT: setb %cl
-; X86-NEXT: cmpl $1, %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl $1, %ebp
-; X86-NEXT: cmovael %ebp, %edx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl $1, %ebp
-; X86-NEXT: cmovel %eax, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: shldl $31, %edi, %ecx
-; X86-NEXT: shll $31, %edi
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: addl %ebp, %ebp
+; X86-NEXT: setb %al
+; X86-NEXT: shldl $31, %ebp, %eax
+; X86-NEXT: shll $31, %ebp
; X86-NEXT: pushl $0
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %eax
+; X86-NEXT: pushl %ebp
; X86-NEXT: calll __udivdi3
; X86-NEXT: addl $16, %esp
-; X86-NEXT: cmpl $1, %edx
-; X86-NEXT: movl $0, %edi
-; X86-NEXT: sbbl %edi, %edi
-; X86-NEXT: notl %edi
-; X86-NEXT: orl %eax, %edi
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: addl %ebx, %ebx
-; X86-NEXT: setb %cl
-; X86-NEXT: cmpl $1, %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovael %ebp, %edx
-; X86-NEXT: movl %edx, (%esp) # 4-byte Spill
-; X86-NEXT: cmovel %eax, %edi
-; X86-NEXT: shldl $31, %ebx, %ecx
-; X86-NEXT: shll $31, %ebx
+; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: addl %edi, %edi
+; X86-NEXT: setb %al
+; X86-NEXT: shldl $31, %edi, %eax
+; X86-NEXT: shll $31, %edi
; X86-NEXT: pushl $0
; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %eax
+; X86-NEXT: pushl %edi
; X86-NEXT: calll __udivdi3
; X86-NEXT: addl $16, %esp
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: cmpl $1, %edx
-; X86-NEXT: movl $0, %ebp
-; X86-NEXT: sbbl %ebp, %ebp
-; X86-NEXT: notl %ebp
-; X86-NEXT: orl %eax, %ebp
-; X86-NEXT: xorl %ecx, %ecx
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: xorl %eax, %eax
; X86-NEXT: addl %esi, %esi
-; X86-NEXT: setb %cl
-; X86-NEXT: cmpl $1, %edx
-; X86-NEXT: movl $1, %edx
-; X86-NEXT: cmovael %edx, %ebx
-; X86-NEXT: cmovel %eax, %ebp
-; X86-NEXT: shldl $31, %esi, %ecx
+; X86-NEXT: setb %al
+; X86-NEXT: shldl $31, %esi, %eax
; X86-NEXT: shll $31, %esi
; X86-NEXT: pushl $0
; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %ecx
+; X86-NEXT: pushl %eax
; X86-NEXT: pushl %esi
; X86-NEXT: calll __udivdi3
; X86-NEXT: addl $16, %esp
-; X86-NEXT: cmpl $1, %edx
-; X86-NEXT: movl $0, %ecx
-; X86-NEXT: sbbl %ecx, %ecx
-; X86-NEXT: notl %ecx
-; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: cmpl $2, %edx
+; X86-NEXT: movl $-1, %ecx
+; X86-NEXT: cmovael %ecx, %eax
; X86-NEXT: cmpl $1, %edx
; X86-NEXT: movl $1, %esi
-; X86-NEXT: cmovbl %edx, %esi
-; X86-NEXT: cmovel %eax, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: shrdl $1, %eax, %edx
+; X86-NEXT: cmovael %esi, %edx
+; X86-NEXT: shldl $31, %eax, %edx
+; X86-NEXT: cmpl $2, %edi
+; X86-NEXT: cmovael %ecx, %ebx
+; X86-NEXT: cmpl $1, %edi
+; X86-NEXT: cmovael %esi, %edi
+; X86-NEXT: shldl $31, %ebx, %edi
+; X86-NEXT: cmpl $2, %ebp
; X86-NEXT: movl (%esp), %eax # 4-byte Reload
-; X86-NEXT: shrdl $1, %eax, %edi
-; X86-NEXT: shrdl $1, %ebx, %ebp
-; X86-NEXT: shrdl $1, %esi, %ecx
+; X86-NEXT: cmovael %ecx, %eax
+; X86-NEXT: cmpl $1, %ebp
+; X86-NEXT: cmovael %esi, %ebp
+; X86-NEXT: shldl $31, %eax, %ebp
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: cmpl $2, %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: cmovael %ecx, %eax
+; X86-NEXT: cmpl $1, %ebx
+; X86-NEXT: cmovbl %ebx, %esi
+; X86-NEXT: shldl $31, %eax, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: movl %esi, 12(%eax)
; X86-NEXT: movl %ebp, 8(%eax)
; X86-NEXT: movl %edi, 4(%eax)
; X86-NEXT: movl %edx, (%eax)