[llvm-branch-commits] [llvm] 207f329 - [DAG] SimplifyDemandedBits - use KnownBits comparisons to remove ISD::UMIN/UMAX ops
Simon Pilgrim via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Jan 18 02:34:56 PST 2021
Author: Simon Pilgrim
Date: 2021-01-18T10:29:23Z
New Revision: 207f32948b2408bebd5a523695f6f7c08049db74
URL: https://github.com/llvm/llvm-project/commit/207f32948b2408bebd5a523695f6f7c08049db74
DIFF: https://github.com/llvm/llvm-project/commit/207f32948b2408bebd5a523695f6f7c08049db74.diff
LOG: [DAG] SimplifyDemandedBits - use KnownBits comparisons to remove ISD::UMIN/UMAX ops
Use the KnownBits icmp comparisons to determine when an ISD::UMIN/UMAX op is unnecessary: if either operand is known to be ULT/ULE (for UMIN) or UGT/UGE (for UMAX) the other, the node can be replaced with that operand.
Differential Revision: https://reviews.llvm.org/D94532
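As a rough standalone sketch (not part of this commit, and assuming an LLVM tree of the same vintage with llvm/Support/KnownBits.h available), the snippet below shows how the KnownBits unsigned comparison helpers used by the patch can prove that one umin operand is redundant. The operand ranges model test_demandedbits_umin_ult from the combine-umin.ll test modified below.

// Sketch only: demonstrates the KnownBits::ule() query this patch relies on.
#include "llvm/ADT/Optional.h"
#include "llvm/Support/KnownBits.h"
#include <cassert>

using namespace llvm;

int main() {
  // LHS = (a0 & 13) | 12  ->  always 12 or 13 (bit 0 unknown, bits 2-3 set).
  KnownBits LHS(8);
  LHS.Zero = ~APInt(8, 13);
  LHS.One = APInt(8, 12);

  // RHS = (a1 & 12) | 4   ->  always 4 or 12 (bit 3 unknown, bit 2 set).
  KnownBits RHS(8);
  RHS.Zero = ~APInt(8, 12);
  RHS.One = APInt(8, 4);

  // umax(RHS) == 12 == umin(LHS), so RHS is provably <=u LHS and
  // umin(LHS, RHS) folds to RHS. ule() returns None when the ordering
  // cannot be decided from the known bits alone.
  Optional<bool> IsULE = KnownBits::ule(RHS, LHS);
  assert(IsULE.hasValue() && IsULE.getValue() && "expected umin to fold to RHS");
  (void)IsULE;
  return 0;
}

This mirrors the new ISD::UMIN/UMAX cases added to SimplifyDemandedBits below, where the proven operand is handed back via TLO.CombineTo.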
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll
llvm/test/CodeGen/X86/combine-umin.ll
llvm/test/CodeGen/X86/sdiv_fix_sat.ll
llvm/test/CodeGen/X86/udiv_fix_sat.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e265bcea5945..ef83df8bdd96 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4607,6 +4607,10 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
}
+ // Simplify the operands using demanded-bits information.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
return SDValue();
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 21953373b745..b19033e3e427 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1722,6 +1722,32 @@ bool TargetLowering::SimplifyDemandedBits(
}
break;
}
+ case ISD::UMIN: {
+ // Check if one arg is always less than (or equal) to the other arg.
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
+ KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
+ Known = KnownBits::umin(Known0, Known1);
+ if (Optional<bool> IsULE = KnownBits::ule(Known0, Known1))
+ return TLO.CombineTo(Op, IsULE.getValue() ? Op0 : Op1);
+ if (Optional<bool> IsULT = KnownBits::ult(Known0, Known1))
+ return TLO.CombineTo(Op, IsULT.getValue() ? Op0 : Op1);
+ break;
+ }
+ case ISD::UMAX: {
+ // Check if one arg is always greater than (or equal) to the other arg.
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
+ KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
+ Known = KnownBits::umax(Known0, Known1);
+ if (Optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
+ return TLO.CombineTo(Op, IsUGE.getValue() ? Op0 : Op1);
+ if (Optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
+ return TLO.CombineTo(Op, IsUGT.getValue() ? Op0 : Op1);
+ break;
+ }
case ISD::BITREVERSE: {
SDValue Src = Op.getOperand(0);
APInt DemandedSrcBits = DemandedBits.reverseBits();
diff --git a/llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll b/llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll
index b4cd36daad65..f0604c7fe782 100644
--- a/llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll
@@ -18,7 +18,7 @@ define amdgpu_kernel void @test(i64 addrspace(1)* %out) {
; CHECK-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; CHECK-NEXT: MOV * T0.W, KC0[2].Y,
; CHECK-NEXT: ALU clause starting at 11:
-; CHECK-NEXT: MAX_UINT T0.X, T0.X, literal.x,
+; CHECK-NEXT: MOV T0.X, literal.x,
; CHECK-NEXT: MOV T0.Y, 0.0,
; CHECK-NEXT: LSHR * T1.X, T0.W, literal.y,
; CHECK-NEXT: 4(5.605194e-45), 2(2.802597e-45)
diff --git a/llvm/test/CodeGen/X86/combine-umin.ll b/llvm/test/CodeGen/X86/combine-umin.ll
index b22c45bbce45..1be72ad66799 100644
--- a/llvm/test/CodeGen/X86/combine-umin.ll
+++ b/llvm/test/CodeGen/X86/combine-umin.ll
@@ -10,14 +10,9 @@
define i8 @test_demandedbits_umin_ult(i8 %a0, i8 %a1) {
; CHECK-LABEL: test_demandedbits_umin_ult:
; CHECK: # %bb.0:
-; CHECK-NEXT: orb $12, %dil
-; CHECK-NEXT: orb $4, %sil
-; CHECK-NEXT: andb $13, %dil
-; CHECK-NEXT: andb $12, %sil
-; CHECK-NEXT: movzbl %dil, %ecx
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: cmpb %al, %cl
-; CHECK-NEXT: cmovbl %ecx, %eax
+; CHECK-NEXT: movl %esi, %eax
+; CHECK-NEXT: orb $4, %al
+; CHECK-NEXT: andb $12, %al
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%lhs0 = and i8 %a0, 13 ; b1101
diff --git a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
index 617d5d7876bd..9801cb4018b9 100644
--- a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
@@ -313,56 +313,50 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X64-NEXT: movq %rsi, (%rsp) # 8-byte Spill
; X64-NEXT: movq %rdi, %r15
; X64-NEXT: leaq (%rdi,%rdi), %rax
-; X64-NEXT: movq %rdi, %r12
-; X64-NEXT: sarq $63, %r12
-; X64-NEXT: shldq $31, %rax, %r12
+; X64-NEXT: movq %rdi, %rbx
+; X64-NEXT: sarq $63, %rbx
+; X64-NEXT: shldq $31, %rax, %rbx
; X64-NEXT: shlq $32, %r15
-; X64-NEXT: movq %rsi, %r13
-; X64-NEXT: sarq $63, %r13
+; X64-NEXT: movq %rsi, %r12
+; X64-NEXT: sarq $63, %r12
; X64-NEXT: movq %r15, %rdi
-; X64-NEXT: movq %r12, %rsi
-; X64-NEXT: movq %r13, %rcx
+; X64-NEXT: movq %rbx, %rsi
+; X64-NEXT: movq %r12, %rcx
; X64-NEXT: callq __divti3 at PLT
-; X64-NEXT: movq %rax, %rbx
+; X64-NEXT: movq %rax, %r13
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %rbp
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: subq $1, %rbx
+; X64-NEXT: subq $1, %r13
; X64-NEXT: sbbq $0, %rbp
-; X64-NEXT: testq %r12, %r12
+; X64-NEXT: testq %rbx, %rbx
; X64-NEXT: sets %al
-; X64-NEXT: testq %r13, %r13
+; X64-NEXT: testq %r12, %r12
; X64-NEXT: sets %r14b
; X64-NEXT: xorb %al, %r14b
; X64-NEXT: movq %r15, %rdi
-; X64-NEXT: movq %r12, %rsi
+; X64-NEXT: movq %rbx, %rsi
; X64-NEXT: movq (%rsp), %rdx # 8-byte Reload
-; X64-NEXT: movq %r13, %rcx
+; X64-NEXT: movq %r12, %rcx
; X64-NEXT: callq __modti3 at PLT
; X64-NEXT: orq %rax, %rdx
; X64-NEXT: setne %al
; X64-NEXT: testb %r14b, %al
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
-; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
-; X64-NEXT: cmpq $-1, %rbx
-; X64-NEXT: movq $-1, %rax
-; X64-NEXT: movq $-1, %rcx
-; X64-NEXT: cmovbq %rbx, %rcx
-; X64-NEXT: xorl %edx, %edx
+; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
+; X64-NEXT: xorl %eax, %eax
; X64-NEXT: testq %rbp, %rbp
-; X64-NEXT: cmovnsq %rax, %rbx
-; X64-NEXT: cmoveq %rcx, %rbx
-; X64-NEXT: cmovnsq %rdx, %rbp
-; X64-NEXT: testq %rbx, %rbx
-; X64-NEXT: movl $0, %ecx
-; X64-NEXT: cmovaq %rbx, %rcx
+; X64-NEXT: movq $-1, %rcx
+; X64-NEXT: movq $-1, %rdx
+; X64-NEXT: cmovsq %r13, %rdx
+; X64-NEXT: cmoveq %r13, %rdx
+; X64-NEXT: cmovnsq %rax, %rbp
; X64-NEXT: testq %rbp, %rbp
-; X64-NEXT: cmovnsq %rbp, %rax
-; X64-NEXT: cmovsq %rdx, %rbx
+; X64-NEXT: cmovnsq %rbp, %rcx
+; X64-NEXT: cmovnsq %rdx, %rax
; X64-NEXT: cmpq $-1, %rbp
-; X64-NEXT: cmoveq %rcx, %rbx
-; X64-NEXT: shrdq $1, %rax, %rbx
-; X64-NEXT: movq %rbx, %rax
+; X64-NEXT: cmoveq %rdx, %rax
+; X64-NEXT: shrdq $1, %rcx, %rax
; X64-NEXT: addq $24, %rsp
; X64-NEXT: popq %rbx
; X64-NEXT: popq %r12
@@ -383,12 +377,12 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X86-NEXT: subl $88, %esp
; X86-NEXT: movl 8(%ebp), %ecx
; X86-NEXT: movl 12(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ebx
-; X86-NEXT: sarl $31, %ebx
-; X86-NEXT: movl %eax, %edi
+; X86-NEXT: movl 20(%ebp), %edi
; X86-NEXT: sarl $31, %edi
-; X86-NEXT: movl %edi, %edx
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: sarl $31, %ebx
+; X86-NEXT: movl %ebx, %edx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: shldl $31, %eax, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: shldl $31, %ecx, %eax
@@ -397,42 +391,42 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X86-NEXT: shll $31, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl 20(%ebp)
; X86-NEXT: pushl 16(%ebp)
-; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edx
; X86-NEXT: pushl %esi
; X86-NEXT: pushl %ecx
; X86-NEXT: pushl %eax
; X86-NEXT: calll __divti3
; X86-NEXT: addl $32, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: subl $1, %esi
-; X86-NEXT: sbbl $0, %edi
+; X86-NEXT: subl $1, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl $0, %esi
+; X86-NEXT: movl %ecx, %eax
; X86-NEXT: sbbl $0, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sbbl $0, %ebx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: testl %edx, %edx
+; X86-NEXT: testl %edi, %edi
; X86-NEXT: sets %al
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: testl %ecx, %ecx
-; X86-NEXT: sets %ah
-; X86-NEXT: xorb %al, %ah
-; X86-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: sets %dl
+; X86-NEXT: xorb %al, %dl
+; X86-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: pushl %edx
-; X86-NEXT: pushl %edx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl 20(%ebp)
; X86-NEXT: pushl 16(%ebp)
; X86-NEXT: pushl %ecx
@@ -449,59 +443,54 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X86-NEXT: orl %eax, %ecx
; X86-NEXT: setne %al
; X86-NEXT: testb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X86-NEXT: testl %ebx, %ebx
-; X86-NEXT: movl $0, %ecx
-; X86-NEXT: cmovsl %ebx, %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl $-1, %ecx
-; X86-NEXT: cmovsl %esi, %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF
-; X86-NEXT: cmovsl %edi, %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %ebx, %edx
-; X86-NEXT: sarl $31, %edx
-; X86-NEXT: andl %eax, %edx
+; X86-NEXT: movl $0, %edx
+; X86-NEXT: cmovsl %ebx, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF
+; X86-NEXT: cmovsl %esi, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $-1, %edx
+; X86-NEXT: cmovsl %eax, %edx
+; X86-NEXT: movl %ebx, %edi
+; X86-NEXT: sarl $31, %edi
+; X86-NEXT: andl %ecx, %edi
; X86-NEXT: testl %ebx, %ebx
-; X86-NEXT: cmovel %ebx, %edx
-; X86-NEXT: cmpl $-1, %esi
-; X86-NEXT: movl $-1, %eax
-; X86-NEXT: cmovbl %esi, %eax
-; X86-NEXT: cmpl $2147483647, %edi # imm = 0x7FFFFFFF
-; X86-NEXT: movl $-1, %ecx
-; X86-NEXT: cmovael %ecx, %esi
-; X86-NEXT: cmovel %eax, %esi
+; X86-NEXT: cmovel %ebx, %edi
+; X86-NEXT: cmpl $2147483647, %esi # imm = 0x7FFFFFFF
; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-NEXT: cmovael %eax, %edi
+; X86-NEXT: cmovael %eax, %esi
+; X86-NEXT: movl $-1, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: cmovbl %ecx, %eax
+; X86-NEXT: cmovel %ecx, %eax
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: cmovnel %edx, %eax
; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-NEXT: testl %esi, %esi
-; X86-NEXT: movl $0, %eax
-; X86-NEXT: cmoval %esi, %eax
-; X86-NEXT: cmpl $-2147483648, %edi # imm = 0x80000000
+; X86-NEXT: cmpl $-2147483648, %esi # imm = 0x80000000
; X86-NEXT: movl $0, %ecx
-; X86-NEXT: cmoval %esi, %ecx
+; X86-NEXT: cmoval %eax, %ecx
; X86-NEXT: cmovel %eax, %ecx
-; X86-NEXT: movl $-2147483648, %eax # imm = 0x80000000
-; X86-NEXT: cmoval %edi, %eax
+; X86-NEXT: movl $-2147483648, %edx # imm = 0x80000000
+; X86-NEXT: cmoval %esi, %edx
; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT: movl $-2147483648, %ebx # imm = 0x80000000
-; X86-NEXT: cmovsl %ebx, %edi
; X86-NEXT: movl $0, %ebx
+; X86-NEXT: cmovsl %ebx, %eax
+; X86-NEXT: movl $-2147483648, %ebx # imm = 0x80000000
; X86-NEXT: cmovsl %ebx, %esi
-; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT: cmpl $-1, %edx
-; X86-NEXT: cmovel %ecx, %esi
-; X86-NEXT: cmovel %eax, %edi
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: movl %edi, %edx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: cmpl $-1, %edi
+; X86-NEXT: cmovel %edx, %esi
+; X86-NEXT: cmovel %ecx, %eax
+; X86-NEXT: movl %esi, %edx
; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
@@ -1054,8 +1043,8 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sbbl $0, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, %eax
; X86-NEXT: sbbl $0, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: testl %ebx, %ebx
@@ -1063,11 +1052,11 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-NEXT: sets %bh
; X86-NEXT: xorb %bl, %bh
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: orl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: orl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: orl %esi, %eax
+; X86-NEXT: orl %edi, %eax
; X86-NEXT: setne %al
; X86-NEXT: testb %bh, %al
; X86-NEXT: cmovel %edx, %ecx
@@ -1079,20 +1068,19 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: cmovel %edi, %eax
+; X86-NEXT: cmovel %esi, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: subl $1, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, %eax
-; X86-NEXT: sbbl $0, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edi, %esi
+; X86-NEXT: sbbl $0, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sbbl $0, %eax
-; X86-NEXT: movl %eax, %esi
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sbbl $0, %eax
@@ -1123,20 +1111,20 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: orl %eax, %ecx
; X86-NEXT: setne %al
; X86-NEXT: testb %bl, %al
+; X86-NEXT: cmovel %edi, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: cmovel %edi, %eax
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: subl $1, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: subl $1, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %eax
; X86-NEXT: sbbl $0, %eax
@@ -1145,19 +1133,19 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sbbl $0, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl $0, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl $0, %edx
; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-NEXT: sets %bl
; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-NEXT: sets %bh
; X86-NEXT: xorb %bl, %bh
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: orl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: orl %eax, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl %edi, %eax
; X86-NEXT: setne %al
; X86-NEXT: testb %bh, %al
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
@@ -1166,219 +1154,179 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT: testl %ecx, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: testl %edx, %edx
; X86-NEXT: movl $0, %eax
-; X86-NEXT: cmovsl %ecx, %eax
+; X86-NEXT: cmovsl %edx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl $-1, %eax
-; X86-NEXT: cmovsl %edx, %eax
+; X86-NEXT: cmovsl %ecx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: sarl $31, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: sarl $31, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ecx, %esi
; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X86-NEXT: testl %eax, %eax
; X86-NEXT: cmovel %eax, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl $0, %edx
-; X86-NEXT: cmovsl %eax, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: cmovsl %eax, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl $-1, %eax
; X86-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: sarl $31, %ebx
-; X86-NEXT: movl %ebx, %edx
-; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: sarl $31, %edi
+; X86-NEXT: movl %edi, %ecx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: testl %eax, %eax
-; X86-NEXT: cmovel %eax, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl $0, %edx
-; X86-NEXT: cmovsl %eax, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: cmovel %eax, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: cmovsl %eax, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl $-1, %eax
; X86-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: sarl $31, %edi
-; X86-NEXT: movl %edi, %edx
-; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: cmovel %eax, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl $0, %esi
-; X86-NEXT: cmovsl %eax, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: movl %esi, %ebx
+; X86-NEXT: sarl $31, %ebx
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: testl %esi, %esi
+; X86-NEXT: cmovel %esi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: cmovsl %esi, %ecx
; X86-NEXT: movl $-1, %eax
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: cmovsl %edx, %eax
+; X86-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edx, %eax
; X86-NEXT: sarl $31, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: testl %ecx, %ecx
-; X86-NEXT: cmovel %ecx, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: cmpl $-1, %edx
-; X86-NEXT: movl $-1, %eax
-; X86-NEXT: cmovael %eax, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: cmpl $1, %edx
-; X86-NEXT: movl $0, %eax
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: notl %eax
-; X86-NEXT: orl %ecx, %eax
; X86-NEXT: testl %edx, %edx
-; X86-NEXT: movl $0, %ecx
-; X86-NEXT: cmovbl %edx, %ecx
-; X86-NEXT: andl %edx, %edi
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: cmovel %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl %eax, %ebx
+; X86-NEXT: cmpl $1, %eax
+; X86-NEXT: movl $0, %edx
+; X86-NEXT: sbbl %edx, %edx
+; X86-NEXT: notl %edx
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT: cmovel %ecx, %edi
-; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: testl %eax, %eax
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl $0, %esi
+; X86-NEXT: cmovel %esi, %ebx
+; X86-NEXT: cmpl $-1, %ebx
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: cmovel %edx, %eax
+; X86-NEXT: testl %ecx, %ecx
+; X86-NEXT: cmovsl %esi, %edx
+; X86-NEXT: movl $-1, %esi
+; X86-NEXT: cmovsl %esi, %ebx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: cmpl $-1, %ecx
+; X86-NEXT: cmovnel %ebx, %ecx
+; X86-NEXT: cmovel %eax, %edx
+; X86-NEXT: shrdl $1, %ecx, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl %eax, %edi
+; X86-NEXT: cmpl $1, %eax
; X86-NEXT: movl $0, %ecx
-; X86-NEXT: cmoval %eax, %ecx
+; X86-NEXT: sbbl %ecx, %ecx
+; X86-NEXT: notl %ecx
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: movl $0, %esi
+; X86-NEXT: cmovel %esi, %edi
; X86-NEXT: cmpl $-1, %edi
-; X86-NEXT: movl $0, %edx
-; X86-NEXT: cmovnel %edx, %ecx
-; X86-NEXT: testl %esi, %esi
-; X86-NEXT: movl $-1, %edx
-; X86-NEXT: cmovsl %edx, %edi
-; X86-NEXT: movl $0, %edx
-; X86-NEXT: cmovsl %edx, %eax
-; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-NEXT: cmpl $-1, %esi
+; X86-NEXT: movl $0, %eax
; X86-NEXT: cmovel %ecx, %eax
-; X86-NEXT: cmovnel %edi, %esi
-; X86-NEXT: shldl $31, %eax, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: testl %ebx, %ebx
+; X86-NEXT: cmovsl %esi, %ecx
+; X86-NEXT: movl $-1, %esi
+; X86-NEXT: cmovsl %esi, %edi
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT: cmpl $-1, %ebx
+; X86-NEXT: cmovnel %edi, %ebx
+; X86-NEXT: cmovel %eax, %ecx
+; X86-NEXT: shrdl $1, %ebx, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: andl %eax, %edi
+; X86-NEXT: cmpl $1, %eax
+; X86-NEXT: movl $0, %esi
+; X86-NEXT: sbbl %esi, %esi
+; X86-NEXT: notl %esi
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl $0, %edi
+; X86-NEXT: cmovel %edi, %eax
; X86-NEXT: cmpl $-1, %eax
-; X86-NEXT: movl $-1, %ecx
-; X86-NEXT: cmovael %ecx, %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT: cmpl $1, %esi
+; X86-NEXT: movl %eax, %edi
; X86-NEXT: movl $0, %eax
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: notl %eax
-; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: testl %esi, %esi
-; X86-NEXT: movl $0, %ecx
-; X86-NEXT: cmovbl %esi, %ecx
-; X86-NEXT: andl %esi, %ebx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT: cmovel %ecx, %ebx
-; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: movl $0, %ecx
-; X86-NEXT: cmoval %eax, %ecx
+; X86-NEXT: cmovel %esi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: testl %ebx, %ebx
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: cmovsl %eax, %esi
+; X86-NEXT: movl $-1, %eax
+; X86-NEXT: cmovsl %eax, %edi
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X86-NEXT: cmpl $-1, %ebx
+; X86-NEXT: cmovnel %edi, %ebx
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: shrdl $1, %ebx, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: andl %eax, %ebx
+; X86-NEXT: cmpl $1, %eax
; X86-NEXT: movl $0, %edi
-; X86-NEXT: cmovnel %edi, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT: testl %esi, %esi
-; X86-NEXT: movl $-1, %edx
-; X86-NEXT: cmovsl %edx, %ebx
-; X86-NEXT: cmovsl %edi, %eax
-; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-NEXT: cmpl $-1, %esi
-; X86-NEXT: cmovel %ecx, %eax
-; X86-NEXT: cmovnel %ebx, %esi
-; X86-NEXT: shldl $31, %eax, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl %edi, %edi
+; X86-NEXT: notl %edi
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: movl $0, %ebx
+; X86-NEXT: cmovel %ebx, %eax
; X86-NEXT: cmpl $-1, %eax
-; X86-NEXT: cmovael %edx, %eax
-; X86-NEXT: movl $-1, %ebx
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: cmpl $1, %edx
-; X86-NEXT: movl $0, %eax
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: notl %eax
-; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: testl %edx, %edx
-; X86-NEXT: movl $0, %ecx
-; X86-NEXT: cmovbl %edx, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-NEXT: andl %edx, %edi
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT: cmovel %ecx, %edi
-; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: movl $0, %ecx
-; X86-NEXT: cmoval %eax, %ecx
-; X86-NEXT: cmpl $-1, %edi
-; X86-NEXT: movl $0, %edx
-; X86-NEXT: cmovnel %edx, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT: testl %esi, %esi
+; X86-NEXT: movl $0, %ebx
+; X86-NEXT: cmovel %edi, %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: movl $0, %ebx
; X86-NEXT: cmovsl %ebx, %edi
-; X86-NEXT: cmovsl %edx, %eax
-; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-NEXT: cmpl $-1, %esi
-; X86-NEXT: cmovel %ecx, %eax
-; X86-NEXT: cmovnel %edi, %esi
-; X86-NEXT: shldl $31, %eax, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $-1, %ebx
+; X86-NEXT: cmovsl %ebx, %eax
+; X86-NEXT: movl %eax, %ebx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: cmpl $-1, %eax
-; X86-NEXT: cmovael %ebx, %eax
-; X86-NEXT: movl $-1, %esi
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: cmpl $1, %edx
-; X86-NEXT: movl $0, %eax
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: notl %eax
-; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: testl %edx, %edx
-; X86-NEXT: movl $0, %ecx
-; X86-NEXT: cmovbl %edx, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: andl %edx, %ebx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT: cmovel %ecx, %ebx
-; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: movl $0, %ecx
-; X86-NEXT: cmoval %eax, %ecx
-; X86-NEXT: cmpl $-1, %ebx
-; X86-NEXT: movl $0, %edi
-; X86-NEXT: cmovnel %edi, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: testl %edx, %edx
-; X86-NEXT: cmovsl %esi, %ebx
-; X86-NEXT: movl %ebx, %esi
-; X86-NEXT: cmovsl %edi, %eax
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: andl %edx, %ebx
-; X86-NEXT: cmpl $-1, %ebx
-; X86-NEXT: cmovel %ecx, %eax
-; X86-NEXT: cmovnel %esi, %ebx
-; X86-NEXT: shldl $31, %eax, %ebx
+; X86-NEXT: cmovnel %ebx, %eax
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: shrdl $1, %eax, %edi
; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl %ebx, 12(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl %ecx, 8(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %edi, 12(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
; X86-NEXT: movl %ecx, 4(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: movl %edx, (%eax)
; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
diff --git a/llvm/test/CodeGen/X86/udiv_fix_sat.ll b/llvm/test/CodeGen/X86/udiv_fix_sat.ll
index 2be51c3ccbba..0f295a0f7c2a 100644
--- a/llvm/test/CodeGen/X86/udiv_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/udiv_fix_sat.ll
@@ -180,18 +180,15 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X64-NEXT: xorl %ebx, %ebx
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: callq __udivti3 at PLT
-; X64-NEXT: cmpq $-1, %rax
-; X64-NEXT: movq $-1, %rcx
-; X64-NEXT: cmovbq %rax, %rcx
; X64-NEXT: cmpq $1, %rdx
-; X64-NEXT: movl $1, %esi
-; X64-NEXT: cmovbq %rdx, %rsi
; X64-NEXT: sbbq %rbx, %rbx
; X64-NEXT: notq %rbx
; X64-NEXT: orq %rax, %rbx
; X64-NEXT: cmpq $1, %rdx
-; X64-NEXT: cmoveq %rcx, %rbx
-; X64-NEXT: shrdq $1, %rsi, %rbx
+; X64-NEXT: movl $1, %ecx
+; X64-NEXT: cmovbq %rdx, %rcx
+; X64-NEXT: cmoveq %rax, %rbx
+; X64-NEXT: shrdq $1, %rcx, %rbx
; X64-NEXT: movq %rbx, %rax
; X64-NEXT: popq %rbx
; X64-NEXT: retq
@@ -221,18 +218,15 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X86-NEXT: pushl %esi
; X86-NEXT: calll __udivti3
; X86-NEXT: addl $32, %esp
-; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: movl (%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: cmpl $-1, %eax
-; X86-NEXT: movl $-1, %ecx
-; X86-NEXT: movl $-1, %esi
-; X86-NEXT: cmovbl %eax, %esi
; X86-NEXT: cmpl $-1, %edx
+; X86-NEXT: movl %ecx, %eax
; X86-NEXT: cmovel %edx, %eax
-; X86-NEXT: cmovel %esi, %eax
-; X86-NEXT: cmovael %ecx, %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: orl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: cmovel %ecx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $-1, %ecx
; X86-NEXT: cmovnel %ecx, %edx
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: leal -4(%ebp), %esp
@@ -400,8 +394,8 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: addl %ecx, %ecx
@@ -414,109 +408,95 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: pushl %ecx
; X86-NEXT: calll __udivdi3
; X86-NEXT: addl $16, %esp
-; X86-NEXT: cmpl $-1, %eax
-; X86-NEXT: movl $-1, %ecx
-; X86-NEXT: cmovbl %eax, %ecx
; X86-NEXT: cmpl $1, %edx
-; X86-NEXT: movl $0, %edi
-; X86-NEXT: sbbl %edi, %edi
-; X86-NEXT: notl %edi
-; X86-NEXT: orl %eax, %edi
-; X86-NEXT: movl %edi, %ebx
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: addl %esi, %esi
-; X86-NEXT: setb %al
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: sbbl %ecx, %ecx
+; X86-NEXT: notl %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: xorl %ecx, %ecx
+; X86-NEXT: addl %edi, %edi
+; X86-NEXT: setb %cl
; X86-NEXT: cmpl $1, %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmovel %ecx, %ebx
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl $1, %ecx
-; X86-NEXT: cmovael %ecx, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl $1, %ebp
+; X86-NEXT: cmovael %ebp, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: shldl $31, %esi, %eax
-; X86-NEXT: shll $31, %esi
-; X86-NEXT: pushl $0
-; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
-; X86-NEXT: pushl %esi
-; X86-NEXT: calll __udivdi3
-; X86-NEXT: addl $16, %esp
-; X86-NEXT: cmpl $-1, %eax
-; X86-NEXT: movl $-1, %ecx
-; X86-NEXT: cmovbl %eax, %ecx
-; X86-NEXT: cmpl $1, %edx
-; X86-NEXT: movl $1, %esi
-; X86-NEXT: cmovbl %edx, %esi
+; X86-NEXT: movl $1, %ebp
+; X86-NEXT: cmovel %eax, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl $0, %esi
-; X86-NEXT: sbbl %esi, %esi
-; X86-NEXT: notl %esi
-; X86-NEXT: orl %eax, %esi
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: addl %edi, %edi
-; X86-NEXT: setb %al
-; X86-NEXT: cmpl $1, %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: cmovel %ecx, %esi
-; X86-NEXT: shldl $31, %edi, %eax
+; X86-NEXT: shldl $31, %edi, %ecx
; X86-NEXT: shll $31, %edi
; X86-NEXT: pushl $0
; X86-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-NEXT: pushl %eax
+; X86-NEXT: pushl %ecx
; X86-NEXT: pushl %edi
; X86-NEXT: calll __udivdi3
; X86-NEXT: addl $16, %esp
-; X86-NEXT: cmpl $-1, %eax
-; X86-NEXT: movl $-1, %ebx
-; X86-NEXT: cmovbl %eax, %ebx
; X86-NEXT: cmpl $1, %edx
; X86-NEXT: movl $0, %edi
; X86-NEXT: sbbl %edi, %edi
; X86-NEXT: notl %edi
; X86-NEXT: orl %eax, %edi
; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: addl %ebp, %ebp
+; X86-NEXT: addl %ebx, %ebx
; X86-NEXT: setb %cl
; X86-NEXT: cmpl $1, %edx
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: movl $1, %edx
-; X86-NEXT: cmovael %edx, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: cmovel %ebx, %edi
-; X86-NEXT: shldl $31, %ebp, %ecx
-; X86-NEXT: shll $31, %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: cmovael %ebp, %edx
+; X86-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X86-NEXT: cmovel %eax, %edi
+; X86-NEXT: shldl $31, %ebx, %ecx
+; X86-NEXT: shll $31, %ebx
; X86-NEXT: pushl $0
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
; X86-NEXT: calll __udivdi3
; X86-NEXT: addl $16, %esp
-; X86-NEXT: cmpl $-1, %eax
-; X86-NEXT: movl $-1, %ecx
-; X86-NEXT: cmovbl %eax, %ecx
+; X86-NEXT: movl %edx, %ebx
; X86-NEXT: cmpl $1, %edx
-; X86-NEXT: movl $1, %ebx
-; X86-NEXT: cmovbl %edx, %ebx
; X86-NEXT: movl $0, %ebp
; X86-NEXT: sbbl %ebp, %ebp
; X86-NEXT: notl %ebp
; X86-NEXT: orl %eax, %ebp
+; X86-NEXT: xorl %ecx, %ecx
+; X86-NEXT: addl %esi, %esi
+; X86-NEXT: setb %cl
; X86-NEXT: cmpl $1, %edx
-; X86-NEXT: cmovel %ecx, %ebp
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: shrdl $1, %eax, %ecx
+; X86-NEXT: movl $1, %edx
+; X86-NEXT: cmovael %edx, %ebx
+; X86-NEXT: cmovel %eax, %ebp
+; X86-NEXT: shldl $31, %esi, %ecx
+; X86-NEXT: shll $31, %esi
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl %ecx
+; X86-NEXT: pushl %esi
+; X86-NEXT: calll __udivdi3
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: cmpl $1, %edx
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: sbbl %ecx, %ecx
+; X86-NEXT: notl %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: cmpl $1, %edx
+; X86-NEXT: movl $1, %esi
+; X86-NEXT: cmovbl %edx, %esi
+; X86-NEXT: cmovel %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: shrdl $1, %eax, %esi
+; X86-NEXT: shrdl $1, %eax, %edx
; X86-NEXT: movl (%esp), %eax # 4-byte Reload
; X86-NEXT: shrdl $1, %eax, %edi
; X86-NEXT: shrdl $1, %ebx, %ebp
+; X86-NEXT: shrdl $1, %esi, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %ebp, 12(%eax)
-; X86-NEXT: movl %edi, 8(%eax)
-; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %ecx, (%eax)
-; X86-NEXT: addl $16, %esp
+; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: movl %ebp, 8(%eax)
+; X86-NEXT: movl %edi, 4(%eax)
+; X86-NEXT: movl %edx, (%eax)
+; X86-NEXT: addl $12, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx