[llvm] 005173c - [X86] `X86TargetLowering`: override `allowsMemoryAccess()`
Roman Lebedev via llvm-commits
llvm-commits@lists.llvm.org
Sat Jan 21 13:18:25 PST 2023
Author: Roman Lebedev
Date: 2023-01-22T00:12:28+03:00
New Revision: 005173cbb609f79adc2018e378bc6897cf84b06d
URL: https://github.com/llvm/llvm-project/commit/005173cbb609f79adc2018e378bc6897cf84b06d
DIFF: https://github.com/llvm/llvm-project/commit/005173cbb609f79adc2018e378bc6897cf84b06d.diff
LOG: [X86] `X86TargetLowering`: override `allowsMemoryAccess()`
The baseline `allowsMemoryAccess()` is wrong for X86.
It assumes that aligned memory operations are always allowed,
but that is not true.
For example, without AVX2 we cannot perform a 32-byte aligned non-temporal
load of a 32-byte vector, yet `allowsMemoryAccess()` will say it is allowed,
so we may end up merging non-temporal loads, only to split them back up
during legalization, and then merge them all over again.
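As a hypothetical reduced example (not taken from this commit), the IR below
performs a 32-byte aligned non-temporal load of a 32-byte vector; without AVX2
there is no 256-bit `vmovntdqa`, so the load still has to be split despite
being fully aligned:

  define <8 x i32> @nt_load_v8i32(ptr %p) {
    ; 32-byte aligned, but pre-AVX2 there is no single 256-bit NT load
    %v = load <8 x i32>, ptr %p, align 32, !nontemporal !0
    ret <8 x i32> %v
  }
  !0 = !{i32 1}

With this patch, `allowsMemoryAccess()` reports such an access as not allowed
on pre-AVX2 targets, so the combiner no longer creates it by merging smaller
non-temporal operations.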
NOTE: the test changes here are superfluous. The main effect is that without this change,
in D141777, we'd get stuck endlessly merging and splitting non-temporal stores.
Reviewed By: RKSimon
Differential Revision: https://reviews.llvm.org/D141776
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86ISelLowering.h
llvm/test/CodeGen/X86/add-sub-bool.ll
llvm/test/CodeGen/X86/bswap-wide-int.ll
llvm/test/CodeGen/X86/fshl.ll
llvm/test/CodeGen/X86/fshr.ll
llvm/test/CodeGen/X86/i128-add.ll
llvm/test/CodeGen/X86/icmp-shift-opt.ll
llvm/test/CodeGen/X86/legalize-shl-vec.ll
llvm/test/CodeGen/X86/merge-consecutive-stores-nt.ll
llvm/test/CodeGen/X86/setcc-wide-types.ll
llvm/test/CodeGen/X86/smin.ll
llvm/test/CodeGen/X86/smul-with-overflow.ll
llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll
llvm/test/CodeGen/X86/umin.ll
llvm/test/CodeGen/X86/umul-with-overflow.ll
llvm/test/CodeGen/X86/wide-integer-cmp.ll
llvm/test/CodeGen/X86/xaluo128.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f999e27c9758..8ffc494501b2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2730,24 +2730,30 @@ bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
return true;
}
+static bool isBitAligned(Align Alignment, uint64_t SizeInBits) {
+ return (8 * Alignment.value()) % SizeInBits == 0;
+}
+
+bool X86TargetLowering::isMemoryAccessFast(EVT VT, Align Alignment) const {
+ if (isBitAligned(Alignment, VT.getSizeInBits()))
+ return true;
+ switch (VT.getSizeInBits()) {
+ default:
+ // 8-byte and under are always assumed to be fast.
+ return true;
+ case 128:
+ return !Subtarget.isUnalignedMem16Slow();
+ case 256:
+ return !Subtarget.isUnalignedMem32Slow();
+ // TODO: What about AVX-512 (512-bit) accesses?
+ }
+}
+
bool X86TargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
unsigned *Fast) const {
- if (Fast) {
- switch (VT.getSizeInBits()) {
- default:
- // 8-byte and under are always assumed to be fast.
- *Fast = 1;
- break;
- case 128:
- *Fast = !Subtarget.isUnalignedMem16Slow();
- break;
- case 256:
- *Fast = !Subtarget.isUnalignedMem32Slow();
- break;
- // TODO: What about AVX-512 (512-bit) accesses?
- }
- }
+ if (Fast)
+ *Fast = isMemoryAccessFast(VT, Alignment);
// NonTemporal vector memory ops must be aligned.
if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
// NT loads can only be vector aligned, so if its less aligned than the
@@ -2762,6 +2768,44 @@ bool X86TargetLowering::allowsMisalignedMemoryAccesses(
return true;
}
+bool X86TargetLowering::allowsMemoryAccess(LLVMContext &Context,
+ const DataLayout &DL, EVT VT,
+ unsigned AddrSpace, Align Alignment,
+ MachineMemOperand::Flags Flags,
+ unsigned *Fast) const {
+ if (Fast)
+ *Fast = isMemoryAccessFast(VT, Alignment);
+ if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
+ if (allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags,
+ /*Fast=*/nullptr))
+ return true;
+ // NonTemporal vector memory ops are special, and must be aligned.
+ if (!isBitAligned(Alignment, VT.getSizeInBits()))
+ return false;
+ switch (VT.getSizeInBits()) {
+ case 128:
+ if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasSSE41())
+ return true;
+ if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasSSE2())
+ return true;
+ return false;
+ case 256:
+ if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasAVX2())
+ return true;
+ if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasAVX())
+ return true;
+ return false;
+ case 512:
+ if (Subtarget.hasAVX512())
+ return true;
+ return false;
+ default:
+ return false; // Don't have NonTemporal vector memory ops of this size.
+ }
+ }
+ return true;
+}
+
/// Return the entry encoding for a jump table in the
/// current function. The returned value is a member of the
/// MachineJumpTableInfo::JTEntryKind enum.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index c08227b5b383..c5c115047271 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1003,12 +1003,31 @@ namespace llvm {
/// legal as the hook is used before type legalization.
bool isSafeMemOpType(MVT VT) const override;
+ bool isMemoryAccessFast(EVT VT, Align Alignment) const;
+
/// Returns true if the target allows unaligned memory accesses of the
/// specified type. Returns whether it is "fast" in the last argument.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
MachineMemOperand::Flags Flags,
unsigned *Fast) const override;
+ /// This function returns true if the memory access is aligned or if the
+ /// target allows this specific unaligned memory access. If the access is
+ /// allowed, the optional final parameter returns a relative speed of the
+ /// access (as defined by the target).
+ bool allowsMemoryAccess(
+ LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
+ Align Alignment,
+ MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+ unsigned *Fast = nullptr) const override;
+
+ bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
+ const MachineMemOperand &MMO,
+ unsigned *Fast) const {
+ return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(),
+ MMO.getAlign(), MMO.getFlags(), Fast);
+ }
+
/// Provide custom lowering hooks for some operations.
///
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
diff --git a/llvm/test/CodeGen/X86/add-sub-bool.ll b/llvm/test/CodeGen/X86/add-sub-bool.ll
index 17eda5966019..c2bfcf57185e 100644
--- a/llvm/test/CodeGen/X86/add-sub-bool.ll
+++ b/llvm/test/CodeGen/X86/add-sub-bool.ll
@@ -113,17 +113,17 @@ define i128 @test_i128_add_add_idx(i128 %x, i128 %y, i128 %z) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: addl {{[0-9]+}}(%esp), %esi
; X86-NEXT: adcl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx
; X86-NEXT: adcl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx
; X86-NEXT: btl $5, {{[0-9]+}}(%esp)
; X86-NEXT: adcl $0, %esi
; X86-NEXT: adcl $0, %edi
-; X86-NEXT: adcl $0, %edx
; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: adcl $0, %edx
; X86-NEXT: movl %edi, 4(%eax)
; X86-NEXT: movl %esi, (%eax)
-; X86-NEXT: movl %edx, 8(%eax)
-; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: movl %ecx, 8(%eax)
+; X86-NEXT: movl %edx, 12(%eax)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
diff --git a/llvm/test/CodeGen/X86/bswap-wide-int.ll b/llvm/test/CodeGen/X86/bswap-wide-int.ll
index 1ba107a4e7d8..6d5e995a6d57 100644
--- a/llvm/test/CodeGen/X86/bswap-wide-int.ll
+++ b/llvm/test/CodeGen/X86/bswap-wide-int.ll
@@ -71,8 +71,8 @@ define i128 @bswap_i128(i128 %a0) nounwind {
; X86-MOVBE-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-MOVBE-NEXT: movbel %esi, 12(%eax)
; X86-MOVBE-NEXT: movbel %edi, 8(%eax)
-; X86-MOVBE-NEXT: movbel %edx, 4(%eax)
-; X86-MOVBE-NEXT: movbel %ecx, (%eax)
+; X86-MOVBE-NEXT: movbel %ecx, 4(%eax)
+; X86-MOVBE-NEXT: movbel %edx, (%eax)
; X86-MOVBE-NEXT: popl %esi
; X86-MOVBE-NEXT: popl %edi
; X86-MOVBE-NEXT: retl $4
diff --git a/llvm/test/CodeGen/X86/fshl.ll b/llvm/test/CodeGen/X86/fshl.ll
index bf561a19b687..36bf74f57315 100644
--- a/llvm/test/CodeGen/X86/fshl.ll
+++ b/llvm/test/CodeGen/X86/fshl.ll
@@ -275,12 +275,12 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-FAST-NEXT: testb $64, %cl
; X86-FAST-NEXT: jne .LBB6_1
; X86-FAST-NEXT: # %bb.2:
-; X86-FAST-NEXT: movl %edi, %eax
-; X86-FAST-NEXT: movl %esi, %edi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-FAST-NEXT: movl %ebx, %ebp
; X86-FAST-NEXT: movl %edx, %ebx
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-FAST-NEXT: movl %edi, %eax
+; X86-FAST-NEXT: movl %esi, %edi
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-FAST-NEXT: testb $32, %cl
; X86-FAST-NEXT: je .LBB6_5
; X86-FAST-NEXT: .LBB6_4:
@@ -324,72 +324,72 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-SLOW-NEXT: pushl %edi
; X86-SLOW-NEXT: pushl %esi
; X86-SLOW-NEXT: pushl %eax
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT: testb $64, %al
; X86-SLOW-NEXT: jne .LBB6_1
; X86-SLOW-NEXT: # %bb.2:
-; X86-SLOW-NEXT: movl %ebp, %ecx
-; X86-SLOW-NEXT: movl %edi, %ebp
+; X86-SLOW-NEXT: movl %edx, %ebp
+; X86-SLOW-NEXT: movl %edi, %edx
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-SLOW-NEXT: movl %edx, %ebx
-; X86-SLOW-NEXT: movl %esi, %edx
+; X86-SLOW-NEXT: movl %ebx, %ecx
+; X86-SLOW-NEXT: movl %esi, %ebx
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SLOW-NEXT: testb $32, %al
; X86-SLOW-NEXT: je .LBB6_5
; X86-SLOW-NEXT: .LBB6_4:
-; X86-SLOW-NEXT: movl %esi, (%esp) # 4-byte Spill
-; X86-SLOW-NEXT: movl %ebp, %esi
-; X86-SLOW-NEXT: movl %edx, %ebp
+; X86-SLOW-NEXT: movl %edi, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: movl %ebx, %edi
+; X86-SLOW-NEXT: movl %edx, %ebx
; X86-SLOW-NEXT: movl %ecx, %edx
; X86-SLOW-NEXT: jmp .LBB6_6
; X86-SLOW-NEXT: .LBB6_1:
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT: testb $32, %al
; X86-SLOW-NEXT: jne .LBB6_4
; X86-SLOW-NEXT: .LBB6_5:
-; X86-SLOW-NEXT: movl %ecx, %ebx
-; X86-SLOW-NEXT: movl %edi, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: movl %ecx, %ebp
+; X86-SLOW-NEXT: movl %esi, (%esp) # 4-byte Spill
; X86-SLOW-NEXT: .LBB6_6:
-; X86-SLOW-NEXT: movl %edx, %edi
+; X86-SLOW-NEXT: movl %edx, %esi
; X86-SLOW-NEXT: movl %eax, %ecx
-; X86-SLOW-NEXT: shll %cl, %edi
-; X86-SLOW-NEXT: shrl %ebx
+; X86-SLOW-NEXT: shll %cl, %esi
+; X86-SLOW-NEXT: shrl %ebp
; X86-SLOW-NEXT: movb %al, %ch
; X86-SLOW-NEXT: notb %ch
; X86-SLOW-NEXT: movb %ch, %cl
-; X86-SLOW-NEXT: shrl %cl, %ebx
-; X86-SLOW-NEXT: orl %edi, %ebx
-; X86-SLOW-NEXT: movl %ebp, %edi
+; X86-SLOW-NEXT: shrl %cl, %ebp
+; X86-SLOW-NEXT: orl %esi, %ebp
+; X86-SLOW-NEXT: movl %ebx, %esi
; X86-SLOW-NEXT: movb %al, %cl
-; X86-SLOW-NEXT: shll %cl, %edi
+; X86-SLOW-NEXT: shll %cl, %esi
; X86-SLOW-NEXT: shrl %edx
; X86-SLOW-NEXT: movb %ch, %cl
; X86-SLOW-NEXT: shrl %cl, %edx
-; X86-SLOW-NEXT: orl %edi, %edx
-; X86-SLOW-NEXT: movl %esi, %edi
+; X86-SLOW-NEXT: orl %esi, %edx
+; X86-SLOW-NEXT: movl %edi, %esi
; X86-SLOW-NEXT: movb %al, %cl
-; X86-SLOW-NEXT: shll %cl, %edi
-; X86-SLOW-NEXT: shrl %ebp
+; X86-SLOW-NEXT: shll %cl, %esi
+; X86-SLOW-NEXT: shrl %ebx
; X86-SLOW-NEXT: movb %ch, %cl
-; X86-SLOW-NEXT: shrl %cl, %ebp
-; X86-SLOW-NEXT: orl %edi, %ebp
+; X86-SLOW-NEXT: shrl %cl, %ebx
+; X86-SLOW-NEXT: orl %esi, %ebx
; X86-SLOW-NEXT: movb %al, %cl
; X86-SLOW-NEXT: movl (%esp), %eax # 4-byte Reload
; X86-SLOW-NEXT: shll %cl, %eax
-; X86-SLOW-NEXT: shrl %esi
+; X86-SLOW-NEXT: shrl %edi
; X86-SLOW-NEXT: movb %ch, %cl
-; X86-SLOW-NEXT: shrl %cl, %esi
-; X86-SLOW-NEXT: orl %eax, %esi
+; X86-SLOW-NEXT: shrl %cl, %edi
+; X86-SLOW-NEXT: orl %eax, %edi
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: movl %esi, 12(%eax)
-; X86-SLOW-NEXT: movl %ebp, 8(%eax)
+; X86-SLOW-NEXT: movl %edi, 12(%eax)
+; X86-SLOW-NEXT: movl %ebx, 8(%eax)
; X86-SLOW-NEXT: movl %edx, 4(%eax)
-; X86-SLOW-NEXT: movl %ebx, (%eax)
+; X86-SLOW-NEXT: movl %ebp, (%eax)
; X86-SLOW-NEXT: addl $4, %esp
; X86-SLOW-NEXT: popl %esi
; X86-SLOW-NEXT: popl %edi
diff --git a/llvm/test/CodeGen/X86/fshr.ll b/llvm/test/CodeGen/X86/fshr.ll
index eb1f0402c6f4..367a3dddb864 100644
--- a/llvm/test/CodeGen/X86/fshr.ll
+++ b/llvm/test/CodeGen/X86/fshr.ll
@@ -263,20 +263,20 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-FAST-NEXT: pushl %esi
; X86-FAST-NEXT: pushl %eax
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-FAST-NEXT: testb $64, %cl
; X86-FAST-NEXT: je .LBB6_1
; X86-FAST-NEXT: # %bb.2:
+; X86-FAST-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X86-FAST-NEXT: movl %esi, %edx
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-FAST-NEXT: movl %edi, %ebp
; X86-FAST-NEXT: movl %ebx, %edi
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-FAST-NEXT: movl %esi, (%esp) # 4-byte Spill
-; X86-FAST-NEXT: movl %edx, %esi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT: testb $32, %cl
; X86-FAST-NEXT: je .LBB6_4
; X86-FAST-NEXT: jmp .LBB6_5
@@ -287,20 +287,20 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-FAST-NEXT: testb $32, %cl
; X86-FAST-NEXT: jne .LBB6_5
; X86-FAST-NEXT: .LBB6_4:
-; X86-FAST-NEXT: movl %edx, %ebx
-; X86-FAST-NEXT: movl %edi, %edx
-; X86-FAST-NEXT: movl %esi, %edi
-; X86-FAST-NEXT: movl %ebp, %esi
+; X86-FAST-NEXT: movl %esi, %ebx
+; X86-FAST-NEXT: movl %edi, %esi
+; X86-FAST-NEXT: movl %edx, %edi
+; X86-FAST-NEXT: movl %ebp, %edx
; X86-FAST-NEXT: movl (%esp), %ebp # 4-byte Reload
; X86-FAST-NEXT: .LBB6_5:
-; X86-FAST-NEXT: shrdl %cl, %esi, %ebp
-; X86-FAST-NEXT: shrdl %cl, %edi, %esi
-; X86-FAST-NEXT: shrdl %cl, %edx, %edi
+; X86-FAST-NEXT: shrdl %cl, %edx, %ebp
+; X86-FAST-NEXT: shrdl %cl, %edi, %edx
+; X86-FAST-NEXT: shrdl %cl, %esi, %edi
; X86-FAST-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-FAST-NEXT: shrdl %cl, %ebx, %edx
-; X86-FAST-NEXT: movl %edx, 12(%eax)
+; X86-FAST-NEXT: shrdl %cl, %ebx, %esi
+; X86-FAST-NEXT: movl %esi, 12(%eax)
; X86-FAST-NEXT: movl %edi, 8(%eax)
-; X86-FAST-NEXT: movl %esi, 4(%eax)
+; X86-FAST-NEXT: movl %edx, 4(%eax)
; X86-FAST-NEXT: movl %ebp, (%eax)
; X86-FAST-NEXT: addl $4, %esp
; X86-FAST-NEXT: popl %esi
@@ -316,25 +316,25 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-SLOW-NEXT: pushl %edi
; X86-SLOW-NEXT: pushl %esi
; X86-SLOW-NEXT: subl $8, %esp
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT: testb $64, %cl
; X86-SLOW-NEXT: je .LBB6_1
; X86-SLOW-NEXT: # %bb.2:
-; X86-SLOW-NEXT: movl %ebx, %edx
-; X86-SLOW-NEXT: movl %edi, %ebx
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-SLOW-NEXT: movl %ebp, %eax
-; X86-SLOW-NEXT: movl %esi, %ebp
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-SLOW-NEXT: movl %ebx, %ebp
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-SLOW-NEXT: movl %esi, %edx
+; X86-SLOW-NEXT: movl %edi, %esi
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-SLOW-NEXT: testb $32, %cl
; X86-SLOW-NEXT: jne .LBB6_5
; X86-SLOW-NEXT: .LBB6_4:
-; X86-SLOW-NEXT: movl %esi, %edi
-; X86-SLOW-NEXT: movl %ebx, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: movl %ebx, %edi
+; X86-SLOW-NEXT: movl %esi, (%esp) # 4-byte Spill
; X86-SLOW-NEXT: movl %ebp, %esi
; X86-SLOW-NEXT: movl %edx, %ebp
; X86-SLOW-NEXT: movl %eax, %edx
@@ -345,8 +345,7 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-SLOW-NEXT: testb $32, %cl
; X86-SLOW-NEXT: je .LBB6_4
; X86-SLOW-NEXT: .LBB6_5:
-; X86-SLOW-NEXT: movl %esi, (%esp) # 4-byte Spill
-; X86-SLOW-NEXT: movl %ebx, %esi
+; X86-SLOW-NEXT: movl %ebx, (%esp) # 4-byte Spill
; X86-SLOW-NEXT: .LBB6_6:
; X86-SLOW-NEXT: shrl %cl, %edx
; X86-SLOW-NEXT: movl %ecx, %ebx
diff --git a/llvm/test/CodeGen/X86/i128-add.ll b/llvm/test/CodeGen/X86/i128-add.ll
index b033fc155e70..2849e448a053 100644
--- a/llvm/test/CodeGen/X86/i128-add.ll
+++ b/llvm/test/CodeGen/X86/i128-add.ll
@@ -14,16 +14,16 @@ define i128 @add_i128(i128 %x, i128 %y) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: addl {{[0-9]+}}(%esp), %esi
; X86-NEXT: adcl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx
; X86-NEXT: adcl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx
; X86-NEXT: addl $1, %esi
; X86-NEXT: adcl $0, %edi
-; X86-NEXT: adcl $0, %edx
; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: adcl $0, %edx
; X86-NEXT: movl %edi, 4(%eax)
; X86-NEXT: movl %esi, (%eax)
-; X86-NEXT: movl %edx, 8(%eax)
-; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: movl %ecx, 8(%eax)
+; X86-NEXT: movl %edx, 12(%eax)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
@@ -55,16 +55,16 @@ define <1 x i128> @add_v1i128(<1 x i128> %x, <1 x i128> %y) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: addl {{[0-9]+}}(%esp), %esi
; X86-NEXT: adcl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx
; X86-NEXT: adcl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx
; X86-NEXT: addl $1, %esi
; X86-NEXT: adcl $0, %edi
-; X86-NEXT: adcl $0, %edx
; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: adcl $0, %edx
; X86-NEXT: movl %edi, 4(%eax)
; X86-NEXT: movl %esi, (%eax)
-; X86-NEXT: movl %edx, 8(%eax)
-; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: movl %ecx, 8(%eax)
+; X86-NEXT: movl %edx, 12(%eax)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
diff --git a/llvm/test/CodeGen/X86/icmp-shift-opt.ll b/llvm/test/CodeGen/X86/icmp-shift-opt.ll
index 38815d950a0e..7482de0e4ddf 100644
--- a/llvm/test/CodeGen/X86/icmp-shift-opt.ll
+++ b/llvm/test/CodeGen/X86/icmp-shift-opt.ll
@@ -75,12 +75,12 @@ define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind {
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: orl %ecx, %edx
; X86-NEXT: orl %eax, %edx
-; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: shldl $15, %edx, %eax
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: shldl $15, %edx, %ecx
; X86-NEXT: sete %al
; X86-NEXT: retl
;
@@ -100,12 +100,12 @@ define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind {
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: orl %ecx, %edx
; X86-NEXT: orl %eax, %edx
-; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: shldl $15, %edx, %eax
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: shldl $15, %edx, %ecx
; X86-NEXT: setne %al
; X86-NEXT: retl
;
@@ -173,22 +173,22 @@ define i1 @opt_setcc_shl_eq_zero_multiple_shl_users(i128 %a) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: shldl $17, %esi, %edx
-; X86-NEXT: shldl $17, %ecx, %esi
+; X86-NEXT: shldl $17, %edx, %esi
+; X86-NEXT: shldl $17, %ecx, %edx
; X86-NEXT: shldl $17, %eax, %ecx
; X86-NEXT: shll $17, %eax
; X86-NEXT: movl %ecx, %edi
-; X86-NEXT: orl %edx, %edi
+; X86-NEXT: orl %esi, %edi
; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: orl %esi, %ebx
+; X86-NEXT: orl %edx, %ebx
; X86-NEXT: orl %edi, %ebx
; X86-NEXT: sete %bl
-; X86-NEXT: pushl %edx
; X86-NEXT: pushl %esi
+; X86-NEXT: pushl %edx
; X86-NEXT: pushl %ecx
; X86-NEXT: pushl %eax
; X86-NEXT: calll use@PLT
diff --git a/llvm/test/CodeGen/X86/legalize-shl-vec.ll b/llvm/test/CodeGen/X86/legalize-shl-vec.ll
index 845fc60687df..2c24db9afb54 100644
--- a/llvm/test/CodeGen/X86/legalize-shl-vec.ll
+++ b/llvm/test/CodeGen/X86/legalize-shl-vec.ll
@@ -6,8 +6,8 @@ define <2 x i256> @test_shl(<2 x i256> %In) {
; X32-LABEL: test_shl:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: shldl $2, %ecx, %edx
; X32-NEXT: movl %edx, 60(%eax)
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -48,13 +48,13 @@ define <2 x i256> @test_shl(<2 x i256> %In) {
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi
-; X64-NEXT: shldq $2, %rcx, %rdx
-; X64-NEXT: shldq $2, %rdi, %rcx
+; X64-NEXT: shldq $2, %rdx, %rcx
+; X64-NEXT: shldq $2, %rdi, %rdx
; X64-NEXT: shldq $2, %r9, %rdi
; X64-NEXT: shlq $63, %rsi
; X64-NEXT: shlq $2, %r9
-; X64-NEXT: movq %rdx, 56(%rax)
-; X64-NEXT: movq %rcx, 48(%rax)
+; X64-NEXT: movq %rcx, 56(%rax)
+; X64-NEXT: movq %rdx, 48(%rax)
; X64-NEXT: movq %rdi, 40(%rax)
; X64-NEXT: movq %r9, 32(%rax)
; X64-NEXT: movq %rsi, 24(%rax)
@@ -84,36 +84,37 @@ define <2 x i256> @test_srl(<2 x i256> %In) {
; X32-NEXT: .cfi_offset %edi, -16
; X32-NEXT: .cfi_offset %ebx, -12
; X32-NEXT: .cfi_offset %ebp, -8
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X32-NEXT: movl %ebx, %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-NEXT: movl %ebp, %ebx
+; X32-NEXT: shldl $28, %edx, %ebx
+; X32-NEXT: shldl $28, %esi, %edx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: shldl $28, %ecx, %esi
+; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
; X32-NEXT: shldl $28, %edi, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: shldl $28, %esi, %edi
-; X32-NEXT: shldl $28, %edx, %esi
-; X32-NEXT: shldl $28, %eax, %edx
-; X32-NEXT: shldl $28, %ebp, %eax
-; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: shldl $28, %eax, %edi
+; X32-NEXT: movl %eax, %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: shldl $28, %eax, %ebp
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: shrdl $4, %eax, %ecx
-; X32-NEXT: shrl $4, %ebx
+; X32-NEXT: shldl $28, %eax, %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: shrdl $4, %eax, %edx
+; X32-NEXT: shrl $4, %ebp
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %ebx, 60(%eax)
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebp, 60(%eax)
; X32-NEXT: movl %ebx, 56(%eax)
-; X32-NEXT: movl %edi, 52(%eax)
-; X32-NEXT: movl %esi, 48(%eax)
-; X32-NEXT: movl %edx, 44(%eax)
-; X32-NEXT: movl (%esp), %edx # 4-byte Reload
-; X32-NEXT: movl %edx, 40(%eax)
-; X32-NEXT: movl %ebp, 36(%eax)
-; X32-NEXT: movl %ecx, 32(%eax)
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, 52(%eax)
+; X32-NEXT: movl (%esp), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, 48(%eax)
+; X32-NEXT: movl %ecx, 44(%eax)
+; X32-NEXT: movl %edi, 40(%eax)
+; X32-NEXT: movl %esi, 36(%eax)
+; X32-NEXT: movl %edx, 32(%eax)
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: shrl $31, %ecx
; X32-NEXT: movl %ecx, (%eax)
@@ -143,12 +144,12 @@ define <2 x i256> @test_srl(<2 x i256> %In) {
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; X64-NEXT: shrdq $4, %rsi, %r9
-; X64-NEXT: shrdq $4, %rcx, %rsi
+; X64-NEXT: shrdq $4, %rdx, %rsi
+; X64-NEXT: shrdq $4, %rcx, %rdx
; X64-NEXT: shrq $63, %r8
-; X64-NEXT: shrdq $4, %rdx, %rcx
-; X64-NEXT: shrq $4, %rdx
-; X64-NEXT: movq %rdx, 56(%rdi)
-; X64-NEXT: movq %rcx, 48(%rdi)
+; X64-NEXT: shrq $4, %rcx
+; X64-NEXT: movq %rcx, 56(%rdi)
+; X64-NEXT: movq %rdx, 48(%rdi)
; X64-NEXT: movq %rsi, 40(%rdi)
; X64-NEXT: movq %r9, 32(%rdi)
; X64-NEXT: movq %r8, (%rdi)
@@ -178,36 +179,37 @@ define <2 x i256> @test_sra(<2 x i256> %In) {
; X32-NEXT: .cfi_offset %edi, -16
; X32-NEXT: .cfi_offset %ebx, -12
; X32-NEXT: .cfi_offset %ebp, -8
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X32-NEXT: movl %ebx, %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-NEXT: movl %ebp, %ebx
+; X32-NEXT: shldl $26, %edx, %ebx
+; X32-NEXT: shldl $26, %esi, %edx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: shldl $26, %ecx, %esi
+; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
; X32-NEXT: shldl $26, %edi, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: shldl $26, %esi, %edi
-; X32-NEXT: shldl $26, %edx, %esi
-; X32-NEXT: shldl $26, %eax, %edx
-; X32-NEXT: shldl $26, %ebp, %eax
-; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: shldl $26, %eax, %edi
+; X32-NEXT: movl %eax, %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: shldl $26, %eax, %ebp
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: shrdl $6, %eax, %ecx
-; X32-NEXT: sarl $6, %ebx
+; X32-NEXT: shldl $26, %eax, %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: shrdl $6, %eax, %edx
+; X32-NEXT: sarl $6, %ebp
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %ebx, 60(%eax)
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebp, 60(%eax)
; X32-NEXT: movl %ebx, 56(%eax)
-; X32-NEXT: movl %edi, 52(%eax)
-; X32-NEXT: movl %esi, 48(%eax)
-; X32-NEXT: movl %edx, 44(%eax)
-; X32-NEXT: movl (%esp), %edx # 4-byte Reload
-; X32-NEXT: movl %edx, 40(%eax)
-; X32-NEXT: movl %ebp, 36(%eax)
-; X32-NEXT: movl %ecx, 32(%eax)
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, 52(%eax)
+; X32-NEXT: movl (%esp), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, 48(%eax)
+; X32-NEXT: movl %ecx, 44(%eax)
+; X32-NEXT: movl %edi, 40(%eax)
+; X32-NEXT: movl %esi, 36(%eax)
+; X32-NEXT: movl %edx, 32(%eax)
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: sarl $31, %ecx
; X32-NEXT: movl %ecx, 28(%eax)
@@ -237,12 +239,12 @@ define <2 x i256> @test_sra(<2 x i256> %In) {
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; X64-NEXT: shrdq $6, %rsi, %r9
-; X64-NEXT: shrdq $6, %rcx, %rsi
+; X64-NEXT: shrdq $6, %rdx, %rsi
+; X64-NEXT: shrdq $6, %rcx, %rdx
; X64-NEXT: sarq $63, %r8
-; X64-NEXT: shrdq $6, %rdx, %rcx
-; X64-NEXT: sarq $6, %rdx
-; X64-NEXT: movq %rdx, 56(%rdi)
-; X64-NEXT: movq %rcx, 48(%rdi)
+; X64-NEXT: sarq $6, %rcx
+; X64-NEXT: movq %rcx, 56(%rdi)
+; X64-NEXT: movq %rdx, 48(%rdi)
; X64-NEXT: movq %rsi, 40(%rdi)
; X64-NEXT: movq %r9, 32(%rdi)
; X64-NEXT: movq %r8, 24(%rdi)
diff --git a/llvm/test/CodeGen/X86/merge-consecutive-stores-nt.ll b/llvm/test/CodeGen/X86/merge-consecutive-stores-nt.ll
index 93052e400a03..c08056982bd8 100644
--- a/llvm/test/CodeGen/X86/merge-consecutive-stores-nt.ll
+++ b/llvm/test/CodeGen/X86/merge-consecutive-stores-nt.ll
@@ -48,10 +48,10 @@ define void @merge_2_v4f32_align32(ptr %a0, ptr %a1) nounwind {
;
; X64-AVX1-LABEL: merge_2_v4f32_align32:
; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
-; X64-AVX1-NEXT: vmovntdqa (%rdi), %xmm1
-; X64-AVX1-NEXT: vmovntdq %xmm1, (%rsi)
-; X64-AVX1-NEXT: vmovntdq %xmm0, 16(%rsi)
+; X64-AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; X64-AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; X64-AVX1-NEXT: vmovntdq %xmm0, (%rsi)
+; X64-AVX1-NEXT: vmovntdq %xmm1, 16(%rsi)
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: merge_2_v4f32_align32:
diff --git a/llvm/test/CodeGen/X86/setcc-wide-types.ll b/llvm/test/CodeGen/X86/setcc-wide-types.ll
index 25c071940c52..44a6fada566c 100644
--- a/llvm/test/CodeGen/X86/setcc-wide-types.ll
+++ b/llvm/test/CodeGen/X86/setcc-wide-types.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s --check-prefix=ANY --check-prefix=NO512 --check-prefix=SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s --check-prefix=ANY --check-prefix=NO512 --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s --check-prefix=ANY --check-prefix=NO512 --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s --check-prefix=ANY --check-prefix=NO512 --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=ANY --check-prefix=NO512 --check-prefix=AVXANY --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=ANY --check-prefix=NO512 --check-prefix=AVXANY --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefix=ANY --check-prefix=AVXANY --check-prefix=AVX512 --check-prefix=AVX512F
@@ -734,63 +734,34 @@ define i32 @eq_i128_pair(ptr %a, ptr %b) {
; if we allowed 2 pairs of 32-byte loads per block.
define i32 @ne_i256_pair(ptr %a, ptr %b) {
-; SSE2-LABEL: ne_i256_pair:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movq 16(%rdi), %rax
-; SSE2-NEXT: movq 24(%rdi), %rcx
-; SSE2-NEXT: movq (%rdi), %rdx
-; SSE2-NEXT: movq 8(%rdi), %r8
-; SSE2-NEXT: xorq 8(%rsi), %r8
-; SSE2-NEXT: xorq 24(%rsi), %rcx
-; SSE2-NEXT: xorq (%rsi), %rdx
-; SSE2-NEXT: xorq 16(%rsi), %rax
-; SSE2-NEXT: movq 48(%rdi), %r9
-; SSE2-NEXT: movq 32(%rdi), %r10
-; SSE2-NEXT: movq 56(%rdi), %r11
-; SSE2-NEXT: movq 40(%rdi), %rdi
-; SSE2-NEXT: xorq 40(%rsi), %rdi
-; SSE2-NEXT: orq %r8, %rdi
-; SSE2-NEXT: xorq 56(%rsi), %r11
-; SSE2-NEXT: orq %rcx, %r11
-; SSE2-NEXT: orq %rdi, %r11
-; SSE2-NEXT: xorq 32(%rsi), %r10
-; SSE2-NEXT: orq %rdx, %r10
-; SSE2-NEXT: xorq 48(%rsi), %r9
-; SSE2-NEXT: orq %rax, %r9
-; SSE2-NEXT: orq %r10, %r9
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: orq %r11, %r9
-; SSE2-NEXT: setne %al
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: ne_i256_pair:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movq 16(%rdi), %rax
-; SSE41-NEXT: movq 24(%rdi), %rcx
-; SSE41-NEXT: movq (%rdi), %rdx
-; SSE41-NEXT: movq 8(%rdi), %r8
-; SSE41-NEXT: xorq 8(%rsi), %r8
-; SSE41-NEXT: xorq 24(%rsi), %rcx
-; SSE41-NEXT: xorq (%rsi), %rdx
-; SSE41-NEXT: xorq 16(%rsi), %rax
-; SSE41-NEXT: movq 48(%rdi), %r9
-; SSE41-NEXT: movq 32(%rdi), %r10
-; SSE41-NEXT: movq 56(%rdi), %r11
-; SSE41-NEXT: movq 40(%rdi), %rdi
-; SSE41-NEXT: xorq 40(%rsi), %rdi
-; SSE41-NEXT: orq %r8, %rdi
-; SSE41-NEXT: xorq 56(%rsi), %r11
-; SSE41-NEXT: orq %rcx, %r11
-; SSE41-NEXT: orq %rdi, %r11
-; SSE41-NEXT: xorq 32(%rsi), %r10
-; SSE41-NEXT: orq %rdx, %r10
-; SSE41-NEXT: xorq 48(%rsi), %r9
-; SSE41-NEXT: orq %rax, %r9
-; SSE41-NEXT: orq %r10, %r9
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: orq %r11, %r9
-; SSE41-NEXT: setne %al
-; SSE41-NEXT: retq
+; SSE-LABEL: ne_i256_pair:
+; SSE: # %bb.0:
+; SSE-NEXT: movq 16(%rdi), %rax
+; SSE-NEXT: movq 24(%rdi), %rcx
+; SSE-NEXT: movq (%rdi), %rdx
+; SSE-NEXT: movq 8(%rdi), %r8
+; SSE-NEXT: xorq 8(%rsi), %r8
+; SSE-NEXT: xorq 24(%rsi), %rcx
+; SSE-NEXT: xorq (%rsi), %rdx
+; SSE-NEXT: xorq 16(%rsi), %rax
+; SSE-NEXT: movq 48(%rdi), %r9
+; SSE-NEXT: movq 32(%rdi), %r10
+; SSE-NEXT: movq 56(%rdi), %r11
+; SSE-NEXT: movq 40(%rdi), %rdi
+; SSE-NEXT: xorq 40(%rsi), %rdi
+; SSE-NEXT: orq %r8, %rdi
+; SSE-NEXT: xorq 56(%rsi), %r11
+; SSE-NEXT: orq %rcx, %r11
+; SSE-NEXT: orq %rdi, %r11
+; SSE-NEXT: xorq 32(%rsi), %r10
+; SSE-NEXT: orq %rdx, %r10
+; SSE-NEXT: xorq 48(%rsi), %r9
+; SSE-NEXT: orq %rax, %r9
+; SSE-NEXT: orq %r10, %r9
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: orq %r11, %r9
+; SSE-NEXT: setne %al
+; SSE-NEXT: retq
;
; AVX1-LABEL: ne_i256_pair:
; AVX1: # %bb.0:
@@ -848,63 +819,34 @@ define i32 @ne_i256_pair(ptr %a, ptr %b) {
; if we allowed 2 pairs of 32-byte loads per block.
define i32 @eq_i256_pair(ptr %a, ptr %b) {
-; SSE2-LABEL: eq_i256_pair:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movq 16(%rdi), %rax
-; SSE2-NEXT: movq 24(%rdi), %rcx
-; SSE2-NEXT: movq (%rdi), %rdx
-; SSE2-NEXT: movq 8(%rdi), %r8
-; SSE2-NEXT: xorq 8(%rsi), %r8
-; SSE2-NEXT: xorq 24(%rsi), %rcx
-; SSE2-NEXT: xorq (%rsi), %rdx
-; SSE2-NEXT: xorq 16(%rsi), %rax
-; SSE2-NEXT: movq 48(%rdi), %r9
-; SSE2-NEXT: movq 32(%rdi), %r10
-; SSE2-NEXT: movq 56(%rdi), %r11
-; SSE2-NEXT: movq 40(%rdi), %rdi
-; SSE2-NEXT: xorq 40(%rsi), %rdi
-; SSE2-NEXT: orq %r8, %rdi
-; SSE2-NEXT: xorq 56(%rsi), %r11
-; SSE2-NEXT: orq %rcx, %r11
-; SSE2-NEXT: orq %rdi, %r11
-; SSE2-NEXT: xorq 32(%rsi), %r10
-; SSE2-NEXT: orq %rdx, %r10
-; SSE2-NEXT: xorq 48(%rsi), %r9
-; SSE2-NEXT: orq %rax, %r9
-; SSE2-NEXT: orq %r10, %r9
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: orq %r11, %r9
-; SSE2-NEXT: sete %al
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: eq_i256_pair:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movq 16(%rdi), %rax
-; SSE41-NEXT: movq 24(%rdi), %rcx
-; SSE41-NEXT: movq (%rdi), %rdx
-; SSE41-NEXT: movq 8(%rdi), %r8
-; SSE41-NEXT: xorq 8(%rsi), %r8
-; SSE41-NEXT: xorq 24(%rsi), %rcx
-; SSE41-NEXT: xorq (%rsi), %rdx
-; SSE41-NEXT: xorq 16(%rsi), %rax
-; SSE41-NEXT: movq 48(%rdi), %r9
-; SSE41-NEXT: movq 32(%rdi), %r10
-; SSE41-NEXT: movq 56(%rdi), %r11
-; SSE41-NEXT: movq 40(%rdi), %rdi
-; SSE41-NEXT: xorq 40(%rsi), %rdi
-; SSE41-NEXT: orq %r8, %rdi
-; SSE41-NEXT: xorq 56(%rsi), %r11
-; SSE41-NEXT: orq %rcx, %r11
-; SSE41-NEXT: orq %rdi, %r11
-; SSE41-NEXT: xorq 32(%rsi), %r10
-; SSE41-NEXT: orq %rdx, %r10
-; SSE41-NEXT: xorq 48(%rsi), %r9
-; SSE41-NEXT: orq %rax, %r9
-; SSE41-NEXT: orq %r10, %r9
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: orq %r11, %r9
-; SSE41-NEXT: sete %al
-; SSE41-NEXT: retq
+; SSE-LABEL: eq_i256_pair:
+; SSE: # %bb.0:
+; SSE-NEXT: movq 16(%rdi), %rax
+; SSE-NEXT: movq 24(%rdi), %rcx
+; SSE-NEXT: movq (%rdi), %rdx
+; SSE-NEXT: movq 8(%rdi), %r8
+; SSE-NEXT: xorq 8(%rsi), %r8
+; SSE-NEXT: xorq 24(%rsi), %rcx
+; SSE-NEXT: xorq (%rsi), %rdx
+; SSE-NEXT: xorq 16(%rsi), %rax
+; SSE-NEXT: movq 48(%rdi), %r9
+; SSE-NEXT: movq 32(%rdi), %r10
+; SSE-NEXT: movq 56(%rdi), %r11
+; SSE-NEXT: movq 40(%rdi), %rdi
+; SSE-NEXT: xorq 40(%rsi), %rdi
+; SSE-NEXT: orq %r8, %rdi
+; SSE-NEXT: xorq 56(%rsi), %r11
+; SSE-NEXT: orq %rcx, %r11
+; SSE-NEXT: orq %rdi, %r11
+; SSE-NEXT: xorq 32(%rsi), %r10
+; SSE-NEXT: orq %rdx, %r10
+; SSE-NEXT: xorq 48(%rsi), %r9
+; SSE-NEXT: orq %rax, %r9
+; SSE-NEXT: orq %r10, %r9
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: orq %r11, %r9
+; SSE-NEXT: sete %al
+; SSE-NEXT: retq
;
; AVX1-LABEL: eq_i256_pair:
; AVX1: # %bb.0:
@@ -1238,35 +1180,65 @@ define i1 @eq_i256_op(i256 %a, i256 %b) {
}
define i1 @eq_i512_op(i512 %a, i512 %b) {
-; ANY-LABEL: eq_i512_op:
-; ANY: # %bb.0:
-; ANY-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; ANY-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; ANY-NEXT: addq $1, %rdi
-; ANY-NEXT: adcq $0, %rsi
-; ANY-NEXT: adcq $0, %rdx
-; ANY-NEXT: adcq $0, %rcx
-; ANY-NEXT: adcq $0, %r8
-; ANY-NEXT: adcq $0, %r9
-; ANY-NEXT: adcq $0, %r10
-; ANY-NEXT: adcq $0, %rax
-; ANY-NEXT: xorq {{[0-9]+}}(%rsp), %rsi
-; ANY-NEXT: xorq {{[0-9]+}}(%rsp), %r9
-; ANY-NEXT: orq %rsi, %r9
-; ANY-NEXT: xorq {{[0-9]+}}(%rsp), %rcx
-; ANY-NEXT: xorq {{[0-9]+}}(%rsp), %rax
-; ANY-NEXT: orq %rcx, %rax
-; ANY-NEXT: orq %r9, %rax
-; ANY-NEXT: xorq {{[0-9]+}}(%rsp), %rdx
-; ANY-NEXT: xorq {{[0-9]+}}(%rsp), %r10
-; ANY-NEXT: orq %rdx, %r10
-; ANY-NEXT: xorq {{[0-9]+}}(%rsp), %r8
-; ANY-NEXT: xorq {{[0-9]+}}(%rsp), %rdi
-; ANY-NEXT: orq %r8, %rdi
-; ANY-NEXT: orq %r10, %rdi
-; ANY-NEXT: orq %rax, %rdi
-; ANY-NEXT: sete %al
-; ANY-NEXT: retq
+; SSE-LABEL: eq_i512_op:
+; SSE: # %bb.0:
+; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; SSE-NEXT: addq $1, %rdi
+; SSE-NEXT: adcq $0, %rsi
+; SSE-NEXT: adcq $0, %rdx
+; SSE-NEXT: adcq $0, %rcx
+; SSE-NEXT: adcq $0, %r8
+; SSE-NEXT: adcq $0, %r9
+; SSE-NEXT: adcq $0, %r10
+; SSE-NEXT: adcq $0, %rax
+; SSE-NEXT: xorq {{[0-9]+}}(%rsp), %rsi
+; SSE-NEXT: xorq {{[0-9]+}}(%rsp), %r9
+; SSE-NEXT: orq %rsi, %r9
+; SSE-NEXT: xorq {{[0-9]+}}(%rsp), %rcx
+; SSE-NEXT: xorq {{[0-9]+}}(%rsp), %rax
+; SSE-NEXT: orq %rcx, %rax
+; SSE-NEXT: orq %r9, %rax
+; SSE-NEXT: xorq {{[0-9]+}}(%rsp), %rdx
+; SSE-NEXT: xorq {{[0-9]+}}(%rsp), %r10
+; SSE-NEXT: orq %rdx, %r10
+; SSE-NEXT: xorq {{[0-9]+}}(%rsp), %r8
+; SSE-NEXT: xorq {{[0-9]+}}(%rsp), %rdi
+; SSE-NEXT: orq %r8, %rdi
+; SSE-NEXT: orq %r10, %rdi
+; SSE-NEXT: orq %rax, %rdi
+; SSE-NEXT: sete %al
+; SSE-NEXT: retq
+;
+; AVXANY-LABEL: eq_i512_op:
+; AVXANY: # %bb.0:
+; AVXANY-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; AVXANY-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; AVXANY-NEXT: addq $1, %rdi
+; AVXANY-NEXT: adcq $0, %rsi
+; AVXANY-NEXT: adcq $0, %rdx
+; AVXANY-NEXT: adcq $0, %rcx
+; AVXANY-NEXT: adcq $0, %r8
+; AVXANY-NEXT: adcq $0, %r9
+; AVXANY-NEXT: adcq $0, %r10
+; AVXANY-NEXT: adcq $0, %rax
+; AVXANY-NEXT: xorq {{[0-9]+}}(%rsp), %rsi
+; AVXANY-NEXT: xorq {{[0-9]+}}(%rsp), %r9
+; AVXANY-NEXT: orq %rsi, %r9
+; AVXANY-NEXT: xorq {{[0-9]+}}(%rsp), %rcx
+; AVXANY-NEXT: xorq {{[0-9]+}}(%rsp), %rax
+; AVXANY-NEXT: orq %rcx, %rax
+; AVXANY-NEXT: orq %r9, %rax
+; AVXANY-NEXT: xorq {{[0-9]+}}(%rsp), %rdx
+; AVXANY-NEXT: xorq {{[0-9]+}}(%rsp), %r10
+; AVXANY-NEXT: orq %rdx, %r10
+; AVXANY-NEXT: xorq {{[0-9]+}}(%rsp), %r8
+; AVXANY-NEXT: xorq {{[0-9]+}}(%rsp), %rdi
+; AVXANY-NEXT: orq %r8, %rdi
+; AVXANY-NEXT: orq %r10, %rdi
+; AVXANY-NEXT: orq %rax, %rdi
+; AVXANY-NEXT: sete %al
+; AVXANY-NEXT: retq
%a2 = add i512 %a, 1
%r = icmp eq i512 %a2, %b
ret i1 %r
diff --git a/llvm/test/CodeGen/X86/smin.ll b/llvm/test/CodeGen/X86/smin.ll
index e7b318c48ca7..f353853befba 100644
--- a/llvm/test/CodeGen/X86/smin.ll
+++ b/llvm/test/CodeGen/X86/smin.ll
@@ -158,24 +158,24 @@ define i128 @test_i128(i128 %a, i128 %b) nounwind {
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $8, %esp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: cmpl %ecx, %edi
-; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: cmpl %edx, %edi
+; X86-NEXT: movl %edx, %eax
; X86-NEXT: cmovbl %edi, %eax
; X86-NEXT: cmpl %esi, %ebp
-; X86-NEXT: movl %ecx, %ebx
+; X86-NEXT: movl %edx, %ebx
; X86-NEXT: cmovbl %edi, %ebx
; X86-NEXT: cmovel %eax, %ebx
; X86-NEXT: movl %esi, %eax
; X86-NEXT: cmovbl %ebp, %eax
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmpl %edx, %edi
-; X86-NEXT: movl %edx, %eax
+; X86-NEXT: cmpl %ecx, %edi
+; X86-NEXT: movl %ecx, %eax
; X86-NEXT: cmovbl %edi, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -183,24 +183,24 @@ define i128 @test_i128(i128 %a, i128 %b) nounwind {
; X86-NEXT: movl %eax, %ebp
; X86-NEXT: sbbl %edi, %ebp
; X86-NEXT: cmovll {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovll {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: cmovll {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %eax, %ebp
; X86-NEXT: xorl %edi, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %edx, %eax
+; X86-NEXT: xorl %ecx, %eax
; X86-NEXT: orl %ebp, %eax
-; X86-NEXT: cmovel %ebx, %ecx
+; X86-NEXT: cmovel %ebx, %edx
; X86-NEXT: cmovel (%esp), %esi # 4-byte Folded Reload
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl %edi, %eax
-; X86-NEXT: cmovll {{[0-9]+}}(%esp), %edx
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: cmovll {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: cmovll %eax, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: movl %ecx, 8(%eax)
; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: movl %edx, (%eax)
; X86-NEXT: addl $8, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
diff --git a/llvm/test/CodeGen/X86/smul-with-overflow.ll b/llvm/test/CodeGen/X86/smul-with-overflow.ll
index fb7bc96d5bf0..fbdb6e703fef 100644
--- a/llvm/test/CodeGen/X86/smul-with-overflow.ll
+++ b/llvm/test/CodeGen/X86/smul-with-overflow.ll
@@ -441,8 +441,8 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind {
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: movl %eax, %ebp
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %ebx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: movl %eax, %edi
diff --git a/llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll
index fbbc857a38b4..a3d94f7b70a4 100644
--- a/llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll
@@ -89,8 +89,8 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) {
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
; X86-NEXT: .cfi_offset %ebp, -8
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl %edi, %eax
; X86-NEXT: mull %ebx
@@ -251,10 +251,10 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) {
; X86-NEXT: addl %eax, %esi
; X86-NEXT: adcl %edx, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: sarl $31, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sarl $31, %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: mull %ecx
; X86-NEXT: movl %edx, %edi
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: movl %eax, %ebp
@@ -585,8 +585,8 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) {
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
; X86-NEXT: .cfi_offset %ebp, -8
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl %ebp, %eax
; X86-NEXT: mull %ebx
@@ -1295,8 +1295,8 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) {
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: addl %edx, %ecx
; X86-NEXT: adcl $0, %esi
-; X86-NEXT: movl %edi, %eax
-; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %edi
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: addl %eax, %ecx
@@ -1315,9 +1315,9 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) {
; X86-NEXT: adcl $0, %esi
; X86-NEXT: adcl $0, %ebx
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl %edi, %eax
; X86-NEXT: movl %edi, %ecx
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %edi, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: movl %edx, %ebp
; X86-NEXT: movl %eax, %ebx
@@ -1379,9 +1379,9 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) {
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: adcl %ebp, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload
-; X86-NEXT: mull %ebp
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: movl %edx, %esi
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
diff --git a/llvm/test/CodeGen/X86/umin.ll b/llvm/test/CodeGen/X86/umin.ll
index e37950e5399c..0a747b88f41b 100644
--- a/llvm/test/CodeGen/X86/umin.ll
+++ b/llvm/test/CodeGen/X86/umin.ll
@@ -154,24 +154,24 @@ define i128 @test_i128(i128 %a, i128 %b) nounwind {
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $8, %esp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: cmpl %ecx, %edi
-; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: cmpl %edx, %edi
+; X86-NEXT: movl %edx, %eax
; X86-NEXT: cmovbl %edi, %eax
; X86-NEXT: cmpl %esi, %ebp
-; X86-NEXT: movl %ecx, %ebx
+; X86-NEXT: movl %edx, %ebx
; X86-NEXT: cmovbl %edi, %ebx
; X86-NEXT: cmovel %eax, %ebx
; X86-NEXT: movl %esi, %eax
; X86-NEXT: cmovbl %ebp, %eax
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmpl %edx, %edi
-; X86-NEXT: movl %edx, %eax
+; X86-NEXT: cmpl %ecx, %edi
+; X86-NEXT: movl %ecx, %eax
; X86-NEXT: cmovbl %edi, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -179,24 +179,24 @@ define i128 @test_i128(i128 %a, i128 %b) nounwind {
; X86-NEXT: movl %eax, %ebp
; X86-NEXT: sbbl %edi, %ebp
; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %eax, %ebp
; X86-NEXT: xorl %edi, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %edx, %eax
+; X86-NEXT: xorl %ecx, %eax
; X86-NEXT: orl %ebp, %eax
-; X86-NEXT: cmovel %ebx, %ecx
+; X86-NEXT: cmovel %ebx, %edx
; X86-NEXT: cmovel (%esp), %esi # 4-byte Folded Reload
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl %edi, %eax
-; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: cmovbl %eax, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: movl %ecx, 8(%eax)
; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: movl %edx, (%eax)
; X86-NEXT: addl $8, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
diff --git a/llvm/test/CodeGen/X86/umul-with-overflow.ll b/llvm/test/CodeGen/X86/umul-with-overflow.ll
index ccd27ddcae58..f5248d867971 100644
--- a/llvm/test/CodeGen/X86/umul-with-overflow.ll
+++ b/llvm/test/CodeGen/X86/umul-with-overflow.ll
@@ -87,8 +87,8 @@ define i300 @test4(i300 %a, i300 %b) nounwind {
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: andl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %ebx, %eax
; X86-NEXT: mull %edi
@@ -532,8 +532,8 @@ define i300 @test4(i300 %a, i300 %b) nounwind {
; X64-NEXT: movq %r8, %r11
; X64-NEXT: movq %rcx, %r8
; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10
; X64-NEXT: movq %rsi, %rax
; X64-NEXT: mulq %r10
; X64-NEXT: movq %rdx, %rbx
diff --git a/llvm/test/CodeGen/X86/wide-integer-cmp.ll b/llvm/test/CodeGen/X86/wide-integer-cmp.ll
index a15d633d8538..189f5167c541 100644
--- a/llvm/test/CodeGen/X86/wide-integer-cmp.ll
+++ b/llvm/test/CodeGen/X86/wide-integer-cmp.ll
@@ -99,8 +99,8 @@ define i32 @test_wide(i128 %a, i128 %b) {
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: jge .LBB4_2
; CHECK-NEXT: # %bb.1: # %bb1
; CHECK-NEXT: movl $1, %eax
diff --git a/llvm/test/CodeGen/X86/xaluo128.ll b/llvm/test/CodeGen/X86/xaluo128.ll
index 977df0f16bb2..740a2ddec7ab 100644
--- a/llvm/test/CodeGen/X86/xaluo128.ll
+++ b/llvm/test/CodeGen/X86/xaluo128.ll
@@ -24,13 +24,13 @@ define zeroext i1 @saddoi128(i128 %v1, i128 %v2, ptr %res) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: addl {{[0-9]+}}(%esp), %edi
; X86-NEXT: adcl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %esi
; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: adcl {{[0-9]+}}(%esp), %esi
; X86-NEXT: seto %al
; X86-NEXT: movl %edi, (%ecx)
; X86-NEXT: movl %ebx, 4(%ecx)
-; X86-NEXT: movl %esi, 8(%ecx)
-; X86-NEXT: movl %edx, 12(%ecx)
+; X86-NEXT: movl %edx, 8(%ecx)
+; X86-NEXT: movl %esi, 12(%ecx)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -64,13 +64,13 @@ define zeroext i1 @uaddoi128(i128 %v1, i128 %v2, ptr %res) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: addl {{[0-9]+}}(%esp), %edi
; X86-NEXT: adcl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %esi
; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: adcl {{[0-9]+}}(%esp), %esi
; X86-NEXT: setb %al
; X86-NEXT: movl %edi, (%ecx)
; X86-NEXT: movl %ebx, 4(%ecx)
-; X86-NEXT: movl %esi, 8(%ecx)
-; X86-NEXT: movl %edx, 12(%ecx)
+; X86-NEXT: movl %edx, 8(%ecx)
+; X86-NEXT: movl %esi, 12(%ecx)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -105,13 +105,13 @@ define zeroext i1 @ssuboi128(i128 %v1, i128 %v2, ptr %res) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
; X86-NEXT: seto %al
; X86-NEXT: movl %edi, (%ecx)
; X86-NEXT: movl %ebx, 4(%ecx)
-; X86-NEXT: movl %esi, 8(%ecx)
-; X86-NEXT: movl %edx, 12(%ecx)
+; X86-NEXT: movl %edx, 8(%ecx)
+; X86-NEXT: movl %esi, 12(%ecx)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -145,13 +145,13 @@ define zeroext i1 @usuboi128(i128 %v1, i128 %v2, ptr %res) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
; X86-NEXT: setb %al
; X86-NEXT: movl %edi, (%ecx)
; X86-NEXT: movl %ebx, 4(%ecx)
-; X86-NEXT: movl %esi, 8(%ecx)
-; X86-NEXT: movl %edx, 12(%ecx)
+; X86-NEXT: movl %edx, 8(%ecx)
+; X86-NEXT: movl %esi, 12(%ecx)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx