[llvm] d24c93c - [X86] Enable reassociation for ADD instructions
Guozhi Wei via llvm-commits
llvm-commits@lists.llvm.org
Tue Oct 25 17:48:35 PDT 2022
Author: Guozhi Wei
Date: 2022-10-26T00:46:13Z
New Revision: d24c93cc4107dca68a2760199e970cb04cdeed90
URL: https://github.com/llvm/llvm-project/commit/d24c93cc4107dca68a2760199e970cb04cdeed90
DIFF: https://github.com/llvm/llvm-project/commit/d24c93cc4107dca68a2760199e970cb04cdeed90.diff
LOG: [X86] Enable reassociation for ADD instructions
ADD is an associative and commutative operation, so we can do reassociation for it.
Differential Revision: https://reviews.llvm.org/D136396
Added:
Modified:
llvm/lib/Target/X86/X86InstrInfo.cpp
llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
llvm/test/CodeGen/X86/DynamicCalleeSavedRegisters.ll
llvm/test/CodeGen/X86/add-sub-bool.ll
llvm/test/CodeGen/X86/alias-static-alloca.ll
llvm/test/CodeGen/X86/avx512-intrinsics-x86_64.ll
llvm/test/CodeGen/X86/avx512-intrinsics.ll
llvm/test/CodeGen/X86/avx512-regcall-Mask.ll
llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll
llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
llvm/test/CodeGen/X86/divide-by-constant.ll
llvm/test/CodeGen/X86/divmod128.ll
llvm/test/CodeGen/X86/fold-add.ll
llvm/test/CodeGen/X86/fold-tied-op.ll
llvm/test/CodeGen/X86/h-registers-1.ll
llvm/test/CodeGen/X86/hipe-cc.ll
llvm/test/CodeGen/X86/hipe-cc64.ll
llvm/test/CodeGen/X86/imul.ll
llvm/test/CodeGen/X86/lea-opt-cse4.ll
llvm/test/CodeGen/X86/lea-opt2.ll
llvm/test/CodeGen/X86/lrshrink.ll
llvm/test/CodeGen/X86/midpoint-int.ll
llvm/test/CodeGen/X86/misched-balance.ll
llvm/test/CodeGen/X86/misched-matrix.ll
llvm/test/CodeGen/X86/mul-constant-i16.ll
llvm/test/CodeGen/X86/mul-constant-i32.ll
llvm/test/CodeGen/X86/mul-constant-i64.ll
llvm/test/CodeGen/X86/mul-i1024.ll
llvm/test/CodeGen/X86/mul-i256.ll
llvm/test/CodeGen/X86/mul-i512.ll
llvm/test/CodeGen/X86/mul128.ll
llvm/test/CodeGen/X86/mul64.ll
llvm/test/CodeGen/X86/muloti.ll
llvm/test/CodeGen/X86/popcnt.ll
llvm/test/CodeGen/X86/pr34080-2.ll
llvm/test/CodeGen/X86/pr36865.ll
llvm/test/CodeGen/X86/reassociate-add.ll
llvm/test/CodeGen/X86/smul-with-overflow.ll
llvm/test/CodeGen/X86/smul_fix.ll
llvm/test/CodeGen/X86/smul_fix_sat.ll
llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll
llvm/test/CodeGen/X86/sse-regcall.ll
llvm/test/CodeGen/X86/stack-clash-large.ll
llvm/test/CodeGen/X86/statepoint-live-in.ll
llvm/test/CodeGen/X86/statepoint-regs.ll
llvm/test/CodeGen/X86/swift-return.ll
llvm/test/CodeGen/X86/twoaddr-lea.ll
llvm/test/CodeGen/X86/umul-with-overflow.ll
llvm/test/CodeGen/X86/umul_fix.ll
llvm/test/CodeGen/X86/urem-seteq-nonzero.ll
llvm/test/CodeGen/X86/vec_smulo.ll
llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll
llvm/test/CodeGen/X86/win-smallparams.ll
llvm/test/CodeGen/X86/x86-cmov-converter.ll
llvm/test/CodeGen/X86/x86-no_caller_saved_registers-preserve.ll
llvm/test/CodeGen/X86/xmulo.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index b1edbf7d179db..e57d0ffbc9409 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -8709,6 +8709,10 @@ bool X86InstrInfo::hasReassociableOperands(const MachineInstr &Inst,
// 3. Other forms of the same operation (intrinsics and other variants)
bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
switch (Inst.getOpcode()) {
+ case X86::ADD8rr:
+ case X86::ADD16rr:
+ case X86::ADD32rr:
+ case X86::ADD64rr:
case X86::AND8rr:
case X86::AND16rr:
case X86::AND32rr:
diff --git a/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll b/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
index d2cc1b3599cbc..5c78092e9f2c4 100644
--- a/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
+++ b/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
@@ -37,190 +37,190 @@ define fastcc i64 @foo() nounwind {
; CHECK-NEXT: addq %rbx, %r11
; CHECK-NEXT: leaq (%rsi,%rdx), %rbx
; CHECK-NEXT: addq %rdi, %rbx
+; CHECK-NEXT: addq %rdi, %r11
; CHECK-NEXT: addq %rbx, %r11
; CHECK-NEXT: addq %rax, %rax
; CHECK-NEXT: addq %r10, %rax
-; CHECK-NEXT: movq X(%rip), %rbx
-; CHECK-NEXT: addq %rdi, %r11
; CHECK-NEXT: addq %r11, %r8
; CHECK-NEXT: addq %r10, %rax
; CHECK-NEXT: addq %r11, %rax
-; CHECK-NEXT: bswapq %rbx
-; CHECK-NEXT: leaq (%rdi,%rsi), %r11
+; CHECK-NEXT: movq X(%rip), %r11
+; CHECK-NEXT: bswapq %r11
+; CHECK-NEXT: addq %rdx, %r11
+; CHECK-NEXT: leaq (%rdi,%rsi), %rdx
+; CHECK-NEXT: addq %r8, %rdx
; CHECK-NEXT: addq %r8, %r11
-; CHECK-NEXT: addq %rdx, %rbx
-; CHECK-NEXT: addq %r11, %rbx
+; CHECK-NEXT: addq %rdx, %r11
; CHECK-NEXT: leaq (%r10,%rcx), %rdx
; CHECK-NEXT: addq %rdx, %rdx
; CHECK-NEXT: addq %rax, %rdx
-; CHECK-NEXT: movq X(%rip), %r11
-; CHECK-NEXT: addq %r8, %rbx
-; CHECK-NEXT: addq %rbx, %r9
+; CHECK-NEXT: addq %r11, %r9
; CHECK-NEXT: addq %rax, %rdx
-; CHECK-NEXT: addq %rbx, %rdx
+; CHECK-NEXT: addq %r11, %rdx
+; CHECK-NEXT: movq X(%rip), %r11
; CHECK-NEXT: bswapq %r11
-; CHECK-NEXT: leaq (%r8,%rdi), %rbx
-; CHECK-NEXT: addq %r9, %rbx
; CHECK-NEXT: addq %rsi, %r11
-; CHECK-NEXT: addq %rbx, %r11
+; CHECK-NEXT: leaq (%r8,%rdi), %rsi
+; CHECK-NEXT: addq %r9, %rsi
+; CHECK-NEXT: addq %r9, %r11
+; CHECK-NEXT: addq %rsi, %r11
; CHECK-NEXT: leaq (%rax,%r10), %rsi
; CHECK-NEXT: addq %rsi, %rsi
; CHECK-NEXT: addq %rdx, %rsi
-; CHECK-NEXT: movq X(%rip), %rbx
-; CHECK-NEXT: addq %r9, %r11
; CHECK-NEXT: addq %r11, %rcx
; CHECK-NEXT: addq %rdx, %rsi
; CHECK-NEXT: addq %r11, %rsi
-; CHECK-NEXT: bswapq %rbx
-; CHECK-NEXT: leaq (%r9,%r8), %r11
+; CHECK-NEXT: movq X(%rip), %r11
+; CHECK-NEXT: bswapq %r11
+; CHECK-NEXT: addq %rdi, %r11
+; CHECK-NEXT: leaq (%r9,%r8), %rdi
+; CHECK-NEXT: addq %rcx, %rdi
; CHECK-NEXT: addq %rcx, %r11
-; CHECK-NEXT: addq %rdi, %rbx
-; CHECK-NEXT: addq %r11, %rbx
+; CHECK-NEXT: addq %rdi, %r11
; CHECK-NEXT: leaq (%rdx,%rax), %rdi
; CHECK-NEXT: addq %rdi, %rdi
; CHECK-NEXT: addq %rsi, %rdi
-; CHECK-NEXT: movq X(%rip), %r11
-; CHECK-NEXT: addq %rcx, %rbx
-; CHECK-NEXT: addq %rbx, %r10
+; CHECK-NEXT: addq %r11, %r10
; CHECK-NEXT: addq %rsi, %rdi
-; CHECK-NEXT: addq %rbx, %rdi
+; CHECK-NEXT: addq %r11, %rdi
+; CHECK-NEXT: movq X(%rip), %r11
; CHECK-NEXT: bswapq %r11
-; CHECK-NEXT: leaq (%rcx,%r9), %rbx
-; CHECK-NEXT: addq %r10, %rbx
; CHECK-NEXT: addq %r8, %r11
-; CHECK-NEXT: addq %rbx, %r11
+; CHECK-NEXT: leaq (%rcx,%r9), %r8
+; CHECK-NEXT: addq %r10, %r8
+; CHECK-NEXT: addq %r10, %r11
+; CHECK-NEXT: addq %r8, %r11
; CHECK-NEXT: leaq (%rsi,%rdx), %r8
; CHECK-NEXT: addq %r8, %r8
; CHECK-NEXT: addq %rdi, %r8
-; CHECK-NEXT: movq X(%rip), %rbx
-; CHECK-NEXT: addq %r10, %r11
; CHECK-NEXT: addq %r11, %rax
; CHECK-NEXT: addq %rdi, %r8
; CHECK-NEXT: addq %r11, %r8
-; CHECK-NEXT: bswapq %rbx
-; CHECK-NEXT: leaq (%r10,%rcx), %r11
+; CHECK-NEXT: movq X(%rip), %r11
+; CHECK-NEXT: bswapq %r11
+; CHECK-NEXT: addq %r9, %r11
+; CHECK-NEXT: leaq (%r10,%rcx), %r9
+; CHECK-NEXT: addq %rax, %r9
; CHECK-NEXT: addq %rax, %r11
-; CHECK-NEXT: addq %r9, %rbx
-; CHECK-NEXT: addq %r11, %rbx
+; CHECK-NEXT: addq %r9, %r11
; CHECK-NEXT: leaq (%rdi,%rsi), %r9
; CHECK-NEXT: addq %r9, %r9
; CHECK-NEXT: addq %r8, %r9
-; CHECK-NEXT: movq X(%rip), %r11
-; CHECK-NEXT: addq %rax, %rbx
-; CHECK-NEXT: addq %rbx, %rdx
+; CHECK-NEXT: addq %r11, %rdx
; CHECK-NEXT: addq %r8, %r9
-; CHECK-NEXT: addq %rbx, %r9
+; CHECK-NEXT: addq %r11, %r9
+; CHECK-NEXT: movq X(%rip), %r11
; CHECK-NEXT: bswapq %r11
-; CHECK-NEXT: leaq (%rax,%r10), %rbx
-; CHECK-NEXT: addq %rdx, %rbx
; CHECK-NEXT: addq %rcx, %r11
-; CHECK-NEXT: addq %rbx, %r11
+; CHECK-NEXT: leaq (%rax,%r10), %rcx
+; CHECK-NEXT: addq %rdx, %rcx
+; CHECK-NEXT: addq %rdx, %r11
+; CHECK-NEXT: addq %rcx, %r11
; CHECK-NEXT: leaq (%r8,%rdi), %rcx
; CHECK-NEXT: addq %rcx, %rcx
; CHECK-NEXT: addq %r9, %rcx
-; CHECK-NEXT: movq X(%rip), %rbx
-; CHECK-NEXT: addq %rdx, %r11
; CHECK-NEXT: addq %r11, %rsi
; CHECK-NEXT: addq %r9, %rcx
; CHECK-NEXT: addq %r11, %rcx
-; CHECK-NEXT: bswapq %rbx
-; CHECK-NEXT: leaq (%rdx,%rax), %r11
+; CHECK-NEXT: movq X(%rip), %r11
+; CHECK-NEXT: bswapq %r11
+; CHECK-NEXT: addq %r10, %r11
+; CHECK-NEXT: leaq (%rdx,%rax), %r10
+; CHECK-NEXT: addq %rsi, %r10
; CHECK-NEXT: addq %rsi, %r11
-; CHECK-NEXT: addq %r10, %rbx
-; CHECK-NEXT: addq %r11, %rbx
+; CHECK-NEXT: addq %r10, %r11
; CHECK-NEXT: leaq (%r9,%r8), %r10
; CHECK-NEXT: addq %r10, %r10
; CHECK-NEXT: addq %rcx, %r10
-; CHECK-NEXT: movq X(%rip), %r14
-; CHECK-NEXT: addq %rsi, %rbx
-; CHECK-NEXT: addq %rbx, %rdi
+; CHECK-NEXT: addq %r11, %rdi
; CHECK-NEXT: addq %rcx, %r10
-; CHECK-NEXT: addq %rbx, %r10
-; CHECK-NEXT: bswapq %r14
-; CHECK-NEXT: leaq (%rsi,%rdx), %r11
-; CHECK-NEXT: addq %rdi, %r11
-; CHECK-NEXT: addq %rax, %r14
-; CHECK-NEXT: addq %r11, %r14
+; CHECK-NEXT: addq %r11, %r10
+; CHECK-NEXT: movq X(%rip), %rbx
+; CHECK-NEXT: bswapq %rbx
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: leaq (%rsi,%rdx), %rax
+; CHECK-NEXT: addq %rdi, %rax
+; CHECK-NEXT: addq %rdi, %rbx
+; CHECK-NEXT: addq %rax, %rbx
; CHECK-NEXT: leaq (%rcx,%r9), %r11
; CHECK-NEXT: addq %r11, %r11
; CHECK-NEXT: addq %r10, %r11
-; CHECK-NEXT: movq X(%rip), %rax
-; CHECK-NEXT: addq %rdi, %r14
-; CHECK-NEXT: addq %r14, %r8
+; CHECK-NEXT: addq %rbx, %r8
; CHECK-NEXT: addq %r10, %r11
-; CHECK-NEXT: addq %r14, %r11
+; CHECK-NEXT: addq %rbx, %r11
+; CHECK-NEXT: movq X(%rip), %rax
; CHECK-NEXT: bswapq %rax
-; CHECK-NEXT: leaq (%rdi,%rsi), %rbx
-; CHECK-NEXT: addq %r8, %rbx
; CHECK-NEXT: addq %rdx, %rax
-; CHECK-NEXT: addq %rbx, %rax
+; CHECK-NEXT: leaq (%rdi,%rsi), %rdx
+; CHECK-NEXT: addq %r8, %rdx
+; CHECK-NEXT: addq %r8, %rax
+; CHECK-NEXT: addq %rdx, %rax
; CHECK-NEXT: leaq (%r10,%rcx), %rdx
; CHECK-NEXT: addq %rdx, %rdx
; CHECK-NEXT: addq %r11, %rdx
-; CHECK-NEXT: movq X(%rip), %rbx
-; CHECK-NEXT: addq %r8, %rax
; CHECK-NEXT: addq %rax, %r9
; CHECK-NEXT: addq %r11, %rdx
; CHECK-NEXT: addq %rax, %rdx
+; CHECK-NEXT: movq X(%rip), %rbx
; CHECK-NEXT: bswapq %rbx
+; CHECK-NEXT: addq %rsi, %rbx
; CHECK-NEXT: leaq (%r8,%rdi), %rax
; CHECK-NEXT: addq %r9, %rax
-; CHECK-NEXT: addq %rsi, %rbx
+; CHECK-NEXT: addq %r9, %rbx
; CHECK-NEXT: addq %rax, %rbx
; CHECK-NEXT: leaq (%r11,%r10), %rax
; CHECK-NEXT: addq %rax, %rax
; CHECK-NEXT: addq %rdx, %rax
-; CHECK-NEXT: movq X(%rip), %r14
-; CHECK-NEXT: addq %r9, %rbx
; CHECK-NEXT: addq %rbx, %rcx
; CHECK-NEXT: addq %rdx, %rax
; CHECK-NEXT: addq %rbx, %rax
-; CHECK-NEXT: bswapq %r14
+; CHECK-NEXT: movq X(%rip), %rbx
+; CHECK-NEXT: bswapq %rbx
+; CHECK-NEXT: addq %rdi, %rbx
; CHECK-NEXT: leaq (%r9,%r8), %rsi
; CHECK-NEXT: addq %rcx, %rsi
-; CHECK-NEXT: addq %rdi, %r14
-; CHECK-NEXT: addq %rsi, %r14
+; CHECK-NEXT: addq %rcx, %rbx
+; CHECK-NEXT: addq %rsi, %rbx
; CHECK-NEXT: leaq (%rdx,%r11), %rsi
; CHECK-NEXT: addq %rsi, %rsi
; CHECK-NEXT: addq %rax, %rsi
-; CHECK-NEXT: movq X(%rip), %rdi
-; CHECK-NEXT: addq %rcx, %r14
-; CHECK-NEXT: addq %r14, %r10
+; CHECK-NEXT: addq %rbx, %r10
; CHECK-NEXT: addq %rax, %rsi
-; CHECK-NEXT: addq %r14, %rsi
-; CHECK-NEXT: bswapq %rdi
-; CHECK-NEXT: leaq (%rcx,%r9), %rbx
+; CHECK-NEXT: addq %rbx, %rsi
+; CHECK-NEXT: movq X(%rip), %rbx
+; CHECK-NEXT: bswapq %rbx
+; CHECK-NEXT: addq %r8, %rbx
+; CHECK-NEXT: leaq (%rcx,%r9), %rdi
+; CHECK-NEXT: addq %r10, %rdi
; CHECK-NEXT: addq %r10, %rbx
-; CHECK-NEXT: addq %r8, %rdi
+; CHECK-NEXT: addq %rdi, %rbx
+; CHECK-NEXT: leaq (%rax,%rdx), %rdi
+; CHECK-NEXT: addq %rdi, %rdi
+; CHECK-NEXT: addq %rsi, %rdi
+; CHECK-NEXT: addq %rbx, %r11
+; CHECK-NEXT: addq %rsi, %rdi
; CHECK-NEXT: addq %rbx, %rdi
-; CHECK-NEXT: leaq (%rax,%rdx), %r8
-; CHECK-NEXT: addq %r8, %r8
-; CHECK-NEXT: addq %rsi, %r8
-; CHECK-NEXT: addq %r10, %rdi
-; CHECK-NEXT: addq %rdi, %r11
-; CHECK-NEXT: addq %rsi, %r8
-; CHECK-NEXT: addq %rdi, %r8
-; CHECK-NEXT: movq X(%rip), %rdi
-; CHECK-NEXT: bswapq %rdi
-; CHECK-NEXT: addq %r9, %rdi
+; CHECK-NEXT: movq X(%rip), %r8
+; CHECK-NEXT: bswapq %r8
+; CHECK-NEXT: addq %r9, %r8
; CHECK-NEXT: leaq (%r10,%rcx), %r9
; CHECK-NEXT: addq %r11, %r9
-; CHECK-NEXT: addq %r9, %rdi
+; CHECK-NEXT: addq %r11, %r8
+; CHECK-NEXT: addq %r9, %r8
; CHECK-NEXT: addq %rax, %rsi
; CHECK-NEXT: addq %rsi, %rsi
-; CHECK-NEXT: addq %r8, %rsi
-; CHECK-NEXT: addq %r8, %rsi
-; CHECK-NEXT: addq %r11, %rdi
-; CHECK-NEXT: addq %rdi, %rdx
; CHECK-NEXT: addq %rdi, %rsi
+; CHECK-NEXT: addq %rdi, %rsi
+; CHECK-NEXT: addq %r8, %rdx
+; CHECK-NEXT: addq %r8, %rsi
; CHECK-NEXT: movq X(%rip), %rax
; CHECK-NEXT: bswapq %rax
; CHECK-NEXT: addq %r10, %r11
; CHECK-NEXT: movq %rax, X(%rip)
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: addq %rdx, %r11
-; CHECK-NEXT: addq %r11, %rax
; CHECK-NEXT: addq %rdx, %rax
+; CHECK-NEXT: addq %r11, %rax
; CHECK-NEXT: addq %rsi, %rax
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r14
diff --git a/llvm/test/CodeGen/X86/DynamicCalleeSavedRegisters.ll b/llvm/test/CodeGen/X86/DynamicCalleeSavedRegisters.ll
index 38b7a2cdda64e..261e8f873b2fe 100644
--- a/llvm/test/CodeGen/X86/DynamicCalleeSavedRegisters.ll
+++ b/llvm/test/CodeGen/X86/DynamicCalleeSavedRegisters.ll
@@ -28,8 +28,8 @@ define cc 11 i32 @caller(i32 %a0, i32 %b0, i32 %c0, i32 %d0, i32 %e0) nounwind {
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; CHECK-NEXT: movl %ebp, %esi
; CHECK-NEXT: calll callee@PLT
-; CHECK-NEXT: addl %eax, %ebx
; CHECK-NEXT: addl %ebp, %ebx
+; CHECK-NEXT: addl %eax, %ebx
; CHECK-NEXT: movl %ebx, %esi
; CHECK-NEXT: addl $12, %esp
; CHECK-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/add-sub-bool.ll b/llvm/test/CodeGen/X86/add-sub-bool.ll
index 17eda59660193..30e45a4583624 100644
--- a/llvm/test/CodeGen/X86/add-sub-bool.ll
+++ b/llvm/test/CodeGen/X86/add-sub-bool.ll
@@ -68,11 +68,11 @@ define i32 @test_i32_add_add_idx0(i32 %x, i32 %y, i32 %z) nounwind {
;
; X64-LABEL: test_i32_add_add_idx0:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: # kill: def $edx killed $edx def $rdx
; X64-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-NEXT: leal (%rdi,%rsi), %eax
; X64-NEXT: andl $1, %edx
-; X64-NEXT: addl %edx, %eax
+; X64-NEXT: leal (%rdx,%rdi), %eax
+; X64-NEXT: addl %esi, %eax
; X64-NEXT: retq
%add = add i32 %y, %x
%mask = and i32 %z, 1
diff --git a/llvm/test/CodeGen/X86/alias-static-alloca.ll b/llvm/test/CodeGen/X86/alias-static-alloca.ll
index f8d4ccce89f96..5df2906d512a4 100644
--- a/llvm/test/CodeGen/X86/alias-static-alloca.ll
+++ b/llvm/test/CodeGen/X86/alias-static-alloca.ll
@@ -7,15 +7,15 @@
define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $edx killed $edx def $rdx
; CHECK-NEXT: # kill: def $esi killed $esi def $rsi
-; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: movl %esi, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movl %edi, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: leal (%rdi,%rsi), %eax
-; CHECK-NEXT: addl %edx, %eax
+; CHECK-NEXT: leal (%rsi,%rdx), %eax
; CHECK-NEXT: addl %ecx, %eax
+; CHECK-NEXT: addl %edi, %eax
; CHECK-NEXT: retq
entry:
%a0 = alloca i32
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-x86_64.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-x86_64.ll
index d5d10565f24b8..82aff699cf297 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics-x86_64.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics-x86_64.ll
@@ -117,10 +117,10 @@ declare i64 @llvm.x86.avx512.cvttss2usi64(<4 x float>, i32) nounwind readnone
define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtsd2usi64:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vcvtsd2usi %xmm0, %rax
-; CHECK-NEXT: vcvtsd2usi {rz-sae}, %xmm0, %rcx
-; CHECK-NEXT: addq %rax, %rcx
+; CHECK-NEXT: vcvtsd2usi %xmm0, %rcx
+; CHECK-NEXT: vcvtsd2usi {rz-sae}, %xmm0, %rdx
; CHECK-NEXT: vcvtsd2usi {rd-sae}, %xmm0, %rax
+; CHECK-NEXT: addq %rdx, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: retq
@@ -136,10 +136,10 @@ declare i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double>, i32) nounwind readnone
define i64 @test_x86_avx512_cvtsd2si64(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtsd2si64:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vcvtsd2si %xmm0, %rax
-; CHECK-NEXT: vcvtsd2si {rz-sae}, %xmm0, %rcx
-; CHECK-NEXT: addq %rax, %rcx
+; CHECK-NEXT: vcvtsd2si %xmm0, %rcx
+; CHECK-NEXT: vcvtsd2si {rz-sae}, %xmm0, %rdx
; CHECK-NEXT: vcvtsd2si {rd-sae}, %xmm0, %rax
+; CHECK-NEXT: addq %rdx, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: retq
@@ -155,10 +155,10 @@ declare i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double>, i32) nounwind readnone
define i64 @test_x86_avx512_cvtss2usi64(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtss2usi64:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vcvtss2usi %xmm0, %rax
-; CHECK-NEXT: vcvtss2usi {rz-sae}, %xmm0, %rcx
-; CHECK-NEXT: addq %rax, %rcx
+; CHECK-NEXT: vcvtss2usi %xmm0, %rcx
+; CHECK-NEXT: vcvtss2usi {rz-sae}, %xmm0, %rdx
; CHECK-NEXT: vcvtss2usi {rd-sae}, %xmm0, %rax
+; CHECK-NEXT: addq %rdx, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: retq
@@ -174,10 +174,10 @@ declare i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float>, i32) nounwind readnone
define i64 @test_x86_avx512_cvtss2si64(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtss2si64:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vcvtss2si %xmm0, %rax
-; CHECK-NEXT: vcvtss2si {rz-sae}, %xmm0, %rcx
-; CHECK-NEXT: addq %rax, %rcx
+; CHECK-NEXT: vcvtss2si %xmm0, %rcx
+; CHECK-NEXT: vcvtss2si {rz-sae}, %xmm0, %rdx
; CHECK-NEXT: vcvtss2si {rd-sae}, %xmm0, %rax
+; CHECK-NEXT: addq %rdx, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
index 8e0943a10537b..58d4e744c8e5b 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -934,10 +934,10 @@ declare i32 @llvm.x86.avx512.cvttss2usi(<4 x float>, i32) nounwind readnone
define i32 @test_x86_avx512_cvtsd2usi32(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtsd2usi32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vcvtsd2usi %xmm0, %eax
-; CHECK-NEXT: vcvtsd2usi {rz-sae}, %xmm0, %ecx
-; CHECK-NEXT: addl %eax, %ecx
+; CHECK-NEXT: vcvtsd2usi %xmm0, %ecx
+; CHECK-NEXT: vcvtsd2usi {rz-sae}, %xmm0, %edx
; CHECK-NEXT: vcvtsd2usi {rd-sae}, %xmm0, %eax
+; CHECK-NEXT: addl %edx, %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: ret{{[l|q]}}
@@ -953,10 +953,10 @@ declare i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double>, i32) nounwind readnone
define i32 @test_x86_avx512_cvtsd2si32(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtsd2si32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vcvtsd2si %xmm0, %eax
-; CHECK-NEXT: vcvtsd2si {rz-sae}, %xmm0, %ecx
-; CHECK-NEXT: addl %eax, %ecx
+; CHECK-NEXT: vcvtsd2si %xmm0, %ecx
+; CHECK-NEXT: vcvtsd2si {rz-sae}, %xmm0, %edx
; CHECK-NEXT: vcvtsd2si {rd-sae}, %xmm0, %eax
+; CHECK-NEXT: addl %edx, %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: ret{{[l|q]}}
@@ -972,10 +972,10 @@ declare i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double>, i32) nounwind readnone
define i32 @test_x86_avx512_cvtss2usi32(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtss2usi32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vcvtss2usi %xmm0, %eax
-; CHECK-NEXT: vcvtss2usi {rz-sae}, %xmm0, %ecx
-; CHECK-NEXT: addl %eax, %ecx
+; CHECK-NEXT: vcvtss2usi %xmm0, %ecx
+; CHECK-NEXT: vcvtss2usi {rz-sae}, %xmm0, %edx
; CHECK-NEXT: vcvtss2usi {rd-sae}, %xmm0, %eax
+; CHECK-NEXT: addl %edx, %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: ret{{[l|q]}}
@@ -991,10 +991,10 @@ declare i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float>, i32) nounwind readnone
define i32 @test_x86_avx512_cvtss2si32(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtss2si32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vcvtss2si %xmm0, %eax
-; CHECK-NEXT: vcvtss2si {rz-sae}, %xmm0, %ecx
-; CHECK-NEXT: addl %eax, %ecx
+; CHECK-NEXT: vcvtss2si %xmm0, %ecx
+; CHECK-NEXT: vcvtss2si {rz-sae}, %xmm0, %edx
; CHECK-NEXT: vcvtss2si {rd-sae}, %xmm0, %eax
+; CHECK-NEXT: addl %edx, %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: ret{{[l|q]}}
diff --git a/llvm/test/CodeGen/X86/avx512-regcall-Mask.ll b/llvm/test/CodeGen/X86/avx512-regcall-Mask.ll
index ffe5ffb64af14..412c30906cb5c 100644
--- a/llvm/test/CodeGen/X86/avx512-regcall-Mask.ll
+++ b/llvm/test/CodeGen/X86/avx512-regcall-Mask.ll
@@ -35,32 +35,32 @@ define dso_local x86_regcallcc i64 @test_argv64i1(<64 x i1> %x0, <64 x i1> %x1,
;
; WIN64-LABEL: test_argv64i1:
; WIN64: # %bb.0:
+; WIN64-NEXT: addq %rdx, %rcx
+; WIN64-NEXT: addq %rdi, %rcx
+; WIN64-NEXT: addq %rsi, %rcx
+; WIN64-NEXT: addq %r8, %rcx
+; WIN64-NEXT: addq %r9, %rcx
+; WIN64-NEXT: addq %r10, %rcx
+; WIN64-NEXT: addq %r11, %rcx
+; WIN64-NEXT: addq %r12, %rcx
+; WIN64-NEXT: addq %r14, %rcx
+; WIN64-NEXT: addq %r15, %rcx
; WIN64-NEXT: addq %rcx, %rax
-; WIN64-NEXT: addq %rdx, %rax
-; WIN64-NEXT: addq %rdi, %rax
-; WIN64-NEXT: addq %rsi, %rax
-; WIN64-NEXT: addq %r8, %rax
-; WIN64-NEXT: addq %r9, %rax
-; WIN64-NEXT: addq %r10, %rax
-; WIN64-NEXT: addq %r11, %rax
-; WIN64-NEXT: addq %r12, %rax
-; WIN64-NEXT: addq %r14, %rax
-; WIN64-NEXT: addq %r15, %rax
; WIN64-NEXT: addq {{[0-9]+}}(%rsp), %rax
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argv64i1:
; LINUXOSX64: # %bb.0:
+; LINUXOSX64-NEXT: addq %rdx, %rcx
+; LINUXOSX64-NEXT: addq %rdi, %rcx
+; LINUXOSX64-NEXT: addq %rsi, %rcx
+; LINUXOSX64-NEXT: addq %r8, %rcx
+; LINUXOSX64-NEXT: addq %r9, %rcx
+; LINUXOSX64-NEXT: addq %r12, %rcx
+; LINUXOSX64-NEXT: addq %r13, %rcx
+; LINUXOSX64-NEXT: addq %r14, %rcx
+; LINUXOSX64-NEXT: addq %r15, %rcx
; LINUXOSX64-NEXT: addq %rcx, %rax
-; LINUXOSX64-NEXT: addq %rdx, %rax
-; LINUXOSX64-NEXT: addq %rdi, %rax
-; LINUXOSX64-NEXT: addq %rsi, %rax
-; LINUXOSX64-NEXT: addq %r8, %rax
-; LINUXOSX64-NEXT: addq %r9, %rax
-; LINUXOSX64-NEXT: addq %r12, %rax
-; LINUXOSX64-NEXT: addq %r13, %rax
-; LINUXOSX64-NEXT: addq %r14, %rax
-; LINUXOSX64-NEXT: addq %r15, %rax
; LINUXOSX64-NEXT: addq {{[0-9]+}}(%rsp), %rax
; LINUXOSX64-NEXT: addq {{[0-9]+}}(%rsp), %rax
; LINUXOSX64-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll b/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll
index 0caa8826e75c8..bb762469571d1 100644
--- a/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll
+++ b/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll
@@ -939,7 +939,7 @@ define dso_local x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 %
; X32: # %bb.0:
; X32-NEXT: pushl %ebp
; X32-NEXT: pushl %ebx
-; X32-NEXT: subl $12, %esp
+; X32-NEXT: subl $16, %esp
; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
; X32-NEXT: movl %edi, %esi
; X32-NEXT: movl %edx, %ebx
@@ -950,36 +950,37 @@ define dso_local x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 %
; X32-NEXT: subl %esi, %ebx
; X32-NEXT: movl %edi, %eax
; X32-NEXT: subl %ecx, %eax
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X32-NEXT: movl %ebp, %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: imull %eax, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: imull %ebx, %eax
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl %esi, %edx
+; X32-NEXT: subl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: imull %ebx, %edx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X32-NEXT: movl (%esp), %ebx # 4-byte Reload
-; X32-NEXT: subl {{[0-9]+}}(%esp), %ebx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: subl %ebp, %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl %eax, %ecx
; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: imull %ebx, %ecx
-; X32-NEXT: addl %eax, %ecx
+; X32-NEXT: addl %edx, %ecx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: addl (%esp), %eax # 4-byte Folded Reload
-; X32-NEXT: addl {{[0-9]+}}(%esp), %ebp
-; X32-NEXT: imull %ebp, %edi
+; X32-NEXT: addl (%esp), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: addl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: imull %edx, %edi
; X32-NEXT: addl {{[0-9]+}}(%esp), %esi
; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: addl %esi, %edi
-; X32-NEXT: addl {{[0-9]+}}(%esp), %edx
-; X32-NEXT: imull %eax, %edx
-; X32-NEXT: addl %edx, %edi
+; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: imull %ebp, %eax
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: addl %eax, %edi
; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, %eax
-; X32-NEXT: addl $12, %esp
+; X32-NEXT: addl $16, %esp
; X32-NEXT: popl %ebx
; X32-NEXT: popl %ebp
; X32-NEXT: retl
@@ -1013,18 +1014,18 @@ define dso_local x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 %
; WIN64-NEXT: # kill: def $r11d killed $r11d killed $r11
; WIN64-NEXT: subl %r12d, %r11d
; WIN64-NEXT: imull %edx, %r11d
-; WIN64-NEXT: addl %r9d, %r11d
; WIN64-NEXT: leal (%r14,%r15), %edx
-; WIN64-NEXT: movl %r14d, %r9d
-; WIN64-NEXT: subl %r15d, %r9d
-; WIN64-NEXT: imull %esi, %r9d
-; WIN64-NEXT: addl %r11d, %r9d
+; WIN64-NEXT: # kill: def $r14d killed $r14d killed $r14
+; WIN64-NEXT: subl %r15d, %r14d
+; WIN64-NEXT: imull %esi, %r14d
+; WIN64-NEXT: addl %r11d, %r14d
; WIN64-NEXT: addl %ecx, %eax
; WIN64-NEXT: imull %r8d, %eax
; WIN64-NEXT: imull %ebx, %r10d
-; WIN64-NEXT: addl %r10d, %eax
; WIN64-NEXT: imull %edi, %edx
+; WIN64-NEXT: addl %r10d, %edx
; WIN64-NEXT: addl %edx, %eax
+; WIN64-NEXT: addl %r14d, %eax
; WIN64-NEXT: addl %r9d, %eax
; WIN64-NEXT: popq %rbx
; WIN64-NEXT: retq
@@ -1054,19 +1055,19 @@ define dso_local x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 %
; LINUXOSX64-NEXT: leal (%r13,%r14), %r11d
; LINUXOSX64-NEXT: movl %r13d, %r12d
; LINUXOSX64-NEXT: subl %r14d, %r12d
+; LINUXOSX64-NEXT: movl {{[0-9]+}}(%rsp), %r14d
; LINUXOSX64-NEXT: imull %edx, %r12d
-; LINUXOSX64-NEXT: movl {{[0-9]+}}(%rsp), %edx
-; LINUXOSX64-NEXT: addl %r9d, %r12d
-; LINUXOSX64-NEXT: movl %r15d, %r9d
-; LINUXOSX64-NEXT: subl %edx, %r9d
-; LINUXOSX64-NEXT: imull %esi, %r9d
-; LINUXOSX64-NEXT: addl %r12d, %r9d
+; LINUXOSX64-NEXT: movl %r15d, %edx
+; LINUXOSX64-NEXT: subl %r14d, %edx
+; LINUXOSX64-NEXT: imull %esi, %edx
+; LINUXOSX64-NEXT: addl %r12d, %edx
; LINUXOSX64-NEXT: addl %ecx, %eax
; LINUXOSX64-NEXT: imull %r8d, %eax
; LINUXOSX64-NEXT: imull %r10d, %r11d
-; LINUXOSX64-NEXT: addl %r11d, %eax
-; LINUXOSX64-NEXT: addl %r15d, %edx
-; LINUXOSX64-NEXT: imull %edi, %edx
+; LINUXOSX64-NEXT: addl %r15d, %r14d
+; LINUXOSX64-NEXT: imull %edi, %r14d
+; LINUXOSX64-NEXT: addl %r11d, %r14d
+; LINUXOSX64-NEXT: addl %r14d, %eax
; LINUXOSX64-NEXT: addl %edx, %eax
; LINUXOSX64-NEXT: addl %r9d, %eax
; LINUXOSX64-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
index 4b39dbd297558..b3148629dd554 100644
--- a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
@@ -1905,19 +1905,19 @@ define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) nounwind {
; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
; X64-NEXT: vpcmpgtb %zmm0, %zmm1, %k0 # encoding: [0x62,0xf1,0x75,0x48,0x64,0xc0]
; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
-; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
; X64-NEXT: vpcmpleb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x02]
-; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
-; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
+; X64-NEXT: kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0]
+; X64-NEXT: addq %rcx, %rdx # encoding: [0x48,0x01,0xca]
; X64-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x04]
; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
-; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
+; X64-NEXT: addq %rdx, %rcx # encoding: [0x48,0x01,0xd1]
; X64-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x05]
-; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
-; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
+; X64-NEXT: kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0]
+; X64-NEXT: addq %rcx, %rdx # encoding: [0x48,0x01,0xca]
+; X64-NEXT: addq %rax, %rdx # encoding: [0x48,0x01,0xc2]
; X64-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1]
-; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
-; X64-NEXT: leaq -1(%rcx,%rax), %rax # encoding: [0x48,0x8d,0x44,0x01,0xff]
+; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
+; X64-NEXT: leaq -1(%rax,%rdx), %rax # encoding: [0x48,0x8d,0x44,0x10,0xff]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
@@ -1987,23 +1987,23 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) nounwin
; X64: # %bb.0:
; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x74,0xc1]
-; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
-; X64-NEXT: vpcmpgtb %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x64,0xc0]
; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
-; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
-; X64-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x02]
+; X64-NEXT: vpcmpgtb %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x64,0xc0]
; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
-; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
+; X64-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x02]
+; X64-NEXT: kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0]
+; X64-NEXT: addq %rax, %rdx # encoding: [0x48,0x01,0xc2]
; X64-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x04]
-; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
-; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
+; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
+; X64-NEXT: addq %rdx, %rax # encoding: [0x48,0x01,0xd0]
; X64-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x05]
; X64-NEXT: kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0]
-; X64-NEXT: addq %rcx, %rdx # encoding: [0x48,0x01,0xca]
+; X64-NEXT: addq %rax, %rdx # encoding: [0x48,0x01,0xc2]
; X64-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x64,0xc1]
; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
; X64-NEXT: addq %rdx, %rax # encoding: [0x48,0x01,0xd0]
; X64-NEXT: addq %rdi, %rax # encoding: [0x48,0x01,0xf8]
+; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
@@ -2078,19 +2078,19 @@ define i64 @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) nounwind {
; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
; X64-NEXT: vpcmpltub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x01]
; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
-; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
; X64-NEXT: vpcmpleub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x02]
-; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
-; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
+; X64-NEXT: kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0]
+; X64-NEXT: addq %rcx, %rdx # encoding: [0x48,0x01,0xca]
; X64-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x04]
; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
-; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
+; X64-NEXT: addq %rdx, %rcx # encoding: [0x48,0x01,0xd1]
; X64-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x05]
-; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
-; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
+; X64-NEXT: kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0]
+; X64-NEXT: addq %rcx, %rdx # encoding: [0x48,0x01,0xca]
+; X64-NEXT: addq %rax, %rdx # encoding: [0x48,0x01,0xc2]
; X64-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x06]
-; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
-; X64-NEXT: leaq -1(%rcx,%rax), %rax # encoding: [0x48,0x8d,0x44,0x01,0xff]
+; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
+; X64-NEXT: leaq -1(%rax,%rdx), %rax # encoding: [0x48,0x8d,0x44,0x10,0xff]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
@@ -2160,23 +2160,23 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; X64: # %bb.0:
; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x74,0xc1]
-; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
-; X64-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x01]
; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
-; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
-; X64-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x02]
+; X64-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x01]
; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
-; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
+; X64-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x02]
+; X64-NEXT: kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0]
+; X64-NEXT: addq %rax, %rdx # encoding: [0x48,0x01,0xc2]
; X64-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x04]
-; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
-; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
+; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
+; X64-NEXT: addq %rdx, %rax # encoding: [0x48,0x01,0xd0]
; X64-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x05]
; X64-NEXT: kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0]
-; X64-NEXT: addq %rcx, %rdx # encoding: [0x48,0x01,0xca]
+; X64-NEXT: addq %rax, %rdx # encoding: [0x48,0x01,0xc2]
; X64-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x06]
; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
; X64-NEXT: addq %rdx, %rax # encoding: [0x48,0x01,0xd0]
; X64-NEXT: addq %rdi, %rax # encoding: [0x48,0x01,0xf8]
+; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
@@ -2206,19 +2206,19 @@ define i32 @test_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1) nounwind {
; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X86-NEXT: vpcmpgtw %zmm0, %zmm1, %k0 # encoding: [0x62,0xf1,0x75,0x48,0x65,0xc0]
; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
-; X86-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1]
; X86-NEXT: vpcmplew %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x02]
-; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
-; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
+; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
+; X86-NEXT: addl %ecx, %edx # encoding: [0x01,0xca]
; X86-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04]
; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
-; X86-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1]
+; X86-NEXT: addl %edx, %ecx # encoding: [0x01,0xd1]
; X86-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x05]
-; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
-; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
+; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
+; X86-NEXT: addl %ecx, %edx # encoding: [0x01,0xca]
+; X86-NEXT: addl %eax, %edx # encoding: [0x01,0xc2]
; X86-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1]
-; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
-; X86-NEXT: leal -1(%ecx,%eax), %eax # encoding: [0x8d,0x44,0x01,0xff]
+; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; X86-NEXT: leal -1(%eax,%edx), %eax # encoding: [0x8d,0x44,0x10,0xff]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
@@ -2228,19 +2228,19 @@ define i32 @test_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1) nounwind {
; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT: vpcmpgtw %zmm0, %zmm1, %k0 # encoding: [0x62,0xf1,0x75,0x48,0x65,0xc0]
; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
-; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1]
; X64-NEXT: vpcmplew %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x02]
-; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
-; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
+; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
+; X64-NEXT: addl %ecx, %edx # encoding: [0x01,0xca]
; X64-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04]
; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
-; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1]
+; X64-NEXT: addl %edx, %ecx # encoding: [0x01,0xd1]
; X64-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x05]
-; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
-; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
+; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
+; X64-NEXT: addl %ecx, %edx # encoding: [0x01,0xca]
+; X64-NEXT: addl %eax, %edx # encoding: [0x01,0xc2]
; X64-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1]
-; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
-; X64-NEXT: leal -1(%rcx,%rax), %eax # encoding: [0x8d,0x44,0x01,0xff]
+; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; X64-NEXT: leal -1(%rax,%rdx), %eax # encoding: [0x8d,0x44,0x10,0xff]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
@@ -2268,23 +2268,23 @@ define i32 @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) nounw
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1]
-; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
-; X86-NEXT: vpcmpgtw %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x65,0xc0]
; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
-; X86-NEXT: addl %eax, %edx # encoding: [0x01,0xc2]
-; X86-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x02]
+; X86-NEXT: vpcmpgtw %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x65,0xc0]
; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
-; X86-NEXT: addl %edx, %eax # encoding: [0x01,0xd0]
+; X86-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x02]
+; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
+; X86-NEXT: addl %eax, %esi # encoding: [0x01,0xc6]
; X86-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04]
-; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
-; X86-NEXT: addl %eax, %edx # encoding: [0x01,0xc2]
+; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0]
; X86-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x05]
; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
-; X86-NEXT: addl %edx, %esi # encoding: [0x01,0xd6]
+; X86-NEXT: addl %eax, %esi # encoding: [0x01,0xc6]
; X86-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x65,0xc1]
; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0]
; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
+; X86-NEXT: addl %edx, %eax # encoding: [0x01,0xd0]
; X86-NEXT: popl %esi # encoding: [0x5e]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
@@ -2293,23 +2293,23 @@ define i32 @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) nounw
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1]
-; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
-; X64-NEXT: vpcmpgtw %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x65,0xc0]
; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
-; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1]
-; X64-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x02]
+; X64-NEXT: vpcmpgtw %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x65,0xc0]
; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
-; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
+; X64-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x02]
+; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
+; X64-NEXT: addl %eax, %edx # encoding: [0x01,0xc2]
; X64-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04]
-; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
-; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1]
+; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; X64-NEXT: addl %edx, %eax # encoding: [0x01,0xd0]
; X64-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x05]
; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
-; X64-NEXT: addl %ecx, %edx # encoding: [0x01,0xca]
+; X64-NEXT: addl %eax, %edx # encoding: [0x01,0xc2]
; X64-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x65,0xc1]
; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT: addl %edx, %eax # encoding: [0x01,0xd0]
; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
+; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
@@ -2339,19 +2339,19 @@ define i32 @test_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1) nounwind {
; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X86-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x01]
; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
-; X86-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1]
; X86-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x02]
-; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
-; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
+; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
+; X86-NEXT: addl %ecx, %edx # encoding: [0x01,0xca]
; X86-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04]
; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
-; X86-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1]
+; X86-NEXT: addl %edx, %ecx # encoding: [0x01,0xd1]
; X86-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x05]
-; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
-; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
+; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
+; X86-NEXT: addl %ecx, %edx # encoding: [0x01,0xca]
+; X86-NEXT: addl %eax, %edx # encoding: [0x01,0xc2]
; X86-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x06]
-; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
-; X86-NEXT: leal -1(%ecx,%eax), %eax # encoding: [0x8d,0x44,0x01,0xff]
+; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; X86-NEXT: leal -1(%eax,%edx), %eax # encoding: [0x8d,0x44,0x10,0xff]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
@@ -2361,19 +2361,19 @@ define i32 @test_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1) nounwind {
; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x01]
; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
-; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1]
; X64-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x02]
-; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
-; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
+; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
+; X64-NEXT: addl %ecx, %edx # encoding: [0x01,0xca]
; X64-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04]
; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
-; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1]
+; X64-NEXT: addl %edx, %ecx # encoding: [0x01,0xd1]
; X64-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x05]
-; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
-; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
+; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
+; X64-NEXT: addl %ecx, %edx # encoding: [0x01,0xca]
+; X64-NEXT: addl %eax, %edx # encoding: [0x01,0xc2]
; X64-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x06]
-; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
-; X64-NEXT: leal -1(%rcx,%rax), %eax # encoding: [0x8d,0x44,0x01,0xff]
+; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; X64-NEXT: leal -1(%rax,%rdx), %eax # encoding: [0x8d,0x44,0x10,0xff]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
@@ -2401,23 +2401,23 @@ define i32 @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) noun
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1]
-; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
-; X86-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x01]
; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
-; X86-NEXT: addl %eax, %edx # encoding: [0x01,0xc2]
-; X86-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x02]
+; X86-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x01]
; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
-; X86-NEXT: addl %edx, %eax # encoding: [0x01,0xd0]
+; X86-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x02]
+; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
+; X86-NEXT: addl %eax, %esi # encoding: [0x01,0xc6]
; X86-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04]
-; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
-; X86-NEXT: addl %eax, %edx # encoding: [0x01,0xc2]
+; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0]
; X86-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x05]
; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
-; X86-NEXT: addl %edx, %esi # encoding: [0x01,0xd6]
+; X86-NEXT: addl %eax, %esi # encoding: [0x01,0xc6]
; X86-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x06]
; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0]
; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
+; X86-NEXT: addl %edx, %eax # encoding: [0x01,0xd0]
; X86-NEXT: popl %esi # encoding: [0x5e]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
@@ -2426,23 +2426,23 @@ define i32 @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) noun
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1]
-; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
-; X64-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x01]
; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
-; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1]
-; X64-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x02]
+; X64-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x01]
; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
-; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
+; X64-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x02]
+; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
+; X64-NEXT: addl %eax, %edx # encoding: [0x01,0xc2]
; X64-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04]
-; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
-; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1]
+; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; X64-NEXT: addl %edx, %eax # encoding: [0x01,0xd0]
; X64-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x05]
; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
-; X64-NEXT: addl %ecx, %edx # encoding: [0x01,0xca]
+; X64-NEXT: addl %eax, %edx # encoding: [0x01,0xc2]
; X64-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x06]
; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X64-NEXT: addl %edx, %eax # encoding: [0x01,0xd0]
; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
+; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
index 587d7929e5848..0ee08a43f2657 100644
--- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
+++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
@@ -124,10 +124,10 @@ define i64 @scalar_i64(i64 %x, i64 %y, ptr %divdst) nounwind {
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: pushl %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: calll __divdi3
@@ -136,10 +136,10 @@ define i64 @scalar_i64(i64 %x, i64 %y, ptr %divdst) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ecx, 4(%edx)
; X86-NEXT: movl %eax, (%edx)
-; X86-NEXT: imull %eax, %ebp
-; X86-NEXT: mull %ebx
-; X86-NEXT: addl %ebp, %edx
-; X86-NEXT: imull %ebx, %ecx
+; X86-NEXT: imull %eax, %ebx
+; X86-NEXT: mull %ebp
+; X86-NEXT: imull %ebp, %ecx
+; X86-NEXT: addl %ebx, %ecx
; X86-NEXT: addl %edx, %ecx
; X86-NEXT: subl %eax, %esi
; X86-NEXT: sbbl %ecx, %edi
@@ -192,8 +192,8 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X64-NEXT: movq %rax, (%rbx)
; X64-NEXT: imulq %rax, %r14
; X64-NEXT: mulq %r15
-; X64-NEXT: addq %r14, %rdx
; X64-NEXT: imulq %r15, %rcx
+; X64-NEXT: addq %r14, %rcx
; X64-NEXT: addq %rdx, %rcx
; X64-NEXT: subq %rax, %r13
; X64-NEXT: sbbq %rcx, %r12
diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
index 6f67a12f67938..86dd5f32ea6f8 100644
--- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
+++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
@@ -124,10 +124,10 @@ define i64 @scalar_i64(i64 %x, i64 %y, ptr %divdst) nounwind {
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: pushl %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: calll __udivdi3
@@ -136,10 +136,10 @@ define i64 @scalar_i64(i64 %x, i64 %y, ptr %divdst) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ecx, 4(%edx)
; X86-NEXT: movl %eax, (%edx)
-; X86-NEXT: imull %eax, %ebp
-; X86-NEXT: mull %ebx
-; X86-NEXT: addl %ebp, %edx
-; X86-NEXT: imull %ebx, %ecx
+; X86-NEXT: imull %eax, %ebx
+; X86-NEXT: mull %ebp
+; X86-NEXT: imull %ebp, %ecx
+; X86-NEXT: addl %ebx, %ecx
; X86-NEXT: addl %edx, %ecx
; X86-NEXT: subl %eax, %esi
; X86-NEXT: sbbl %ecx, %edi
@@ -192,8 +192,8 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X64-NEXT: movq %rax, (%rbx)
; X64-NEXT: imulq %rax, %r14
; X64-NEXT: mulq %r15
-; X64-NEXT: addq %r14, %rdx
; X64-NEXT: imulq %r15, %rcx
+; X64-NEXT: addq %r14, %rcx
; X64-NEXT: addq %rdx, %rcx
; X64-NEXT: subq %rax, %r13
; X64-NEXT: sbbq %rcx, %r12
diff --git a/llvm/test/CodeGen/X86/divide-by-constant.ll b/llvm/test/CodeGen/X86/divide-by-constant.ll
index 80944d29560cb..cd081051467c1 100644
--- a/llvm/test/CodeGen/X86/divide-by-constant.ll
+++ b/llvm/test/CodeGen/X86/divide-by-constant.ll
@@ -793,9 +793,9 @@ define i64 @udiv_i64_3(i64 %x) nounwind {
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: mull %ebx
; X32-NEXT: imull $-1431655766, %ecx, %ecx # imm = 0xAAAAAAAA
-; X32-NEXT: addl %ecx, %edx
-; X32-NEXT: imull $-1431655765, %edi, %ecx # imm = 0xAAAAAAAB
-; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: imull $-1431655765, %edi, %esi # imm = 0xAAAAAAAB
+; X32-NEXT: addl %ecx, %esi
+; X32-NEXT: addl %esi, %edx
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
; X32-NEXT: popl %ebx
@@ -836,9 +836,9 @@ define i64 @udiv_i64_5(i64 %x) nounwind {
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: mull %ebx
; X32-NEXT: imull $-858993460, %ecx, %ecx # imm = 0xCCCCCCCC
-; X32-NEXT: addl %ecx, %edx
-; X32-NEXT: imull $-858993459, %edi, %ecx # imm = 0xCCCCCCCD
-; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: imull $-858993459, %edi, %esi # imm = 0xCCCCCCCD
+; X32-NEXT: addl %ecx, %esi
+; X32-NEXT: addl %esi, %edx
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
; X32-NEXT: popl %ebx
@@ -880,9 +880,9 @@ define i64 @udiv_i64_15(i64 %x) nounwind {
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: mull %edx
; X32-NEXT: imull $-286331154, %ecx, %ecx # imm = 0xEEEEEEEE
-; X32-NEXT: addl %ecx, %edx
-; X32-NEXT: imull $-286331153, %edi, %ecx # imm = 0xEEEEEEEF
-; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: imull $-286331153, %edi, %esi # imm = 0xEEEEEEEF
+; X32-NEXT: addl %ecx, %esi
+; X32-NEXT: addl %esi, %edx
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
; X32-NEXT: retl
@@ -924,9 +924,9 @@ define i64 @udiv_i64_17(i64 %x) nounwind {
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: mull %ebx
; X32-NEXT: imull $-252645136, %ecx, %ecx # imm = 0xF0F0F0F0
-; X32-NEXT: addl %ecx, %edx
-; X32-NEXT: imull $-252645135, %edi, %ecx # imm = 0xF0F0F0F1
-; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: imull $-252645135, %edi, %esi # imm = 0xF0F0F0F1
+; X32-NEXT: addl %ecx, %esi
+; X32-NEXT: addl %esi, %edx
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
; X32-NEXT: popl %ebx
@@ -969,9 +969,9 @@ define i64 @udiv_i64_255(i64 %x) nounwind {
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: mull %edx
; X32-NEXT: imull $-16843010, %ecx, %ecx # imm = 0xFEFEFEFE
-; X32-NEXT: addl %ecx, %edx
-; X32-NEXT: imull $-16843009, %esi, %ecx # imm = 0xFEFEFEFF
-; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: imull $-16843009, %esi, %esi # imm = 0xFEFEFEFF
+; X32-NEXT: addl %ecx, %esi
+; X32-NEXT: addl %esi, %edx
; X32-NEXT: popl %esi
; X32-NEXT: retl
;
@@ -1012,9 +1012,9 @@ define i64 @udiv_i64_257(i64 %x) nounwind {
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: mull %ebx
; X32-NEXT: imull $-16711936, %ecx, %ecx # imm = 0xFF00FF00
-; X32-NEXT: addl %ecx, %edx
-; X32-NEXT: imull $-16711935, %edi, %ecx # imm = 0xFF00FF01
-; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: imull $-16711935, %edi, %esi # imm = 0xFF00FF01
+; X32-NEXT: addl %ecx, %esi
+; X32-NEXT: addl %esi, %edx
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
; X32-NEXT: popl %ebx
@@ -1148,9 +1148,9 @@ define i64 @udiv_i64_12(i64 %x) nounwind {
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: mull %ebx
; X32-NEXT: imull $-1431655766, %ecx, %ecx # imm = 0xAAAAAAAA
-; X32-NEXT: addl %ecx, %edx
-; X32-NEXT: imull $-1431655765, %edi, %ecx # imm = 0xAAAAAAAB
-; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: imull $-1431655765, %edi, %esi # imm = 0xAAAAAAAB
+; X32-NEXT: addl %ecx, %esi
+; X32-NEXT: addl %esi, %edx
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
; X32-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/divmod128.ll b/llvm/test/CodeGen/X86/divmod128.ll
index 2e13776715e5b..c80e8d105b342 100644
--- a/llvm/test/CodeGen/X86/divmod128.ll
+++ b/llvm/test/CodeGen/X86/divmod128.ll
@@ -482,8 +482,8 @@ define i128 @udiv_i128_3(i128 %x) nounwind {
; X86-64-NEXT: imulq %rdi, %rcx
; X86-64-NEXT: movq %rdi, %rax
; X86-64-NEXT: mulq %r8
-; X86-64-NEXT: addq %rcx, %rdx
; X86-64-NEXT: imulq %rsi, %r8
+; X86-64-NEXT: addq %rcx, %r8
; X86-64-NEXT: addq %r8, %rdx
; X86-64-NEXT: retq
;
@@ -505,8 +505,8 @@ define i128 @udiv_i128_3(i128 %x) nounwind {
; WIN64-NEXT: imulq %rcx, %r9
; WIN64-NEXT: movq %rcx, %rax
; WIN64-NEXT: mulq %r10
-; WIN64-NEXT: addq %r9, %rdx
; WIN64-NEXT: imulq %r10, %r8
+; WIN64-NEXT: addq %r9, %r8
; WIN64-NEXT: addq %r8, %rdx
; WIN64-NEXT: retq
entry:
@@ -532,8 +532,8 @@ define i128 @udiv_i128_5(i128 %x) nounwind {
; X86-64-NEXT: imulq %rdi, %rcx
; X86-64-NEXT: movq %rdi, %rax
; X86-64-NEXT: mulq %r8
-; X86-64-NEXT: addq %rcx, %rdx
; X86-64-NEXT: imulq %rsi, %r8
+; X86-64-NEXT: addq %rcx, %r8
; X86-64-NEXT: addq %r8, %rdx
; X86-64-NEXT: retq
;
@@ -555,8 +555,8 @@ define i128 @udiv_i128_5(i128 %x) nounwind {
; WIN64-NEXT: imulq %rcx, %r9
; WIN64-NEXT: movq %rcx, %rax
; WIN64-NEXT: mulq %r10
-; WIN64-NEXT: addq %r9, %rdx
; WIN64-NEXT: imulq %r10, %r8
+; WIN64-NEXT: addq %r9, %r8
; WIN64-NEXT: addq %r8, %rdx
; WIN64-NEXT: retq
entry:
@@ -584,8 +584,8 @@ define i128 @udiv_i128_15(i128 %x) nounwind {
; X86-64-NEXT: movabsq $-1229782938247303441, %r8 # imm = 0xEEEEEEEEEEEEEEEF
; X86-64-NEXT: movq %rdi, %rax
; X86-64-NEXT: mulq %r8
-; X86-64-NEXT: addq %rcx, %rdx
; X86-64-NEXT: imulq %rsi, %r8
+; X86-64-NEXT: addq %rcx, %r8
; X86-64-NEXT: addq %r8, %rdx
; X86-64-NEXT: retq
;
@@ -609,8 +609,8 @@ define i128 @udiv_i128_15(i128 %x) nounwind {
; WIN64-NEXT: movabsq $-1229782938247303441, %r10 # imm = 0xEEEEEEEEEEEEEEEF
; WIN64-NEXT: movq %rcx, %rax
; WIN64-NEXT: mulq %r10
-; WIN64-NEXT: addq %r9, %rdx
; WIN64-NEXT: imulq %r10, %r8
+; WIN64-NEXT: addq %r9, %r8
; WIN64-NEXT: addq %r8, %rdx
; WIN64-NEXT: retq
entry:
@@ -638,8 +638,8 @@ define i128 @udiv_i128_17(i128 %x) nounwind {
; X86-64-NEXT: imulq %rdi, %rcx
; X86-64-NEXT: movq %rdi, %rax
; X86-64-NEXT: mulq %r8
-; X86-64-NEXT: addq %rcx, %rdx
; X86-64-NEXT: imulq %rsi, %r8
+; X86-64-NEXT: addq %rcx, %r8
; X86-64-NEXT: addq %r8, %rdx
; X86-64-NEXT: retq
;
@@ -663,8 +663,8 @@ define i128 @udiv_i128_17(i128 %x) nounwind {
; WIN64-NEXT: imulq %rcx, %r9
; WIN64-NEXT: movq %rcx, %rax
; WIN64-NEXT: mulq %r10
-; WIN64-NEXT: addq %r9, %rdx
; WIN64-NEXT: imulq %r10, %r8
+; WIN64-NEXT: addq %r9, %r8
; WIN64-NEXT: addq %r8, %rdx
; WIN64-NEXT: retq
entry:
@@ -694,8 +694,8 @@ define i128 @udiv_i128_255(i128 %x) nounwind {
; X86-64-NEXT: movabsq $-72340172838076673, %r8 # imm = 0xFEFEFEFEFEFEFEFF
; X86-64-NEXT: movq %rdi, %rax
; X86-64-NEXT: mulq %r8
-; X86-64-NEXT: addq %rcx, %rdx
; X86-64-NEXT: imulq %rsi, %r8
+; X86-64-NEXT: addq %rcx, %r8
; X86-64-NEXT: addq %r8, %rdx
; X86-64-NEXT: retq
;
@@ -721,8 +721,8 @@ define i128 @udiv_i128_255(i128 %x) nounwind {
; WIN64-NEXT: movabsq $-72340172838076673, %r10 # imm = 0xFEFEFEFEFEFEFEFF
; WIN64-NEXT: movq %rcx, %rax
; WIN64-NEXT: mulq %r10
-; WIN64-NEXT: addq %r9, %rdx
; WIN64-NEXT: imulq %r10, %r8
+; WIN64-NEXT: addq %r9, %r8
; WIN64-NEXT: addq %r8, %rdx
; WIN64-NEXT: retq
entry:
@@ -750,8 +750,8 @@ define i128 @udiv_i128_257(i128 %x) nounwind {
; X86-64-NEXT: imulq %rdi, %rcx
; X86-64-NEXT: movq %rdi, %rax
; X86-64-NEXT: mulq %r8
-; X86-64-NEXT: addq %rcx, %rdx
; X86-64-NEXT: imulq %rsi, %r8
+; X86-64-NEXT: addq %rcx, %r8
; X86-64-NEXT: addq %r8, %rdx
; X86-64-NEXT: retq
;
@@ -775,8 +775,8 @@ define i128 @udiv_i128_257(i128 %x) nounwind {
; WIN64-NEXT: imulq %rcx, %r9
; WIN64-NEXT: movq %rcx, %rax
; WIN64-NEXT: mulq %r10
-; WIN64-NEXT: addq %r9, %rdx
; WIN64-NEXT: imulq %r10, %r8
+; WIN64-NEXT: addq %r9, %r8
; WIN64-NEXT: addq %r8, %rdx
; WIN64-NEXT: retq
entry:
@@ -806,8 +806,8 @@ define i128 @udiv_i128_65535(i128 %x) nounwind {
; X86-64-NEXT: movabsq $-281479271743489, %r8 # imm = 0xFFFEFFFEFFFEFFFF
; X86-64-NEXT: movq %rdi, %rax
; X86-64-NEXT: mulq %r8
-; X86-64-NEXT: addq %rcx, %rdx
; X86-64-NEXT: imulq %rsi, %r8
+; X86-64-NEXT: addq %rcx, %r8
; X86-64-NEXT: addq %r8, %rdx
; X86-64-NEXT: retq
;
@@ -833,8 +833,8 @@ define i128 @udiv_i128_65535(i128 %x) nounwind {
; WIN64-NEXT: movabsq $-281479271743489, %r10 # imm = 0xFFFEFFFEFFFEFFFF
; WIN64-NEXT: movq %rcx, %rax
; WIN64-NEXT: mulq %r10
-; WIN64-NEXT: addq %r9, %rdx
; WIN64-NEXT: imulq %r10, %r8
+; WIN64-NEXT: addq %r9, %r8
; WIN64-NEXT: addq %r8, %rdx
; WIN64-NEXT: retq
entry:
@@ -862,8 +862,8 @@ define i128 @udiv_i128_65537(i128 %x) nounwind {
; X86-64-NEXT: imulq %rdi, %rcx
; X86-64-NEXT: movq %rdi, %rax
; X86-64-NEXT: mulq %r8
-; X86-64-NEXT: addq %rcx, %rdx
; X86-64-NEXT: imulq %rsi, %r8
+; X86-64-NEXT: addq %rcx, %r8
; X86-64-NEXT: addq %r8, %rdx
; X86-64-NEXT: retq
;
@@ -887,8 +887,8 @@ define i128 @udiv_i128_65537(i128 %x) nounwind {
; WIN64-NEXT: imulq %rcx, %r9
; WIN64-NEXT: movq %rcx, %rax
; WIN64-NEXT: mulq %r10
-; WIN64-NEXT: addq %r9, %rdx
; WIN64-NEXT: imulq %r10, %r8
+; WIN64-NEXT: addq %r9, %r8
; WIN64-NEXT: addq %r8, %rdx
; WIN64-NEXT: retq
entry:
@@ -916,8 +916,8 @@ define i128 @udiv_i128_12(i128 %x) nounwind {
; X86-64-NEXT: imulq %rdi, %rcx
; X86-64-NEXT: movq %rdi, %rax
; X86-64-NEXT: mulq %r8
-; X86-64-NEXT: addq %rcx, %rdx
; X86-64-NEXT: imulq %rsi, %r8
+; X86-64-NEXT: addq %rcx, %r8
; X86-64-NEXT: addq %r8, %rdx
; X86-64-NEXT: retq
;
@@ -941,8 +941,8 @@ define i128 @udiv_i128_12(i128 %x) nounwind {
; WIN64-NEXT: imulq %rcx, %r9
; WIN64-NEXT: movq %rcx, %rax
; WIN64-NEXT: mulq %r10
-; WIN64-NEXT: addq %r9, %rdx
; WIN64-NEXT: imulq %r10, %r8
+; WIN64-NEXT: addq %r9, %r8
; WIN64-NEXT: addq %r8, %rdx
; WIN64-NEXT: retq
entry:
diff --git a/llvm/test/CodeGen/X86/fold-add.ll b/llvm/test/CodeGen/X86/fold-add.ll
index a4dc03471fc30..26396c3fc66a2 100644
--- a/llvm/test/CodeGen/X86/fold-add.ll
+++ b/llvm/test/CodeGen/X86/fold-add.ll
@@ -97,10 +97,10 @@ define dso_local i64 @neg_0x80000001() #0 {
; MSTATIC-NEXT: movabsq $-2147483649, %rcx
; MSTATIC-NEXT: movabsq $foo, %rax
; MSTATIC-NEXT: addq %rcx, %rax
-; MPIC-NEXT: leaq _GLOBAL_OFFSET_TABLE_(%rip), %rax
-; MPIC-NEXT: movabsq $foo@GOTOFF, %rcx
-; MPIC-NEXT: addq %rax, %rcx
+; MPIC-NEXT: leaq _GLOBAL_OFFSET_TABLE_(%rip), %rcx
+; MPIC-NEXT: movabsq $foo@GOTOFF, %rdx
; MPIC-NEXT: movabsq $-2147483649, %rax
+; MPIC-NEXT: addq %rdx, %rax
; MPIC-NEXT: addq %rcx, %rax
entry:
ret i64 add (i64 ptrtoint (ptr @foo to i64), i64 -2147483649)
diff --git a/llvm/test/CodeGen/X86/fold-tied-op.ll b/llvm/test/CodeGen/X86/fold-tied-op.ll
index a8636a3496dc4..a08133bb7af48 100644
--- a/llvm/test/CodeGen/X86/fold-tied-op.ll
+++ b/llvm/test/CodeGen/X86/fold-tied-op.ll
@@ -24,87 +24,85 @@ define i64 @fn1() #0 {
; CHECK-NEXT: .cfi_offset %esi, -20
; CHECK-NEXT: .cfi_offset %edi, -16
; CHECK-NEXT: .cfi_offset %ebx, -12
-; CHECK-NEXT: movl $-1028477379, %ebx # imm = 0xC2B2AE3D
-; CHECK-NEXT: movl $668265295, %ecx # imm = 0x27D4EB4F
-; CHECK-NEXT: movl a, %edi
-; CHECK-NEXT: cmpl $0, (%edi)
+; CHECK-NEXT: movl $-1028477379, %edi # imm = 0xC2B2AE3D
+; CHECK-NEXT: movl $668265295, %ebx # imm = 0x27D4EB4F
+; CHECK-NEXT: movl a, %eax
+; CHECK-NEXT: cmpl $0, (%eax)
; CHECK-NEXT: je .LBB0_2
; CHECK-NEXT: # %bb.1: # %if.then
-; CHECK-NEXT: movl 8(%edi), %esi
-; CHECK-NEXT: movl 12(%edi), %eax
-; CHECK-NEXT: movl %eax, %edx
-; CHECK-NEXT: shldl $1, %esi, %edx
-; CHECK-NEXT: orl %eax, %edx
-; CHECK-NEXT: leal (%esi,%esi), %eax
-; CHECK-NEXT: orl %esi, %eax
-; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: movl 16(%edi), %ebx
-; CHECK-NEXT: movl 20(%edi), %esi
-; CHECK-NEXT: movl %esi, %eax
-; CHECK-NEXT: shldl $2, %ebx, %eax
-; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: movl %ebx, %eax
-; CHECK-NEXT: movl %esi, %ebx
-; CHECK-NEXT: shldl $31, %eax, %ebx
-; CHECK-NEXT: shll $2, %eax
-; CHECK-NEXT: orl %ebx, %eax
+; CHECK-NEXT: movl 8(%eax), %edi
+; CHECK-NEXT: movl 12(%eax), %esi
+; CHECK-NEXT: movl %esi, %edx
+; CHECK-NEXT: shldl $1, %edi, %edx
+; CHECK-NEXT: orl %esi, %edx
+; CHECK-NEXT: leal (%edi,%edi), %ecx
+; CHECK-NEXT: orl %edi, %ecx
+; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: movl 16(%eax), %ecx
+; CHECK-NEXT: movl 20(%eax), %esi
+; CHECK-NEXT: movl %esi, %edi
+; CHECK-NEXT: shldl $2, %ecx, %edi
+; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: movl %esi, %edi
+; CHECK-NEXT: shldl $31, %ecx, %edi
+; CHECK-NEXT: shll $2, %ecx
+; CHECK-NEXT: orl %edi, %ecx
; CHECK-NEXT: shrl %esi
; CHECK-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: adcl %edx, %esi
-; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: movl 24(%edi), %eax
+; CHECK-NEXT: movl 28(%eax), %ecx
+; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: movl 24(%eax), %eax
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: movl $-1028477379, %ebx # imm = 0xC2B2AE3D
+; CHECK-NEXT: movl $-1028477379, %ecx # imm = 0xC2B2AE3D
+; CHECK-NEXT: imull %eax, %ecx
+; CHECK-NEXT: mull %ebx
+; CHECK-NEXT: movl %eax, %edi
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; CHECK-NEXT: imull %eax, %ebx
-; CHECK-NEXT: mull %ecx
-; CHECK-NEXT: movl %eax, %esi
-; CHECK-NEXT: addl %ebx, %edx
-; CHECK-NEXT: movl 28(%edi), %edi
-; CHECK-NEXT: imull %edi, %ecx
-; CHECK-NEXT: addl %edx, %ecx
+; CHECK-NEXT: addl %ecx, %ebx
+; CHECK-NEXT: addl %edx, %ebx
+; CHECK-NEXT: imull $1336530590, %eax, %ecx # imm = 0x4FA9D69E
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; CHECK-NEXT: imull $-2056954758, %edx, %eax # imm = 0x85655C7A
+; CHECK-NEXT: addl %eax, %ecx
+; CHECK-NEXT: movl %edx, %eax
; CHECK-NEXT: movl $1336530590, %edx # imm = 0x4FA9D69E
-; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; CHECK-NEXT: movl %ebx, %eax
; CHECK-NEXT: mull %edx
-; CHECK-NEXT: imull $-2056954758, %ebx, %ebx # imm = 0x85655C7A
-; CHECK-NEXT: addl %edx, %ebx
-; CHECK-NEXT: imull $1336530590, %edi, %edx # imm = 0x4FA9D69E
-; CHECK-NEXT: addl %ebx, %edx
-; CHECK-NEXT: shrdl $3, %ecx, %esi
-; CHECK-NEXT: sarl $3, %ecx
-; CHECK-NEXT: orl %edx, %ecx
-; CHECK-NEXT: orl %eax, %esi
-; CHECK-NEXT: movl $-66860409, %ebx # imm = 0xFC03CA87
-; CHECK-NEXT: movl %esi, %eax
-; CHECK-NEXT: mull %ebx
-; CHECK-NEXT: movl %eax, %edi
-; CHECK-NEXT: imull $326129324, %esi, %eax # imm = 0x137056AC
-; CHECK-NEXT: addl %edx, %eax
-; CHECK-NEXT: imull $-66860409, %ecx, %ecx # imm = 0xFC03CA87
+; CHECK-NEXT: addl %edx, %ecx
+; CHECK-NEXT: shrdl $3, %ebx, %edi
+; CHECK-NEXT: sarl $3, %ebx
+; CHECK-NEXT: orl %ecx, %ebx
+; CHECK-NEXT: orl %eax, %edi
+; CHECK-NEXT: imull $326129324, %edi, %eax # imm = 0x137056AC
+; CHECK-NEXT: imull $-66860409, %ebx, %ecx # imm = 0xFC03CA87
; CHECK-NEXT: addl %eax, %ecx
-; CHECK-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; CHECK-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; CHECK-NEXT: movl %edi, b
+; CHECK-NEXT: movl $-66860409, %ebx # imm = 0xFC03CA87
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: mull %ebx
-; CHECK-NEXT: imull $326129324, %edi, %esi # imm = 0x137056AC
-; CHECK-NEXT: addl %edx, %esi
+; CHECK-NEXT: addl %edx, %ecx
+; CHECK-NEXT: xorl %esi, %ecx
+; CHECK-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-NEXT: movl %ecx, b+4
+; CHECK-NEXT: imull $326129324, %eax, %edx # imm = 0x137056AC
; CHECK-NEXT: imull $-66860409, %ecx, %ecx # imm = 0xFC03CA87
+; CHECK-NEXT: addl %edx, %ecx
+; CHECK-NEXT: movl %eax, b
+; CHECK-NEXT: mull %ebx
; CHECK-NEXT: jmp .LBB0_3
; CHECK-NEXT: .LBB0_2: # %if.else
-; CHECK-NEXT: xorl b+4, %ebx
-; CHECK-NEXT: xorl b, %ecx
-; CHECK-NEXT: movl $1419758215, %edx # imm = 0x549FCA87
-; CHECK-NEXT: movl %ecx, %eax
-; CHECK-NEXT: mull %edx
-; CHECK-NEXT: imull $93298681, %ecx, %esi # imm = 0x58F9FF9
-; CHECK-NEXT: addl %edx, %esi
-; CHECK-NEXT: imull $1419758215, %ebx, %ecx # imm = 0x549FCA87
-; CHECK-NEXT: .LBB0_3: # %if.end
+; CHECK-NEXT: xorl b+4, %edi
+; CHECK-NEXT: xorl b, %ebx
+; CHECK-NEXT: movl $1419758215, %ecx # imm = 0x549FCA87
+; CHECK-NEXT: movl %ebx, %eax
+; CHECK-NEXT: mull %ecx
+; CHECK-NEXT: imull $93298681, %ebx, %esi # imm = 0x58F9FF9
+; CHECK-NEXT: imull $1419758215, %edi, %ecx # imm = 0x549FCA87
; CHECK-NEXT: addl %esi, %ecx
+; CHECK-NEXT: .LBB0_3: # %if.end
+; CHECK-NEXT: addl %edx, %ecx
; CHECK-NEXT: addl $-1028477341, %eax # imm = 0xC2B2AE63
; CHECK-NEXT: adcl $-2048144777, %ecx # imm = 0x85EBCA77
; CHECK-NEXT: movl %eax, b
diff --git a/llvm/test/CodeGen/X86/h-registers-1.ll b/llvm/test/CodeGen/X86/h-registers-1.ll
index 07d85d260a37a..7ed6ddf267a8e 100644
--- a/llvm/test/CodeGen/X86/h-registers-1.ll
+++ b/llvm/test/CodeGen/X86/h-registers-1.ll
@@ -30,11 +30,11 @@ define i64 @foo(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i64 %h)
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %r8d
; CHECK-NEXT: addq %rdi, %rsi
; CHECK-NEXT: addq %rbp, %rdx
-; CHECK-NEXT: addq %rsi, %rdx
; CHECK-NEXT: addq %rbx, %rcx
; CHECK-NEXT: addq %r8, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: addq %rdx, %rax
+; CHECK-NEXT: addq %rsi, %rax
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
@@ -63,11 +63,11 @@ define i64 @foo(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i64 %h)
; GNUX32-NEXT: movzbl {{[0-9]+}}(%esp), %r8d
; GNUX32-NEXT: addq %rdi, %rsi
; GNUX32-NEXT: addq %rbp, %rdx
-; GNUX32-NEXT: addq %rsi, %rdx
; GNUX32-NEXT: addq %rbx, %rcx
; GNUX32-NEXT: addq %r8, %rax
; GNUX32-NEXT: addq %rcx, %rax
; GNUX32-NEXT: addq %rdx, %rax
+; GNUX32-NEXT: addq %rsi, %rax
; GNUX32-NEXT: popq %rbx
; GNUX32-NEXT: .cfi_def_cfa_offset 16
; GNUX32-NEXT: popq %rbp
diff --git a/llvm/test/CodeGen/X86/hipe-cc.ll b/llvm/test/CodeGen/X86/hipe-cc.ll
index 83393c8c128f7..fd27f97425371 100644
--- a/llvm/test/CodeGen/X86/hipe-cc.ll
+++ b/llvm/test/CodeGen/X86/hipe-cc.ll
@@ -21,7 +21,7 @@ entry:
define cc 11 {i32, i32, i32} @addfour(i32 %hp, i32 %p, i32 %x, i32 %y, i32 %z) nounwind {
entry:
- ; CHECK: addl %edx, %eax
+ ; CHECK: addl %edx, %ecx
; CHECK-NEXT: addl %ecx, %eax
%0 = add i32 %x, %y
%1 = add i32 %0, %z
diff --git a/llvm/test/CodeGen/X86/hipe-cc64.ll b/llvm/test/CodeGen/X86/hipe-cc64.ll
index 29741ae76db46..fec3699650124 100644
--- a/llvm/test/CodeGen/X86/hipe-cc64.ll
+++ b/llvm/test/CodeGen/X86/hipe-cc64.ll
@@ -23,9 +23,9 @@ entry:
define cc 11 {i64, i64, i64} @addfour(i64 %hp, i64 %p, i64 %x, i64 %y, i64 %z, i64 %w) nounwind {
entry:
- ; CHECK: leaq (%rsi,%rdx), %rax
- ; CHECK-NEXT: addq %rcx, %rax
+ ; CHECK: leaq (%rdx,%rcx), %rax
; CHECK-NEXT: addq %r8, %rax
+ ; CHECK-NEXT: addq %rsi, %rax
%0 = add i64 %x, %y
%1 = add i64 %0, %z
%2 = add i64 %1, %w
diff --git a/llvm/test/CodeGen/X86/imul.ll b/llvm/test/CodeGen/X86/imul.ll
index 9131688c4efcc..94d786a65ffa8 100644
--- a/llvm/test/CodeGen/X86/imul.ll
+++ b/llvm/test/CodeGen/X86/imul.ll
@@ -450,13 +450,18 @@ define i64 @test6(i64 %a) {
;
; X86-LABEL: test6:
; X86: # %bb.0: # %entry
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: shll $5, %ecx
-; X86-NEXT: addl %eax, %ecx
+; X86-NEXT: pushl %esi
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %esi, -8
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: shll $5, %esi
; X86-NEXT: movl $33, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: addl %esi, %edx
; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
entry:
%tmp3 = mul i64 %a, 33
diff --git a/llvm/test/CodeGen/X86/lea-opt-cse4.ll b/llvm/test/CodeGen/X86/lea-opt-cse4.ll
index 8fe4516fe959e..d63fdcbca79ed 100644
--- a/llvm/test/CodeGen/X86/lea-opt-cse4.ll
+++ b/llvm/test/CodeGen/X86/lea-opt-cse4.ll
@@ -71,13 +71,13 @@ define void @foo_loop(ptr nocapture %ctx, i32 %n) local_unnamed_addr #0 {
; X64-NEXT: # %bb.2: # %exit
; X64-NEXT: addl %eax, %ecx
; X64-NEXT: leal 1(%rax,%rcx), %ecx
-; X64-NEXT: addl %eax, %ecx
-; X64-NEXT: addl %eax, %ecx
-; X64-NEXT: addl %eax, %ecx
-; X64-NEXT: addl %eax, %ecx
-; X64-NEXT: addl %eax, %ecx
-; X64-NEXT: addl %eax, %ecx
-; X64-NEXT: movl %ecx, 16(%rdi)
+; X64-NEXT: leal (%rax,%rax), %edx
+; X64-NEXT: addl %eax, %edx
+; X64-NEXT: addl %eax, %edx
+; X64-NEXT: addl %eax, %edx
+; X64-NEXT: addl %eax, %edx
+; X64-NEXT: addl %ecx, %edx
+; X64-NEXT: movl %edx, 16(%rdi)
; X64-NEXT: retq
;
; X86-LABEL: foo_loop:
@@ -102,13 +102,13 @@ define void @foo_loop(ptr nocapture %ctx, i32 %n) local_unnamed_addr #0 {
; X86-NEXT: # %bb.2: # %exit
; X86-NEXT: addl %ecx, %esi
; X86-NEXT: leal 1(%ecx,%esi), %edx
-; X86-NEXT: addl %ecx, %edx
-; X86-NEXT: addl %ecx, %edx
-; X86-NEXT: addl %ecx, %edx
-; X86-NEXT: addl %ecx, %edx
-; X86-NEXT: addl %ecx, %edx
-; X86-NEXT: addl %ecx, %edx
-; X86-NEXT: movl %edx, 16(%eax)
+; X86-NEXT: leal (%ecx,%ecx), %esi
+; X86-NEXT: addl %ecx, %esi
+; X86-NEXT: addl %ecx, %esi
+; X86-NEXT: addl %ecx, %esi
+; X86-NEXT: addl %ecx, %esi
+; X86-NEXT: addl %edx, %esi
+; X86-NEXT: movl %esi, 16(%eax)
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %edi
diff --git a/llvm/test/CodeGen/X86/lea-opt2.ll b/llvm/test/CodeGen/X86/lea-opt2.ll
index cec19dcf49c8d..b076188c4e918 100644
--- a/llvm/test/CodeGen/X86/lea-opt2.ll
+++ b/llvm/test/CodeGen/X86/lea-opt2.ll
@@ -35,13 +35,11 @@ entry:
define i32 @test2(ptr %p, i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: # kill: def $edx killed $edx def $rdx
; CHECK-NEXT: movl %esi, %eax
-; CHECK-NEXT: addl %eax, %ecx
; CHECK-NEXT: addl %edx, %ecx
+; CHECK-NEXT: addl %esi, %ecx
; CHECK-NEXT: movl %ecx, (%rdi)
; CHECK-NEXT: subl %edx, %eax
-; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-NEXT: retq
entry:
%0 = add i32 %a, %b
@@ -55,13 +53,11 @@ entry:
define i32 @test3(ptr %p, i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test3:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: # kill: def $edx killed $edx def $rdx
; CHECK-NEXT: movl %esi, %eax
-; CHECK-NEXT: addl %eax, %ecx
; CHECK-NEXT: addl %edx, %ecx
+; CHECK-NEXT: addl %esi, %ecx
; CHECK-NEXT: movl %ecx, (%rdi)
; CHECK-NEXT: subl %edx, %eax
-; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-NEXT: retq
entry:
%0 = add i32 %a, %b
@@ -114,8 +110,8 @@ define i64 @test6(ptr %p, i64 %a, i64 %b, i64 %c) {
; CHECK-LABEL: test6:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq (%rdi), %rax
-; CHECK-NEXT: addq %rdx, %rcx
; CHECK-NEXT: addq %rax, %rcx
+; CHECK-NEXT: addq %rdx, %rcx
; CHECK-NEXT: movq %rcx, (%rdi)
; CHECK-NEXT: subq %rdx, %rax
; CHECK-NEXT: retq
@@ -132,8 +128,8 @@ define i64 @test7(ptr %p, i64 %a, i64 %b, i64 %c) {
; CHECK-LABEL: test7:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq (%rdi), %rax
-; CHECK-NEXT: addq %rdx, %rcx
; CHECK-NEXT: addq %rax, %rcx
+; CHECK-NEXT: addq %rdx, %rcx
; CHECK-NEXT: movq %rcx, (%rdi)
; CHECK-NEXT: subq %rdx, %rax
; CHECK-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/lrshrink.ll b/llvm/test/CodeGen/X86/lrshrink.ll
index 51f675d245190..b72a5a413c573 100644
--- a/llvm/test/CodeGen/X86/lrshrink.ll
+++ b/llvm/test/CodeGen/X86/lrshrink.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell | FileCheck %s
; Checks if "%7 = add nuw nsw i64 %4, %2" is moved before the last call
; to minimize live-range.
@@ -33,10 +33,10 @@ define i64 @test(i1 %a, i64 %r1, i64 %r2, i64 %s1, i64 %s2, i64 %t1, i64 %t2) {
; CHECK-NEXT: addq %r14, %r15
; CHECK-NEXT: callq _Z3foov@PLT
; CHECK-NEXT: movl %eax, %r14d
-; CHECK-NEXT: addq %r15, %r14
; CHECK-NEXT: callq _Z3foov@PLT
; CHECK-NEXT: movl %eax, %eax
; CHECK-NEXT: addq %r14, %rax
+; CHECK-NEXT: addq %r15, %rax
; CHECK-NEXT: addq %rbx, %rax
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 24
diff --git a/llvm/test/CodeGen/X86/midpoint-int.ll b/llvm/test/CodeGen/X86/midpoint-int.ll
index 601166d67f6f2..11c216e67bc82 100644
--- a/llvm/test/CodeGen/X86/midpoint-int.ll
+++ b/llvm/test/CodeGen/X86/midpoint-int.ll
@@ -304,40 +304,41 @@ define i64 @scalar_i64_signed_reg_reg(i64 %a1, i64 %a2) nounwind {
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edi, %edx
-; X86-NEXT: sbbl %ebp, %edx
-; X86-NEXT: setl %dl
-; X86-NEXT: movzbl %dl, %ebx
+; X86-NEXT: cmpl %ebp, %eax
+; X86-NEXT: movl %edi, %ecx
+; X86-NEXT: sbbl %esi, %ecx
+; X86-NEXT: setl %cl
+; X86-NEXT: movzbl %cl, %edx
; X86-NEXT: jl .LBB5_1
; X86-NEXT: # %bb.2:
+; X86-NEXT: movl %esi, %ebx
+; X86-NEXT: movl %ebp, %esi
; X86-NEXT: movl %ebp, %ecx
-; X86-NEXT: movl %ebp, %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: jmp .LBB5_3
; X86-NEXT: .LBB5_1:
-; X86-NEXT: movl %edi, %edx
-; X86-NEXT: movl %eax, %esi
-; X86-NEXT: movl %ebp, %ecx
-; X86-NEXT: movl %ebp, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %edi, %ebx
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: movl %ebp, %esi
+; X86-NEXT: movl %ebp, %eax
; X86-NEXT: .LBB5_3:
-; X86-NEXT: negl %ebx
-; X86-NEXT: movl %ebx, %ebp
+; X86-NEXT: negl %edx
+; X86-NEXT: movl %edx, %ebp
; X86-NEXT: orl $1, %ebp
-; X86-NEXT: subl %esi, %eax
-; X86-NEXT: sbbl %edx, %edi
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: sbbl %ebx, %edi
; X86-NEXT: shrdl $1, %edi, %eax
-; X86-NEXT: imull %eax, %ebx
-; X86-NEXT: mull %ebp
-; X86-NEXT: addl %ebx, %edx
; X86-NEXT: shrl %edi
+; X86-NEXT: imull %eax, %edx
; X86-NEXT: imull %ebp, %edi
+; X86-NEXT: addl %edx, %edi
+; X86-NEXT: mull %ebp
; X86-NEXT: addl %edi, %edx
-; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: adcl %ecx, %edx
+; X86-NEXT: addl %esi, %eax
+; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -376,42 +377,43 @@ define i64 @scalar_i64_unsigned_reg_reg(i64 %a1, i64 %a2) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edi, %edx
-; X86-NEXT: sbbl %ebp, %edx
-; X86-NEXT: setb %dl
-; X86-NEXT: sbbl %ebx, %ebx
-; X86-NEXT: testb %dl, %dl
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: cmpl %ebx, %eax
+; X86-NEXT: movl %edi, %ecx
+; X86-NEXT: sbbl %esi, %ecx
+; X86-NEXT: setb %cl
+; X86-NEXT: sbbl %edx, %edx
+; X86-NEXT: testb %cl, %cl
; X86-NEXT: jne .LBB6_1
; X86-NEXT: # %bb.2:
-; X86-NEXT: movl %ebp, %ecx
-; X86-NEXT: movl %ebp, %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, %ebp
+; X86-NEXT: movl %ebx, %esi
+; X86-NEXT: movl %ebx, %ecx
; X86-NEXT: jmp .LBB6_3
; X86-NEXT: .LBB6_1:
-; X86-NEXT: movl %edi, %edx
-; X86-NEXT: movl %eax, %esi
-; X86-NEXT: movl %ebp, %ecx
-; X86-NEXT: movl %ebp, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %edi, %ebp
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: movl %ebx, %esi
+; X86-NEXT: movl %ebx, %eax
; X86-NEXT: .LBB6_3:
-; X86-NEXT: movl %ebx, %ebp
-; X86-NEXT: orl $1, %ebp
-; X86-NEXT: subl %esi, %eax
-; X86-NEXT: sbbl %edx, %edi
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: orl $1, %ebx
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: sbbl %ebp, %edi
; X86-NEXT: shrdl $1, %edi, %eax
-; X86-NEXT: imull %eax, %ebx
-; X86-NEXT: mull %ebp
-; X86-NEXT: addl %ebx, %edx
; X86-NEXT: shrl %edi
-; X86-NEXT: imull %ebp, %edi
+; X86-NEXT: imull %eax, %edx
+; X86-NEXT: imull %ebx, %edi
+; X86-NEXT: addl %edx, %edi
+; X86-NEXT: mull %ebx
; X86-NEXT: addl %edi, %edx
-; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: adcl %ecx, %edx
+; X86-NEXT: addl %esi, %eax
+; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -456,39 +458,39 @@ define i64 @scalar_i64_signed_mem_reg(ptr %a1_addr, i64 %a2) nounwind {
; X86-NEXT: pushl %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%ecx), %esi
-; X86-NEXT: movl 4(%ecx), %ecx
-; X86-NEXT: cmpl %esi, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl (%edx), %ecx
+; X86-NEXT: movl 4(%edx), %esi
+; X86-NEXT: cmpl %ecx, %eax
; X86-NEXT: movl %edi, %edx
-; X86-NEXT: sbbl %ecx, %edx
+; X86-NEXT: sbbl %esi, %edx
; X86-NEXT: setl %dl
-; X86-NEXT: movzbl %dl, %ebx
+; X86-NEXT: movzbl %dl, %edx
; X86-NEXT: jl .LBB7_1
; X86-NEXT: # %bb.2:
-; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill
-; X86-NEXT: movl %esi, %edx
+; X86-NEXT: movl %esi, (%esp) # 4-byte Spill
+; X86-NEXT: movl %ecx, %ebx
; X86-NEXT: jmp .LBB7_3
; X86-NEXT: .LBB7_1:
; X86-NEXT: movl %edi, (%esp) # 4-byte Spill
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: movl %ecx, %edi
-; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: movl %ecx, %eax
; X86-NEXT: .LBB7_3:
-; X86-NEXT: negl %ebx
-; X86-NEXT: movl %ebx, %ebp
+; X86-NEXT: negl %edx
+; X86-NEXT: movl %edx, %ebp
; X86-NEXT: orl $1, %ebp
-; X86-NEXT: subl %edx, %eax
+; X86-NEXT: subl %ebx, %eax
; X86-NEXT: sbbl (%esp), %edi # 4-byte Folded Reload
; X86-NEXT: shrdl $1, %edi, %eax
-; X86-NEXT: imull %eax, %ebx
-; X86-NEXT: mull %ebp
-; X86-NEXT: addl %ebx, %edx
; X86-NEXT: shrl %edi
+; X86-NEXT: imull %eax, %edx
; X86-NEXT: imull %ebp, %edi
+; X86-NEXT: addl %edx, %edi
+; X86-NEXT: mull %ebp
; X86-NEXT: addl %edi, %edx
-; X86-NEXT: addl %esi, %eax
-; X86-NEXT: adcl %ecx, %edx
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: adcl %esi, %edx
; X86-NEXT: addl $4, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
@@ -531,41 +533,42 @@ define i64 @scalar_i64_signed_reg_mem(i64 %a1, ptr %a2_addr) nounwind {
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl (%edx), %eax
-; X86-NEXT: movl 4(%edx), %edi
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edi, %edx
-; X86-NEXT: sbbl %ebp, %edx
-; X86-NEXT: setl %dl
-; X86-NEXT: movzbl %dl, %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl (%ecx), %eax
+; X86-NEXT: movl 4(%ecx), %edi
+; X86-NEXT: cmpl %ebp, %eax
+; X86-NEXT: movl %edi, %ecx
+; X86-NEXT: sbbl %esi, %ecx
+; X86-NEXT: setl %cl
+; X86-NEXT: movzbl %cl, %edx
; X86-NEXT: jl .LBB8_1
; X86-NEXT: # %bb.2:
+; X86-NEXT: movl %esi, %ebx
+; X86-NEXT: movl %ebp, %esi
; X86-NEXT: movl %ebp, %ecx
-; X86-NEXT: movl %ebp, %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: jmp .LBB8_3
; X86-NEXT: .LBB8_1:
-; X86-NEXT: movl %edi, %edx
-; X86-NEXT: movl %eax, %esi
-; X86-NEXT: movl %ebp, %ecx
-; X86-NEXT: movl %ebp, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %edi, %ebx
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: movl %ebp, %esi
+; X86-NEXT: movl %ebp, %eax
; X86-NEXT: .LBB8_3:
-; X86-NEXT: negl %ebx
-; X86-NEXT: movl %ebx, %ebp
+; X86-NEXT: negl %edx
+; X86-NEXT: movl %edx, %ebp
; X86-NEXT: orl $1, %ebp
-; X86-NEXT: subl %esi, %eax
-; X86-NEXT: sbbl %edx, %edi
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: sbbl %ebx, %edi
; X86-NEXT: shrdl $1, %edi, %eax
-; X86-NEXT: imull %eax, %ebx
-; X86-NEXT: mull %ebp
-; X86-NEXT: addl %ebx, %edx
; X86-NEXT: shrl %edi
+; X86-NEXT: imull %eax, %edx
; X86-NEXT: imull %ebp, %edi
+; X86-NEXT: addl %edx, %edi
+; X86-NEXT: mull %ebp
; X86-NEXT: addl %edi, %edx
-; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: adcl %ecx, %edx
+; X86-NEXT: addl %esi, %eax
+; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -610,40 +613,40 @@ define i64 @scalar_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
; X86-NEXT: pushl %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl (%eax), %esi
-; X86-NEXT: movl 4(%eax), %ecx
+; X86-NEXT: movl (%eax), %ecx
+; X86-NEXT: movl 4(%eax), %esi
; X86-NEXT: movl (%edx), %eax
; X86-NEXT: movl 4(%edx), %edi
-; X86-NEXT: cmpl %esi, %eax
+; X86-NEXT: cmpl %ecx, %eax
; X86-NEXT: movl %edi, %edx
-; X86-NEXT: sbbl %ecx, %edx
+; X86-NEXT: sbbl %esi, %edx
; X86-NEXT: setl %dl
-; X86-NEXT: movzbl %dl, %ebx
+; X86-NEXT: movzbl %dl, %edx
; X86-NEXT: jl .LBB9_1
; X86-NEXT: # %bb.2:
-; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill
-; X86-NEXT: movl %esi, %edx
+; X86-NEXT: movl %esi, (%esp) # 4-byte Spill
+; X86-NEXT: movl %ecx, %ebx
; X86-NEXT: jmp .LBB9_3
; X86-NEXT: .LBB9_1:
; X86-NEXT: movl %edi, (%esp) # 4-byte Spill
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: movl %ecx, %edi
-; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: movl %ecx, %eax
; X86-NEXT: .LBB9_3:
-; X86-NEXT: negl %ebx
-; X86-NEXT: movl %ebx, %ebp
+; X86-NEXT: negl %edx
+; X86-NEXT: movl %edx, %ebp
; X86-NEXT: orl $1, %ebp
-; X86-NEXT: subl %edx, %eax
+; X86-NEXT: subl %ebx, %eax
; X86-NEXT: sbbl (%esp), %edi # 4-byte Folded Reload
; X86-NEXT: shrdl $1, %edi, %eax
-; X86-NEXT: imull %eax, %ebx
-; X86-NEXT: mull %ebp
-; X86-NEXT: addl %ebx, %edx
; X86-NEXT: shrl %edi
+; X86-NEXT: imull %eax, %edx
; X86-NEXT: imull %ebp, %edi
+; X86-NEXT: addl %edx, %edi
+; X86-NEXT: mull %ebp
; X86-NEXT: addl %edi, %edx
-; X86-NEXT: addl %esi, %eax
-; X86-NEXT: adcl %ecx, %edx
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: adcl %esi, %edx
; X86-NEXT: addl $4, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
diff --git a/llvm/test/CodeGen/X86/misched-balance.ll b/llvm/test/CodeGen/X86/misched-balance.ll
index aef363f11bbe2..02d1425360159 100644
--- a/llvm/test/CodeGen/X86/misched-balance.ll
+++ b/llvm/test/CodeGen/X86/misched-balance.ll
@@ -9,8 +9,8 @@ define void @unrolled_mmult1(ptr %tmp55, ptr %tmp56, ptr %pre, ptr %pre94,
entry:
br label %for.body
-; imull folded loads should be in order and interleaved with addl, never
-; adjacent. Also check that we have no spilling.
+; imull folded loads should be in order, addl may be reordered to reduce total
+; latency. Also check that we have no spilling.
;
; Since mmult1 IR is already in good order, this effectively ensure
; the scheduler maintains source order.
@@ -22,28 +22,28 @@ entry:
; CHECK: addl
; CHECK: imull 8
; CHECK-NOT: {{imull|rsp}}
-; CHECK: addl
; CHECK: imull 12
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
+; CHECK: addl
; CHECK: imull 16
; CHECK-NOT: {{imull|rsp}}
-; CHECK: addl
; CHECK: imull 20
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 24
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
+; CHECK: addl
; CHECK: imull 28
; CHECK-NOT: {{imull|rsp}}
-; CHECK: addl
; CHECK: imull 32
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 36
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
+; CHECK: addl
; CHECK-NOT: {{imull|rsp}}
; CHECK-LABEL: %end
for.body:
@@ -127,28 +127,28 @@ end:
; CHECK: addl
; CHECK: imull 8
; CHECK-NOT: {{imull|rsp}}
-; CHECK: addl
; CHECK: imull 12
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
+; CHECK: addl
; CHECK: imull 16
; CHECK-NOT: {{imull|rsp}}
-; CHECK: addl
; CHECK: imull 20
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 24
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
+; CHECK: addl
; CHECK: imull 28
; CHECK-NOT: {{imull|rsp}}
-; CHECK: addl
; CHECK: imull 32
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
; CHECK: imull 36
; CHECK-NOT: {{imull|rsp}}
; CHECK: addl
+; CHECK: addl
; CHECK-NOT: {{imull|rsp}}
; CHECK-LABEL: %end
define void @unrolled_mmult2(ptr %tmp55, ptr %tmp56, ptr %pre, ptr %pre94,
diff --git a/llvm/test/CodeGen/X86/misched-matrix.ll b/llvm/test/CodeGen/X86/misched-matrix.ll
index fa56d6bf226d1..178bea92867ee 100644
--- a/llvm/test/CodeGen/X86/misched-matrix.ll
+++ b/llvm/test/CodeGen/X86/misched-matrix.ll
@@ -33,25 +33,25 @@
; ILPMIN: imull
; ILPMIN: addl
; ILPMIN: imull
-; ILPMIN: addl
; ILPMIN: imull
; ILPMIN: addl
+; ILPMIN: addl
; ILPMIN: movl %{{.*}}, 4(
; ILPMIN: imull
; ILPMIN: imull
; ILPMIN: addl
; ILPMIN: imull
-; ILPMIN: addl
; ILPMIN: imull
; ILPMIN: addl
+; ILPMIN: addl
; ILPMIN: movl %{{.*}}, 8(
; ILPMIN: imull
; ILPMIN: imull
; ILPMIN: addl
; ILPMIN: imull
-; ILPMIN: addl
; ILPMIN: imull
; ILPMIN: addl
+; ILPMIN: addl
; ILPMIN: movl %{{.*}}, 12(
; ILPMIN-LABEL: %for.end
;
diff --git a/llvm/test/CodeGen/X86/mul-constant-i16.ll b/llvm/test/CodeGen/X86/mul-constant-i16.ll
index 267ebca88e31e..f04b89f54e3a3 100644
--- a/llvm/test/CodeGen/X86/mul-constant-i16.ll
+++ b/llvm/test/CodeGen/X86/mul-constant-i16.ll
@@ -558,10 +558,10 @@ define i16 @test_mul_by_28(i16 %x) {
define i16 @test_mul_by_29(i16 %x) {
; X86-LABEL: test_mul_by_29:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: leal (%ecx,%ecx,8), %eax
-; X86-NEXT: leal (%eax,%eax,2), %eax
-; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,8), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %ecx
+; X86-NEXT: addl %eax, %eax
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/mul-constant-i32.ll b/llvm/test/CodeGen/X86/mul-constant-i32.ll
index b6791a84ffa45..44c140857f4c2 100644
--- a/llvm/test/CodeGen/X86/mul-constant-i32.ll
+++ b/llvm/test/CodeGen/X86/mul-constant-i32.ll
@@ -900,10 +900,10 @@ define i32 @test_mul_by_28(i32 %x) {
define i32 @test_mul_by_29(i32 %x) {
; X86-LABEL: test_mul_by_29:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: leal (%ecx,%ecx,8), %eax
-; X86-NEXT: leal (%eax,%eax,2), %eax
-; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,8), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %ecx
+; X86-NEXT: addl %eax, %eax
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: retl
;
@@ -1179,6 +1179,7 @@ define i32 @test_mul_by_66(i32 %x) {
;
; X64-SLM-LABEL: test_mul_by_66:
; X64-SLM: # %bb.0:
+; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi
; X64-SLM-NEXT: movl %edi, %eax
; X64-SLM-NEXT: shll $6, %eax
; X64-SLM-NEXT: addl %edi, %eax
diff --git a/llvm/test/CodeGen/X86/mul-constant-i64.ll b/llvm/test/CodeGen/X86/mul-constant-i64.ll
index 94e09011fda6c..c65858586f0db 100644
--- a/llvm/test/CodeGen/X86/mul-constant-i64.ll
+++ b/llvm/test/CodeGen/X86/mul-constant-i64.ll
@@ -498,13 +498,18 @@ define i64 @test_mul_by_16(i64 %x) {
define i64 @test_mul_by_17(i64 %x) {
; X86-LABEL: test_mul_by_17:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: shll $4, %ecx
-; X86-NEXT: addl %eax, %ecx
+; X86-NEXT: pushl %esi
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %esi, -8
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: shll $4, %esi
; X86-NEXT: movl $17, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: addl %esi, %edx
; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X86-NOOPT-LABEL: test_mul_by_17:
@@ -685,13 +690,18 @@ define i64 @test_mul_by_21(i64 %x) {
define i64 @test_mul_by_22(i64 %x) {
; X86-LABEL: test_mul_by_22:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: leal (%eax,%eax,4), %ecx
-; X86-NEXT: leal (%eax,%ecx,4), %ecx
-; X86-NEXT: addl %eax, %ecx
+; X86-NEXT: pushl %esi
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %esi, -8
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,4), %eax
+; X86-NEXT: leal (%ecx,%eax,4), %esi
; X86-NEXT: movl $22, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: addl %esi, %edx
; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X86-NOOPT-LABEL: test_mul_by_22:
@@ -844,13 +854,18 @@ define i64 @test_mul_by_25(i64 %x) {
define i64 @test_mul_by_26(i64 %x) {
; X86-LABEL: test_mul_by_26:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: leal (%eax,%eax,4), %ecx
-; X86-NEXT: leal (%ecx,%ecx,4), %ecx
-; X86-NEXT: addl %eax, %ecx
+; X86-NEXT: pushl %esi
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %esi, -8
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,4), %eax
+; X86-NEXT: leal (%eax,%eax,4), %esi
; X86-NEXT: movl $26, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: addl %esi, %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X86-NOOPT-LABEL: test_mul_by_26:
@@ -924,13 +939,18 @@ define i64 @test_mul_by_27(i64 %x) {
define i64 @test_mul_by_28(i64 %x) {
; X86-LABEL: test_mul_by_28:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: leal (%eax,%eax,8), %ecx
-; X86-NEXT: leal (%ecx,%ecx,2), %ecx
-; X86-NEXT: addl %eax, %ecx
+; X86-NEXT: pushl %esi
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %esi, -8
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,8), %eax
+; X86-NEXT: leal (%eax,%eax,2), %esi
; X86-NEXT: movl $28, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: addl %esi, %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X86-NOOPT-LABEL: test_mul_by_28:
@@ -971,14 +991,19 @@ define i64 @test_mul_by_28(i64 %x) {
define i64 @test_mul_by_29(i64 %x) {
; X86-LABEL: test_mul_by_29:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: leal (%eax,%eax,8), %ecx
-; X86-NEXT: leal (%ecx,%ecx,2), %ecx
-; X86-NEXT: addl %eax, %ecx
-; X86-NEXT: addl %eax, %ecx
+; X86-NEXT: pushl %esi
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %esi, -8
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,8), %eax
+; X86-NEXT: leal (%eax,%eax,2), %esi
+; X86-NEXT: addl %ecx, %ecx
; X86-NEXT: movl $29, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: addl %esi, %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X86-NOOPT-LABEL: test_mul_by_29:
@@ -1501,8 +1526,8 @@ define i64 @test_mul_spec(i64 %x) nounwind {
; X86-NEXT: movl %esi, %eax
; X86-NEXT: mull %edi
; X86-NEXT: imull %esi, %ebx
-; X86-NEXT: addl %ebx, %edx
; X86-NEXT: imull %ecx, %edi
+; X86-NEXT: addl %ebx, %edi
; X86-NEXT: addl %edi, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
@@ -1537,8 +1562,8 @@ define i64 @test_mul_spec(i64 %x) nounwind {
; X86-NOOPT-NEXT: movl %esi, %eax
; X86-NOOPT-NEXT: mull %edi
; X86-NOOPT-NEXT: imull %esi, %ebx
-; X86-NOOPT-NEXT: addl %ebx, %edx
; X86-NOOPT-NEXT: imull %ecx, %edi
+; X86-NOOPT-NEXT: addl %ebx, %edi
; X86-NOOPT-NEXT: addl %edi, %edx
; X86-NOOPT-NEXT: popl %esi
; X86-NOOPT-NEXT: popl %edi
diff --git a/llvm/test/CodeGen/X86/mul-i1024.ll b/llvm/test/CodeGen/X86/mul-i1024.ll
index 015137b3d350c..fc40f50ab4e5a 100644
--- a/llvm/test/CodeGen/X86/mul-i1024.ll
+++ b/llvm/test/CodeGen/X86/mul-i1024.ll
@@ -10,112 +10,111 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
; X32-NEXT: subl $400, %esp # imm = 0x190
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl 60(%ecx), %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 56(%ecx), %edi
+; X32-NEXT: movl 60(%ecx), %edi
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl (%eax), %ebp
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %ebp
+; X32-NEXT: movl 56(%ecx), %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl (%ebp), %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %ebp
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl 4(%eax), %ecx
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl 4(%ebp), %ecx
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %ecx, %edi
+; X32-NEXT: movl %ecx, %ebp
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %ebp
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movzbl %bl, %eax
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X32-NEXT: movl 48(%edi), %esi
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: mull %ebp
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl 48(%esi), %edi
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl 52(%edi), %eax
+; X32-NEXT: movl 52(%esi), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: mull %ebp
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %ebp
-; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: adcl %esi, %ebx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %ebp, %ecx
-; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: addl %ebx, %ecx
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl (%esp), %edx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl 8(%eax), %ebp
-; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %ebp
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl 8(%eax), %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %ebp
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl %edi, %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %ebp
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl 12(%eax), %edi
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: adcl %ebp, %ebx
-; X32-NEXT: setb (%esp) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edi, %ebp
+; X32-NEXT: movl 12(%eax), %ebx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ebx, %edi
-; X32-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %ebp, %esi
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %esi, %ebp
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %edi
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
@@ -123,166 +122,165 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl (%esp), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl %ebp, %ecx
-; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: mull %ebp
-; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: adcl %esi, %ebp
-; X32-NEXT: setb (%esp) # 1-byte Folded Spill
+; X32-NEXT: movl %ebx, %ecx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: adcl %esi, %ebx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: addl %ebp, %eax
-; X32-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movl 40(%esi), %edi
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl 44(%esi), %ebp
+; X32-NEXT: movl 40(%esi), %ebp
; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: mull %ebx
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl 44(%esi), %ebx
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: mull %edi
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %ebp
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X32-NEXT: movl 32(%ebp), %edi
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-NEXT: movl 32(%edi), %ecx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl 36(%ebp), %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl %ecx, %ebp
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %edi, %esi
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: movl 36(%edi), %edi
; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ebp, %ebx
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edi
+; X32-NEXT: adcl %esi, %ebp
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %edi, %ecx
+; X32-NEXT: addl %ebp, %ecx
; X32-NEXT: movzbl %bl, %eax
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl %esi, %ebx
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl %edi, %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %ebp
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: adcl %ebp, %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: adcl %ebp, %esi
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ebx, %edi
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %esi, %ebp
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %edi
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: addl (%esp), %edi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: mull %ebp
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, %edi
; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb %bl
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ebp
+; X32-NEXT: mull %ebx
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: addl %edi, %ecx
-; X32-NEXT: movl (%esp), %esi # 4-byte Reload
+; X32-NEXT: addl %ebp, %ecx
+; X32-NEXT: movl %edi, %esi
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
; X32-NEXT: adcl %edi, %eax
@@ -290,7 +288,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
@@ -300,67 +298,66 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl 16(%eax), %edi
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: movl 16(%eax), %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ebp, %ebx
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl 20(%eax), %edx
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: mull %edx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl 20(%eax), %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ebp
-; X32-NEXT: setb %bl
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %esi, %edi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %esi, %ebp
+; X32-NEXT: setb %cl
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: movzbl %cl, %eax
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %ebp
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %esi, %ebp
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %ebx, %esi
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: setb %bl
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movzbl %bl, %ecx
+; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebp, %ecx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl %ebx, %esi
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
@@ -368,77 +365,78 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 24(%eax), %ebx
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %ebx
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: adcl $0, %esi
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 28(%eax), %ecx
-; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: adcl %esi, %ebp
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: adcl %ebp, %edi
+; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl %ebp, %esi
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %edi, %ebp
+; X32-NEXT: movzbl %bl, %eax
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %ebp, %eax
-; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: adcl %ebx, %edi
-; X32-NEXT: setb %bl
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl %bl, %edi
-; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
+; X32-NEXT: adcl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: addl %esi, %edi
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: addl %ebp, %edi
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %eax
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
@@ -446,72 +444,71 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl %edi, %edx
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl %ebp, %ecx
-; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: adcl $0, %ebx
; X32-NEXT: adcl $0, %eax
; X32-NEXT: adcl $0, %esi
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: setb (%esp) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: mull %ebp
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: adcl %edi, %ecx
; X32-NEXT: setb %bl
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %ebp
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %esi
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movzbl %bl, %eax
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: addl %ecx, %ebp
; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edi
+; X32-NEXT: adcl %ebx, %esi
; X32-NEXT: setb %bl
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %edi, %ecx
+; X32-NEXT: addl %esi, %ecx
; X32-NEXT: movzbl %bl, %eax
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
@@ -519,73 +516,74 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: adcl %ebx, %ebp
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: adcl %ebp, %ebx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edi, %ebx
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ebp, %edi
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %esi, %ebp
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: addl %ebx, %esi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %edi
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, %esi
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %esi
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl %ebx, %ecx
-; X32-NEXT: mull %ebx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: addl %ebp, %eax
-; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: adcl %esi, %ebx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %ebp, %ecx
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edi, %ebp
+; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: addl %edi, %ecx
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
-; X32-NEXT: adcl %esi, %eax
-; X32-NEXT: movl %eax, %esi
+; X32-NEXT: addl %esi, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
+; X32-NEXT: adcl %edi, %eax
+; X32-NEXT: movl %eax, %edi
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
@@ -595,116 +593,115 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %ebp
-; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movl 24(%esi), %ebp
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
+; X32-NEXT: movl 24(%esi), %ebx
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl 28(%esi), %edi
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: mull %ebx
+; X32-NEXT: movl 28(%esi), %ebp
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: mull %ebp
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %ebp
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %ebx
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X32-NEXT: movl 16(%edi), %esi
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl 16(%esi), %edi
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: mull %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl 20(%edi), %eax
+; X32-NEXT: movl 20(%esi), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl %ebp, %ecx
-; X32-NEXT: mull %ebp
-; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %ebp
-; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: adcl %esi, %ebx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %ebp, %ecx
-; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: addl %ebx, %ecx
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl %esi, %ebx
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl %edi, %ebx
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %ebp
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: adcl %ebp, %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: adcl %ebp, %esi
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edi, %ebp
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ebx, %edi
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %esi, %ebp
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %edi
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: addl (%esp), %edi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
@@ -715,89 +712,89 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl %ebp, %ecx
-; X32-NEXT: mull %ebp
-; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: adcl %esi, %ebp
+; X32-NEXT: movl %ebx, %ecx
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: adcl %esi, %ebx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl (%esp), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movl 8(%esi), %ecx
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl 8(%esi), %ebp
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
-; X32-NEXT: movl 12(%esi), %ebp
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl 12(%esi), %ebx
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl (%esp), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %ebp
+; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X32-NEXT: movl (%ebp), %edi
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl (%esi), %ecx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl 4(%ebp), %eax
-; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl %ecx, %ebp
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %edi, %esi
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: movl 4(%esi), %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ebp, %ebx
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edi
+; X32-NEXT: adcl %esi, %ebp
; X32-NEXT: setb %bl
-; X32-NEXT: movl (%esp), %ebp # 4-byte Reload
-; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %edi, %ecx
+; X32-NEXT: addl %ebp, %ecx
; X32-NEXT: movzbl %bl, %eax
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
@@ -805,44 +802,43 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl %esi, %ebx
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl %edi, %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %ebp
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: adcl %ebp, %ebx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl (%esp), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ebx, %edi
+; X32-NEXT: adcl %ebp, %esi
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %esi, %ebp
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %edi
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
@@ -850,33 +846,33 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: mull %ebp
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, %edi
; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ebp
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ebx
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: addl %edi, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
-; X32-NEXT: adcl %edi, %eax
+; X32-NEXT: addl %ebp, %ecx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
+; X32-NEXT: adcl %ebx, %eax
; X32-NEXT: adcl $0, %edx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
@@ -884,130 +880,130 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ebp, %ebx
-; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %esi, %ecx
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ebp, %edi
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ebp
-; X32-NEXT: setb %bl
+; X32-NEXT: adcl %esi, %ebp
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull %esi
+; X32-NEXT: mull %ebx
; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl (%esp), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl %edi, %esi
-; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %esi, %ebp
+; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebp, %ecx
+; X32-NEXT: adcl %edi, %ecx
; X32-NEXT: setb %bl
-; X32-NEXT: movl (%esp), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: addl %ecx, %esi
; X32-NEXT: movzbl %bl, %eax
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %ebx
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: adcl %esi, %ebp
+; X32-NEXT: adcl %ebp, %edi
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl (%esp), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl %ebp, %esi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %edi, %ebp
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %esi
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %ebp, %eax
-; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: adcl %ebx, %edi
-; X32-NEXT: setb %bl
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl %bl, %edi
-; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
+; X32-NEXT: adcl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: addl %esi, %edi
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: addl %ebp, %edi
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %eax
; X32-NEXT: adcl $0, %edx
@@ -1022,42 +1018,41 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl %edi, %edx
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl %ebp, %ecx
-; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: adcl $0, %ebx
; X32-NEXT: adcl $0, %eax
; X32-NEXT: adcl $0, %esi
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: adcl (%esp), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: adcl %edi, %ecx
; X32-NEXT: setb %bl
; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %esi
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movzbl %bl, %eax
@@ -1067,96 +1062,98 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X32-NEXT: mull %ebp
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl %edi, %ebp
+; X32-NEXT: addl %esi, %ebp
; X32-NEXT: adcl $0, %ebx
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edi
+; X32-NEXT: adcl %ebx, %esi
; X32-NEXT: setb %bl
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl %edi, %ebp
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %edi, %ecx
+; X32-NEXT: addl %esi, %ecx
; X32-NEXT: movzbl %bl, %eax
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: addl (%esp), %ecx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %ebx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl %edi, %esi
-; X32-NEXT: adcl $0, %ebx
; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: adcl %ebx, %ebp
-; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ebp, %edi
-; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: addl %esi, %edi
+; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: adcl %ebp, %ebx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: addl %ebx, %esi
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %edi
+; X32-NEXT: adcl (%esp), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, %esi
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl %ecx, %ebp
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebp, %eax
-; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ebx
+; X32-NEXT: mull %ebp
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: addl %edi, %ebx
+; X32-NEXT: movl (%esp), %ebx # 4-byte Reload
+; X32-NEXT: addl %esi, %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %eax
@@ -1173,7 +1170,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill
; X32-NEXT: adcl $0, %ebp
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: adcl $0, %ebx
@@ -1187,9 +1184,9 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl (%esp), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
@@ -1204,53 +1201,50 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X32-NEXT: movl 32(%ebx), %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X32-NEXT: movl 32(%ebp), %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl 36(%eax), %ecx
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %ecx, %ebx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl 36(%ebp), %ebp
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: mull %ebx
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %ebp
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %ebp
; X32-NEXT: addl %ecx, %ebp
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ebx
@@ -1258,7 +1252,8 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ebx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: addl %ebx, %ecx
@@ -1270,23 +1265,24 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl 40(%eax), %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %ebx
+; X32-NEXT: movl 40(%eax), %ebp
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl %edi, %ebx
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %ebx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, %edi
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %ebp
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl 44(%eax), %ebx
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %ebx
+; X32-NEXT: movl 44(%eax), %esi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %esi, %ebx
; X32-NEXT: movl %edx, %esi
; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, %edi
@@ -1294,16 +1290,17 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ebx
-; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl %esi, %ebp
+; X32-NEXT: movl %ebx, %ebp
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %esi, %ebx
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: adcl $0, %ebx
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
@@ -1320,20 +1317,20 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl %ebx, %ecx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: mull %ebx
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %ebp, %ecx
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: adcl %esi, %ebx
+; X32-NEXT: adcl %esi, %ebp
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
@@ -1341,61 +1338,60 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: mull %ebp
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %ebp
+; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %ebx
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl (%esp), %edi # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %esi
-; X32-NEXT: setb %bl
+; X32-NEXT: adcl %esi, %ebx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl %edi, %ebp
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %esi, %ecx
-; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: addl %ebx, %ecx
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
@@ -1404,76 +1400,75 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: mull %edi
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %esi, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %ebp
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: adcl %ebp, %ebx
+; X32-NEXT: adcl %ebp, %esi
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl (%esp), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl %ebx, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %esi, %ebx
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %esi
+; X32-NEXT: adcl $0, %ebx
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: adcl $0, %edi
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ebp
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: addl %esi, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: addl %ebx, %ecx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
; X32-NEXT: adcl %ebx, %eax
; X32-NEXT: adcl $0, %edx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
@@ -1483,33 +1478,34 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl 48(%eax), %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %esi
+; X32-NEXT: movl 48(%eax), %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl %edi, %ecx
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %esi, %ecx
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ebp, %edi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ebp, %ebx
; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 52(%eax), %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: mull %edx
; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ebp
; X32-NEXT: setb %bl
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edi, %esi
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movzbl %bl, %eax
@@ -1517,29 +1513,29 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl (%esp), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %ebp
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %ebp, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %eax, %esi
; X32-NEXT: addl %ecx, %esi
-; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
@@ -1554,34 +1550,34 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 60(%eax), %ecx
; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: adcl %ebx, %ebp
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: adcl %edi, %ebp
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl (%esp), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ebp, %ebx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ebp, %edi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, %edi
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
@@ -1593,25 +1589,25 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, %ebp
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
+; X32-NEXT: adcl $0, %ebx
; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %esi
; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: adcl %edi, %esi
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: adcl %ebx, %esi
+; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
+; X32-NEXT: movzbl %bl, %esi
; X32-NEXT: adcl %esi, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: addl %ebx, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: addl %edi, %ebx
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %eax
@@ -1625,7 +1621,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl %edi, %edx
+; X32-NEXT: movl %ebx, %edx
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %ebp, %ecx
; X32-NEXT: adcl $0, %ecx
@@ -1668,27 +1664,27 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movzbl %bl, %eax
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl %ecx, %ebp
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %esi
; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: adcl %ebx, %esi
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, %ecx
@@ -1708,30 +1704,30 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %ebx
; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: adcl %edi, %ebp
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: adcl %ebx, %ebp
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ebp, %edi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ebp, %ebx
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %edi
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, %ebx
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
@@ -1743,30 +1739,30 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebp
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %esi
; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: adcl %ebx, %esi
-; X32-NEXT: setb %bl
+; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: addl %edi, %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: addl %ebx, %edi
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X32-NEXT: movl %ecx, %edi
-; X32-NEXT: adcl %eax, %edi
+; X32-NEXT: movl %ecx, %ebx
+; X32-NEXT: adcl %eax, %ebx
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
@@ -1779,10 +1775,11 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X32-NEXT: adcl %eax, %ebx
-; X32-NEXT: adcl $0, %ebp
-; X32-NEXT: adcl $0, %edi
+; X32-NEXT: adcl %eax, %edi
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
@@ -1793,7 +1790,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl (%esp), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
@@ -1806,6 +1803,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %edx
; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: adcl $0, %ebx
; X32-NEXT: adcl $0, %ebp
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
@@ -1849,55 +1847,54 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: setb (%esp) # 1-byte Folded Spill
; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %ebx
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl %ebx, %ebp
+; X32-NEXT: addl %ecx, %ebp
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ebx
; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ebx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl %ecx, %esi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %ebx, %ecx
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: addl %ebx, %esi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %edi
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, %edi
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
@@ -1905,24 +1902,24 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: mull %ebx
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: adcl %ebp, %esi
+; X32-NEXT: adcl %ebp, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ebx
; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl %esi, %ebp
+; X32-NEXT: addl %ecx, %ebp
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %edx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: adcl (%esp), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
@@ -1930,7 +1927,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
@@ -1950,7 +1947,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %ebp, (%esp) # 4-byte Folded Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
@@ -1960,268 +1957,267 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl %ecx, %ebp
-; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %ebx
+; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: setb %bl
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %ebp
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: movzbl %bl, %eax
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %ebp
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: addl %ebp, %ebx
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebp, %esi
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: adcl %esi, %ebp
+; X32-NEXT: setb %bl
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl %edi, %ebx
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %esi, %ecx
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: addl %ebp, %esi
+; X32-NEXT: movzbl %bl, %eax
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %esi, %edi
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: adcl %ebx, %ebp
-; X32-NEXT: setb %bl
+; X32-NEXT: adcl %ebp, %ecx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl %ebp, %esi
-; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %esi
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl %ecx, %ebp
-; X32-NEXT: adcl $0, %edi
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebp, %eax
-; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ebx
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: addl %esi, %ecx
-; X32-NEXT: movl %ebp, %esi
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: addl %ebp, %ecx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
; X32-NEXT: adcl %ebx, %eax
; X32-NEXT: adcl $0, %edx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl %edi, %ebp
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %esi, %ebp
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ebx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: setb %bl
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movzbl %bl, %eax
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, %ebp
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %ecx
+; X32-NEXT: adcl %ebx, %edi
; X32-NEXT: setb %bl
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl %ecx, %esi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %edi, %ecx
; X32-NEXT: movzbl %bl, %eax
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %ecx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %ebp
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movl %eax, %esi
; X32-NEXT: adcl %ebp, %ebx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl %ebx, %ebp
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edi, %ebp
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ebx, %edi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, %edi
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl %ebp, %ecx
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: adcl %edi, %esi
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %esi, %ebp
+; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
-; X32-NEXT: adcl %esi, %edx
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movzbl %bl, %ecx
+; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: addl %edi, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: addl %ebp, %edi
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %eax
; X32-NEXT: adcl $0, %edx
@@ -2234,62 +2230,63 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl %edi, %edx
+; X32-NEXT: movl %ebx, %edx
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %edi, %ecx
+; X32-NEXT: adcl $0, %ecx
; X32-NEXT: adcl $0, %eax
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: addl (%esp), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: adcl $0, %edi
+; X32-NEXT: adcl %esi, %ebx
+; X32-NEXT: setb (%esp) # 1-byte Folded Spill
; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: mull %ebp
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: mull %ecx
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %ebp
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: addl %esi, %ebx
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %esi
@@ -2299,82 +2296,83 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: setb %bl
; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %esi, %ecx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %esi, %edi
; X32-NEXT: movzbl %bl, %eax
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %ebp
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: adcl %ebx, %edi
+; X32-NEXT: adcl %esi, %ebx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %edi, %ebx
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %ebx, %ecx
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: adcl $0, %ecx
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl (%esp), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: setb (%esp) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ebx, %ebp
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: addl %ebp, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: adcl %esi, %edi
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: addl %ebx, %ebp
+; X32-NEXT: mull %ebx
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X32-NEXT: adcl %ecx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload
+; X32-NEXT: movl %edi, %ebp
+; X32-NEXT: adcl %eax, %ebp
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
@@ -2386,14 +2384,14 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X32-NEXT: adcl %eax, %ebp
-; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: adcl %eax, %ebx
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: adcl $0, %ebp
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
@@ -2420,143 +2418,141 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %ebp
; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 64(%eax), %edi
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: addl %esi, %ebx
; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl 68(%eax), %ecx
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl %ecx, %esi
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl 68(%eax), %esi
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %esi, %ecx
+; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %edi, %esi
; X32-NEXT: setb %bl
; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movzbl %bl, %eax
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %esi, %ecx
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl (%esp), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl %ebp, %esi
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %ebp
-; X32-NEXT: setb %bl
-; X32-NEXT: movl %ecx, %esi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %ebp, %ecx
-; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: adcl %edi, %ebp
+; X32-NEXT: setb %cl
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ebp, %edi
+; X32-NEXT: movzbl %cl, %eax
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl 72(%eax), %edi
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %edi
+; X32-NEXT: movl 72(%eax), %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %edi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %ebp
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl 76(%eax), %esi
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: adcl %ebp, %ebx
+; X32-NEXT: movl 76(%eax), %ecx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: adcl %ebp, %esi
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %esi, %ebp
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl %ebx, %esi
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %esi, %ecx
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %esi
+; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, %ecx
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl %ebp, %ecx
-; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: mull %ebp
-; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: adcl %edi, %ebp
+; X32-NEXT: adcl %ebp, %esi
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: addl %ebp, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: mull %edi
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
+; X32-NEXT: adcl %esi, %edx
+; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
@@ -2566,97 +2562,96 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %esi, %ebp
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ebx, %ebp
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: mull %ebp
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl (%esp), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: setb %bl
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %ebp
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: mull %ebx
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %ebx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl %ecx, %ebp
-; X32-NEXT: adcl $0, %ebx
; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: addl %ebp, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %esi
-; X32-NEXT: setb %bl
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl %edi, %ebp
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: adcl %ebp, %esi
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: addl %esi, %ecx
-; X32-NEXT: movzbl %bl, %eax
-; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %esi, %edi
-; X32-NEXT: adcl $0, %ebp
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: adcl %ebp, %ebx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl %ebx, %esi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %esi
+; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, %ecx
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
@@ -2664,42 +2659,44 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: addl %esi, %ebx
; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X32-NEXT: mull %ebp
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %edi, %esi
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %ebp
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movzbl %bl, %ebx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: adcl %ebx, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT: addl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: addl %esi, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
-; X32-NEXT: adcl %edi, %eax
-; X32-NEXT: adcl $0, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
+; X32-NEXT: adcl %ebx, %eax
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
@@ -2707,64 +2704,63 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 80(%eax), %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 84(%eax), %ecx
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: movl %ecx, %ebp
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: mull %ecx
+; X32-NEXT: movl %ecx, %ebx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb %bl
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %ebp
+; X32-NEXT: mull %ebx
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl %ecx, %ebp
-; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %edi
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: adcl %ebp, %edi
+; X32-NEXT: setb %bl
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl %edi, %esi
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %edi, %ecx
+; X32-NEXT: movzbl %bl, %eax
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
@@ -2772,38 +2768,39 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 88(%eax), %edi
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 92(%eax), %edi
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: adcl %ebp, %ecx
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: adcl %ebx, %ebp
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: movl %edi, %ebx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ebp, %edi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, %edi
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
@@ -2816,26 +2813,26 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl %edi, %ecx
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: adcl %esi, %edi
+; X32-NEXT: movl %ebx, %ecx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: adcl %esi, %ebx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: addl %ebp, %edi
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: addl %edi, %ebx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %eax
; X32-NEXT: adcl $0, %edx
@@ -2848,15 +2845,16 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl %edi, %edx
+; X32-NEXT: movl %ebx, %edx
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %ebp, %ecx
+; X32-NEXT: adcl $0, %ecx
; X32-NEXT: adcl $0, %eax
; X32-NEXT: adcl $0, %esi
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
@@ -2866,130 +2864,130 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: addl %ebp, %edi
; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: mull %ebx
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: adcl %esi, %ebp
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: mull %ebx
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %esi
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl %ecx, %ebp
-; X32-NEXT: adcl $0, %esi
+; X32-NEXT: addl %esi, %ebp
+; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ebx
+; X32-NEXT: adcl %edi, %esi
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %ebx, %ecx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %esi, %edi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %ebx
-; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %ebp
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: adcl %ebp, %esi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: adcl %esi, %ebp
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ebx
-; X32-NEXT: movl %ebx, %ebp
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %esi, %ebx
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %ebp, %ecx
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, %ecx
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ebx, %ebp
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl %ebp, %ecx
-; X32-NEXT: mull %ebp
-; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: adcl %esi, %ebp
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: adcl %esi, %edi
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: addl %ebp, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: addl %ebx, %ecx
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
-; X32-NEXT: adcl %esi, %eax
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: adcl $0, %edx
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %edi, %ebx
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: adcl %eax, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %ebx
+; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
@@ -2999,14 +2997,14 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X32-NEXT: adcl %eax, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
@@ -3041,7 +3039,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl (%esp), %esi # 4-byte Reload
; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %ebx
@@ -3088,7 +3086,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %eax, %esi
; X32-NEXT: adcl %ebp, %ebx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl (%esp), %eax # 4-byte Reload
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, %edi
@@ -3137,56 +3135,52 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %esi, %ecx
-; X32-NEXT: imull %eax, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: imull %eax, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: imull %ebp, %esi
-; X32-NEXT: addl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl %esi, %ebx
+; X32-NEXT: addl %edx, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: imull %ebx, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %esi, %edx
+; X32-NEXT: movl %eax, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: imull %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: imull %edi, %esi
-; X32-NEXT: addl %edx, %esi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: imull %esi, %ebp
+; X32-NEXT: addl %edx, %ebp
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %edx, %ebp
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl %ebx, %ebp
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: addl %ebp, %ebx
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %ebp
-; X32-NEXT: movl %ebp, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ebp
-; X32-NEXT: setb %cl
+; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %ecx
; X32-NEXT: addl %ebp, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -3196,50 +3190,48 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X32-NEXT: mull %ebp
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %ecx, %edx
-; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl (%esp), %ebx # 4-byte Reload
+; X32-NEXT: imull %ebx, %ebp
+; X32-NEXT: addl %ecx, %ebp
; X32-NEXT: addl %edx, %ebp
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: imull %ebx, %ecx
+; X32-NEXT: movl %eax, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: imull %edi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: imull %edi, %ecx
-; X32-NEXT: addl %edx, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebp, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: imull %ecx, %esi
+; X32-NEXT: addl %edx, %esi
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %edx, %esi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebp, %esi
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %esi, %ecx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %ebp
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, %edi
; X32-NEXT: addl %ecx, %edi
; X32-NEXT: adcl %ebp, %ebx
; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: mull (%esp) # 4-byte Folded Reload
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: adcl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
@@ -3248,79 +3240,79 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edi, (%esp) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movl 104(%esi), %ebx
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl 104(%ecx), %ebp
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl 108(%esi), %esi
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl %ecx, %ebp
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl 108(%ecx), %ecx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ebx, %edi
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: mull %ebx
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %esi, %ebp
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: mull %ebx
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl %ebx, %edi
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movl 96(%esi), %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl 96(%ecx), %esi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl 100(%esi), %eax
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl 100(%ecx), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %ebp
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebp, %esi
+; X32-NEXT: adcl %ebp, %ecx
; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %esi, %ecx
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: addl %ecx, %esi
; X32-NEXT: movzbl %bl, %eax
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl %edi, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -3331,32 +3323,31 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %eax, %edi
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, %edi
; X32-NEXT: adcl %ebx, %ebp
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %esi, %ebx
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl %ebp, %esi
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %ebp, %ecx
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl %eax, %ebx
+; X32-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: adcl $0, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -3367,21 +3358,21 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %eax, %ebp
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl %ebx, %ecx
-; X32-NEXT: mull %ebx
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: adcl %edi, %ebx
+; X32-NEXT: adcl %edi, %esi
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
+; X32-NEXT: adcl %esi, %edx
+; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl %ebx, %ebp
; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %eax
@@ -3389,101 +3380,98 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl 112(%ecx), %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: imull %edi, %esi
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl 112(%ecx), %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: imull %eax, %edi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: mull %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %esi, %edx
; X32-NEXT: movl 116(%ecx), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: imull %eax, %ebx
+; X32-NEXT: addl %edi, %ebx
; X32-NEXT: addl %edx, %ebx
; X32-NEXT: movl 120(%ecx), %eax
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: imull %ecx, %esi
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: imull %esi, %ecx
+; X32-NEXT: movl 124(%edx), %edi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: imull %ebp, %edi
+; X32-NEXT: addl %ecx, %edi
; X32-NEXT: mull %ebp
-; X32-NEXT: addl %esi, %edx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movl 124(%esi), %esi
-; X32-NEXT: imull %ebp, %esi
-; X32-NEXT: addl %edx, %esi
+; X32-NEXT: addl %edx, %edi
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %esi
+; X32-NEXT: adcl %ebx, %edi
; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: mull %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl %ebp, %ebx
-; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ecx, %ebp
+; X32-NEXT: adcl %esi, %ebp
; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %ecx
; X32-NEXT: addl %ebp, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: movzbl %bl, %esi
+; X32-NEXT: adcl %esi, %edx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %edx
+; X32-NEXT: adcl %edi, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: imull %eax, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: mull %ebp
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %esi, %edx
-; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
-; X32-NEXT: addl %edx, %ebp
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %edi
+; X32-NEXT: imull %eax, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: imull %ebx, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl %edi, %edx
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: addl %edx, %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %eax, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: imull %esi, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: imull %eax, %ecx
+; X32-NEXT: imull %edi, %ecx
; X32-NEXT: addl %edx, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebp, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
+; X32-NEXT: mull %edi
+; X32-NEXT: addl %edx, %ecx
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebx, %ecx
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %ecx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl %ebp, %ebx
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %ebx, %ecx
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edi, %ebp
; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
@@ -3492,9 +3480,10 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movzbl %bl, %edi
; X32-NEXT: adcl %edi, %edx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
@@ -3516,7 +3505,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl (%esp), %ebx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
@@ -3553,7 +3542,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: addl %ecx, %edi
; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: movl (%esp), %ebp # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: addl %edi, %eax
@@ -3644,7 +3633,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
@@ -3664,7 +3653,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %ebx, (%esp) # 4-byte Folded Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
@@ -3689,7 +3678,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: addl %ecx, %edi
; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: movl (%esp), %ebp # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: addl %edi, %eax
@@ -3719,7 +3708,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: addl %edi, %ebp
; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl (%esp), %edi # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %ebx
; X32-NEXT: addl %ebp, %eax
@@ -3814,7 +3803,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
@@ -3959,7 +3948,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: adcl $0, %eax
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: addl (%esp), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -3994,7 +3983,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movzbl %bl, %eax
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
@@ -4017,7 +4006,6 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: adcl %ebx, %esi
; X32-NEXT: setb %bl
; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl %edi, %ebp
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: addl %esi, %ecx
@@ -4027,18 +4015,18 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: mull %edi
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %esi, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %ebp
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
@@ -4060,9 +4048,9 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %edx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: adcl (%esp), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: setb (%esp) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
@@ -4093,7 +4081,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: addl %ebp, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %ebx
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
@@ -4120,7 +4108,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %edi
@@ -4152,7 +4140,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl (%esp), %ebx # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %edi
@@ -4173,14 +4161,14 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: addl %esi, %edi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl (%esp), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 104(%eax), %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: mull %esi
@@ -4202,7 +4190,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: adcl %ebp, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl (%esp), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, %esi
@@ -4219,7 +4207,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl (%esp), %edi # 4-byte Reload
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -4251,56 +4239,53 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %edi, %esi
-; X32-NEXT: imull %eax, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: imull %eax, %edi
+; X32-NEXT: movl (%esp), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: imull %ebx, %ecx
+; X32-NEXT: addl %edi, %ecx
; X32-NEXT: addl %edx, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %esi
+; X32-NEXT: movl %eax, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: imull %edi, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: mull %ebp
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %esi, %edx
+; X32-NEXT: imull %edi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: imull %ebp, %esi
-; X32-NEXT: addl %edx, %esi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: imull %ecx, %esi
+; X32-NEXT: addl %edx, %esi
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %edx, %esi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl (%esp), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ebp, %edi
+; X32-NEXT: addl (%esp), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: mull %ebx
-; X32-NEXT: movl %ebx, %ebp
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ebp
+; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -4309,52 +4294,51 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: imull %eax, %ecx
; X32-NEXT: movl 120(%edi), %esi
-; X32-NEXT: movl %edi, %ebx
; X32-NEXT: mull %esi
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %ecx, %edx
-; X32-NEXT: imull (%esp), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: imull %ebp, %esi
+; X32-NEXT: addl %ecx, %esi
; X32-NEXT: addl %edx, %esi
-; X32-NEXT: movl 112(%edi), %edi
-; X32-NEXT: movl 116(%ebx), %ebp
+; X32-NEXT: movl 112(%edi), %ecx
+; X32-NEXT: movl 116(%edi), %edi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: imull %ebp, %ebx
-; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ebx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: imull %edi, %ecx
-; X32-NEXT: addl %edx, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %eax, %edx
+; X32-NEXT: imull %edi, %edx
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: imull %ecx, %ebx
+; X32-NEXT: addl %edx, %ebx
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: addl %edx, %ebx
+; X32-NEXT: addl (%esp), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: adcl %esi, %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl %esi, %ebp
-; X32-NEXT: adcl $0, %ebx
; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl (%esp), %esi # 4-byte Reload
; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ebp, %edi
+; X32-NEXT: addl %esi, %edi
; X32-NEXT: adcl %ebx, %ecx
; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: mull %ebp
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: addl (%esp), %eax # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
@@ -4364,7 +4348,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edi, (%esp) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
@@ -4393,7 +4377,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %ecx
; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movzbl %bl, %eax
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -4427,7 +4411,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
@@ -4461,9 +4445,9 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl (%esp), %ebp # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: setb (%esp) # 1-byte Folded Spill
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
@@ -4492,101 +4476,98 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl %ecx, %ebx
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %edi, %esi
-; X32-NEXT: imull %eax, %esi
+; X32-NEXT: imull %eax, %edi
+; X32-NEXT: movl %eax, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X32-NEXT: addl %esi, %edx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X32-NEXT: imull %ebp, %ecx
+; X32-NEXT: addl %edi, %ecx
; X32-NEXT: addl %edx, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: imull %ebx, %edi
+; X32-NEXT: movl %eax, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %edi, %edx
+; X32-NEXT: imull %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: imull %esi, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: imull %ecx, %edi
; X32-NEXT: addl %edx, %edi
-; X32-NEXT: addl (%esp), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %edx, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %ecx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %esi, %ebx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: mull %ebp
-; X32-NEXT: movl %ebp, %esi
; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edi, %ebp
; X32-NEXT: setb %cl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl (%esp), %eax # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: imull %esi, %ecx
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: imull %eax, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: mull %ebx
-; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: movl %eax, %edi
; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl %ecx, %ebx
; X32-NEXT: addl %edx, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: imull %edi, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: mull %ebp
-; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: movl %eax, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: imull %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X32-NEXT: imull %ebp, %ecx
; X32-NEXT: addl %edx, %ecx
-; X32-NEXT: addl (%esp), %eax # 4-byte Folded Reload
+; X32-NEXT: mull %ebp
+; X32-NEXT: addl %edx, %ecx
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %ebx, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: movl %ebp, %ebx
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %ebp, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: addl %ebp, %ecx
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %edi
@@ -4602,7 +4583,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: adcl %ecx, %esi
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
@@ -4624,9 +4605,9 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
-; X32-NEXT: movl %ebp, (%esp) # 4-byte Spill
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl (%esp), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
@@ -4637,9 +4618,9 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
@@ -4677,9 +4658,9 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl (%esp), %edx # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -4717,7 +4698,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl (%esp), %edi # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
@@ -4804,201 +4785,200 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X64-NEXT: pushq %rbx
; X64-NEXT: subq $240, %rsp
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %rsi, %rbp
; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq 40(%rdi), %r12
-; X64-NEXT: movq 32(%rdi), %r14
-; X64-NEXT: movq 56(%rdi), %r15
+; X64-NEXT: movq 40(%rdi), %rbx
+; X64-NEXT: movq 32(%rdi), %r12
+; X64-NEXT: movq 56(%rdi), %r14
; X64-NEXT: movq 48(%rdi), %r10
; X64-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq (%rsi), %r11
-; X64-NEXT: movq 8(%rsi), %r8
+; X64-NEXT: movq 8(%rsi), %rcx
+; X64-NEXT: movq %rsi, %r13
; X64-NEXT: movq %r10, %rax
; X64-NEXT: mulq %r11
-; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq %rax, %rdi
-; X64-NEXT: movq %r15, %rax
-; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: mulq %r11
; X64-NEXT: movq %rdx, %rsi
+; X64-NEXT: movq %rax, %rbp
+; X64-NEXT: movq %r14, %rax
+; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: mulq %r11
+; X64-NEXT: movq %rdx, %rdi
; X64-NEXT: movq %rax, %r9
-; X64-NEXT: addq %rcx, %r9
-; X64-NEXT: adcq $0, %rsi
+; X64-NEXT: addq %rsi, %r9
+; X64-NEXT: adcq $0, %rdi
; X64-NEXT: movq %r10, %rax
-; X64-NEXT: mulq %r8
+; X64-NEXT: mulq %rcx
; X64-NEXT: movq %rdx, %r10
-; X64-NEXT: movq %rax, %rcx
-; X64-NEXT: addq %r9, %rcx
-; X64-NEXT: adcq %rsi, %r10
+; X64-NEXT: movq %rax, %r8
+; X64-NEXT: addq %r9, %r8
+; X64-NEXT: adcq %rdi, %r10
; X64-NEXT: setb %al
; X64-NEXT: movzbl %al, %r9d
-; X64-NEXT: movq %r15, %rax
-; X64-NEXT: mulq %r8
-; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %rdx, %r13
-; X64-NEXT: movq %rax, %rsi
-; X64-NEXT: addq %r10, %rsi
-; X64-NEXT: adcq %r9, %r13
; X64-NEXT: movq %r14, %rax
+; X64-NEXT: mulq %rcx
+; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq %rdx, %r15
+; X64-NEXT: movq %rax, %rdi
+; X64-NEXT: addq %r10, %rdi
+; X64-NEXT: adcq %r9, %r15
+; X64-NEXT: movq %r12, %rax
; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: mulq %r11
; X64-NEXT: movq %rdx, %r9
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %r12, %rax
+; X64-NEXT: movq %rbx, %r14
+; X64-NEXT: movq %rbx, %rax
; X64-NEXT: mulq %r11
; X64-NEXT: movq %rdx, %r10
; X64-NEXT: movq %rax, %r11
; X64-NEXT: addq %r9, %r11
; X64-NEXT: adcq $0, %r10
-; X64-NEXT: movq %r14, %rax
-; X64-NEXT: mulq %r8
+; X64-NEXT: movq %r12, %rax
+; X64-NEXT: mulq %rcx
; X64-NEXT: movq %rdx, %r9
; X64-NEXT: addq %r11, %rax
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: adcq %r10, %r9
; X64-NEXT: setb %r10b
-; X64-NEXT: movq %r12, %rax
-; X64-NEXT: mulq %r8
+; X64-NEXT: movq %rbx, %rax
+; X64-NEXT: mulq %rcx
; X64-NEXT: movq %rdx, %r11
; X64-NEXT: movq %rax, %rbx
; X64-NEXT: addq %r9, %rbx
; X64-NEXT: movzbl %r10b, %eax
; X64-NEXT: adcq %rax, %r11
-; X64-NEXT: addq %rdi, %rbx
-; X64-NEXT: adcq %rcx, %r11
-; X64-NEXT: adcq $0, %rsi
-; X64-NEXT: adcq $0, %r13
-; X64-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq 16(%rbp), %r15
-; X64-NEXT: movq %r14, %r10
-; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %r14, %rax
-; X64-NEXT: mulq %r15
-; X64-NEXT: movq %rdx, %rdi
-; X64-NEXT: movq %rax, %r8
-; X64-NEXT: movq %r12, %rax
-; X64-NEXT: movq %r12, %rcx
+; X64-NEXT: addq %rbp, %rbx
+; X64-NEXT: adcq %r8, %r11
+; X64-NEXT: adcq $0, %rdi
+; X64-NEXT: adcq $0, %r15
+; X64-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq 16(%r13), %rcx
; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: mulq %r15
+; X64-NEXT: movq %r12, %rax
+; X64-NEXT: mulq %rcx
+; X64-NEXT: movq %rdx, %r8
+; X64-NEXT: movq %rax, %rsi
+; X64-NEXT: movq %r14, %rax
+; X64-NEXT: movq %r14, %rbp
+; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: mulq %rcx
; X64-NEXT: movq %rdx, %r9
; X64-NEXT: movq %rax, %r14
-; X64-NEXT: addq %rdi, %r14
+; X64-NEXT: addq %r8, %r14
; X64-NEXT: adcq $0, %r9
-; X64-NEXT: movq 24(%rbp), %rdi
-; X64-NEXT: movq %r10, %rax
-; X64-NEXT: mulq %rdi
-; X64-NEXT: movq %rdi, %rbp
+; X64-NEXT: movq 24(%r13), %r13
+; X64-NEXT: movq %r12, %rax
+; X64-NEXT: mulq %r13
; X64-NEXT: movq %rdx, %r12
; X64-NEXT: addq %r14, %rax
; X64-NEXT: movq %rax, %r14
; X64-NEXT: adcq %r9, %r12
; X64-NEXT: setb %r10b
-; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: mulq %rdi
-; X64-NEXT: movq %rdx, %rdi
+; X64-NEXT: movq %rbp, %rax
+; X64-NEXT: mulq %r13
+; X64-NEXT: movq %rdx, %r8
; X64-NEXT: movq %rax, %r9
; X64-NEXT: addq %r12, %r9
; X64-NEXT: movzbl %r10b, %eax
-; X64-NEXT: adcq %rax, %rdi
-; X64-NEXT: addq %rbx, %r8
-; X64-NEXT: movq %r8, (%rsp) # 8-byte Spill
+; X64-NEXT: adcq %rax, %r8
+; X64-NEXT: addq %rbx, %rsi
+; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: adcq %r11, %r14
-; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq %r14, (%rsp) # 8-byte Spill
; X64-NEXT: adcq $0, %r9
-; X64-NEXT: adcq $0, %rdi
-; X64-NEXT: addq %rsi, %r9
-; X64-NEXT: adcq %r13, %rdi
+; X64-NEXT: adcq $0, %r8
+; X64-NEXT: addq %rdi, %r9
+; X64-NEXT: adcq %r15, %r8
; X64-NEXT: setb %r10b
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
; X64-NEXT: movq %rbx, %rax
-; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: mulq %r15
-; X64-NEXT: movq %rdx, %rcx
+; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: mulq %rcx
+; X64-NEXT: movq %rdx, %rsi
; X64-NEXT: movq %rax, %r14
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
; X64-NEXT: movq %r12, %rax
-; X64-NEXT: mulq %r15
-; X64-NEXT: movq %rdx, %rsi
+; X64-NEXT: mulq %rcx
+; X64-NEXT: movq %rdx, %rdi
; X64-NEXT: movq %rax, %r11
-; X64-NEXT: addq %rcx, %r11
-; X64-NEXT: adcq $0, %rsi
+; X64-NEXT: addq %rsi, %r11
+; X64-NEXT: adcq $0, %rdi
; X64-NEXT: movq %rbx, %rax
-; X64-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: mulq %rbp
-; X64-NEXT: movq %rdx, %rcx
+; X64-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: mulq %r13
+; X64-NEXT: movq %rdx, %rsi
; X64-NEXT: addq %r11, %rax
; X64-NEXT: movq %rax, %r11
-; X64-NEXT: adcq %rsi, %rcx
-; X64-NEXT: setb %sil
+; X64-NEXT: adcq %rdi, %rsi
+; X64-NEXT: setb %dil
; X64-NEXT: movq %r12, %rax
-; X64-NEXT: mulq %rbp
-; X64-NEXT: addq %rcx, %rax
-; X64-NEXT: movzbl %sil, %ecx
-; X64-NEXT: adcq %rcx, %rdx
+; X64-NEXT: mulq %r13
+; X64-NEXT: addq %rsi, %rax
+; X64-NEXT: movzbl %dil, %esi
+; X64-NEXT: adcq %rsi, %rdx
; X64-NEXT: addq %r9, %r14
; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq %rdi, %r11
+; X64-NEXT: adcq %r8, %r11
; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movzbl %r10b, %ecx
-; X64-NEXT: adcq %rcx, %rax
+; X64-NEXT: movzbl %r10b, %esi
+; X64-NEXT: adcq %rsi, %rax
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: adcq $0, %rdx
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
-; X64-NEXT: movq 16(%r8), %rsi
-; X64-NEXT: movq %rsi, %rax
-; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT: movq 16(%rcx), %r10
+; X64-NEXT: movq %r10, %rax
+; X64-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
; X64-NEXT: mulq %r13
; X64-NEXT: movq %rax, %r9
-; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq 24(%r8), %r14
+; X64-NEXT: movq %rdx, %rsi
+; X64-NEXT: movq 24(%rcx), %r14
; X64-NEXT: movq %r14, %rax
; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: mulq %r13
-; X64-NEXT: movq %r13, %r15
+; X64-NEXT: movq %r13, %rbx
; X64-NEXT: movq %rdx, %rdi
-; X64-NEXT: movq %rax, %r11
-; X64-NEXT: addq %rcx, %r11
+; X64-NEXT: movq %rax, %r8
+; X64-NEXT: addq %rsi, %r8
; X64-NEXT: adcq $0, %rdi
-; X64-NEXT: movq %rsi, %rax
+; X64-NEXT: movq %r10, %rax
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
; X64-NEXT: mulq %r13
-; X64-NEXT: movq %rdx, %rbx
+; X64-NEXT: movq %rdx, %r11
; X64-NEXT: movq %rax, %rsi
-; X64-NEXT: addq %r11, %rsi
-; X64-NEXT: adcq %rdi, %rbx
+; X64-NEXT: addq %r8, %rsi
+; X64-NEXT: adcq %rdi, %r11
; X64-NEXT: setb %r10b
; X64-NEXT: movq %r14, %rax
; X64-NEXT: mulq %r13
; X64-NEXT: movq %r13, %r12
-; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq %rax, %rdi
-; X64-NEXT: addq %rbx, %rdi
+; X64-NEXT: movq %rdx, %rbp
+; X64-NEXT: movq %rax, %r8
+; X64-NEXT: addq %r11, %r8
; X64-NEXT: movzbl %r10b, %eax
-; X64-NEXT: adcq %rax, %rcx
-; X64-NEXT: movq (%r8), %r13
+; X64-NEXT: adcq %rax, %rbp
+; X64-NEXT: movq (%rcx), %r13
; X64-NEXT: movq %r13, %rax
-; X64-NEXT: mulq %r15
+; X64-NEXT: mulq %rbx
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %r11
-; X64-NEXT: movq 8(%r8), %rbp
-; X64-NEXT: movq %rbp, %rax
-; X64-NEXT: mulq %r15
+; X64-NEXT: movq 8(%rcx), %rdi
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: mulq %rbx
; X64-NEXT: movq %rdx, %rbx
; X64-NEXT: movq %rax, %r14
; X64-NEXT: addq %r11, %r14
; X64-NEXT: adcq $0, %rbx
; X64-NEXT: movq %r13, %rax
-; X64-NEXT: movq %r12, %r11
+; X64-NEXT: movq %r12, %rcx
; X64-NEXT: mulq %r12
; X64-NEXT: movq %rdx, %r12
; X64-NEXT: addq %r14, %rax
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: adcq %rbx, %r12
; X64-NEXT: setb %r10b
-; X64-NEXT: movq %rbp, %rax
-; X64-NEXT: mulq %r11
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: mulq %rcx
; X64-NEXT: movq %rdx, %r11
; X64-NEXT: movq %rax, %rbx
; X64-NEXT: addq %r12, %rbx
@@ -5006,31 +4986,32 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X64-NEXT: adcq %rax, %r11
; X64-NEXT: addq %r9, %rbx
; X64-NEXT: adcq %rsi, %r11
-; X64-NEXT: adcq $0, %rdi
-; X64-NEXT: adcq $0, %rcx
-; X64-NEXT: movq %r13, %r10
+; X64-NEXT: adcq $0, %r8
+; X64-NEXT: adcq $0, %rbp
+; X64-NEXT: movq %r13, %rcx
; X64-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %r13, %rax
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
-; X64-NEXT: mulq %r8
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT: mulq %r15
; X64-NEXT: movq %rdx, %rsi
; X64-NEXT: movq %rax, %r13
-; X64-NEXT: movq %rbp, %rax
-; X64-NEXT: mulq %r8
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: mulq %r15
; X64-NEXT: movq %rdx, %r9
; X64-NEXT: movq %rax, %r14
; X64-NEXT: addq %rsi, %r14
; X64-NEXT: adcq $0, %r9
-; X64-NEXT: movq %r10, %rax
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
-; X64-NEXT: mulq %r15
+; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT: mulq %rcx
; X64-NEXT: movq %rdx, %r12
; X64-NEXT: addq %r14, %rax
; X64-NEXT: movq %rax, %r14
; X64-NEXT: adcq %r9, %r12
; X64-NEXT: setb %r10b
-; X64-NEXT: movq %rbp, %rax
-; X64-NEXT: mulq %r15
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: mulq %rcx
; X64-NEXT: movq %rdx, %r9
; X64-NEXT: movq %rax, %rsi
; X64-NEXT: addq %r12, %rsi
@@ -5042,202 +5023,199 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: adcq $0, %rsi
; X64-NEXT: adcq $0, %r9
-; X64-NEXT: addq %rdi, %rsi
-; X64-NEXT: adcq %rcx, %r9
+; X64-NEXT: addq %r8, %rsi
+; X64-NEXT: adcq %rbp, %r9
; X64-NEXT: setb %r10b
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: mulq %r8
-; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq %rax, %r13
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: mulq %r15
+; X64-NEXT: movq %rdx, %rdi
+; X64-NEXT: movq %rax, %r14
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
; X64-NEXT: movq %r12, %rax
-; X64-NEXT: mulq %r8
-; X64-NEXT: movq %rdx, %r11
-; X64-NEXT: movq %rax, %rbx
-; X64-NEXT: addq %rcx, %rbx
-; X64-NEXT: adcq $0, %r11
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: movq %rdi, %r8
; X64-NEXT: mulq %r15
-; X64-NEXT: movq %rdx, %r14
+; X64-NEXT: movq %rdx, %r8
+; X64-NEXT: movq %rax, %r11
+; X64-NEXT: addq %rdi, %r11
+; X64-NEXT: adcq $0, %r8
+; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: movq %rcx, %r15
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT: mulq %rbp
+; X64-NEXT: movq %rdx, %rbx
; X64-NEXT: movq %rax, %rcx
-; X64-NEXT: addq %rbx, %rcx
-; X64-NEXT: adcq %r11, %r14
+; X64-NEXT: addq %r11, %rcx
+; X64-NEXT: adcq %r8, %rbx
; X64-NEXT: setb %r11b
; X64-NEXT: movq %r12, %rax
-; X64-NEXT: mulq %r15
-; X64-NEXT: addq %r14, %rax
+; X64-NEXT: mulq %rbp
+; X64-NEXT: addq %rbx, %rax
; X64-NEXT: movzbl %r11b, %edi
; X64-NEXT: adcq %rdi, %rdx
-; X64-NEXT: addq %rsi, %r13
+; X64-NEXT: addq %rsi, %r14
; X64-NEXT: adcq %r9, %rcx
; X64-NEXT: movzbl %r10b, %esi
; X64-NEXT: adcq %rsi, %rax
; X64-NEXT: adcq $0, %rdx
-; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
-; X64-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
+; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq (%rsp), %rax # 8-byte Folded Reload
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; X64-NEXT: adcq (%rsp), %rdx # 8-byte Folded Reload
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: adcq $0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; X64-NEXT: adcq $0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; X64-NEXT: adcq $0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; X64-NEXT: adcq $0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT: movq 32(%rcx), %rdi
-; X64-NEXT: movq %r8, %r10
-; X64-NEXT: movq %r8, %rax
-; X64-NEXT: mulq %rdi
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT: movq 32(%r10), %rcx
+; X64-NEXT: movq %r15, %rax
+; X64-NEXT: mulq %rcx
; X64-NEXT: movq %rdx, %rsi
-; X64-NEXT: movq %rax, %r8
+; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq %r12, %r8
; X64-NEXT: movq %r12, %rax
-; X64-NEXT: mulq %rdi
-; X64-NEXT: movq %rdi, %r14
+; X64-NEXT: mulq %rcx
+; X64-NEXT: movq %rcx, %r14
; X64-NEXT: movq %rdx, %r9
; X64-NEXT: movq %rax, %r11
; X64-NEXT: addq %rsi, %r11
; X64-NEXT: adcq $0, %r9
-; X64-NEXT: movq 40(%rcx), %rsi
-; X64-NEXT: movq %r10, %rax
-; X64-NEXT: mulq %rsi
-; X64-NEXT: movq %rsi, %rdi
+; X64-NEXT: movq 40(%r10), %rcx
+; X64-NEXT: movq %r10, %rdi
+; X64-NEXT: movq %r15, %rax
+; X64-NEXT: mulq %rcx
+; X64-NEXT: movq %rcx, %r12
; X64-NEXT: movq %rdx, %rbx
; X64-NEXT: movq %rax, %rsi
; X64-NEXT: addq %r11, %rsi
; X64-NEXT: adcq %r9, %rbx
; X64-NEXT: setb %r10b
-; X64-NEXT: movq %r12, %rax
-; X64-NEXT: mulq %rdi
+; X64-NEXT: movq %r8, %rax
+; X64-NEXT: mulq %rcx
; X64-NEXT: movq %rdx, %r9
; X64-NEXT: movq %rax, %r11
; X64-NEXT: addq %rbx, %r11
; X64-NEXT: movzbl %r10b, %eax
; X64-NEXT: adcq %rax, %r9
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
-; X64-NEXT: movq %r12, %rax
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT: movq %rcx, %rax
; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: mulq %r14
; X64-NEXT: movq %rdx, %rbx
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %rbp, %rax
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT: movq %r8, %rax
; X64-NEXT: mulq %r14
; X64-NEXT: movq %rdx, %r14
; X64-NEXT: movq %rax, %r13
; X64-NEXT: addq %rbx, %r13
; X64-NEXT: adcq $0, %r14
-; X64-NEXT: movq %r12, %rax
-; X64-NEXT: movq %rdi, %rbx
-; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: mulq %rdi
-; X64-NEXT: movq %rdx, %r10
+; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: mulq %r12
+; X64-NEXT: movq %rdx, %r15
; X64-NEXT: addq %r13, %rax
; X64-NEXT: movq %rax, (%rsp) # 8-byte Spill
-; X64-NEXT: adcq %r14, %r10
-; X64-NEXT: setb %r15b
-; X64-NEXT: movq %rbp, %rax
-; X64-NEXT: movq %rbp, %rdi
-; X64-NEXT: mulq %rbx
+; X64-NEXT: adcq %r14, %r15
+; X64-NEXT: setb %r10b
+; X64-NEXT: movq %r8, %rax
+; X64-NEXT: mulq %r12
; X64-NEXT: movq %rdx, %rbx
; X64-NEXT: movq %rax, %r14
-; X64-NEXT: addq %r10, %r14
-; X64-NEXT: movzbl %r15b, %eax
+; X64-NEXT: addq %r15, %r14
+; X64-NEXT: movzbl %r10b, %eax
; X64-NEXT: adcq %rax, %rbx
-; X64-NEXT: addq %r8, %r14
+; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
; X64-NEXT: adcq %rsi, %rbx
; X64-NEXT: adcq $0, %r11
; X64-NEXT: adcq $0, %r9
-; X64-NEXT: movq %rcx, %r8
-; X64-NEXT: movq 48(%rcx), %rcx
-; X64-NEXT: movq %r12, %r15
-; X64-NEXT: movq %r12, %rax
-; X64-NEXT: mulq %rcx
+; X64-NEXT: movq 48(%rdi), %r12
+; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: mulq %r12
; X64-NEXT: movq %rdx, %rsi
; X64-NEXT: movq %rax, %rbp
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: movq %rdi, %r12
-; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: mulq %rcx
-; X64-NEXT: movq %rdx, %r10
+; X64-NEXT: movq %r8, %rax
+; X64-NEXT: mulq %r12
+; X64-NEXT: movq %rdx, %r15
; X64-NEXT: movq %rax, %r13
; X64-NEXT: addq %rsi, %r13
-; X64-NEXT: adcq $0, %r10
-; X64-NEXT: movq 56(%r8), %rsi
-; X64-NEXT: movq %r15, %rax
+; X64-NEXT: adcq $0, %r15
+; X64-NEXT: movq 56(%rdi), %rsi
+; X64-NEXT: movq %rcx, %rax
; X64-NEXT: mulq %rsi
-; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %rdx, %r15
+; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: movq %rax, %rdi
; X64-NEXT: addq %r13, %rdi
-; X64-NEXT: adcq %r10, %r15
-; X64-NEXT: setb %r8b
-; X64-NEXT: movq %r12, %rax
+; X64-NEXT: adcq %r15, %rcx
+; X64-NEXT: setb %r10b
+; X64-NEXT: movq %r8, %rax
; X64-NEXT: mulq %rsi
+; X64-NEXT: movq %rsi, %r8
; X64-NEXT: movq %rdx, %rsi
; X64-NEXT: movq %rax, %r13
-; X64-NEXT: addq %r15, %r13
-; X64-NEXT: movzbl %r8b, %eax
+; X64-NEXT: addq %rcx, %r13
+; X64-NEXT: movzbl %r10b, %eax
; X64-NEXT: adcq %rax, %rsi
; X64-NEXT: addq %r14, %rbp
-; X64-NEXT: movq %rbp, %r8
+; X64-NEXT: movq %rbp, %r10
; X64-NEXT: adcq %rbx, %rdi
; X64-NEXT: adcq $0, %r13
; X64-NEXT: adcq $0, %rsi
; X64-NEXT: addq %r11, %r13
; X64-NEXT: adcq %r9, %rsi
-; X64-NEXT: setb %bpl
+; X64-NEXT: setb {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Spill
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
; X64-NEXT: movq %r14, %rax
-; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: mulq %rcx
-; X64-NEXT: movq %rdx, %r9
-; X64-NEXT: movq %rax, %r12
+; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: mulq %r12
+; X64-NEXT: movq %rdx, %rcx
+; X64-NEXT: movq %rax, %rbp
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
; X64-NEXT: movq %r11, %rax
-; X64-NEXT: mulq %rcx
-; X64-NEXT: movq %rdx, %r10
+; X64-NEXT: mulq %r12
+; X64-NEXT: movq %rdx, %r9
; X64-NEXT: movq %rax, %rbx
-; X64-NEXT: addq %r9, %rbx
-; X64-NEXT: adcq $0, %r10
+; X64-NEXT: addq %rcx, %rbx
+; X64-NEXT: adcq $0, %r9
; X64-NEXT: movq %r14, %rax
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT: mulq %rcx
-; X64-NEXT: movq %rdx, %r15
-; X64-NEXT: movq %rax, %r9
-; X64-NEXT: addq %rbx, %r9
-; X64-NEXT: adcq %r10, %r15
-; X64-NEXT: setb %r10b
+; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: mulq %r8
+; X64-NEXT: movq %rdx, %rcx
+; X64-NEXT: movq %rax, %r12
+; X64-NEXT: addq %rbx, %r12
+; X64-NEXT: adcq %r9, %rcx
+; X64-NEXT: setb %r15b
; X64-NEXT: movq %r11, %rax
-; X64-NEXT: mulq %rcx
+; X64-NEXT: mulq %r8
; X64-NEXT: movq %rdx, %r14
; X64-NEXT: movq %rax, %rbx
-; X64-NEXT: addq %r15, %rbx
-; X64-NEXT: movzbl %r10b, %eax
+; X64-NEXT: addq %rcx, %rbx
+; X64-NEXT: movzbl %r15b, %eax
; X64-NEXT: adcq %rax, %r14
-; X64-NEXT: addq %r13, %r12
-; X64-NEXT: adcq %rsi, %r9
-; X64-NEXT: movzbl %bpl, %eax
+; X64-NEXT: addq %r13, %rbp
+; X64-NEXT: adcq %rsi, %r12
+; X64-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; X64-NEXT: adcq %rax, %rbx
; X64-NEXT: adcq $0, %r14
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; X64-NEXT: addq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; X64-NEXT: adcq %rax, (%rsp) # 8-byte Folded Spill
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
-; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload
+; X64-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: adcq $0, %rbp
; X64-NEXT: adcq $0, %r12
-; X64-NEXT: adcq $0, %r9
; X64-NEXT: adcq $0, %rbx
; X64-NEXT: adcq $0, %r14
-; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
+; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
+; X64-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
-; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
; X64-NEXT: setb {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Spill
@@ -5246,127 +5224,127 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
; X64-NEXT: mulq %rsi
; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq %rax, %rdi
+; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
; X64-NEXT: movq %r9, %rax
; X64-NEXT: mulq %rsi
; X64-NEXT: movq %rsi, %r13
; X64-NEXT: movq %rdx, %rsi
-; X64-NEXT: movq %rax, %r10
-; X64-NEXT: addq %rcx, %r10
+; X64-NEXT: movq %rax, %rdi
+; X64-NEXT: addq %rcx, %rdi
; X64-NEXT: adcq $0, %rsi
; X64-NEXT: movq %r8, %rax
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT: mulq %rcx
-; X64-NEXT: movq %rdx, %r15
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT: mulq %r8
+; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: movq %rax, %r12
-; X64-NEXT: addq %r10, %r12
-; X64-NEXT: adcq %rsi, %r15
-; X64-NEXT: setb %r8b
+; X64-NEXT: addq %rdi, %r12
+; X64-NEXT: adcq %rsi, %rcx
+; X64-NEXT: setb %r10b
; X64-NEXT: movq %r9, %rax
-; X64-NEXT: mulq %rcx
+; X64-NEXT: mulq %r8
; X64-NEXT: movq %rdx, %r9
-; X64-NEXT: movq %rax, %rsi
-; X64-NEXT: addq %r15, %rsi
-; X64-NEXT: movzbl %r8b, %eax
-; X64-NEXT: adcq %rax, %r9
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
-; X64-NEXT: movq %r8, %rax
+; X64-NEXT: movq %rax, %rdi
+; X64-NEXT: addq %rcx, %rdi
+; X64-NEXT: movzbl %r10b, %eax
+; X64-NEXT: adcq %rax, %r9
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT: movq %r10, %rax
; X64-NEXT: mulq %r13
-; X64-NEXT: movq %rdx, %r10
+; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
; X64-NEXT: movq %rbp, %rax
; X64-NEXT: mulq %r13
; X64-NEXT: movq %rdx, %r15
; X64-NEXT: movq %rax, %r13
-; X64-NEXT: addq %r10, %r13
+; X64-NEXT: addq %rcx, %r13
; X64-NEXT: adcq $0, %r15
-; X64-NEXT: movq %r8, %rax
-; X64-NEXT: movq %r8, %r11
-; X64-NEXT: mulq %rcx
-; X64-NEXT: movq %rdx, %r10
+; X64-NEXT: movq %r10, %rax
+; X64-NEXT: movq %r10, %r11
+; X64-NEXT: mulq %r8
+; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: addq %r13, %rax
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq %r15, %r10
-; X64-NEXT: setb %r8b
+; X64-NEXT: adcq %r15, %rcx
+; X64-NEXT: setb %r10b
; X64-NEXT: movq %rbp, %rax
; X64-NEXT: movq %rbp, %r15
-; X64-NEXT: mulq %rcx
+; X64-NEXT: mulq %r8
; X64-NEXT: movq %rdx, %r13
; X64-NEXT: movq %rax, %rbp
-; X64-NEXT: addq %r10, %rbp
-; X64-NEXT: movzbl %r8b, %eax
+; X64-NEXT: addq %rcx, %rbp
+; X64-NEXT: movzbl %r10b, %eax
; X64-NEXT: adcq %rax, %r13
-; X64-NEXT: addq %rdi, %rbp
+; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
; X64-NEXT: adcq %r12, %r13
-; X64-NEXT: adcq $0, %rsi
+; X64-NEXT: adcq $0, %rdi
; X64-NEXT: adcq $0, %r9
-; X64-NEXT: movq %r11, %r8
+; X64-NEXT: movq %r11, %r10
; X64-NEXT: movq %r11, %rax
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT: mulq %rcx
-; X64-NEXT: movq %rdx, %rdi
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT: mulq %rsi
+; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: movq %rax, %r11
; X64-NEXT: movq %r15, %rax
; X64-NEXT: movq %r15, %r12
-; X64-NEXT: mulq %rcx
-; X64-NEXT: movq %rdx, %r10
-; X64-NEXT: movq %rax, %r15
-; X64-NEXT: addq %rdi, %r15
-; X64-NEXT: adcq $0, %r10
-; X64-NEXT: movq %r8, %rax
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; X64-NEXT: mulq %rdi
+; X64-NEXT: mulq %rsi
; X64-NEXT: movq %rdx, %r8
+; X64-NEXT: movq %rax, %r15
+; X64-NEXT: addq %rcx, %r15
+; X64-NEXT: adcq $0, %r8
+; X64-NEXT: movq %r10, %rax
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT: mulq %rsi
+; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: addq %r15, %rax
; X64-NEXT: movq %rax, %r15
-; X64-NEXT: adcq %r10, %r8
+; X64-NEXT: adcq %r8, %rcx
; X64-NEXT: setb %r10b
; X64-NEXT: movq %r12, %rax
-; X64-NEXT: mulq %rdi
-; X64-NEXT: movq %rdx, %rdi
+; X64-NEXT: mulq %rsi
+; X64-NEXT: movq %rdx, %r8
; X64-NEXT: movq %rax, %r12
-; X64-NEXT: addq %r8, %r12
+; X64-NEXT: addq %rcx, %r12
; X64-NEXT: movzbl %r10b, %eax
-; X64-NEXT: adcq %rax, %rdi
+; X64-NEXT: adcq %rax, %r8
; X64-NEXT: addq %rbp, %r11
; X64-NEXT: adcq %r13, %r15
; X64-NEXT: movq %r15, %rbp
; X64-NEXT: adcq $0, %r12
-; X64-NEXT: adcq $0, %rdi
-; X64-NEXT: addq %rsi, %r12
-; X64-NEXT: adcq %r9, %rdi
-; X64-NEXT: setb {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Spill
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
-; X64-NEXT: movq %r15, %rax
-; X64-NEXT: movq %rcx, %rsi
-; X64-NEXT: mulq %rcx
-; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq %rax, %r10
+; X64-NEXT: adcq $0, %r8
+; X64-NEXT: addq %rdi, %r12
+; X64-NEXT: adcq %r9, %r8
+; X64-NEXT: setb %r9b
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
; X64-NEXT: movq %r13, %rax
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT: mulq %rsi
+; X64-NEXT: movq %rdx, %rcx
+; X64-NEXT: movq %rax, %r10
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT: movq %r15, %rax
; X64-NEXT: mulq %rsi
; X64-NEXT: movq %rdx, %rsi
-; X64-NEXT: movq %rax, %r8
-; X64-NEXT: addq %rcx, %r8
+; X64-NEXT: movq %rax, %rdi
+; X64-NEXT: addq %rcx, %rdi
; X64-NEXT: adcq $0, %rsi
-; X64-NEXT: movq %r15, %rax
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
-; X64-NEXT: mulq %r9
+; X64-NEXT: movq %r13, %rax
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; X64-NEXT: mulq %r13
; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: addq %r8, %rax
-; X64-NEXT: movq %rax, %r8
+; X64-NEXT: addq %rdi, %rax
+; X64-NEXT: movq %rax, %rdi
; X64-NEXT: adcq %rsi, %rcx
; X64-NEXT: setb %sil
-; X64-NEXT: movq %r13, %rax
-; X64-NEXT: mulq %r9
+; X64-NEXT: movq %r15, %rax
+; X64-NEXT: mulq %r13
; X64-NEXT: addq %rcx, %rax
; X64-NEXT: movzbl %sil, %ecx
; X64-NEXT: adcq %rcx, %rdx
; X64-NEXT: addq %r12, %r10
-; X64-NEXT: adcq %rdi, %r8
-; X64-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X64-NEXT: adcq %r8, %rdi
+; X64-NEXT: movzbl %r9b, %ecx
; X64-NEXT: adcq %rcx, %rax
; X64-NEXT: movq %rax, %rcx
; X64-NEXT: adcq $0, %rdx
@@ -5381,350 +5359,342 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X64-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; X64-NEXT: adcq %rax, %r10
; X64-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq $0, %r8
-; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: adcq $0, %rdi
+; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: adcq $0, %rcx
; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: adcq $0, %rdx
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
-; X64-NEXT: movq 64(%r9), %rcx
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
-; X64-NEXT: movq %r10, %rax
-; X64-NEXT: mulq %rcx
-; X64-NEXT: movq %rdx, %rsi
+; X64-NEXT: movq 64(%r10), %rsi
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: mulq %rsi
+; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: movq %rax, %r11
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
; X64-NEXT: movq %r14, %rax
-; X64-NEXT: mulq %rcx
-; X64-NEXT: movq %rcx, %r15
-; X64-NEXT: movq %rdx, %rdi
-; X64-NEXT: movq %rax, %r8
-; X64-NEXT: addq %rsi, %r8
-; X64-NEXT: adcq $0, %rdi
-; X64-NEXT: movq 72(%r9), %rsi
-; X64-NEXT: movq %r9, %rcx
-; X64-NEXT: movq %r10, %rax
; X64-NEXT: mulq %rsi
-; X64-NEXT: movq %rdx, %r10
+; X64-NEXT: movq %rsi, %r15
+; X64-NEXT: movq %rdx, %r8
+; X64-NEXT: movq %rax, %r9
+; X64-NEXT: addq %rcx, %r9
+; X64-NEXT: adcq $0, %r8
+; X64-NEXT: movq 72(%r10), %rcx
+; X64-NEXT: movq %r10, %rsi
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: mulq %rcx
+; X64-NEXT: movq %rcx, %rdi
+; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: movq %rax, %rbx
-; X64-NEXT: addq %r8, %rbx
-; X64-NEXT: adcq %rdi, %r10
-; X64-NEXT: setb %r8b
+; X64-NEXT: addq %r9, %rbx
+; X64-NEXT: adcq %r8, %rcx
+; X64-NEXT: setb %r10b
; X64-NEXT: movq %r14, %rax
-; X64-NEXT: mulq %rsi
-; X64-NEXT: movq %rsi, %r13
-; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %rdx, %rsi
+; X64-NEXT: mulq %rdi
+; X64-NEXT: movq %rdi, %r13
+; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq %rdx, %rdi
; X64-NEXT: movq %rax, %r9
-; X64-NEXT: addq %r10, %r9
-; X64-NEXT: movzbl %r8b, %eax
-; X64-NEXT: adcq %rax, %rsi
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
-; X64-NEXT: movq %r12, %rax
-; X64-NEXT: movq %r15, %rdi
+; X64-NEXT: addq %rcx, %r9
+; X64-NEXT: movzbl %r10b, %eax
+; X64-NEXT: adcq %rax, %rdi
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT: movq %r8, %rax
; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: mulq %r15
-; X64-NEXT: movq %rdx, %r8
+; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
-; X64-NEXT: movq %r15, %rax
-; X64-NEXT: mulq %rdi
-; X64-NEXT: movq %rdx, %r10
-; X64-NEXT: movq %rax, %r14
-; X64-NEXT: addq %r8, %r14
-; X64-NEXT: adcq $0, %r10
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
; X64-NEXT: movq %r12, %rax
-; X64-NEXT: movq %r12, %rdi
+; X64-NEXT: mulq %r15
+; X64-NEXT: movq %rdx, %r14
+; X64-NEXT: movq %rax, %r15
+; X64-NEXT: addq %rcx, %r15
+; X64-NEXT: adcq $0, %r14
+; X64-NEXT: movq %r8, %rax
; X64-NEXT: mulq %r13
-; X64-NEXT: movq %rdx, %r8
-; X64-NEXT: addq %r14, %rax
+; X64-NEXT: movq %rdx, %rcx
+; X64-NEXT: addq %r15, %rax
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq %r10, %r8
+; X64-NEXT: adcq %r14, %rcx
; X64-NEXT: setb %r10b
-; X64-NEXT: movq %r15, %rax
-; X64-NEXT: movq %r15, %r12
+; X64-NEXT: movq %r12, %rax
; X64-NEXT: mulq %r13
; X64-NEXT: movq %rdx, %r14
; X64-NEXT: movq %rax, %rbp
-; X64-NEXT: addq %r8, %rbp
+; X64-NEXT: addq %rcx, %rbp
; X64-NEXT: movzbl %r10b, %eax
; X64-NEXT: adcq %rax, %r14
; X64-NEXT: addq %r11, %rbp
; X64-NEXT: adcq %rbx, %r14
; X64-NEXT: adcq $0, %r9
-; X64-NEXT: adcq $0, %rsi
-; X64-NEXT: movq %rcx, %rbx
-; X64-NEXT: movq 80(%rcx), %r15
-; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: adcq $0, %rdi
+; X64-NEXT: movq %rsi, %r10
+; X64-NEXT: movq 80(%rsi), %r15
+; X64-NEXT: movq %r8, %rax
; X64-NEXT: mulq %r15
-; X64-NEXT: movq %rdx, %r8
-; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: movq %rdx, %rcx
+; X64-NEXT: movq %rax, %rsi
; X64-NEXT: movq %r12, %rax
; X64-NEXT: mulq %r15
-; X64-NEXT: movq %rdx, %r10
-; X64-NEXT: movq %rax, %r11
-; X64-NEXT: addq %r8, %r11
-; X64-NEXT: adcq $0, %r10
-; X64-NEXT: movq 88(%rbx), %rbx
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: mulq %rbx
-; X64-NEXT: movq %rdx, %r8
-; X64-NEXT: addq %r11, %rax
-; X64-NEXT: movq %rax, %r11
-; X64-NEXT: adcq %r10, %r8
-; X64-NEXT: setb %r10b
+; X64-NEXT: movq %rdx, %r11
+; X64-NEXT: movq %rax, %rbx
+; X64-NEXT: addq %rcx, %rbx
+; X64-NEXT: adcq $0, %r11
+; X64-NEXT: movq 88(%r10), %r10
+; X64-NEXT: movq %r8, %rax
+; X64-NEXT: mulq %r10
+; X64-NEXT: movq %rdx, %rcx
+; X64-NEXT: addq %rbx, %rax
+; X64-NEXT: movq %rax, %rbx
+; X64-NEXT: adcq %r11, %rcx
+; X64-NEXT: setb %r11b
; X64-NEXT: movq %r12, %rax
-; X64-NEXT: mulq %rbx
+; X64-NEXT: mulq %r10
; X64-NEXT: movq %rdx, %r12
; X64-NEXT: movq %rax, %r13
-; X64-NEXT: addq %r8, %r13
-; X64-NEXT: movzbl %r10b, %eax
+; X64-NEXT: addq %rcx, %r13
+; X64-NEXT: movzbl %r11b, %eax
; X64-NEXT: adcq %rax, %r12
-; X64-NEXT: addq %rbp, %rcx
-; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq %r14, %r11
-; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: addq %rbp, %rsi
+; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: adcq %r14, %rbx
+; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: adcq $0, %r13
; X64-NEXT: adcq $0, %r12
; X64-NEXT: addq %r9, %r13
-; X64-NEXT: adcq %rsi, %r12
-; X64-NEXT: setb %bpl
+; X64-NEXT: adcq %rdi, %r12
+; X64-NEXT: setb %r9b
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT: movq %r14, %rax
+; X64-NEXT: mulq %r15
+; X64-NEXT: movq %rdx, %rcx
+; X64-NEXT: movq %rax, %rdi
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
; X64-NEXT: movq %r11, %rax
; X64-NEXT: mulq %r15
-; X64-NEXT: movq %rdx, %rdi
-; X64-NEXT: movq %rax, %rsi
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: mulq %r15
; X64-NEXT: movq %rdx, %r8
-; X64-NEXT: movq %rax, %r10
-; X64-NEXT: addq %rdi, %r10
+; X64-NEXT: movq %rax, %rbx
+; X64-NEXT: addq %rcx, %rbx
; X64-NEXT: adcq $0, %r8
-; X64-NEXT: movq %r11, %rax
-; X64-NEXT: movq %r11, %r14
-; X64-NEXT: mulq %rbx
-; X64-NEXT: movq %rdx, %rdi
-; X64-NEXT: addq %r10, %rax
-; X64-NEXT: movq %rax, %r9
-; X64-NEXT: adcq %r8, %rdi
+; X64-NEXT: movq %r14, %rax
+; X64-NEXT: mulq %r10
+; X64-NEXT: movq %rdx, %rcx
+; X64-NEXT: addq %rbx, %rax
+; X64-NEXT: movq %rax, %rsi
+; X64-NEXT: adcq %r8, %rcx
; X64-NEXT: setb %r8b
-; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: mulq %rbx
-; X64-NEXT: addq %rdi, %rax
+; X64-NEXT: movq %r11, %rax
+; X64-NEXT: mulq %r10
+; X64-NEXT: addq %rcx, %rax
; X64-NEXT: movzbl %r8b, %ecx
; X64-NEXT: adcq %rcx, %rdx
-; X64-NEXT: addq %r13, %rsi
+; X64-NEXT: addq %r13, %rdi
+; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: adcq %r12, %rsi
; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq %r12, %r9
-; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movzbl %bpl, %ecx
+; X64-NEXT: movzbl %r9b, %ecx
; X64-NEXT: adcq %rcx, %rax
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: adcq $0, %rdx
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT: imulq %rax, %rbx
-; X64-NEXT: movq %rax, %r12
+; X64-NEXT: imulq %rax, %r10
+; X64-NEXT: movq %rax, %r9
; X64-NEXT: mulq %r15
-; X64-NEXT: movq %rax, %r8
-; X64-NEXT: addq %rbx, %rdx
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT: imulq %rcx, %r15
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT: imulq %rsi, %r15
+; X64-NEXT: addq %r10, %r15
; X64-NEXT: addq %rdx, %r15
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT: movq %rax, %r10
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; X64-NEXT: imulq %rsi, %r10
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
-; X64-NEXT: mulq %r9
-; X64-NEXT: movq %rax, %rdi
-; X64-NEXT: addq %r10, %rdx
+; X64-NEXT: movq %rax, %rdx
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
-; X64-NEXT: imulq %r9, %rbx
-; X64-NEXT: addq %rdx, %rbx
-; X64-NEXT: addq %r8, %rdi
-; X64-NEXT: adcq %r15, %rbx
-; X64-NEXT: movq %r9, %rax
-; X64-NEXT: mulq %r12
-; X64-NEXT: movq %rdx, %r8
+; X64-NEXT: imulq %rbx, %rdx
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT: imulq %rdi, %r10
+; X64-NEXT: addq %rdx, %r10
+; X64-NEXT: mulq %rdi
+; X64-NEXT: movq %rax, %r8
+; X64-NEXT: addq %rdx, %r10
+; X64-NEXT: addq %rcx, %r8
+; X64-NEXT: adcq %r15, %r10
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: mulq %r9
+; X64-NEXT: movq %rdx, %r15
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %rsi, %rax
-; X64-NEXT: mulq %r12
-; X64-NEXT: movq %rdx, %r10
+; X64-NEXT: movq %rbx, %rax
+; X64-NEXT: mulq %r9
+; X64-NEXT: movq %rdx, %r12
+; X64-NEXT: movq %rax, %r13
+; X64-NEXT: addq %r15, %r13
+; X64-NEXT: adcq $0, %r12
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: mulq %rsi
+; X64-NEXT: movq %rdx, %rbp
; X64-NEXT: movq %rax, %r15
-; X64-NEXT: addq %r8, %r15
-; X64-NEXT: adcq $0, %r10
-; X64-NEXT: movq %r9, %rax
-; X64-NEXT: mulq %rcx
-; X64-NEXT: movq %rdx, %r8
-; X64-NEXT: movq %rax, %r11
-; X64-NEXT: addq %r15, %r11
-; X64-NEXT: adcq %r10, %r8
-; X64-NEXT: setb %r10b
-; X64-NEXT: movq %rsi, %rax
-; X64-NEXT: mulq %rcx
+; X64-NEXT: addq %r13, %r15
+; X64-NEXT: adcq %r12, %rbp
+; X64-NEXT: setb %cl
+; X64-NEXT: movq %rbx, %rax
+; X64-NEXT: mulq %rsi
; X64-NEXT: movq %rdx, %rsi
-; X64-NEXT: movq %rax, %r15
-; X64-NEXT: addq %r8, %r15
-; X64-NEXT: movzbl %r10b, %eax
+; X64-NEXT: movq %rax, %r12
+; X64-NEXT: addq %rbp, %r12
+; X64-NEXT: movzbl %cl, %eax
; X64-NEXT: adcq %rax, %rsi
-; X64-NEXT: addq %rdi, %r15
-; X64-NEXT: adcq %rbx, %rsi
+; X64-NEXT: addq %r8, %r12
+; X64-NEXT: adcq %r10, %rsi
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
-; X64-NEXT: movq 120(%rdx), %rdi
+; X64-NEXT: movq 120(%rdx), %rcx
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT: imulq %rax, %rdi
-; X64-NEXT: movq 112(%rdx), %rbx
-; X64-NEXT: movq %rdx, %r12
-; X64-NEXT: movq %rax, %rbp
-; X64-NEXT: mulq %rbx
+; X64-NEXT: imulq %rax, %rcx
+; X64-NEXT: movq 112(%rdx), %r9
+; X64-NEXT: movq %rdx, %rdi
+; X64-NEXT: movq %rax, %r10
+; X64-NEXT: mulq %r9
; X64-NEXT: movq %rax, %r8
-; X64-NEXT: addq %rdi, %rdx
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT: imulq %rcx, %rbx
-; X64-NEXT: addq %rdx, %rbx
-; X64-NEXT: movq 96(%r12), %r10
-; X64-NEXT: movq 104(%r12), %rdi
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT: imulq %rbx, %r9
+; X64-NEXT: addq %rcx, %r9
+; X64-NEXT: addq %rdx, %r9
+; X64-NEXT: movq 96(%rdi), %rbp
+; X64-NEXT: movq 104(%rdi), %rdi
; X64-NEXT: movq %r14, %rax
-; X64-NEXT: movq %r14, %r12
-; X64-NEXT: imulq %rdi, %r12
-; X64-NEXT: mulq %r10
+; X64-NEXT: imulq %rdi, %rax
+; X64-NEXT: imulq %rbp, %r11
+; X64-NEXT: addq %rax, %r11
+; X64-NEXT: movq %r14, %rax
+; X64-NEXT: mulq %rbp
; X64-NEXT: movq %rax, %r13
-; X64-NEXT: addq %r12, %rdx
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
-; X64-NEXT: imulq %r10, %r14
-; X64-NEXT: addq %rdx, %r14
+; X64-NEXT: addq %rdx, %r11
; X64-NEXT: addq %r8, %r13
-; X64-NEXT: adcq %rbx, %r14
-; X64-NEXT: movq %r10, %rax
-; X64-NEXT: mulq %rbp
+; X64-NEXT: adcq %r9, %r11
+; X64-NEXT: movq %r11, %r14
+; X64-NEXT: movq %rbp, %rax
+; X64-NEXT: mulq %r10
; X64-NEXT: movq %rdx, %r8
-; X64-NEXT: movq %rax, %r9
+; X64-NEXT: movq %rax, %r11
; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: mulq %rbp
+; X64-NEXT: mulq %r10
+; X64-NEXT: movq %rdx, %rcx
+; X64-NEXT: movq %rax, %r9
+; X64-NEXT: addq %r8, %r9
+; X64-NEXT: adcq $0, %rcx
+; X64-NEXT: movq %rbp, %rax
+; X64-NEXT: mulq %rbx
; X64-NEXT: movq %rdx, %rbp
-; X64-NEXT: movq %rax, %r12
-; X64-NEXT: addq %r8, %r12
-; X64-NEXT: adcq $0, %rbp
-; X64-NEXT: movq %r10, %rax
-; X64-NEXT: mulq %rcx
-; X64-NEXT: movq %rdx, %r10
-; X64-NEXT: movq %rax, %rbx
-; X64-NEXT: addq %r12, %rbx
-; X64-NEXT: adcq %rbp, %r10
-; X64-NEXT: setb %r8b
+; X64-NEXT: movq %rax, %r8
+; X64-NEXT: addq %r9, %r8
+; X64-NEXT: adcq %rcx, %rbp
+; X64-NEXT: setb %cl
; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: mulq %rcx
-; X64-NEXT: addq %r10, %rax
-; X64-NEXT: movzbl %r8b, %edi
-; X64-NEXT: adcq %rdi, %rdx
+; X64-NEXT: mulq %rbx
+; X64-NEXT: addq %rbp, %rax
+; X64-NEXT: movzbl %cl, %ecx
+; X64-NEXT: adcq %rcx, %rdx
; X64-NEXT: addq %r13, %rax
; X64-NEXT: adcq %r14, %rdx
-; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
-; X64-NEXT: adcq %r11, %rbx
-; X64-NEXT: adcq %r15, %rax
+; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
+; X64-NEXT: adcq %r15, %r8
+; X64-NEXT: adcq %r12, %rax
; X64-NEXT: adcq %rsi, %rdx
-; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
-; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
-; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
+; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
+; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
-; X64-NEXT: movq 80(%r13), %r8
-; X64-NEXT: movq %r8, %rax
-; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT: movq 80(%r8), %r9
+; X64-NEXT: movq %r9, %rax
+; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
; X64-NEXT: mulq %rsi
-; X64-NEXT: movq %rax, %rbx
+; X64-NEXT: movq %rax, %r10
; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq 88(%r13), %r11
-; X64-NEXT: movq %r13, %r10
-; X64-NEXT: movq %r11, %rax
-; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq 88(%r8), %rbx
+; X64-NEXT: movq %rbx, %rax
+; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: mulq %rsi
-; X64-NEXT: movq %rsi, %r9
+; X64-NEXT: movq %rsi, %r11
; X64-NEXT: movq %rdx, %rsi
; X64-NEXT: movq %rax, %rdi
; X64-NEXT: addq %rcx, %rdi
; X64-NEXT: adcq $0, %rsi
-; X64-NEXT: movq %r8, %rax
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
-; X64-NEXT: mulq %r8
+; X64-NEXT: movq %r9, %rax
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT: mulq %r9
; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: movq %rax, %r14
; X64-NEXT: addq %rdi, %r14
; X64-NEXT: adcq %rsi, %rcx
; X64-NEXT: setb %dil
-; X64-NEXT: movq %r11, %rax
-; X64-NEXT: mulq %r8
-; X64-NEXT: movq %r8, %r11
-; X64-NEXT: movq %rdx, %r13
+; X64-NEXT: movq %rbx, %rax
+; X64-NEXT: mulq %r9
+; X64-NEXT: movq %rdx, %rbx
; X64-NEXT: movq %rax, %rsi
; X64-NEXT: addq %rcx, %rsi
; X64-NEXT: movzbl %dil, %eax
-; X64-NEXT: adcq %rax, %r13
-; X64-NEXT: movq %r10, %rdi
-; X64-NEXT: movq 64(%r10), %r10
-; X64-NEXT: movq %r10, %rax
-; X64-NEXT: mulq %r9
+; X64-NEXT: adcq %rax, %rbx
+; X64-NEXT: movq 64(%r8), %rdi
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: mulq %r11
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq 72(%rdi), %rax
-; X64-NEXT: movq %rax, %r8
-; X64-NEXT: mulq %r9
+; X64-NEXT: movq 72(%r8), %rax
+; X64-NEXT: movq %rax, %r13
+; X64-NEXT: mulq %r11
; X64-NEXT: movq %rdx, %r15
; X64-NEXT: movq %rax, %r12
; X64-NEXT: addq %rcx, %r12
; X64-NEXT: adcq $0, %r15
-; X64-NEXT: movq %r10, %rax
-; X64-NEXT: movq %r11, %r9
-; X64-NEXT: mulq %r11
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: movq %rdi, %r8
+; X64-NEXT: mulq %r9
; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: addq %r12, %rax
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: adcq %r15, %rcx
; X64-NEXT: setb %dil
-; X64-NEXT: movq %r8, %r11
-; X64-NEXT: movq %r8, %rax
+; X64-NEXT: movq %r13, %r11
+; X64-NEXT: movq %r13, %rax
; X64-NEXT: mulq %r9
; X64-NEXT: movq %rdx, %r12
; X64-NEXT: movq %rax, %rbp
; X64-NEXT: addq %rcx, %rbp
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: adcq %rax, %r12
-; X64-NEXT: addq %rbx, %rbp
+; X64-NEXT: addq %r10, %rbp
; X64-NEXT: adcq %r14, %r12
; X64-NEXT: adcq $0, %rsi
-; X64-NEXT: adcq $0, %r13
-; X64-NEXT: movq %r10, %rdi
-; X64-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %r10, %rax
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
-; X64-NEXT: mulq %r8
+; X64-NEXT: adcq $0, %rbx
+; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq %r8, %rax
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; X64-NEXT: mulq %r13
; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: movq %rax, %r9
; X64-NEXT: movq %r11, %rax
-; X64-NEXT: movq %r11, %rbx
; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: mulq %r8
+; X64-NEXT: mulq %r13
; X64-NEXT: movq %rdx, %r10
; X64-NEXT: movq %rax, %r14
; X64-NEXT: addq %rcx, %r14
; X64-NEXT: adcq $0, %r10
-; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: movq %r8, %rax
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
; X64-NEXT: mulq %r15
; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: addq %r14, %rax
-; X64-NEXT: movq %rax, %r11
+; X64-NEXT: movq %rax, %r8
+; X64-NEXT: addq %r14, %r8
; X64-NEXT: adcq %r10, %rcx
; X64-NEXT: setb %dil
-; X64-NEXT: movq %rbx, %rax
+; X64-NEXT: movq %r11, %rax
; X64-NEXT: mulq %r15
; X64-NEXT: movq %rdx, %r10
; X64-NEXT: movq %rax, %r14
@@ -5733,26 +5703,26 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X64-NEXT: adcq %rax, %r10
; X64-NEXT: addq %rbp, %r9
; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq %r12, %r11
-; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: adcq %r12, %r8
+; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: adcq $0, %r14
; X64-NEXT: adcq $0, %r10
; X64-NEXT: addq %rsi, %r14
-; X64-NEXT: adcq %r13, %r10
+; X64-NEXT: adcq %rbx, %r10
; X64-NEXT: setb %dil
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
-; X64-NEXT: movq %r11, %rax
-; X64-NEXT: mulq %r8
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT: movq %rbp, %rax
+; X64-NEXT: mulq %r13
; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq %rax, %r12
+; X64-NEXT: movq %rax, %r8
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
; X64-NEXT: movq %rbx, %rax
-; X64-NEXT: mulq %r8
+; X64-NEXT: mulq %r13
; X64-NEXT: movq %rdx, %rsi
; X64-NEXT: movq %rax, %r9
; X64-NEXT: addq %rcx, %r9
; X64-NEXT: adcq $0, %rsi
-; X64-NEXT: movq %r11, %rax
+; X64-NEXT: movq %rbp, %rax
; X64-NEXT: mulq %r15
; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: addq %r9, %rax
@@ -5764,8 +5734,8 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X64-NEXT: addq %rcx, %rax
; X64-NEXT: movzbl %sil, %ecx
; X64-NEXT: adcq %rcx, %rdx
-; X64-NEXT: addq %r14, %r12
-; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: addq %r14, %r8
+; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: adcq %r10, %r9
; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movzbl %dil, %ecx
@@ -5773,160 +5743,163 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: adcq $0, %rdx
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; X64-NEXT: movq 96(%rdi), %rsi
-; X64-NEXT: imulq %rsi, %r15
-; X64-NEXT: movq %rsi, %rax
-; X64-NEXT: movq %r8, %rcx
-; X64-NEXT: mulq %r8
-; X64-NEXT: movq %rax, %r10
-; X64-NEXT: addq %r15, %rdx
-; X64-NEXT: movq 104(%rdi), %r9
-; X64-NEXT: imulq %r9, %rcx
-; X64-NEXT: addq %rdx, %rcx
-; X64-NEXT: movq %rcx, %r14
-; X64-NEXT: movq 112(%rdi), %rax
-; X64-NEXT: movq %rax, %rcx
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
-; X64-NEXT: imulq %r12, %rcx
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
-; X64-NEXT: mulq %r15
-; X64-NEXT: movq %rax, %r8
-; X64-NEXT: addq %rcx, %rdx
-; X64-NEXT: movq 120(%rdi), %rdi
-; X64-NEXT: imulq %r15, %rdi
-; X64-NEXT: addq %rdx, %rdi
-; X64-NEXT: addq %r10, %r8
-; X64-NEXT: adcq %r14, %rdi
-; X64-NEXT: movq %r15, %rax
-; X64-NEXT: mulq %rsi
-; X64-NEXT: movq %rdx, %r10
-; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %r12, %rax
-; X64-NEXT: mulq %rsi
-; X64-NEXT: movq %rdx, %r14
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT: movq 96(%r8), %rcx
+; X64-NEXT: imulq %rcx, %r15
+; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: movq %r13, %r9
+; X64-NEXT: mulq %r13
+; X64-NEXT: movq %rax, %rsi
+; X64-NEXT: movq 104(%r8), %rdi
+; X64-NEXT: imulq %rdi, %r9
+; X64-NEXT: addq %r15, %r9
+; X64-NEXT: addq %rdx, %r9
+; X64-NEXT: movq 112(%r8), %rax
+; X64-NEXT: movq %rax, %rdx
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT: imulq %r11, %rdx
+; X64-NEXT: movq 120(%r8), %r8
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT: imulq %r10, %r8
+; X64-NEXT: addq %rdx, %r8
+; X64-NEXT: mulq %r10
; X64-NEXT: movq %rax, %r13
-; X64-NEXT: addq %r10, %r13
-; X64-NEXT: adcq $0, %r14
-; X64-NEXT: movq %r15, %rax
-; X64-NEXT: mulq %r9
+; X64-NEXT: addq %rdx, %r8
+; X64-NEXT: addq %rsi, %r13
+; X64-NEXT: adcq %r9, %r8
+; X64-NEXT: movq %r10, %rax
+; X64-NEXT: movq %r10, %r9
+; X64-NEXT: mulq %rcx
+; X64-NEXT: movq %rdx, %rsi
+; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq %r11, %rax
+; X64-NEXT: mulq %rcx
; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq %rax, %rbp
-; X64-NEXT: addq %r13, %rbp
-; X64-NEXT: adcq %r14, %rcx
-; X64-NEXT: setb %sil
-; X64-NEXT: movq %r12, %rax
-; X64-NEXT: mulq %r9
+; X64-NEXT: movq %rax, %r10
+; X64-NEXT: addq %rsi, %r10
+; X64-NEXT: adcq $0, %rcx
+; X64-NEXT: movq %r9, %rax
+; X64-NEXT: mulq %rdi
+; X64-NEXT: movq %rdx, %rsi
+; X64-NEXT: movq %rax, %r12
+; X64-NEXT: addq %r10, %r12
+; X64-NEXT: adcq %rcx, %rsi
+; X64-NEXT: setb %cl
+; X64-NEXT: movq %r11, %rax
+; X64-NEXT: mulq %rdi
; X64-NEXT: movq %rdx, %r10
; X64-NEXT: movq %rax, %r14
-; X64-NEXT: addq %rcx, %r14
-; X64-NEXT: movzbl %sil, %eax
+; X64-NEXT: addq %rsi, %r14
+; X64-NEXT: movzbl %cl, %eax
; X64-NEXT: adcq %rax, %r10
-; X64-NEXT: addq %r8, %r14
-; X64-NEXT: adcq %rdi, %r10
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT: addq %r13, %r14
+; X64-NEXT: adcq %r8, %r10
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT: imulq %rax, %rsi
+; X64-NEXT: movq %rax, %r11
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT: mulq %rcx
+; X64-NEXT: movq %rax, %r9
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
-; X64-NEXT: imulq %r15, %rdi
-; X64-NEXT: movq %r15, %rax
+; X64-NEXT: imulq %r15, %rcx
+; X64-NEXT: addq %rsi, %rcx
+; X64-NEXT: addq %rdx, %rcx
+; X64-NEXT: movq %rcx, %r8
+; X64-NEXT: movq %rbp, %rax
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; X64-NEXT: mulq %rsi
+; X64-NEXT: imulq %rsi, %rax
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT: imulq %rdi, %rbx
+; X64-NEXT: addq %rax, %rbx
+; X64-NEXT: movq %rbp, %rax
+; X64-NEXT: mulq %rdi
; X64-NEXT: movq %rax, %rcx
-; X64-NEXT: addq %rdi, %rdx
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
-; X64-NEXT: imulq %r12, %rsi
-; X64-NEXT: addq %rdx, %rsi
-; X64-NEXT: movq %rsi, %r8
-; X64-NEXT: movq %r11, %rax
-; X64-NEXT: movq %r11, %rsi
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
-; X64-NEXT: imulq %r9, %rsi
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
-; X64-NEXT: mulq %r11
-; X64-NEXT: movq %rax, %r13
-; X64-NEXT: addq %rsi, %rdx
-; X64-NEXT: imulq %r11, %rbx
; X64-NEXT: addq %rdx, %rbx
-; X64-NEXT: addq %rcx, %r13
+; X64-NEXT: addq %r9, %rcx
; X64-NEXT: adcq %r8, %rbx
-; X64-NEXT: movq %r11, %rax
-; X64-NEXT: mulq %r15
+; X64-NEXT: movq %rbx, %rbp
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: movq %rdi, %r9
+; X64-NEXT: mulq %r11
; X64-NEXT: movq %rdx, %r8
-; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: movq %rax, %r13
+; X64-NEXT: movq %rsi, %rax
+; X64-NEXT: movq %rsi, %rbx
+; X64-NEXT: mulq %r11
+; X64-NEXT: movq %rdx, %rsi
+; X64-NEXT: movq %rax, %rdi
+; X64-NEXT: addq %r8, %rdi
+; X64-NEXT: adcq $0, %rsi
; X64-NEXT: movq %r9, %rax
; X64-NEXT: mulq %r15
-; X64-NEXT: movq %rdx, %rdi
-; X64-NEXT: movq %rax, %rsi
-; X64-NEXT: addq %r8, %rsi
-; X64-NEXT: adcq $0, %rdi
-; X64-NEXT: movq %r11, %rax
-; X64-NEXT: mulq %r12
; X64-NEXT: movq %rdx, %r8
; X64-NEXT: movq %rax, %r11
-; X64-NEXT: addq %rsi, %r11
-; X64-NEXT: adcq %rdi, %r8
+; X64-NEXT: addq %rdi, %r11
+; X64-NEXT: adcq %rsi, %r8
; X64-NEXT: setb %sil
-; X64-NEXT: movq %r9, %rax
-; X64-NEXT: mulq %r12
+; X64-NEXT: movq %rbx, %rax
+; X64-NEXT: mulq %r15
; X64-NEXT: addq %r8, %rax
; X64-NEXT: movzbl %sil, %esi
; X64-NEXT: adcq %rsi, %rdx
-; X64-NEXT: addq %r13, %rax
-; X64-NEXT: adcq %rbx, %rdx
-; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
-; X64-NEXT: adcq %rbp, %r11
+; X64-NEXT: addq %rcx, %rax
+; X64-NEXT: adcq %rbp, %rdx
+; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
+; X64-NEXT: adcq %r12, %r11
; X64-NEXT: adcq %r14, %rax
; X64-NEXT: adcq %r10, %rdx
-; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
-; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT: movq %rcx, %rdi
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
; X64-NEXT: movq %rsi, %r8
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
-; X64-NEXT: movq %rdi, %r9
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; X64-NEXT: movq %rdi, (%rsi)
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; X64-NEXT: movq %rdi, 8(%rsi)
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; X64-NEXT: movq %rdi, 16(%rsi)
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; X64-NEXT: movq %rdi, 24(%rsi)
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; X64-NEXT: movq %rdi, 32(%rsi)
-; X64-NEXT: movq (%rsp), %rdi # 8-byte Reload
-; X64-NEXT: movq %rdi, 40(%rsi)
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; X64-NEXT: movq %rdi, 48(%rsi)
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; X64-NEXT: movq %rdi, 56(%rsi)
-; X64-NEXT: movq %r8, 64(%rsi)
-; X64-NEXT: movq %r9, 72(%rsi)
-; X64-NEXT: movq %r10, 80(%rsi)
-; X64-NEXT: movq %rbx, 88(%rsi)
-; X64-NEXT: movq %rcx, 96(%rsi)
-; X64-NEXT: movq %r11, 104(%rsi)
-; X64-NEXT: movq %rax, 112(%rsi)
-; X64-NEXT: movq %rdx, 120(%rsi)
+; X64-NEXT: movq %rsi, (%rcx)
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT: movq %rsi, 8(%rcx)
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT: movq %rsi, 16(%rcx)
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT: movq %rsi, 24(%rcx)
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT: movq %rsi, 32(%rcx)
+; X64-NEXT: movq (%rsp), %rsi # 8-byte Reload
+; X64-NEXT: movq %rsi, 40(%rcx)
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT: movq %rsi, 48(%rcx)
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT: movq %rsi, 56(%rcx)
+; X64-NEXT: movq %rdi, 64(%rcx)
+; X64-NEXT: movq %r8, 72(%rcx)
+; X64-NEXT: movq %r9, 80(%rcx)
+; X64-NEXT: movq %r10, 88(%rcx)
+; X64-NEXT: movq %r13, 96(%rcx)
+; X64-NEXT: movq %r11, 104(%rcx)
+; X64-NEXT: movq %rax, 112(%rcx)
+; X64-NEXT: movq %rdx, 120(%rcx)
; X64-NEXT: addq $240, %rsp
; X64-NEXT: popq %rbx
; X64-NEXT: popq %r12
diff --git a/llvm/test/CodeGen/X86/mul-i256.ll b/llvm/test/CodeGen/X86/mul-i256.ll
index 9382278ff5c2d..434d14ad4abed 100644
--- a/llvm/test/CodeGen/X86/mul-i256.ll
+++ b/llvm/test/CodeGen/X86/mul-i256.ll
@@ -23,85 +23,85 @@ define void @test(ptr %a, ptr %b, ptr %out) #0 {
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl 12(%ecx), %esi
-; X32-NEXT: movl 8(%ecx), %ebx
+; X32-NEXT: movl 12(%ecx), %ebp
+; X32-NEXT: movl 8(%ecx), %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl (%eax), %ebx
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl (%eax), %edi
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl %esi, %ecx
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ebp, %edi
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl 4(%eax), %ebp
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %ebp
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ebx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull %ebp
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl 4(%eax), %ecx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %ecx, %esi
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: setb %bl
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movzbl %bl, %eax
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
; X32-NEXT: movl (%edi), %esi
; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 4(%edi), %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl 4(%edi), %edi
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: addl %ecx, %ebp
; X32-NEXT: adcl $0, %ebx
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %ecx
+; X32-NEXT: adcl %ebx, %esi
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %edi
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: addl %esi, %ebp
; X32-NEXT: movzbl %bl, %eax
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: addl (%esp), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl 8(%eax), %edi
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl 8(%eax), %esi
+; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %edi
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, %edi
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
@@ -110,55 +110,54 @@ define void @test(ptr %a, ptr %b, ptr %out) #0 {
; X32-NEXT: movl 12(%eax), %esi
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: mull %esi
-; X32-NEXT: movl %esi, %ecx
-; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: adcl %ebx, %esi
-; X32-NEXT: setb %bl
+; X32-NEXT: adcl %ebx, %ecx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %esi, %ecx
-; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %esi, %ebx
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: addl %ecx, %esi
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: adcl $0, %esi
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl (%esp), %edi # 4-byte Reload
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebp
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl (%esp), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl %ebx, %ecx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: adcl %ebx, %esi
+; X32-NEXT: adcl %edi, %ebx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edi, %ebx
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
-; X32-NEXT: adcl %esi, %edx
-; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
@@ -168,43 +167,41 @@ define void @test(ptr %a, ptr %b, ptr %out) #0 {
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl 16(%ecx), %edi
-; X32-NEXT: movl %ebx, %esi
-; X32-NEXT: imull %edi, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: imull %edi, %ebx
; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: mull %ebp
-; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X32-NEXT: addl %esi, %edx
-; X32-NEXT: movl 20(%ecx), %eax
+; X32-NEXT: movl (%esp), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: imull %eax, %ebp
-; X32-NEXT: addl %edx, %ebp
+; X32-NEXT: movl 20(%ecx), %ebp
+; X32-NEXT: imull %ebp, %esi
+; X32-NEXT: addl %ebx, %esi
+; X32-NEXT: addl %edx, %esi
+; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
; X32-NEXT: movl 24(%ecx), %eax
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: imull %ebx, %ecx
+; X32-NEXT: movl %eax, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %edx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: imull %esi, %edx
; X32-NEXT: movl 28(%ecx), %ecx
-; X32-NEXT: imull %esi, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: imull %ebx, %ecx
; X32-NEXT: addl %edx, %ecx
-; X32-NEXT: addl (%esp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X32-NEXT: adcl %ebp, %ecx
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: mull %ebx
+; X32-NEXT: addl %edx, %ecx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl (%esp), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ebp, %ebx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %edi
; X32-NEXT: addl %ebx, %eax
@@ -216,84 +213,84 @@ define void @test(ptr %a, ptr %b, ptr %out) #0 {
; X32-NEXT: addl %edi, %eax
; X32-NEXT: movzbl %bl, %esi
; X32-NEXT: adcl %esi, %edx
-; X32-NEXT: addl (%esp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X32-NEXT: movl 28(%edi), %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: imull %eax, %esi
-; X32-NEXT: movl 24(%edi), %ecx
-; X32-NEXT: mull %ecx
+; X32-NEXT: movl 28(%edi), %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: imull %ebp, %ecx
+; X32-NEXT: movl 24(%edi), %esi
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %esi, %edx
-; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %edx, %ecx
-; X32-NEXT: movl 16(%edi), %ebp
+; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: addl %ecx, %esi
+; X32-NEXT: addl %edx, %esi
+; X32-NEXT: movl 16(%edi), %ecx
; X32-NEXT: movl 20(%edi), %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: imull %ebx, %edi
+; X32-NEXT: movl %eax, %edx
+; X32-NEXT: imull %ebx, %edx
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: mull %ebp
-; X32-NEXT: addl %edi, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: imull %ebp, %esi
-; X32-NEXT: addl %edx, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: imull %ecx, %edi
+; X32-NEXT: addl %edx, %edi
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %edx, %edi
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ecx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: adcl %esi, %edi
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %esi, %ebx
-; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: addl %edi, %esi
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl %ebx, %esi
-; X32-NEXT: adcl %ecx, %edi
-; X32-NEXT: setb %cl
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %esi, %ecx
+; X32-NEXT: adcl %ebx, %edi
+; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ebp
; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl %cl, %ecx
-; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: movzbl %bl, %esi
+; X32-NEXT: adcl %esi, %edx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: adcl (%esp), %eax # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: addl (%esp), %esi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, %ebx
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, %ebx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, (%ecx)
+; X32-NEXT: movl %edi, (%esi)
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, 4(%ecx)
+; X32-NEXT: movl %edi, 4(%esi)
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, 8(%ecx)
+; X32-NEXT: movl %edi, 8(%esi)
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, 12(%ecx)
-; X32-NEXT: movl %ebx, 16(%ecx)
-; X32-NEXT: movl %esi, 20(%ecx)
-; X32-NEXT: movl %eax, 24(%ecx)
-; X32-NEXT: movl %edx, 28(%ecx)
+; X32-NEXT: movl %edi, 12(%esi)
+; X32-NEXT: movl %ebx, 16(%esi)
+; X32-NEXT: movl %ecx, 20(%esi)
+; X32-NEXT: movl %eax, 24(%esi)
+; X32-NEXT: movl %edx, 28(%esi)
; X32-NEXT: addl $72, %esp
; X32-NEXT: .cfi_def_cfa_offset 20
; X32-NEXT: popl %esi
@@ -312,9 +309,12 @@ define void @test(ptr %a, ptr %b, ptr %out) #0 {
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: pushq %r14
; X64-NEXT: .cfi_def_cfa_offset 24
-; X64-NEXT: pushq %rbx
+; X64-NEXT: pushq %r12
; X64-NEXT: .cfi_def_cfa_offset 32
-; X64-NEXT: .cfi_offset %rbx, -32
+; X64-NEXT: pushq %rbx
+; X64-NEXT: .cfi_def_cfa_offset 40
+; X64-NEXT: .cfi_offset %rbx, -40
+; X64-NEXT: .cfi_offset %r12, -32
; X64-NEXT: .cfi_offset %r14, -24
; X64-NEXT: .cfi_offset %r15, -16
; X64-NEXT: movq %rdx, %rcx
@@ -329,17 +329,17 @@ define void @test(ptr %a, ptr %b, ptr %out) #0 {
; X64-NEXT: movq %r9, %rax
; X64-NEXT: mulq %r10
; X64-NEXT: movq %rax, %rdi
-; X64-NEXT: addq %r15, %rdx
; X64-NEXT: imulq %r14, %r10
+; X64-NEXT: addq %r15, %r10
; X64-NEXT: addq %rdx, %r10
-; X64-NEXT: movq %r8, %r15
-; X64-NEXT: imulq %r11, %r15
+; X64-NEXT: movq %r8, %r12
+; X64-NEXT: imulq %r11, %r12
; X64-NEXT: movq %r8, %rax
; X64-NEXT: mulq %rbx
; X64-NEXT: movq %rax, %r8
-; X64-NEXT: addq %r15, %rdx
; X64-NEXT: movq 24(%rsi), %r15
; X64-NEXT: imulq %rbx, %r15
+; X64-NEXT: addq %r12, %r15
; X64-NEXT: addq %rdx, %r15
; X64-NEXT: addq %rdi, %r8
; X64-NEXT: adcq %r10, %r15
@@ -372,6 +372,8 @@ define void @test(ptr %a, ptr %b, ptr %out) #0 {
; X64-NEXT: movq %rax, 16(%rcx)
; X64-NEXT: movq %rdx, 24(%rcx)
; X64-NEXT: popq %rbx
+; X64-NEXT: .cfi_def_cfa_offset 32
+; X64-NEXT: popq %r12
; X64-NEXT: .cfi_def_cfa_offset 24
; X64-NEXT: popq %r14
; X64-NEXT: .cfi_def_cfa_offset 16
diff --git a/llvm/test/CodeGen/X86/mul-i512.ll b/llvm/test/CodeGen/X86/mul-i512.ll
index 08d0f7cd08220..f5254ed37b53d 100644
--- a/llvm/test/CodeGen/X86/mul-i512.ll
+++ b/llvm/test/CodeGen/X86/mul-i512.ll
@@ -33,7 +33,6 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %ecx
; X32-NEXT: movl %ecx, %ebp
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %esi
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -47,73 +46,74 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl 16(%ecx), %ebp
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl 16(%ecx), %ebx
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl 20(%ecx), %ebx
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl 20(%ecx), %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: addl %esi, %ecx
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl %ebp, %esi
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edi, %ebp
; X32-NEXT: setb %cl
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: mull %esi
-; X32-NEXT: addl %ebp, %eax
-; X32-NEXT: movzbl %cl, %ecx
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: addl %ebp, %esi
+; X32-NEXT: movzbl %cl, %eax
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X32-NEXT: movl 8(%edi), %ebp
-; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %ebp
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl 8(%eax), %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %ebp
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %ebp
-; X32-NEXT: movl 12(%edi), %ecx
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl 12(%eax), %ecx
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %edi
; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, %esi
+; X32-NEXT: movl %eax, %ebx
; X32-NEXT: adcl %ebp, %edi
-; X32-NEXT: setb %bl
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: addl %edi, %ecx
-; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: adcl $0, %edx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
@@ -125,7 +125,7 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %ebp
@@ -145,7 +145,7 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: addl %esi, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
; X32-NEXT: adcl %esi, %edx
-; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %ecx, (%esp) # 4-byte Folded Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
@@ -154,9 +154,9 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl 8(%ecx), %ebx
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl 8(%ecx), %edi
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -166,19 +166,20 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ebp, %edi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ebp, %ebx
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ebp
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull %ebx
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edi, %ebx
; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
@@ -191,66 +192,66 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl 4(%ecx), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl (%esp), %ebp # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ebx
; X32-NEXT: movl %ebx, %esi
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: adcl %ebp, %ecx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %eax, %esi
; X32-NEXT: addl %ecx, %esi
-; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %ebp
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: adcl %ebp, %edi
+; X32-NEXT: adcl %edi, %ebp
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl %edi, %ebp
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ebp, %edi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: adcl $0, %edi
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
@@ -262,27 +263,28 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %ebx
; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
-; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: adcl %ebx, %esi
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: mull %ecx
; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movzbl %bl, %ecx
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: movl %edx, %esi
; X32-NEXT: adcl %ecx, %esi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: addl %ebp, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT: addl %edi, %ecx
+; X32-NEXT: movl %ebp, %edx
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
; X32-NEXT: adcl %edi, %eax
@@ -295,111 +297,109 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl 16(%eax), %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %esi
+; X32-NEXT: movl 16(%eax), %ebp
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
-; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 20(%eax), %ecx
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %ecx, %ebx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %ecx, %esi
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: mull %ebx
+; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: setb %bl
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %esi
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movzbl %bl, %eax
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %esi, %ebp
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ebp
+; X32-NEXT: adcl %ebx, %esi
; X32-NEXT: setb %bl
; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %ebp, %ecx
+; X32-NEXT: addl %esi, %ecx
; X32-NEXT: movzbl %bl, %eax
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl 24(%eax), %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %ebx
+; X32-NEXT: movl 24(%eax), %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %ebx
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, %edi
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %ebp
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl 28(%eax), %ebx
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %ebx
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl 28(%eax), %esi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: adcl %ebp, %esi
+; X32-NEXT: adcl %ebp, %ebx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ebx
-; X32-NEXT: movl %ebx, %ebp
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %esi, %ebx
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ebx, %ebp
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: addl (%esp), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
@@ -407,31 +407,29 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl (%esp), %edi # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl %ebp, %ecx
-; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: mull %ebp
-; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ebp
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: adcl %edi, %esi
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: addl %ebx, %edi
-; X32-NEXT: movl (%esp), %ebx # 4-byte Reload
+; X32-NEXT: addl %ebp, %edi
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %eax
@@ -447,21 +445,20 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl %edi, %edx
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl %ebx, %ecx
-; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: adcl $0, %ebx
; X32-NEXT: adcl $0, %eax
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: addl (%esp), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
@@ -470,81 +467,80 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb %bl
+; X32-NEXT: adcl %esi, %ebx
+; X32-NEXT: setb (%esp) # 1-byte Folded Spill
; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %edi
+; X32-NEXT: adcl %edi, %ecx
; X32-NEXT: setb %bl
; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: mull %ecx
+; X32-NEXT: mull %esi
; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl %edi, %esi
+; X32-NEXT: addl %ecx, %esi
; X32-NEXT: movzbl %bl, %eax
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebp
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: adcl %ebx, %ecx
+; X32-NEXT: adcl %edi, %ebx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edi, %ebx
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: addl %ebx, %edi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
@@ -553,58 +549,59 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: adcl $0, %edi
; X32-NEXT: adcl $0, %edx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: adcl (%esp), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: setb (%esp) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, %ebp
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl %ebx, %ecx
-; X32-NEXT: mull %ebx
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: adcl %esi, %ebx
+; X32-NEXT: adcl %ebx, %esi
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: addl %edi, %ecx
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %esi, %ebx
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: adcl %eax, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT: addl %edi, %edx
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
-; X32-NEXT: adcl %esi, %eax
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: adcl $0, %edx
+; X32-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %ebx
+; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl (%esp), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X32-NEXT: adcl %eax, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %ebp
; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 32(%eax), %edi
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -612,7 +609,7 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %edi
@@ -624,7 +621,7 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl 36(%eax), %ecx
; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl %ecx, %esi
-; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: addl %ebx, %eax
@@ -652,7 +649,7 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: addl %ecx, %ebp
; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %esi
; X32-NEXT: addl %ebp, %eax
@@ -665,14 +662,14 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: addl %esi, %edi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl (%esp), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 40(%eax), %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: mull %esi
@@ -711,7 +708,7 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl (%esp), %edi # 4-byte Reload
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -743,56 +740,53 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %edi, %esi
-; X32-NEXT: imull %eax, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: imull %eax, %edi
+; X32-NEXT: movl (%esp), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: imull %ebx, %ecx
+; X32-NEXT: addl %edi, %ecx
; X32-NEXT: addl %edx, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: movl (%esp), %edi # 4-byte Reload
-; X32-NEXT: imull %edi, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: mull %ebp
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %esi, %edx
+; X32-NEXT: movl %eax, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: imull %ebp, %esi
-; X32-NEXT: addl %edx, %esi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: imull %esi, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: imull %ecx, %edi
+; X32-NEXT: addl %edx, %edi
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %edx, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl (%esp), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %ecx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ebp, %edi
+; X32-NEXT: addl (%esp), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: mull %ebx
-; X32-NEXT: movl %ebx, %ebp
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb %bl
-; X32-NEXT: movl (%esp), %eax # 4-byte Reload
-; X32-NEXT: mull %ebp
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -801,62 +795,60 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: imull %eax, %ecx
; X32-NEXT: movl 56(%edi), %esi
-; X32-NEXT: movl %edi, %ebx
; X32-NEXT: mull %esi
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %ecx, %edx
-; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: imull %ebp, %esi
+; X32-NEXT: addl %ecx, %esi
; X32-NEXT: addl %edx, %esi
-; X32-NEXT: movl 48(%edi), %edi
-; X32-NEXT: movl 52(%ebx), %ebp
+; X32-NEXT: movl 48(%edi), %ecx
+; X32-NEXT: movl 52(%edi), %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: imull %ebp, %ebx
-; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ebx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: imull %edi, %ecx
-; X32-NEXT: addl %edx, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %eax, %edx
+; X32-NEXT: imull %ebx, %edx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: imull %ecx, %edi
+; X32-NEXT: addl %edx, %edi
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: addl %edx, %edi
+; X32-NEXT: addl (%esp), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl %esi, %ebp
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: adcl %esi, %edi
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: addl (%esp), %esi # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ebp, %edi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %esi, %ebp
; X32-NEXT: adcl %ebx, %ecx
; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: adcl %edi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
@@ -888,10 +880,10 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %ebp
; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movzbl %cl, %eax
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl 32(%ecx), %esi
; X32-NEXT: movl %esi, %eax
@@ -925,8 +917,8 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
@@ -959,20 +951,20 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %edi
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl (%esp), %edi # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl (%esp), %ecx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: mull %ebx
-; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, %ebp
-; X32-NEXT: addl (%esp), %ebp # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl $0, %ebx
; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
@@ -980,60 +972,60 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, %ebp
; X32-NEXT: adcl %ebx, %esi
-; X32-NEXT: setb (%esp) # 1-byte Folded Spill
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: mull %ebx
; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl %eax, %esi
-; X32-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %edi, (%esp) # 4-byte Folded Spill
; X32-NEXT: adcl %ecx, %ebp
-; X32-NEXT: movl %ebp, (%esp) # 4-byte Spill
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl 48(%ecx), %edi
+; X32-NEXT: movl 48(%ecx), %ebp
; X32-NEXT: movl %ebx, %esi
-; X32-NEXT: imull %edi, %esi
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: imull %ebp, %esi
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: mull %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %esi, %edx
; X32-NEXT: movl 52(%ecx), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: imull %eax, %ebx
+; X32-NEXT: addl %esi, %ebx
; X32-NEXT: addl %edx, %ebx
; X32-NEXT: movl 56(%ecx), %eax
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: imull %ebp, %esi
+; X32-NEXT: movl %ecx, %esi
+; X32-NEXT: movl %eax, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: addl %esi, %edx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: imull %ecx, %edx
; X32-NEXT: movl 60(%esi), %esi
-; X32-NEXT: imull %ecx, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: imull %edi, %esi
+; X32-NEXT: addl %edx, %esi
+; X32-NEXT: mull %edi
; X32-NEXT: addl %edx, %esi
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %ebx, %esi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull %edi
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: mull %edi
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %edi
@@ -1050,43 +1042,43 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: imull %eax, %edi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: addl %edi, %ecx
+; X32-NEXT: addl %edx, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: imull %eax, %ecx
-; X32-NEXT: movl %eax, %esi
+; X32-NEXT: movl %eax, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: imull %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: imull %edi, %ebp
+; X32-NEXT: addl %edx, %ebp
; X32-NEXT: mull %edi
+; X32-NEXT: addl %edx, %ebp
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT: imull %ebp, %edi
-; X32-NEXT: addl %edx, %edi
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: imull %edi, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
-; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: adcl %ecx, %ebp
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl %edi, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: imull %ebx, %ecx
-; X32-NEXT: addl %edx, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %ebx, %ecx
+; X32-NEXT: addl %edi, %ecx
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, %edi
@@ -1105,9 +1097,9 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind {
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: addl (%esp), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl (%esp), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
@@ -1179,271 +1171,270 @@ define void @test_512(ptr %a, ptr %b, ptr %out) nounwind {
; X64-NEXT: pushq %r13
; X64-NEXT: pushq %r12
; X64-NEXT: pushq %rbx
-; X64-NEXT: pushq %rax
-; X64-NEXT: movq %rdx, (%rsp) # 8-byte Spill
+; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq (%rdi), %rbx
+; X64-NEXT: movq (%rdi), %r14
; X64-NEXT: movq 8(%rdi), %r9
-; X64-NEXT: movq 24(%rdi), %r12
-; X64-NEXT: movq 16(%rdi), %r14
-; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq (%rsi), %rcx
-; X64-NEXT: movq 8(%rsi), %r11
-; X64-NEXT: movq %rsi, %rdi
-; X64-NEXT: movq %r14, %rax
-; X64-NEXT: movq %rcx, %rsi
-; X64-NEXT: mulq %rcx
+; X64-NEXT: movq 24(%rdi), %r15
+; X64-NEXT: movq 16(%rdi), %rax
+; X64-NEXT: movq (%rsi), %rdi
+; X64-NEXT: movq 8(%rsi), %rbx
+; X64-NEXT: movq %rsi, %r12
+; X64-NEXT: movq %rax, %rsi
+; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: mulq %rdi
; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %r12, %rax
-; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: mulq %rsi
-; X64-NEXT: movq %rsi, %r15
+; X64-NEXT: movq %r15, %rax
+; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: mulq %rdi
; X64-NEXT: movq %rdx, %r8
; X64-NEXT: movq %rax, %r10
; X64-NEXT: addq %rcx, %r10
; X64-NEXT: adcq $0, %r8
-; X64-NEXT: movq %r14, %rax
-; X64-NEXT: mulq %r11
-; X64-NEXT: movq %rdx, %r14
-; X64-NEXT: movq %rax, %rcx
-; X64-NEXT: addq %r10, %rcx
-; X64-NEXT: adcq %r8, %r14
+; X64-NEXT: movq %rsi, %rax
+; X64-NEXT: mulq %rbx
+; X64-NEXT: movq %rdx, %rcx
+; X64-NEXT: movq %rax, %r11
+; X64-NEXT: addq %r10, %r11
+; X64-NEXT: adcq %r8, %rcx
; X64-NEXT: setb %al
; X64-NEXT: movzbl %al, %esi
-; X64-NEXT: movq %r12, %rax
-; X64-NEXT: mulq %r11
+; X64-NEXT: movq %r15, %rax
+; X64-NEXT: mulq %rbx
; X64-NEXT: movq %rdx, %r13
; X64-NEXT: movq %rax, %r10
-; X64-NEXT: addq %r14, %r10
+; X64-NEXT: addq %rcx, %r10
; X64-NEXT: adcq %rsi, %r13
-; X64-NEXT: movq %rbx, %rax
-; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: mulq %r15
-; X64-NEXT: movq %rdx, %r14
+; X64-NEXT: movq %r14, %rsi
+; X64-NEXT: movq %r14, %rax
+; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: mulq %rdi
+; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %r9, %rax
-; X64-NEXT: mulq %r15
-; X64-NEXT: movq %rdx, %rbp
+; X64-NEXT: mulq %rdi
+; X64-NEXT: movq %rdx, %r14
; X64-NEXT: movq %rax, %r15
-; X64-NEXT: addq %r14, %r15
-; X64-NEXT: adcq $0, %rbp
-; X64-NEXT: movq %rbx, %rax
-; X64-NEXT: movq %rbx, %r12
-; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: mulq %r11
-; X64-NEXT: movq %rdx, %rbx
+; X64-NEXT: addq %rcx, %r15
+; X64-NEXT: adcq $0, %r14
+; X64-NEXT: movq %rsi, %rax
+; X64-NEXT: movq %rsi, %r8
+; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: mulq %rbx
+; X64-NEXT: movq %rdx, %rbp
; X64-NEXT: addq %r15, %rax
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq %rbp, %rbx
+; X64-NEXT: adcq %r14, %rbp
; X64-NEXT: setb %sil
+; X64-NEXT: movq %r9, %rdi
; X64-NEXT: movq %r9, %rax
-; X64-NEXT: mulq %r11
+; X64-NEXT: mulq %rbx
; X64-NEXT: movq %rdx, %r14
-; X64-NEXT: movq %rax, %rbp
-; X64-NEXT: addq %rbx, %rbp
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: addq %rbp, %rcx
; X64-NEXT: movzbl %sil, %eax
; X64-NEXT: adcq %rax, %r14
-; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
-; X64-NEXT: adcq %rcx, %r14
+; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT: adcq %r11, %r14
; X64-NEXT: adcq $0, %r10
; X64-NEXT: adcq $0, %r13
-; X64-NEXT: movq %rdi, %rsi
-; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq 16(%rdi), %r8
-; X64-NEXT: movq %r12, %r11
+; X64-NEXT: movq %r12, %r9
; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %r12, %rax
-; X64-NEXT: mulq %r8
-; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq %rax, %rdi
-; X64-NEXT: movq %r9, %rax
-; X64-NEXT: movq %r9, %r12
-; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: mulq %r8
-; X64-NEXT: movq %rdx, %rbx
-; X64-NEXT: movq %rax, %r15
-; X64-NEXT: addq %rcx, %r15
-; X64-NEXT: adcq $0, %rbx
-; X64-NEXT: movq 24(%rsi), %rsi
-; X64-NEXT: movq %r11, %rax
+; X64-NEXT: movq 16(%r12), %rsi
+; X64-NEXT: movq %r8, %rbx
+; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq %r8, %rax
+; X64-NEXT: mulq %rsi
+; X64-NEXT: movq %rdx, %r11
+; X64-NEXT: movq %rax, %r8
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: movq %rdi, %r12
+; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: mulq %rsi
+; X64-NEXT: movq %rdx, %r15
+; X64-NEXT: movq %rax, %rbp
+; X64-NEXT: addq %r11, %rbp
+; X64-NEXT: adcq $0, %r15
+; X64-NEXT: movq 24(%r9), %rdi
+; X64-NEXT: movq %rbx, %rax
+; X64-NEXT: mulq %rdi
; X64-NEXT: movq %rdx, %r9
-; X64-NEXT: movq %rax, %r11
-; X64-NEXT: addq %r15, %r11
-; X64-NEXT: adcq %rbx, %r9
-; X64-NEXT: setb %bl
+; X64-NEXT: movq %rax, %rbx
+; X64-NEXT: addq %rbp, %rbx
+; X64-NEXT: adcq %r15, %r9
+; X64-NEXT: setb %bpl
; X64-NEXT: movq %r12, %rax
-; X64-NEXT: mulq %rsi
+; X64-NEXT: mulq %rdi
; X64-NEXT: movq %rdx, %r15
-; X64-NEXT: movq %rax, %rcx
-; X64-NEXT: addq %r9, %rcx
-; X64-NEXT: movzbl %bl, %eax
+; X64-NEXT: movq %rax, %r11
+; X64-NEXT: addq %r9, %r11
+; X64-NEXT: movzbl %bpl, %eax
; X64-NEXT: adcq %rax, %r15
-; X64-NEXT: addq %rbp, %rdi
-; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq %r14, %r11
-; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq $0, %rcx
+; X64-NEXT: addq %rcx, %r8
+; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: adcq %r14, %rbx
+; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: adcq $0, %r11
; X64-NEXT: adcq $0, %r15
-; X64-NEXT: addq %r10, %rcx
+; X64-NEXT: addq %r10, %r11
; X64-NEXT: adcq %r13, %r15
-; X64-NEXT: setb %r12b
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
-; X64-NEXT: movq %r14, %rax
-; X64-NEXT: mulq %r8
-; X64-NEXT: movq %rdx, %rdi
-; X64-NEXT: movq %rax, %r11
+; X64-NEXT: setb %bpl
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
; X64-NEXT: movq %r10, %rax
-; X64-NEXT: mulq %r8
-; X64-NEXT: movq %rdx, %r9
-; X64-NEXT: movq %rax, %rbx
-; X64-NEXT: addq %rdi, %rbx
-; X64-NEXT: adcq $0, %r9
-; X64-NEXT: movq %r14, %rax
; X64-NEXT: mulq %rsi
-; X64-NEXT: movq %rdx, %rbp
-; X64-NEXT: addq %rbx, %rax
+; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: movq %rax, %rbx
-; X64-NEXT: adcq %r9, %rbp
-; X64-NEXT: setb %dil
-; X64-NEXT: movq %r10, %rax
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; X64-NEXT: movq %r13, %rax
; X64-NEXT: mulq %rsi
-; X64-NEXT: addq %rbp, %rax
-; X64-NEXT: movzbl %dil, %edi
-; X64-NEXT: adcq %rdi, %rdx
-; X64-NEXT: addq %rcx, %r11
-; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq %r15, %rbx
+; X64-NEXT: movq %rdx, %r8
+; X64-NEXT: movq %rax, %r9
+; X64-NEXT: addq %rcx, %r9
+; X64-NEXT: adcq $0, %r8
+; X64-NEXT: movq %r10, %rax
+; X64-NEXT: mulq %rdi
+; X64-NEXT: movq %rdx, %rcx
+; X64-NEXT: addq %r9, %rax
+; X64-NEXT: movq %rax, %r9
+; X64-NEXT: adcq %r8, %rcx
+; X64-NEXT: setb %r8b
+; X64-NEXT: movq %r13, %rax
+; X64-NEXT: mulq %rdi
+; X64-NEXT: movq %rax, %r14
+; X64-NEXT: addq %rcx, %r14
+; X64-NEXT: movzbl %r8b, %eax
+; X64-NEXT: adcq %rax, %rdx
+; X64-NEXT: addq %r11, %rbx
; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movzbl %r12b, %ecx
-; X64-NEXT: adcq %rcx, %rax
-; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: adcq %r15, %r9
+; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movzbl %bpl, %eax
+; X64-NEXT: adcq %rax, %r14
; X64-NEXT: adcq $0, %rdx
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT: movq 32(%rcx), %r15
-; X64-NEXT: imulq %r15, %rsi
-; X64-NEXT: movq %r15, %rax
-; X64-NEXT: mulq %r8
+; X64-NEXT: movq 32(%rcx), %r11
+; X64-NEXT: imulq %r11, %rdi
+; X64-NEXT: movq %r11, %rax
+; X64-NEXT: mulq %rsi
; X64-NEXT: movq %rax, %r9
-; X64-NEXT: addq %rsi, %rdx
-; X64-NEXT: movq 40(%rcx), %rsi
-; X64-NEXT: imulq %rsi, %r8
-; X64-NEXT: addq %rdx, %r8
+; X64-NEXT: movq 40(%rcx), %r8
+; X64-NEXT: imulq %r8, %rsi
+; X64-NEXT: addq %rdi, %rsi
+; X64-NEXT: addq %rdx, %rsi
; X64-NEXT: movq 48(%rcx), %rax
-; X64-NEXT: movq %rcx, %r11
-; X64-NEXT: movq %rax, %rdi
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
-; X64-NEXT: imulq %r14, %rdi
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
-; X64-NEXT: mulq %rbx
+; X64-NEXT: movq %rcx, %rdx
; X64-NEXT: movq %rax, %rcx
-; X64-NEXT: addq %rdi, %rdx
-; X64-NEXT: movq 56(%r11), %r11
-; X64-NEXT: imulq %rbx, %r11
-; X64-NEXT: addq %rdx, %r11
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT: imulq %r10, %rcx
+; X64-NEXT: movq 56(%rdx), %rbp
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT: imulq %rdi, %rbp
+; X64-NEXT: addq %rcx, %rbp
+; X64-NEXT: mulq %rdi
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: addq %rdx, %rbp
; X64-NEXT: addq %r9, %rcx
-; X64-NEXT: adcq %r8, %r11
-; X64-NEXT: movq %rbx, %rax
-; X64-NEXT: movq %rbx, %r8
-; X64-NEXT: mulq %r15
+; X64-NEXT: adcq %rsi, %rbp
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: movq %rdi, %rsi
+; X64-NEXT: mulq %r11
; X64-NEXT: movq %rdx, %rdi
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %r14, %rax
-; X64-NEXT: mulq %r15
+; X64-NEXT: movq %r10, %rax
+; X64-NEXT: mulq %r11
; X64-NEXT: movq %rdx, %r9
-; X64-NEXT: movq %rax, %rbx
-; X64-NEXT: addq %rdi, %rbx
+; X64-NEXT: movq %rax, %r11
+; X64-NEXT: addq %rdi, %r11
; X64-NEXT: adcq $0, %r9
-; X64-NEXT: movq %r8, %rax
-; X64-NEXT: mulq %rsi
+; X64-NEXT: movq %rsi, %rax
+; X64-NEXT: mulq %r8
; X64-NEXT: movq %rdx, %r15
-; X64-NEXT: movq %rax, %r13
-; X64-NEXT: addq %rbx, %r13
+; X64-NEXT: movq %rax, %rbx
+; X64-NEXT: addq %r11, %rbx
; X64-NEXT: adcq %r9, %r15
-; X64-NEXT: setb %dil
-; X64-NEXT: movq %r14, %rax
-; X64-NEXT: mulq %rsi
+; X64-NEXT: setb %sil
+; X64-NEXT: movq %r10, %rax
+; X64-NEXT: mulq %r8
; X64-NEXT: movq %rdx, %r12
; X64-NEXT: movq %rax, %r8
; X64-NEXT: addq %r15, %r8
-; X64-NEXT: movzbl %dil, %eax
+; X64-NEXT: movzbl %sil, %eax
; X64-NEXT: adcq %rax, %r12
; X64-NEXT: addq %rcx, %r8
-; X64-NEXT: adcq %r11, %r12
+; X64-NEXT: adcq %rbp, %r12
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; X64-NEXT: movq 56(%rcx), %rsi
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; X64-NEXT: imulq %rax, %rsi
; X64-NEXT: movq 48(%rcx), %r11
; X64-NEXT: movq %rcx, %rdi
-; X64-NEXT: movq %rax, %rbx
+; X64-NEXT: movq %rax, %rbp
; X64-NEXT: mulq %r11
-; X64-NEXT: movq %rax, %rcx
-; X64-NEXT: addq %rsi, %rdx
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
-; X64-NEXT: imulq %r14, %r11
+; X64-NEXT: movq %rax, %r10
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT: imulq %rcx, %r11
+; X64-NEXT: addq %rsi, %r11
; X64-NEXT: addq %rdx, %r11
; X64-NEXT: movq 32(%rdi), %r9
-; X64-NEXT: movq 40(%rdi), %r15
+; X64-NEXT: movq 40(%rdi), %rdi
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT: movq %rax, %rsi
-; X64-NEXT: imulq %r15, %rsi
+; X64-NEXT: movq %rax, %rdx
+; X64-NEXT: imulq %rdi, %rdx
+; X64-NEXT: imulq %r9, %r13
+; X64-NEXT: addq %rdx, %r13
; X64-NEXT: mulq %r9
-; X64-NEXT: movq %rax, %rdi
-; X64-NEXT: addq %rsi, %rdx
-; X64-NEXT: imulq %r9, %r10
-; X64-NEXT: addq %rdx, %r10
-; X64-NEXT: addq %rcx, %rdi
-; X64-NEXT: adcq %r11, %r10
+; X64-NEXT: movq %rax, %r15
+; X64-NEXT: addq %rdx, %r13
+; X64-NEXT: addq %r10, %r15
+; X64-NEXT: adcq %r11, %r13
; X64-NEXT: movq %r9, %rax
-; X64-NEXT: mulq %rbx
-; X64-NEXT: movq %rdx, %rsi
-; X64-NEXT: movq %rax, %rcx
-; X64-NEXT: movq %r15, %rax
-; X64-NEXT: mulq %rbx
+; X64-NEXT: mulq %rbp
+; X64-NEXT: movq %rdx, %r10
+; X64-NEXT: movq %rax, %rsi
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: mulq %rbp
; X64-NEXT: movq %rdx, %rbp
-; X64-NEXT: movq %rax, %rbx
-; X64-NEXT: addq %rsi, %rbx
+; X64-NEXT: movq %rax, %r11
+; X64-NEXT: addq %r10, %r11
; X64-NEXT: adcq $0, %rbp
; X64-NEXT: movq %r9, %rax
-; X64-NEXT: mulq %r14
-; X64-NEXT: movq %rdx, %rsi
-; X64-NEXT: movq %rax, %r9
-; X64-NEXT: addq %rbx, %r9
-; X64-NEXT: adcq %rbp, %rsi
-; X64-NEXT: setb %bl
-; X64-NEXT: movq %r15, %rax
-; X64-NEXT: mulq %r14
-; X64-NEXT: addq %rsi, %rax
-; X64-NEXT: movzbl %bl, %esi
-; X64-NEXT: adcq %rsi, %rdx
-; X64-NEXT: addq %rdi, %rax
-; X64-NEXT: adcq %r10, %rdx
-; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
-; X64-NEXT: adcq %r13, %r9
+; X64-NEXT: mulq %rcx
+; X64-NEXT: movq %rcx, %r10
+; X64-NEXT: movq %rdx, %r9
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: addq %r11, %rcx
+; X64-NEXT: adcq %rbp, %r9
+; X64-NEXT: setb %r11b
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: mulq %r10
+; X64-NEXT: addq %r9, %rax
+; X64-NEXT: movzbl %r11b, %edi
+; X64-NEXT: adcq %rdi, %rdx
+; X64-NEXT: addq %r15, %rax
+; X64-NEXT: adcq %r13, %rdx
+; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT: adcq %rbx, %rcx
; X64-NEXT: adcq %r8, %rax
; X64-NEXT: adcq %r12, %rdx
-; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT: adcq %r14, %rax
; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
-; X64-NEXT: movq (%rsp), %rsi # 8-byte Reload
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; X64-NEXT: movq %rdi, (%rsi)
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; X64-NEXT: movq %rdi, 8(%rsi)
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; X64-NEXT: movq %rdi, 16(%rsi)
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; X64-NEXT: movq %rdi, 24(%rsi)
-; X64-NEXT: movq %rcx, 32(%rsi)
-; X64-NEXT: movq %r9, 40(%rsi)
-; X64-NEXT: movq %rax, 48(%rsi)
-; X64-NEXT: movq %rdx, 56(%rsi)
-; X64-NEXT: addq $8, %rsp
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT: movq %r8, (%rdi)
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT: movq %r8, 8(%rdi)
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT: movq %r8, 16(%rdi)
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT: movq %r8, 24(%rdi)
+; X64-NEXT: movq %rsi, 32(%rdi)
+; X64-NEXT: movq %rcx, 40(%rdi)
+; X64-NEXT: movq %rax, 48(%rdi)
+; X64-NEXT: movq %rdx, 56(%rdi)
; X64-NEXT: popq %rbx
; X64-NEXT: popq %r12
; X64-NEXT: popq %r13
diff --git a/llvm/test/CodeGen/X86/mul128.ll b/llvm/test/CodeGen/X86/mul128.ll
index 1edd1fa4373c8..0bc23af920eed 100644
--- a/llvm/test/CodeGen/X86/mul128.ll
+++ b/llvm/test/CodeGen/X86/mul128.ll
@@ -9,8 +9,8 @@ define i128 @foo(i128 %t, i128 %u) {
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: imulq %rdi, %rcx
; X64-NEXT: mulq %rdx
-; X64-NEXT: addq %rcx, %rdx
; X64-NEXT: imulq %rsi, %r8
+; X64-NEXT: addq %rcx, %r8
; X64-NEXT: addq %r8, %rdx
; X64-NEXT: retq
;
@@ -30,57 +30,56 @@ define i128 @foo(i128 %t, i128 %u) {
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: imull %ecx, %ebp
-; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: imull %ecx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: imull %ebp, %esi
+; X86-NEXT: addl %eax, %esi
+; X86-NEXT: movl %ebp, %eax
; X86-NEXT: mull %ecx
; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %edx, %esi
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl %edi, %edx
+; X86-NEXT: imull {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: imull %esi, %edi
-; X86-NEXT: addl %edx, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: addl %ebp, %edi
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: imull %ebp, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: mull %esi
-; X86-NEXT: addl %ecx, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: imull %esi, %ecx
+; X86-NEXT: imull %edi, %ecx
+; X86-NEXT: addl %edx, %ecx
+; X86-NEXT: mull %edi
; X86-NEXT: addl %edx, %ecx
; X86-NEXT: addl %ebx, %eax
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: adcl %edi, %ecx
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: mull %esi
-; X86-NEXT: movl %edx, %edi
+; X86-NEXT: adcl %esi, %ecx
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: mull %ebp
+; X86-NEXT: movl %edx, %esi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: mull %esi
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: movl %eax, %esi
-; X86-NEXT: addl %edi, %esi
-; X86-NEXT: adcl $0, %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, %edi
+; X86-NEXT: mull %ebp
+; X86-NEXT: movl %edx, %ebx
; X86-NEXT: movl %eax, %ebp
; X86-NEXT: addl %esi, %ebp
-; X86-NEXT: adcl %ebx, %edi
+; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: addl %ebp, %edi
+; X86-NEXT: adcl %ebx, %esi
; X86-NEXT: setb %bl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: addl %edi, %eax
+; X86-NEXT: addl %esi, %eax
; X86-NEXT: movzbl %bl, %esi
; X86-NEXT: adcl %esi, %edx
; X86-NEXT: addl (%esp), %eax # 4-byte Folded Reload
; X86-NEXT: adcl %ecx, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ebp, 4(%ecx)
+; X86-NEXT: movl %edi, 4(%ecx)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: movl %esi, (%ecx)
; X86-NEXT: movl %eax, 8(%ecx)
diff --git a/llvm/test/CodeGen/X86/mul64.ll b/llvm/test/CodeGen/X86/mul64.ll
index 1feed4b207a23..d2afb2c529e42 100644
--- a/llvm/test/CodeGen/X86/mul64.ll
+++ b/llvm/test/CodeGen/X86/mul64.ll
@@ -11,8 +11,8 @@ define i64 @foo(i64 %t, i64 %u) nounwind {
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: mull %esi
; X32-NEXT: imull {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: addl %ecx, %edx
; X32-NEXT: imull {{[0-9]+}}(%esp), %esi
+; X32-NEXT: addl %ecx, %esi
; X32-NEXT: addl %esi, %edx
; X32-NEXT: popl %esi
; X32-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/muloti.ll b/llvm/test/CodeGen/X86/muloti.ll
index 9a6cf0b065662..60a2e21dcd03d 100644
--- a/llvm/test/CodeGen/X86/muloti.ll
+++ b/llvm/test/CodeGen/X86/muloti.ll
@@ -22,8 +22,8 @@ define %0 @x(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0, i64 %b.coerce1) nou
; CHECK-NEXT: movq %rdx, %rax
; CHECK-NEXT: mulq %rbx
; CHECK-NEXT: movq %rax, %r8
-; CHECK-NEXT: addq %rdi, %rdx
; CHECK-NEXT: imulq %rcx, %rbx
+; CHECK-NEXT: addq %rdi, %rbx
; CHECK-NEXT: addq %rdx, %rbx
; CHECK-NEXT: movq %rcx, %rdi
; CHECK-NEXT: sarq $63, %rdi
@@ -32,8 +32,8 @@ define %0 @x(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0, i64 %b.coerce1) nou
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: mulq %r9
; CHECK-NEXT: movq %rax, %r10
-; CHECK-NEXT: addq %r14, %rdx
; CHECK-NEXT: imulq %r9, %rdi
+; CHECK-NEXT: addq %r14, %rdi
; CHECK-NEXT: addq %rdx, %rdi
; CHECK-NEXT: addq %r8, %r10
; CHECK-NEXT: adcq %rbx, %rdi
diff --git a/llvm/test/CodeGen/X86/popcnt.ll b/llvm/test/CodeGen/X86/popcnt.ll
index 43110af64c77e..fb222cd2b2e10 100644
--- a/llvm/test/CodeGen/X86/popcnt.ll
+++ b/llvm/test/CodeGen/X86/popcnt.ll
@@ -352,23 +352,23 @@ define i128 @cnt128(i128 %x) nounwind readnone {
; X86-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
; X86-NOSSE-NEXT: imull $16843009, %edi, %edx # imm = 0x1010101
; X86-NOSSE-NEXT: shrl $24, %edx
-; X86-NOSSE-NEXT: movl %ecx, %edi
-; X86-NOSSE-NEXT: shrl %edi
-; X86-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555
-; X86-NOSSE-NEXT: subl %edi, %ecx
-; X86-NOSSE-NEXT: movl %ecx, %edi
-; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333
+; X86-NOSSE-NEXT: addl %esi, %edx
+; X86-NOSSE-NEXT: movl %ecx, %esi
+; X86-NOSSE-NEXT: shrl %esi
+; X86-NOSSE-NEXT: andl $1431655765, %esi # imm = 0x55555555
+; X86-NOSSE-NEXT: subl %esi, %ecx
+; X86-NOSSE-NEXT: movl %ecx, %esi
+; X86-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333
; X86-NOSSE-NEXT: shrl $2, %ecx
; X86-NOSSE-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; X86-NOSSE-NEXT: addl %edi, %ecx
-; X86-NOSSE-NEXT: movl %ecx, %edi
-; X86-NOSSE-NEXT: shrl $4, %edi
-; X86-NOSSE-NEXT: addl %ecx, %edi
-; X86-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
-; X86-NOSSE-NEXT: imull $16843009, %edi, %ecx # imm = 0x1010101
+; X86-NOSSE-NEXT: addl %esi, %ecx
+; X86-NOSSE-NEXT: movl %ecx, %esi
+; X86-NOSSE-NEXT: shrl $4, %esi
+; X86-NOSSE-NEXT: addl %ecx, %esi
+; X86-NOSSE-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F
+; X86-NOSSE-NEXT: imull $16843009, %esi, %ecx # imm = 0x1010101
; X86-NOSSE-NEXT: shrl $24, %ecx
; X86-NOSSE-NEXT: addl %edx, %ecx
-; X86-NOSSE-NEXT: addl %esi, %ecx
; X86-NOSSE-NEXT: movl %ecx, (%eax)
; X86-NOSSE-NEXT: movl $0, 12(%eax)
; X86-NOSSE-NEXT: movl $0, 8(%eax)
@@ -420,20 +420,18 @@ define i128 @cnt128(i128 %x) nounwind readnone {
;
; X86-POPCNT-LABEL: cnt128:
; X86-POPCNT: # %bb.0:
-; X86-POPCNT-NEXT: pushl %esi
; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx
; X86-POPCNT-NEXT: addl %ecx, %edx
; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
-; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi
-; X86-POPCNT-NEXT: addl %ecx, %esi
-; X86-POPCNT-NEXT: addl %edx, %esi
-; X86-POPCNT-NEXT: movl %esi, (%eax)
+; X86-POPCNT-NEXT: addl %edx, %ecx
+; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx
+; X86-POPCNT-NEXT: addl %ecx, %edx
+; X86-POPCNT-NEXT: movl %edx, (%eax)
; X86-POPCNT-NEXT: movl $0, 12(%eax)
; X86-POPCNT-NEXT: movl $0, 8(%eax)
; X86-POPCNT-NEXT: movl $0, 4(%eax)
-; X86-POPCNT-NEXT: popl %esi
; X86-POPCNT-NEXT: retl $4
;
; X64-POPCNT-LABEL: cnt128:
@@ -800,83 +798,80 @@ define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize {
; X86-NOSSE-NEXT: pushl %esi
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NOSSE-NEXT: movl %ebx, %ecx
-; X86-NOSSE-NEXT: shrl %ecx
-; X86-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555
-; X86-NOSSE-NEXT: andl %edi, %ecx
-; X86-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555
-; X86-NOSSE-NEXT: subl %ecx, %ebx
-; X86-NOSSE-NEXT: movl $858993459, %ecx # imm = 0x33333333
-; X86-NOSSE-NEXT: movl %ebx, %ebp
-; X86-NOSSE-NEXT: andl %ecx, %ebp
+; X86-NOSSE-NEXT: movl %ebx, %eax
+; X86-NOSSE-NEXT: shrl %eax
+; X86-NOSSE-NEXT: movl $1431655765, %ecx # imm = 0x55555555
+; X86-NOSSE-NEXT: andl %ecx, %eax
+; X86-NOSSE-NEXT: subl %eax, %ebx
+; X86-NOSSE-NEXT: movl $858993459, %eax # imm = 0x33333333
+; X86-NOSSE-NEXT: movl %ebx, %edi
+; X86-NOSSE-NEXT: andl %eax, %edi
; X86-NOSSE-NEXT: shrl $2, %ebx
-; X86-NOSSE-NEXT: andl %ecx, %ebx
-; X86-NOSSE-NEXT: addl %ebp, %ebx
-; X86-NOSSE-NEXT: movl %ebx, %ebp
-; X86-NOSSE-NEXT: shrl $4, %ebp
-; X86-NOSSE-NEXT: addl %ebx, %ebp
-; X86-NOSSE-NEXT: movl %eax, %ebx
+; X86-NOSSE-NEXT: andl %eax, %ebx
+; X86-NOSSE-NEXT: addl %edi, %ebx
+; X86-NOSSE-NEXT: movl %ebx, %edi
+; X86-NOSSE-NEXT: shrl $4, %edi
+; X86-NOSSE-NEXT: addl %ebx, %edi
+; X86-NOSSE-NEXT: movl %esi, %ebx
; X86-NOSSE-NEXT: shrl %ebx
-; X86-NOSSE-NEXT: andl %edi, %ebx
-; X86-NOSSE-NEXT: subl %ebx, %eax
-; X86-NOSSE-NEXT: movl %eax, %ebx
; X86-NOSSE-NEXT: andl %ecx, %ebx
-; X86-NOSSE-NEXT: shrl $2, %eax
-; X86-NOSSE-NEXT: andl %ecx, %eax
-; X86-NOSSE-NEXT: addl %ebx, %eax
-; X86-NOSSE-NEXT: movl %eax, %edi
-; X86-NOSSE-NEXT: shrl $4, %edi
-; X86-NOSSE-NEXT: addl %eax, %edi
-; X86-NOSSE-NEXT: movl $252645135, %ebx # imm = 0xF0F0F0F
-; X86-NOSSE-NEXT: andl %ebx, %ebp
-; X86-NOSSE-NEXT: imull $16843009, %ebp, %eax # imm = 0x1010101
-; X86-NOSSE-NEXT: shrl $24, %eax
-; X86-NOSSE-NEXT: andl %ebx, %edi
-; X86-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101
-; X86-NOSSE-NEXT: shrl $24, %edi
-; X86-NOSSE-NEXT: addl %eax, %edi
-; X86-NOSSE-NEXT: movl %esi, %eax
-; X86-NOSSE-NEXT: shrl %eax
-; X86-NOSSE-NEXT: movl $1431655765, %ebp # imm = 0x55555555
-; X86-NOSSE-NEXT: andl %ebp, %eax
-; X86-NOSSE-NEXT: subl %eax, %esi
-; X86-NOSSE-NEXT: movl %esi, %eax
-; X86-NOSSE-NEXT: andl %ecx, %eax
+; X86-NOSSE-NEXT: subl %ebx, %esi
+; X86-NOSSE-NEXT: movl %esi, %ebx
+; X86-NOSSE-NEXT: andl %eax, %ebx
; X86-NOSSE-NEXT: shrl $2, %esi
-; X86-NOSSE-NEXT: andl %ecx, %esi
-; X86-NOSSE-NEXT: addl %eax, %esi
-; X86-NOSSE-NEXT: movl %esi, %ebp
-; X86-NOSSE-NEXT: shrl $4, %ebp
-; X86-NOSSE-NEXT: addl %esi, %ebp
-; X86-NOSSE-NEXT: movl %edx, %eax
-; X86-NOSSE-NEXT: shrl %eax
-; X86-NOSSE-NEXT: movl $1431655765, %esi # imm = 0x55555555
-; X86-NOSSE-NEXT: andl %esi, %eax
-; X86-NOSSE-NEXT: subl %eax, %edx
-; X86-NOSSE-NEXT: movl %edx, %eax
-; X86-NOSSE-NEXT: andl %ecx, %eax
+; X86-NOSSE-NEXT: andl %eax, %esi
+; X86-NOSSE-NEXT: addl %ebx, %esi
+; X86-NOSSE-NEXT: movl %esi, %ebx
+; X86-NOSSE-NEXT: shrl $4, %ebx
+; X86-NOSSE-NEXT: addl %esi, %ebx
+; X86-NOSSE-NEXT: movl $252645135, %esi # imm = 0xF0F0F0F
+; X86-NOSSE-NEXT: andl %esi, %edi
+; X86-NOSSE-NEXT: imull $16843009, %edi, %ebp # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %ebp
+; X86-NOSSE-NEXT: andl %esi, %ebx
+; X86-NOSSE-NEXT: imull $16843009, %ebx, %edi # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %edi
+; X86-NOSSE-NEXT: addl %ebp, %edi
+; X86-NOSSE-NEXT: movl %edx, %ebx
+; X86-NOSSE-NEXT: shrl %ebx
+; X86-NOSSE-NEXT: andl %ecx, %ebx
+; X86-NOSSE-NEXT: subl %ebx, %edx
+; X86-NOSSE-NEXT: movl %edx, %ebx
+; X86-NOSSE-NEXT: andl %eax, %ebx
; X86-NOSSE-NEXT: shrl $2, %edx
-; X86-NOSSE-NEXT: andl %ecx, %edx
-; X86-NOSSE-NEXT: addl %eax, %edx
-; X86-NOSSE-NEXT: movl %edx, %eax
-; X86-NOSSE-NEXT: shrl $4, %eax
-; X86-NOSSE-NEXT: addl %edx, %eax
-; X86-NOSSE-NEXT: andl %ebx, %ebp
-; X86-NOSSE-NEXT: andl %ebx, %eax
-; X86-NOSSE-NEXT: imull $16843009, %ebp, %ecx # imm = 0x1010101
-; X86-NOSSE-NEXT: shrl $24, %ecx
-; X86-NOSSE-NEXT: imull $16843009, %eax, %edx # imm = 0x1010101
+; X86-NOSSE-NEXT: andl %eax, %edx
+; X86-NOSSE-NEXT: addl %ebx, %edx
+; X86-NOSSE-NEXT: movl %edx, %ebp
+; X86-NOSSE-NEXT: shrl $4, %ebp
+; X86-NOSSE-NEXT: addl %edx, %ebp
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NOSSE-NEXT: andl %esi, %ebp
+; X86-NOSSE-NEXT: imull $16843009, %ebp, %edx # imm = 0x1010101
; X86-NOSSE-NEXT: shrl $24, %edx
-; X86-NOSSE-NEXT: addl %ecx, %edx
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: addl %edi, %edx
-; X86-NOSSE-NEXT: xorl %ecx, %ecx
-; X86-NOSSE-NEXT: movl %ecx, 12(%eax)
-; X86-NOSSE-NEXT: movl %ecx, 8(%eax)
-; X86-NOSSE-NEXT: movl %ecx, 4(%eax)
-; X86-NOSSE-NEXT: movl %edx, (%eax)
+; X86-NOSSE-NEXT: movl %ebx, %edi
+; X86-NOSSE-NEXT: shrl %edi
+; X86-NOSSE-NEXT: andl %ecx, %edi
+; X86-NOSSE-NEXT: subl %edi, %ebx
+; X86-NOSSE-NEXT: movl %ebx, %ecx
+; X86-NOSSE-NEXT: andl %eax, %ecx
+; X86-NOSSE-NEXT: shrl $2, %ebx
+; X86-NOSSE-NEXT: andl %eax, %ebx
+; X86-NOSSE-NEXT: addl %ecx, %ebx
+; X86-NOSSE-NEXT: movl %ebx, %ecx
+; X86-NOSSE-NEXT: shrl $4, %ecx
+; X86-NOSSE-NEXT: addl %ebx, %ecx
+; X86-NOSSE-NEXT: andl %esi, %ecx
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: imull $16843009, %ecx, %ecx # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %ecx
+; X86-NOSSE-NEXT: addl %edx, %ecx
+; X86-NOSSE-NEXT: xorl %edx, %edx
+; X86-NOSSE-NEXT: movl %edx, 12(%eax)
+; X86-NOSSE-NEXT: movl %edx, 8(%eax)
+; X86-NOSSE-NEXT: movl %edx, 4(%eax)
+; X86-NOSSE-NEXT: movl %ecx, (%eax)
; X86-NOSSE-NEXT: popl %esi
; X86-NOSSE-NEXT: popl %edi
; X86-NOSSE-NEXT: popl %ebx
@@ -925,21 +920,19 @@ define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize {
;
; X86-POPCNT-LABEL: cnt128_optsize:
; X86-POPCNT: # %bb.0:
-; X86-POPCNT-NEXT: pushl %esi
; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx
; X86-POPCNT-NEXT: addl %ecx, %edx
; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
-; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi
-; X86-POPCNT-NEXT: addl %ecx, %esi
-; X86-POPCNT-NEXT: addl %edx, %esi
+; X86-POPCNT-NEXT: addl %edx, %ecx
+; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx
+; X86-POPCNT-NEXT: addl %ecx, %edx
; X86-POPCNT-NEXT: xorl %ecx, %ecx
; X86-POPCNT-NEXT: movl %ecx, 12(%eax)
; X86-POPCNT-NEXT: movl %ecx, 8(%eax)
; X86-POPCNT-NEXT: movl %ecx, 4(%eax)
-; X86-POPCNT-NEXT: movl %esi, (%eax)
-; X86-POPCNT-NEXT: popl %esi
+; X86-POPCNT-NEXT: movl %edx, (%eax)
; X86-POPCNT-NEXT: retl $4
;
; X64-POPCNT-LABEL: cnt128_optsize:
@@ -1230,83 +1223,80 @@ define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 {
; X86-NOSSE-NEXT: pushl %esi
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NOSSE-NEXT: movl %ebx, %ecx
-; X86-NOSSE-NEXT: shrl %ecx
-; X86-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555
-; X86-NOSSE-NEXT: andl %edi, %ecx
-; X86-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555
-; X86-NOSSE-NEXT: subl %ecx, %ebx
-; X86-NOSSE-NEXT: movl $858993459, %ecx # imm = 0x33333333
-; X86-NOSSE-NEXT: movl %ebx, %ebp
-; X86-NOSSE-NEXT: andl %ecx, %ebp
+; X86-NOSSE-NEXT: movl %ebx, %eax
+; X86-NOSSE-NEXT: shrl %eax
+; X86-NOSSE-NEXT: movl $1431655765, %ecx # imm = 0x55555555
+; X86-NOSSE-NEXT: andl %ecx, %eax
+; X86-NOSSE-NEXT: subl %eax, %ebx
+; X86-NOSSE-NEXT: movl $858993459, %eax # imm = 0x33333333
+; X86-NOSSE-NEXT: movl %ebx, %edi
+; X86-NOSSE-NEXT: andl %eax, %edi
; X86-NOSSE-NEXT: shrl $2, %ebx
-; X86-NOSSE-NEXT: andl %ecx, %ebx
-; X86-NOSSE-NEXT: addl %ebp, %ebx
-; X86-NOSSE-NEXT: movl %ebx, %ebp
-; X86-NOSSE-NEXT: shrl $4, %ebp
-; X86-NOSSE-NEXT: addl %ebx, %ebp
-; X86-NOSSE-NEXT: movl %eax, %ebx
+; X86-NOSSE-NEXT: andl %eax, %ebx
+; X86-NOSSE-NEXT: addl %edi, %ebx
+; X86-NOSSE-NEXT: movl %ebx, %edi
+; X86-NOSSE-NEXT: shrl $4, %edi
+; X86-NOSSE-NEXT: addl %ebx, %edi
+; X86-NOSSE-NEXT: movl %esi, %ebx
; X86-NOSSE-NEXT: shrl %ebx
-; X86-NOSSE-NEXT: andl %edi, %ebx
-; X86-NOSSE-NEXT: subl %ebx, %eax
-; X86-NOSSE-NEXT: movl %eax, %ebx
; X86-NOSSE-NEXT: andl %ecx, %ebx
-; X86-NOSSE-NEXT: shrl $2, %eax
-; X86-NOSSE-NEXT: andl %ecx, %eax
-; X86-NOSSE-NEXT: addl %ebx, %eax
-; X86-NOSSE-NEXT: movl %eax, %edi
-; X86-NOSSE-NEXT: shrl $4, %edi
-; X86-NOSSE-NEXT: addl %eax, %edi
-; X86-NOSSE-NEXT: movl $252645135, %ebx # imm = 0xF0F0F0F
-; X86-NOSSE-NEXT: andl %ebx, %ebp
-; X86-NOSSE-NEXT: imull $16843009, %ebp, %eax # imm = 0x1010101
-; X86-NOSSE-NEXT: shrl $24, %eax
-; X86-NOSSE-NEXT: andl %ebx, %edi
-; X86-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101
-; X86-NOSSE-NEXT: shrl $24, %edi
-; X86-NOSSE-NEXT: addl %eax, %edi
-; X86-NOSSE-NEXT: movl %esi, %eax
-; X86-NOSSE-NEXT: shrl %eax
-; X86-NOSSE-NEXT: movl $1431655765, %ebp # imm = 0x55555555
-; X86-NOSSE-NEXT: andl %ebp, %eax
-; X86-NOSSE-NEXT: subl %eax, %esi
-; X86-NOSSE-NEXT: movl %esi, %eax
-; X86-NOSSE-NEXT: andl %ecx, %eax
+; X86-NOSSE-NEXT: subl %ebx, %esi
+; X86-NOSSE-NEXT: movl %esi, %ebx
+; X86-NOSSE-NEXT: andl %eax, %ebx
; X86-NOSSE-NEXT: shrl $2, %esi
-; X86-NOSSE-NEXT: andl %ecx, %esi
-; X86-NOSSE-NEXT: addl %eax, %esi
-; X86-NOSSE-NEXT: movl %esi, %ebp
-; X86-NOSSE-NEXT: shrl $4, %ebp
-; X86-NOSSE-NEXT: addl %esi, %ebp
-; X86-NOSSE-NEXT: movl %edx, %eax
-; X86-NOSSE-NEXT: shrl %eax
-; X86-NOSSE-NEXT: movl $1431655765, %esi # imm = 0x55555555
-; X86-NOSSE-NEXT: andl %esi, %eax
-; X86-NOSSE-NEXT: subl %eax, %edx
-; X86-NOSSE-NEXT: movl %edx, %eax
-; X86-NOSSE-NEXT: andl %ecx, %eax
+; X86-NOSSE-NEXT: andl %eax, %esi
+; X86-NOSSE-NEXT: addl %ebx, %esi
+; X86-NOSSE-NEXT: movl %esi, %ebx
+; X86-NOSSE-NEXT: shrl $4, %ebx
+; X86-NOSSE-NEXT: addl %esi, %ebx
+; X86-NOSSE-NEXT: movl $252645135, %esi # imm = 0xF0F0F0F
+; X86-NOSSE-NEXT: andl %esi, %edi
+; X86-NOSSE-NEXT: imull $16843009, %edi, %ebp # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %ebp
+; X86-NOSSE-NEXT: andl %esi, %ebx
+; X86-NOSSE-NEXT: imull $16843009, %ebx, %edi # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %edi
+; X86-NOSSE-NEXT: addl %ebp, %edi
+; X86-NOSSE-NEXT: movl %edx, %ebx
+; X86-NOSSE-NEXT: shrl %ebx
+; X86-NOSSE-NEXT: andl %ecx, %ebx
+; X86-NOSSE-NEXT: subl %ebx, %edx
+; X86-NOSSE-NEXT: movl %edx, %ebx
+; X86-NOSSE-NEXT: andl %eax, %ebx
; X86-NOSSE-NEXT: shrl $2, %edx
-; X86-NOSSE-NEXT: andl %ecx, %edx
-; X86-NOSSE-NEXT: addl %eax, %edx
-; X86-NOSSE-NEXT: movl %edx, %eax
-; X86-NOSSE-NEXT: shrl $4, %eax
-; X86-NOSSE-NEXT: addl %edx, %eax
-; X86-NOSSE-NEXT: andl %ebx, %ebp
-; X86-NOSSE-NEXT: andl %ebx, %eax
-; X86-NOSSE-NEXT: imull $16843009, %ebp, %ecx # imm = 0x1010101
-; X86-NOSSE-NEXT: shrl $24, %ecx
-; X86-NOSSE-NEXT: imull $16843009, %eax, %edx # imm = 0x1010101
+; X86-NOSSE-NEXT: andl %eax, %edx
+; X86-NOSSE-NEXT: addl %ebx, %edx
+; X86-NOSSE-NEXT: movl %edx, %ebp
+; X86-NOSSE-NEXT: shrl $4, %ebp
+; X86-NOSSE-NEXT: addl %edx, %ebp
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NOSSE-NEXT: andl %esi, %ebp
+; X86-NOSSE-NEXT: imull $16843009, %ebp, %edx # imm = 0x1010101
; X86-NOSSE-NEXT: shrl $24, %edx
-; X86-NOSSE-NEXT: addl %ecx, %edx
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: addl %edi, %edx
-; X86-NOSSE-NEXT: xorl %ecx, %ecx
-; X86-NOSSE-NEXT: movl %ecx, 12(%eax)
-; X86-NOSSE-NEXT: movl %ecx, 8(%eax)
-; X86-NOSSE-NEXT: movl %ecx, 4(%eax)
-; X86-NOSSE-NEXT: movl %edx, (%eax)
+; X86-NOSSE-NEXT: movl %ebx, %edi
+; X86-NOSSE-NEXT: shrl %edi
+; X86-NOSSE-NEXT: andl %ecx, %edi
+; X86-NOSSE-NEXT: subl %edi, %ebx
+; X86-NOSSE-NEXT: movl %ebx, %ecx
+; X86-NOSSE-NEXT: andl %eax, %ecx
+; X86-NOSSE-NEXT: shrl $2, %ebx
+; X86-NOSSE-NEXT: andl %eax, %ebx
+; X86-NOSSE-NEXT: addl %ecx, %ebx
+; X86-NOSSE-NEXT: movl %ebx, %ecx
+; X86-NOSSE-NEXT: shrl $4, %ecx
+; X86-NOSSE-NEXT: addl %ebx, %ecx
+; X86-NOSSE-NEXT: andl %esi, %ecx
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: imull $16843009, %ecx, %ecx # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %ecx
+; X86-NOSSE-NEXT: addl %edx, %ecx
+; X86-NOSSE-NEXT: xorl %edx, %edx
+; X86-NOSSE-NEXT: movl %edx, 12(%eax)
+; X86-NOSSE-NEXT: movl %edx, 8(%eax)
+; X86-NOSSE-NEXT: movl %edx, 4(%eax)
+; X86-NOSSE-NEXT: movl %ecx, (%eax)
; X86-NOSSE-NEXT: popl %esi
; X86-NOSSE-NEXT: popl %edi
; X86-NOSSE-NEXT: popl %ebx
@@ -1355,21 +1345,19 @@ define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 {
;
; X86-POPCNT-LABEL: cnt128_pgso:
; X86-POPCNT: # %bb.0:
-; X86-POPCNT-NEXT: pushl %esi
; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx
; X86-POPCNT-NEXT: addl %ecx, %edx
; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
-; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi
-; X86-POPCNT-NEXT: addl %ecx, %esi
-; X86-POPCNT-NEXT: addl %edx, %esi
+; X86-POPCNT-NEXT: addl %edx, %ecx
+; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx
+; X86-POPCNT-NEXT: addl %ecx, %edx
; X86-POPCNT-NEXT: xorl %ecx, %ecx
; X86-POPCNT-NEXT: movl %ecx, 12(%eax)
; X86-POPCNT-NEXT: movl %ecx, 8(%eax)
; X86-POPCNT-NEXT: movl %ecx, 4(%eax)
-; X86-POPCNT-NEXT: movl %esi, (%eax)
-; X86-POPCNT-NEXT: popl %esi
+; X86-POPCNT-NEXT: movl %edx, (%eax)
; X86-POPCNT-NEXT: retl $4
;
; X64-POPCNT-LABEL: cnt128_pgso:
diff --git a/llvm/test/CodeGen/X86/pr34080-2.ll b/llvm/test/CodeGen/X86/pr34080-2.ll
index de34bfb13159c..1705edab9e82b 100644
--- a/llvm/test/CodeGen/X86/pr34080-2.ll
+++ b/llvm/test/CodeGen/X86/pr34080-2.ll
@@ -31,10 +31,6 @@ define void @computeJD(ptr) nounwind {
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: imull %edx
; CHECK-NEXT: movl %edx, %edi
-; CHECK-NEXT: movl %edx, %eax
-; CHECK-NEXT: shrl $31, %eax
-; CHECK-NEXT: sarl $7, %edi
-; CHECK-NEXT: addl %eax, %edi
; CHECK-NEXT: imull $36525, %esi, %eax # imm = 0x8EAD
; CHECK-NEXT: addl $172251900, %eax # imm = 0xA445AFC
; CHECK-NEXT: movl $1374389535, %edx # imm = 0x51EB851F
@@ -43,7 +39,11 @@ define void @computeJD(ptr) nounwind {
; CHECK-NEXT: shrl $31, %eax
; CHECK-NEXT: sarl $5, %edx
; CHECK-NEXT: addl %eax, %edx
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: shrl $31, %eax
; CHECK-NEXT: addl 16(%ebx), %ecx
+; CHECK-NEXT: addl %eax, %ecx
+; CHECK-NEXT: sarl $7, %edi
; CHECK-NEXT: addl %edi, %ecx
; CHECK-NEXT: leal 257(%ecx,%edx), %eax
; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
diff --git a/llvm/test/CodeGen/X86/pr36865.ll b/llvm/test/CodeGen/X86/pr36865.ll
index 580426a069f41..35d644027e1b0 100644
--- a/llvm/test/CodeGen/X86/pr36865.ll
+++ b/llvm/test/CodeGen/X86/pr36865.ll
@@ -13,10 +13,10 @@ define void @main() {
; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT: movl (%rax), %ecx
; CHECK-NEXT: addl 0, %eax
-; CHECK-NEXT: addl %ecx, %eax
-; CHECK-NEXT: addl %ecx, %eax
-; CHECK-NEXT: addl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT: movl %eax, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: addl %ecx, %ecx
+; CHECK-NEXT: addl %eax, %ecx
+; CHECK-NEXT: addl {{[0-9]+}}(%rsp), %ecx
+; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: imull %eax, %ecx
diff --git a/llvm/test/CodeGen/X86/reassociate-add.ll b/llvm/test/CodeGen/X86/reassociate-add.ll
index 402cd5166e362..76f5535a3beb1 100644
--- a/llvm/test/CodeGen/X86/reassociate-add.ll
+++ b/llvm/test/CodeGen/X86/reassociate-add.ll
@@ -12,12 +12,12 @@ define void @add8(i8 %x0, i8 %x1, i8 %x2, i8* %p) {
; CHECK: # %bb.0:
; CHECK-NEXT: orb $16, %dil
; CHECK-NEXT: orb $32, %sil
+; CHECK-NEXT: addb %dil, %sil
; CHECK-NEXT: addb $-8, %dl
; CHECK-NEXT: orb $7, %dl
; CHECK-NEXT: movzbl %dl, %eax
; CHECK-NEXT: imull $100, %eax, %eax
; CHECK-NEXT: addb %sil, %al
-; CHECK-NEXT: addb %dil, %al
; CHECK-NEXT: movb %al, (%rcx)
; CHECK-NEXT: retq
%v0 = or i8 %x0, 16
@@ -36,11 +36,11 @@ define void @add16(i16 %x0, i16 %x1, i16 %x2, i16* %p) {
; CHECK: # %bb.0:
; CHECK-NEXT: orl $16, %edi
; CHECK-NEXT: orl $32, %esi
+; CHECK-NEXT: addl %edi, %esi
; CHECK-NEXT: addl $-8, %edx
; CHECK-NEXT: orl $7, %edx
; CHECK-NEXT: imull $100, %edx, %eax
; CHECK-NEXT: addl %esi, %eax
-; CHECK-NEXT: addl %edi, %eax
; CHECK-NEXT: movw %ax, (%rcx)
; CHECK-NEXT: retq
%v0 = or i16 %x0, 16
@@ -59,11 +59,11 @@ define void @add32(i32 %x0, i32 %x1, i32 %x2, i32* %p) {
; CHECK: # %bb.0:
; CHECK-NEXT: orl $16, %edi
; CHECK-NEXT: orl $32, %esi
+; CHECK-NEXT: addl %edi, %esi
; CHECK-NEXT: addl $-8, %edx
; CHECK-NEXT: orl $7, %edx
; CHECK-NEXT: imull $100, %edx, %eax
; CHECK-NEXT: addl %esi, %eax
-; CHECK-NEXT: addl %edi, %eax
; CHECK-NEXT: movl %eax, (%rcx)
; CHECK-NEXT: retq
%v0 = or i32 %x0, 16
@@ -82,11 +82,11 @@ define void @add64(i64 %x0, i64 %x1, i64 %x2, i64* %p) {
; CHECK: # %bb.0:
; CHECK-NEXT: orq $16, %rdi
; CHECK-NEXT: orq $32, %rsi
+; CHECK-NEXT: addq %rdi, %rsi
; CHECK-NEXT: addq $-8, %rdx
; CHECK-NEXT: orq $7, %rdx
; CHECK-NEXT: imulq $100, %rdx, %rax
; CHECK-NEXT: addq %rsi, %rax
-; CHECK-NEXT: addq %rdi, %rax
; CHECK-NEXT: movq %rax, (%rcx)
; CHECK-NEXT: retq
%v0 = or i64 %x0, 16
diff --git a/llvm/test/CodeGen/X86/smul-with-overflow.ll b/llvm/test/CodeGen/X86/smul-with-overflow.ll
index 83802ce434426..83b9460c7dae3 100644
--- a/llvm/test/CodeGen/X86/smul-with-overflow.ll
+++ b/llvm/test/CodeGen/X86/smul-with-overflow.ll
@@ -212,6 +212,7 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %ebx
; X86-NEXT: movl %ebx, %ebp
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: addl %eax, %ecx
@@ -233,16 +234,14 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind {
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %ebp
-; X86-NEXT: movl %ebp, %ecx
-; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %eax, %ebp
; X86-NEXT: movl %eax, %edi
+; X86-NEXT: movl %eax, %esi
; X86-NEXT: movl %edx, %ebx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: addl %edx, %edi
; X86-NEXT: adcl $0, %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %ecx
+; X86-NEXT: mull %ebp
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: addl %eax, %edi
@@ -251,26 +250,27 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind {
; X86-NEXT: addl %eax, %ebx
; X86-NEXT: movzbl %cl, %eax
; X86-NEXT: adcl %edx, %eax
-; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl %esi, %ecx
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: movl %ebx, %edx
-; X86-NEXT: adcl $0, %edx
+; X86-NEXT: movl %ebx, %ebp
+; X86-NEXT: adcl $0, %ebp
; X86-NEXT: movl %eax, %esi
-; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movl %eax, %edx
; X86-NEXT: adcl $0, %esi
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X86-NEXT: setb %al
-; X86-NEXT: addl %ebp, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: addl %ecx, %ebp
+; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl %edi, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: adcl %ebx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: adcl $0, %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: adcl $0, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -294,76 +294,76 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind {
; X86-NEXT: setb %bl
; X86-NEXT: movl %ebp, %eax
; X86-NEXT: mull %edi
+; X86-NEXT: movl %edx, %esi
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movzbl %bl, %eax
-; X86-NEXT: adcl %eax, %edx
-; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: adcl %eax, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: mull %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: mull %edi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %esi
-; X86-NEXT: movl %edx, %esi
+; X86-NEXT: mull %edi
+; X86-NEXT: movl %edx, %ebx
; X86-NEXT: movl %eax, %edi
; X86-NEXT: addl %ecx, %edi
-; X86-NEXT: adcl $0, %esi
+; X86-NEXT: adcl $0, %ebx
; X86-NEXT: movl %ebp, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: movl %edx, %ebp
; X86-NEXT: addl %edi, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: adcl %esi, %ebp
+; X86-NEXT: adcl %ebx, %ebp
; X86-NEXT: setb %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, %esi
-; X86-NEXT: addl %ebp, %esi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %ebp, %ebx
; X86-NEXT: movzbl %cl, %eax
; X86-NEXT: adcl %eax, %edx
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X86-NEXT: adcl (%esp), %edx # 4-byte Folded Reload
; X86-NEXT: movl %edx, (%esp) # 4-byte Spill
; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: adcl $0, %ebx
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: adcl $0, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: mull %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: mull %esi
; X86-NEXT: movl %edx, %edi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %ebx
+; X86-NEXT: mull %esi
; X86-NEXT: movl %edx, %ebp
-; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: addl %edi, %ebx
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: addl %edi, %esi
; X86-NEXT: adcl $0, %ebp
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: mull %edi
; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: addl %ebx, %eax
-; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %esi, %eax
+; X86-NEXT: movl %eax, %esi
; X86-NEXT: adcl %ebp, %ecx
; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %edi
-; X86-NEXT: movl %edx, %ebp
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: addl %ecx, %edi
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: addl %ecx, %ebp
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X86-NEXT: adcl %eax, %ebp
-; X86-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: adcl (%esp), %ebx # 4-byte Folded Reload
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: adcl $0, %edi
+; X86-NEXT: adcl %eax, %edi
+; X86-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: adcl (%esp), %esi # 4-byte Folded Reload
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl $0, %ebp
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X86-NEXT: setb (%esp) # 1-byte Folded Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -386,182 +386,176 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind {
; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, %esi
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: addl %ecx, %ebx
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X86-NEXT: adcl %eax, %edx
+; X86-NEXT: adcl %eax, %esi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: addl %edi, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT: adcl %ebp, %esi
+; X86-NEXT: addl %ebp, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: adcl %edi, %edx
; X86-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload
; X86-NEXT: adcl %eax, %ebx
-; X86-NEXT: adcl $0, %edx
+; X86-NEXT: adcl $0, %esi
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: movl %edx, %edi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, %edi
+; X86-NEXT: movl %eax, %esi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: addl %ebx, %eax
-; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: addl %edi, %eax
+; X86-NEXT: movl %edx, %ebx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: adcl $0, %ebx
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: adcl %ebx, %ecx
+; X86-NEXT: adcl %edi, %ebx
; X86-NEXT: setb %al
-; X86-NEXT: addl %edi, %ecx
+; X86-NEXT: addl %esi, %ebx
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: adcl %edx, %eax
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %edx, %esi
; X86-NEXT: movl %eax, %ebp
-; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, %edi
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %eax, %esi
+; X86-NEXT: movl %eax, %ecx
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: addl %edi, %esi
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: adcl $0, %ebx
-; X86-NEXT: addl %ebp, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: adcl %edi, %ebx
-; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: addl %eax, %ebx
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
-; X86-NEXT: adcl %edx, %esi
+; X86-NEXT: addl %esi, %ecx
+; X86-NEXT: adcl $0, %edx
+; X86-NEXT: addl %ebp, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: adcl %esi, %edx
+; X86-NEXT: setb %cl
+; X86-NEXT: addl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movzbl %cl, %esi
+; X86-NEXT: adcl %edi, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-NEXT: addl %ebx, %edi
+; X86-NEXT: addl %edx, %edi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: adcl %esi, %eax
-; X86-NEXT: movl %ecx, %ebx
-; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: movl %ebx, %esi
+; X86-NEXT: adcl $0, %esi
; X86-NEXT: movl (%esp), %edx # 4-byte Reload
; X86-NEXT: adcl $0, %edx
; X86-NEXT: addl %ebp, %edi
; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT: adcl %esi, %eax
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-NEXT: adcl $0, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: adcl $0, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: adcl $0, %eax
-; X86-NEXT: addl %ebx, %edi
+; X86-NEXT: addl %esi, %ecx
; X86-NEXT: adcl %edx, %eax
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: setb %al
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X86-NEXT: movl %edi, %ebx
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT: movzbl %al, %edi
-; X86-NEXT: adcl %ecx, %edi
-; X86-NEXT: movl (%esp), %eax # 4-byte Reload
-; X86-NEXT: adcl $0, %eax
-; X86-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: adcl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: setb %dl
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movzbl %dl, %esi
+; X86-NEXT: adcl %ebx, %esi
+; X86-NEXT: movl (%esp), %ebx # 4-byte Reload
; X86-NEXT: adcl $0, %ebx
-; X86-NEXT: adcl $0, %edx
-; X86-NEXT: adcl $0, %edi
-; X86-NEXT: adcl $0, %eax
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: adcl $0, %eax
+; X86-NEXT: adcl $0, %esi
+; X86-NEXT: movl %ebx, %edx
+; X86-NEXT: adcl $0, %edx
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl %edx, (%esp) # 4-byte Spill
; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: movl %eax, %edi
; X86-NEXT: movl %eax, %ebp
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: addl %edx, %ecx
-; X86-NEXT: movl %edx, %edi
-; X86-NEXT: adcl $0, %edi
-; X86-NEXT: addl %eax, %ecx
-; X86-NEXT: movl %ecx, %ebx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: adcl %edx, %edi
+; X86-NEXT: addl %edx, %ebp
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: addl %eax, %ebp
+; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: adcl %edx, %ebx
; X86-NEXT: setb %cl
-; X86-NEXT: addl %eax, %edi
+; X86-NEXT: addl %eax, %ebx
; X86-NEXT: movzbl %cl, %ecx
; X86-NEXT: adcl %edx, %ecx
; X86-NEXT: movl %eax, %edx
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: addl %edi, %edx
+; X86-NEXT: addl %ebx, %edx
+; X86-NEXT: adcl %ecx, %ebp
+; X86-NEXT: movl %ebx, %esi
; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: adcl %ecx, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %edi, %esi
-; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl $0, %esi
-; X86-NEXT: movl %ecx, %edi
+; X86-NEXT: movl %ecx, %ebx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl $0, %ecx
-; X86-NEXT: addl %ebp, %edx
+; X86-NEXT: addl %edi, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X86-NEXT: adcl %ebx, %ebp
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: adcl %edi, %ebp
; X86-NEXT: movl %eax, %edx
-; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl $0, %edx
-; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl %ebx, %eax
; X86-NEXT: adcl $0, %eax
; X86-NEXT: addl %esi, %edx
; X86-NEXT: adcl %ecx, %eax
-; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT: setb %bl
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: addl %ecx, %edx
-; X86-NEXT: movl %edx, %edi
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: movl %edx, %esi
; X86-NEXT: movl %eax, %edx
-; X86-NEXT: adcl %esi, %edx
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X86-NEXT: adcl %ebx, %eax
+; X86-NEXT: adcl %edi, %edx
+; X86-NEXT: movzbl %bl, %eax
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: adcl $0, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: adcl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: adcl (%esp), %ebp # 4-byte Folded Reload
; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X86-NEXT: adcl %eax, %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: adcl %eax, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl $0, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl $0, %ebx
@@ -623,51 +617,49 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind {
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl $0, (%esp) # 4-byte Folded Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: imull %eax, %edx
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: imull %eax, %ecx
-; X86-NEXT: addl %edx, %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: imull %eax, %ebx
+; X86-NEXT: addl %ecx, %ebx
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: imull %eax, %ecx
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: imull %eax, %edx
-; X86-NEXT: movl %eax, %ebx
; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: addl %esi, %eax
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: adcl %ebx, %edx
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: imull %ebx
-; X86-NEXT: addl %eax, %eax
+; X86-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %eax, %ebx
; X86-NEXT: adcl %edx, %edx
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT: movl %edx, %ebx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: adcl %ebx, %edx
+; X86-NEXT: adcl %ebx, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: adcl %edx, %ebx
; X86-NEXT: addl %edi, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl %ebp, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: adcl (%esp), %edx # 4-byte Folded Reload
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: adcl (%esp), %ebx # 4-byte Folded Reload
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: movl %esi, %edi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
@@ -721,18 +713,17 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind {
; X86-NEXT: adcl $0, %ebx
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: imull %edx, %ecx
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT: movl %ecx, %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: imull %edx, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: imull %edx, %ecx
; X86-NEXT: addl %ebp, %ecx
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: imull %edx, %ebp
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X86-NEXT: imull {{[0-9]+}}(%esp), %edx
; X86-NEXT: addl %ebp, %edx
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: addl %eax, %ebx
@@ -841,188 +832,190 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind {
; X64-NEXT: pushq %r13
; X64-NEXT: pushq %r12
; X64-NEXT: pushq %rbx
-; X64-NEXT: movq %r9, %r10
-; X64-NEXT: movq %r8, %rbp
-; X64-NEXT: movq %rcx, %r12
-; X64-NEXT: movq %rdx, %r8
-; X64-NEXT: movq %rsi, %rbx
+; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq %r8, %r12
+; X64-NEXT: movq %rcx, %r15
+; X64-NEXT: movq %rdx, %r14
+; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq %rsi, %r8
; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx
-; X64-NEXT: andl $1, %ecx
-; X64-NEXT: negq %rcx
-; X64-NEXT: andl $1, %r12d
-; X64-NEXT: negq %r12
-; X64-NEXT: movq %r12, %rax
-; X64-NEXT: mulq %rbp
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
+; X64-NEXT: andl $1, %esi
+; X64-NEXT: negq %rsi
+; X64-NEXT: andl $1, %r15d
+; X64-NEXT: negq %r15
+; X64-NEXT: movq %r15, %rax
+; X64-NEXT: mulq %r12
+; X64-NEXT: movq %rax, %rbx
+; X64-NEXT: movq %rax, %rbp
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %rax, %r15
-; X64-NEXT: movq %rdx, %r9
+; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: addq %rdx, %r15
-; X64-NEXT: adcq $0, %r9
-; X64-NEXT: movq %r12, %rax
-; X64-NEXT: mulq %r10
-; X64-NEXT: movq %r10, %r14
+; X64-NEXT: addq %rdx, %rbx
+; X64-NEXT: adcq $0, %rcx
+; X64-NEXT: movq %r15, %rax
+; X64-NEXT: mulq %r9
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: addq %rax, %r15
-; X64-NEXT: adcq %rdx, %r9
+; X64-NEXT: addq %rax, %rbx
+; X64-NEXT: adcq %rdx, %rcx
; X64-NEXT: setb %dil
-; X64-NEXT: movzbl %dil, %r10d
-; X64-NEXT: addq %rax, %r9
-; X64-NEXT: adcq %rdx, %r10
-; X64-NEXT: movq %rsi, %rax
-; X64-NEXT: mulq %rbp
-; X64-NEXT: movq %rdx, %rsi
-; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movzbl %dil, %r13d
+; X64-NEXT: addq %rax, %rcx
+; X64-NEXT: adcq %rdx, %r13
; X64-NEXT: movq %r8, %rax
-; X64-NEXT: mulq %rbp
+; X64-NEXT: mulq %r12
; X64-NEXT: movq %rdx, %rdi
+; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq %r14, %rax
+; X64-NEXT: mulq %r12
+; X64-NEXT: movq %rdx, %r10
; X64-NEXT: movq %rax, %r11
-; X64-NEXT: addq %rsi, %r11
-; X64-NEXT: adcq $0, %rdi
-; X64-NEXT: movq %rbx, %rax
-; X64-NEXT: movq %rbx, %r13
-; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: mulq %r14
-; X64-NEXT: movq %rdx, %rbx
+; X64-NEXT: addq %rdi, %r11
+; X64-NEXT: adcq $0, %r10
+; X64-NEXT: movq %r8, %rax
+; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: mulq %r9
+; X64-NEXT: movq %rdx, %rdi
; X64-NEXT: addq %r11, %rax
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq %rdi, %rbx
-; X64-NEXT: setb %dil
-; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %r8, %rax
-; X64-NEXT: mulq %r14
-; X64-NEXT: movq %rdx, %rsi
-; X64-NEXT: addq %rbx, %rax
-; X64-NEXT: movzbl %dil, %edx
-; X64-NEXT: adcq %rdx, %rsi
-; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT: adcq %r10, %rdi
+; X64-NEXT: setb %r11b
+; X64-NEXT: movq %r14, %rax
+; X64-NEXT: mulq %r9
+; X64-NEXT: movq %rdx, %r10
+; X64-NEXT: addq %rdi, %rax
+; X64-NEXT: movzbl %r11b, %edx
+; X64-NEXT: adcq %rdx, %r10
+; X64-NEXT: addq %rbp, %rax
; X64-NEXT: movq %rax, %rdi
-; X64-NEXT: adcq %r15, %rsi
-; X64-NEXT: adcq $0, %r9
-; X64-NEXT: adcq $0, %r10
-; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: mulq %r13
-; X64-NEXT: movq %rdx, %r11
-; X64-NEXT: movq %rax, %r13
-; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: adcq %rbx, %r10
+; X64-NEXT: adcq $0, %rcx
+; X64-NEXT: adcq $0, %r13
+; X64-NEXT: movq %rsi, %rax
; X64-NEXT: mulq %r8
-; X64-NEXT: movq %rax, %rbx
+; X64-NEXT: movq %rdx, %r11
; X64-NEXT: movq %rax, %r8
+; X64-NEXT: movq %rsi, %rax
+; X64-NEXT: mulq %r14
+; X64-NEXT: movq %rax, %rbp
+; X64-NEXT: movq %rax, %r9
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq %r11, %rax
; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: addq %r11, %rbx
-; X64-NEXT: movq %rdx, %r15
+; X64-NEXT: addq %r11, %rbp
+; X64-NEXT: movq %rdx, %r11
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq $0, %r15
-; X64-NEXT: addq %r13, %rbx
-; X64-NEXT: adcq %r11, %r15
+; X64-NEXT: adcq $0, %r11
+; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: addq %r8, %rbp
+; X64-NEXT: adcq %rax, %r11
; X64-NEXT: setb %al
-; X64-NEXT: addq %r8, %r15
-; X64-NEXT: movzbl %al, %r8d
-; X64-NEXT: adcq %rdx, %r8
-; X64-NEXT: addq %r13, %rdi
+; X64-NEXT: addq %r9, %r11
+; X64-NEXT: movzbl %al, %r14d
+; X64-NEXT: adcq %rdx, %r14
+; X64-NEXT: addq %r8, %rdi
; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: adcq %rsi, %rbx
-; X64-NEXT: adcq $0, %r15
-; X64-NEXT: adcq $0, %r8
-; X64-NEXT: addq %r9, %r15
-; X64-NEXT: adcq %r10, %r8
-; X64-NEXT: setb %r10b
-; X64-NEXT: movq %r12, %rax
-; X64-NEXT: mulq %rcx
-; X64-NEXT: movq %rax, %r11
-; X64-NEXT: addq %rdx, %r11
-; X64-NEXT: movq %rdx, %rdi
-; X64-NEXT: adcq $0, %rdi
-; X64-NEXT: addq %rax, %r11
-; X64-NEXT: adcq %rdx, %rdi
-; X64-NEXT: setb %r9b
-; X64-NEXT: addq %rax, %rdi
-; X64-NEXT: movzbl %r9b, %esi
-; X64-NEXT: adcq %rdx, %rsi
-; X64-NEXT: addq %rax, %r15
-; X64-NEXT: adcq %r8, %r11
-; X64-NEXT: movzbl %r10b, %eax
-; X64-NEXT: adcq %rax, %rdi
-; X64-NEXT: adcq $0, %rsi
-; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; X64-NEXT: movq %rsi, %r8
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
-; X64-NEXT: addq %r14, %r8
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT: adcq %r10, %rbp
+; X64-NEXT: adcq $0, %r11
+; X64-NEXT: adcq $0, %r14
+; X64-NEXT: addq %rcx, %r11
+; X64-NEXT: adcq %r13, %r14
+; X64-NEXT: setb %dil
+; X64-NEXT: movq %r15, %rax
+; X64-NEXT: mulq %rsi
+; X64-NEXT: movq %rax, %rbx
+; X64-NEXT: addq %rdx, %rbx
; X64-NEXT: movq %rdx, %r10
; X64-NEXT: adcq $0, %r10
+; X64-NEXT: addq %rax, %rbx
+; X64-NEXT: adcq %rdx, %r10
+; X64-NEXT: setb %cl
+; X64-NEXT: addq %rax, %r10
+; X64-NEXT: movzbl %cl, %ecx
+; X64-NEXT: adcq %rdx, %rcx
+; X64-NEXT: addq %rax, %r11
+; X64-NEXT: adcq %r14, %rbx
+; X64-NEXT: movzbl %dil, %eax
+; X64-NEXT: adcq %rax, %r10
+; X64-NEXT: adcq $0, %rcx
+; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT: movq %rdi, %r14
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT: addq %rax, %r14
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT: movq %r8, %rcx
+; X64-NEXT: adcq $0, %rcx
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
-; X64-NEXT: addq %r9, %r8
-; X64-NEXT: adcq %r14, %r10
+; X64-NEXT: addq %r9, %r14
+; X64-NEXT: adcq %rax, %rcx
+; X64-NEXT: movq %rax, %rdx
; X64-NEXT: setb %al
-; X64-NEXT: addq %rsi, %r10
-; X64-NEXT: movzbl %al, %esi
-; X64-NEXT: adcq %rdx, %rsi
+; X64-NEXT: addq %rdi, %rcx
+; X64-NEXT: movzbl %al, %edi
+; X64-NEXT: adcq %r8, %rdi
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT: imulq %r12, %rax
-; X64-NEXT: addq %r14, %rax
-; X64-NEXT: imulq %r12, %rbp
-; X64-NEXT: addq %rax, %rbp
-; X64-NEXT: movq %r12, %rax
-; X64-NEXT: imulq %rcx
-; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %r9, %r12
+; X64-NEXT: imulq %r15, %rax
+; X64-NEXT: imulq %r15, %r12
; X64-NEXT: addq %rax, %r12
-; X64-NEXT: adcq %rdx, %rbp
-; X64-NEXT: addq %r10, %r12
-; X64-NEXT: adcq %rsi, %rbp
-; X64-NEXT: movq %r13, %r14
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; X64-NEXT: addq %rsi, %r14
-; X64-NEXT: adcq $0, %rsi
+; X64-NEXT: addq %rdx, %r12
+; X64-NEXT: movq %r15, %rax
+; X64-NEXT: imulq %rsi
+; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq %r9, %r8
+; X64-NEXT: addq %rax, %r8
+; X64-NEXT: adcq %rdx, %r12
+; X64-NEXT: addq %rcx, %r8
+; X64-NEXT: adcq %rdi, %r12
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; X64-NEXT: movq %r13, %r15
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT: addq %rcx, %r15
+; X64-NEXT: adcq $0, %rcx
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT: addq %rdi, %r15
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
-; X64-NEXT: addq %rdx, %r14
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
-; X64-NEXT: adcq %r9, %rsi
-; X64-NEXT: setb %r10b
+; X64-NEXT: adcq %rdx, %rcx
+; X64-NEXT: setb %r9b
+; X64-NEXT: addq %rdi, %rcx
+; X64-NEXT: movzbl %r9b, %edi
+; X64-NEXT: adcq %rdx, %rdi
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT: imulq %rsi, %rdx
+; X64-NEXT: imulq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
; X64-NEXT: addq %rdx, %rsi
-; X64-NEXT: movzbl %r10b, %r10d
-; X64-NEXT: adcq %r9, %r10
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
-; X64-NEXT: imulq %rcx, %r9
-; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
-; X64-NEXT: imulq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
-; X64-NEXT: addq %r9, %rcx
+; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
; X64-NEXT: addq %r13, %rax
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
-; X64-NEXT: addq %rsi, %rax
-; X64-NEXT: adcq %r10, %rcx
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT: addq %rcx, %rax
+; X64-NEXT: adcq %rdi, %rsi
; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
-; X64-NEXT: adcq %r8, %r14
-; X64-NEXT: adcq %r12, %rax
-; X64-NEXT: adcq %rbp, %rcx
-; X64-NEXT: addq %r15, %r13
-; X64-NEXT: adcq %r11, %r14
-; X64-NEXT: adcq %rdi, %rax
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
-; X64-NEXT: movq %rbx, %rdx
-; X64-NEXT: sarq $63, %rdx
-; X64-NEXT: xorq %rdx, %rcx
-; X64-NEXT: xorq %rdx, %r14
-; X64-NEXT: orq %rcx, %r14
-; X64-NEXT: xorq %rdx, %rax
-; X64-NEXT: orq %r14, %rax
-; X64-NEXT: xorq %r13, %rdx
-; X64-NEXT: orq %rax, %rdx
+; X64-NEXT: adcq %r14, %r15
+; X64-NEXT: adcq %r8, %rax
+; X64-NEXT: adcq %r12, %rsi
+; X64-NEXT: addq %r11, %r13
+; X64-NEXT: adcq %rbx, %r15
+; X64-NEXT: adcq %r10, %rax
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT: movq %rbp, %rcx
+; X64-NEXT: sarq $63, %rcx
+; X64-NEXT: xorq %rcx, %rsi
+; X64-NEXT: xorq %rcx, %r15
+; X64-NEXT: orq %rsi, %r15
+; X64-NEXT: xorq %rcx, %rax
+; X64-NEXT: orq %r15, %rax
+; X64-NEXT: xorq %r13, %rcx
+; X64-NEXT: orq %rax, %rcx
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; X64-NEXT: movl %eax, %esi
; X64-NEXT: andl $1, %esi
-; X64-NEXT: movq %rsi, %rcx
-; X64-NEXT: negq %rcx
-; X64-NEXT: xorq %rcx, %rbx
-; X64-NEXT: xorq %rax, %rcx
-; X64-NEXT: orq %rbx, %rcx
-; X64-NEXT: orq %rdx, %rcx
+; X64-NEXT: movq %rsi, %rdx
+; X64-NEXT: negq %rdx
+; X64-NEXT: xorq %rdx, %rbp
+; X64-NEXT: xorq %rax, %rdx
+; X64-NEXT: orq %rbp, %rdx
+; X64-NEXT: orq %rcx, %rdx
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; X64-NEXT: movq %rcx, 8(%rax)
diff --git a/llvm/test/CodeGen/X86/smul_fix.ll b/llvm/test/CodeGen/X86/smul_fix.ll
index 8c2b945d6a8ce..fd25842180922 100644
--- a/llvm/test/CodeGen/X86/smul_fix.ll
+++ b/llvm/test/CodeGen/X86/smul_fix.ll
@@ -231,8 +231,8 @@ define i64 @func5(i64 %x, i64 %y) {
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull %esi
; X86-NEXT: imull {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: addl %ecx, %edx
; X86-NEXT: imull {{[0-9]+}}(%esp), %esi
+; X86-NEXT: addl %ecx, %esi
; X86-NEXT: addl %esi, %edx
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
diff --git a/llvm/test/CodeGen/X86/smul_fix_sat.ll b/llvm/test/CodeGen/X86/smul_fix_sat.ll
index 996601ed3be64..0532916b1e4ca 100644
--- a/llvm/test/CodeGen/X86/smul_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/smul_fix_sat.ll
@@ -369,8 +369,8 @@ define i64 @func5(i64 %x, i64 %y) {
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 20
-; X86-NEXT: subl $8, %esp
-; X86-NEXT: .cfi_def_cfa_offset 28
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: .cfi_def_cfa_offset 32
; X86-NEXT: .cfi_offset %esi, -20
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
@@ -383,62 +383,62 @@ define i64 @func5(i64 %x, i64 %y) {
; X86-NEXT: movl %eax, %edi
; X86-NEXT: imull %ebx, %edi
; X86-NEXT: mull %ebx
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: addl %edi, %edx
-; X86-NEXT: movl %ebp, %edi
+; X86-NEXT: movl %eax, %esi
; X86-NEXT: imull %ebp, %ebx
+; X86-NEXT: addl %edi, %ebx
; X86-NEXT: addl %edx, %ebx
+; X86-NEXT: movl %ebp, %edi
; X86-NEXT: sarl $31, %edi
-; X86-NEXT: movl %edi, %ebp
-; X86-NEXT: imull %ecx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %edi, %edx
+; X86-NEXT: imull %ecx, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl %edi, %eax
-; X86-NEXT: mull %esi
-; X86-NEXT: addl %ebp, %edx
-; X86-NEXT: imull %esi, %edi
+; X86-NEXT: imull %ebp, %edi
+; X86-NEXT: addl %edx, %edi
+; X86-NEXT: mull %ebp
; X86-NEXT: addl %edx, %edi
-; X86-NEXT: addl (%esp), %eax # 4-byte Folded Reload
+; X86-NEXT: addl %esi, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl %ebx, %edi
-; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl %ebp, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: mull %esi
-; X86-NEXT: movl %edx, %ebp
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull %esi
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: addl %eax, %ebp
-; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: addl %eax, %ebx
+; X86-NEXT: adcl $0, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: mull %edx
+; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: movl %edx, %esi
-; X86-NEXT: addl %eax, %ebp
-; X86-NEXT: adcl %ebx, %esi
-; X86-NEXT: setb %bl
+; X86-NEXT: addl %eax, %ebx
+; X86-NEXT: adcl %ebp, %esi
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: mull %ebp
; X86-NEXT: addl %esi, %eax
-; X86-NEXT: movzbl %bl, %esi
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
; X86-NEXT: adcl %esi, %edx
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: adcl %edi, %edx
-; X86-NEXT: movl %ebp, %edi
+; X86-NEXT: movl %ebx, %edi
; X86-NEXT: sarl $31, %edi
; X86-NEXT: xorl %edi, %edx
; X86-NEXT: xorl %eax, %edi
-; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: xorl %ebp, %ecx
; X86-NEXT: sarl $31, %ecx
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: xorl $2147483647, %esi # imm = 0x7FFFFFFF
; X86-NEXT: orl %edx, %edi
; X86-NEXT: notl %ecx
-; X86-NEXT: cmovel (%esp), %ecx # 4-byte Folded Reload
-; X86-NEXT: cmovel %ebp, %esi
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: cmovel %ebx, %esi
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: movl %esi, %edx
-; X86-NEXT: addl $8, %esp
+; X86-NEXT: addl $12, %esp
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 16
diff --git a/llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll
index 367ca660cda14..fc40c539f37c7 100644
--- a/llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll
@@ -23,8 +23,8 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) {
; X64-NEXT: movq %rdx, %rax
; X64-NEXT: mulq %r14
; X64-NEXT: movq %rax, %r9
-; X64-NEXT: addq %rdi, %rdx
; X64-NEXT: imulq %rcx, %r14
+; X64-NEXT: addq %rdi, %r14
; X64-NEXT: addq %rdx, %r14
; X64-NEXT: movq %rcx, %rdi
; X64-NEXT: sarq $63, %rdi
@@ -33,8 +33,8 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) {
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: mulq %r10
; X64-NEXT: movq %rax, %r11
-; X64-NEXT: addq %r15, %rdx
; X64-NEXT: imulq %r10, %rdi
+; X64-NEXT: addq %r15, %rdi
; X64-NEXT: addq %rdx, %rdi
; X64-NEXT: addq %r9, %r11
; X64-NEXT: adcq %r14, %rdi
@@ -84,8 +84,8 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) {
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 20
-; X86-NEXT: subl $56, %esp
-; X86-NEXT: .cfi_def_cfa_offset 76
+; X86-NEXT: subl $52, %esp
+; X86-NEXT: .cfi_def_cfa_offset 72
; X86-NEXT: .cfi_offset %esi, -20
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
@@ -102,20 +102,19 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) {
; X86-NEXT: movl %edx, %esi
; X86-NEXT: movl %eax, %edi
; X86-NEXT: addl %ecx, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: adcl $0, %esi
; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: mull %ecx
-; X86-NEXT: movl %edx, %ebp
-; X86-NEXT: addl %edi, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl %esi, %ebp
-; X86-NEXT: setb %bl
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: addl %edi, %ebp
+; X86-NEXT: adcl %esi, %ebx
+; X86-NEXT: setb %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %ecx
-; X86-NEXT: addl %ebp, %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: addl %ebx, %eax
; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill
-; X86-NEXT: movzbl %bl, %eax
+; X86-NEXT: movzbl %cl, %eax
; X86-NEXT: adcl %eax, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
@@ -127,198 +126,197 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %edi
; X86-NEXT: movl %edx, %edi
-; X86-NEXT: movl %eax, %ebp
-; X86-NEXT: addl %esi, %ebp
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %esi, %ebx
; X86-NEXT: adcl $0, %edi
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: addl %ebp, %eax
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: addl %ebx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl %edi, %ebx
+; X86-NEXT: adcl %edi, %esi
; X86-NEXT: setb %cl
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: mull %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: mull %edx
; X86-NEXT: movl %eax, %edi
-; X86-NEXT: addl %ebx, %edi
+; X86-NEXT: addl %esi, %edi
; X86-NEXT: movzbl %cl, %eax
; X86-NEXT: adcl %eax, %edx
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
+; X86-NEXT: adcl %ebp, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: adcl $0, (%esp) ## 4-byte Folded Spill
; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl %ebx, %eax
; X86-NEXT: mull %ecx
-; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: movl %edx, %ebp
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %ecx
-; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: movl %edx, %esi
; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: addl %ebx, %ecx
-; X86-NEXT: adcl $0, %ebp
-; X86-NEXT: movl %esi, %eax
+; X86-NEXT: addl %ebp, %ecx
+; X86-NEXT: adcl $0, %esi
+; X86-NEXT: movl %ebx, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, %esi
-; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: addl %ecx, %ebx
-; X86-NEXT: adcl %ebp, %esi
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: adcl %esi, %ebx
; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: movl %edx, %esi
; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: addl %esi, %ecx
+; X86-NEXT: addl %ebx, %ecx
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
-; X86-NEXT: adcl %eax, %ebp
+; X86-NEXT: adcl %eax, %esi
; X86-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
+; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: adcl $0, %ecx
-; X86-NEXT: adcl $0, %ebp
+; X86-NEXT: adcl $0, %esi
; X86-NEXT: addl (%esp), %ecx ## 4-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
; X86-NEXT: setb (%esp) ## 1-byte Folded Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: mull %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: mull %edi
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %esi
-; X86-NEXT: movl %edx, %edi
-; X86-NEXT: movl %eax, %esi
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
-; X86-NEXT: adcl $0, %edi
-; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: mull %edx
+; X86-NEXT: mull %edi
; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: addl %esi, %eax
-; X86-NEXT: movl %eax, %esi
-; X86-NEXT: adcl %edi, %ebx
-; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
+; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: movl %ebp, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: addl %ebx, %eax
-; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: addl %edi, %eax
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: adcl %ebx, %ebp
+; X86-NEXT: setb %bl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: addl %ebp, %eax
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: movzbl %bl, %eax
; X86-NEXT: adcl %eax, %edx
; X86-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
-; X86-NEXT: adcl %ebp, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %esi, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movzbl (%esp), %eax ## 1-byte Folded Reload
-; X86-NEXT: adcl %eax, %ebx
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %eax, %ebp
+; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: adcl $0, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl %edi, %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: sarl $31, %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: imull {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: mull %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: mull %ebp
; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: addl %edi, %edx
+; X86-NEXT: imull %esi, %ebp
+; X86-NEXT: addl %edi, %ebp
+; X86-NEXT: addl %edx, %ebp
+; X86-NEXT: movl %esi, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: imull %eax, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl %ebx, %eax
; X86-NEXT: imull %esi, %ebx
; X86-NEXT: addl %edx, %ebx
-; X86-NEXT: movl %ebx, (%esp) ## 4-byte Spill
-; X86-NEXT: movl %esi, %ebx
-; X86-NEXT: imull {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl %ebp, %eax
; X86-NEXT: mull %esi
; X86-NEXT: movl %edx, %edi
; X86-NEXT: addl %edx, %ebx
-; X86-NEXT: imull %esi, %ebp
-; X86-NEXT: addl %ebx, %ebp
-; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: addl %eax, %ecx
-; X86-NEXT: adcl (%esp), %ebp ## 4-byte Folded Reload
-; X86-NEXT: movl %ebp, (%esp) ## 4-byte Spill
+; X86-NEXT: movl %ecx, (%esp) ## 4-byte Spill
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %ebp, %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %esi
; X86-NEXT: movl %eax, %esi
; X86-NEXT: addl %edi, %esi
; X86-NEXT: movl %edx, %ebp
; X86-NEXT: adcl $0, %ebp
-; X86-NEXT: addl %ebx, %esi
+; X86-NEXT: addl %ecx, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: adcl %edi, %ebp
; X86-NEXT: setb %bl
; X86-NEXT: addl %eax, %ebp
; X86-NEXT: movzbl %bl, %eax
; X86-NEXT: adcl %edx, %eax
-; X86-NEXT: addl %ecx, %ebp
-; X86-NEXT: adcl (%esp), %eax ## 4-byte Folded Reload
+; X86-NEXT: addl (%esp), %ebp ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: sarl $31, %ebx
-; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: sarl $31, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %esi, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: mull %ecx
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl %edx, %esi
-; X86-NEXT: imull %ebx, %ecx
-; X86-NEXT: addl %edx, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: imull %ebx, %edi
-; X86-NEXT: addl %ecx, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: imull %ebx, %ecx
-; X86-NEXT: mull %ebx
; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill
-; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: imull %esi, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: imull %esi, %ebx
+; X86-NEXT: addl %ecx, %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull %ebx, %eax
-; X86-NEXT: addl %edx, %eax
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
-; X86-NEXT: addl %ecx, (%esp) ## 4-byte Folded Spill
-; X86-NEXT: adcl %edi, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl %ecx, %edi
-; X86-NEXT: addl %esi, %edi
-; X86-NEXT: adcl $0, %esi
-; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: imull %esi, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: imull %esi, %ecx
+; X86-NEXT: addl %edx, %ecx
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: addl %edx, %ecx
+; X86-NEXT: addl %edi, %ebx
+; X86-NEXT: movl (%esp), %eax ## 4-byte Reload
+; X86-NEXT: addl %eax, %esi
+; X86-NEXT: adcl %ebx, %ecx
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %edi, %ebx
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: addl %eax, %ebx
+; X86-NEXT: adcl %edx, %edi
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
; X86-NEXT: addl %eax, %edi
-; X86-NEXT: adcl %edx, %esi
-; X86-NEXT: setb %bl
-; X86-NEXT: addl %eax, %esi
-; X86-NEXT: movzbl %bl, %eax
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
; X86-NEXT: adcl %edx, %eax
-; X86-NEXT: addl (%esp), %esi ## 4-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
-; X86-NEXT: adcl %ebp, %esi
+; X86-NEXT: addl %esi, %edi
+; X86-NEXT: adcl %ecx, %eax
+; X86-NEXT: movl (%esp), %esi ## 4-byte Reload
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
+; X86-NEXT: adcl %ebp, %edi
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
-; X86-NEXT: movl %ebx, %edx
-; X86-NEXT: sarl $31, %edx
-; X86-NEXT: xorl %edx, %eax
-; X86-NEXT: xorl %edx, %edi
-; X86-NEXT: orl %eax, %edi
-; X86-NEXT: xorl %edx, %esi
-; X86-NEXT: xorl %ecx, %edx
-; X86-NEXT: orl %esi, %edx
-; X86-NEXT: orl %edi, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: sarl $31, %ecx
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: xorl %ecx, %ebx
+; X86-NEXT: orl %eax, %ebx
+; X86-NEXT: xorl %ecx, %edi
+; X86-NEXT: xorl %esi, %ecx
+; X86-NEXT: orl %edi, %ecx
+; X86-NEXT: orl %ebx, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %edx, 12(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
; X86-NEXT: movl %ecx, (%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
@@ -326,7 +324,7 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) {
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
; X86-NEXT: movl %ecx, 8(%eax)
; X86-NEXT: setne %al
-; X86-NEXT: addl $56, %esp
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -360,8 +358,9 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) {
; X64-NEXT: .cfi_offset %r14, -32
; X64-NEXT: .cfi_offset %r15, -24
; X64-NEXT: .cfi_offset %rbp, -16
+; X64-NEXT: movq %r8, %rbx
; X64-NEXT: movq %rcx, %r11
-; X64-NEXT: movq %rdx, %rbx
+; X64-NEXT: movq %rdx, %r10
; X64-NEXT: movq %rsi, %r15
; X64-NEXT: movq %rdx, %rax
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
@@ -372,31 +371,29 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) {
; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; X64-NEXT: mulq %r8
; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq %rax, %r10
-; X64-NEXT: addq %rsi, %r10
+; X64-NEXT: movq %rax, %r8
+; X64-NEXT: addq %rsi, %r8
; X64-NEXT: adcq $0, %rcx
-; X64-NEXT: movq %rbx, %rax
+; X64-NEXT: movq %r10, %rax
; X64-NEXT: mulq %r9
; X64-NEXT: movq %rdx, %r12
-; X64-NEXT: movq %rax, %r14
-; X64-NEXT: addq %r10, %r14
+; X64-NEXT: movq %rax, %r10
+; X64-NEXT: addq %r8, %r10
; X64-NEXT: adcq %rcx, %r12
; X64-NEXT: setb %al
; X64-NEXT: movzbl %al, %ecx
; X64-NEXT: movq %r11, %rax
; X64-NEXT: mulq %r9
-; X64-NEXT: movq %rdx, %r11
-; X64-NEXT: movq %rax, %rbx
-; X64-NEXT: addq %r12, %rbx
-; X64-NEXT: adcq %rcx, %r11
+; X64-NEXT: movq %rax, %r14
+; X64-NEXT: addq %r12, %r14
+; X64-NEXT: adcq %rcx, %rdx
+; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: movq %r8, %rcx
-; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; X64-NEXT: mulq %r8
+; X64-NEXT: mulq %rbx
; X64-NEXT: movq %rdx, %r8
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; X64-NEXT: movq %r15, %rax
-; X64-NEXT: mulq %rcx
+; X64-NEXT: mulq %rbx
; X64-NEXT: movq %rdx, %r12
; X64-NEXT: movq %rax, %r13
; X64-NEXT: addq %r8, %r13
@@ -405,69 +402,69 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) {
; X64-NEXT: movq %r9, %rsi
; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; X64-NEXT: mulq %r9
-; X64-NEXT: movq %rdx, %r10
+; X64-NEXT: movq %rdx, %r11
; X64-NEXT: addq %r13, %rax
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; X64-NEXT: adcq %r12, %r10
+; X64-NEXT: adcq %r12, %r11
; X64-NEXT: setb %cl
; X64-NEXT: movq %r15, %r9
; X64-NEXT: movq %r15, %rax
; X64-NEXT: mulq %rsi
; X64-NEXT: movq %rdx, %rbp
; X64-NEXT: movq %rax, %r8
-; X64-NEXT: addq %r10, %r8
+; X64-NEXT: addq %r11, %r8
; X64-NEXT: movzbl %cl, %eax
; X64-NEXT: adcq %rax, %rbp
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r15
; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r8 ## 8-byte Folded Reload
-; X64-NEXT: adcq %r14, %rbp
-; X64-NEXT: adcq $0, %rbx
-; X64-NEXT: adcq $0, %r11
-; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; X64-NEXT: adcq %r10, %rbp
+; X64-NEXT: adcq $0, %r14
+; X64-NEXT: adcq $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Folded Spill
+; X64-NEXT: movq %rdi, %rcx
; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: mulq %r15
-; X64-NEXT: movq %rdx, %r10
-; X64-NEXT: movq %rax, %r14
+; X64-NEXT: movq %rdx, %r11
+; X64-NEXT: movq %rax, %rdi
; X64-NEXT: movq %r9, %rax
; X64-NEXT: movq %r9, %rsi
; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; X64-NEXT: mulq %r15
; X64-NEXT: movq %rdx, %r13
; X64-NEXT: movq %rax, %r9
-; X64-NEXT: addq %r10, %r9
+; X64-NEXT: addq %r11, %r9
; X64-NEXT: adcq $0, %r13
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r12
-; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: movq %rcx, %rax
; X64-NEXT: mulq %r12
-; X64-NEXT: movq %rdx, %r11
+; X64-NEXT: movq %rdx, %r10
; X64-NEXT: addq %r9, %rax
-; X64-NEXT: movq %rax, %rdi
-; X64-NEXT: adcq %r13, %r11
+; X64-NEXT: movq %rax, %r9
+; X64-NEXT: adcq %r13, %r10
; X64-NEXT: setb %cl
; X64-NEXT: movq %rsi, %rax
; X64-NEXT: mulq %r12
-; X64-NEXT: movq %rdx, %r10
+; X64-NEXT: movq %rdx, %r11
; X64-NEXT: movq %rax, %r13
-; X64-NEXT: addq %r11, %r13
+; X64-NEXT: addq %r10, %r13
; X64-NEXT: movzbl %cl, %eax
-; X64-NEXT: adcq %rax, %r10
-; X64-NEXT: addq %r8, %r14
-; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; X64-NEXT: adcq %rbp, %rdi
+; X64-NEXT: adcq %rax, %r11
+; X64-NEXT: addq %r8, %rdi
; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; X64-NEXT: adcq %rbp, %r9
+; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; X64-NEXT: adcq $0, %r13
-; X64-NEXT: adcq $0, %r10
-; X64-NEXT: addq %rbx, %r13
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r10 ## 8-byte Folded Reload
-; X64-NEXT: setb %cl
+; X64-NEXT: adcq $0, %r11
+; X64-NEXT: addq %r14, %r13
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r11 ## 8-byte Folded Reload
+; X64-NEXT: setb {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Folded Spill
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 ## 8-byte Reload
; X64-NEXT: movq %r9, %rax
; X64-NEXT: mulq %r15
; X64-NEXT: movq %rdx, %rsi
-; X64-NEXT: movq %rax, %r11
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload
-; X64-NEXT: movq %rbx, %rax
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 ## 8-byte Reload
+; X64-NEXT: movq %r10, %rax
; X64-NEXT: mulq %r15
; X64-NEXT: movq %rdx, %rdi
; X64-NEXT: movq %rax, %r8
@@ -476,118 +473,117 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) {
; X64-NEXT: movq %r9, %rax
; X64-NEXT: mulq %r12
; X64-NEXT: movq %rdx, %r9
-; X64-NEXT: addq %r8, %rax
; X64-NEXT: movq %rax, %rsi
+; X64-NEXT: addq %r8, %rsi
; X64-NEXT: adcq %rdi, %r9
; X64-NEXT: setb %r8b
-; X64-NEXT: movq %rbx, %rax
+; X64-NEXT: movq %r10, %rax
; X64-NEXT: mulq %r12
; X64-NEXT: movq %rdx, %rbp
; X64-NEXT: movq %rax, %r14
; X64-NEXT: addq %r9, %r14
; X64-NEXT: movzbl %r8b, %eax
; X64-NEXT: adcq %rax, %rbp
-; X64-NEXT: addq %r13, %r11
-; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; X64-NEXT: adcq %r10, %rsi
+; X64-NEXT: addq %r13, %rcx
+; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; X64-NEXT: adcq %r11, %rsi
; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; X64-NEXT: movzbl %cl, %eax
+; X64-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax ## 1-byte Folded Reload
; X64-NEXT: adcq %rax, %r14
; X64-NEXT: adcq $0, %rbp
-; X64-NEXT: movq %rbx, %r13
-; X64-NEXT: movq %rbx, %r10
+; X64-NEXT: movq %r10, %r13
; X64-NEXT: sarq $63, %r13
; X64-NEXT: movq %r13, %rcx
; X64-NEXT: imulq %r12, %rcx
; X64-NEXT: movq %r13, %rax
; X64-NEXT: mulq %r15
; X64-NEXT: movq %rax, %r8
-; X64-NEXT: addq %rcx, %rdx
; X64-NEXT: imulq %r13, %r15
+; X64-NEXT: addq %rcx, %r15
; X64-NEXT: addq %rdx, %r15
; X64-NEXT: movq %r13, %rcx
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi ## 8-byte Reload
-; X64-NEXT: imulq %rdi, %rcx
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi ## 8-byte Reload
-; X64-NEXT: movq %rsi, %rax
+; X64-NEXT: imulq %rsi, %rcx
+; X64-NEXT: movq %rbx, %rax
+; X64-NEXT: imulq %r13, %rbx
+; X64-NEXT: addq %rcx, %rbx
; X64-NEXT: mulq %r13
+; X64-NEXT: movq %rax, %r11
; X64-NEXT: movq %rdx, %r9
-; X64-NEXT: addq %rdx, %rcx
-; X64-NEXT: imulq %r13, %rsi
-; X64-NEXT: addq %rcx, %rsi
-; X64-NEXT: movq %rax, %rcx
-; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; X64-NEXT: addq %rdx, %rbx
; X64-NEXT: addq %rax, %r8
-; X64-NEXT: adcq %r15, %rsi
-; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: adcq %r15, %rbx
+; X64-NEXT: movq %rsi, %rax
; X64-NEXT: mulq %r13
; X64-NEXT: movq %rax, %r15
; X64-NEXT: addq %r9, %r15
; X64-NEXT: movq %rdx, %r13
; X64-NEXT: adcq $0, %r13
-; X64-NEXT: addq %rcx, %r15
+; X64-NEXT: addq %r11, %r15
; X64-NEXT: adcq %r9, %r13
; X64-NEXT: setb %cl
; X64-NEXT: addq %rax, %r13
; X64-NEXT: movzbl %cl, %r9d
; X64-NEXT: adcq %rdx, %r9
; X64-NEXT: addq %r8, %r13
-; X64-NEXT: adcq %rsi, %r9
+; X64-NEXT: adcq %rbx, %r9
; X64-NEXT: sarq $63, %r12
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload
-; X64-NEXT: movq %rax, %r8
-; X64-NEXT: imulq %r12, %r8
-; X64-NEXT: mulq %r12
-; X64-NEXT: movq %rax, %rsi
-; X64-NEXT: movq %rdx, %r11
-; X64-NEXT: addq %rdx, %r8
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: imulq %r12, %rcx
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi ## 8-byte Reload
; X64-NEXT: movq %rdi, %rbx
; X64-NEXT: imulq %r12, %rbx
-; X64-NEXT: addq %r8, %rbx
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload
-; X64-NEXT: movq %rax, %rcx
-; X64-NEXT: imulq %r12, %rcx
-; X64-NEXT: mulq %r12
-; X64-NEXT: movq %rax, %r8
-; X64-NEXT: addq %rcx, %rdx
+; X64-NEXT: addq %rcx, %rbx
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 ## 8-byte Reload
+; X64-NEXT: movq %r8, %rdx
+; X64-NEXT: imulq %r12, %rdx
; X64-NEXT: imulq %r12, %r10
; X64-NEXT: addq %rdx, %r10
+; X64-NEXT: movq %r10, %rcx
+; X64-NEXT: mulq %r12
+; X64-NEXT: movq %rdx, %r10
+; X64-NEXT: movq %rax, %rsi
+; X64-NEXT: movq %r8, %rax
+; X64-NEXT: mulq %r12
+; X64-NEXT: movq %rax, %r8
+; X64-NEXT: addq %rdx, %rcx
+; X64-NEXT: addq %r10, %rbx
; X64-NEXT: addq %rsi, %r8
-; X64-NEXT: adcq %rbx, %r10
+; X64-NEXT: adcq %rbx, %rcx
; X64-NEXT: movq %rsi, %rbx
-; X64-NEXT: addq %r11, %rbx
-; X64-NEXT: adcq $0, %r11
+; X64-NEXT: addq %r10, %rbx
+; X64-NEXT: adcq $0, %r10
; X64-NEXT: movq %r12, %rax
; X64-NEXT: mulq %rdi
; X64-NEXT: addq %rax, %rbx
-; X64-NEXT: adcq %rdx, %r11
-; X64-NEXT: setb %cl
-; X64-NEXT: addq %rax, %r11
-; X64-NEXT: movzbl %cl, %eax
+; X64-NEXT: adcq %rdx, %r10
+; X64-NEXT: setb %r12b
+; X64-NEXT: addq %rax, %r10
+; X64-NEXT: movzbl %r12b, %eax
; X64-NEXT: adcq %rdx, %rax
-; X64-NEXT: addq %r8, %r11
-; X64-NEXT: adcq %r10, %rax
-; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi ## 8-byte Folded Reload
+; X64-NEXT: addq %r8, %r10
+; X64-NEXT: adcq %rcx, %rax
+; X64-NEXT: addq %r11, %rsi
; X64-NEXT: adcq %r15, %rbx
-; X64-NEXT: adcq %r13, %r11
+; X64-NEXT: adcq %r13, %r10
; X64-NEXT: adcq %r9, %rax
; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi ## 8-byte Folded Reload
; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Folded Reload
-; X64-NEXT: adcq %r14, %r11
+; X64-NEXT: adcq %r14, %r10
; X64-NEXT: adcq %rbp, %rax
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Reload
-; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: sarq $63, %rcx
-; X64-NEXT: xorq %rcx, %rax
-; X64-NEXT: xorq %rcx, %rbx
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload
+; X64-NEXT: movq %rcx, %rdx
+; X64-NEXT: sarq $63, %rdx
+; X64-NEXT: xorq %rdx, %rax
+; X64-NEXT: xorq %rdx, %rbx
; X64-NEXT: orq %rax, %rbx
-; X64-NEXT: xorq %rcx, %r11
-; X64-NEXT: xorq %rsi, %rcx
-; X64-NEXT: orq %r11, %rcx
-; X64-NEXT: orq %rbx, %rcx
+; X64-NEXT: xorq %rdx, %r10
+; X64-NEXT: xorq %rsi, %rdx
+; X64-NEXT: orq %r10, %rdx
+; X64-NEXT: orq %rbx, %rdx
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; X64-NEXT: movq %rdx, 24(%rax)
+; X64-NEXT: movq %rcx, 24(%rax)
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload
; X64-NEXT: movq %rcx, (%rax)
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload
@@ -613,42 +609,43 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) {
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 20
-; X86-NEXT: subl $156, %esp
-; X86-NEXT: .cfi_def_cfa_offset 176
+; X86-NEXT: subl $152, %esp
+; X86-NEXT: .cfi_def_cfa_offset 172
; X86-NEXT: .cfi_offset %esi, -20
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
; X86-NEXT: .cfi_offset %ebp, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: movl %ebp, %eax
; X86-NEXT: mull %edi
; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl %esi, %eax
; X86-NEXT: mull %edi
-; X86-NEXT: movl %edx, %esi
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: addl %ecx, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: adcl $0, %esi
-; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: mull %ebp
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %ecx, %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %ecx, %ebp
; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: addl %edi, %eax
+; X86-NEXT: addl %ebx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl %esi, %ecx
+; X86-NEXT: adcl %edi, %ecx
; X86-NEXT: setb %bl
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %esi, %eax
; X86-NEXT: mull %ebp
-; X86-NEXT: movl %edx, %esi
; X86-NEXT: addl %ecx, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill
; X86-NEXT: movzbl %bl, %eax
-; X86-NEXT: adcl %eax, %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: adcl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: mull %edi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
@@ -659,354 +656,353 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) {
; X86-NEXT: movl %eax, %ebp
; X86-NEXT: addl %ecx, %ebp
; X86-NEXT: adcl $0, %edi
-; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: mull %ecx
-; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %edx, %esi
; X86-NEXT: addl %ebp, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl %edi, %ebx
-; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: adcl %edi, %esi
+; X86-NEXT: setb %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: mull %ecx
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: addl %ebx, %ecx
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: addl %esi, %edi
+; X86-NEXT: movzbl %cl, %eax
; X86-NEXT: adcl %eax, %edx
-; X86-NEXT: addl (%esp), %ecx ## 4-byte Folded Reload
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl $0, (%esp) ## 4-byte Folded Spill
; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
-; X86-NEXT: adcl $0, %esi
-; X86-NEXT: movl %esi, (%esp) ## 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, %eax
-; X86-NEXT: mull %esi
-; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %edx, %esi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: mull %esi
+; X86-NEXT: mull %ebx
; X86-NEXT: movl %edx, %ebp
-; X86-NEXT: movl %eax, %esi
-; X86-NEXT: addl %ebx, %esi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %esi, %ebx
; X86-NEXT: adcl $0, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl %edi, %eax
-; X86-NEXT: mull %ebx
-; X86-NEXT: movl %edx, %edi
-; X86-NEXT: addl %esi, %eax
-; X86-NEXT: movl %eax, %esi
-; X86-NEXT: adcl %ebp, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: addl %ebx, %eax
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: adcl %ebp, %ecx
; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %ebx
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: movl %eax, %ebp
-; X86-NEXT: addl %edi, %ebp
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: addl %ecx, %esi
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
-; X86-NEXT: adcl %eax, %ebx
-; X86-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %eax, %ebp
+; X86-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl $0, %esi
; X86-NEXT: adcl $0, %ebp
-; X86-NEXT: adcl $0, %ebx
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
-; X86-NEXT: adcl (%esp), %ebx ## 4-byte Folded Reload
+; X86-NEXT: addl (%esp), %esi ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
; X86-NEXT: setb (%esp) ## 1-byte Folded Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: mull %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: mull %edi
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %esi
-; X86-NEXT: movl %edx, %esi
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: addl %ecx, %edi
-; X86-NEXT: adcl $0, %esi
+; X86-NEXT: mull %edi
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %ecx, %ebx
+; X86-NEXT: adcl $0, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: mull %ecx
; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: addl %edi, %eax
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: adcl %esi, %ecx
+; X86-NEXT: addl %ebx, %eax
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: adcl %edi, %ecx
; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 1-byte Folded Reload
; X86-NEXT: adcl %ecx, %edx
-; X86-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
-; X86-NEXT: adcl %ebx, %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl %ebp, %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movzbl (%esp), %ecx ## 1-byte Folded Reload
; X86-NEXT: adcl %ecx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: adcl $0, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl %ebp, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: mull %esi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %esi
-; X86-NEXT: movl %edx, %esi
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: addl %ecx, %edi
-; X86-NEXT: adcl $0, %esi
-; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: mull %ebp
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: addl %edi, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl %esi, %ecx
-; X86-NEXT: setb %bl
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %ebp
-; X86-NEXT: movl %edx, %esi
-; X86-NEXT: addl %ecx, %eax
-; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill
-; X86-NEXT: movzbl %bl, %eax
-; X86-NEXT: adcl %eax, %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: mull %edi
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %edi
; X86-NEXT: movl %edx, %edi
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: addl %ecx, %ebx
; X86-NEXT: adcl $0, %edi
; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: mull %ecx
-; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %ecx
; X86-NEXT: addl %ebx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl %edi, %ebp
+; X86-NEXT: adcl %edi, %ecx
; X86-NEXT: setb %bl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %ecx
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: addl %ebp, %ecx
+; X86-NEXT: mull %esi
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movzbl %bl, %eax
; X86-NEXT: adcl %eax, %edx
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl $0, (%esp) ## 4-byte Folded Spill
-; X86-NEXT: adcl $0, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %edx, (%esp) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl %ebx, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: mull %esi
-; X86-NEXT: movl %edx, %ebx
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %edx, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %esi
-; X86-NEXT: movl %edx, %ebp
-; X86-NEXT: movl %eax, %esi
-; X86-NEXT: addl %ebx, %esi
-; X86-NEXT: adcl $0, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: addl %ecx, %ebp
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: movl %ebx, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: mull %ebx
-; X86-NEXT: movl %edx, %edi
-; X86-NEXT: addl %esi, %eax
-; X86-NEXT: movl %eax, %esi
-; X86-NEXT: adcl %ebp, %edi
-; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: addl %ebp, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %edi, %esi
+; X86-NEXT: setb %cl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl %ebp, %eax
; X86-NEXT: mull %ebx
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: movl %eax, %ebp
-; X86-NEXT: addl %edi, %ebp
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
-; X86-NEXT: adcl %eax, %ebx
-; X86-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: addl %esi, %edi
+; X86-NEXT: movzbl %cl, %eax
+; X86-NEXT: adcl %eax, %edx
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl $0, (%esp) ## 4-byte Folded Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %esi, %ebx
; X86-NEXT: adcl $0, %ebp
-; X86-NEXT: adcl $0, %ebx
-; X86-NEXT: addl (%esp), %ebp ## 4-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
-; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: mull %esi
; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill
+; X86-NEXT: addl %ebx, %eax
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: adcl %ebp, %ecx
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %esi
-; X86-NEXT: movl %edx, %esi
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: addl %ecx, %edi
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: addl %ecx, %esi
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
+; X86-NEXT: adcl %eax, %ebp
+; X86-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: adcl $0, %esi
+; X86-NEXT: adcl $0, %ebp
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
+; X86-NEXT: adcl (%esp), %ebp ## 4-byte Folded Reload
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: mull %edi
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %edi
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %ecx, %ebx
+; X86-NEXT: adcl $0, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: mull %ecx
; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: addl %edi, %eax
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: adcl %esi, %ecx
+; X86-NEXT: addl %ebx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %edi, %ecx
; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: addl %ecx, %eax
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 1-byte Folded Reload
-; X86-NEXT: adcl %ecx, %edx
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: addl %ecx, %edi
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
+; X86-NEXT: adcl %eax, %ebx
; X86-NEXT: movl (%esp), %ecx ## 4-byte Reload
-; X86-NEXT: addl %ebp, %ecx
-; X86-NEXT: movl %edi, %esi
-; X86-NEXT: adcl %ebx, %esi
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 1-byte Folded Reload
-; X86-NEXT: adcl %edi, %eax
-; X86-NEXT: adcl $0, %edx
+; X86-NEXT: addl %esi, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; X86-NEXT: adcl %ebp, %edx
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
+; X86-NEXT: adcl %eax, %edi
+; X86-NEXT: adcl $0, %ebx
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
; X86-NEXT: movl %ecx, (%esp) ## 4-byte Spill
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, %eax
-; X86-NEXT: mull %esi
-; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %edi
+; X86-NEXT: movl %edx, %esi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %edi
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: addl %esi, %ebp
+; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull %esi
+; X86-NEXT: movl %esi, %edi
; X86-NEXT: movl %edx, %esi
-; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: addl %ecx, %ebx
-; X86-NEXT: adcl $0, %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %edi, %eax
-; X86-NEXT: mull %ecx
-; X86-NEXT: movl %ecx, %edi
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: addl %ebx, %eax
+; X86-NEXT: addl %ebp, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl %esi, %ecx
-; X86-NEXT: setb %bl
-; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: adcl %ebx, %esi
+; X86-NEXT: setb %cl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %edi
-; X86-NEXT: movl %edi, %ebp
-; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: addl %esi, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movzbl %bl, %eax
+; X86-NEXT: movzbl %cl, %eax
; X86-NEXT: adcl %eax, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: addl %esi, %ecx
+; X86-NEXT: adcl $0, %ebp
+; X86-NEXT: movl %edi, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: mull %esi
-; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: addl %ecx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %esi
-; X86-NEXT: movl %edx, %esi
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: addl %ecx, %edi
-; X86-NEXT: adcl $0, %esi
+; X86-NEXT: adcl %ebp, %edi
+; X86-NEXT: setb %cl
; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: movl %ebp, %ecx
-; X86-NEXT: mull %ebp
-; X86-NEXT: movl %edx, %ebp
-; X86-NEXT: addl %edi, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl %esi, %ebp
-; X86-NEXT: setb %bl
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %ecx
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: addl %ebp, %ecx
-; X86-NEXT: movzbl %bl, %eax
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: addl %edi, %esi
+; X86-NEXT: movzbl %cl, %eax
; X86-NEXT: adcl %eax, %edx
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: mull %ebx
-; X86-NEXT: movl %edx, %edi
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %ebx
-; X86-NEXT: movl %edx, %ebp
-; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: addl %edi, %ebx
-; X86-NEXT: adcl $0, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl %ebp, %eax
; X86-NEXT: mull %edi
-; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: mull %edi
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %ecx, %ebx
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %ebp
; X86-NEXT: addl %ebx, %eax
; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: adcl %ebp, %esi
+; X86-NEXT: adcl %edi, %ebp
; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %edi
-; X86-NEXT: movl %edx, %ebp
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: addl %esi, %edi
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: addl %ebp, %ecx
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
-; X86-NEXT: adcl %eax, %ebp
-; X86-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl %eax, %edi
+; X86-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl $0, %ecx
; X86-NEXT: adcl $0, %edi
-; X86-NEXT: adcl $0, %ebp
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: mull %esi
-; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %edx, %esi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %esi
-; X86-NEXT: movl %edx, %esi
-; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: addl %ecx, %ebx
-; X86-NEXT: adcl $0, %esi
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: addl %esi, %ebp
+; X86-NEXT: adcl $0, %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: mull %ecx
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: addl %ebx, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl %esi, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: mull %edx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: addl %ebp, %eax
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: adcl %ebx, %esi
; X86-NEXT: setb %bl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: addl %ecx, %eax
-; X86-NEXT: movzbl %bl, %ecx
-; X86-NEXT: adcl %ecx, %edx
+; X86-NEXT: addl %esi, %eax
+; X86-NEXT: movzbl %bl, %esi
+; X86-NEXT: adcl %esi, %edx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
-; X86-NEXT: addl %edi, %ebx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
-; X86-NEXT: adcl %ebp, %edi
+; X86-NEXT: addl %ecx, %ebx
+; X86-NEXT: adcl %edi, %ebp
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 1-byte Folded Reload
; X86-NEXT: adcl %ecx, %eax
; X86-NEXT: adcl $0, %edx
@@ -1021,7 +1017,7 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) {
; X86-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
; X86-NEXT: movl %ebx, %edx
; X86-NEXT: adcl $0, %edx
-; X86-NEXT: movl %edi, %ecx
+; X86-NEXT: movl %ebp, %ecx
; X86-NEXT: adcl $0, %ecx
; X86-NEXT: adcl $0, %eax
; X86-NEXT: adcl $0, %esi
@@ -1063,103 +1059,103 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) {
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: mull %edi
-; X86-NEXT: movl %edx, %esi
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: mull %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: mull %ebx
; X86-NEXT: movl %edx, %edi
-; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: addl %esi, %ebx
-; X86-NEXT: adcl $0, %edi
-; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: mull %esi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: addl %edi, %ebp
+; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: mull %edi
; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: addl %ebx, %eax
+; X86-NEXT: addl %ebp, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl %edi, %ecx
+; X86-NEXT: adcl %ebx, %ecx
; X86-NEXT: setb %bl
-; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: mull %esi
-; X86-NEXT: movl %eax, %esi
-; X86-NEXT: addl %ecx, %esi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: mull %edi
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: addl %ecx, %edi
; X86-NEXT: movzbl %bl, %eax
; X86-NEXT: adcl %eax, %edx
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: adcl $0, (%esp) ## 4-byte Folded Spill
; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: mull %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: mull %ebp
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: mull %edi
-; X86-NEXT: movl %edx, %edi
-; X86-NEXT: movl %eax, %ebp
-; X86-NEXT: addl %ecx, %ebp
-; X86-NEXT: adcl $0, %edi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: mull %ebp
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: addl %ecx, %esi
+; X86-NEXT: adcl $0, %ebp
; X86-NEXT: movl %ebx, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: mull %ecx
; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: addl %ebp, %eax
-; X86-NEXT: movl %eax, %ebp
-; X86-NEXT: adcl %edi, %ebx
+; X86-NEXT: addl %esi, %eax
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: adcl %ebp, %ebx
; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %ecx
-; X86-NEXT: movl %edx, %edi
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: addl %ebx, %ecx
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: addl %ebx, %ebp
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
-; X86-NEXT: adcl %eax, %edi
-; X86-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
-; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %eax, %ecx
+; X86-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl $0, %ebp
; X86-NEXT: adcl $0, %ecx
-; X86-NEXT: adcl $0, %edi
-; X86-NEXT: addl (%esp), %ecx ## 4-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
+; X86-NEXT: addl (%esp), %ebp ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: mull %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: mull %edi
; X86-NEXT: movl %edx, %esi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %ebx
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: movl %eax, %ebp
-; X86-NEXT: addl %esi, %ebp
-; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: mull %edi
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %esi, %ebx
+; X86-NEXT: adcl $0, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: mull %edx
; X86-NEXT: movl %edx, %esi
-; X86-NEXT: addl %ebp, %eax
-; X86-NEXT: movl %eax, %ebp
-; X86-NEXT: adcl %ebx, %esi
+; X86-NEXT: addl %ebx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %edi, %esi
; X86-NEXT: setb (%esp) ## 1-byte Folded Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: addl %esi, %ebx
; X86-NEXT: movzbl (%esp), %eax ## 1-byte Folded Reload
-; X86-NEXT: movl %edx, %esi
-; X86-NEXT: adcl %eax, %esi
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
-; X86-NEXT: addl %ecx, %edx
-; X86-NEXT: adcl %edi, %ebp
+; X86-NEXT: adcl %eax, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
+; X86-NEXT: addl %ebp, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
+; X86-NEXT: adcl %ecx, %esi
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
; X86-NEXT: adcl %eax, %ebx
-; X86-NEXT: adcl $0, %esi
+; X86-NEXT: adcl $0, %edx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
; X86-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
@@ -1169,14 +1165,14 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) {
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
; X86-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
-; X86-NEXT: adcl %eax, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl $0, %ebp
-; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl $0, %ebx
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %eax, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: adcl $0, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl $0, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: sarl $31, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -1187,89 +1183,88 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) {
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %edi
-; X86-NEXT: movl %eax, %ebp
-; X86-NEXT: addl %esi, %ebp
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: adcl $0, %ebx
-; X86-NEXT: addl %ecx, %ebp
-; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl %esi, %ebx
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %esi, %ebx
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: adcl $0, %ebp
+; X86-NEXT: addl %ecx, %ebx
+; X86-NEXT: movl %ebx, (%esp) ## 4-byte Spill
+; X86-NEXT: adcl %esi, %ebp
; X86-NEXT: setb %cl
-; X86-NEXT: addl %eax, %ebx
+; X86-NEXT: addl %eax, %ebp
; X86-NEXT: movzbl %cl, %eax
; X86-NEXT: adcl %edx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %edi
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %edi
-; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %edx, %ebx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: movl %eax, %ecx
; X86-NEXT: movl %eax, %edx
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: addl %ecx, %edx
-; X86-NEXT: movl %esi, %ecx
-; X86-NEXT: adcl $0, %ecx
-; X86-NEXT: addl (%esp), %edx ## 4-byte Folded Reload
+; X86-NEXT: addl %esi, %edx
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %ebx, %esi
+; X86-NEXT: adcl $0, %esi
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl %eax, %ecx
-; X86-NEXT: setb %al
-; X86-NEXT: addl %ebp, %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movzbl %al, %edx
-; X86-NEXT: adcl %esi, %edx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
-; X86-NEXT: addl %ecx, %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: adcl %eax, %esi
+; X86-NEXT: setb %dl
+; X86-NEXT: addl %ecx, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movzbl %dl, %edx
+; X86-NEXT: adcl %ebx, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; X86-NEXT: addl %esi, %ecx
+; X86-NEXT: movl (%esp), %eax ## 4-byte Reload
; X86-NEXT: adcl %edx, %eax
-; X86-NEXT: movl %ebx, %ebp
-; X86-NEXT: adcl $0, %ebp
+; X86-NEXT: movl %ebp, %ebx
+; X86-NEXT: adcl $0, %ebx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
; X86-NEXT: adcl $0, %esi
-; X86-NEXT: addl (%esp), %ecx ## 4-byte Folded Reload
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
; X86-NEXT: adcl $0, %ecx
; X86-NEXT: adcl $0, %edx
-; X86-NEXT: addl %ebp, %ecx
+; X86-NEXT: addl %ebx, %ecx
; X86-NEXT: adcl %esi, %edx
; X86-NEXT: setb %al
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
+; X86-NEXT: adcl (%esp), %edx ## 4-byte Folded Reload
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movzbl %al, %eax
-; X86-NEXT: adcl %ebx, %eax
+; X86-NEXT: adcl %ebp, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
; X86-NEXT: movl %edi, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: mull %ecx
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill
; X86-NEXT: movl %edx, %ebp
; X86-NEXT: imull %edi, %ecx
-; X86-NEXT: addl %edx, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: imull %edi, %esi
; X86-NEXT: addl %ecx, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: imull %edi, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: imull %edi, %ecx
+; X86-NEXT: addl %edx, %ecx
; X86-NEXT: mull %edi
; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: addl %ecx, %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: imull %edi, %ecx
; X86-NEXT: addl %edx, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: addl %ebp, %esi
+; X86-NEXT: movl (%esp), %eax ## 4-byte Reload
; X86-NEXT: addl %eax, %ebx
; X86-NEXT: adcl %esi, %ecx
; X86-NEXT: movl %eax, %esi
@@ -1287,26 +1282,23 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) {
; X86-NEXT: addl %ebx, %ebp
; X86-NEXT: adcl %ecx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: imull %edi, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
-; X86-NEXT: addl %eax, %ecx
-; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: imull %edi, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: imull %edi, %ecx
; X86-NEXT: addl %edx, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: imull %edi, %edx
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
; X86-NEXT: imull {{[0-9]+}}(%esp), %edi
; X86-NEXT: addl %edx, %edi
-; X86-NEXT: movl (%esp), %edx ## 4-byte Reload
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; X86-NEXT: addl %edx, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
-; X86-NEXT: addl %edx, %esi
+; X86-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
; X86-NEXT: adcl %ecx, %edi
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: addl %eax, %ecx
+; X86-NEXT: movl %esi, %ecx
+; X86-NEXT: addl %edx, %ecx
; X86-NEXT: adcl $0, %edx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
; X86-NEXT: addl %ebx, %ecx
@@ -1317,35 +1309,36 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) {
; X86-NEXT: addl %ebx, %edx
; X86-NEXT: movzbl %cl, %ecx
; X86-NEXT: adcl %eax, %ecx
-; X86-NEXT: addl %esi, %edx
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
; X86-NEXT: adcl %edi, %ecx
; X86-NEXT: movl %ecx, %edi
+; X86-NEXT: movl (%esp), %ecx ## 4-byte Reload
+; X86-NEXT: addl %esi, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
-; X86-NEXT: addl (%esp), %esi ## 4-byte Folded Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
; X86-NEXT: adcl %ebp, %edx
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
+; X86-NEXT: movl %ecx, (%esp) ## 4-byte Spill
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sarl $31, %eax
-; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: movl %eax, %edi
+; X86-NEXT: movl %eax, %ebp
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl %edx, %esi
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: addl %edx, %ecx
; X86-NEXT: adcl $0, %esi
-; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: movl %edi, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
@@ -1354,189 +1347,187 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) {
; X86-NEXT: adcl %edx, %esi
; X86-NEXT: setb %bl
; X86-NEXT: addl %eax, %esi
-; X86-NEXT: movzbl %bl, %ebx
-; X86-NEXT: adcl %edx, %ebx
-; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movzbl %bl, %edi
+; X86-NEXT: adcl %edx, %edi
+; X86-NEXT: movl %ebp, %eax
; X86-NEXT: addl %esi, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: adcl %ebx, %eax
+; X86-NEXT: adcl %edi, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: adcl $0, %esi
-; X86-NEXT: adcl $0, %ebx
-; X86-NEXT: movl %ebp, %ecx
-; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %edx, %ebx
; X86-NEXT: movl %eax, %ebp
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: addl %edx, %ebp
-; X86-NEXT: adcl $0, %edi
+; X86-NEXT: adcl $0, %ebx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: addl %eax, %ebp
-; X86-NEXT: adcl %edx, %edi
+; X86-NEXT: adcl %edx, %ebx
; X86-NEXT: setb %cl
-; X86-NEXT: addl %eax, %edi
+; X86-NEXT: addl %eax, %ebx
; X86-NEXT: movzbl %cl, %eax
; X86-NEXT: adcl %edx, %eax
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
; X86-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
; X86-NEXT: adcl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
-; X86-NEXT: movl %edi, %edx
+; X86-NEXT: movl %ebx, %edx
; X86-NEXT: adcl $0, %edx
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
; X86-NEXT: adcl $0, %eax
; X86-NEXT: addl %esi, %edx
-; X86-NEXT: adcl %ebx, %eax
+; X86-NEXT: adcl %edi, %eax
; X86-NEXT: movl %eax, %esi
; X86-NEXT: setb %al
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
-; X86-NEXT: addl %ebx, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
+; X86-NEXT: addl %edi, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: adcl %ebp, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movzbl %al, %eax
-; X86-NEXT: adcl %edi, %eax
+; X86-NEXT: adcl %ebx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: adcl $0, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
+; X86-NEXT: imull %esi, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload
-; X86-NEXT: imull %ebp, %eax
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull %ebp, %eax
+; X86-NEXT: imull %esi, %eax
; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: imull %esi, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: imull %ebp, %ecx
+; X86-NEXT: imull %esi, %ecx
+; X86-NEXT: addl %edx, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
; X86-NEXT: addl %esi, %ecx
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: imull %ebp, %ecx
-; X86-NEXT: addl %edx, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
-; X86-NEXT: addl %edx, %ebx
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: addl %edx, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: adcl %eax, %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
-; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
+; X86-NEXT: movl %edi, %eax
; X86-NEXT: addl %esi, %eax
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
-; X86-NEXT: movl %ecx, %edi
-; X86-NEXT: adcl $0, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
+; X86-NEXT: movl %ebx, %ebp
+; X86-NEXT: adcl $0, %ebp
; X86-NEXT: addl %edx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl %esi, %edi
-; X86-NEXT: setb %dl
-; X86-NEXT: addl %ebx, %edi
-; X86-NEXT: movzbl %dl, %eax
+; X86-NEXT: adcl %esi, %ebp
+; X86-NEXT: setb %al
+; X86-NEXT: addl %edi, %ebp
+; X86-NEXT: movzbl %al, %eax
+; X86-NEXT: adcl %ebx, %eax
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
; X86-NEXT: adcl %ecx, %eax
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: imull %ebp, %ecx
-; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
+; X86-NEXT: imull %ebx, %ecx
+; X86-NEXT: movl %ebx, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: mull %esi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: addl %ecx, %edx
-; X86-NEXT: imull %ebp, %esi
+; X86-NEXT: imull %ebx, %esi
+; X86-NEXT: addl %ecx, %esi
; X86-NEXT: addl %edx, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl %ebp, %esi
-; X86-NEXT: imull {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %ebx, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %ebp
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: imull %eax, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: imull %ebx, %esi
+; X86-NEXT: addl %ecx, %esi
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %ebx, %ecx
+; X86-NEXT: movl %edx, %edi
; X86-NEXT: addl %edx, %esi
+; X86-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull %ebp, %eax
-; X86-NEXT: addl %esi, %eax
-; X86-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %ebp
-; X86-NEXT: movl %eax, %ebp
-; X86-NEXT: addl %ebx, %ebp
-; X86-NEXT: movl %edx, %esi
-; X86-NEXT: adcl $0, %esi
-; X86-NEXT: addl %ecx, %ebp
-; X86-NEXT: adcl %ebx, %esi
-; X86-NEXT: setb %bl
-; X86-NEXT: addl %eax, %esi
-; X86-NEXT: movzbl %bl, %eax
-; X86-NEXT: adcl %edx, %eax
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
-; X86-NEXT: addl %edx, %ecx
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
-; X86-NEXT: adcl %edi, %esi
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: addl %edi, %eax
+; X86-NEXT: adcl $0, %edx
+; X86-NEXT: addl %ebx, %eax
+; X86-NEXT: adcl %edi, %edx
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 1-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
+; X86-NEXT: adcl %esi, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
+; X86-NEXT: addl %esi, %ebx
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
+; X86-NEXT: adcl %ebp, %edx
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
-; X86-NEXT: addl (%esp), %edx ## 4-byte Folded Reload
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
+; X86-NEXT: adcl (%esp), %ebx ## 4-byte Folded Reload
+; X86-NEXT: movl %ebx, (%esp) ## 4-byte Spill
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
+; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl (%esp), %ebx ## 4-byte Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload
+; X86-NEXT: movl %ebp, %esi
+; X86-NEXT: sarl $31, %esi
+; X86-NEXT: xorl %esi, %edi
+; X86-NEXT: xorl %esi, %edx
+; X86-NEXT: orl %edi, %edx
; X86-NEXT: movl %ebx, %edi
-; X86-NEXT: sarl $31, %edi
-; X86-NEXT: xorl %edi, %edx
-; X86-NEXT: xorl %edi, %esi
-; X86-NEXT: orl %edx, %esi
-; X86-NEXT: xorl %edi, %ecx
-; X86-NEXT: orl %esi, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
-; X86-NEXT: xorl %edi, %edx
-; X86-NEXT: orl %ecx, %edx
-; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: xorl %esi, %edi
+; X86-NEXT: orl %edx, %edi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
-; X86-NEXT: xorl %edi, %edx
-; X86-NEXT: xorl %edi, %eax
-; X86-NEXT: orl %edx, %eax
-; X86-NEXT: xorl %edi, %ebp
-; X86-NEXT: orl %eax, %ebp
-; X86-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
-; X86-NEXT: orl %ebp, %edi
-; X86-NEXT: orl %ecx, %edi
+; X86-NEXT: xorl %esi, %edx
+; X86-NEXT: orl %edi, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
+; X86-NEXT: xorl %esi, %ebx
+; X86-NEXT: xorl %esi, %ecx
+; X86-NEXT: orl %ebx, %ecx
+; X86-NEXT: xorl %esi, %eax
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
+; X86-NEXT: orl %eax, %esi
+; X86-NEXT: orl %edx, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %ebx, 28(%eax)
+; X86-NEXT: movl %ebp, 28(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
; X86-NEXT: movl %ecx, (%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
@@ -1552,7 +1543,7 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) {
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
; X86-NEXT: movl %ecx, 24(%eax)
; X86-NEXT: setne %al
-; X86-NEXT: addl $156, %esp
+; X86-NEXT: addl $152, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/sse-regcall.ll b/llvm/test/CodeGen/X86/sse-regcall.ll
index 0226052402cb8..7a3e48572b8e2 100644
--- a/llvm/test/CodeGen/X86/sse-regcall.ll
+++ b/llvm/test/CodeGen/X86/sse-regcall.ll
@@ -196,7 +196,7 @@ define x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a
; WIN32: # %bb.0:
; WIN32-NEXT: pushl %ebp
; WIN32-NEXT: pushl %ebx
-; WIN32-NEXT: subl $12, %esp
+; WIN32-NEXT: subl $16, %esp
; WIN32-NEXT: movl %esi, (%esp) # 4-byte Spill
; WIN32-NEXT: movl %edi, %esi
; WIN32-NEXT: movl %edx, %ebx
@@ -207,36 +207,37 @@ define x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a
; WIN32-NEXT: subl %esi, %ebx
; WIN32-NEXT: movl %edi, %eax
; WIN32-NEXT: subl %ecx, %eax
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; WIN32-NEXT: movl %ebp, %ecx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT: subl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT: imull %eax, %ecx
+; WIN32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; WIN32-NEXT: movl %esi, %eax
-; WIN32-NEXT: subl {{[0-9]+}}(%esp), %eax
-; WIN32-NEXT: imull %ebx, %eax
-; WIN32-NEXT: addl %ecx, %eax
+; WIN32-NEXT: movl %esi, %edx
+; WIN32-NEXT: subl {{[0-9]+}}(%esp), %edx
+; WIN32-NEXT: imull %ebx, %edx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
; WIN32-NEXT: movl (%esp), %ebx # 4-byte Reload
-; WIN32-NEXT: subl {{[0-9]+}}(%esp), %ebx
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; WIN32-NEXT: movl %edx, %ecx
+; WIN32-NEXT: subl %ebp, %ebx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT: movl %eax, %ecx
; WIN32-NEXT: subl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT: imull %ebx, %ecx
-; WIN32-NEXT: addl %eax, %ecx
+; WIN32-NEXT: addl %edx, %ecx
; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; WIN32-NEXT: addl (%esp), %eax # 4-byte Folded Reload
-; WIN32-NEXT: addl {{[0-9]+}}(%esp), %ebp
-; WIN32-NEXT: imull %ebp, %edi
+; WIN32-NEXT: addl (%esp), %ebp # 4-byte Folded Reload
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; WIN32-NEXT: addl {{[0-9]+}}(%esp), %edx
+; WIN32-NEXT: imull %edx, %edi
; WIN32-NEXT: addl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; WIN32-NEXT: addl %esi, %edi
-; WIN32-NEXT: addl {{[0-9]+}}(%esp), %edx
-; WIN32-NEXT: imull %eax, %edx
-; WIN32-NEXT: addl %edx, %edi
+; WIN32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT: imull %ebp, %eax
+; WIN32-NEXT: addl %esi, %eax
+; WIN32-NEXT: addl %eax, %edi
; WIN32-NEXT: addl %ecx, %edi
+; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; WIN32-NEXT: movl %edi, %eax
-; WIN32-NEXT: addl $12, %esp
+; WIN32-NEXT: addl $16, %esp
; WIN32-NEXT: popl %ebx
; WIN32-NEXT: popl %ebp
; WIN32-NEXT: retl
@@ -270,18 +271,18 @@ define x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a
; WIN64-NEXT: # kill: def $r11d killed $r11d killed $r11
; WIN64-NEXT: subl %r12d, %r11d
; WIN64-NEXT: imull %edx, %r11d
-; WIN64-NEXT: addl %r9d, %r11d
; WIN64-NEXT: leal (%r14,%r15), %edx
-; WIN64-NEXT: movl %r14d, %r9d
-; WIN64-NEXT: subl %r15d, %r9d
-; WIN64-NEXT: imull %esi, %r9d
-; WIN64-NEXT: addl %r11d, %r9d
+; WIN64-NEXT: # kill: def $r14d killed $r14d killed $r14
+; WIN64-NEXT: subl %r15d, %r14d
+; WIN64-NEXT: imull %esi, %r14d
+; WIN64-NEXT: addl %r11d, %r14d
; WIN64-NEXT: addl %ecx, %eax
; WIN64-NEXT: imull %r8d, %eax
; WIN64-NEXT: imull %ebx, %r10d
-; WIN64-NEXT: addl %r10d, %eax
; WIN64-NEXT: imull %edi, %edx
+; WIN64-NEXT: addl %r10d, %edx
; WIN64-NEXT: addl %edx, %eax
+; WIN64-NEXT: addl %r14d, %eax
; WIN64-NEXT: addl %r9d, %eax
; WIN64-NEXT: popq %rbx
; WIN64-NEXT: retq
@@ -311,19 +312,19 @@ define x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a
; LINUXOSX-NEXT: leal (%r13,%r14), %r11d
; LINUXOSX-NEXT: movl %r13d, %r12d
; LINUXOSX-NEXT: subl %r14d, %r12d
+; LINUXOSX-NEXT: movl {{[0-9]+}}(%rsp), %r14d
; LINUXOSX-NEXT: imull %edx, %r12d
-; LINUXOSX-NEXT: movl {{[0-9]+}}(%rsp), %edx
-; LINUXOSX-NEXT: addl %r9d, %r12d
-; LINUXOSX-NEXT: movl %r15d, %r9d
-; LINUXOSX-NEXT: subl %edx, %r9d
-; LINUXOSX-NEXT: imull %esi, %r9d
-; LINUXOSX-NEXT: addl %r12d, %r9d
+; LINUXOSX-NEXT: movl %r15d, %edx
+; LINUXOSX-NEXT: subl %r14d, %edx
+; LINUXOSX-NEXT: imull %esi, %edx
+; LINUXOSX-NEXT: addl %r12d, %edx
; LINUXOSX-NEXT: addl %ecx, %eax
; LINUXOSX-NEXT: imull %r8d, %eax
; LINUXOSX-NEXT: imull %r10d, %r11d
-; LINUXOSX-NEXT: addl %r11d, %eax
-; LINUXOSX-NEXT: addl %r15d, %edx
-; LINUXOSX-NEXT: imull %edi, %edx
+; LINUXOSX-NEXT: addl %r15d, %r14d
+; LINUXOSX-NEXT: imull %edi, %r14d
+; LINUXOSX-NEXT: addl %r11d, %r14d
+; LINUXOSX-NEXT: addl %r14d, %eax
; LINUXOSX-NEXT: addl %edx, %eax
; LINUXOSX-NEXT: addl %r9d, %eax
; LINUXOSX-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/stack-clash-large.ll b/llvm/test/CodeGen/X86/stack-clash-large.ll
index b5b9ce95a46ba..833b13e796787 100644
--- a/llvm/test/CodeGen/X86/stack-clash-large.ll
+++ b/llvm/test/CodeGen/X86/stack-clash-large.ll
@@ -98,13 +98,13 @@ define void @push_before_probe(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i
; CHECK-X64-NEXT: .cfi_def_cfa_offset 71888
; CHECK-X64-NEXT: .cfi_offset %rax, -16
; CHECK-X64-NEXT: movl 71888(%rsp), %eax
-; CHECK-X64-NEXT: addl %esi, %edi
; CHECK-X64-NEXT: addl %ecx, %edx
-; CHECK-X64-NEXT: addl %edi, %edx
-; CHECK-X64-NEXT: addl %r9d, %r8d
; CHECK-X64-NEXT: addl 71896(%rsp), %eax
+; CHECK-X64-NEXT: addl %esi, %edx
+; CHECK-X64-NEXT: addl %r9d, %eax
; CHECK-X64-NEXT: addl %r8d, %eax
; CHECK-X64-NEXT: addl %edx, %eax
+; CHECK-X64-NEXT: addl %edi, %eax
; CHECK-X64-NEXT: movl %eax, 264(%rsp)
; CHECK-X64-NEXT: movl %eax, 28664(%rsp)
; CHECK-X64-NEXT: addq $71872, %rsp # imm = 0x118C0
@@ -141,16 +141,16 @@ define void @push_before_probe(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i
; CHECK-X86-NEXT: .cfi_offset %edx, -12
; CHECK-X86-NEXT: .cfi_offset %esi, -8
; CHECK-X86-NEXT: movl 72056(%esp), %eax
-; CHECK-X86-NEXT: movl 72048(%esp), %edx
-; CHECK-X86-NEXT: movl 72040(%esp), %ecx
+; CHECK-X86-NEXT: movl 72048(%esp), %ecx
+; CHECK-X86-NEXT: movl 72040(%esp), %edx
; CHECK-X86-NEXT: movl 72032(%esp), %esi
; CHECK-X86-NEXT: addl 72036(%esp), %esi
-; CHECK-X86-NEXT: addl 72044(%esp), %ecx
-; CHECK-X86-NEXT: addl %esi, %ecx
-; CHECK-X86-NEXT: addl 72052(%esp), %edx
+; CHECK-X86-NEXT: addl 72044(%esp), %edx
+; CHECK-X86-NEXT: addl 72052(%esp), %ecx
; CHECK-X86-NEXT: addl 72060(%esp), %eax
-; CHECK-X86-NEXT: addl %edx, %eax
; CHECK-X86-NEXT: addl %ecx, %eax
+; CHECK-X86-NEXT: addl %edx, %eax
+; CHECK-X86-NEXT: addl %esi, %eax
; CHECK-X86-NEXT: movl %eax, 392(%esp)
; CHECK-X86-NEXT: movl %eax, 28792(%esp)
; CHECK-X86-NEXT: addl $72012, %esp # imm = 0x1194C
@@ -184,13 +184,13 @@ define void @push_before_probe(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i
; CHECK-X32-NEXT: .cfi_def_cfa_offset 71888
; CHECK-X32-NEXT: .cfi_offset %rax, -16
; CHECK-X32-NEXT: movl 71888(%esp), %eax
-; CHECK-X32-NEXT: addl %esi, %edi
; CHECK-X32-NEXT: addl %ecx, %edx
-; CHECK-X32-NEXT: addl %edi, %edx
-; CHECK-X32-NEXT: addl %r9d, %r8d
; CHECK-X32-NEXT: addl 71896(%esp), %eax
+; CHECK-X32-NEXT: addl %esi, %edx
+; CHECK-X32-NEXT: addl %r9d, %eax
; CHECK-X32-NEXT: addl %r8d, %eax
; CHECK-X32-NEXT: addl %edx, %eax
+; CHECK-X32-NEXT: addl %edi, %eax
; CHECK-X32-NEXT: movl %eax, 264(%esp)
; CHECK-X32-NEXT: movl %eax, 28664(%esp)
; CHECK-X32-NEXT: addl $71872, %esp # imm = 0x118C0
diff --git a/llvm/test/CodeGen/X86/statepoint-live-in.ll b/llvm/test/CodeGen/X86/statepoint-live-in.ll
index 2b02656071a7b..84ace4b5b0950 100644
--- a/llvm/test/CodeGen/X86/statepoint-live-in.ll
+++ b/llvm/test/CodeGen/X86/statepoint-live-in.ll
@@ -442,12 +442,12 @@ define i64 @test11(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT: movl %edi, %ebx
-; CHECK-NEXT: movl %esi, %r15d
+; CHECK-NEXT: movl %edi, %ebp
+; CHECK-NEXT: movl %esi, %ebx
; CHECK-NEXT: movl %edx, %r12d
; CHECK-NEXT: movl %ecx, %r13d
-; CHECK-NEXT: movl %r8d, %ebp
-; CHECK-NEXT: movl %r9d, %r14d
+; CHECK-NEXT: movl %r8d, %r14d
+; CHECK-NEXT: movl %r9d, %r15d
; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
@@ -472,11 +472,10 @@ define i64 @test11(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; CHECK-NEXT: callq _bar ## 160-byte Folded Reload
; CHECK-NEXT: Ltmp13:
-; CHECK-NEXT: addq %r15, %rbx
; CHECK-NEXT: addq %r12, %rbx
; CHECK-NEXT: addq %r13, %rbx
-; CHECK-NEXT: addq %rbp, %rbx
; CHECK-NEXT: addq %r14, %rbx
+; CHECK-NEXT: addq %r15, %rbx
; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT: addq %rax, %rbx
; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
@@ -517,6 +516,7 @@ define i64 @test11(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %
; CHECK-NEXT: addq %rax, %rbx
; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: addq %rbp, %rbx
; CHECK-NEXT: movq %rbx, %rax
; CHECK-NEXT: addq $168, %rsp
; CHECK-NEXT: popq %rbx
diff --git a/llvm/test/CodeGen/X86/statepoint-regs.ll b/llvm/test/CodeGen/X86/statepoint-regs.ll
index 9338da5de8001..d6fef28bc8c66 100644
--- a/llvm/test/CodeGen/X86/statepoint-regs.ll
+++ b/llvm/test/CodeGen/X86/statepoint-regs.ll
@@ -554,12 +554,12 @@ define i64 @test11(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT: movl %edi, %ebx
-; CHECK-NEXT: movl %esi, %r15d
+; CHECK-NEXT: movl %edi, %ebp
+; CHECK-NEXT: movl %esi, %ebx
; CHECK-NEXT: movl %edx, %r12d
; CHECK-NEXT: movl %ecx, %r13d
-; CHECK-NEXT: movl %r8d, %ebp
-; CHECK-NEXT: movl %r9d, %r14d
+; CHECK-NEXT: movl %r8d, %r14d
+; CHECK-NEXT: movl %r9d, %r15d
; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
@@ -584,11 +584,10 @@ define i64 @test11(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; CHECK-NEXT: callq _bar ## 160-byte Folded Reload
; CHECK-NEXT: Ltmp14:
-; CHECK-NEXT: addq %r15, %rbx
; CHECK-NEXT: addq %r12, %rbx
; CHECK-NEXT: addq %r13, %rbx
-; CHECK-NEXT: addq %rbp, %rbx
; CHECK-NEXT: addq %r14, %rbx
+; CHECK-NEXT: addq %r15, %rbx
; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT: addq %rax, %rbx
; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
@@ -629,6 +628,7 @@ define i64 @test11(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %
; CHECK-NEXT: addq %rax, %rbx
; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: addq %rbp, %rbx
; CHECK-NEXT: movq %rbx, %rax
; CHECK-NEXT: addq $168, %rsp
; CHECK-NEXT: popq %rbx
diff --git a/llvm/test/CodeGen/X86/swift-return.ll b/llvm/test/CodeGen/X86/swift-return.ll
index d29548d488e63..24c1509b2325e 100644
--- a/llvm/test/CodeGen/X86/swift-return.ll
+++ b/llvm/test/CodeGen/X86/swift-return.ll
@@ -147,9 +147,11 @@ define dso_local i32 @test3(i32 %key) #0 {
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; CHECK-NEXT: callq gen3 at PLT
-; CHECK-NEXT: addl %edx, %eax
+; CHECK-NEXT: # kill: def $edx killed $edx def $rdx
+; CHECK-NEXT: # kill: def $ecx killed $ecx def $rcx
+; CHECK-NEXT: addl %edx, %ecx
+; CHECK-NEXT: addl %r8d, %ecx
; CHECK-NEXT: addl %ecx, %eax
-; CHECK-NEXT: addl %r8d, %eax
; CHECK-NEXT: popq %rcx
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
@@ -358,9 +360,9 @@ define swiftcc { double, i64 } @test6() #0 {
; CHECK-NEXT: addsd %xmm1, %xmm0
; CHECK-NEXT: addsd %xmm2, %xmm0
; CHECK-NEXT: addsd %xmm3, %xmm0
-; CHECK-NEXT: addq %rdx, %rax
+; CHECK-NEXT: addq %rdx, %rcx
+; CHECK-NEXT: addq %r8, %rcx
; CHECK-NEXT: addq %rcx, %rax
-; CHECK-NEXT: addq %r8, %rax
; CHECK-NEXT: popq %rcx
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/twoaddr-lea.ll b/llvm/test/CodeGen/X86/twoaddr-lea.ll
index 14186537daea5..b11c9de00886f 100644
--- a/llvm/test/CodeGen/X86/twoaddr-lea.ll
+++ b/llvm/test/CodeGen/X86/twoaddr-lea.ll
@@ -31,11 +31,13 @@ define i32 @test1(i32 %X) nounwind {
define i32 @test2(i32 inreg %a, i32 inreg %b, i32 %c, i32 %d) nounwind {
; CHECK-LABEL: test2:
; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: ## kill: def $ecx killed $ecx def $rcx
+; CHECK-NEXT: ## kill: def $edx killed $edx def $rdx
; CHECK-NEXT: ## kill: def $esi killed $esi def $rsi
; CHECK-NEXT: ## kill: def $edi killed $edi def $rdi
-; CHECK-NEXT: leal (%rdi,%rsi), %eax
-; CHECK-NEXT: addl %edx, %eax
-; CHECK-NEXT: addl %ecx, %eax
+; CHECK-NEXT: addl %edi, %esi
+; CHECK-NEXT: leal (%rdx,%rcx), %eax
+; CHECK-NEXT: addl %esi, %eax
; CHECK-NEXT: retq
entry:
%add = add i32 %b, %a
diff --git a/llvm/test/CodeGen/X86/umul-with-overflow.ll b/llvm/test/CodeGen/X86/umul-with-overflow.ll
index b516d69c676df..fcebba2e90fef 100644
--- a/llvm/test/CodeGen/X86/umul-with-overflow.ll
+++ b/llvm/test/CodeGen/X86/umul-with-overflow.ll
@@ -93,7 +93,7 @@ define i300 @test4(i300 %a, i300 %b) nounwind {
; X86-NEXT: movl %ebx, %eax
; X86-NEXT: mull %edi
; X86-NEXT: movl %edx, %esi
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull %edi
; X86-NEXT: movl %edx, %edi
@@ -106,7 +106,7 @@ define i300 @test4(i300 %a, i300 %b) nounwind {
; X86-NEXT: movl %ecx, %ebx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: addl %ebp, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl %edi, %esi
; X86-NEXT: setb %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -142,12 +142,12 @@ define i300 @test4(i300 %a, i300 %b) nounwind {
; X86-NEXT: addl %ebx, %esi
; X86-NEXT: movzbl %cl, %eax
; X86-NEXT: adcl %eax, %edx
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-NEXT: adcl (%esp), %edx # 4-byte Folded Reload
-; X86-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X86-NEXT: addl (%esp), %esi # 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: adcl $0, %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edi, (%esp) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl %ebp, %eax
@@ -176,13 +176,13 @@ define i300 @test4(i300 %a, i300 %b) nounwind {
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: adcl %eax, %ecx
; X86-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: adcl (%esp), %edi # 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl $0, %ebx
; X86-NEXT: adcl $0, %ecx
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT: adcl (%esp), %ecx # 4-byte Folded Reload
+; X86-NEXT: setb (%esp) # 1-byte Folded Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: mull %edi
@@ -201,123 +201,120 @@ define i300 @test4(i300 %a, i300 %b) nounwind {
; X86-NEXT: addl %ebp, %eax
; X86-NEXT: movl %eax, %ebp
; X86-NEXT: adcl %edi, %esi
-; X86-NEXT: setb (%esp) # 1-byte Folded Spill
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: addl %esi, %eax
-; X86-NEXT: movzbl (%esp), %esi # 1-byte Folded Reload
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
; X86-NEXT: adcl %esi, %edx
; X86-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: adcl %ecx, %ebp
; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload
; X86-NEXT: adcl %ecx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl $0, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl %ebx, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: mull %esi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %esi
; X86-NEXT: movl %edx, %esi
-; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: addl %ecx, %ebx
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: addl %ecx, %edi
; X86-NEXT: adcl $0, %esi
-; X86-NEXT: movl %edi, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: mull %edi
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: addl %ebx, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: adcl %esi, %ecx
-; X86-NEXT: setb %bl
-; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: mull %edi
-; X86-NEXT: addl %ecx, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: movzbl %bl, %eax
-; X86-NEXT: adcl %eax, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl %ebx, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: mull %ebp
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: addl %edi, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: adcl %esi, %ebx
+; X86-NEXT: setb %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %ebp
-; X86-NEXT: movl %edx, %esi
-; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: addl %ecx, %ebx
-; X86-NEXT: adcl $0, %esi
-; X86-NEXT: movl %edi, %ebp
-; X86-NEXT: movl %edi, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: mull %edi
-; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %edx, %edi
; X86-NEXT: addl %ebx, %eax
+; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT: movzbl %cl, %eax
+; X86-NEXT: adcl %eax, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: mull %ecx
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: adcl %esi, %ecx
-; X86-NEXT: setb %bl
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: mull %edi
-; X86-NEXT: addl %ecx, %eax
-; X86-NEXT: movzbl %bl, %ecx
-; X86-NEXT: adcl %ecx, %edx
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: addl %esi, %ebp
+; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: addl %ebp, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: adcl %ebx, %esi
+; X86-NEXT: setb %bl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: addl %esi, %ecx
+; X86-NEXT: movzbl %bl, %eax
+; X86-NEXT: adcl %eax, %edx
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl $0, (%esp) # 4-byte Folded Spill
-; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: mull %edi
-; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %ebx
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: mull %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %esi
; X86-NEXT: movl %edx, %esi
; X86-NEXT: movl %eax, %edi
-; X86-NEXT: addl %ecx, %edi
+; X86-NEXT: addl %ebx, %edi
; X86-NEXT: adcl $0, %esi
; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: mull %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: mull %ebx
; X86-NEXT: movl %edx, %ebp
; X86-NEXT: addl %edi, %eax
; X86-NEXT: movl %eax, %edi
; X86-NEXT: adcl %esi, %ebp
; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %ecx
+; X86-NEXT: mull %ebx
; X86-NEXT: movl %edx, %ebx
; X86-NEXT: movl %eax, %esi
; X86-NEXT: addl %ebp, %esi
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: adcl %eax, %ebx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT: addl %ecx, %ebp
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl $0, %esi
; X86-NEXT: adcl $0, %ebx
; X86-NEXT: addl (%esp), %esi # 4-byte Folded Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: imull {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: imull %edx, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, %eax
; X86-NEXT: mull %edx
-; X86-NEXT: addl %edx, %ecx
-; X86-NEXT: imull {{[0-9]+}}(%esp), %edi
+; X86-NEXT: addl %edx, %edi
; X86-NEXT: addl %ecx, %edi
; X86-NEXT: movl %eax, %edx
; X86-NEXT: addl %esi, %edx
@@ -368,7 +365,7 @@ define i300 @test4(i300 %a, i300 %b) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: mull %esi
; X86-NEXT: movl %edx, %edi
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %esi
; X86-NEXT: movl %edx, %ebx
@@ -380,7 +377,7 @@ define i300 @test4(i300 %a, i300 %b) nounwind {
; X86-NEXT: mull %esi
; X86-NEXT: movl %edx, %edi
; X86-NEXT: addl %ebp, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl %ebx, %edi
; X86-NEXT: setb %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -428,23 +425,20 @@ define i300 @test4(i300 %a, i300 %b) nounwind {
; X86-NEXT: adcl $0, %edi
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: imull {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: imull %ecx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl %ebx, %eax
; X86-NEXT: mull %ecx
; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: addl %edx, %ebp
-; X86-NEXT: imull {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: addl %edx, %ebx
; X86-NEXT: addl %ebp, %ebx
; X86-NEXT: addl %esi, %ecx
; X86-NEXT: adcl %edi, %ebx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl (%esp), %eax # 4-byte Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
@@ -452,41 +446,44 @@ define i300 @test4(i300 %a, i300 %b) nounwind {
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl $0, %ecx
; X86-NEXT: adcl $0, %ebx
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: imull {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: imull %edx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, %eax
; X86-NEXT: mull %edx
; X86-NEXT: movl %eax, %edi
-; X86-NEXT: addl %edx, %ebp
-; X86-NEXT: imull {{[0-9]+}}(%esp), %esi
+; X86-NEXT: addl %edx, %esi
; X86-NEXT: addl %ebp, %esi
; X86-NEXT: addl %ecx, %edi
; X86-NEXT: adcl %ebx, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: imull {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: imull %edx, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, %eax
; X86-NEXT: mull %edx
; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: addl %edx, %ecx
-; X86-NEXT: imull {{[0-9]+}}(%esp), %esi
+; X86-NEXT: addl %edx, %esi
; X86-NEXT: addl %ecx, %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: imull {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: imull %edx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull %edx
-; X86-NEXT: addl %edx, %ebp
-; X86-NEXT: imull {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: addl %edx, %ecx
; X86-NEXT: addl %ebp, %ecx
; X86-NEXT: addl %ebx, %eax
; X86-NEXT: adcl %esi, %ecx
@@ -501,9 +498,9 @@ define i300 @test4(i300 %a, i300 %b) nounwind {
; X86-NEXT: movl %esi, 8(%edx)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: movl %esi, 12(%edx)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT: movl %esi, 16(%edx)
; X86-NEXT: movl (%esp), %esi # 4-byte Reload
+; X86-NEXT: movl %esi, 16(%edx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: movl %esi, 20(%edx)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: movl %esi, 24(%edx)
@@ -530,16 +527,16 @@ define i300 @test4(i300 %a, i300 %b) nounwind {
; X64-NEXT: pushq %rbx
; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %r8, %r11
-; X64-NEXT: movq %rdx, %r10
-; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8
+; X64-NEXT: movq %rdx, %r8
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9
; X64-NEXT: movq %rsi, %rax
-; X64-NEXT: mulq %r8
+; X64-NEXT: mulq %r10
; X64-NEXT: movq %rdx, %rbx
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %r10, %rax
-; X64-NEXT: mulq %r8
-; X64-NEXT: movq %r8, %rbp
+; X64-NEXT: movq %r8, %rax
+; X64-NEXT: mulq %r10
+; X64-NEXT: movq %r10, %rbp
; X64-NEXT: movq %rdx, %r14
; X64-NEXT: movq %rax, %r15
; X64-NEXT: addq %rbx, %r15
@@ -551,13 +548,13 @@ define i300 @test4(i300 %a, i300 %b) nounwind {
; X64-NEXT: addq %r15, %rbx
; X64-NEXT: adcq %r14, %r12
; X64-NEXT: setb %al
-; X64-NEXT: movzbl %al, %r8d
-; X64-NEXT: movq %r10, %rax
+; X64-NEXT: movzbl %al, %r10d
+; X64-NEXT: movq %r8, %rax
; X64-NEXT: mulq %r9
; X64-NEXT: movq %rdx, %r15
; X64-NEXT: movq %rax, %r13
; X64-NEXT: addq %r12, %r13
-; X64-NEXT: adcq %r8, %r15
+; X64-NEXT: adcq %r10, %r15
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: mulq %rbp
; X64-NEXT: movq %rdx, %r12
@@ -565,13 +562,13 @@ define i300 @test4(i300 %a, i300 %b) nounwind {
; X64-NEXT: movq %r11, %rax
; X64-NEXT: mulq %rbp
; X64-NEXT: movq %rdx, %rbp
-; X64-NEXT: movq %rax, %r8
-; X64-NEXT: addq %r12, %r8
+; X64-NEXT: movq %rax, %r10
+; X64-NEXT: addq %r12, %r10
; X64-NEXT: adcq $0, %rbp
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: mulq %r9
; X64-NEXT: movq %rax, %r12
-; X64-NEXT: addq %r8, %r12
+; X64-NEXT: addq %r10, %r12
; X64-NEXT: adcq %rbp, %rdx
; X64-NEXT: imulq %r9, %r11
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9
@@ -580,30 +577,30 @@ define i300 @test4(i300 %a, i300 %b) nounwind {
; X64-NEXT: adcq %rdx, %r11
; X64-NEXT: movq %rsi, %rax
; X64-NEXT: mulq %r9
-; X64-NEXT: movq %rdx, %r8
+; X64-NEXT: movq %rdx, %r10
; X64-NEXT: movq %rax, %r15
-; X64-NEXT: movq %r10, %rax
+; X64-NEXT: movq %r8, %rax
; X64-NEXT: mulq %r9
; X64-NEXT: movq %rdx, %r13
; X64-NEXT: movq %rax, %rbp
-; X64-NEXT: addq %r8, %rbp
+; X64-NEXT: addq %r10, %rbp
; X64-NEXT: adcq $0, %r13
-; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10
; X64-NEXT: movq %rsi, %rax
-; X64-NEXT: mulq %r8
+; X64-NEXT: mulq %r10
; X64-NEXT: addq %rbp, %rax
; X64-NEXT: adcq %r13, %rdx
-; X64-NEXT: imulq %r8, %r10
-; X64-NEXT: addq %rdx, %r10
+; X64-NEXT: imulq %r10, %r8
+; X64-NEXT: addq %rdx, %r8
; X64-NEXT: addq %r14, %r15
; X64-NEXT: adcq %r12, %rax
-; X64-NEXT: adcq %r11, %r10
+; X64-NEXT: adcq %r11, %r8
; X64-NEXT: imulq %r9, %rcx
-; X64-NEXT: addq %r10, %rcx
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; X64-NEXT: imulq {{[0-9]+}}(%rsp), %rdx
; X64-NEXT: imulq {{[0-9]+}}(%rsp), %rsi
; X64-NEXT: addq %rdx, %rsi
+; X64-NEXT: addq %r8, %rsi
; X64-NEXT: addq %rcx, %rsi
; X64-NEXT: movq %rbx, 8(%rdi)
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
diff --git a/llvm/test/CodeGen/X86/umul_fix.ll b/llvm/test/CodeGen/X86/umul_fix.ll
index cb4bdd1ede75c..dca010064b587 100644
--- a/llvm/test/CodeGen/X86/umul_fix.ll
+++ b/llvm/test/CodeGen/X86/umul_fix.ll
@@ -185,8 +185,8 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull %esi
; X86-NEXT: imull {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: addl %ecx, %edx
; X86-NEXT: imull {{[0-9]+}}(%esp), %esi
+; X86-NEXT: addl %ecx, %esi
; X86-NEXT: addl %esi, %edx
; X86-NEXT: popl %esi
; X86-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/urem-seteq-nonzero.ll b/llvm/test/CodeGen/X86/urem-seteq-nonzero.ll
index 671d7c21013de..7a5198dd6a0fa 100644
--- a/llvm/test/CodeGen/X86/urem-seteq-nonzero.ll
+++ b/llvm/test/CodeGen/X86/urem-seteq-nonzero.ll
@@ -295,19 +295,21 @@ define i1 @t8_3_2(i8 %X) nounwind {
define i1 @t64_3_2(i64 %X) nounwind {
; X86-LABEL: t64_3_2:
; X86: # %bb.0:
+; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $-1431655765, %edx # imm = 0xAAAAAAAB
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull %edx
; X86-NEXT: imull $-1431655766, %ecx, %ecx # imm = 0xAAAAAAAA
-; X86-NEXT: addl %edx, %ecx
-; X86-NEXT: imull $-1431655765, {{[0-9]+}}(%esp), %edx # imm = 0xAAAAAAAB
-; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: imull $-1431655765, {{[0-9]+}}(%esp), %esi # imm = 0xAAAAAAAB
+; X86-NEXT: addl %ecx, %esi
+; X86-NEXT: addl %edx, %esi
; X86-NEXT: addl $-1431655766, %eax # imm = 0xAAAAAAAA
-; X86-NEXT: adcl $-1431655766, %edx # imm = 0xAAAAAAAA
+; X86-NEXT: adcl $-1431655766, %esi # imm = 0xAAAAAAAA
; X86-NEXT: cmpl $1431655765, %eax # imm = 0x55555555
-; X86-NEXT: sbbl $1431655765, %edx # imm = 0x55555555
+; X86-NEXT: sbbl $1431655765, %esi # imm = 0x55555555
; X86-NEXT: setb %al
+; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: t64_3_2:
diff --git a/llvm/test/CodeGen/X86/vec_smulo.ll b/llvm/test/CodeGen/X86/vec_smulo.ll
index dbec86755a969..7bb7c9b481d39 100644
--- a/llvm/test/CodeGen/X86/vec_smulo.ll
+++ b/llvm/test/CodeGen/X86/vec_smulo.ll
@@ -3309,8 +3309,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind
; SSE2-NEXT: movq %r14, %rax
; SSE2-NEXT: mulq %r12
; SSE2-NEXT: movq %rax, %rdi
-; SSE2-NEXT: addq %rbx, %rdx
; SSE2-NEXT: imulq %r9, %r12
+; SSE2-NEXT: addq %rbx, %r12
; SSE2-NEXT: addq %rdx, %r12
; SSE2-NEXT: movq %r9, %rbx
; SSE2-NEXT: sarq $63, %rbx
@@ -3319,8 +3319,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind
; SSE2-NEXT: movq %rbx, %rax
; SSE2-NEXT: mulq %r10
; SSE2-NEXT: movq %rax, %r15
-; SSE2-NEXT: addq %r13, %rdx
; SSE2-NEXT: imulq %r10, %rbx
+; SSE2-NEXT: addq %r13, %rbx
; SSE2-NEXT: addq %rdx, %rbx
; SSE2-NEXT: addq %rdi, %r15
; SSE2-NEXT: adcq %r12, %rbx
@@ -3363,8 +3363,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind
; SSE2-NEXT: movq %rsi, %rax
; SSE2-NEXT: mulq %rbx
; SSE2-NEXT: movq %rax, %r9
-; SSE2-NEXT: addq %r10, %rdx
; SSE2-NEXT: imulq %rbp, %rbx
+; SSE2-NEXT: addq %r10, %rbx
; SSE2-NEXT: addq %rdx, %rbx
; SSE2-NEXT: movq %rbp, %r10
; SSE2-NEXT: sarq $63, %r10
@@ -3373,8 +3373,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind
; SSE2-NEXT: movq %r10, %rax
; SSE2-NEXT: mulq %r8
; SSE2-NEXT: movq %rax, %r11
-; SSE2-NEXT: addq %r14, %rdx
; SSE2-NEXT: imulq %r8, %r10
+; SSE2-NEXT: addq %r14, %r10
; SSE2-NEXT: addq %rdx, %r10
; SSE2-NEXT: addq %r9, %r11
; SSE2-NEXT: adcq %rbx, %r10
@@ -3445,8 +3445,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind
; SSSE3-NEXT: movq %r14, %rax
; SSSE3-NEXT: mulq %r12
; SSSE3-NEXT: movq %rax, %rdi
-; SSSE3-NEXT: addq %rbx, %rdx
; SSSE3-NEXT: imulq %r9, %r12
+; SSSE3-NEXT: addq %rbx, %r12
; SSSE3-NEXT: addq %rdx, %r12
; SSSE3-NEXT: movq %r9, %rbx
; SSSE3-NEXT: sarq $63, %rbx
@@ -3455,8 +3455,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind
; SSSE3-NEXT: movq %rbx, %rax
; SSSE3-NEXT: mulq %r10
; SSSE3-NEXT: movq %rax, %r15
-; SSSE3-NEXT: addq %r13, %rdx
; SSSE3-NEXT: imulq %r10, %rbx
+; SSSE3-NEXT: addq %r13, %rbx
; SSSE3-NEXT: addq %rdx, %rbx
; SSSE3-NEXT: addq %rdi, %r15
; SSSE3-NEXT: adcq %r12, %rbx
@@ -3499,8 +3499,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind
; SSSE3-NEXT: movq %rsi, %rax
; SSSE3-NEXT: mulq %rbx
; SSSE3-NEXT: movq %rax, %r9
-; SSSE3-NEXT: addq %r10, %rdx
; SSSE3-NEXT: imulq %rbp, %rbx
+; SSSE3-NEXT: addq %r10, %rbx
; SSSE3-NEXT: addq %rdx, %rbx
; SSSE3-NEXT: movq %rbp, %r10
; SSSE3-NEXT: sarq $63, %r10
@@ -3509,8 +3509,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind
; SSSE3-NEXT: movq %r10, %rax
; SSSE3-NEXT: mulq %r8
; SSSE3-NEXT: movq %rax, %r11
-; SSSE3-NEXT: addq %r14, %rdx
; SSSE3-NEXT: imulq %r8, %r10
+; SSSE3-NEXT: addq %r14, %r10
; SSSE3-NEXT: addq %rdx, %r10
; SSSE3-NEXT: addq %r9, %r11
; SSSE3-NEXT: adcq %rbx, %r10
@@ -3581,8 +3581,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind
; SSE41-NEXT: movq %r14, %rax
; SSE41-NEXT: mulq %r12
; SSE41-NEXT: movq %rax, %rdi
-; SSE41-NEXT: addq %rbx, %rdx
; SSE41-NEXT: imulq %r9, %r12
+; SSE41-NEXT: addq %rbx, %r12
; SSE41-NEXT: addq %rdx, %r12
; SSE41-NEXT: movq %r9, %rbx
; SSE41-NEXT: sarq $63, %rbx
@@ -3591,8 +3591,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind
; SSE41-NEXT: movq %rbx, %rax
; SSE41-NEXT: mulq %r10
; SSE41-NEXT: movq %rax, %r15
-; SSE41-NEXT: addq %r13, %rdx
; SSE41-NEXT: imulq %r10, %rbx
+; SSE41-NEXT: addq %r13, %rbx
; SSE41-NEXT: addq %rdx, %rbx
; SSE41-NEXT: addq %rdi, %r15
; SSE41-NEXT: adcq %r12, %rbx
@@ -3635,8 +3635,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind
; SSE41-NEXT: movq %rsi, %rax
; SSE41-NEXT: mulq %rbx
; SSE41-NEXT: movq %rax, %r9
-; SSE41-NEXT: addq %r10, %rdx
; SSE41-NEXT: imulq %rbp, %rbx
+; SSE41-NEXT: addq %r10, %rbx
; SSE41-NEXT: addq %rdx, %rbx
; SSE41-NEXT: movq %rbp, %r10
; SSE41-NEXT: sarq $63, %r10
@@ -3645,8 +3645,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind
; SSE41-NEXT: movq %r10, %rax
; SSE41-NEXT: mulq %r8
; SSE41-NEXT: movq %rax, %r11
-; SSE41-NEXT: addq %r14, %rdx
; SSE41-NEXT: imulq %r8, %r10
+; SSE41-NEXT: addq %r14, %r10
; SSE41-NEXT: addq %rdx, %r10
; SSE41-NEXT: addq %r9, %r11
; SSE41-NEXT: adcq %rbx, %r10
@@ -3716,8 +3716,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind
; AVX-NEXT: movq %r14, %rax
; AVX-NEXT: mulq %r12
; AVX-NEXT: movq %rax, %rdi
-; AVX-NEXT: addq %rbx, %rdx
; AVX-NEXT: imulq %r9, %r12
+; AVX-NEXT: addq %rbx, %r12
; AVX-NEXT: addq %rdx, %r12
; AVX-NEXT: movq %r9, %rbx
; AVX-NEXT: sarq $63, %rbx
@@ -3726,8 +3726,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind
; AVX-NEXT: movq %rbx, %rax
; AVX-NEXT: mulq %r10
; AVX-NEXT: movq %rax, %r15
-; AVX-NEXT: addq %r13, %rdx
; AVX-NEXT: imulq %r10, %rbx
+; AVX-NEXT: addq %r13, %rbx
; AVX-NEXT: addq %rdx, %rbx
; AVX-NEXT: addq %rdi, %r15
; AVX-NEXT: adcq %r12, %rbx
@@ -3770,8 +3770,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind
; AVX-NEXT: movq %rsi, %rax
; AVX-NEXT: mulq %rbx
; AVX-NEXT: movq %rax, %r9
-; AVX-NEXT: addq %r10, %rdx
; AVX-NEXT: imulq %rbp, %rbx
+; AVX-NEXT: addq %r10, %rbx
; AVX-NEXT: addq %rdx, %rbx
; AVX-NEXT: movq %rbp, %r10
; AVX-NEXT: sarq $63, %r10
@@ -3780,8 +3780,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind
; AVX-NEXT: movq %r10, %rax
; AVX-NEXT: mulq %r8
; AVX-NEXT: movq %rax, %r11
-; AVX-NEXT: addq %r14, %rdx
; AVX-NEXT: imulq %r8, %r10
+; AVX-NEXT: addq %r14, %r10
; AVX-NEXT: addq %rdx, %r10
; AVX-NEXT: addq %r9, %r11
; AVX-NEXT: adcq %rbx, %r10
@@ -3851,8 +3851,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind
; AVX512F-NEXT: movq %r15, %rax
; AVX512F-NEXT: mulq %r12
; AVX512F-NEXT: movq %rax, %rcx
-; AVX512F-NEXT: addq %rbx, %rdx
; AVX512F-NEXT: imulq %rsi, %r12
+; AVX512F-NEXT: addq %rbx, %r12
; AVX512F-NEXT: addq %rdx, %r12
; AVX512F-NEXT: movq %rsi, %rbx
; AVX512F-NEXT: sarq $63, %rbx
@@ -3861,8 +3861,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind
; AVX512F-NEXT: movq %rbx, %rax
; AVX512F-NEXT: mulq %r10
; AVX512F-NEXT: movq %rax, %r14
-; AVX512F-NEXT: addq %r13, %rdx
; AVX512F-NEXT: imulq %r10, %rbx
+; AVX512F-NEXT: addq %r13, %rbx
; AVX512F-NEXT: addq %rdx, %rbx
; AVX512F-NEXT: addq %rcx, %r14
; AVX512F-NEXT: adcq %r12, %rbx
@@ -3905,8 +3905,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind
; AVX512F-NEXT: movq %r8, %rax
; AVX512F-NEXT: mulq %rsi
; AVX512F-NEXT: movq %rax, %r10
-; AVX512F-NEXT: addq %r11, %rdx
; AVX512F-NEXT: imulq %rbp, %rsi
+; AVX512F-NEXT: addq %r11, %rsi
; AVX512F-NEXT: addq %rdx, %rsi
; AVX512F-NEXT: movq %rbp, %r11
; AVX512F-NEXT: sarq $63, %r11
@@ -3915,8 +3915,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind
; AVX512F-NEXT: movq %r11, %rax
; AVX512F-NEXT: mulq %rdi
; AVX512F-NEXT: movq %rax, %rbx
-; AVX512F-NEXT: addq %r14, %rdx
; AVX512F-NEXT: imulq %rdi, %r11
+; AVX512F-NEXT: addq %r14, %r11
; AVX512F-NEXT: addq %rdx, %r11
; AVX512F-NEXT: addq %r10, %rbx
; AVX512F-NEXT: adcq %rsi, %r11
@@ -3987,8 +3987,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind
; AVX512BW-NEXT: movq %r15, %rax
; AVX512BW-NEXT: mulq %r12
; AVX512BW-NEXT: movq %rax, %rcx
-; AVX512BW-NEXT: addq %rbx, %rdx
; AVX512BW-NEXT: imulq %rsi, %r12
+; AVX512BW-NEXT: addq %rbx, %r12
; AVX512BW-NEXT: addq %rdx, %r12
; AVX512BW-NEXT: movq %rsi, %rbx
; AVX512BW-NEXT: sarq $63, %rbx
@@ -3997,8 +3997,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind
; AVX512BW-NEXT: movq %rbx, %rax
; AVX512BW-NEXT: mulq %r10
; AVX512BW-NEXT: movq %rax, %r14
-; AVX512BW-NEXT: addq %r13, %rdx
; AVX512BW-NEXT: imulq %r10, %rbx
+; AVX512BW-NEXT: addq %r13, %rbx
; AVX512BW-NEXT: addq %rdx, %rbx
; AVX512BW-NEXT: addq %rcx, %r14
; AVX512BW-NEXT: adcq %r12, %rbx
@@ -4041,8 +4041,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind
; AVX512BW-NEXT: movq %r8, %rax
; AVX512BW-NEXT: mulq %rsi
; AVX512BW-NEXT: movq %rax, %r10
-; AVX512BW-NEXT: addq %r11, %rdx
; AVX512BW-NEXT: imulq %rbp, %rsi
+; AVX512BW-NEXT: addq %r11, %rsi
; AVX512BW-NEXT: addq %rdx, %rsi
; AVX512BW-NEXT: movq %rbp, %r11
; AVX512BW-NEXT: sarq $63, %r11
@@ -4051,8 +4051,8 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind
; AVX512BW-NEXT: movq %r11, %rax
; AVX512BW-NEXT: mulq %rdi
; AVX512BW-NEXT: movq %rax, %rbx
-; AVX512BW-NEXT: addq %r14, %rdx
; AVX512BW-NEXT: imulq %rdi, %r11
+; AVX512BW-NEXT: addq %r14, %r11
; AVX512BW-NEXT: addq %rdx, %r11
; AVX512BW-NEXT: addq %r10, %rbx
; AVX512BW-NEXT: adcq %rsi, %r11
diff --git a/llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll b/llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll
index 7ddff59c26b0e..943424171d662 100644
--- a/llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll
+++ b/llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll
@@ -46,16 +46,16 @@ define void @test(<16 x i32> %a0, <16 x i32> %b0, <16 x i32> %a1, <16 x i32> %b1
; X86-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 # 2-byte Reload
; X86-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 # 2-byte Reload
; X86-NEXT: kmovw %k0, %edi
-; X86-NEXT: addl %edi, %eax
-; X86-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k2 # 2-byte Reload
-; X86-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k3 # 2-byte Reload
-; X86-NEXT: kmovw %k2, %edi
-; X86-NEXT: addl %ecx, %edx
-; X86-NEXT: kmovw %k1, %ecx
-; X86-NEXT: addl %edi, %ecx
-; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: addl %eax, %ecx
+; X86-NEXT: kmovw %k1, %eax
; X86-NEXT: addl %edx, %eax
-; X86-NEXT: movw %ax, (%esi)
+; X86-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 # 2-byte Reload
+; X86-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 # 2-byte Reload
+; X86-NEXT: kmovw %k0, %edx
+; X86-NEXT: addl %eax, %edx
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: addl %edi, %edx
+; X86-NEXT: movw %dx, (%esi)
; X86-NEXT: leal -8(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
@@ -104,11 +104,11 @@ define void @test(<16 x i32> %a0, <16 x i32> %b0, <16 x i32> %a1, <16 x i32> %b1
; X64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
; X64-NEXT: kmovw %k0, %edi
; X64-NEXT: kmovw %k1, %r8d
-; X64-NEXT: addl %edi, %eax
; X64-NEXT: addl %ecx, %edx
; X64-NEXT: addl %r8d, %eax
; X64-NEXT: addl %esi, %eax
; X64-NEXT: addl %edx, %eax
+; X64-NEXT: addl %edi, %eax
; X64-NEXT: movw %ax, (%rbx)
; X64-NEXT: leaq -8(%rbp), %rsp
; X64-NEXT: popq %rbx
diff --git a/llvm/test/CodeGen/X86/win-smallparams.ll b/llvm/test/CodeGen/X86/win-smallparams.ll
index 191f1642b10dc..63ac6e0582add 100644
--- a/llvm/test/CodeGen/X86/win-smallparams.ll
+++ b/llvm/test/CodeGen/X86/win-smallparams.ll
@@ -57,14 +57,13 @@ entry:
; WIN32: calll _manyargs
; WIN32-LABEL: _manyargs:
-; WIN32: pushl %ebx
; WIN32: pushl %edi
; WIN32: pushl %esi
-; WIN32-DAG: movsbl 16(%esp),
-; WIN32-DAG: movswl 20(%esp),
-; WIN32-DAG: movzbl 24(%esp),
-; WIN32-DAG: movzwl 28(%esp),
-; WIN32-DAG: movzbl 32(%esp),
-; WIN32-DAG: movzwl 36(%esp),
+; WIN32-DAG: movzwl 32(%esp),
+; WIN32-DAG: movzbl 28(%esp),
+; WIN32-DAG: movzwl 24(%esp),
+; WIN32-DAG: movzbl 20(%esp),
+; WIN32-DAG: movswl 16(%esp),
+; WIN32-DAG: movsbl 12(%esp),
; WIN32: retl
diff --git a/llvm/test/CodeGen/X86/x86-cmov-converter.ll b/llvm/test/CodeGen/X86/x86-cmov-converter.ll
index 9d55df0602f99..bfb402c7cf0c0 100644
--- a/llvm/test/CodeGen/X86/x86-cmov-converter.ll
+++ b/llvm/test/CodeGen/X86/x86-cmov-converter.ll
@@ -766,8 +766,8 @@ define i32 @test_cmov_memoperand_in_group(i32 %a, i32 %b, i32 %x, ptr %y.ptr) #0
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: movl %esi, %r8d
; CHECK-NEXT: .LBB9_2: # %entry
-; CHECK-NEXT: addl %edx, %eax
; CHECK-NEXT: addl %r8d, %eax
+; CHECK-NEXT: addl %edx, %eax
; CHECK-NEXT: retq
;
; CHECK-FORCEALL-LABEL: test_cmov_memoperand_in_group:
@@ -781,8 +781,8 @@ define i32 @test_cmov_memoperand_in_group(i32 %a, i32 %b, i32 %x, ptr %y.ptr) #0
; CHECK-FORCEALL-NEXT: movl %edi, %eax
; CHECK-FORCEALL-NEXT: movl %esi, %r8d
; CHECK-FORCEALL-NEXT: .LBB9_2: # %entry
-; CHECK-FORCEALL-NEXT: addl %edx, %eax
; CHECK-FORCEALL-NEXT: addl %r8d, %eax
+; CHECK-FORCEALL-NEXT: addl %edx, %eax
; CHECK-FORCEALL-NEXT: retq
entry:
%cond = icmp ugt i32 %a, %b
@@ -808,8 +808,8 @@ define i32 @test_cmov_memoperand_in_group2(i32 %a, i32 %b, i32 %x, ptr %y.ptr) #
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: movl %esi, %r8d
; CHECK-NEXT: .LBB10_2: # %entry
-; CHECK-NEXT: addl %edx, %eax
; CHECK-NEXT: addl %r8d, %eax
+; CHECK-NEXT: addl %edx, %eax
; CHECK-NEXT: retq
;
; CHECK-FORCEALL-LABEL: test_cmov_memoperand_in_group2:
@@ -823,8 +823,8 @@ define i32 @test_cmov_memoperand_in_group2(i32 %a, i32 %b, i32 %x, ptr %y.ptr) #
; CHECK-FORCEALL-NEXT: movl %edi, %eax
; CHECK-FORCEALL-NEXT: movl %esi, %r8d
; CHECK-FORCEALL-NEXT: .LBB10_2: # %entry
-; CHECK-FORCEALL-NEXT: addl %edx, %eax
; CHECK-FORCEALL-NEXT: addl %r8d, %eax
+; CHECK-FORCEALL-NEXT: addl %edx, %eax
; CHECK-FORCEALL-NEXT: retq
entry:
%cond = icmp ugt i32 %a, %b
diff --git a/llvm/test/CodeGen/X86/x86-no_caller_saved_registers-preserve.ll b/llvm/test/CodeGen/X86/x86-no_caller_saved_registers-preserve.ll
index 455f0c706d70e..263ff329163c5 100644
--- a/llvm/test/CodeGen/X86/x86-no_caller_saved_registers-preserve.ll
+++ b/llvm/test/CodeGen/X86/x86-no_caller_saved_registers-preserve.ll
@@ -38,8 +38,8 @@ define x86_64_sysvcc float @foo(i32 %a0, i32 %a1, float %b0) {
; CHECK-NEXT: movl %esi, %ecx
; CHECK-NEXT: movl %edi, %edx
; CHECK-NEXT: callq bar at PLT
-; CHECK-NEXT: addl %edx, %eax
; CHECK-NEXT: addl %ecx, %eax
+; CHECK-NEXT: addl %edx, %eax
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: cvtsi2ss %eax, %xmm0
; CHECK-NEXT: addss %xmm1, %xmm0
diff --git a/llvm/test/CodeGen/X86/xmulo.ll b/llvm/test/CodeGen/X86/xmulo.ll
index 4adc80b3b8bd6..8d68303300ec6 100644
--- a/llvm/test/CodeGen/X86/xmulo.ll
+++ b/llvm/test/CodeGen/X86/xmulo.ll
@@ -213,53 +213,52 @@ define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) {
; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
; WIN32-NEXT: subl $8, %esp
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; WIN32-NEXT: movl %ecx, %edi
-; WIN32-NEXT: sarl $31, %edi
-; WIN32-NEXT: movl %eax, %esi
-; WIN32-NEXT: imull %edi, %esi
-; WIN32-NEXT: mull %edi
-; WIN32-NEXT: movl %eax, %ebx
-; WIN32-NEXT: addl %esi, %edx
+; WIN32-NEXT: movl %ecx, %ebx
+; WIN32-NEXT: sarl $31, %ebx
; WIN32-NEXT: movl %ebp, %esi
-; WIN32-NEXT: imull %ebp, %edi
-; WIN32-NEXT: addl %edx, %edi
+; WIN32-NEXT: imull %ebx, %esi
+; WIN32-NEXT: movl %ebp, %eax
+; WIN32-NEXT: mull %ebx
+; WIN32-NEXT: movl %eax, %edi
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT: imull %eax, %ebx
+; WIN32-NEXT: addl %esi, %ebx
+; WIN32-NEXT: addl %edx, %ebx
+; WIN32-NEXT: movl %eax, %esi
; WIN32-NEXT: sarl $31, %esi
-; WIN32-NEXT: movl %esi, %ebp
-; WIN32-NEXT: imull %ecx, %ebp
+; WIN32-NEXT: movl %esi, %edx
+; WIN32-NEXT: imull %ecx, %edx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT: movl %esi, %eax
-; WIN32-NEXT: mull %ecx
-; WIN32-NEXT: addl %ebp, %edx
; WIN32-NEXT: imull %ecx, %esi
; WIN32-NEXT: addl %edx, %esi
-; WIN32-NEXT: addl %ebx, %eax
+; WIN32-NEXT: mull %ecx
+; WIN32-NEXT: addl %edx, %esi
+; WIN32-NEXT: addl %edi, %eax
; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill
-; WIN32-NEXT: adcl %edi, %esi
+; WIN32-NEXT: adcl %ebx, %esi
; WIN32-NEXT: movl %ecx, %eax
-; WIN32-NEXT: movl %ecx, %edi
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; WIN32-NEXT: mull %ecx
-; WIN32-NEXT: movl %edx, %ebp
+; WIN32-NEXT: mull %ebp
+; WIN32-NEXT: movl %edx, %ebx
; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; WIN32-NEXT: mull %ecx
-; WIN32-NEXT: movl %edx, %ebx
+; WIN32-NEXT: mull %ebp
+; WIN32-NEXT: movl %edx, %edi
; WIN32-NEXT: movl %eax, %ecx
-; WIN32-NEXT: addl %ebp, %ecx
-; WIN32-NEXT: adcl $0, %ebx
-; WIN32-NEXT: movl %edi, %eax
+; WIN32-NEXT: addl %ebx, %ecx
+; WIN32-NEXT: adcl $0, %edi
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: mull {{[0-9]+}}(%esp)
-; WIN32-NEXT: movl %edx, %edi
+; WIN32-NEXT: movl %edx, %ebx
; WIN32-NEXT: movl %eax, %ebp
; WIN32-NEXT: addl %ecx, %ebp
-; WIN32-NEXT: adcl %ebx, %edi
+; WIN32-NEXT: adcl %edi, %ebx
; WIN32-NEXT: setb %cl
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: mull {{[0-9]+}}(%esp)
-; WIN32-NEXT: addl %edi, %eax
+; WIN32-NEXT: addl %ebx, %eax
; WIN32-NEXT: movzbl %cl, %ecx
; WIN32-NEXT: adcl %ecx, %edx
; WIN32-NEXT: addl (%esp), %eax # 4-byte Folded Reload
@@ -575,29 +574,26 @@ define i64 @smuloselecti64(i64 %v1, i64 %v2) {
; WIN32-NEXT: pushl %eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; WIN32-NEXT: movl %eax, %ecx
-; WIN32-NEXT: movl %eax, %esi
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT: sarl $31, %ecx
; WIN32-NEXT: movl %ebp, %edi
; WIN32-NEXT: imull %ecx, %edi
; WIN32-NEXT: movl %ebp, %eax
; WIN32-NEXT: mull %ecx
-; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill
-; WIN32-NEXT: addl %edi, %edx
+; WIN32-NEXT: movl %eax, %esi
; WIN32-NEXT: imull %ebx, %ecx
+; WIN32-NEXT: addl %edi, %ecx
; WIN32-NEXT: addl %edx, %ecx
; WIN32-NEXT: sarl $31, %ebx
-; WIN32-NEXT: movl %ebx, %edi
-; WIN32-NEXT: imull %esi, %edi
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; WIN32-NEXT: movl %ebx, %edx
+; WIN32-NEXT: imull {{[0-9]+}}(%esp), %edx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
; WIN32-NEXT: movl %ebx, %eax
-; WIN32-NEXT: mull %esi
-; WIN32-NEXT: addl %edi, %edx
-; WIN32-NEXT: movl %esi, %edi
-; WIN32-NEXT: imull %esi, %ebx
+; WIN32-NEXT: imull %edi, %ebx
; WIN32-NEXT: addl %edx, %ebx
-; WIN32-NEXT: addl (%esp), %eax # 4-byte Folded Reload
+; WIN32-NEXT: mull %edi
+; WIN32-NEXT: addl %edx, %ebx
+; WIN32-NEXT: addl %esi, %eax
; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill
; WIN32-NEXT: adcl %ecx, %ebx
; WIN32-NEXT: movl %edi, %eax
@@ -606,20 +602,20 @@ define i64 @smuloselecti64(i64 %v1, i64 %v2) {
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: mull %ebp
; WIN32-NEXT: movl %edx, %ecx
-; WIN32-NEXT: movl %eax, %ebp
-; WIN32-NEXT: addl %esi, %ebp
+; WIN32-NEXT: movl %eax, %edi
+; WIN32-NEXT: addl %esi, %edi
; WIN32-NEXT: adcl $0, %ecx
-; WIN32-NEXT: movl %edi, %eax
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: mull {{[0-9]+}}(%esp)
-; WIN32-NEXT: movl %edx, %edi
+; WIN32-NEXT: movl %edx, %ebp
; WIN32-NEXT: movl %eax, %esi
-; WIN32-NEXT: addl %ebp, %esi
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; WIN32-NEXT: adcl %ecx, %edi
+; WIN32-NEXT: addl %edi, %esi
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; WIN32-NEXT: adcl %ecx, %ebp
; WIN32-NEXT: setb %cl
-; WIN32-NEXT: movl %ebp, %eax
+; WIN32-NEXT: movl %edi, %eax
; WIN32-NEXT: mull {{[0-9]+}}(%esp)
-; WIN32-NEXT: addl %edi, %eax
+; WIN32-NEXT: addl %ebp, %eax
; WIN32-NEXT: movzbl %cl, %ecx
; WIN32-NEXT: adcl %ecx, %edx
; WIN32-NEXT: addl (%esp), %eax # 4-byte Folded Reload
@@ -632,9 +628,9 @@ define i64 @smuloselecti64(i64 %v1, i64 %v2) {
; WIN32-NEXT: jne LBB12_2
; WIN32-NEXT: # %bb.1:
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
; WIN32-NEXT: LBB12_2:
-; WIN32-NEXT: movl %ebp, %edx
+; WIN32-NEXT: movl %edi, %edx
; WIN32-NEXT: addl $4, %esp
; WIN32-NEXT: popl %esi
; WIN32-NEXT: popl %edi
@@ -996,60 +992,60 @@ define zeroext i1 @smulobri64(i64 %v1, i64 %v2) {
; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
; WIN32-NEXT: pushl %eax
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT: movl %ecx, %edi
; WIN32-NEXT: sarl $31, %edi
-; WIN32-NEXT: movl %eax, %esi
+; WIN32-NEXT: movl %ebp, %esi
; WIN32-NEXT: imull %edi, %esi
+; WIN32-NEXT: movl %ebp, %eax
; WIN32-NEXT: mull %edi
-; WIN32-NEXT: movl %eax, %ebp
-; WIN32-NEXT: addl %esi, %edx
-; WIN32-NEXT: movl %ebx, %esi
-; WIN32-NEXT: imull %ebx, %edi
+; WIN32-NEXT: movl %eax, %ebx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT: imull %eax, %edi
+; WIN32-NEXT: addl %esi, %edi
; WIN32-NEXT: addl %edx, %edi
+; WIN32-NEXT: movl %eax, %esi
; WIN32-NEXT: sarl $31, %esi
-; WIN32-NEXT: movl %esi, %ebx
-; WIN32-NEXT: imull %ecx, %ebx
+; WIN32-NEXT: movl %esi, %edx
+; WIN32-NEXT: imull %ecx, %edx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT: movl %esi, %eax
-; WIN32-NEXT: mull %ecx
-; WIN32-NEXT: addl %ebx, %edx
; WIN32-NEXT: imull %ecx, %esi
; WIN32-NEXT: addl %edx, %esi
-; WIN32-NEXT: addl %ebp, %eax
+; WIN32-NEXT: mull %ecx
+; WIN32-NEXT: addl %edx, %esi
+; WIN32-NEXT: addl %ebx, %eax
; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill
; WIN32-NEXT: adcl %edi, %esi
; WIN32-NEXT: movl %ecx, %eax
-; WIN32-NEXT: movl %ecx, %edi
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; WIN32-NEXT: mull %ecx
-; WIN32-NEXT: movl %edx, %ebx
+; WIN32-NEXT: movl %ecx, %ebx
+; WIN32-NEXT: mull %ebp
+; WIN32-NEXT: movl %edx, %edi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; WIN32-NEXT: mull %ecx
+; WIN32-NEXT: mull %ebp
; WIN32-NEXT: movl %edx, %ebp
; WIN32-NEXT: movl %eax, %ecx
-; WIN32-NEXT: addl %ebx, %ecx
+; WIN32-NEXT: addl %edi, %ecx
; WIN32-NEXT: adcl $0, %ebp
-; WIN32-NEXT: movl %edi, %eax
+; WIN32-NEXT: movl %ebx, %eax
; WIN32-NEXT: mull {{[0-9]+}}(%esp)
-; WIN32-NEXT: movl %edx, %edi
-; WIN32-NEXT: movl %eax, %ebx
-; WIN32-NEXT: addl %ecx, %ebx
-; WIN32-NEXT: adcl %ebp, %edi
+; WIN32-NEXT: movl %edx, %ebx
+; WIN32-NEXT: movl %eax, %edi
+; WIN32-NEXT: addl %ecx, %edi
+; WIN32-NEXT: adcl %ebp, %ebx
; WIN32-NEXT: setb %cl
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: mull {{[0-9]+}}(%esp)
-; WIN32-NEXT: addl %edi, %eax
+; WIN32-NEXT: addl %ebx, %eax
; WIN32-NEXT: movzbl %cl, %ecx
; WIN32-NEXT: adcl %ecx, %edx
; WIN32-NEXT: addl (%esp), %eax # 4-byte Folded Reload
; WIN32-NEXT: adcl %esi, %edx
-; WIN32-NEXT: sarl $31, %ebx
-; WIN32-NEXT: xorl %ebx, %edx
-; WIN32-NEXT: xorl %eax, %ebx
-; WIN32-NEXT: orl %edx, %ebx
+; WIN32-NEXT: sarl $31, %edi
+; WIN32-NEXT: xorl %edi, %edx
+; WIN32-NEXT: xorl %eax, %edi
+; WIN32-NEXT: orl %edx, %edi
; WIN32-NEXT: jne LBB18_1
; WIN32-NEXT: # %bb.3: # %continue
; WIN32-NEXT: movb $1, %al
@@ -1712,20 +1708,20 @@ define zeroext i1 @smuloi64_load(ptr %ptr1, i64 %v2, ptr %res) {
; WIN32-NEXT: movl %ebx, %ecx
; WIN32-NEXT: imull %ebp, %ecx
; WIN32-NEXT: movl %ebx, %eax
+; WIN32-NEXT: imull %esi, %ebx
+; WIN32-NEXT: addl %ecx, %ebx
; WIN32-NEXT: mull %esi
; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; WIN32-NEXT: addl %ecx, %edx
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; WIN32-NEXT: imull %esi, %ebx
; WIN32-NEXT: addl %edx, %ebx
; WIN32-NEXT: movl %ebp, %ecx
; WIN32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; WIN32-NEXT: sarl $31, %ecx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl %eax, %edi
; WIN32-NEXT: imull %ecx, %edi
; WIN32-NEXT: mull %ecx
-; WIN32-NEXT: addl %edi, %edx
; WIN32-NEXT: imull {{[0-9]+}}(%esp), %ecx
+; WIN32-NEXT: addl %edi, %ecx
; WIN32-NEXT: addl %edx, %ecx
; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -1809,55 +1805,55 @@ define zeroext i1 @smuloi64_load2(i64 %v1, ptr %ptr2, ptr %res) {
; WIN32-NEXT: pushl %ebx
; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
-; WIN32-NEXT: subl $12, %esp
+; WIN32-NEXT: subl $16, %esp
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; WIN32-NEXT: movl (%eax), %ebp
-; WIN32-NEXT: movl 4(%eax), %ebx
+; WIN32-NEXT: movl (%eax), %ebx
+; WIN32-NEXT: movl 4(%eax), %ebp
; WIN32-NEXT: movl %ecx, %edi
; WIN32-NEXT: sarl $31, %edi
-; WIN32-NEXT: movl %ebp, %esi
+; WIN32-NEXT: movl %ebx, %esi
; WIN32-NEXT: imull %edi, %esi
-; WIN32-NEXT: movl %ebp, %eax
+; WIN32-NEXT: movl %ebx, %eax
; WIN32-NEXT: mull %edi
; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill
-; WIN32-NEXT: addl %esi, %edx
-; WIN32-NEXT: movl %ebx, %esi
-; WIN32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; WIN32-NEXT: imull %ebx, %edi
+; WIN32-NEXT: imull %ebp, %edi
+; WIN32-NEXT: addl %esi, %edi
; WIN32-NEXT: addl %edx, %edi
+; WIN32-NEXT: movl %ebp, %esi
+; WIN32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; WIN32-NEXT: sarl $31, %esi
-; WIN32-NEXT: movl %esi, %ebx
-; WIN32-NEXT: imull %ecx, %ebx
+; WIN32-NEXT: movl %esi, %edx
+; WIN32-NEXT: imull %ecx, %edx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT: movl %esi, %eax
-; WIN32-NEXT: mull %ecx
-; WIN32-NEXT: addl %ebx, %edx
; WIN32-NEXT: imull %ecx, %esi
; WIN32-NEXT: addl %edx, %esi
+; WIN32-NEXT: mull %ecx
+; WIN32-NEXT: addl %edx, %esi
; WIN32-NEXT: addl (%esp), %eax # 4-byte Folded Reload
; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; WIN32-NEXT: adcl %edi, %esi
; WIN32-NEXT: movl %ecx, %eax
-; WIN32-NEXT: mull %ebp
-; WIN32-NEXT: movl %edx, %edi
+; WIN32-NEXT: mull %ebx
+; WIN32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; WIN32-NEXT: mull %ebp
-; WIN32-NEXT: movl %edx, %ebx
+; WIN32-NEXT: mull %ebx
+; WIN32-NEXT: movl %edx, %edi
; WIN32-NEXT: movl %eax, %ecx
-; WIN32-NEXT: addl %edi, %ecx
-; WIN32-NEXT: adcl $0, %ebx
+; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; WIN32-NEXT: adcl $0, %edi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; WIN32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; WIN32-NEXT: movl %edx, %edi
+; WIN32-NEXT: mull %ebp
+; WIN32-NEXT: movl %edx, %ebx
; WIN32-NEXT: movl %eax, %ebp
; WIN32-NEXT: addl %ecx, %ebp
-; WIN32-NEXT: adcl %ebx, %edi
+; WIN32-NEXT: adcl %edi, %ebx
; WIN32-NEXT: setb %cl
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; WIN32-NEXT: addl %edi, %eax
+; WIN32-NEXT: addl %ebx, %eax
; WIN32-NEXT: movzbl %cl, %ecx
; WIN32-NEXT: adcl %ecx, %edx
; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
@@ -1872,7 +1868,7 @@ define zeroext i1 @smuloi64_load2(i64 %v1, ptr %ptr2, ptr %res) {
; WIN32-NEXT: movl (%esp), %ecx # 4-byte Reload
; WIN32-NEXT: movl %ecx, (%eax)
; WIN32-NEXT: setne %al
-; WIN32-NEXT: addl $12, %esp
+; WIN32-NEXT: addl $16, %esp
; WIN32-NEXT: popl %esi
; WIN32-NEXT: popl %edi
; WIN32-NEXT: popl %ebx
More information about the llvm-commits
mailing list