[llvm-branch-commits] [llvm-branch] r332933 - Merge r329657.
Chandler Carruth via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon May 21 19:46:37 PDT 2018
Author: chandlerc
Date: Mon May 21 19:46:36 2018
New Revision: 332933
URL: http://llvm.org/viewvc/llvm-project?rev=332933&view=rev
Log:
Merge r329657.
This is the main patch that introduced the new EFLAGS lowering infrastructure.
All the source merges were clean, but the tests required help to merge.
Specifically, I had to regenerate the CHECK lines in the tests (using the trunk
update_llc_test_checks.py script) because trunk has copy propagation that the
branch doesn't. I also had to update the MIR test to use the old MIR syntax for
physical registers (%name instead of $name).
Added:
llvm/branches/release_60/lib/Target/X86/X86FlagsCopyLowering.cpp
- copied unchanged from r329657, llvm/trunk/lib/Target/X86/X86FlagsCopyLowering.cpp
llvm/branches/release_60/test/CodeGen/X86/flags-copy-lowering.mir
- copied, changed from r329657, llvm/trunk/test/CodeGen/X86/flags-copy-lowering.mir
Removed:
llvm/branches/release_60/test/CodeGen/X86/clobber-fi0.ll
llvm/branches/release_60/test/CodeGen/X86/eflags-copy-expansion.mir
Modified:
llvm/branches/release_60/ (props changed)
llvm/branches/release_60/include/llvm/CodeGen/MachineBasicBlock.h
llvm/branches/release_60/lib/CodeGen/MachineBasicBlock.cpp
llvm/branches/release_60/lib/Target/X86/CMakeLists.txt
llvm/branches/release_60/lib/Target/X86/X86.h
llvm/branches/release_60/lib/Target/X86/X86ISelLowering.cpp
llvm/branches/release_60/lib/Target/X86/X86ISelLowering.h
llvm/branches/release_60/lib/Target/X86/X86InstrInfo.cpp
llvm/branches/release_60/lib/Target/X86/X86TargetMachine.cpp
llvm/branches/release_60/test/CodeGen/X86/GlobalISel/add-scalar.ll
llvm/branches/release_60/test/CodeGen/X86/O0-pipeline.ll
llvm/branches/release_60/test/CodeGen/X86/cmpxchg-clobber-flags.ll
llvm/branches/release_60/test/CodeGen/X86/copy-eflags.ll
llvm/branches/release_60/test/CodeGen/X86/mul-i1024.ll
llvm/branches/release_60/test/CodeGen/X86/peephole-na-phys-copy-folding.ll
llvm/branches/release_60/test/CodeGen/X86/win64_frame.ll
llvm/branches/release_60/test/CodeGen/X86/x86-repmov-copy-eflags.ll
Propchange: llvm/branches/release_60/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Mon May 21 19:46:36 2018
@@ -1,3 +1,3 @@
/llvm/branches/Apple/Pertwee:110850,110961
/llvm/branches/type-system-rewrite:133420-134817
-/llvm/trunk:155241,321751,321789,321791,321806,321862,321870,321872,321878,321911,321980,321991,321993-321994,322003,322016,322053,322056,322103,322106,322108,322123,322131,322223,322272,322313,322372,322473,322521,322623,322644,322724,322767,322875,322878-322879,322900,322904-322905,322973,322993,323034,323155,323190,323307,323331,323355,323369,323371,323384,323469,323515,323536,323582,323643,323671-323672,323706,323710,323759,323781,323810-323811,323813,323857,323907-323909,323913,323915,324002,324039,324110,324195,324353,324422,324449,324497,324576,324645,324746,324772,324916,324962,325049,325085,325139,325148,325168,325463,325525,325550,325654,325687,325739,325894,325946,326393,328945,329040,329055-329057
+/llvm/trunk:155241,321751,321789,321791,321806,321862,321870,321872,321878,321911,321980,321991,321993-321994,322003,322016,322053,322056,322103,322106,322108,322123,322131,322223,322272,322313,322372,322473,322521,322623,322644,322724,322767,322875,322878-322879,322900,322904-322905,322973,322993,323034,323155,323190,323307,323331,323355,323369,323371,323384,323469,323515,323536,323582,323643,323671-323672,323706,323710,323759,323781,323810-323811,323813,323857,323907-323909,323913,323915,324002,324039,324110,324195,324353,324422,324449,324497,324576,324645,324746,324772,324916,324962,325049,325085,325139,325148,325168,325463,325525,325550,325654,325687,325739,325894,325946,326393,328945,329040,329055-329057,329657
Modified: llvm/branches/release_60/include/llvm/CodeGen/MachineBasicBlock.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/include/llvm/CodeGen/MachineBasicBlock.h?rev=332933&r1=332932&r2=332933&view=diff
==============================================================================
--- llvm/branches/release_60/include/llvm/CodeGen/MachineBasicBlock.h (original)
+++ llvm/branches/release_60/include/llvm/CodeGen/MachineBasicBlock.h Mon May 21 19:46:36 2018
@@ -449,6 +449,13 @@ public:
/// Replace successor OLD with NEW and update probability info.
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New);
+ /// Copy a successor (and any probability info) from original block to this
+ /// block's. Uses an iterator into the original blocks successors.
+ ///
+ /// This is useful when doing a partial clone of successors. Afterward, the
+ /// probabilities may need to be normalized.
+ void copySuccessor(MachineBasicBlock *Orig, succ_iterator I);
+
/// Transfers all the successors from MBB to this machine basic block (i.e.,
/// copies all the successors FromMBB and remove all the successors from
/// FromMBB).
Modified: llvm/branches/release_60/lib/CodeGen/MachineBasicBlock.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/lib/CodeGen/MachineBasicBlock.cpp?rev=332933&r1=332932&r2=332933&view=diff
==============================================================================
--- llvm/branches/release_60/lib/CodeGen/MachineBasicBlock.cpp (original)
+++ llvm/branches/release_60/lib/CodeGen/MachineBasicBlock.cpp Mon May 21 19:46:36 2018
@@ -646,6 +646,14 @@ void MachineBasicBlock::replaceSuccessor
removeSuccessor(OldI);
}
+void MachineBasicBlock::copySuccessor(MachineBasicBlock *Orig,
+ succ_iterator I) {
+ if (Orig->Probs.empty())
+ addSuccessor(*I, Orig->getSuccProbability(I));
+ else
+ addSuccessorWithoutProb(*I);
+}
+
void MachineBasicBlock::addPredecessor(MachineBasicBlock *Pred) {
Predecessors.push_back(Pred);
}
Modified: llvm/branches/release_60/lib/Target/X86/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/lib/Target/X86/CMakeLists.txt?rev=332933&r1=332932&r2=332933&view=diff
==============================================================================
--- llvm/branches/release_60/lib/Target/X86/CMakeLists.txt (original)
+++ llvm/branches/release_60/lib/Target/X86/CMakeLists.txt Mon May 21 19:46:36 2018
@@ -31,6 +31,7 @@ set(sources
X86FixupBWInsts.cpp
X86FixupLEAs.cpp
X86FixupSetCC.cpp
+ X86FlagsCopyLowering.cpp
X86FloatingPoint.cpp
X86FrameLowering.cpp
X86InstructionSelector.cpp
Modified: llvm/branches/release_60/lib/Target/X86/X86.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/lib/Target/X86/X86.h?rev=332933&r1=332932&r2=332933&view=diff
==============================================================================
--- llvm/branches/release_60/lib/Target/X86/X86.h (original)
+++ llvm/branches/release_60/lib/Target/X86/X86.h Mon May 21 19:46:36 2018
@@ -66,6 +66,9 @@ FunctionPass *createX86OptimizeLEAs();
/// Return a pass that transforms setcc + movzx pairs into xor + setcc.
FunctionPass *createX86FixupSetCC();
+/// Return a pass that lowers EFLAGS copy pseudo instructions.
+FunctionPass *createX86FlagsCopyLoweringPass();
+
/// Return a pass that expands WinAlloca pseudo-instructions.
FunctionPass *createX86WinAllocaExpander();
Modified: llvm/branches/release_60/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/lib/Target/X86/X86ISelLowering.cpp?rev=332933&r1=332932&r2=332933&view=diff
==============================================================================
--- llvm/branches/release_60/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/branches/release_60/lib/Target/X86/X86ISelLowering.cpp Mon May 21 19:46:36 2018
@@ -37829,25 +37829,6 @@ bool X86TargetLowering::isTypeDesirableF
}
}
-/// This function checks if any of the users of EFLAGS copies the EFLAGS. We
-/// know that the code that lowers COPY of EFLAGS has to use the stack, and if
-/// we don't adjust the stack we clobber the first frame index.
-/// See X86InstrInfo::copyPhysReg.
-static bool hasCopyImplyingStackAdjustment(const MachineFunction &MF) {
- const MachineRegisterInfo &MRI = MF.getRegInfo();
- return any_of(MRI.reg_instructions(X86::EFLAGS),
- [](const MachineInstr &RI) { return RI.isCopy(); });
-}
-
-void X86TargetLowering::finalizeLowering(MachineFunction &MF) const {
- if (hasCopyImplyingStackAdjustment(MF)) {
- MachineFrameInfo &MFI = MF.getFrameInfo();
- MFI.setHasCopyImplyingStackAdjustment(true);
- }
-
- TargetLoweringBase::finalizeLowering(MF);
-}
-
/// This method query the target whether it is beneficial for dag combiner to
/// promote the specified node. If true, it should return the desired promotion
/// type by reference.
Modified: llvm/branches/release_60/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/lib/Target/X86/X86ISelLowering.h?rev=332933&r1=332932&r2=332933&view=diff
==============================================================================
--- llvm/branches/release_60/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/branches/release_60/lib/Target/X86/X86ISelLowering.h Mon May 21 19:46:36 2018
@@ -1099,9 +1099,6 @@ namespace llvm {
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
unsigned Factor) const override;
-
- void finalizeLowering(MachineFunction &MF) const override;
-
protected:
std::pair<const TargetRegisterClass *, uint8_t>
findRepresentativeClass(const TargetRegisterInfo *TRI,
Modified: llvm/branches/release_60/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/lib/Target/X86/X86InstrInfo.cpp?rev=332933&r1=332932&r2=332933&view=diff
==============================================================================
--- llvm/branches/release_60/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/branches/release_60/lib/Target/X86/X86InstrInfo.cpp Mon May 21 19:46:36 2018
@@ -6710,102 +6710,12 @@ void X86InstrInfo::copyPhysReg(MachineBa
return;
}
- bool FromEFLAGS = SrcReg == X86::EFLAGS;
- bool ToEFLAGS = DestReg == X86::EFLAGS;
- int Reg = FromEFLAGS ? DestReg : SrcReg;
- bool is32 = X86::GR32RegClass.contains(Reg);
- bool is64 = X86::GR64RegClass.contains(Reg);
-
- if ((FromEFLAGS || ToEFLAGS) && (is32 || is64)) {
- int Mov = is64 ? X86::MOV64rr : X86::MOV32rr;
- int Push = is64 ? X86::PUSH64r : X86::PUSH32r;
- int PushF = is64 ? X86::PUSHF64 : X86::PUSHF32;
- int Pop = is64 ? X86::POP64r : X86::POP32r;
- int PopF = is64 ? X86::POPF64 : X86::POPF32;
- int AX = is64 ? X86::RAX : X86::EAX;
-
- if (!Subtarget.hasLAHFSAHF()) {
- assert(Subtarget.is64Bit() &&
- "Not having LAHF/SAHF only happens on 64-bit.");
- // Moving EFLAGS to / from another register requires a push and a pop.
- // Notice that we have to adjust the stack if we don't want to clobber the
- // first frame index. See X86FrameLowering.cpp - usesTheStack.
- if (FromEFLAGS) {
- BuildMI(MBB, MI, DL, get(PushF));
- BuildMI(MBB, MI, DL, get(Pop), DestReg);
- }
- if (ToEFLAGS) {
- BuildMI(MBB, MI, DL, get(Push))
- .addReg(SrcReg, getKillRegState(KillSrc));
- BuildMI(MBB, MI, DL, get(PopF));
- }
- return;
- }
-
- // The flags need to be saved, but saving EFLAGS with PUSHF/POPF is
- // inefficient. Instead:
- // - Save the overflow flag OF into AL using SETO, and restore it using a
- // signed 8-bit addition of AL and INT8_MAX.
- // - Save/restore the bottom 8 EFLAGS bits (CF, PF, AF, ZF, SF) to/from AH
- // using LAHF/SAHF.
- // - When RAX/EAX is live and isn't the destination register, make sure it
- // isn't clobbered by PUSH/POP'ing it before and after saving/restoring
- // the flags.
- // This approach is ~2.25x faster than using PUSHF/POPF.
- //
- // This is still somewhat inefficient because we don't know which flags are
- // actually live inside EFLAGS. Were we able to do a single SETcc instead of
- // SETO+LAHF / ADDB+SAHF the code could be 1.02x faster.
- //
- // PUSHF/POPF is also potentially incorrect because it affects other flags
- // such as TF/IF/DF, which LLVM doesn't model.
- //
- // Notice that we have to adjust the stack if we don't want to clobber the
- // first frame index.
- // See X86ISelLowering.cpp - X86::hasCopyImplyingStackAdjustment.
-
- const TargetRegisterInfo &TRI = getRegisterInfo();
- MachineBasicBlock::LivenessQueryResult LQR =
- MBB.computeRegisterLiveness(&TRI, AX, MI);
- // We do not want to save and restore AX if we do not have to.
- // Moreover, if we do so whereas AX is dead, we would need to set
- // an undef flag on the use of AX, otherwise the verifier will
- // complain that we read an undef value.
- // We do not want to change the behavior of the machine verifier
- // as this is usually wrong to read an undef value.
- if (MachineBasicBlock::LQR_Unknown == LQR) {
- LivePhysRegs LPR(TRI);
- LPR.addLiveOuts(MBB);
- MachineBasicBlock::iterator I = MBB.end();
- while (I != MI) {
- --I;
- LPR.stepBackward(*I);
- }
- // AX contains the top most register in the aliasing hierarchy.
- // It may not be live, but one of its aliases may be.
- for (MCRegAliasIterator AI(AX, &TRI, true);
- AI.isValid() && LQR != MachineBasicBlock::LQR_Live; ++AI)
- LQR = LPR.contains(*AI) ? MachineBasicBlock::LQR_Live
- : MachineBasicBlock::LQR_Dead;
- }
- bool AXDead = (Reg == AX) || (MachineBasicBlock::LQR_Dead == LQR);
- if (!AXDead)
- BuildMI(MBB, MI, DL, get(Push)).addReg(AX, getKillRegState(true));
- if (FromEFLAGS) {
- BuildMI(MBB, MI, DL, get(X86::SETOr), X86::AL);
- BuildMI(MBB, MI, DL, get(X86::LAHF));
- BuildMI(MBB, MI, DL, get(Mov), Reg).addReg(AX);
- }
- if (ToEFLAGS) {
- BuildMI(MBB, MI, DL, get(Mov), AX).addReg(Reg, getKillRegState(KillSrc));
- BuildMI(MBB, MI, DL, get(X86::ADD8ri), X86::AL)
- .addReg(X86::AL)
- .addImm(INT8_MAX);
- BuildMI(MBB, MI, DL, get(X86::SAHF));
- }
- if (!AXDead)
- BuildMI(MBB, MI, DL, get(Pop), AX);
- return;
+ if (SrcReg == X86::EFLAGS || DestReg == X86::EFLAGS) {
+ // FIXME: We use a fatal error here because historically LLVM has tried
+ // lower some of these physreg copies and we want to ensure we get
+ // reasonable bug reports if someone encounters a case no other testing
+ // found. This path should be removed after the LLVM 7 release.
+ report_fatal_error("Unable to copy EFLAGS physical register!");
}
DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg)
Modified: llvm/branches/release_60/lib/Target/X86/X86TargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/lib/Target/X86/X86TargetMachine.cpp?rev=332933&r1=332932&r2=332933&view=diff
==============================================================================
--- llvm/branches/release_60/lib/Target/X86/X86TargetMachine.cpp (original)
+++ llvm/branches/release_60/lib/Target/X86/X86TargetMachine.cpp Mon May 21 19:46:36 2018
@@ -62,6 +62,7 @@ void initializeX86CallFrameOptimizationP
void initializeX86CmovConverterPassPass(PassRegistry &);
void initializeX86ExecutionDepsFixPass(PassRegistry &);
void initializeX86DomainReassignmentPass(PassRegistry &);
+void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);
} // end namespace llvm
@@ -80,6 +81,7 @@ extern "C" void LLVMInitializeX86Target(
initializeX86CmovConverterPassPass(PR);
initializeX86ExecutionDepsFixPass(PR);
initializeX86DomainReassignmentPass(PR);
+ initializeX86FlagsCopyLoweringPassPass(PR);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -415,6 +417,7 @@ void X86PassConfig::addPreRegAlloc() {
addPass(createX86CallFrameOptimization());
}
+ addPass(createX86FlagsCopyLoweringPass());
addPass(createX86WinAllocaExpander());
}
void X86PassConfig::addMachineSSAOptimization() {
Modified: llvm/branches/release_60/test/CodeGen/X86/GlobalISel/add-scalar.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/test/CodeGen/X86/GlobalISel/add-scalar.ll?rev=332933&r1=332932&r2=332933&view=diff
==============================================================================
--- llvm/branches/release_60/test/CodeGen/X86/GlobalISel/add-scalar.ll (original)
+++ llvm/branches/release_60/test/CodeGen/X86/GlobalISel/add-scalar.ll Mon May 21 19:46:36 2018
@@ -10,16 +10,10 @@ define i64 @test_add_i64(i64 %arg1, i64
;
; X32-LABEL: test_add_i64:
; X32: # %bb.0:
-; X32-NEXT: pushl %ebp
-; X32-NEXT: .cfi_def_cfa_offset 8
-; X32-NEXT: .cfi_offset %ebp, -8
-; X32-NEXT: movl %esp, %ebp
-; X32-NEXT: .cfi_def_cfa_register %ebp
-; X32-NEXT: movl 16(%ebp), %eax
-; X32-NEXT: movl 20(%ebp), %edx
-; X32-NEXT: addl 8(%ebp), %eax
-; X32-NEXT: adcl 12(%ebp), %edx
-; X32-NEXT: popl %ebp
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; X32-NEXT: retl
%ret = add i64 %arg1, %arg2
ret i64 %ret
Modified: llvm/branches/release_60/test/CodeGen/X86/O0-pipeline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/test/CodeGen/X86/O0-pipeline.ll?rev=332933&r1=332932&r2=332933&view=diff
==============================================================================
--- llvm/branches/release_60/test/CodeGen/X86/O0-pipeline.ll (original)
+++ llvm/branches/release_60/test/CodeGen/X86/O0-pipeline.ll Mon May 21 19:46:36 2018
@@ -37,6 +37,7 @@
; CHECK-NEXT: X86 PIC Global Base Reg Initialization
; CHECK-NEXT: Expand ISel Pseudo-instructions
; CHECK-NEXT: Local Stack Slot Allocation
+; CHECK-NEXT: X86 EFLAGS copy lowering
; CHECK-NEXT: X86 WinAlloca Expander
; CHECK-NEXT: Eliminate PHI nodes for register allocation
; CHECK-NEXT: Two-Address instruction pass
Removed: llvm/branches/release_60/test/CodeGen/X86/clobber-fi0.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/test/CodeGen/X86/clobber-fi0.ll?rev=332932&view=auto
==============================================================================
--- llvm/branches/release_60/test/CodeGen/X86/clobber-fi0.ll (original)
+++ llvm/branches/release_60/test/CodeGen/X86/clobber-fi0.ll (removed)
@@ -1,37 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mcpu=generic -mtriple=x86_64-linux | FileCheck %s
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.7.0"
-
-; In the code below we need to copy the EFLAGS because of scheduling constraints.
-; When copying the EFLAGS we need to write to the stack with push/pop. This forces
-; us to emit the prolog.
-
-; CHECK: main
-; CHECK: subq{{.*}}rsp
-; CHECK: ret
-define i32 @main(i32 %arg, i8** %arg1) nounwind {
-bb:
- %tmp = alloca i32, align 4 ; [#uses=3 type=i32*]
- %tmp2 = alloca i32, align 4 ; [#uses=3 type=i32*]
- %tmp3 = alloca i32 ; [#uses=1 type=i32*]
- store volatile i32 1, i32* %tmp, align 4
- store volatile i32 1, i32* %tmp2, align 4
- br label %bb4
-
-bb4: ; preds = %bb4, %bb
- %tmp6 = load volatile i32, i32* %tmp2, align 4 ; [#uses=1 type=i32]
- %tmp7 = add i32 %tmp6, -1 ; [#uses=2 type=i32]
- store volatile i32 %tmp7, i32* %tmp2, align 4
- %tmp8 = icmp eq i32 %tmp7, 0 ; [#uses=1 type=i1]
- %tmp9 = load volatile i32, i32* %tmp ; [#uses=1 type=i32]
- %tmp10 = add i32 %tmp9, -1 ; [#uses=1 type=i32]
- store volatile i32 %tmp10, i32* %tmp3
- br i1 %tmp8, label %bb11, label %bb4
-
-bb11: ; preds = %bb4
- %tmp12 = load volatile i32, i32* %tmp, align 4 ; [#uses=1 type=i32]
- ret i32 %tmp12
-}
-
-
Modified: llvm/branches/release_60/test/CodeGen/X86/cmpxchg-clobber-flags.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/test/CodeGen/X86/cmpxchg-clobber-flags.ll?rev=332933&r1=332932&r2=332933&view=diff
==============================================================================
--- llvm/branches/release_60/test/CodeGen/X86/cmpxchg-clobber-flags.ll (original)
+++ llvm/branches/release_60/test/CodeGen/X86/cmpxchg-clobber-flags.ll Mon May 21 19:46:36 2018
@@ -1,15 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=i386-linux-gnu %s -o - | FileCheck %s --check-prefixes=32-ALL,32-GOOD-RA
-; RUN: llc -mtriple=i386-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=32-ALL,32-FAST-RA
+; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=32-ALL,32-GOOD-RA
+; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=32-ALL,32-FAST-RA
-; RUN: llc -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA
-; RUN: llc -mtriple=x86_64-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=64-ALL,64-FAST-RA
-; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sahf %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA-SAHF
-; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sahf -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=64-ALL,64-FAST-RA-SAHF
-; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=corei7 %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA-SAHF
-
-; TODO: Reenable verify-machineinstr once the if (!AXDead) // FIXME
-; in X86InstrInfo::copyPhysReg() is resolved.
+; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA
+; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=64-ALL,64-FAST-RA
+; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA-SAHF
+; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=64-ALL,64-FAST-RA-SAHF
+; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mcpu=corei7 %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA-SAHF
declare i32 @foo()
declare i32 @bar(i64)
@@ -22,37 +19,29 @@ declare i32 @bar(i64)
; ...
; use of eax
; During PEI the adjcallstackdown32 is replaced with the subl which
-; clobbers eflags, effectively interfering in the liveness interval.
-; Is this a case we care about? Maybe no, considering this issue
-; happens with the fast pre-regalloc scheduler enforced. A more
-; performant scheduler would move the adjcallstackdown32 out of the
-; eflags liveness interval.
+; clobbers eflags, effectively interfering in the liveness interval. However,
+; we then promote these copies into independent conditions in GPRs that avoids
+; repeated saving and restoring logic and can be trivially managed by the
+; register allocator.
define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) nounwind {
; 32-GOOD-RA-LABEL: test_intervening_call:
; 32-GOOD-RA: # %bb.0: # %entry
-; 32-GOOD-RA-NEXT: pushl %ebp
-; 32-GOOD-RA-NEXT: movl %esp, %ebp
; 32-GOOD-RA-NEXT: pushl %ebx
; 32-GOOD-RA-NEXT: pushl %esi
-; 32-GOOD-RA-NEXT: movl 12(%ebp), %eax
-; 32-GOOD-RA-NEXT: movl 16(%ebp), %edx
-; 32-GOOD-RA-NEXT: movl 20(%ebp), %ebx
-; 32-GOOD-RA-NEXT: movl 24(%ebp), %ecx
-; 32-GOOD-RA-NEXT: movl 8(%ebp), %esi
-; 32-GOOD-RA-NEXT: lock cmpxchg8b (%esi)
; 32-GOOD-RA-NEXT: pushl %eax
-; 32-GOOD-RA-NEXT: seto %al
-; 32-GOOD-RA-NEXT: lahf
-; 32-GOOD-RA-NEXT: movl %eax, %esi
-; 32-GOOD-RA-NEXT: popl %eax
+; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
+; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %edx
+; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
+; 32-GOOD-RA-NEXT: lock cmpxchg8b (%esi)
+; 32-GOOD-RA-NEXT: setne %bl
; 32-GOOD-RA-NEXT: subl $8, %esp
; 32-GOOD-RA-NEXT: pushl %edx
; 32-GOOD-RA-NEXT: pushl %eax
; 32-GOOD-RA-NEXT: calll bar
; 32-GOOD-RA-NEXT: addl $16, %esp
-; 32-GOOD-RA-NEXT: movl %esi, %eax
-; 32-GOOD-RA-NEXT: addb $127, %al
-; 32-GOOD-RA-NEXT: sahf
+; 32-GOOD-RA-NEXT: testb $-1, %bl
; 32-GOOD-RA-NEXT: jne .LBB0_3
; 32-GOOD-RA-NEXT: # %bb.1: # %t
; 32-GOOD-RA-NEXT: movl $42, %eax
@@ -61,46 +50,29 @@ define i64 @test_intervening_call(i64* %
; 32-GOOD-RA-NEXT: xorl %eax, %eax
; 32-GOOD-RA-NEXT: .LBB0_2: # %t
; 32-GOOD-RA-NEXT: xorl %edx, %edx
+; 32-GOOD-RA-NEXT: addl $4, %esp
; 32-GOOD-RA-NEXT: popl %esi
; 32-GOOD-RA-NEXT: popl %ebx
-; 32-GOOD-RA-NEXT: popl %ebp
; 32-GOOD-RA-NEXT: retl
;
; 32-FAST-RA-LABEL: test_intervening_call:
; 32-FAST-RA: # %bb.0: # %entry
-; 32-FAST-RA-NEXT: pushl %ebp
-; 32-FAST-RA-NEXT: movl %esp, %ebp
; 32-FAST-RA-NEXT: pushl %ebx
; 32-FAST-RA-NEXT: pushl %esi
-; 32-FAST-RA-NEXT: movl 8(%ebp), %esi
-; 32-FAST-RA-NEXT: movl 20(%ebp), %ebx
-; 32-FAST-RA-NEXT: movl 24(%ebp), %ecx
-; 32-FAST-RA-NEXT: movl 12(%ebp), %eax
-; 32-FAST-RA-NEXT: movl 16(%ebp), %edx
-; 32-FAST-RA-NEXT: lock cmpxchg8b (%esi)
; 32-FAST-RA-NEXT: pushl %eax
-; 32-FAST-RA-NEXT: seto %al
-; 32-FAST-RA-NEXT: lahf
-; 32-FAST-RA-NEXT: movl %eax, %ecx
-; 32-FAST-RA-NEXT: popl %eax
+; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
+; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
+; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %edx
+; 32-FAST-RA-NEXT: lock cmpxchg8b (%esi)
+; 32-FAST-RA-NEXT: setne %bl
; 32-FAST-RA-NEXT: subl $8, %esp
-; 32-FAST-RA-NEXT: pushl %eax
-; 32-FAST-RA-NEXT: movl %ecx, %eax
-; 32-FAST-RA-NEXT: addb $127, %al
-; 32-FAST-RA-NEXT: sahf
-; 32-FAST-RA-NEXT: popl %eax
-; 32-FAST-RA-NEXT: pushl %eax
-; 32-FAST-RA-NEXT: seto %al
-; 32-FAST-RA-NEXT: lahf
-; 32-FAST-RA-NEXT: movl %eax, %esi
-; 32-FAST-RA-NEXT: popl %eax
; 32-FAST-RA-NEXT: pushl %edx
; 32-FAST-RA-NEXT: pushl %eax
; 32-FAST-RA-NEXT: calll bar
; 32-FAST-RA-NEXT: addl $16, %esp
-; 32-FAST-RA-NEXT: movl %esi, %eax
-; 32-FAST-RA-NEXT: addb $127, %al
-; 32-FAST-RA-NEXT: sahf
+; 32-FAST-RA-NEXT: testb $-1, %bl
; 32-FAST-RA-NEXT: jne .LBB0_3
; 32-FAST-RA-NEXT: # %bb.1: # %t
; 32-FAST-RA-NEXT: movl $42, %eax
@@ -109,122 +81,29 @@ define i64 @test_intervening_call(i64* %
; 32-FAST-RA-NEXT: xorl %eax, %eax
; 32-FAST-RA-NEXT: .LBB0_2: # %t
; 32-FAST-RA-NEXT: xorl %edx, %edx
+; 32-FAST-RA-NEXT: addl $4, %esp
; 32-FAST-RA-NEXT: popl %esi
; 32-FAST-RA-NEXT: popl %ebx
-; 32-FAST-RA-NEXT: popl %ebp
; 32-FAST-RA-NEXT: retl
;
-; 64-GOOD-RA-LABEL: test_intervening_call:
-; 64-GOOD-RA: # %bb.0: # %entry
-; 64-GOOD-RA-NEXT: pushq %rbp
-; 64-GOOD-RA-NEXT: movq %rsp, %rbp
-; 64-GOOD-RA-NEXT: pushq %rbx
-; 64-GOOD-RA-NEXT: pushq %rax
-; 64-GOOD-RA-NEXT: movq %rsi, %rax
-; 64-GOOD-RA-NEXT: lock cmpxchgq %rdx, (%rdi)
-; 64-GOOD-RA-NEXT: pushfq
-; 64-GOOD-RA-NEXT: popq %rbx
-; 64-GOOD-RA-NEXT: movq %rax, %rdi
-; 64-GOOD-RA-NEXT: callq bar
-; 64-GOOD-RA-NEXT: pushq %rbx
-; 64-GOOD-RA-NEXT: popfq
-; 64-GOOD-RA-NEXT: jne .LBB0_3
-; 64-GOOD-RA-NEXT: # %bb.1: # %t
-; 64-GOOD-RA-NEXT: movl $42, %eax
-; 64-GOOD-RA-NEXT: jmp .LBB0_2
-; 64-GOOD-RA-NEXT: .LBB0_3: # %f
-; 64-GOOD-RA-NEXT: xorl %eax, %eax
-; 64-GOOD-RA-NEXT: .LBB0_2: # %t
-; 64-GOOD-RA-NEXT: addq $8, %rsp
-; 64-GOOD-RA-NEXT: popq %rbx
-; 64-GOOD-RA-NEXT: popq %rbp
-; 64-GOOD-RA-NEXT: retq
-;
-; 64-FAST-RA-LABEL: test_intervening_call:
-; 64-FAST-RA: # %bb.0: # %entry
-; 64-FAST-RA-NEXT: pushq %rbp
-; 64-FAST-RA-NEXT: movq %rsp, %rbp
-; 64-FAST-RA-NEXT: pushq %rbx
-; 64-FAST-RA-NEXT: pushq %rax
-; 64-FAST-RA-NEXT: movq %rsi, %rax
-; 64-FAST-RA-NEXT: lock cmpxchgq %rdx, (%rdi)
-; 64-FAST-RA-NEXT: pushfq
-; 64-FAST-RA-NEXT: popq %rbx
-; 64-FAST-RA-NEXT: movq %rax, %rdi
-; 64-FAST-RA-NEXT: callq bar
-; 64-FAST-RA-NEXT: pushq %rbx
-; 64-FAST-RA-NEXT: popfq
-; 64-FAST-RA-NEXT: jne .LBB0_3
-; 64-FAST-RA-NEXT: # %bb.1: # %t
-; 64-FAST-RA-NEXT: movl $42, %eax
-; 64-FAST-RA-NEXT: jmp .LBB0_2
-; 64-FAST-RA-NEXT: .LBB0_3: # %f
-; 64-FAST-RA-NEXT: xorl %eax, %eax
-; 64-FAST-RA-NEXT: .LBB0_2: # %t
-; 64-FAST-RA-NEXT: addq $8, %rsp
-; 64-FAST-RA-NEXT: popq %rbx
-; 64-FAST-RA-NEXT: popq %rbp
-; 64-FAST-RA-NEXT: retq
-;
-; 64-GOOD-RA-SAHF-LABEL: test_intervening_call:
-; 64-GOOD-RA-SAHF: # %bb.0: # %entry
-; 64-GOOD-RA-SAHF-NEXT: pushq %rbp
-; 64-GOOD-RA-SAHF-NEXT: movq %rsp, %rbp
-; 64-GOOD-RA-SAHF-NEXT: pushq %rbx
-; 64-GOOD-RA-SAHF-NEXT: pushq %rax
-; 64-GOOD-RA-SAHF-NEXT: movq %rsi, %rax
-; 64-GOOD-RA-SAHF-NEXT: lock cmpxchgq %rdx, (%rdi)
-; 64-GOOD-RA-SAHF-NEXT: pushq %rax
-; 64-GOOD-RA-SAHF-NEXT: seto %al
-; 64-GOOD-RA-SAHF-NEXT: lahf
-; 64-GOOD-RA-SAHF-NEXT: movq %rax, %rbx
-; 64-GOOD-RA-SAHF-NEXT: popq %rax
-; 64-GOOD-RA-SAHF-NEXT: movq %rax, %rdi
-; 64-GOOD-RA-SAHF-NEXT: callq bar
-; 64-GOOD-RA-SAHF-NEXT: movq %rbx, %rax
-; 64-GOOD-RA-SAHF-NEXT: addb $127, %al
-; 64-GOOD-RA-SAHF-NEXT: sahf
-; 64-GOOD-RA-SAHF-NEXT: jne .LBB0_3
-; 64-GOOD-RA-SAHF-NEXT: # %bb.1: # %t
-; 64-GOOD-RA-SAHF-NEXT: movl $42, %eax
-; 64-GOOD-RA-SAHF-NEXT: jmp .LBB0_2
-; 64-GOOD-RA-SAHF-NEXT: .LBB0_3: # %f
-; 64-GOOD-RA-SAHF-NEXT: xorl %eax, %eax
-; 64-GOOD-RA-SAHF-NEXT: .LBB0_2: # %t
-; 64-GOOD-RA-SAHF-NEXT: addq $8, %rsp
-; 64-GOOD-RA-SAHF-NEXT: popq %rbx
-; 64-GOOD-RA-SAHF-NEXT: popq %rbp
-; 64-GOOD-RA-SAHF-NEXT: retq
-;
-; 64-FAST-RA-SAHF-LABEL: test_intervening_call:
-; 64-FAST-RA-SAHF: # %bb.0: # %entry
-; 64-FAST-RA-SAHF-NEXT: pushq %rbp
-; 64-FAST-RA-SAHF-NEXT: movq %rsp, %rbp
-; 64-FAST-RA-SAHF-NEXT: pushq %rbx
-; 64-FAST-RA-SAHF-NEXT: pushq %rax
-; 64-FAST-RA-SAHF-NEXT: movq %rsi, %rax
-; 64-FAST-RA-SAHF-NEXT: lock cmpxchgq %rdx, (%rdi)
-; 64-FAST-RA-SAHF-NEXT: pushq %rax
-; 64-FAST-RA-SAHF-NEXT: seto %al
-; 64-FAST-RA-SAHF-NEXT: lahf
-; 64-FAST-RA-SAHF-NEXT: movq %rax, %rbx
-; 64-FAST-RA-SAHF-NEXT: popq %rax
-; 64-FAST-RA-SAHF-NEXT: movq %rax, %rdi
-; 64-FAST-RA-SAHF-NEXT: callq bar
-; 64-FAST-RA-SAHF-NEXT: movq %rbx, %rax
-; 64-FAST-RA-SAHF-NEXT: addb $127, %al
-; 64-FAST-RA-SAHF-NEXT: sahf
-; 64-FAST-RA-SAHF-NEXT: jne .LBB0_3
-; 64-FAST-RA-SAHF-NEXT: # %bb.1: # %t
-; 64-FAST-RA-SAHF-NEXT: movl $42, %eax
-; 64-FAST-RA-SAHF-NEXT: jmp .LBB0_2
-; 64-FAST-RA-SAHF-NEXT: .LBB0_3: # %f
-; 64-FAST-RA-SAHF-NEXT: xorl %eax, %eax
-; 64-FAST-RA-SAHF-NEXT: .LBB0_2: # %t
-; 64-FAST-RA-SAHF-NEXT: addq $8, %rsp
-; 64-FAST-RA-SAHF-NEXT: popq %rbx
-; 64-FAST-RA-SAHF-NEXT: popq %rbp
-; 64-FAST-RA-SAHF-NEXT: retq
+; 64-ALL-LABEL: test_intervening_call:
+; 64-ALL: # %bb.0: # %entry
+; 64-ALL-NEXT: pushq %rbx
+; 64-ALL-NEXT: movq %rsi, %rax
+; 64-ALL-NEXT: lock cmpxchgq %rdx, (%rdi)
+; 64-ALL-NEXT: setne %bl
+; 64-ALL-NEXT: movq %rax, %rdi
+; 64-ALL-NEXT: callq bar
+; 64-ALL-NEXT: testb $-1, %bl
+; 64-ALL-NEXT: jne .LBB0_2
+; 64-ALL-NEXT: # %bb.1: # %t
+; 64-ALL-NEXT: movl $42, %eax
+; 64-ALL-NEXT: popq %rbx
+; 64-ALL-NEXT: retq
+; 64-ALL-NEXT: .LBB0_2: # %f
+; 64-ALL-NEXT: xorl %eax, %eax
+; 64-ALL-NEXT: popq %rbx
+; 64-ALL-NEXT: retq
entry:
%cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst
%v = extractvalue { i64, i1 } %cx, 0
@@ -331,149 +210,64 @@ cond.end:
define i32 @test_feed_cmov(i32* %addr, i32 %desired, i32 %new) nounwind {
; 32-GOOD-RA-LABEL: test_feed_cmov:
; 32-GOOD-RA: # %bb.0: # %entry
-; 32-GOOD-RA-NEXT: pushl %ebp
-; 32-GOOD-RA-NEXT: movl %esp, %ebp
-; 32-GOOD-RA-NEXT: pushl %edi
+; 32-GOOD-RA-NEXT: pushl %ebx
; 32-GOOD-RA-NEXT: pushl %esi
-; 32-GOOD-RA-NEXT: movl 12(%ebp), %eax
-; 32-GOOD-RA-NEXT: movl 16(%ebp), %esi
-; 32-GOOD-RA-NEXT: movl 8(%ebp), %ecx
+; 32-GOOD-RA-NEXT: pushl %eax
+; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
+; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
+; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
; 32-GOOD-RA-NEXT: lock cmpxchgl %esi, (%ecx)
-; 32-GOOD-RA-NEXT: seto %al
-; 32-GOOD-RA-NEXT: lahf
-; 32-GOOD-RA-NEXT: movl %eax, %edi
+; 32-GOOD-RA-NEXT: sete %bl
; 32-GOOD-RA-NEXT: calll foo
-; 32-GOOD-RA-NEXT: pushl %eax
-; 32-GOOD-RA-NEXT: movl %edi, %eax
-; 32-GOOD-RA-NEXT: addb $127, %al
-; 32-GOOD-RA-NEXT: sahf
-; 32-GOOD-RA-NEXT: popl %eax
-; 32-GOOD-RA-NEXT: je .LBB2_2
+; 32-GOOD-RA-NEXT: testb $-1, %bl
+; 32-GOOD-RA-NEXT: jne .LBB2_2
; 32-GOOD-RA-NEXT: # %bb.1: # %entry
; 32-GOOD-RA-NEXT: movl %eax, %esi
; 32-GOOD-RA-NEXT: .LBB2_2: # %entry
; 32-GOOD-RA-NEXT: movl %esi, %eax
+; 32-GOOD-RA-NEXT: addl $4, %esp
; 32-GOOD-RA-NEXT: popl %esi
-; 32-GOOD-RA-NEXT: popl %edi
-; 32-GOOD-RA-NEXT: popl %ebp
+; 32-GOOD-RA-NEXT: popl %ebx
; 32-GOOD-RA-NEXT: retl
;
; 32-FAST-RA-LABEL: test_feed_cmov:
; 32-FAST-RA: # %bb.0: # %entry
-; 32-FAST-RA-NEXT: pushl %ebp
-; 32-FAST-RA-NEXT: movl %esp, %ebp
-; 32-FAST-RA-NEXT: pushl %edi
+; 32-FAST-RA-NEXT: pushl %ebx
; 32-FAST-RA-NEXT: pushl %esi
-; 32-FAST-RA-NEXT: movl 8(%ebp), %ecx
-; 32-FAST-RA-NEXT: movl 16(%ebp), %esi
-; 32-FAST-RA-NEXT: movl 12(%ebp), %eax
+; 32-FAST-RA-NEXT: pushl %eax
+; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
+; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
; 32-FAST-RA-NEXT: lock cmpxchgl %esi, (%ecx)
-; 32-FAST-RA-NEXT: seto %al
-; 32-FAST-RA-NEXT: lahf
-; 32-FAST-RA-NEXT: movl %eax, %edi
+; 32-FAST-RA-NEXT: sete %bl
; 32-FAST-RA-NEXT: calll foo
-; 32-FAST-RA-NEXT: pushl %eax
-; 32-FAST-RA-NEXT: movl %edi, %eax
-; 32-FAST-RA-NEXT: addb $127, %al
-; 32-FAST-RA-NEXT: sahf
-; 32-FAST-RA-NEXT: popl %eax
-; 32-FAST-RA-NEXT: je .LBB2_2
+; 32-FAST-RA-NEXT: testb $-1, %bl
+; 32-FAST-RA-NEXT: jne .LBB2_2
; 32-FAST-RA-NEXT: # %bb.1: # %entry
; 32-FAST-RA-NEXT: movl %eax, %esi
; 32-FAST-RA-NEXT: .LBB2_2: # %entry
; 32-FAST-RA-NEXT: movl %esi, %eax
+; 32-FAST-RA-NEXT: addl $4, %esp
; 32-FAST-RA-NEXT: popl %esi
-; 32-FAST-RA-NEXT: popl %edi
-; 32-FAST-RA-NEXT: popl %ebp
+; 32-FAST-RA-NEXT: popl %ebx
; 32-FAST-RA-NEXT: retl
;
-; 64-GOOD-RA-LABEL: test_feed_cmov:
-; 64-GOOD-RA: # %bb.0: # %entry
-; 64-GOOD-RA-NEXT: pushq %rbp
-; 64-GOOD-RA-NEXT: movq %rsp, %rbp
-; 64-GOOD-RA-NEXT: pushq %r14
-; 64-GOOD-RA-NEXT: pushq %rbx
-; 64-GOOD-RA-NEXT: movl %edx, %ebx
-; 64-GOOD-RA-NEXT: movl %esi, %eax
-; 64-GOOD-RA-NEXT: lock cmpxchgl %ebx, (%rdi)
-; 64-GOOD-RA-NEXT: pushfq
-; 64-GOOD-RA-NEXT: popq %r14
-; 64-GOOD-RA-NEXT: callq foo
-; 64-GOOD-RA-NEXT: pushq %r14
-; 64-GOOD-RA-NEXT: popfq
-; 64-GOOD-RA-NEXT: cmovel %ebx, %eax
-; 64-GOOD-RA-NEXT: popq %rbx
-; 64-GOOD-RA-NEXT: popq %r14
-; 64-GOOD-RA-NEXT: popq %rbp
-; 64-GOOD-RA-NEXT: retq
-;
-; 64-FAST-RA-LABEL: test_feed_cmov:
-; 64-FAST-RA: # %bb.0: # %entry
-; 64-FAST-RA-NEXT: pushq %rbp
-; 64-FAST-RA-NEXT: movq %rsp, %rbp
-; 64-FAST-RA-NEXT: pushq %r14
-; 64-FAST-RA-NEXT: pushq %rbx
-; 64-FAST-RA-NEXT: movl %edx, %ebx
-; 64-FAST-RA-NEXT: movl %esi, %eax
-; 64-FAST-RA-NEXT: lock cmpxchgl %ebx, (%rdi)
-; 64-FAST-RA-NEXT: pushfq
-; 64-FAST-RA-NEXT: popq %r14
-; 64-FAST-RA-NEXT: callq foo
-; 64-FAST-RA-NEXT: pushq %r14
-; 64-FAST-RA-NEXT: popfq
-; 64-FAST-RA-NEXT: cmovel %ebx, %eax
-; 64-FAST-RA-NEXT: popq %rbx
-; 64-FAST-RA-NEXT: popq %r14
-; 64-FAST-RA-NEXT: popq %rbp
-; 64-FAST-RA-NEXT: retq
-;
-; 64-GOOD-RA-SAHF-LABEL: test_feed_cmov:
-; 64-GOOD-RA-SAHF: # %bb.0: # %entry
-; 64-GOOD-RA-SAHF-NEXT: pushq %rbp
-; 64-GOOD-RA-SAHF-NEXT: movq %rsp, %rbp
-; 64-GOOD-RA-SAHF-NEXT: pushq %r14
-; 64-GOOD-RA-SAHF-NEXT: pushq %rbx
-; 64-GOOD-RA-SAHF-NEXT: movl %edx, %ebx
-; 64-GOOD-RA-SAHF-NEXT: movl %esi, %eax
-; 64-GOOD-RA-SAHF-NEXT: lock cmpxchgl %ebx, (%rdi)
-; 64-GOOD-RA-SAHF-NEXT: seto %al
-; 64-GOOD-RA-SAHF-NEXT: lahf
-; 64-GOOD-RA-SAHF-NEXT: movq %rax, %r14
-; 64-GOOD-RA-SAHF-NEXT: callq foo
-; 64-GOOD-RA-SAHF-NEXT: pushq %rax
-; 64-GOOD-RA-SAHF-NEXT: movq %r14, %rax
-; 64-GOOD-RA-SAHF-NEXT: addb $127, %al
-; 64-GOOD-RA-SAHF-NEXT: sahf
-; 64-GOOD-RA-SAHF-NEXT: popq %rax
-; 64-GOOD-RA-SAHF-NEXT: cmovel %ebx, %eax
-; 64-GOOD-RA-SAHF-NEXT: popq %rbx
-; 64-GOOD-RA-SAHF-NEXT: popq %r14
-; 64-GOOD-RA-SAHF-NEXT: popq %rbp
-; 64-GOOD-RA-SAHF-NEXT: retq
-;
-; 64-FAST-RA-SAHF-LABEL: test_feed_cmov:
-; 64-FAST-RA-SAHF: # %bb.0: # %entry
-; 64-FAST-RA-SAHF-NEXT: pushq %rbp
-; 64-FAST-RA-SAHF-NEXT: movq %rsp, %rbp
-; 64-FAST-RA-SAHF-NEXT: pushq %r14
-; 64-FAST-RA-SAHF-NEXT: pushq %rbx
-; 64-FAST-RA-SAHF-NEXT: movl %edx, %ebx
-; 64-FAST-RA-SAHF-NEXT: movl %esi, %eax
-; 64-FAST-RA-SAHF-NEXT: lock cmpxchgl %ebx, (%rdi)
-; 64-FAST-RA-SAHF-NEXT: seto %al
-; 64-FAST-RA-SAHF-NEXT: lahf
-; 64-FAST-RA-SAHF-NEXT: movq %rax, %r14
-; 64-FAST-RA-SAHF-NEXT: callq foo
-; 64-FAST-RA-SAHF-NEXT: pushq %rax
-; 64-FAST-RA-SAHF-NEXT: movq %r14, %rax
-; 64-FAST-RA-SAHF-NEXT: addb $127, %al
-; 64-FAST-RA-SAHF-NEXT: sahf
-; 64-FAST-RA-SAHF-NEXT: popq %rax
-; 64-FAST-RA-SAHF-NEXT: cmovel %ebx, %eax
-; 64-FAST-RA-SAHF-NEXT: popq %rbx
-; 64-FAST-RA-SAHF-NEXT: popq %r14
-; 64-FAST-RA-SAHF-NEXT: popq %rbp
-; 64-FAST-RA-SAHF-NEXT: retq
+; 64-ALL-LABEL: test_feed_cmov:
+; 64-ALL: # %bb.0: # %entry
+; 64-ALL-NEXT: pushq %rbp
+; 64-ALL-NEXT: pushq %rbx
+; 64-ALL-NEXT: pushq %rax
+; 64-ALL-NEXT: movl %edx, %ebx
+; 64-ALL-NEXT: movl %esi, %eax
+; 64-ALL-NEXT: lock cmpxchgl %ebx, (%rdi)
+; 64-ALL-NEXT: sete %bpl
+; 64-ALL-NEXT: callq foo
+; 64-ALL-NEXT: testb $-1, %bpl
+; 64-ALL-NEXT: cmovnel %ebx, %eax
+; 64-ALL-NEXT: addq $8, %rsp
+; 64-ALL-NEXT: popq %rbx
+; 64-ALL-NEXT: popq %rbp
+; 64-ALL-NEXT: retq
entry:
%res = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
%success = extractvalue { i32, i1 } %res, 1
Modified: llvm/branches/release_60/test/CodeGen/X86/copy-eflags.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/test/CodeGen/X86/copy-eflags.ll?rev=332933&r1=332932&r2=332933&view=diff
==============================================================================
--- llvm/branches/release_60/test/CodeGen/X86/copy-eflags.ll (original)
+++ llvm/branches/release_60/test/CodeGen/X86/copy-eflags.ll Mon May 21 19:46:36 2018
@@ -19,35 +19,24 @@ define i32 @test1() nounwind {
; X32-LABEL: test1:
; X32: # %bb.0: # %entry
; X32-NEXT: movb b, %cl
-; X32-NEXT: movb %cl, %al
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: incb %al
; X32-NEXT: movb %al, b
; X32-NEXT: incl c
-; X32-NEXT: pushl %eax
-; X32-NEXT: seto %al
-; X32-NEXT: lahf
-; X32-NEXT: movl %eax, %edx
-; X32-NEXT: popl %eax
+; X32-NEXT: sete %dl
; X32-NEXT: movb a, %ah
; X32-NEXT: movb %ah, %ch
; X32-NEXT: incb %ch
; X32-NEXT: cmpb %cl, %ah
; X32-NEXT: sete d
; X32-NEXT: movb %ch, a
-; X32-NEXT: pushl %eax
-; X32-NEXT: movl %edx, %eax
-; X32-NEXT: addb $127, %al
-; X32-NEXT: sahf
-; X32-NEXT: popl %eax
-; X32-NEXT: je .LBB0_2
+; X32-NEXT: testb $-1, %dl
+; X32-NEXT: jne .LBB0_2
; X32-NEXT: # %bb.1: # %if.then
-; X32-NEXT: pushl %ebp
-; X32-NEXT: movl %esp, %ebp
; X32-NEXT: movsbl %al, %eax
; X32-NEXT: pushl %eax
; X32-NEXT: calll external
; X32-NEXT: addl $4, %esp
-; X32-NEXT: popl %ebp
; X32-NEXT: .LBB0_2: # %if.end
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: retl
@@ -59,23 +48,20 @@ define i32 @test1() nounwind {
; X64-NEXT: incb %al
; X64-NEXT: movb %al, {{.*}}(%rip)
; X64-NEXT: incl {{.*}}(%rip)
-; X64-NEXT: pushfq
-; X64-NEXT: popq %rsi
+; X64-NEXT: sete %sil
; X64-NEXT: movb {{.*}}(%rip), %cl
; X64-NEXT: movl %ecx, %edx
; X64-NEXT: incb %dl
; X64-NEXT: cmpb %dil, %cl
; X64-NEXT: sete {{.*}}(%rip)
; X64-NEXT: movb %dl, {{.*}}(%rip)
-; X64-NEXT: pushq %rsi
-; X64-NEXT: popfq
-; X64-NEXT: je .LBB0_2
+; X64-NEXT: testb $-1, %sil
+; X64-NEXT: jne .LBB0_2
; X64-NEXT: # %bb.1: # %if.then
-; X64-NEXT: pushq %rbp
-; X64-NEXT: movq %rsp, %rbp
+; X64-NEXT: pushq %rax
; X64-NEXT: movsbl %al, %edi
; X64-NEXT: callq external
-; X64-NEXT: popq %rbp
+; X64-NEXT: addq $8, %rsp
; X64-NEXT: .LBB0_2: # %if.end
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: retq
@@ -108,54 +94,40 @@ if.end:
define i32 @test2(i32* %ptr) nounwind {
; X32-LABEL: test2:
; X32: # %bb.0: # %entry
-; X32-NEXT: pushl %ebp
-; X32-NEXT: movl %esp, %ebp
-; X32-NEXT: pushl %esi
-; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: pushl %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: incl (%eax)
-; X32-NEXT: seto %al
-; X32-NEXT: lahf
-; X32-NEXT: movl %eax, %esi
+; X32-NEXT: setne %bl
; X32-NEXT: pushl $42
; X32-NEXT: calll external
; X32-NEXT: addl $4, %esp
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: addb $127, %al
-; X32-NEXT: sahf
+; X32-NEXT: testb $-1, %bl
; X32-NEXT: je .LBB1_1
-; X32-NEXT: # %bb.3: # %else
+; X32-NEXT: # %bb.2: # %else
; X32-NEXT: xorl %eax, %eax
-; X32-NEXT: jmp .LBB1_2
+; X32-NEXT: popl %ebx
+; X32-NEXT: retl
; X32-NEXT: .LBB1_1: # %then
; X32-NEXT: movl $64, %eax
-; X32-NEXT: .LBB1_2: # %then
-; X32-NEXT: popl %esi
-; X32-NEXT: popl %ebp
+; X32-NEXT: popl %ebx
; X32-NEXT: retl
;
; X64-LABEL: test2:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rbp
-; X64-NEXT: movq %rsp, %rbp
; X64-NEXT: pushq %rbx
-; X64-NEXT: pushq %rax
; X64-NEXT: incl (%rdi)
-; X64-NEXT: pushfq
-; X64-NEXT: popq %rbx
+; X64-NEXT: setne %bl
; X64-NEXT: movl $42, %edi
; X64-NEXT: callq external
-; X64-NEXT: pushq %rbx
-; X64-NEXT: popfq
+; X64-NEXT: testb $-1, %bl
; X64-NEXT: je .LBB1_1
-; X64-NEXT: # %bb.3: # %else
+; X64-NEXT: # %bb.2: # %else
; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: jmp .LBB1_2
+; X64-NEXT: popq %rbx
+; X64-NEXT: retq
; X64-NEXT: .LBB1_1: # %then
; X64-NEXT: movl $64, %eax
-; X64-NEXT: .LBB1_2: # %then
-; X64-NEXT: addq $8, %rsp
; X64-NEXT: popq %rbx
-; X64-NEXT: popq %rbp
; X64-NEXT: retq
entry:
%val = load i32, i32* %ptr
@@ -183,43 +155,25 @@ declare void @external_b()
define void @test_tail_call(i32* %ptr) nounwind optsize {
; X32-LABEL: test_tail_call:
; X32: # %bb.0: # %entry
-; X32-NEXT: pushl %ebp
-; X32-NEXT: movl %esp, %ebp
-; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: incl (%eax)
-; X32-NEXT: seto %al
-; X32-NEXT: lahf
-; X32-NEXT: movl %eax, %eax
+; X32-NEXT: setne %al
; X32-NEXT: incb a
; X32-NEXT: sete d
-; X32-NEXT: movl %eax, %eax
-; X32-NEXT: addb $127, %al
-; X32-NEXT: sahf
-; X32-NEXT: je .LBB2_1
-; X32-NEXT: # %bb.2: # %else
-; X32-NEXT: popl %ebp
-; X32-NEXT: jmp external_b # TAILCALL
-; X32-NEXT: .LBB2_1: # %then
-; X32-NEXT: popl %ebp
+; X32-NEXT: testb $-1, %al
+; X32-NEXT: jne external_b # TAILCALL
+; X32-NEXT: # %bb.1: # %then
; X32-NEXT: jmp external_a # TAILCALL
;
; X64-LABEL: test_tail_call:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rbp
-; X64-NEXT: movq %rsp, %rbp
; X64-NEXT: incl (%rdi)
-; X64-NEXT: pushfq
-; X64-NEXT: popq %rax
+; X64-NEXT: setne %al
; X64-NEXT: incb {{.*}}(%rip)
; X64-NEXT: sete {{.*}}(%rip)
-; X64-NEXT: pushq %rax
-; X64-NEXT: popfq
-; X64-NEXT: je .LBB2_1
-; X64-NEXT: # %bb.2: # %else
-; X64-NEXT: popq %rbp
-; X64-NEXT: jmp external_b # TAILCALL
-; X64-NEXT: .LBB2_1: # %then
-; X64-NEXT: popq %rbp
+; X64-NEXT: testb $-1, %al
+; X64-NEXT: jne external_b # TAILCALL
+; X64-NEXT: # %bb.1: # %then
; X64-NEXT: jmp external_a # TAILCALL
entry:
%val = load i32, i32* %ptr
Removed: llvm/branches/release_60/test/CodeGen/X86/eflags-copy-expansion.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/test/CodeGen/X86/eflags-copy-expansion.mir?rev=332932&view=auto
==============================================================================
--- llvm/branches/release_60/test/CodeGen/X86/eflags-copy-expansion.mir (original)
+++ llvm/branches/release_60/test/CodeGen/X86/eflags-copy-expansion.mir (removed)
@@ -1,64 +0,0 @@
-# RUN: llc -run-pass postrapseudos -mtriple=i386-apple-macosx -o - %s | FileCheck %s
-
-# Verify that we correctly save and restore eax when copying eflags,
-# even when only a smaller alias of eax is used. We used to check only
-# eax and not its aliases.
-# PR27624.
-
---- |
- target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-
- define void @foo() {
- entry:
- br label %false
- false:
- ret void
- }
-
-...
-
----
-name: foo
-tracksRegLiveness: true
-liveins:
- - { reg: '%edi' }
-body: |
- bb.0.entry:
- liveins: %edi
- NOOP implicit-def %al
-
- ; The bug was triggered only when LivePhysReg is used, which
- ; happens only when the heuristic for the liveness computation
- ; failed. The liveness computation heuristic looks at 10 instructions
- ; before and after the copy. Make sure we do not reach the definition of
- ; AL in 10 instructions, otherwise the heuristic will see that it is live.
- NOOP
- NOOP
- NOOP
- NOOP
- NOOP
- NOOP
- NOOP
- NOOP
- NOOP
- NOOP
- NOOP
- NOOP
- NOOP
- ; Save AL.
- ; CHECK: PUSH32r killed %eax
-
- ; Copy edi into EFLAGS
- ; CHECK-NEXT: %eax = MOV32rr %edi
- ; CHECK-NEXT: %al = ADD8ri %al, 127, implicit-def %eflags
- ; CHECK-NEXT: SAHF implicit-def %eflags, implicit %ah
- %eflags = COPY %edi
-
- ; Restore AL.
- ; CHECK-NEXT: %eax = POP32r
- bb.1.false:
- liveins: %al
- NOOP implicit %al
- RETQ
-
-...
Copied: llvm/branches/release_60/test/CodeGen/X86/flags-copy-lowering.mir (from r329657, llvm/trunk/test/CodeGen/X86/flags-copy-lowering.mir)
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/test/CodeGen/X86/flags-copy-lowering.mir?p2=llvm/branches/release_60/test/CodeGen/X86/flags-copy-lowering.mir&p1=llvm/trunk/test/CodeGen/X86/flags-copy-lowering.mir&r1=329657&r2=332933&rev=332933&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/flags-copy-lowering.mir (original)
+++ llvm/branches/release_60/test/CodeGen/X86/flags-copy-lowering.mir Mon May 21 19:46:36 2018
@@ -71,149 +71,149 @@
name: test_branch
# CHECK-LABEL: name: test_branch
liveins:
- - { reg: '$rdi', virtual-reg: '%0' }
- - { reg: '$rsi', virtual-reg: '%1' }
+ - { reg: '%rdi', virtual-reg: '%0' }
+ - { reg: '%rsi', virtual-reg: '%1' }
body: |
bb.0:
successors: %bb.1, %bb.2, %bb.3
- liveins: $rdi, $rsi
+ liveins: %rdi, %rsi
- %0:gr64 = COPY $rdi
- %1:gr64 = COPY $rsi
- CMP64rr %0, %1, implicit-def $eflags
- %2:gr64 = COPY $eflags
- ; CHECK-NOT: COPY{{( killed)?}} $eflags
- ; CHECK: %[[A_REG:[^:]*]]:gr8 = SETAr implicit $eflags
- ; CHECK-NEXT: %[[B_REG:[^:]*]]:gr8 = SETBr implicit $eflags
- ; CHECK-NOT: COPY{{( killed)?}} $eflags
-
- ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
- ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
-
- $eflags = COPY %2
- JA_1 %bb.1, implicit $eflags
- JB_1 %bb.2, implicit $eflags
+ %0:gr64 = COPY %rdi
+ %1:gr64 = COPY %rsi
+ CMP64rr %0, %1, implicit-def %eflags
+ %2:gr64 = COPY %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+ ; CHECK: %[[A_REG:[^:]*]]:gr8 = SETAr implicit %eflags
+ ; CHECK-NEXT: %[[B_REG:[^:]*]]:gr8 = SETBr implicit %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+ CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+
+ %eflags = COPY %2
+ JA_1 %bb.1, implicit %eflags
+ JB_1 %bb.2, implicit %eflags
JMP_1 %bb.3
- ; CHECK-NOT: $eflags =
+ ; CHECK-NOT: %eflags =
;
- ; CHECK: TEST8ri %[[A_REG]], -1, implicit-def $eflags
- ; CHECK-NEXT: JNE_1 %bb.1, implicit killed $eflags
+ ; CHECK: TEST8ri %[[A_REG]], -1, implicit-def %eflags
+ ; CHECK-NEXT: JNE_1 %bb.1, implicit killed %eflags
; CHECK-SAME: {{$[[:space:]]}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: {{.*$}}
; CHECK-SAME: {{$[[:space:]]}}
- ; CHECK-NEXT: TEST8ri %[[B_REG]], -1, implicit-def $eflags
- ; CHECK-NEXT: JNE_1 %bb.2, implicit killed $eflags
+ ; CHECK-NEXT: TEST8ri %[[B_REG]], -1, implicit-def %eflags
+ ; CHECK-NEXT: JNE_1 %bb.2, implicit killed %eflags
; CHECK-NEXT: JMP_1 %bb.3
bb.1:
%3:gr32 = MOV32ri64 42
- $eax = COPY %3
- RET 0, $eax
+ %eax = COPY %3
+ RET 0, %eax
bb.2:
%4:gr32 = MOV32ri64 43
- $eax = COPY %4
- RET 0, $eax
+ %eax = COPY %4
+ RET 0, %eax
bb.3:
- %5:gr32 = MOV32r0 implicit-def dead $eflags
- $eax = COPY %5
- RET 0, $eax
+ %5:gr32 = MOV32r0 implicit-def dead %eflags
+ %eax = COPY %5
+ RET 0, %eax
...
---
name: test_branch_fallthrough
# CHECK-LABEL: name: test_branch_fallthrough
liveins:
- - { reg: '$rdi', virtual-reg: '%0' }
- - { reg: '$rsi', virtual-reg: '%1' }
+ - { reg: '%rdi', virtual-reg: '%0' }
+ - { reg: '%rsi', virtual-reg: '%1' }
body: |
bb.0:
successors: %bb.1, %bb.2, %bb.3
- liveins: $rdi, $rsi
+ liveins: %rdi, %rsi
- %0:gr64 = COPY $rdi
- %1:gr64 = COPY $rsi
- CMP64rr %0, %1, implicit-def $eflags
- %2:gr64 = COPY $eflags
- ; CHECK-NOT: COPY{{( killed)?}} $eflags
- ; CHECK: %[[A_REG:[^:]*]]:gr8 = SETAr implicit $eflags
- ; CHECK-NEXT: %[[B_REG:[^:]*]]:gr8 = SETBr implicit $eflags
- ; CHECK-NOT: COPY{{( killed)?}} $eflags
-
- ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
- ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
-
- $eflags = COPY %2
- JA_1 %bb.2, implicit $eflags
- JB_1 %bb.3, implicit $eflags
- ; CHECK-NOT: $eflags =
+ %0:gr64 = COPY %rdi
+ %1:gr64 = COPY %rsi
+ CMP64rr %0, %1, implicit-def %eflags
+ %2:gr64 = COPY %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+ ; CHECK: %[[A_REG:[^:]*]]:gr8 = SETAr implicit %eflags
+ ; CHECK-NEXT: %[[B_REG:[^:]*]]:gr8 = SETBr implicit %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+ CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+
+ %eflags = COPY %2
+ JA_1 %bb.2, implicit %eflags
+ JB_1 %bb.3, implicit %eflags
+ ; CHECK-NOT: %eflags =
;
- ; CHECK: TEST8ri %[[A_REG]], -1, implicit-def $eflags
- ; CHECK-NEXT: JNE_1 %bb.2, implicit killed $eflags
+ ; CHECK: TEST8ri %[[A_REG]], -1, implicit-def %eflags
+ ; CHECK-NEXT: JNE_1 %bb.2, implicit killed %eflags
; CHECK-SAME: {{$[[:space:]]}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: {{.*$}}
; CHECK-SAME: {{$[[:space:]]}}
- ; CHECK-NEXT: TEST8ri %[[B_REG]], -1, implicit-def $eflags
- ; CHECK-NEXT: JNE_1 %bb.3, implicit killed $eflags
+ ; CHECK-NEXT: TEST8ri %[[B_REG]], -1, implicit-def %eflags
+ ; CHECK-NEXT: JNE_1 %bb.3, implicit killed %eflags
; CHECK-SAME: {{$[[:space:]]}}
; CHECK-NEXT: bb.1:
bb.1:
- %5:gr32 = MOV32r0 implicit-def dead $eflags
- $eax = COPY %5
- RET 0, $eax
+ %5:gr32 = MOV32r0 implicit-def dead %eflags
+ %eax = COPY %5
+ RET 0, %eax
bb.2:
%3:gr32 = MOV32ri64 42
- $eax = COPY %3
- RET 0, $eax
+ %eax = COPY %3
+ RET 0, %eax
bb.3:
%4:gr32 = MOV32ri64 43
- $eax = COPY %4
- RET 0, $eax
+ %eax = COPY %4
+ RET 0, %eax
...
---
name: test_setcc
# CHECK-LABEL: name: test_setcc
liveins:
- - { reg: '$rdi', virtual-reg: '%0' }
- - { reg: '$rsi', virtual-reg: '%1' }
+ - { reg: '%rdi', virtual-reg: '%0' }
+ - { reg: '%rsi', virtual-reg: '%1' }
body: |
bb.0:
- liveins: $rdi, $rsi
+ liveins: %rdi, %rsi
- %0:gr64 = COPY $rdi
- %1:gr64 = COPY $rsi
- CMP64rr %0, %1, implicit-def $eflags
- %2:gr64 = COPY $eflags
- ; CHECK-NOT: COPY{{( killed)?}} $eflags
- ; CHECK: %[[A_REG:[^:]*]]:gr8 = SETAr implicit $eflags
- ; CHECK-NEXT: %[[B_REG:[^:]*]]:gr8 = SETBr implicit $eflags
- ; CHECK-NEXT: %[[E_REG:[^:]*]]:gr8 = SETEr implicit $eflags
- ; CHECK-NEXT: %[[NE_REG:[^:]*]]:gr8 = SETNEr implicit $eflags
- ; CHECK-NOT: COPY{{( killed)?}} $eflags
-
- ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
- ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
-
- $eflags = COPY %2
- %3:gr8 = SETAr implicit $eflags
- %4:gr8 = SETBr implicit $eflags
- %5:gr8 = SETEr implicit $eflags
- %6:gr8 = SETNEr implicit killed $eflags
- MOV8mr $rsp, 1, $noreg, -16, $noreg, killed %3
- MOV8mr $rsp, 1, $noreg, -16, $noreg, killed %4
- MOV8mr $rsp, 1, $noreg, -16, $noreg, killed %5
- MOV8mr $rsp, 1, $noreg, -16, $noreg, killed %6
- ; CHECK-NOT: $eflags =
+ %0:gr64 = COPY %rdi
+ %1:gr64 = COPY %rsi
+ CMP64rr %0, %1, implicit-def %eflags
+ %2:gr64 = COPY %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+ ; CHECK: %[[A_REG:[^:]*]]:gr8 = SETAr implicit %eflags
+ ; CHECK-NEXT: %[[B_REG:[^:]*]]:gr8 = SETBr implicit %eflags
+ ; CHECK-NEXT: %[[E_REG:[^:]*]]:gr8 = SETEr implicit %eflags
+ ; CHECK-NEXT: %[[NE_REG:[^:]*]]:gr8 = SETNEr implicit %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+ CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+
+ %eflags = COPY %2
+ %3:gr8 = SETAr implicit %eflags
+ %4:gr8 = SETBr implicit %eflags
+ %5:gr8 = SETEr implicit %eflags
+ %6:gr8 = SETNEr implicit killed %eflags
+ MOV8mr %rsp, 1, %noreg, -16, %noreg, killed %3
+ MOV8mr %rsp, 1, %noreg, -16, %noreg, killed %4
+ MOV8mr %rsp, 1, %noreg, -16, %noreg, killed %5
+ MOV8mr %rsp, 1, %noreg, -16, %noreg, killed %6
+ ; CHECK-NOT: %eflags =
; CHECK-NOT: = SET{{.*}}
; CHECK: MOV8mr {{.*}}, killed %[[A_REG]]
; CHECK-CHECK: MOV8mr {{.*}}, killed %[[B_REG]]
@@ -227,44 +227,44 @@ body: |
name: test_cmov
# CHECK-LABEL: name: test_cmov
liveins:
- - { reg: '$rdi', virtual-reg: '%0' }
- - { reg: '$rsi', virtual-reg: '%1' }
+ - { reg: '%rdi', virtual-reg: '%0' }
+ - { reg: '%rsi', virtual-reg: '%1' }
body: |
bb.0:
- liveins: $rdi, $rsi
+ liveins: %rdi, %rsi
- %0:gr64 = COPY $rdi
- %1:gr64 = COPY $rsi
- CMP64rr %0, %1, implicit-def $eflags
- %2:gr64 = COPY $eflags
- ; CHECK-NOT: COPY{{( killed)?}} $eflags
- ; CHECK: %[[A_REG:[^:]*]]:gr8 = SETAr implicit $eflags
- ; CHECK-NEXT: %[[B_REG:[^:]*]]:gr8 = SETBr implicit $eflags
- ; CHECK-NEXT: %[[E_REG:[^:]*]]:gr8 = SETEr implicit $eflags
- ; CHECK-NOT: COPY{{( killed)?}} $eflags
-
- ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
- ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
-
- $eflags = COPY %2
- %3:gr64 = CMOVA64rr %0, %1, implicit $eflags
- %4:gr64 = CMOVB64rr %0, %1, implicit $eflags
- %5:gr64 = CMOVE64rr %0, %1, implicit $eflags
- %6:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags
- ; CHECK-NOT: $eflags =
- ; CHECK: TEST8ri %[[A_REG]], -1, implicit-def $eflags
- ; CHECK-NEXT: %3:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags
- ; CHECK-NEXT: TEST8ri %[[B_REG]], -1, implicit-def $eflags
- ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags
- ; CHECK-NEXT: TEST8ri %[[E_REG]], -1, implicit-def $eflags
- ; CHECK-NEXT: %5:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags
- ; CHECK-NEXT: TEST8ri %[[E_REG]], -1, implicit-def $eflags
- ; CHECK-NEXT: %6:gr64 = CMOVE64rr %0, %1, implicit killed $eflags
- MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %3
- MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %4
- MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %5
- MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %6
+ %0:gr64 = COPY %rdi
+ %1:gr64 = COPY %rsi
+ CMP64rr %0, %1, implicit-def %eflags
+ %2:gr64 = COPY %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+ ; CHECK: %[[A_REG:[^:]*]]:gr8 = SETAr implicit %eflags
+ ; CHECK-NEXT: %[[B_REG:[^:]*]]:gr8 = SETBr implicit %eflags
+ ; CHECK-NEXT: %[[E_REG:[^:]*]]:gr8 = SETEr implicit %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+ CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+
+ %eflags = COPY %2
+ %3:gr64 = CMOVA64rr %0, %1, implicit %eflags
+ %4:gr64 = CMOVB64rr %0, %1, implicit %eflags
+ %5:gr64 = CMOVE64rr %0, %1, implicit %eflags
+ %6:gr64 = CMOVNE64rr %0, %1, implicit killed %eflags
+ ; CHECK-NOT: %eflags =
+ ; CHECK: TEST8ri %[[A_REG]], -1, implicit-def %eflags
+ ; CHECK-NEXT: %3:gr64 = CMOVNE64rr %0, %1, implicit killed %eflags
+ ; CHECK-NEXT: TEST8ri %[[B_REG]], -1, implicit-def %eflags
+ ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed %eflags
+ ; CHECK-NEXT: TEST8ri %[[E_REG]], -1, implicit-def %eflags
+ ; CHECK-NEXT: %5:gr64 = CMOVNE64rr %0, %1, implicit killed %eflags
+ ; CHECK-NEXT: TEST8ri %[[E_REG]], -1, implicit-def %eflags
+ ; CHECK-NEXT: %6:gr64 = CMOVE64rr %0, %1, implicit killed %eflags
+ MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %3
+ MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %4
+ MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %5
+ MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %6
RET 0
@@ -273,32 +273,32 @@ body: |
name: test_adc
# CHECK-LABEL: name: test_adc
liveins:
- - { reg: '$rdi', virtual-reg: '%0' }
- - { reg: '$rsi', virtual-reg: '%1' }
+ - { reg: '%rdi', virtual-reg: '%0' }
+ - { reg: '%rsi', virtual-reg: '%1' }
body: |
bb.0:
- liveins: $rdi, $rsi
+ liveins: %rdi, %rsi
- %0:gr64 = COPY $rdi
- %1:gr64 = COPY $rsi
- %2:gr64 = ADD64rr %0, %1, implicit-def $eflags
- %3:gr64 = COPY $eflags
- ; CHECK-NOT: COPY{{( killed)?}} $eflags
- ; CHECK: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit $eflags
- ; CHECK-NOT: COPY{{( killed)?}} $eflags
-
- ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
- ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
-
- $eflags = COPY %3
- %4:gr64 = ADC64ri32 %2:gr64, 42, implicit-def $eflags, implicit $eflags
- %5:gr64 = ADC64ri32 %4:gr64, 42, implicit-def $eflags, implicit $eflags
- ; CHECK-NOT: $eflags =
- ; CHECK: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def $eflags
- ; CHECK-NEXT: %4:gr64 = ADC64ri32 %2, 42, implicit-def $eflags, implicit killed $eflags
- ; CHECK-NEXT: %5:gr64 = ADC64ri32 %4, 42, implicit-def{{( dead)?}} $eflags, implicit{{( killed)?}} $eflags
- MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %5
+ %0:gr64 = COPY %rdi
+ %1:gr64 = COPY %rsi
+ %2:gr64 = ADD64rr %0, %1, implicit-def %eflags
+ %3:gr64 = COPY %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+ ; CHECK: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+ CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+
+ %eflags = COPY %3
+ %4:gr64 = ADC64ri32 %2:gr64, 42, implicit-def %eflags, implicit %eflags
+ %5:gr64 = ADC64ri32 %4:gr64, 42, implicit-def %eflags, implicit %eflags
+ ; CHECK-NOT: %eflags =
+ ; CHECK: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def %eflags
+ ; CHECK-NEXT: %4:gr64 = ADC64ri32 %2, 42, implicit-def %eflags, implicit killed %eflags
+ ; CHECK-NEXT: %5:gr64 = ADC64ri32 %4, 42, implicit-def{{( dead)?}} %eflags, implicit{{( killed)?}} %eflags
+ MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %5
RET 0
@@ -307,32 +307,32 @@ body: |
name: test_sbb
# CHECK-LABEL: name: test_sbb
liveins:
- - { reg: '$rdi', virtual-reg: '%0' }
- - { reg: '$rsi', virtual-reg: '%1' }
+ - { reg: '%rdi', virtual-reg: '%0' }
+ - { reg: '%rsi', virtual-reg: '%1' }
body: |
bb.0:
- liveins: $rdi, $rsi
+ liveins: %rdi, %rsi
- %0:gr64 = COPY $rdi
- %1:gr64 = COPY $rsi
- %2:gr64 = SUB64rr %0, %1, implicit-def $eflags
- %3:gr64 = COPY killed $eflags
- ; CHECK-NOT: COPY{{( killed)?}} $eflags
- ; CHECK: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit $eflags
- ; CHECK-NOT: COPY{{( killed)?}} $eflags
-
- ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
- ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
-
- $eflags = COPY %3
- %4:gr64 = SBB64ri32 %2:gr64, 42, implicit-def $eflags, implicit killed $eflags
- %5:gr64 = SBB64ri32 %4:gr64, 42, implicit-def dead $eflags, implicit killed $eflags
- ; CHECK-NOT: $eflags =
- ; CHECK: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def $eflags
- ; CHECK-NEXT: %4:gr64 = SBB64ri32 %2, 42, implicit-def $eflags, implicit killed $eflags
- ; CHECK-NEXT: %5:gr64 = SBB64ri32 %4, 42, implicit-def{{( dead)?}} $eflags, implicit{{( killed)?}} $eflags
- MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %5
+ %0:gr64 = COPY %rdi
+ %1:gr64 = COPY %rsi
+ %2:gr64 = SUB64rr %0, %1, implicit-def %eflags
+ %3:gr64 = COPY killed %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+ ; CHECK: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+ CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+
+ %eflags = COPY %3
+ %4:gr64 = SBB64ri32 %2:gr64, 42, implicit-def %eflags, implicit killed %eflags
+ %5:gr64 = SBB64ri32 %4:gr64, 42, implicit-def dead %eflags, implicit killed %eflags
+ ; CHECK-NOT: %eflags =
+ ; CHECK: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def %eflags
+ ; CHECK-NEXT: %4:gr64 = SBB64ri32 %2, 42, implicit-def %eflags, implicit killed %eflags
+ ; CHECK-NEXT: %5:gr64 = SBB64ri32 %4, 42, implicit-def{{( dead)?}} %eflags, implicit{{( killed)?}} %eflags
+ MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %5
RET 0
@@ -341,37 +341,37 @@ body: |
name: test_adcx
# CHECK-LABEL: name: test_adcx
liveins:
- - { reg: '$rdi', virtual-reg: '%0' }
- - { reg: '$rsi', virtual-reg: '%1' }
+ - { reg: '%rdi', virtual-reg: '%0' }
+ - { reg: '%rsi', virtual-reg: '%1' }
body: |
bb.0:
- liveins: $rdi, $rsi
+ liveins: %rdi, %rsi
- %0:gr64 = COPY $rdi
- %1:gr64 = COPY $rsi
- %2:gr64 = ADD64rr %0, %1, implicit-def $eflags
- %3:gr64 = COPY $eflags
- ; CHECK-NOT: COPY{{( killed)?}} $eflags
- ; CHECK: %[[E_REG:[^:]*]]:gr8 = SETEr implicit $eflags
- ; CHECK-NEXT: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit $eflags
- ; CHECK-NOT: COPY{{( killed)?}} $eflags
-
- ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
- ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ %0:gr64 = COPY %rdi
+ %1:gr64 = COPY %rsi
+ %2:gr64 = ADD64rr %0, %1, implicit-def %eflags
+ %3:gr64 = COPY %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+ ; CHECK: %[[E_REG:[^:]*]]:gr8 = SETEr implicit %eflags
+ ; CHECK-NEXT: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+ CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
- $eflags = COPY %3
- %4:gr64 = CMOVE64rr %0, %1, implicit $eflags
+ %eflags = COPY %3
+ %4:gr64 = CMOVE64rr %0, %1, implicit %eflags
%5:gr64 = MOV64ri32 42
- %6:gr64 = ADCX64rr %2, %5, implicit-def $eflags, implicit $eflags
- ; CHECK-NOT: $eflags =
- ; CHECK: TEST8ri %[[E_REG]], -1, implicit-def $eflags
- ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags
+ %6:gr64 = ADCX64rr %2, %5, implicit-def %eflags, implicit %eflags
+ ; CHECK-NOT: %eflags =
+ ; CHECK: TEST8ri %[[E_REG]], -1, implicit-def %eflags
+ ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed %eflags
; CHECK-NEXT: %5:gr64 = MOV64ri32 42
- ; CHECK-NEXT: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def $eflags
- ; CHECK-NEXT: %6:gr64 = ADCX64rr %2, %5, implicit-def{{( dead)?}} $eflags, implicit killed $eflags
- MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %4
- MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %6
+ ; CHECK-NEXT: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def %eflags
+ ; CHECK-NEXT: %6:gr64 = ADCX64rr %2, %5, implicit-def{{( dead)?}} %eflags, implicit killed %eflags
+ MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %4
+ MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %6
RET 0
@@ -380,37 +380,37 @@ body: |
name: test_adox
# CHECK-LABEL: name: test_adox
liveins:
- - { reg: '$rdi', virtual-reg: '%0' }
- - { reg: '$rsi', virtual-reg: '%1' }
+ - { reg: '%rdi', virtual-reg: '%0' }
+ - { reg: '%rsi', virtual-reg: '%1' }
body: |
bb.0:
- liveins: $rdi, $rsi
+ liveins: %rdi, %rsi
- %0:gr64 = COPY $rdi
- %1:gr64 = COPY $rsi
- %2:gr64 = ADD64rr %0, %1, implicit-def $eflags
- %3:gr64 = COPY $eflags
- ; CHECK-NOT: COPY{{( killed)?}} $eflags
- ; CHECK: %[[E_REG:[^:]*]]:gr8 = SETEr implicit $eflags
- ; CHECK-NEXT: %[[OF_REG:[^:]*]]:gr8 = SETOr implicit $eflags
- ; CHECK-NOT: COPY{{( killed)?}} $eflags
-
- ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
- ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ %0:gr64 = COPY %rdi
+ %1:gr64 = COPY %rsi
+ %2:gr64 = ADD64rr %0, %1, implicit-def %eflags
+ %3:gr64 = COPY %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+ ; CHECK: %[[E_REG:[^:]*]]:gr8 = SETEr implicit %eflags
+ ; CHECK-NEXT: %[[OF_REG:[^:]*]]:gr8 = SETOr implicit %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+ CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
- $eflags = COPY %3
- %4:gr64 = CMOVE64rr %0, %1, implicit $eflags
+ %eflags = COPY %3
+ %4:gr64 = CMOVE64rr %0, %1, implicit %eflags
%5:gr64 = MOV64ri32 42
- %6:gr64 = ADOX64rr %2, %5, implicit-def $eflags, implicit $eflags
- ; CHECK-NOT: $eflags =
- ; CHECK: TEST8ri %[[E_REG]], -1, implicit-def $eflags
- ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags
+ %6:gr64 = ADOX64rr %2, %5, implicit-def %eflags, implicit %eflags
+ ; CHECK-NOT: %eflags =
+ ; CHECK: TEST8ri %[[E_REG]], -1, implicit-def %eflags
+ ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed %eflags
; CHECK-NEXT: %5:gr64 = MOV64ri32 42
- ; CHECK-NEXT: dead %{{[^:]*}}:gr8 = ADD8ri %[[OF_REG]], 127, implicit-def $eflags
- ; CHECK-NEXT: %6:gr64 = ADOX64rr %2, %5, implicit-def{{( dead)?}} $eflags, implicit killed $eflags
- MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %4
- MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %6
+ ; CHECK-NEXT: dead %{{[^:]*}}:gr8 = ADD8ri %[[OF_REG]], 127, implicit-def %eflags
+ ; CHECK-NEXT: %6:gr64 = ADOX64rr %2, %5, implicit-def{{( dead)?}} %eflags, implicit killed %eflags
+ MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %4
+ MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %6
RET 0
@@ -419,32 +419,32 @@ body: |
name: test_rcl
# CHECK-LABEL: name: test_rcl
liveins:
- - { reg: '$rdi', virtual-reg: '%0' }
- - { reg: '$rsi', virtual-reg: '%1' }
+ - { reg: '%rdi', virtual-reg: '%0' }
+ - { reg: '%rsi', virtual-reg: '%1' }
body: |
bb.0:
- liveins: $rdi, $rsi
+ liveins: %rdi, %rsi
- %0:gr64 = COPY $rdi
- %1:gr64 = COPY $rsi
- %2:gr64 = ADD64rr %0, %1, implicit-def $eflags
- %3:gr64 = COPY $eflags
- ; CHECK-NOT: COPY{{( killed)?}} $eflags
- ; CHECK: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit $eflags
- ; CHECK-NOT: COPY{{( killed)?}} $eflags
-
- ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
- ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
-
- $eflags = COPY %3
- %4:gr64 = RCL64r1 %2:gr64, implicit-def $eflags, implicit $eflags
- %5:gr64 = RCL64r1 %4:gr64, implicit-def $eflags, implicit $eflags
- ; CHECK-NOT: $eflags =
- ; CHECK: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def $eflags
- ; CHECK-NEXT: %4:gr64 = RCL64r1 %2, implicit-def $eflags, implicit killed $eflags
- ; CHECK-NEXT: %5:gr64 = RCL64r1 %4, implicit-def{{( dead)?}} $eflags, implicit{{( killed)?}} $eflags
- MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %5
+ %0:gr64 = COPY %rdi
+ %1:gr64 = COPY %rsi
+ %2:gr64 = ADD64rr %0, %1, implicit-def %eflags
+ %3:gr64 = COPY %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+ ; CHECK: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+ CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+
+ %eflags = COPY %3
+ %4:gr64 = RCL64r1 %2:gr64, implicit-def %eflags, implicit %eflags
+ %5:gr64 = RCL64r1 %4:gr64, implicit-def %eflags, implicit %eflags
+ ; CHECK-NOT: %eflags =
+ ; CHECK: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def %eflags
+ ; CHECK-NEXT: %4:gr64 = RCL64r1 %2, implicit-def %eflags, implicit killed %eflags
+ ; CHECK-NEXT: %5:gr64 = RCL64r1 %4, implicit-def{{( dead)?}} %eflags, implicit{{( killed)?}} %eflags
+ MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %5
RET 0
@@ -453,32 +453,32 @@ body: |
name: test_rcr
# CHECK-LABEL: name: test_rcr
liveins:
- - { reg: '$rdi', virtual-reg: '%0' }
- - { reg: '$rsi', virtual-reg: '%1' }
+ - { reg: '%rdi', virtual-reg: '%0' }
+ - { reg: '%rsi', virtual-reg: '%1' }
body: |
bb.0:
- liveins: $rdi, $rsi
+ liveins: %rdi, %rsi
- %0:gr64 = COPY $rdi
- %1:gr64 = COPY $rsi
- %2:gr64 = ADD64rr %0, %1, implicit-def $eflags
- %3:gr64 = COPY $eflags
- ; CHECK-NOT: COPY{{( killed)?}} $eflags
- ; CHECK: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit $eflags
- ; CHECK-NOT: COPY{{( killed)?}} $eflags
-
- ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
- ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
-
- $eflags = COPY %3
- %4:gr64 = RCR64r1 %2:gr64, implicit-def $eflags, implicit $eflags
- %5:gr64 = RCR64r1 %4:gr64, implicit-def $eflags, implicit $eflags
- ; CHECK-NOT: $eflags =
- ; CHECK: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def $eflags
- ; CHECK-NEXT: %4:gr64 = RCR64r1 %2, implicit-def $eflags, implicit killed $eflags
- ; CHECK-NEXT: %5:gr64 = RCR64r1 %4, implicit-def{{( dead)?}} $eflags, implicit{{( killed)?}} $eflags
- MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %5
+ %0:gr64 = COPY %rdi
+ %1:gr64 = COPY %rsi
+ %2:gr64 = ADD64rr %0, %1, implicit-def %eflags
+ %3:gr64 = COPY %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+ ; CHECK: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit %eflags
+ ; CHECK-NOT: COPY{{( killed)?}} %eflags
+
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+ CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
+
+ %eflags = COPY %3
+ %4:gr64 = RCR64r1 %2:gr64, implicit-def %eflags, implicit %eflags
+ %5:gr64 = RCR64r1 %4:gr64, implicit-def %eflags, implicit %eflags
+ ; CHECK-NOT: %eflags =
+ ; CHECK: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def %eflags
+ ; CHECK-NEXT: %4:gr64 = RCR64r1 %2, implicit-def %eflags, implicit killed %eflags
+ ; CHECK-NEXT: %5:gr64 = RCR64r1 %4, implicit-def{{( dead)?}} %eflags, implicit{{( killed)?}} %eflags
+ MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %5
RET 0
Modified: llvm/branches/release_60/test/CodeGen/X86/mul-i1024.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/test/CodeGen/X86/mul-i1024.ll?rev=332933&r1=332932&r2=332933&view=diff
==============================================================================
--- llvm/branches/release_60/test/CodeGen/X86/mul-i1024.ll (original)
+++ llvm/branches/release_60/test/CodeGen/X86/mul-i1024.ll Mon May 21 19:46:36 2018
@@ -6,202 +6,195 @@ define void @test_1024(i1024* %a, i1024*
; X32-LABEL: test_1024:
; X32: # %bb.0:
; X32-NEXT: pushl %ebp
-; X32-NEXT: movl %esp, %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
-; X32-NEXT: subl $996, %esp # imm = 0x3E4
-; X32-NEXT: movl 12(%ebp), %eax
-; X32-NEXT: movl 32(%eax), %eax
+; X32-NEXT: subl $1000, %esp # imm = 0x3E8
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl 48(%eax), %ecx
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl 32(%edx), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: xorl %ecx, %ecx
-; X32-NEXT: mull %ecx
+; X32-NEXT: xorl %edi, %edi
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl 8(%ebp), %esi
-; X32-NEXT: movl 48(%esi), %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: mull %ecx
-; X32-NEXT: xorl %ecx, %ecx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: adcl %edi, %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %ebx, %ecx
+; X32-NEXT: movl %edx, %eax
+; X32-NEXT: adcl %ebp, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl 32(%esi), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: mull %ecx
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: addl %ebx, %ecx
; X32-NEXT: movl %edx, %eax
-; X32-NEXT: adcl %edi, %eax
-; X32-NEXT: movl %edi, %ecx
-; X32-NEXT: movl %ecx, -204(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, -892(%ebp) # 4-byte Spill
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: adcl %ebp, %eax
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 36(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: mull %edx
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: movl %edi, -304(%ebp) # 4-byte Spill
-; X32-NEXT: addl %ecx, %edi
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %eax
; X32-NEXT: adcl $0, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl 36(%esi), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: xorl %ecx, %ecx
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %ecx, -124(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, -184(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: addl %esi, %edx
-; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: addl %edi, %ebp
+; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: leal (%ebx,%edi), %eax
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: leal (%ecx,%edi), %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: leal (%ebx,%eax), %eax
+; X32-NEXT: leal (%ecx,%ebp), %edx
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: seto %al
-; X32-NEXT: lahf
-; X32-NEXT: movl %eax, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %ecx, %edi
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl %esi, %ebx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl (%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, %esi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl 8(%ebp), %ecx
-; X32-NEXT: movl 16(%ecx), %eax
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X32-NEXT: movl 16(%ebp), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: mull %edx
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %esi, %ecx
+; X32-NEXT: adcl %ebx, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl (%ecx), %eax
+; X32-NEXT: movl (%ebp), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movl %esi, %ebp
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %edx, %eax
-; X32-NEXT: adcl %edi, %eax
+; X32-NEXT: adcl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %edi, %eax
+; X32-NEXT: adcl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movl %esi, %ecx
-; X32-NEXT: adcl %edi, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %ebx
-; X32-NEXT: movl %ebx, -256(%ebp) # 4-byte Spill
-; X32-NEXT: movl -100(%ebp), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, -80(%ebp) # 4-byte Folded Spill
-; X32-NEXT: movl -204(%ebp), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, -220(%ebp) # 4-byte Folded Spill
-; X32-NEXT: setb -388(%ebp) # 1-byte Folded Spill
-; X32-NEXT: movl 12(%ebp), %eax
-; X32-NEXT: movl 4(%eax), %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: mull %edx
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ebx, %edi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %ecx, %edi
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %esi
-; X32-NEXT: setb %bh
-; X32-NEXT: addl %eax, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movzbl %bh, %eax
-; X32-NEXT: adcl %edx, %eax
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: movl %edi, -72(%ebp) # 4-byte Spill
-; X32-NEXT: movl 12(%ebp), %eax
-; X32-NEXT: movl 8(%eax), %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: xorl %ebx, %ebx
-; X32-NEXT: mull %ebx
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: adcl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %eax, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %edx, %eax
-; X32-NEXT: addl %esi, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %eax
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
-; X32-NEXT: movl 52(%eax), %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl 4(%esi), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: mull %ebx
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: addl %ecx, %edi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: addl %ebx, %edi
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ecx, %esi
-; X32-NEXT: movl %ecx, %edi
+; X32-NEXT: xorl %ecx, %ecx
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %ebx, %ecx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: addl %ebp, %ecx
+; X32-NEXT: movl %ebp, %esi
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebx, %edi
+; X32-NEXT: movl %ebx, %ebp
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb %cl
-; X32-NEXT: addl %eax, %esi
+; X32-NEXT: addl %eax, %edi
+; X32-NEXT: movl %edi, (%esp) # 4-byte Spill
; X32-NEXT: movzbl %cl, %eax
; X32-NEXT: adcl %edx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
-; X32-NEXT: movl 56(%eax), %eax
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl 8(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %ecx
-; X32-NEXT: addl %eax, %ebx
-; X32-NEXT: adcl %edx, %edi
+; X32-NEXT: addl %eax, %esi
+; X32-NEXT: adcl %edx, %ebp
+; X32-NEXT: addl %edi, %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebx, %ebp
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X32-NEXT: movl 52(%ebp), %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: xorl %ecx, %ecx
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: addl %edi, %ebx
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: addl %esi, %ebx
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: setb %bl
+; X32-NEXT: addl %eax, %ecx
+; X32-NEXT: movzbl %bl, %ebx
+; X32-NEXT: adcl %edx, %ebx
+; X32-NEXT: movl 56(%ebp), %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: xorl %edx, %edx
+; X32-NEXT: mull %edx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %ebp
+; X32-NEXT: addl %eax, %ebp
+; X32-NEXT: adcl %edx, %edi
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebx, %edi
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edi, %eax
@@ -210,64 +203,60 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 1-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 40(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %ebx, -396(%ebp) # 4-byte Spill
-; X32-NEXT: movl -324(%ebp), %edx # 4-byte Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl %edx, %edi
; X32-NEXT: addl %eax, %edi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl %ebx, %ecx
; X32-NEXT: addl %esi, %edi
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl %ebp, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: seto %al
-; X32-NEXT: lahf
-; X32-NEXT: movl %eax, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl %edi, %eax
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 12(%ebp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl 16(%ecx), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ebx, %ebx
; X32-NEXT: mull %ebx
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %esi, -84(%ebp) # 4-byte Spill
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl 20(%ecx), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: mull %ebx
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %esi, %ebx
+; X32-NEXT: addl %ebp, %ebx
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: addl %edi, %ebx
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: adcl %ebp, %ecx
; X32-NEXT: setb %bl
; X32-NEXT: addl %eax, %ecx
; X32-NEXT: movzbl %bl, %esi
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 24(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %edx, %edx
@@ -276,55 +265,54 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edi, %ebx
; X32-NEXT: addl %eax, %ebx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: adcl %edx, %eax
; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %eax
-; X32-NEXT: movl %eax, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl %ecx, %eax
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT: adcl %edx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %edx, %eax
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %ebx, -56(%ebp) # 4-byte Spill
-; X32-NEXT: movl %eax, -780(%ebp) # 4-byte Spill
-; X32-NEXT: movl -132(%ebp), %edx # 4-byte Reload
-; X32-NEXT: movl %edx, %eax
+; X32-NEXT: adcl %esi, %eax
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %ecx, %eax
+; X32-NEXT: adcl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, %eax
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %esi, %eax
+; X32-NEXT: adcl %edx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: adcl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %ebx, %eax
+; X32-NEXT: adcl %esi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
-; X32-NEXT: movl 20(%eax), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-NEXT: movl 20(%edi), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
@@ -333,31 +321,30 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: addl %ebx, %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: addl %edi, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl %ebp, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %ebx, %ecx
; X32-NEXT: setb %bl
; X32-NEXT: addl %eax, %ecx
; X32-NEXT: movzbl %bl, %esi
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl 8(%ebp), %eax
-; X32-NEXT: movl 24(%eax), %eax
+; X32-NEXT: movl 24(%edi), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: mull %edx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %edx
+; X32-NEXT: movl %ebp, %edi
; X32-NEXT: addl %eax, %edi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl %edx, %ebx
; X32-NEXT: addl %ecx, %edi
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ebx
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -367,7 +354,8 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl 4(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
@@ -377,8 +365,8 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: addl %ecx, %esi
; X32-NEXT: movl %edx, %edi
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: addl %ebx, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl %ebp, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %ecx, %edi
; X32-NEXT: setb %cl
@@ -386,66 +374,64 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movzbl %cl, %eax
; X32-NEXT: adcl %edx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 8(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: movl %ecx, -160(%ebp) # 4-byte Spill
-; X32-NEXT: movl %edx, -268(%ebp) # 4-byte Spill
-; X32-NEXT: movl %ebx, %esi
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebp, %esi
; X32-NEXT: movl %esi, %eax
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %ecx
; X32-NEXT: adcl %edx, %ecx
; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl %ebx, %ecx
; X32-NEXT: movl %esi, %edx
+; X32-NEXT: movl %esi, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %edx
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: pushl %eax
-; X32-NEXT: seto %al
-; X32-NEXT: lahf
-; X32-NEXT: movl %eax, %edx
-; X32-NEXT: popl %eax
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl %eax, %edx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %esi, %edx
-; X32-NEXT: movl %edx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: adcl %ebx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, %eax
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl %ebp, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edi, %eax
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %edx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: adcl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: addl %edx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
@@ -458,82 +444,78 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: addl %edx, %eax
-; X32-NEXT: adcl %ebx, %esi
+; X32-NEXT: adcl %ebp, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %edx, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %edx, %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %edi, %eax
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: adcl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 40(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %edi
-; X32-NEXT: addl %eax, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: addl %esi, %edi
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %edx
+; X32-NEXT: addl %eax, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl %ecx, %ebp
+; X32-NEXT: addl %edi, %edx
+; X32-NEXT: adcl %ebx, %ebp
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %edi, %eax
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edx, %eax
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edx, %eax
+; X32-NEXT: adcl %ebp, %eax
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: seto %al
-; X32-NEXT: lahf
-; X32-NEXT: movl %eax, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %edi, %eax
+; X32-NEXT: adcl %edx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %edx, %eax
+; X32-NEXT: adcl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 12(%ebp), %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl 48(%esi), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: movl %ebx, -336(%ebp) # 4-byte Spill
+; X32-NEXT: movl %eax, %ebp
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl 52(%esi), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -542,38 +524,37 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: addl %edi, %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl %ebx, %esi
+; X32-NEXT: addl %ebp, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb %bl
; X32-NEXT: addl %eax, %ecx
; X32-NEXT: movzbl %bl, %esi
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 56(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: mull %edx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebp, %ebx
; X32-NEXT: addl %eax, %ebx
-; X32-NEXT: movl %edi, %edx
-; X32-NEXT: movl %edx, -176(%ebp) # 4-byte Spill
-; X32-NEXT: adcl -360(%ebp), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl %edx, %edi
; X32-NEXT: addl %ecx, %ebx
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %edi
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT: movl %edx, %eax
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %edx, %eax
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movl %edx, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
@@ -584,67 +565,65 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
-; X32-NEXT: movl 64(%eax), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-NEXT: movl 64(%edi), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %esi, -384(%ebp) # 4-byte Spill
-; X32-NEXT: movl -116(%ebp), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %ecx
-; X32-NEXT: movl %eax, %edx
-; X32-NEXT: movl %edx, -480(%ebp) # 4-byte Spill
-; X32-NEXT: addl %edx, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: adcl %esi, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: addl %edx, %eax
+; X32-NEXT: movl %esi, %ecx
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %ebx, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: adcl %edx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
-; X32-NEXT: movl 80(%eax), %eax
+; X32-NEXT: movl 80(%edi), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: mull %edx
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movl %ebp, %edi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %ebp, %edi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %esi, %edi
; X32-NEXT: adcl %edx, %ebx
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 12(%ebp), %ecx
+; X32-NEXT: addl %ebp, %esi
+; X32-NEXT: adcl %edx, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl 80(%ecx), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: xorl %ebx, %ebx
-; X32-NEXT: mull %ebx
+; X32-NEXT: xorl %edi, %edi
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl %edx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: adcl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl 64(%ecx), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: mull %ebx
+; X32-NEXT: mull %edi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: addl %esi, %ecx
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %esi, -496(%ebp) # 4-byte Spill
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %esi, %ecx
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %ebx, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
@@ -654,15 +633,16 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: adcl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %eax
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
@@ -677,122 +657,116 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: adcl %edx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: pushl %eax
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: addb $127, %al
-; X32-NEXT: sahf
-; X32-NEXT: popl %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: adcl %edx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
+; X32-NEXT: addb $255, %al
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: adcl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: adcl %edx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: adcl %edx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: adcl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: adcl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl %esi, %eax
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: adcl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %edx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: adcl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %esi, %eax
+; X32-NEXT: adcl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 68(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: addl %edi, %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl %ebp, %edi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: addl %ebx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: addl %ebx, %edi
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebp, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
-; X32-NEXT: adcl %edx, %edi
-; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
+; X32-NEXT: adcl %edx, %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 72(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: mull %edx
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: movl %esi, -484(%ebp) # 4-byte Spill
-; X32-NEXT: movl %edx, -488(%ebp) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: adcl %edx, %ebx
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: adcl %edi, %ebx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebx, %edx
+; X32-NEXT: addl %eax, %ebx
+; X32-NEXT: adcl %edi, %ebp
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: adcl %esi, %ebp
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: addl %edx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: addl %edx, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl %ecx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl %eax, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl %ebx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: addl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl %ebx, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: adcl %ebp, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl %edx, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %ecx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl 84(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -807,48 +781,46 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: addl %edi, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %ebx, %ecx
-; X32-NEXT: setb %bl
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: movzbl %bl, %esi
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 1-byte Folded Reload
+; X32-NEXT: adcl %edx, %ebp
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 88(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: mull %edx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %ebx
-; X32-NEXT: addl %eax, %edi
+; X32-NEXT: movl %edi, %esi
+; X32-NEXT: addl %eax, %esi
+; X32-NEXT: adcl %edx, %ebx
+; X32-NEXT: addl %ecx, %esi
+; X32-NEXT: adcl %ebp, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %edx, %eax
-; X32-NEXT: addl %ecx, %edi
-; X32-NEXT: adcl %esi, %eax
-; X32-NEXT: movl %eax, %esi
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %esi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: adcl %ebx, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: adcl %esi, %edi
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: addl %ebx, %edi
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: addl %edi, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 84(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
@@ -858,100 +830,97 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: addl %esi, %edi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl %ebp, %edi
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb %bl
; X32-NEXT: addl %eax, %ecx
; X32-NEXT: movzbl %bl, %edi
; X32-NEXT: adcl %edx, %edi
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 88(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: mull %edx
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: movl %esi, -556(%ebp) # 4-byte Spill
-; X32-NEXT: movl %edx, -560(%ebp) # 4-byte Spill
-; X32-NEXT: movl -524(%ebp), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %esi, %ebx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebp, %ebx
+; X32-NEXT: addl %eax, %ebx
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
-; X32-NEXT: movl %esi, %edx
-; X32-NEXT: movl %edx, -728(%ebp) # 4-byte Spill
-; X32-NEXT: addl -136(%ebp), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, -712(%ebp) # 4-byte Spill
-; X32-NEXT: movl -668(%ebp), %ecx # 4-byte Reload
-; X32-NEXT: adcl -276(%ebp), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, -968(%ebp) # 4-byte Spill
; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebp, %ecx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl 68(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
-; X32-NEXT: xorl %ebx, %ebx
; X32-NEXT: movl %eax, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: addl %edi, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: addl %ebx, %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl %ebp, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %ebx, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: addl %eax, %ecx
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 72(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: mull %ebx
+; X32-NEXT: xorl %edx, %edx
+; X32-NEXT: mull %edx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %ebx, -568(%ebp) # 4-byte Spill
-; X32-NEXT: movl -500(%ebp), %edx # 4-byte Reload
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %eax, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %ebx, %eax
-; X32-NEXT: addl %ecx, %edi
-; X32-NEXT: adcl %esi, %eax
-; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebp, %edx
+; X32-NEXT: addl %eax, %ebp
+; X32-NEXT: adcl %edi, %ebx
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: adcl %esi, %ebx
; X32-NEXT: movl %edx, %eax
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %esi
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ecx, %esi
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebp, %ecx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebx, %ecx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: addl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %edx, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl %esi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: adcl %ebx, %edi
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edi, %ebp
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl %ebp, %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: addl %edx, %eax
@@ -962,9 +931,9 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: addl %edx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: adcl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 12(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
@@ -985,136 +954,132 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: addl %edx, %ebx
; X32-NEXT: adcl %esi, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl (%esp), %edi # 4-byte Reload
+; X32-NEXT: addl %ebp, %edi
+; X32-NEXT: movl %edi, (%esp) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: addl %edi, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: addl %ecx, %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: addl %edi, %esi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: adcl %esi, %edx
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: movl %ebx, %ecx
+; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: addl %ebp, %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %edx
-; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl (%esp), %esi # 4-byte Reload
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: adcl $0, %eax
-; X32-NEXT: addl %edx, %ecx
-; X32-NEXT: adcl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT: adcl $0, %edx
+; X32-NEXT: addl %ecx, %esi
+; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: setb %dl
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movzbl %dl, %edx
; X32-NEXT: adcl %ebx, %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: adcl $0, %eax
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl (%esp), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: adcl %ebx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl %eax, %edi
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 44(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: movl %ebx, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: addl %esi, %ecx
+; X32-NEXT: addl %esi, %ebx
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: addl %edi, %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %edx
-; X32-NEXT: movl %esi, %ecx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: addl %ebx, %edx
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
-; X32-NEXT: adcl %edi, %ebx
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl %esi, %edx
-; X32-NEXT: adcl %ecx, %ebx
+; X32-NEXT: setb %bl
+; X32-NEXT: addl %eax, %edx
+; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: adcl %ecx, %eax
+; X32-NEXT: movl %edi, %ecx
+; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: adcl %esi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %edi
+; X32-NEXT: movl %edi, (%esp) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl %ecx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebx, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %esi
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %edi
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %ebp, (%esp) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %eax
-; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: adcl %edi, %ebx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 1-byte Folded Reload
+; X32-NEXT: adcl %edx, %ebp
+; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: addl %eax, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebx, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: adcl %ebp, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edi
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: pushl %eax
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: addb $127, %al
-; X32-NEXT: sahf
-; X32-NEXT: popl %eax
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: addb $255, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl (%esp), %eax # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %eax
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -1124,76 +1089,74 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %edi, %eax
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 12(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: addl %ebx, %ecx
-; X32-NEXT: adcl $0, %edx
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %eax, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: addl %edi, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edx
-; X32-NEXT: setb %cl
+; X32-NEXT: addl %edi, %ebx
+; X32-NEXT: adcl $0, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: addl %esi, %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: setb %bl
; X32-NEXT: addl %eax, %edx
-; X32-NEXT: movzbl %cl, %eax
-; X32-NEXT: adcl %esi, %eax
-; X32-NEXT: movl %edi, %esi
-; X32-NEXT: addl %esi, %edx
-; X32-NEXT: adcl %ebx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: addl %edi, %ecx
+; X32-NEXT: movzbl %bl, %ebp
+; X32-NEXT: adcl %ecx, %ebp
+; X32-NEXT: movl %esi, %ecx
+; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: adcl %edi, %ebp
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: addl %ecx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl %eax, %esi
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %ecx, %edi
+; X32-NEXT: addl %eax, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: movl %ebp, %esi
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %ebx, %edi
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, %ecx
; X32-NEXT: adcl $0, %eax
-; X32-NEXT: addl %esi, %ecx
-; X32-NEXT: adcl %ebx, %eax
-; X32-NEXT: setb %bl
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movzbl %bl, %esi
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: adcl %esi, %edi
+; X32-NEXT: setb %cl
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: adcl %edx, %ecx
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl %edi, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: addl %eax, %ebx
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: adcl %ecx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl %ebp, %edi
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %eax
-; X32-NEXT: addb $127, %al
-; X32-NEXT: sahf
+; X32-NEXT: addb $255, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -1204,59 +1167,60 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 44(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: addl %esi, %ecx
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: addl %edi, %ebx
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
-; X32-NEXT: setb %bl
-; X32-NEXT: addl %eax, %esi
-; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: addl %edi, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %esi, %ebp
+; X32-NEXT: setb %cl
+; X32-NEXT: addl %eax, %ebp
+; X32-NEXT: movzbl %cl, %eax
; X32-NEXT: adcl %edx, %eax
-; X32-NEXT: movl %ecx, %edx
-; X32-NEXT: addl %edx, %esi
-; X32-NEXT: adcl %edi, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %edx
+; X32-NEXT: addl %edx, %ebp
+; X32-NEXT: adcl %esi, %eax
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %eax, %edi
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %esi, %edx
+; X32-NEXT: movl %ebp, %edx
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebx, %edi
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %eax
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: adcl %ebx, %ecx
+; X32-NEXT: addl %edx, %eax
+; X32-NEXT: adcl %edi, %ecx
; X32-NEXT: setb %dl
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movzbl %dl, %edx
-; X32-NEXT: adcl %esi, %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl %edi, %ebx
+; X32-NEXT: movzbl %dl, %eax
+; X32-NEXT: adcl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: movl %esi, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
@@ -1264,67 +1228,60 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %ebx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: adcl %eax, %edi
+; X32-NEXT: adcl %ebp, %esi
+; X32-NEXT: addb $255, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %eax
-; X32-NEXT: addb $127, %al
-; X32-NEXT: sahf
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %eax
-; X32-NEXT: addb $127, %al
-; X32-NEXT: sahf
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addb $255, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: adcl (%esp), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
-; X32-NEXT: movl 60(%eax), %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl 60(%eax), %esi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl %ebp, %ebx
; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %edi, %ebp
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movzbl %bl, %edi
+; X32-NEXT: adcl %edi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
@@ -1333,64 +1290,64 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %edi, %ebp
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %ebx, %edi
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %edi, %ebp
+; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edi
-; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: adcl %esi, %ebx
+; X32-NEXT: setb (%esp) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
@@ -1398,38 +1355,38 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ecx
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: adcl $0, %esi
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ebp, %edi
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl (%esp), %ebx # 4-byte Folded Reload
+; X32-NEXT: setb (%esp) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %ebp
; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
+; X32-NEXT: movzbl (%esp), %edi # 1-byte Folded Reload
; X32-NEXT: adcl %edi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
@@ -1452,193 +1409,192 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl %ecx, %esi
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %esi, %ebp
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edi
-; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: mull %ecx
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl %bl, %ecx
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %edi, %ebx
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: adcl %edi, %esi
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %esi, %ebp
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %edi, %ecx
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %esi
-; X32-NEXT: setb %cl
+; X32-NEXT: adcl %esi, %edi
+; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %edi
+; X32-NEXT: addl %eax, %ebx
; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, %edi
+; X32-NEXT: adcl $0, %ebx
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl (%esp), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: setb (%esp) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
; X32-NEXT: adcl %esi, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %ebx
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %ebp
+; X32-NEXT: adcl %edx, %edi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: addl %edi, %edx
+; X32-NEXT: addl %ebx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl %ecx, %esi
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X32-NEXT: adcl %eax, %ebx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl $0, %eax
+; X32-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %ebp
+; X32-NEXT: adcl $0, %edi
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %esi
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %esi, %ecx
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: setb %cl
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
@@ -1648,35 +1604,37 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull %edi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl (%esp), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
@@ -1686,132 +1644,131 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: addl %ebx, %ecx
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 28(%eax), %ebx
; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ebx
; X32-NEXT: movl %ebx, %esi
-; X32-NEXT: movl %esi, -48(%ebp) # 4-byte Spill
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %ebx
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ebx
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: adcl %ebp, %ebx
; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %ebx
+; X32-NEXT: addl %eax, %ebp
; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl (%esp), %eax # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
+; X32-NEXT: movzbl %bl, %esi
; X32-NEXT: adcl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %edi
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edx, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: addl %ebx, %edx
+; X32-NEXT: addl %ebp, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl %ecx, %esi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl $0, %eax
+; X32-NEXT: adcl $0, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, (%esp) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %edx
; X32-NEXT: adcl $0, %esi
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: adcl $0, %eax
+; X32-NEXT: adcl $0, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: addl %ecx, %esi
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ebx
+; X32-NEXT: adcl %ebx, %ecx
+; X32-NEXT: setb %bl
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %edi
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
@@ -1821,81 +1778,81 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %ebx
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %ebx, %ecx
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: adcl %esi, %ebp
; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ebx
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %ebx
+; X32-NEXT: addl %eax, %ebp
; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -1906,42 +1863,40 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
; X32-NEXT: adcl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %edi
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edx, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: addl %ebx, %edx
+; X32-NEXT: addl %ebp, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl %ecx, %esi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl $0, %eax
+; X32-NEXT: adcl $0, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
@@ -1951,8 +1906,7 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X32-NEXT: adcl %eax, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %eax, %ebp
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl $0, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
@@ -1966,41 +1920,42 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl 8(%ebp), %eax
-; X32-NEXT: movl 28(%eax), %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl 28(%eax), %ecx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl %esi, %ebx
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %edi, %ebp
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movzbl %bl, %edi
+; X32-NEXT: adcl %edi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
@@ -2011,62 +1966,63 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %edi, %ebp
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: adcl %ebx, %edi
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %esi, %ebx
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %edi, %ebp
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edi
-; X32-NEXT: setb %cl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: adcl %esi, %ebx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
@@ -2074,36 +2030,36 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ecx
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, %ecx
; X32-NEXT: adcl $0, %esi
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ebp, %edi
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
; X32-NEXT: adcl %edi, %edx
@@ -2128,31 +2084,31 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl %ecx, %esi
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %esi, %ebx
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edi
+; X32-NEXT: adcl %edi, %esi
; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movzbl %bl, %esi
+; X32-NEXT: adcl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
@@ -2161,160 +2117,158 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: adcl %edi, %esi
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %esi, %ebx
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %edi, %ecx
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %esi
-; X32-NEXT: setb %cl
+; X32-NEXT: adcl %esi, %edi
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %edi
-; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %ebp
+; X32-NEXT: adcl %edx, %edi
+; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
+; X32-NEXT: movzbl %cl, %esi
; X32-NEXT: adcl %esi, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %ebx
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %esi
+; X32-NEXT: adcl %edx, %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl %ebp, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: addl %edi, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl %ecx, %esi
+; X32-NEXT: adcl %edi, %edx
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X32-NEXT: adcl %eax, %ebx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl $0, %eax
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: adcl %eax, %esi
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %esi
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %esi, %ecx
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: setb %cl
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %ebp
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
@@ -2324,62 +2278,64 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull %edi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl %ebp, %esi
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %ebx, %ecx
+; X32-NEXT: addl %ebp, %ecx
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ebx
+; X32-NEXT: adcl %esi, %ebp
; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
@@ -2388,8 +2344,7 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ebx
; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %ebx
@@ -2397,24 +2352,25 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, %edi
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
@@ -2423,43 +2379,41 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: adcl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: addl %eax, %edi
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edx, %ebp
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: addl %ebx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl %ecx, %esi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl $0, %eax
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %edx
; X32-NEXT: adcl $0, %esi
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: adcl $0, %eax
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl %ebx, %eax
@@ -2467,21 +2421,22 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
-; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: addl %ecx, %esi
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: adcl %ebp, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %ebx
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
@@ -2500,167 +2455,162 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %edi, %esi
; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ebx
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %ebx, %ecx
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: adcl %esi, %ebx
; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ebx
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ebx
-; X32-NEXT: adcl %edx, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl %edx, %ecx
+; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: adcl $0, %edi
+; X32-NEXT: adcl $0, %ecx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
; X32-NEXT: adcl %esi, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edx, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %edi
+; X32-NEXT: adcl %edx, %ebp
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: addl %ebx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: adcl %ecx, %esi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: adcl %eax, %edi
-; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, %edi
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl %eax, %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl $0, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, %edi
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl (%esp), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
@@ -2669,29 +2619,30 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: addl %ebp, %ebx
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: adcl %edi, %ebp
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
@@ -2702,81 +2653,82 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: adcl %ebx, %ecx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %edi, %ebp
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl (%esp), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %ebx, %ecx
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ebx
-; X32-NEXT: setb %cl
+; X32-NEXT: adcl %esi, %edi
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %ebx
+; X32-NEXT: addl %eax, %ebp
; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl (%esp), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: setb (%esp) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -2787,67 +2739,66 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
; X32-NEXT: adcl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %edi
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edx, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: addl %ebx, %edx
+; X32-NEXT: addl %ebp, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl %ecx, %esi
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl $0, %eax
+; X32-NEXT: adcl $0, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %esi, %ebx
+; X32-NEXT: addl (%esp), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: adcl %edi, %ebp
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ecx
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movzbl %bl, %edi
+; X32-NEXT: adcl %edi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
@@ -2855,65 +2806,65 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: addl %eax, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %edi, %ebp
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: adcl %ebx, %edi
+; X32-NEXT: setb %cl
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %edi, %ebp
+; X32-NEXT: movzbl %cl, %eax
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %ebx, %ecx
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ebx
-; X32-NEXT: setb %cl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: adcl %esi, %ebx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
@@ -2921,38 +2872,38 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %esi
; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %esi
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl (%esp), %ecx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %edi
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movzbl %bl, %edi
; X32-NEXT: adcl %edi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
@@ -2960,7 +2911,7 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %edi
; X32-NEXT: adcl %edx, %ebx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT: movl (%esp), %edx # 4-byte Reload
; X32-NEXT: addl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl %ecx, %esi
@@ -2972,7 +2923,7 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
@@ -2983,30 +2934,31 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %edi, %ecx
; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %ecx
-; X32-NEXT: setb %bl
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebx, %edi
+; X32-NEXT: setb %cl
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
@@ -3016,106 +2968,107 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %esi, %ebx
+; X32-NEXT: addl %ecx, %ebx
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %esi, %edi
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %esi, %ecx
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl 12(%ebp), %eax
-; X32-NEXT: movl 60(%eax), %esi
; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %ebp, %ecx
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl 60(%eax), %ebp
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edi
+; X32-NEXT: adcl %edi, %esi
; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: mull %ebp
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %ebp
+; X32-NEXT: adcl %edx, %ecx
+; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %esi
; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
-; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
+; X32-NEXT: adcl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
@@ -3123,17 +3076,17 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: addl %eax, %edi
; X32-NEXT: adcl %edx, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: addl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: addl %ebp, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: adcl %ecx, %esi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edi
; X32-NEXT: adcl $0, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl (%esp), %eax # 4-byte Reload
; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
@@ -3142,13 +3095,13 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: adcl $0, %esi
; X32-NEXT: adcl $0, %edi
; X32-NEXT: adcl $0, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
@@ -3156,28 +3109,29 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %esi, %ecx
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: addl %ecx, %esi
; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: mull %ebx
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
-; X32-NEXT: setb %cl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %ebx
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
@@ -3193,7 +3147,8 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %edi
@@ -3207,107 +3162,104 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %ebx
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl (%esp), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: addl %ebx, %ecx
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: mull %ebx
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: adcl %esi, %ebp
; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ebx
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ebx
-; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %edx, %ebp
+; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl (%esp), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: addl (%esp), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
; X32-NEXT: adcl %esi, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: setb (%esp) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload
+; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %edi
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %esi
+; X32-NEXT: adcl %edx, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl %ebx, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: addl %ebx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl %ecx, %esi
+; X32-NEXT: adcl %ebp, %edx
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X32-NEXT: adcl %eax, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl $0, %eax
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: adcl %eax, %esi
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, %ebx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
@@ -3319,9 +3271,47 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %ebx
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl (%esp), %eax # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: adcl $0, %edx
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl $0, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: movl (%esp), %ebx # 4-byte Reload
; X32-NEXT: adcl $0, %ebx
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
@@ -3332,84 +3322,40 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: addl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, %eax
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl %ebp, %ebx
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: adcl %edi, %ebp
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ecx
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
@@ -3420,64 +3366,64 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %ebx, %ecx
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %esi, %ecx
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ebx
-; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %esi, %edi
+; X32-NEXT: setb %bl
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
@@ -3485,8 +3431,7 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ebx
; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %ebx
@@ -3494,7 +3439,8 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -3505,133 +3451,133 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
; X32-NEXT: adcl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: addl %eax, %edi
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edx, %ebp
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: addl %ebx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl %ecx, %esi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl $0, %eax
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl %ebp, %ebx
; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %edi, %ebp
; X32-NEXT: setb %bl
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edx, %edi
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edx, %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: adcl %ebx, %ecx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %edi, %ebp
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %ebx, %ecx
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ebx
-; X32-NEXT: setb %cl
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: adcl %esi, %edi
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
@@ -3639,16 +3585,16 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ecx
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: adcl $0, %esi
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -3659,18 +3605,18 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movzbl %bl, %edi
; X32-NEXT: adcl %edi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
@@ -3701,29 +3647,30 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %esi, %ecx
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: addl %edi, %ecx
+; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: adcl %ebp, %edi
; X32-NEXT: setb %cl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ebx
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
@@ -3736,61 +3683,61 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
-; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: adcl %edi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ebx
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: addl %ebx, %ecx
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: mull %ebx
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %edi
+; X32-NEXT: adcl %edi, %esi
; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ebx
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
@@ -3799,8 +3746,7 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ebx
; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %ebx
@@ -3808,24 +3754,25 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, %edi
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
@@ -3834,43 +3781,41 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: adcl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: addl %eax, %edi
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edx, %ebp
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: addl %ebx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl %ecx, %esi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl $0, %eax
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %edx
; X32-NEXT: adcl $0, %esi
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: adcl $0, %eax
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl %ebx, %eax
@@ -3878,21 +3823,22 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, %esi
; X32-NEXT: addl %ecx, %esi
-; X32-NEXT: adcl $0, %edi
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %ebp, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %ebx
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
@@ -3911,56 +3857,57 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %ebx
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %esi
+; X32-NEXT: adcl %esi, %ebx
; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
@@ -3969,7 +3916,7 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %esi
; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %esi
@@ -3979,23 +3926,24 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl %ecx, %edi
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, %edi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %edi, %ebp
; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
@@ -4011,15 +3959,15 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: adcl %edx, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %esi
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %ebp
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
@@ -4033,23 +3981,21 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %ebx
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl $0, %eax
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: adcl $0, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
@@ -4063,47 +4009,49 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl (%esp), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, %edx
+; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %ebx
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: adcl %edi, %ebp
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ecx
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
@@ -4114,28 +4062,28 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %esi, %ebp
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %ebx, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl %ecx, %ebx
@@ -4148,70 +4096,72 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl 8(%ebp), %eax
-; X32-NEXT: movl 76(%eax), %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edx
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl 76(%eax), %ecx
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %ecx, %edi
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %edi
-; X32-NEXT: setb %cl
+; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: setb (%esp) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: mull %edi
+; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: adcl %edx, %esi
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %ebp
+; X32-NEXT: adcl %edx, %ecx
; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl (%esp), %ebx # 4-byte Folded Reload
+; X32-NEXT: setb (%esp) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %esi
; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
-; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: movzbl (%esp), %esi # 1-byte Folded Reload
+; X32-NEXT: adcl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
@@ -4219,245 +4169,245 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: addl %eax, %edi
; X32-NEXT: adcl %edx, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: addl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: addl %ebp, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: adcl %ecx, %esi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edi
; X32-NEXT: adcl $0, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: adcl %edi, %ebp
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ecx
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movzbl %bl, %edi
+; X32-NEXT: adcl %edi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %edi, %ebp
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %ebx, %edi
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %edi, %ebp
+; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edi
-; X32-NEXT: setb %cl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: adcl %esi, %ebx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %edi
+; X32-NEXT: addl %eax, %esi
; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, %edi
+; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, %esi
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl (%esp), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: mull %edi
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
-; X32-NEXT: adcl %esi, %edx
+; X32-NEXT: mull %edi
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movzbl %bl, %edi
+; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %ebx
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: addl %edi, %edx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %edi
+; X32-NEXT: adcl %edx, %ebx
+; X32-NEXT: movl (%esp), %edx # 4-byte Reload
+; X32-NEXT: addl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl %ecx, %esi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X32-NEXT: adcl %eax, %ebx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl $0, %eax
+; X32-NEXT: adcl %eax, %edi
+; X32-NEXT: adcl $0, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %edi, %ecx
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: adcl %ebx, %edi
+; X32-NEXT: setb %cl
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %ebp, %esi
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edx, %edi
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %esi
+; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: adcl %edi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
@@ -4468,67 +4418,66 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl 8(%ebp), %eax
-; X32-NEXT: movl 92(%eax), %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl 92(%eax), %ebp
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %ebx
-; X32-NEXT: movl %ebx, %esi
-; X32-NEXT: movl %esi, -140(%ebp) # 4-byte Spill
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ebx
+; X32-NEXT: adcl %edi, %esi
; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: mull %ebp
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %ebp
+; X32-NEXT: adcl %edx, %ecx
+; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %esi
; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
-; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
+; X32-NEXT: adcl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
@@ -4536,17 +4485,17 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: addl %eax, %edi
; X32-NEXT: adcl %edx, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: addl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: addl %ebp, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: adcl %ecx, %esi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edi
; X32-NEXT: adcl $0, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl (%esp), %eax # 4-byte Reload
; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
@@ -4555,42 +4504,43 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: adcl $0, %esi
; X32-NEXT: adcl $0, %edi
; X32-NEXT: adcl $0, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: addl %ecx, %esi
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %edi, %ecx
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: setb (%esp) # 1-byte Folded Spill
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %ebp
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %esi
-; X32-NEXT: setb %cl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
@@ -4600,63 +4550,63 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %edi, %ecx
-; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %ebx, %ecx
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %esi
+; X32-NEXT: adcl %esi, %ebp
; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edi, %ebx
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
@@ -4665,8 +4615,7 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %esi
; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %esi
@@ -4682,7 +4631,8 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %edi
@@ -4695,7 +4645,7 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %eax, %edi
; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %ebx
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
@@ -4708,56 +4658,59 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: adcl %edx, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, %ebp
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %esi
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %eax, (%esp) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl $0, %ebp
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %edi, %ebp
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movzbl %bl, %edi
+; X32-NEXT: adcl %edi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
@@ -4766,41 +4719,43 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %ebx, %edi
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %esi, %ebp
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movzbl %bl, %edi
+; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %esi
-; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %edi
+; X32-NEXT: adcl %edx, %esi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
@@ -4809,7 +4764,7 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %edi
@@ -4823,7 +4778,7 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %eax, %edi
; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %ebx
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
@@ -4845,31 +4800,32 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: setb %bl
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %esi, %ebp
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
@@ -4887,15 +4843,14 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %edi, %ecx
-; X32-NEXT: imull %eax, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: imull %eax, %ebp
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %ecx, %edx
-; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: addl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %ebp, %edx
+; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: addl %edx, %ecx
+; X32-NEXT: movl %ecx, %ebp
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl %eax, %esi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
@@ -4909,7 +4864,7 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: addl %edx, %esi
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: adcl %ebp, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl %edi, %esi
@@ -4919,20 +4874,20 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: adcl $0, %edi
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %esi
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: adcl %ebp, %esi
; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ecx
+; X32-NEXT: mull %edi
; X32-NEXT: addl %esi, %eax
; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
@@ -4940,9 +4895,10 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: imull %eax, %esi
+; X32-NEXT: imull %ebp, %esi
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -4951,62 +4907,62 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: addl %edx, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: imull %ebx, %esi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: imull %edi, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %esi, %edx
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: addl %ebx, %edx
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: imull %eax, %ecx
; X32-NEXT: addl %edx, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl %ecx, %esi
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %esi, %ecx
-; X32-NEXT: adcl %edi, %ebx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ebp, %edi
+; X32-NEXT: adcl %ebx, %esi
+; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
-; X32-NEXT: adcl %esi, %edx
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movzbl %bl, %ecx
+; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %esi
-; X32-NEXT: movl 104(%esi), %ebx
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl 104(%esi), %ebp
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -5015,274 +4971,274 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ebx
+; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ecx
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ebx, %edi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %ecx
-; X32-NEXT: movl 96(%ecx), %edi
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-NEXT: movl 96(%edi), %ebx
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 100(%ecx), %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl 100(%edi), %edi
; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edi
+; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %edi, %ecx
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X32-NEXT: adcl %eax, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: mull %edx
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: xorl %ecx, %ecx
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: addl %eax, %edi
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl %ebp, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %edx, %eax
-; X32-NEXT: addl %ecx, %edi
-; X32-NEXT: adcl %esi, %eax
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %edi, %ecx
+; X32-NEXT: adcl %ebx, %eax
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ebx
-; X32-NEXT: setb %cl
+; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %esi
+; X32-NEXT: addl %eax, %ebp
; X32-NEXT: adcl %edx, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, %esi
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %esi
+; X32-NEXT: addl %ebp, %ebx
+; X32-NEXT: adcl $0, %edi
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: adcl %edi, %ecx
; X32-NEXT: setb %bl
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ebp
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %edi
+; X32-NEXT: addl %eax, %esi
; X32-NEXT: adcl %edx, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X32-NEXT: adcl %eax, %edi
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %eax, %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 8(%ebp), %ecx
-; X32-NEXT: movl 112(%ecx), %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: imull %eax, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl 112(%esi), %edi
+; X32-NEXT: imull %edi, %ebp
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %esi, %edx
-; X32-NEXT: movl 116(%ecx), %eax
+; X32-NEXT: addl %ebp, %edx
+; X32-NEXT: movl 116(%esi), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: imull %eax, %edi
-; X32-NEXT: addl %edx, %edi
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 120(%ecx), %eax
+; X32-NEXT: imull %eax, %ecx
+; X32-NEXT: addl %edx, %ecx
; X32-NEXT: movl %ecx, %ebx
-; X32-NEXT: movl %eax, %edi
+; X32-NEXT: movl 120(%esi), %eax
+; X32-NEXT: movl %eax, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: imull %esi, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
+; X32-NEXT: imull %esi, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
+; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl 124(%ecx), %ecx
+; X32-NEXT: imull %ebp, %ecx
+; X32-NEXT: addl %edx, %ecx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %edi, %edx
-; X32-NEXT: movl 124(%ebx), %ebx
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: imull %eax, %ebx
-; X32-NEXT: addl %edx, %ebx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebx, %ecx
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ebp, %ebx
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ecx, %esi
-; X32-NEXT: setb %cl
+; X32-NEXT: adcl %esi, %ebp
+; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movzbl %cl, %ecx
-; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: mull %edi
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movzbl %bl, %esi
+; X32-NEXT: adcl %esi, %edx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edx
+; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: imull %eax, %edi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
+; X32-NEXT: imull %eax, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl %edi, %edx
-; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %edx, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %esi, %edx
+; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: addl %edx, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: imull %ebx, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: imull %ebp, %esi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: imull %edi, %ecx
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %esi, %edx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: imull %eax, %ecx
; X32-NEXT: addl %edx, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebx, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl %edi, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %esi, %ebx
-; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: addl %ebx, %esi
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %ebx, %ecx
-; X32-NEXT: adcl %edi, %esi
-; X32-NEXT: setb %bl
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %esi, %edi
+; X32-NEXT: adcl %ebp, %ebx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X32-NEXT: addl %esi, %eax
-; X32-NEXT: movzbl %bl, %esi
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
; X32-NEXT: adcl %esi, %edx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, %ecx
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
@@ -5293,7 +5249,7 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
@@ -5306,10 +5262,9 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: addl (%esp), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -5321,37 +5276,38 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 12(%ebp), %eax
-; X32-NEXT: movl 92(%eax), %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl 92(%eax), %esi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %edi, %ebp
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movzbl %bl, %edi
+; X32-NEXT: adcl %edi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
@@ -5362,62 +5318,62 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %edi, %ebp
+; X32-NEXT: adcl $0, %ebx
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %ebx, %edi
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %edi, %ebp
+; X32-NEXT: movzbl %bl, %eax
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edi
-; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: adcl %esi, %ebx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
@@ -5425,36 +5381,36 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: addl %eax, %ecx
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: adcl $0, %esi
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ebp, %edi
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
; X32-NEXT: adcl %edi, %edx
@@ -5479,32 +5435,32 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 76(%eax), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %esi, %ebp
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: setb %bl
+; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
+; X32-NEXT: mull %ebx
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
@@ -5514,157 +5470,158 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %esi, %ebp
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edi
-; X32-NEXT: setb %cl
+; X32-NEXT: adcl %esi, %edi
+; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: mull %ecx
; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %esi
+; X32-NEXT: addl %eax, %ebx
; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, %esi
+; X32-NEXT: adcl $0, %ebx
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
-; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: mull %esi
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
+; X32-NEXT: adcl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: addl %eax, %edi
-; X32-NEXT: adcl %edx, %ebx
+; X32-NEXT: adcl %edx, %ebp
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: addl %esi, %edx
+; X32-NEXT: addl %ebx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl %ecx, %esi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edi
-; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %esi, %ecx
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %edi, %esi
+; X32-NEXT: setb %cl
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: addl %esi, %eax
+; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
@@ -5674,104 +5631,105 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %esi, %ebx
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl %ebp, %esi
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: adcl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %edi, %ecx
-; X32-NEXT: adcl $0, %ebx
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: addl %ebx, %ecx
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %edi
+; X32-NEXT: adcl %esi, %ebx
; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: mull %ebp
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %ebp
+; X32-NEXT: adcl %edx, %ecx
+; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl $0, %ebp
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %esi
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl $0, %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
-; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: mull %esi
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movzbl %bl, %esi
+; X32-NEXT: adcl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
@@ -5779,14 +5737,14 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: addl %eax, %edi
; X32-NEXT: adcl %edx, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: addl %ecx, %edx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: addl %ebp, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: adcl %ecx, %esi
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edi
; X32-NEXT: adcl $0, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
@@ -5798,42 +5756,43 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %edx
-; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: adcl $0, %esi
; X32-NEXT: adcl $0, %edi
; X32-NEXT: adcl $0, %ebx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, %esi
; X32-NEXT: addl %ecx, %esi
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: addl %esi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ebx
+; X32-NEXT: adcl %ebx, %ecx
+; X32-NEXT: setb %bl
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: mull %edi
; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-NEXT: movzbl %bl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
@@ -5843,61 +5802,64 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ebx
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %ecx, %edi
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl %edi, %esi
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %ecx, %ebx
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %ebx
; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %esi
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %ebp, %ecx
+; X32-NEXT: adcl $0, %esi
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %ebx, %esi
+; X32-NEXT: adcl %esi, %ebp
; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X32-NEXT: addl %esi, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
@@ -5906,7 +5868,7 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: addl %eax, %esi
; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, %esi
@@ -5916,28 +5878,29 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl %ecx, %edi
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, %edi
; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X32-NEXT: mull %ebp
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
@@ -5974,74 +5937,77 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 12(%ebp), %ebx
-; X32-NEXT: movl 96(%ebx), %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl 100(%ebx), %ebx
-; X32-NEXT: movl %esi, %eax
-; X32-NEXT: mull %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl 96(%ecx), %ebx
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %esi, %ebp
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl 100(%eax), %esi
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %esi, %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl %ebx, %esi
+; X32-NEXT: setb %bl
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %esi, %ebx
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %esi, %edi
+; X32-NEXT: movzbl %bl, %eax
; X32-NEXT: adcl %eax, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: mull %edx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: addl %eax, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: addl %eax, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl %edx, %esi
-; X32-NEXT: addl %ebx, %edi
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl %edi, %ebx
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %ecx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ecx
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %ebx, %ecx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ecx, %edi
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: mull %ebx
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %edi
-; X32-NEXT: setb %cl
+; X32-NEXT: adcl %esi, %ecx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ebx
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
@@ -6055,37 +6021,35 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl 12(%ebp), %eax
-; X32-NEXT: movl 104(%eax), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X32-NEXT: movl 104(%ebp), %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %esi, %eax
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl 12(%ebp), %eax
-; X32-NEXT: movl 108(%eax), %edx
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %ebx, -112(%ebp) # 4-byte Spill
-; X32-NEXT: mull %ebx
-; X32-NEXT: movl %edx, %esi
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl 108(%ebp), %esi
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %edi
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %esi
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: adcl %ebx, %edi
+; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ebx
+; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl %esi, %edi
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: addl %edi, %esi
+; X32-NEXT: movzbl %bl, %eax
; X32-NEXT: adcl %eax, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl %ebx, %eax
@@ -6093,20 +6057,20 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: mull %edx
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: addl %eax, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: addl %eax, %edi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl %edx, %eax
-; X32-NEXT: addl %edi, %esi
+; X32-NEXT: addl %esi, %edi
; X32-NEXT: adcl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl $0, %esi
+; X32-NEXT: adcl $0, %edi
; X32-NEXT: adcl $0, %eax
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
@@ -6114,25 +6078,26 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl %ebx, %esi
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ebx, %ebp
; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movl %eax, %ebp
; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl %ebx, %edi
+; X32-NEXT: mull %edi
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
@@ -6144,15 +6109,15 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: adcl %edx, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %ebx, %ecx
+; X32-NEXT: movl %edi, %ecx
; X32-NEXT: imull %eax, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
@@ -6160,51 +6125,50 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: addl %ecx, %edx
; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: addl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl %eax, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: imull %ebx, %esi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: imull %edi, %esi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %eax, %ebp
; X32-NEXT: addl %esi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: imull %edi, %esi
+; X32-NEXT: imull %ecx, %esi
; X32-NEXT: addl %edx, %esi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl %ebx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: addl %ebx, %edi
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: addl %edi, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: setb %bl
+; X32-NEXT: adcl %esi, %ebx
+; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 12(%ebp), %edx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl 124(%edx), %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: imull %eax, %ecx
@@ -6215,84 +6179,86 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: addl %ecx, %edx
; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: addl %edx, %esi
-; X32-NEXT: movl 112(%edi), %ebx
-; X32-NEXT: movl 116(%edi), %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl 112(%edi), %ebp
+; X32-NEXT: movl 116(%edi), %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: imull %ecx, %edi
-; X32-NEXT: mull %ebx
+; X32-NEXT: imull %ebx, %edi
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: mull %ebp
; X32-NEXT: addl %edi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: imull %ebx, %ecx
+; X32-NEXT: imull %ebp, %ecx
; X32-NEXT: addl %edx, %ecx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebx, %eax
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %esi, %ebx
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %esi, %ecx
-; X32-NEXT: adcl %edi, %ebx
-; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ebx, %ebp
+; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: setb %bl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X32-NEXT: addl %ebx, %eax
-; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
-; X32-NEXT: adcl %esi, %edx
+; X32-NEXT: mull %esi
+; X32-NEXT: addl %ecx, %eax
+; X32-NEXT: movzbl %bl, %ecx
+; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %edi, %ebx
+; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: mull %edi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %ecx, %ebp
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %edi
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movzbl %bl, %edi
+; X32-NEXT: adcl %edi, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
@@ -6301,41 +6267,43 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %edi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: addl %ebx, %eax
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ebx
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
+; X32-NEXT: adcl $0, %ebx
+; X32-NEXT: movl %edi, %eax
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %edi, %ecx
+; X32-NEXT: adcl %ebx, %edi
; X32-NEXT: setb %bl
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %bl, %ecx
-; X32-NEXT: adcl %ecx, %edx
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: movl %esi, %ebp
+; X32-NEXT: mull %ecx
+; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movzbl %bl, %edi
+; X32-NEXT: adcl %edi, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: addl %eax, %esi
-; X32-NEXT: adcl %edx, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: addl %eax, %edi
+; X32-NEXT: adcl %edx, %esi
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
@@ -6344,7 +6312,7 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %edi
@@ -6358,7 +6326,7 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %eax, %edi
; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %ebx
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
@@ -6384,25 +6352,25 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: mull %esi
; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %edi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: addl %ebp, %edi
; X32-NEXT: adcl $0, %esi
; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: mull %ebp
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, %edi
; X32-NEXT: adcl %esi, %ecx
; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
+; X32-NEXT: mull %ebp
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X32-NEXT: adcl %ecx, %edx
@@ -6414,61 +6382,62 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: adcl %edx, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X32-NEXT: adcl %eax, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %edi, %ecx
+; X32-NEXT: movl %ebp, %ecx
; X32-NEXT: imull %eax, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: mull %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: addl %ecx, %edx
-; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: imull %ebp, %esi
; X32-NEXT: addl %edx, %esi
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, %esi
+; X32-NEXT: movl %eax, %edi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: imull %ebx, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: mull %edi
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %esi, %edx
+; X32-NEXT: imull %ebx, %edi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: imull %edi, %esi
-; X32-NEXT: addl %edx, %esi
+; X32-NEXT: mull %esi
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: addl %edi, %edx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: imull %eax, %edi
+; X32-NEXT: addl %edx, %edi
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %esi
-; X32-NEXT: movl %eax, %ebx
-; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl %edi, %eax
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: addl %esi, %ebx
+; X32-NEXT: adcl $0, %edi
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: mull %ebp
+; X32-NEXT: movl %ebp, %esi
+; X32-NEXT: movl %edx, %ebp
; X32-NEXT: addl %ebx, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl %esi, %edi
-; X32-NEXT: setb %bl
+; X32-NEXT: adcl %edi, %ebp
+; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: mull %ecx
-; X32-NEXT: addl %edi, %eax
-; X32-NEXT: movzbl %bl, %ecx
+; X32-NEXT: mull %esi
+; X32-NEXT: addl %ebp, %eax
+; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -6477,146 +6446,139 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: imull %eax, %edi
-; X32-NEXT: movl %eax, %esi
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: mull %ecx
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, %ebp
; X32-NEXT: addl %edi, %edx
-; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: imull %ebx, %ecx
; X32-NEXT: addl %edx, %ecx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: imull %esi, %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: imull %edi, %ecx
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: mull %ebx
+; X32-NEXT: mull %edi
; X32-NEXT: addl %ecx, %edx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: imull %ebx, %ecx
+; X32-NEXT: imull %edi, %ecx
; X32-NEXT: addl %edx, %ecx
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: addl %ebp, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: mull %esi
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edi, %eax
-; X32-NEXT: mull %esi
+; X32-NEXT: movl %edi, %ebp
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: mull %ecx
+; X32-NEXT: movl %edx, %edi
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %esi, %eax
+; X32-NEXT: mull %ecx
; X32-NEXT: movl %edx, %esi
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: addl %ebx, %ecx
+; X32-NEXT: addl %edi, %ecx
; X32-NEXT: adcl $0, %esi
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: movl %ebp, %eax
; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %edi
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, %ebp
+; X32-NEXT: addl %ecx, %ebp
; X32-NEXT: adcl %esi, %edi
; X32-NEXT: setb %cl
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: mull %ebx
; X32-NEXT: movl %edx, %esi
-; X32-NEXT: addl %edi, %eax
+; X32-NEXT: movl %eax, %edx
+; X32-NEXT: addl %edi, %edx
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: adcl %ecx, %esi
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %eax, %edx
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, %ebp
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
@@ -6626,18 +6588,18 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -6650,13 +6612,10 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl 16(%ebp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl %esi, (%ecx)
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
@@ -6689,36 +6648,34 @@ define void @test_1024(i1024* %a, i1024*
; X32-NEXT: movl %esi, 56(%ecx)
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl %esi, 60(%ecx)
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, 64(%ecx)
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, 68(%ecx)
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, 72(%ecx)
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, 76(%ecx)
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, 80(%ecx)
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, 84(%ecx)
-; X32-NEXT: movl %ebx, 88(%ecx)
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, 92(%ecx)
-; X32-NEXT: movl %edi, 96(%ecx)
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, 100(%ecx)
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, 104(%ecx)
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, 108(%ecx)
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, 112(%ecx)
-; X32-NEXT: movl %edx, 116(%ecx)
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT: movl %eax, 120(%ecx)
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, 64(%ecx)
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, 68(%ecx)
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, 72(%ecx)
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, 76(%ecx)
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, 80(%ecx)
+; X32-NEXT: movl %ebp, 84(%ecx)
+; X32-NEXT: movl %edi, 88(%ecx)
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, 92(%ecx)
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, 96(%ecx)
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, 100(%ecx)
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, 104(%ecx)
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, 108(%ecx)
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT: movl %esi, 112(%ecx)
+; X32-NEXT: movl %ebx, 116(%ecx)
+; X32-NEXT: movl %edx, 120(%ecx)
; X32-NEXT: movl %eax, 124(%ecx)
-; X32-NEXT: addl $996, %esp # imm = 0x3E4
+; X32-NEXT: addl $1000, %esp # imm = 0x3E8
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
; X32-NEXT: popl %ebx
@@ -6798,8 +6755,8 @@ define void @test_1024(i1024* %a, i1024*
; X64-NEXT: adcq %rdx, %rbx
; X64-NEXT: movq 16(%rsi), %rax
; X64-NEXT: movq %rsi, %r13
-; X64-NEXT: movq %r13, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: mulq %r11
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@@ -6811,7 +6768,7 @@ define void @test_1024(i1024* %a, i1024*
; X64-NEXT: adcq %rbx, %r11
; X64-NEXT: movq %r8, %rax
; X64-NEXT: movq %r8, %rbp
-; X64-NEXT: movq %rbp, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: addq %rdi, %rax
; X64-NEXT: movq %r9, %rax
; X64-NEXT: adcq %rcx, %rax
@@ -6841,9 +6798,9 @@ define void @test_1024(i1024* %a, i1024*
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rbp, %rax
; X64-NEXT: addq %r9, %rax
-; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT: movq %r9, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload
+; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; X64-NEXT: adcq %r15, %rax
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: adcq %r14, %r12
@@ -6881,7 +6838,7 @@ define void @test_1024(i1024* %a, i1024*
; X64-NEXT: adcq %rbx, %r10
; X64-NEXT: movq %rcx, %rdx
; X64-NEXT: movq %rcx, %r12
-; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: addq %r9, %rdx
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %r11, %r8
@@ -7031,7 +6988,7 @@ define void @test_1024(i1024* %a, i1024*
; X64-NEXT: movq %rbx, %rax
; X64-NEXT: mulq %rcx
; X64-NEXT: movq %rcx, %rbx
-; X64-NEXT: movq %rbx, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: movq %rax, %r8
; X64-NEXT: addq %rbp, %r8
@@ -7395,7 +7352,7 @@ define void @test_1024(i1024* %a, i1024*
; X64-NEXT: adcq $0, %r15
; X64-NEXT: movq %rbp, %rax
; X64-NEXT: movq %r8, %rdi
-; X64-NEXT: movq %rdi, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: mulq %rdi
; X64-NEXT: movq %rdx, %r9
; X64-NEXT: movq %rax, %r8
@@ -7643,7 +7600,7 @@ define void @test_1024(i1024* %a, i1024*
; X64-NEXT: movq %r9, %rax
; X64-NEXT: mulq %rcx
; X64-NEXT: movq %rcx, %r10
-; X64-NEXT: movq %r10, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: movq %rax, %rdi
; X64-NEXT: addq %rsi, %rdi
@@ -7663,7 +7620,7 @@ define void @test_1024(i1024* %a, i1024*
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: movq %r11, %rsi
-; X64-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: mulq %rsi
; X64-NEXT: movq %rdx, %r11
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@@ -7906,7 +7863,7 @@ define void @test_1024(i1024* %a, i1024*
; X64-NEXT: movq 88(%rsi), %rax
; X64-NEXT: movq %rsi, %r9
; X64-NEXT: movq %rax, %rsi
-; X64-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: mulq %rcx
; X64-NEXT: movq %rcx, %r11
; X64-NEXT: movq %rdx, %rbp
Modified: llvm/branches/release_60/test/CodeGen/X86/peephole-na-phys-copy-folding.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/test/CodeGen/X86/peephole-na-phys-copy-folding.ll?rev=332933&r1=332932&r2=332933&view=diff
==============================================================================
--- llvm/branches/release_60/test/CodeGen/X86/peephole-na-phys-copy-folding.ll (original)
+++ llvm/branches/release_60/test/CodeGen/X86/peephole-na-phys-copy-folding.ll Mon May 21 19:46:36 2018
@@ -1,13 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=i386-linux-gnu %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK32
-; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sahf %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK64
-
-; TODO: Reenable verify-machineinstrs once the if (!AXDead) // FIXME in
-; X86InstrInfo::copyPhysReg() is resolved.
+; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK32
+; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK64
; The peephole optimizer can elide some physical register copies such as
; EFLAGS. Make sure the flags are used directly, instead of needlessly using
-; lahf, when possible.
+; saving and restoring specific conditions.
@L = external global i32
@M = external global i8
@@ -209,29 +206,22 @@ exit2:
define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) nounwind {
; CHECK32-LABEL: test_intervening_call:
; CHECK32: # %bb.0: # %entry
-; CHECK32-NEXT: pushl %ebp
-; CHECK32-NEXT: movl %esp, %ebp
; CHECK32-NEXT: pushl %ebx
; CHECK32-NEXT: pushl %esi
-; CHECK32-NEXT: movl 12(%ebp), %eax
-; CHECK32-NEXT: movl 16(%ebp), %edx
-; CHECK32-NEXT: movl 20(%ebp), %ebx
-; CHECK32-NEXT: movl 24(%ebp), %ecx
-; CHECK32-NEXT: movl 8(%ebp), %esi
-; CHECK32-NEXT: lock cmpxchg8b (%esi)
; CHECK32-NEXT: pushl %eax
-; CHECK32-NEXT: seto %al
-; CHECK32-NEXT: lahf
-; CHECK32-NEXT: movl %eax, %esi
-; CHECK32-NEXT: popl %eax
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; CHECK32-NEXT: lock cmpxchg8b (%esi)
+; CHECK32-NEXT: setne %bl
; CHECK32-NEXT: subl $8, %esp
; CHECK32-NEXT: pushl %edx
; CHECK32-NEXT: pushl %eax
; CHECK32-NEXT: calll bar
; CHECK32-NEXT: addl $16, %esp
-; CHECK32-NEXT: movl %esi, %eax
-; CHECK32-NEXT: addb $127, %al
-; CHECK32-NEXT: sahf
+; CHECK32-NEXT: testb $-1, %bl
; CHECK32-NEXT: jne .LBB4_3
; CHECK32-NEXT: # %bb.1: # %t
; CHECK32-NEXT: movl $42, %eax
@@ -240,39 +230,28 @@ define i64 @test_intervening_call(i64* %
; CHECK32-NEXT: xorl %eax, %eax
; CHECK32-NEXT: .LBB4_2: # %t
; CHECK32-NEXT: xorl %edx, %edx
+; CHECK32-NEXT: addl $4, %esp
; CHECK32-NEXT: popl %esi
; CHECK32-NEXT: popl %ebx
-; CHECK32-NEXT: popl %ebp
; CHECK32-NEXT: retl
;
; CHECK64-LABEL: test_intervening_call:
; CHECK64: # %bb.0: # %entry
-; CHECK64-NEXT: pushq %rbp
-; CHECK64-NEXT: movq %rsp, %rbp
; CHECK64-NEXT: pushq %rbx
-; CHECK64-NEXT: pushq %rax
; CHECK64-NEXT: movq %rsi, %rax
; CHECK64-NEXT: lock cmpxchgq %rdx, (%rdi)
-; CHECK64-NEXT: pushq %rax
-; CHECK64-NEXT: seto %al
-; CHECK64-NEXT: lahf
-; CHECK64-NEXT: movq %rax, %rbx
-; CHECK64-NEXT: popq %rax
+; CHECK64-NEXT: setne %bl
; CHECK64-NEXT: movq %rax, %rdi
; CHECK64-NEXT: callq bar
-; CHECK64-NEXT: movq %rbx, %rax
-; CHECK64-NEXT: addb $127, %al
-; CHECK64-NEXT: sahf
-; CHECK64-NEXT: jne .LBB4_3
+; CHECK64-NEXT: testb $-1, %bl
+; CHECK64-NEXT: jne .LBB4_2
; CHECK64-NEXT: # %bb.1: # %t
; CHECK64-NEXT: movl $42, %eax
-; CHECK64-NEXT: jmp .LBB4_2
-; CHECK64-NEXT: .LBB4_3: # %f
+; CHECK64-NEXT: popq %rbx
+; CHECK64-NEXT: retq
+; CHECK64-NEXT: .LBB4_2: # %f
; CHECK64-NEXT: xorl %eax, %eax
-; CHECK64-NEXT: .LBB4_2: # %t
-; CHECK64-NEXT: addq $8, %rsp
; CHECK64-NEXT: popq %rbx
-; CHECK64-NEXT: popq %rbp
; CHECK64-NEXT: retq
entry:
; cmpxchg sets EFLAGS, call clobbers it, then br uses EFLAGS.
@@ -293,32 +272,27 @@ define i64 @test_two_live_flags(i64* %fo
; CHECK32-LABEL: test_two_live_flags:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: pushl %ebp
-; CHECK32-NEXT: movl %esp, %ebp
; CHECK32-NEXT: pushl %ebx
; CHECK32-NEXT: pushl %edi
; CHECK32-NEXT: pushl %esi
-; CHECK32-NEXT: movl 44(%ebp), %edi
-; CHECK32-NEXT: movl 12(%ebp), %eax
-; CHECK32-NEXT: movl 16(%ebp), %edx
-; CHECK32-NEXT: movl 20(%ebp), %ebx
-; CHECK32-NEXT: movl 24(%ebp), %ecx
-; CHECK32-NEXT: movl 8(%ebp), %esi
+; CHECK32-NEXT: pushl %eax
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; CHECK32-NEXT: lock cmpxchg8b (%esi)
+; CHECK32-NEXT: setne {{[0-9]+}}(%esp) # 1-byte Folded Spill
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT: movl %edi, %edx
+; CHECK32-NEXT: movl %ebp, %ecx
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi
; CHECK32-NEXT: lock cmpxchg8b (%esi)
-; CHECK32-NEXT: seto %al
-; CHECK32-NEXT: lahf
-; CHECK32-NEXT: movl %eax, %esi
-; CHECK32-NEXT: movl 32(%ebp), %eax
-; CHECK32-NEXT: movl 36(%ebp), %edx
-; CHECK32-NEXT: movl %edi, %ecx
-; CHECK32-NEXT: movl 40(%ebp), %ebx
-; CHECK32-NEXT: movl 28(%ebp), %edi
-; CHECK32-NEXT: lock cmpxchg8b (%edi)
; CHECK32-NEXT: sete %al
-; CHECK32-NEXT: pushl %eax
-; CHECK32-NEXT: movl %esi, %eax
-; CHECK32-NEXT: addb $127, %al
-; CHECK32-NEXT: sahf
-; CHECK32-NEXT: popl %eax
+; CHECK32-NEXT: testb $-1, {{[0-9]+}}(%esp) # 1-byte Folded Reload
; CHECK32-NEXT: jne .LBB5_4
; CHECK32-NEXT: # %bb.1: # %entry
; CHECK32-NEXT: testb %al, %al
@@ -330,6 +304,7 @@ define i64 @test_two_live_flags(i64* %fo
; CHECK32-NEXT: xorl %eax, %eax
; CHECK32-NEXT: .LBB5_3: # %t
; CHECK32-NEXT: xorl %edx, %edx
+; CHECK32-NEXT: addl $4, %esp
; CHECK32-NEXT: popl %esi
; CHECK32-NEXT: popl %edi
; CHECK32-NEXT: popl %ebx
@@ -338,32 +313,22 @@ define i64 @test_two_live_flags(i64* %fo
;
; CHECK64-LABEL: test_two_live_flags:
; CHECK64: # %bb.0: # %entry
-; CHECK64-NEXT: pushq %rbp
-; CHECK64-NEXT: movq %rsp, %rbp
; CHECK64-NEXT: movq %rsi, %rax
; CHECK64-NEXT: lock cmpxchgq %rdx, (%rdi)
-; CHECK64-NEXT: seto %al
-; CHECK64-NEXT: lahf
-; CHECK64-NEXT: movq %rax, %rdx
+; CHECK64-NEXT: setne %dl
; CHECK64-NEXT: movq %r8, %rax
; CHECK64-NEXT: lock cmpxchgq %r9, (%rcx)
; CHECK64-NEXT: sete %al
-; CHECK64-NEXT: pushq %rax
-; CHECK64-NEXT: movq %rdx, %rax
-; CHECK64-NEXT: addb $127, %al
-; CHECK64-NEXT: sahf
-; CHECK64-NEXT: popq %rax
+; CHECK64-NEXT: testb $-1, %dl
; CHECK64-NEXT: jne .LBB5_3
; CHECK64-NEXT: # %bb.1: # %entry
; CHECK64-NEXT: testb %al, %al
; CHECK64-NEXT: je .LBB5_3
; CHECK64-NEXT: # %bb.2: # %t
; CHECK64-NEXT: movl $42, %eax
-; CHECK64-NEXT: popq %rbp
; CHECK64-NEXT: retq
; CHECK64-NEXT: .LBB5_3: # %f
; CHECK64-NEXT: xorl %eax, %eax
-; CHECK64-NEXT: popq %rbp
; CHECK64-NEXT: retq
entry:
%cx0 = cmpxchg i64* %foo0, i64 %bar0, i64 %baz0 seq_cst seq_cst
Modified: llvm/branches/release_60/test/CodeGen/X86/win64_frame.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/test/CodeGen/X86/win64_frame.ll?rev=332933&r1=332932&r2=332933&view=diff
==============================================================================
--- llvm/branches/release_60/test/CodeGen/X86/win64_frame.ll (original)
+++ llvm/branches/release_60/test/CodeGen/X86/win64_frame.ll Mon May 21 19:46:36 2018
@@ -225,71 +225,29 @@ entry:
declare i64 @dummy()
define i64 @f10(i64* %foo, i64 %bar, i64 %baz) {
-; PUSHF-LABEL: f10:
-; PUSHF: # %bb.0:
-; PUSHF-NEXT: pushq %rbp
-; PUSHF-NEXT: .seh_pushreg 5
-; PUSHF-NEXT: pushq %rsi
-; PUSHF-NEXT: .seh_pushreg 6
-; PUSHF-NEXT: pushq %rdi
-; PUSHF-NEXT: .seh_pushreg 7
-; PUSHF-NEXT: subq $32, %rsp
-; PUSHF-NEXT: .seh_stackalloc 32
-; PUSHF-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
-; PUSHF-NEXT: .seh_setframe 5, 32
-; PUSHF-NEXT: .seh_endprologue
-; PUSHF-NEXT: movq %rdx, %rsi
-; PUSHF-NEXT: movq %rsi, %rax
-; PUSHF-NEXT: lock cmpxchgq %r8, (%rcx)
-; PUSHF-NEXT: pushfq
-; PUSHF-NEXT: popq %rdi
-; PUSHF-NEXT: callq dummy
-; PUSHF-NEXT: pushq %rdi
-; PUSHF-NEXT: popfq
-; PUSHF-NEXT: cmovneq %rsi, %rax
-; PUSHF-NEXT: addq $32, %rsp
-; PUSHF-NEXT: popq %rdi
-; PUSHF-NEXT: popq %rsi
-; PUSHF-NEXT: popq %rbp
-; PUSHF-NEXT: retq
-; PUSHF-NEXT: .seh_handlerdata
-; PUSHF-NEXT: .text
-; PUSHF-NEXT: .seh_endproc
-;
-; SAHF-LABEL: f10:
-; SAHF: # %bb.0:
-; SAHF-NEXT: pushq %rbp
-; SAHF-NEXT: .seh_pushreg 5
-; SAHF-NEXT: pushq %rsi
-; SAHF-NEXT: .seh_pushreg 6
-; SAHF-NEXT: pushq %rdi
-; SAHF-NEXT: .seh_pushreg 7
-; SAHF-NEXT: subq $32, %rsp
-; SAHF-NEXT: .seh_stackalloc 32
-; SAHF-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
-; SAHF-NEXT: .seh_setframe 5, 32
-; SAHF-NEXT: .seh_endprologue
-; SAHF-NEXT: movq %rdx, %rsi
-; SAHF-NEXT: movq %rsi, %rax
-; SAHF-NEXT: lock cmpxchgq %r8, (%rcx)
-; SAHF-NEXT: seto %al
-; SAHF-NEXT: lahf
-; SAHF-NEXT: movq %rax, %rdi
-; SAHF-NEXT: callq dummy
-; SAHF-NEXT: pushq %rax
-; SAHF-NEXT: movq %rdi, %rax
-; SAHF-NEXT: addb $127, %al
-; SAHF-NEXT: sahf
-; SAHF-NEXT: popq %rax
-; SAHF-NEXT: cmovneq %rsi, %rax
-; SAHF-NEXT: addq $32, %rsp
-; SAHF-NEXT: popq %rdi
-; SAHF-NEXT: popq %rsi
-; SAHF-NEXT: popq %rbp
-; SAHF-NEXT: retq
-; SAHF-NEXT: .seh_handlerdata
-; SAHF-NEXT: .text
-; SAHF-NEXT: .seh_endproc
+; ALL-LABEL: f10:
+; ALL: # %bb.0:
+; ALL-NEXT: pushq %rsi
+; ALL-NEXT: .seh_pushreg 6
+; ALL-NEXT: pushq %rbx
+; ALL-NEXT: .seh_pushreg 3
+; ALL-NEXT: subq $40, %rsp
+; ALL-NEXT: .seh_stackalloc 40
+; ALL-NEXT: .seh_endprologue
+; ALL-NEXT: movq %rdx, %rsi
+; ALL-NEXT: movq %rsi, %rax
+; ALL-NEXT: lock cmpxchgq %r8, (%rcx)
+; ALL-NEXT: sete %bl
+; ALL-NEXT: callq dummy
+; ALL-NEXT: testb $-1, %bl
+; ALL-NEXT: cmoveq %rsi, %rax
+; ALL-NEXT: addq $40, %rsp
+; ALL-NEXT: popq %rbx
+; ALL-NEXT: popq %rsi
+; ALL-NEXT: retq
+; ALL-NEXT: .seh_handlerdata
+; ALL-NEXT: .text
+; ALL-NEXT: .seh_endproc
%cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst
%v = extractvalue { i64, i1 } %cx, 0
%p = extractvalue { i64, i1 } %cx, 1
Modified: llvm/branches/release_60/test/CodeGen/X86/x86-repmov-copy-eflags.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/test/CodeGen/X86/x86-repmov-copy-eflags.ll?rev=332933&r1=332932&r2=332933&view=diff
==============================================================================
--- llvm/branches/release_60/test/CodeGen/X86/x86-repmov-copy-eflags.ll (original)
+++ llvm/branches/release_60/test/CodeGen/X86/x86-repmov-copy-eflags.ll Mon May 21 19:46:36 2018
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
target triple = "i686-pc-windows-msvc18.0.0"
@@ -39,15 +39,12 @@ declare void @g(%struct.T*)
; CHECK: leal 8(%esp), %esi
; CHECK: decl (%esp)
-; CHECK: seto %al
-; CHECK: lahf
-; CHECK: movl %eax, %edi
+; CHECK: setne %[[NE_REG:.*]]
; CHECK: pushl %esi
; CHECK: calll _g
; CHECK: addl $4, %esp
-; CHECK: movl %edi, %eax
-; CHECK: addb $127, %al
-; CHECK: sahf
+; CHECK: testb $-1, %[[NE_REG]]
+; CHECK: jne
attributes #0 = { nounwind optsize }
attributes #1 = { argmemonly nounwind }
More information about the llvm-branch-commits
mailing list