[llvm] r342578 - [X86] Handle COPYs of physregs better (regalloc hints)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 19 11:59:11 PDT 2018


Author: rksimon
Date: Wed Sep 19 11:59:08 2018
New Revision: 342578

URL: http://llvm.org/viewvc/llvm-project?rev=342578&view=rev
Log:
[X86] Handle COPYs of physregs better (regalloc hints)

Enable enableMultipleCopyHints() on X86.

Original Patch by @jonpa:

While enabling the mischeduler for SystemZ, it was discovered that for some reason a test needed one extra, seemingly needless COPY (test/CodeGen/SystemZ/call-03.ll). Handling that case resulted in this patch, which improves register coalescing by providing not just one copy hint, but a sorted list of copy hints. On SystemZ, this gives ~12500 fewer register moves on SPEC, as well as marginally less spilling.

Instead of improving just the SystemZ backend, the improvement has been implemented in common code (calculateSpillWeightAndHint()). This causes a lot of test failures, but since this should be a general improvement, I hope that the involved targets will help review the test updates.

Differential Revision: https://reviews.llvm.org/D38128
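
For reference, a minimal conceptual sketch of the common-code side (not a verbatim excerpt from LLVM; the CopyHint struct and the recordSortedHints helper below are illustrative names, while MachineRegisterInfo::addRegAllocationHint is the real API this work builds on): once a target returns true from enableMultipleCopyHints(), calculateSpillWeightAndHint() can accumulate a weight per copy-related register and record the candidates in weight order, instead of keeping only the first hint it finds.

  // Conceptual sketch only: collect every register that VirtReg is
  // copied to/from, weight each by the block frequency of its copies,
  // and hand the allocator the candidates sorted best-first.
  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
  using namespace llvm;

  struct CopyHint {
    unsigned Reg;  // physical or virtual register copied to/from VirtReg
    float Weight;  // frequency-based weight accumulated over all its copies
  };

  static void recordSortedHints(unsigned VirtReg,
                                SmallVectorImpl<CopyHint> &Hints,
                                MachineRegisterInfo &MRI) {
    // Prefer the copy partner whose coalescing saves the most.
    llvm::sort(Hints, [](const CopyHint &A, const CopyHint &B) {
      return A.Weight > B.Weight;
    });
    for (const CopyHint &H : Hints)
      MRI.addRegAllocationHint(VirtReg, H.Reg);
  }

On the X86 side the whole change is the one-line override in X86RegisterInfo.h shown below; its effect is that the allocator now prefers to compute results directly in the ABI return register, which is what turns sequences like "addb %dil, %sil; movl %esi, %eax" into "movl %esi, %eax; addb %dil, %al" throughout the test diffs that follow.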

Modified:
    llvm/trunk/lib/Target/X86/X86RegisterInfo.h
    llvm/trunk/test/CodeGen/X86/GlobalISel/add-scalar.ll
    llvm/trunk/test/CodeGen/X86/GlobalISel/and-scalar.ll
    llvm/trunk/test/CodeGen/X86/GlobalISel/ashr-scalar.ll
    llvm/trunk/test/CodeGen/X86/GlobalISel/binop.ll
    llvm/trunk/test/CodeGen/X86/GlobalISel/callingconv.ll
    llvm/trunk/test/CodeGen/X86/GlobalISel/ext-x86-64.ll
    llvm/trunk/test/CodeGen/X86/GlobalISel/ext.ll
    llvm/trunk/test/CodeGen/X86/GlobalISel/lshr-scalar.ll
    llvm/trunk/test/CodeGen/X86/GlobalISel/memop-scalar.ll
    llvm/trunk/test/CodeGen/X86/GlobalISel/mul-scalar.ll
    llvm/trunk/test/CodeGen/X86/GlobalISel/or-scalar.ll
    llvm/trunk/test/CodeGen/X86/GlobalISel/phi.ll
    llvm/trunk/test/CodeGen/X86/GlobalISel/ptrtoint.ll
    llvm/trunk/test/CodeGen/X86/GlobalISel/shl-scalar-widening.ll
    llvm/trunk/test/CodeGen/X86/GlobalISel/shl-scalar.ll
    llvm/trunk/test/CodeGen/X86/GlobalISel/sub-scalar.ll
    llvm/trunk/test/CodeGen/X86/GlobalISel/trunc.ll
    llvm/trunk/test/CodeGen/X86/GlobalISel/undef.ll
    llvm/trunk/test/CodeGen/X86/GlobalISel/xor-scalar.ll
    llvm/trunk/test/CodeGen/X86/add.ll
    llvm/trunk/test/CodeGen/X86/addcarry.ll
    llvm/trunk/test/CodeGen/X86/and-encoding.ll
    llvm/trunk/test/CodeGen/X86/andimm8.ll
    llvm/trunk/test/CodeGen/X86/anyext.ll
    llvm/trunk/test/CodeGen/X86/apm.ll
    llvm/trunk/test/CodeGen/X86/atomic-eflags-reuse.ll
    llvm/trunk/test/CodeGen/X86/atomic128.ll
    llvm/trunk/test/CodeGen/X86/avg.ll
    llvm/trunk/test/CodeGen/X86/avoid-sfb.ll
    llvm/trunk/test/CodeGen/X86/avx-intel-ocl.ll
    llvm/trunk/test/CodeGen/X86/avx-vinsertf128.ll
    llvm/trunk/test/CodeGen/X86/avx512-arith.ll
    llvm/trunk/test/CodeGen/X86/avx512-calling-conv.ll
    llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
    llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
    llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
    llvm/trunk/test/CodeGen/X86/avx512-regcall-NoMask.ll
    llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
    llvm/trunk/test/CodeGen/X86/avx512-select.ll
    llvm/trunk/test/CodeGen/X86/avx512bw-mask-op.ll
    llvm/trunk/test/CodeGen/X86/avx512dq-mask-op.ll
    llvm/trunk/test/CodeGen/X86/avx512vl-arith.ll
    llvm/trunk/test/CodeGen/X86/bigstructret.ll
    llvm/trunk/test/CodeGen/X86/bitcast-i256.ll
    llvm/trunk/test/CodeGen/X86/bitcast-int-to-vector-bool.ll
    llvm/trunk/test/CodeGen/X86/bitreverse.ll
    llvm/trunk/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll
    llvm/trunk/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll
    llvm/trunk/test/CodeGen/X86/bmi.ll
    llvm/trunk/test/CodeGen/X86/bmi2.ll
    llvm/trunk/test/CodeGen/X86/bool-math.ll
    llvm/trunk/test/CodeGen/X86/bool-simplify.ll
    llvm/trunk/test/CodeGen/X86/bswap-rotate.ll
    llvm/trunk/test/CodeGen/X86/bswap-wide-int.ll
    llvm/trunk/test/CodeGen/X86/bswap.ll
    llvm/trunk/test/CodeGen/X86/bswap_tree.ll
    llvm/trunk/test/CodeGen/X86/bswap_tree2.ll
    llvm/trunk/test/CodeGen/X86/bt.ll
    llvm/trunk/test/CodeGen/X86/btc_bts_btr.ll
    llvm/trunk/test/CodeGen/X86/bypass-slow-division-64.ll
    llvm/trunk/test/CodeGen/X86/clear-highbits.ll
    llvm/trunk/test/CodeGen/X86/clear-lowbits.ll
    llvm/trunk/test/CodeGen/X86/cmov-into-branch.ll
    llvm/trunk/test/CodeGen/X86/cmov.ll
    llvm/trunk/test/CodeGen/X86/cmovcmov.ll
    llvm/trunk/test/CodeGen/X86/cmp.ll
    llvm/trunk/test/CodeGen/X86/cmpxchg-clobber-flags.ll
    llvm/trunk/test/CodeGen/X86/cmpxchg-i128-i1.ll
    llvm/trunk/test/CodeGen/X86/combine-add.ll
    llvm/trunk/test/CodeGen/X86/combine-rotates.ll
    llvm/trunk/test/CodeGen/X86/combine-sdiv.ll
    llvm/trunk/test/CodeGen/X86/combine-udiv.ll
    llvm/trunk/test/CodeGen/X86/combine-urem.ll
    llvm/trunk/test/CodeGen/X86/conditional-indecrement.ll
    llvm/trunk/test/CodeGen/X86/dagcombine-select.ll
    llvm/trunk/test/CodeGen/X86/divide-by-constant.ll
    llvm/trunk/test/CodeGen/X86/divrem.ll
    llvm/trunk/test/CodeGen/X86/divrem8_ext.ll
    llvm/trunk/test/CodeGen/X86/extract-lowbits.ll
    llvm/trunk/test/CodeGen/X86/fast-isel-fold-mem.ll
    llvm/trunk/test/CodeGen/X86/fast-isel-select-cmov.ll
    llvm/trunk/test/CodeGen/X86/fast-isel-select-cmov2.ll
    llvm/trunk/test/CodeGen/X86/fast-isel-select-pseudo-cmov.ll
    llvm/trunk/test/CodeGen/X86/fast-isel-sext-zext.ll
    llvm/trunk/test/CodeGen/X86/fast-isel-shift.ll
    llvm/trunk/test/CodeGen/X86/fast-isel-store.ll
    llvm/trunk/test/CodeGen/X86/fixup-bw-copy.ll
    llvm/trunk/test/CodeGen/X86/fma.ll
    llvm/trunk/test/CodeGen/X86/fold-vector-sext-crash2.ll
    llvm/trunk/test/CodeGen/X86/funnel-shift-rot.ll
    llvm/trunk/test/CodeGen/X86/funnel-shift.ll
    llvm/trunk/test/CodeGen/X86/ghc-cc64.ll
    llvm/trunk/test/CodeGen/X86/hipe-cc64.ll
    llvm/trunk/test/CodeGen/X86/i128-mul.ll
    llvm/trunk/test/CodeGen/X86/iabs.ll
    llvm/trunk/test/CodeGen/X86/imul.ll
    llvm/trunk/test/CodeGen/X86/lea-opt.ll
    llvm/trunk/test/CodeGen/X86/legalize-shift-64.ll
    llvm/trunk/test/CodeGen/X86/legalize-shl-vec.ll
    llvm/trunk/test/CodeGen/X86/machine-combiner-int.ll
    llvm/trunk/test/CodeGen/X86/machine-cp.ll
    llvm/trunk/test/CodeGen/X86/machine-cse.ll
    llvm/trunk/test/CodeGen/X86/madd.ll
    llvm/trunk/test/CodeGen/X86/mask-negated-bool.ll
    llvm/trunk/test/CodeGen/X86/misched-matmul.ll
    llvm/trunk/test/CodeGen/X86/mul-constant-i16.ll
    llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll
    llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll
    llvm/trunk/test/CodeGen/X86/mul-i1024.ll
    llvm/trunk/test/CodeGen/X86/mul-i256.ll
    llvm/trunk/test/CodeGen/X86/mul-i512.ll
    llvm/trunk/test/CodeGen/X86/mul128.ll
    llvm/trunk/test/CodeGen/X86/mul64.ll
    llvm/trunk/test/CodeGen/X86/mwaitx-schedule.ll
    llvm/trunk/test/CodeGen/X86/mwaitx.ll
    llvm/trunk/test/CodeGen/X86/negate-i1.ll
    llvm/trunk/test/CodeGen/X86/negate-shift.ll
    llvm/trunk/test/CodeGen/X86/negate.ll
    llvm/trunk/test/CodeGen/X86/no-sse2-avg.ll
    llvm/trunk/test/CodeGen/X86/not-and-simplify.ll
    llvm/trunk/test/CodeGen/X86/palignr.ll
    llvm/trunk/test/CodeGen/X86/peep-setb.ll
    llvm/trunk/test/CodeGen/X86/pku.ll
    llvm/trunk/test/CodeGen/X86/pmaddubsw.ll
    llvm/trunk/test/CodeGen/X86/pmulh.ll
    llvm/trunk/test/CodeGen/X86/pr12360.ll
    llvm/trunk/test/CodeGen/X86/pr15705.ll
    llvm/trunk/test/CodeGen/X86/pr15981.ll
    llvm/trunk/test/CodeGen/X86/pr23664.ll
    llvm/trunk/test/CodeGen/X86/pr28173.ll
    llvm/trunk/test/CodeGen/X86/pr34653.ll
    llvm/trunk/test/CodeGen/X86/pr34657.ll
    llvm/trunk/test/CodeGen/X86/promote-i16.ll
    llvm/trunk/test/CodeGen/X86/ptest.ll
    llvm/trunk/test/CodeGen/X86/rot16.ll
    llvm/trunk/test/CodeGen/X86/rot64.ll
    llvm/trunk/test/CodeGen/X86/rotate.ll
    llvm/trunk/test/CodeGen/X86/rotate2.ll
    llvm/trunk/test/CodeGen/X86/rotate4.ll
    llvm/trunk/test/CodeGen/X86/sar_fold64.ll
    llvm/trunk/test/CodeGen/X86/sat-add.ll
    llvm/trunk/test/CodeGen/X86/scalar_widen_div.ll
    llvm/trunk/test/CodeGen/X86/schedule-x86-64-shld.ll
    llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll
    llvm/trunk/test/CodeGen/X86/select.ll
    llvm/trunk/test/CodeGen/X86/select_const.ll
    llvm/trunk/test/CodeGen/X86/selectcc-to-shiftand.ll
    llvm/trunk/test/CodeGen/X86/setcc-logic.ll
    llvm/trunk/test/CodeGen/X86/sext-i1.ll
    llvm/trunk/test/CodeGen/X86/shift-and.ll
    llvm/trunk/test/CodeGen/X86/shift-bmi2.ll
    llvm/trunk/test/CodeGen/X86/shift-double-x86_64.ll
    llvm/trunk/test/CodeGen/X86/shift-double.ll
    llvm/trunk/test/CodeGen/X86/shift-pair.ll
    llvm/trunk/test/CodeGen/X86/shuffle-of-insert.ll
    llvm/trunk/test/CodeGen/X86/signbit-shift.ll
    llvm/trunk/test/CodeGen/X86/sret-implicit.ll
    llvm/trunk/test/CodeGen/X86/sse1.ll
    llvm/trunk/test/CodeGen/X86/sse3-intrinsics-x86.ll
    llvm/trunk/test/CodeGen/X86/sse3-schedule.ll
    llvm/trunk/test/CodeGen/X86/sse42-intrinsics-fast-isel-x86_64.ll
    llvm/trunk/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll
    llvm/trunk/test/CodeGen/X86/sse42-intrinsics-x86.ll
    llvm/trunk/test/CodeGen/X86/sse42-intrinsics-x86_64.ll
    llvm/trunk/test/CodeGen/X86/sse42-schedule.ll
    llvm/trunk/test/CodeGen/X86/sttni.ll
    llvm/trunk/test/CodeGen/X86/subcarry.ll
    llvm/trunk/test/CodeGen/X86/swift-return.ll
    llvm/trunk/test/CodeGen/X86/swifterror.ll
    llvm/trunk/test/CodeGen/X86/system-intrinsics-xsetbv.ll
    llvm/trunk/test/CodeGen/X86/tbm-intrinsics-fast-isel-x86_64.ll
    llvm/trunk/test/CodeGen/X86/tbm-intrinsics-fast-isel.ll
    llvm/trunk/test/CodeGen/X86/tbm_patterns.ll
    llvm/trunk/test/CodeGen/X86/trunc-subvector.ll
    llvm/trunk/test/CodeGen/X86/twoaddr-lea.ll
    llvm/trunk/test/CodeGen/X86/umul-with-overflow.ll
    llvm/trunk/test/CodeGen/X86/umulo-128-legalisation-lowering.ll
    llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-innerouter.ll
    llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-interleavedbits.ll
    llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll
    llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-lowhigh.ll
    llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll
    llvm/trunk/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll
    llvm/trunk/test/CodeGen/X86/unfold-masked-merge-vector-variablemask.ll
    llvm/trunk/test/CodeGen/X86/urem-power-of-two.ll
    llvm/trunk/test/CodeGen/X86/urem-seteq-optsize.ll
    llvm/trunk/test/CodeGen/X86/use-add-flags.ll
    llvm/trunk/test/CodeGen/X86/vec_cast.ll
    llvm/trunk/test/CodeGen/X86/vector-bitreverse.ll
    llvm/trunk/test/CodeGen/X86/vector-blend.ll
    llvm/trunk/test/CodeGen/X86/vector-compare-results.ll
    llvm/trunk/test/CodeGen/X86/vector-interleave.ll
    llvm/trunk/test/CodeGen/X86/vector-pcmp.ll
    llvm/trunk/test/CodeGen/X86/vector-reduce-fadd-fast.ll
    llvm/trunk/test/CodeGen/X86/vector-shift-ashr-128.ll
    llvm/trunk/test/CodeGen/X86/vector-shift-lshr-128.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll
    llvm/trunk/test/CodeGen/X86/vector-zext.ll
    llvm/trunk/test/CodeGen/X86/vectorcall.ll
    llvm/trunk/test/CodeGen/X86/vselect-minmax.ll
    llvm/trunk/test/CodeGen/X86/vselect.ll
    llvm/trunk/test/CodeGen/X86/widen_bitops-0.ll
    llvm/trunk/test/CodeGen/X86/widen_bitops-1.ll
    llvm/trunk/test/CodeGen/X86/widen_load-2.ll
    llvm/trunk/test/CodeGen/X86/widen_load-3.ll
    llvm/trunk/test/CodeGen/X86/win64_vararg.ll
    llvm/trunk/test/CodeGen/X86/x64-cet-intrinsics.ll
    llvm/trunk/test/CodeGen/X86/x86-64-bittest-logic.ll
    llvm/trunk/test/CodeGen/X86/x86-64-double-shifts-Oz-Os-O2.ll
    llvm/trunk/test/CodeGen/X86/x86-cmov-converter.ll
    llvm/trunk/test/CodeGen/X86/x86-shrink-wrapping.ll
    llvm/trunk/test/CodeGen/X86/xaluo.ll
    llvm/trunk/test/CodeGen/X86/xchg-nofold.ll
    llvm/trunk/test/CodeGen/X86/xmulo.ll
    llvm/trunk/test/CodeGen/X86/xor.ll
    llvm/trunk/test/DebugInfo/COFF/pieces.ll
    llvm/trunk/test/DebugInfo/X86/live-debug-values.ll
    llvm/trunk/test/DebugInfo/X86/live-debug-variables.ll
    llvm/trunk/test/DebugInfo/X86/pieces-3.ll

Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.h?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86RegisterInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86RegisterInfo.h Wed Sep 19 11:59:08 2018
@@ -95,6 +95,8 @@ public:
   unsigned getRegPressureLimit(const TargetRegisterClass *RC,
                                MachineFunction &MF) const override;
 
+  bool enableMultipleCopyHints() const override { return true; }
+
   /// getCalleeSavedRegs - Return a null-terminated list of all of the
   /// callee-save registers on this target.
   const MCPhysReg *

Modified: llvm/trunk/test/CodeGen/X86/GlobalISel/add-scalar.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/GlobalISel/add-scalar.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/add-scalar.ll (original)
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/add-scalar.ll Wed Sep 19 11:59:08 2018
@@ -54,14 +54,15 @@ define i16 @test_add_i16(i16 %arg1, i16
   ret i16 %ret
 }
 
-define i8 @test_add_i8(i8 %arg1, i8 %arg2) {
-; X64-LABEL: test_add_i8:
-; X64:       # %bb.0:
-; X64-NEXT:    addb %dil, %sil
-; X64-NEXT:    movl %esi, %eax
-; X64-NEXT:    retq
-;
-; X32-LABEL: test_add_i8:
+define i8 @test_add_i8(i8 %arg1, i8 %arg2) {
+; X64-LABEL: test_add_i8:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    addb %dil, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+;
+; X32-LABEL: test_add_i8:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X32-NEXT:    addb {{[0-9]+}}(%esp), %al

Modified: llvm/trunk/test/CodeGen/X86/GlobalISel/and-scalar.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/GlobalISel/and-scalar.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/and-scalar.ll (original)
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/and-scalar.ll Wed Sep 19 11:59:08 2018
@@ -16,43 +16,45 @@ define i32 @test_and_i1(i32 %arg1, i32 %
   ret i32 %ret
 }
 
-define i8 @test_and_i8(i8 %arg1, i8 %arg2) {
-; ALL-LABEL: test_and_i8:
-; ALL:       # %bb.0:
-; ALL-NEXT:    andb %dil, %sil
-; ALL-NEXT:    movl %esi, %eax
-; ALL-NEXT:    retq
-  %ret = and i8 %arg1, %arg2
-  ret i8 %ret
-}
-
-define i16 @test_and_i16(i16 %arg1, i16 %arg2) {
-; ALL-LABEL: test_and_i16:
-; ALL:       # %bb.0:
-; ALL-NEXT:    andw %di, %si
-; ALL-NEXT:    movl %esi, %eax
-; ALL-NEXT:    retq
-  %ret = and i16 %arg1, %arg2
-  ret i16 %ret
-}
-
-define i32 @test_and_i32(i32 %arg1, i32 %arg2) {
-; ALL-LABEL: test_and_i32:
-; ALL:       # %bb.0:
-; ALL-NEXT:    andl %edi, %esi
-; ALL-NEXT:    movl %esi, %eax
-; ALL-NEXT:    retq
-  %ret = and i32 %arg1, %arg2
-  ret i32 %ret
-}
-
-define i64 @test_and_i64(i64 %arg1, i64 %arg2) {
-; ALL-LABEL: test_and_i64:
-; ALL:       # %bb.0:
-; ALL-NEXT:    andq %rdi, %rsi
-; ALL-NEXT:    movq %rsi, %rax
-; ALL-NEXT:    retq
-  %ret = and i64 %arg1, %arg2
-  ret i64 %ret
+define i8 @test_and_i8(i8 %arg1, i8 %arg2) {
+; ALL-LABEL: test_and_i8:
+; ALL:       # %bb.0:
+; ALL-NEXT:    movl %esi, %eax
+; ALL-NEXT:    andb %dil, %al
+; ALL-NEXT:    # kill: def $al killed $al killed $eax
+; ALL-NEXT:    retq
+  %ret = and i8 %arg1, %arg2
+  ret i8 %ret
+}
+
+define i16 @test_and_i16(i16 %arg1, i16 %arg2) {
+; ALL-LABEL: test_and_i16:
+; ALL:       # %bb.0:
+; ALL-NEXT:    movl %esi, %eax
+; ALL-NEXT:    andw %di, %ax
+; ALL-NEXT:    # kill: def $ax killed $ax killed $eax
+; ALL-NEXT:    retq
+  %ret = and i16 %arg1, %arg2
+  ret i16 %ret
+}
+
+define i32 @test_and_i32(i32 %arg1, i32 %arg2) {
+; ALL-LABEL: test_and_i32:
+; ALL:       # %bb.0:
+; ALL-NEXT:    movl %esi, %eax
+; ALL-NEXT:    andl %edi, %eax
+; ALL-NEXT:    retq
+  %ret = and i32 %arg1, %arg2
+  ret i32 %ret
+}
+
+define i64 @test_and_i64(i64 %arg1, i64 %arg2) {
+; ALL-LABEL: test_and_i64:
+; ALL:       # %bb.0:
+; ALL-NEXT:    movq %rsi, %rax
+; ALL-NEXT:    andq %rdi, %rax
+; ALL-NEXT:    retq
+  %ret = and i64 %arg1, %arg2
+  ret i64 %ret
 }
 

Modified: llvm/trunk/test/CodeGen/X86/GlobalISel/ashr-scalar.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/GlobalISel/ashr-scalar.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/ashr-scalar.ll (original)
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/ashr-scalar.ll Wed Sep 19 11:59:08 2018
@@ -1,180 +1,191 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=X64
 
-define i64 @test_ashr_i64(i64 %arg1, i64 %arg2) {
-; X64-LABEL: test_ashr_i64:
-; X64:       # %bb.0:
-; X64-NEXT:    movq %rsi, %rcx
-; X64-NEXT:    # kill: def $cl killed $rcx
-; X64-NEXT:    sarq %cl, %rdi
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    retq
-  %res = ashr i64 %arg1, %arg2
-  ret i64 %res
-}
-
-define i64 @test_ashr_i64_imm(i64 %arg1) {
-; X64-LABEL: test_ashr_i64_imm:
-; X64:       # %bb.0:
-; X64-NEXT:    movq $5, %rcx
-; X64-NEXT:    # kill: def $cl killed $rcx
-; X64-NEXT:    sarq %cl, %rdi
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    retq
-  %res = ashr i64 %arg1, 5
-  ret i64 %res
-}
-
-define i64 @test_ashr_i64_imm1(i64 %arg1) {
-; X64-LABEL: test_ashr_i64_imm1:
-; X64:       # %bb.0:
-; X64-NEXT:    movq $1, %rcx
-; X64-NEXT:    # kill: def $cl killed $rcx
-; X64-NEXT:    sarq %cl, %rdi
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    retq
-  %res = ashr i64 %arg1, 1
-  ret i64 %res
-}
-
-define i32 @test_ashr_i32(i32 %arg1, i32 %arg2) {
-; X64-LABEL: test_ashr_i32:
-; X64:       # %bb.0:
-; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    # kill: def $cl killed $ecx
-; X64-NEXT:    sarl %cl, %edi
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %res = ashr i32 %arg1, %arg2
-  ret i32 %res
-}
-
-define i32 @test_ashr_i32_imm(i32 %arg1) {
-; X64-LABEL: test_ashr_i32_imm:
-; X64:       # %bb.0:
-; X64-NEXT:    movl $5, %ecx
-; X64-NEXT:    # kill: def $cl killed $ecx
-; X64-NEXT:    sarl %cl, %edi
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %res = ashr i32 %arg1, 5
-  ret i32 %res
-}
-
-define i32 @test_ashr_i32_imm1(i32 %arg1) {
-; X64-LABEL: test_ashr_i32_imm1:
-; X64:       # %bb.0:
-; X64-NEXT:    movl $1, %ecx
-; X64-NEXT:    # kill: def $cl killed $ecx
-; X64-NEXT:    sarl %cl, %edi
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %res = ashr i32 %arg1, 1
-  ret i32 %res
-}
-
-define i16 @test_ashr_i16(i32 %arg1, i32 %arg2) {
-; X64-LABEL: test_ashr_i16:
-; X64:       # %bb.0:
-; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    # kill: def $cl killed $cx
-; X64-NEXT:    sarw %cl, %di
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %a = trunc i32 %arg1 to i16
-  %a2 = trunc i32 %arg2 to i16
+define i64 @test_ashr_i64(i64 %arg1, i64 %arg2) {
+; X64-LABEL: test_ashr_i64:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq %rsi, %rcx
+; X64-NEXT:    # kill: def $cl killed $rcx
+; X64-NEXT:    sarq %cl, %rax
+; X64-NEXT:    retq
+  %res = ashr i64 %arg1, %arg2
+  ret i64 %res
+}
+
+define i64 @test_ashr_i64_imm(i64 %arg1) {
+; X64-LABEL: test_ashr_i64_imm:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq $5, %rcx
+; X64-NEXT:    # kill: def $cl killed $rcx
+; X64-NEXT:    sarq %cl, %rax
+; X64-NEXT:    retq
+  %res = ashr i64 %arg1, 5
+  ret i64 %res
+}
+
+define i64 @test_ashr_i64_imm1(i64 %arg1) {
+; X64-LABEL: test_ashr_i64_imm1:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq $1, %rcx
+; X64-NEXT:    # kill: def $cl killed $rcx
+; X64-NEXT:    sarq %cl, %rax
+; X64-NEXT:    retq
+  %res = ashr i64 %arg1, 1
+  ret i64 %res
+}
+
+define i32 @test_ashr_i32(i32 %arg1, i32 %arg2) {
+; X64-LABEL: test_ashr_i32:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    # kill: def $cl killed $ecx
+; X64-NEXT:    sarl %cl, %eax
+; X64-NEXT:    retq
+  %res = ashr i32 %arg1, %arg2
+  ret i32 %res
+}
+
+define i32 @test_ashr_i32_imm(i32 %arg1) {
+; X64-LABEL: test_ashr_i32_imm:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl $5, %ecx
+; X64-NEXT:    # kill: def $cl killed $ecx
+; X64-NEXT:    sarl %cl, %eax
+; X64-NEXT:    retq
+  %res = ashr i32 %arg1, 5
+  ret i32 %res
+}
+
+define i32 @test_ashr_i32_imm1(i32 %arg1) {
+; X64-LABEL: test_ashr_i32_imm1:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl $1, %ecx
+; X64-NEXT:    # kill: def $cl killed $ecx
+; X64-NEXT:    sarl %cl, %eax
+; X64-NEXT:    retq
+  %res = ashr i32 %arg1, 1
+  ret i32 %res
+}
+
+define i16 @test_ashr_i16(i32 %arg1, i32 %arg2) {
+; X64-LABEL: test_ashr_i16:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    # kill: def $cx killed $cx killed $ecx
+; X64-NEXT:    # kill: def $cl killed $cx
+; X64-NEXT:    sarw %cl, %ax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    retq
+  %a = trunc i32 %arg1 to i16
+  %a2 = trunc i32 %arg2 to i16
   %res = ashr i16 %a, %a2
   ret i16 %res
 }
 
-define i16 @test_ashr_i16_imm(i32 %arg1) {
-; X64-LABEL: test_ashr_i16_imm:
-; X64:       # %bb.0:
-; X64-NEXT:    movw $5, %cx
-; X64-NEXT:    # kill: def $cl killed $cx
-; X64-NEXT:    sarw %cl, %di
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %a = trunc i32 %arg1 to i16
-  %res = ashr i16 %a, 5
+define i16 @test_ashr_i16_imm(i32 %arg1) {
+; X64-LABEL: test_ashr_i16_imm:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movw $5, %cx
+; X64-NEXT:    # kill: def $cl killed $cx
+; X64-NEXT:    sarw %cl, %ax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    retq
+  %a = trunc i32 %arg1 to i16
+  %res = ashr i16 %a, 5
   ret i16 %res
 }
 
-define i16 @test_ashr_i16_imm1(i32 %arg1) {
-; X64-LABEL: test_ashr_i16_imm1:
-; X64:       # %bb.0:
-; X64-NEXT:    movw $1, %cx
-; X64-NEXT:    # kill: def $cl killed $cx
-; X64-NEXT:    sarw %cl, %di
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %a = trunc i32 %arg1 to i16
-  %res = ashr i16 %a, 1
+define i16 @test_ashr_i16_imm1(i32 %arg1) {
+; X64-LABEL: test_ashr_i16_imm1:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movw $1, %cx
+; X64-NEXT:    # kill: def $cl killed $cx
+; X64-NEXT:    sarw %cl, %ax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    retq
+  %a = trunc i32 %arg1 to i16
+  %res = ashr i16 %a, 1
   ret i16 %res
 }
 
-define i8 @test_ashr_i8(i32 %arg1, i32 %arg2) {
-; X64-LABEL: test_ashr_i8:
-; X64:       # %bb.0:
-; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    sarb %cl, %dil
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %a = trunc i32 %arg1 to i8
-  %a2 = trunc i32 %arg2 to i8
+define i8 @test_ashr_i8(i32 %arg1, i32 %arg2) {
+; X64-LABEL: test_ashr_i8:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    sarb %cl, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  %a = trunc i32 %arg1 to i8
+  %a2 = trunc i32 %arg2 to i8
   %res = ashr i8 %a, %a2
   ret i8 %res
 }
 
-define i8 @test_ashr_i8_imm(i32 %arg1) {
-; X64-LABEL: test_ashr_i8_imm:
-; X64:       # %bb.0:
-; X64-NEXT:    sarb $5, %dil
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %a = trunc i32 %arg1 to i8
-  %res = ashr i8 %a, 5
+define i8 @test_ashr_i8_imm(i32 %arg1) {
+; X64-LABEL: test_ashr_i8_imm:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    sarb $5, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  %a = trunc i32 %arg1 to i8
+  %res = ashr i8 %a, 5
   ret i8 %res
 }
 
-define i8 @test_ashr_i8_imm1(i32 %arg1) {
-; X64-LABEL: test_ashr_i8_imm1:
-; X64:       # %bb.0:
-; X64-NEXT:    sarb %dil
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %a = trunc i32 %arg1 to i8
-  %res = ashr i8 %a, 1
+define i8 @test_ashr_i8_imm1(i32 %arg1) {
+; X64-LABEL: test_ashr_i8_imm1:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    sarb %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  %a = trunc i32 %arg1 to i8
+  %res = ashr i8 %a, 1
   ret i8 %res
 }
 
-define i1 @test_ashr_i1(i32 %arg1, i32 %arg2) {
-; X64-LABEL: test_ashr_i1:
-; X64:       # %bb.0:
-; X64-NEXT:    shlb $7, %dil
-; X64-NEXT:    sarb $7, %dil
-; X64-NEXT:    andb $1, %sil
-; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    sarb %cl, %dil
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %a = trunc i32 %arg1 to i1
-  %a2 = trunc i32 %arg2 to i1
+define i1 @test_ashr_i1(i32 %arg1, i32 %arg2) {
+; X64-LABEL: test_ashr_i1:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    shlb $7, %al
+; X64-NEXT:    sarb $7, %al
+; X64-NEXT:    andb $1, %cl
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    sarb %cl, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  %a = trunc i32 %arg1 to i1
+  %a2 = trunc i32 %arg2 to i1
   %res = ashr i1 %a, %a2
   ret i1 %res
 }
 
-define i1 @test_ashr_i1_imm1(i32 %arg1) {
-; X64-LABEL: test_ashr_i1_imm1:
-; X64:       # %bb.0:
-; X64-NEXT:    movb $-1, %cl
-; X64-NEXT:    shlb $7, %dil
-; X64-NEXT:    sarb $7, %dil
-; X64-NEXT:    andb $1, %cl
-; X64-NEXT:    sarb %cl, %dil
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %a = trunc i32 %arg1 to i1
-  %res = ashr i1 %a, 1
+define i1 @test_ashr_i1_imm1(i32 %arg1) {
+; X64-LABEL: test_ashr_i1_imm1:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movb $-1, %cl
+; X64-NEXT:    shlb $7, %al
+; X64-NEXT:    sarb $7, %al
+; X64-NEXT:    andb $1, %cl
+; X64-NEXT:    sarb %cl, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  %a = trunc i32 %arg1 to i1
+  %res = ashr i1 %a, 1
   ret i1 %res
 }

Modified: llvm/trunk/test/CodeGen/X86/GlobalISel/binop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/GlobalISel/binop.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/binop.ll (original)
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/binop.ll Wed Sep 19 11:59:08 2018
@@ -4,24 +4,24 @@
 ; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f                  -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=ALL_AVX --check-prefix=AVX512F
 ; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -mattr=+avx512vl -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=ALL_AVX --check-prefix=AVX512VL
 
-define i64 @test_sub_i64(i64 %arg1, i64 %arg2) {
-; ALL-LABEL: test_sub_i64:
-; ALL:       # %bb.0:
-; ALL-NEXT:    subq %rsi, %rdi
-; ALL-NEXT:    movq %rdi, %rax
-; ALL-NEXT:    retq
-  %ret = sub i64 %arg1, %arg2
-  ret i64 %ret
-}
-
-define i32 @test_sub_i32(i32 %arg1, i32 %arg2) {
-; ALL-LABEL: test_sub_i32:
-; ALL:       # %bb.0:
-; ALL-NEXT:    subl %esi, %edi
-; ALL-NEXT:    movl %edi, %eax
-; ALL-NEXT:    retq
-  %ret = sub i32 %arg1, %arg2
-  ret i32 %ret
+define i64 @test_sub_i64(i64 %arg1, i64 %arg2) {
+; ALL-LABEL: test_sub_i64:
+; ALL:       # %bb.0:
+; ALL-NEXT:    movq %rdi, %rax
+; ALL-NEXT:    subq %rsi, %rax
+; ALL-NEXT:    retq
+  %ret = sub i64 %arg1, %arg2
+  ret i64 %ret
+}
+
+define i32 @test_sub_i32(i32 %arg1, i32 %arg2) {
+; ALL-LABEL: test_sub_i32:
+; ALL:       # %bb.0:
+; ALL-NEXT:    movl %edi, %eax
+; ALL-NEXT:    subl %esi, %eax
+; ALL-NEXT:    retq
+  %ret = sub i32 %arg1, %arg2
+  ret i32 %ret
 }
 
 define float @test_add_float(float %arg1, float %arg2) {

Modified: llvm/trunk/test/CodeGen/X86/GlobalISel/callingconv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/GlobalISel/callingconv.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/callingconv.ll (original)
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/callingconv.ll Wed Sep 19 11:59:08 2018
@@ -35,12 +35,13 @@ define i8 @test_arg_i8(i8 %a) {
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X32-NEXT:    retl
 ;
-; X64-LABEL: test_arg_i8:
-; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  ret i8 %a
-}
+; X64-LABEL: test_arg_i8:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  ret i8 %a
+}
 
 define i16 @test_arg_i16(i16 %a) {
 ; X32-LABEL: test_arg_i16:
@@ -48,12 +49,13 @@ define i16 @test_arg_i16(i16 %a) {
 ; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    retl
 ;
-; X64-LABEL: test_arg_i16:
-; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  ret i16 %a
-}
+; X64-LABEL: test_arg_i16:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    retq
+  ret i16 %a
+}
 
 define i32 @test_arg_i32(i32 %a) {
 ; X32-LABEL: test_arg_i32:
@@ -111,14 +113,14 @@ define <4 x i32> @test_v4i32_args(<4 x i
 
 define <8 x i32> @test_v8i32_args(<8 x i32> %arg1, <8 x i32> %arg2) {
 ; X32-LABEL: test_v8i32_args:
-; X32:       # %bb.0:
-; X32-NEXT:    subl $12, %esp
-; X32-NEXT:    .cfi_def_cfa_offset 16
-; X32-NEXT:    movups {{[0-9]+}}(%esp), %xmm1
-; X32-NEXT:    movaps %xmm2, %xmm0
-; X32-NEXT:    addl $12, %esp
-; X32-NEXT:    .cfi_def_cfa_offset 4
-; X32-NEXT:    retl
+; X32:       # %bb.0:
+; X32-NEXT:    subl $12, %esp
+; X32-NEXT:    .cfi_def_cfa_offset 16
+; X32-NEXT:    movaps %xmm2, %xmm0
+; X32-NEXT:    movups {{[0-9]+}}(%esp), %xmm1
+; X32-NEXT:    addl $12, %esp
+; X32-NEXT:    .cfi_def_cfa_offset 4
+; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_v8i32_args:
 ; X64:       # %bb.0:
@@ -254,14 +256,14 @@ define <8 x i32> @test_split_return_call
 ; X32-LABEL: test_split_return_callee:
 ; X32:       # %bb.0:
 ; X32-NEXT:    subl $44, %esp
-; X32-NEXT:    .cfi_def_cfa_offset 48
-; X32-NEXT:    movaps %xmm0, (%esp) # 16-byte Spill
-; X32-NEXT:    movaps %xmm1, {{[0-9]+}}(%esp) # 16-byte Spill
-; X32-NEXT:    movdqu {{[0-9]+}}(%esp), %xmm1
-; X32-NEXT:    movdqa %xmm2, %xmm0
-; X32-NEXT:    calll split_return_callee
-; X32-NEXT:    paddd (%esp), %xmm0 # 16-byte Folded Reload
-; X32-NEXT:    paddd {{[0-9]+}}(%esp), %xmm1 # 16-byte Folded Reload
+; X32-NEXT:    .cfi_def_cfa_offset 48
+; X32-NEXT:    movaps %xmm0, (%esp) # 16-byte Spill
+; X32-NEXT:    movaps %xmm1, {{[0-9]+}}(%esp) # 16-byte Spill
+; X32-NEXT:    movdqa %xmm2, %xmm0
+; X32-NEXT:    movdqu {{[0-9]+}}(%esp), %xmm1
+; X32-NEXT:    calll split_return_callee
+; X32-NEXT:    paddd (%esp), %xmm0 # 16-byte Folded Reload
+; X32-NEXT:    paddd {{[0-9]+}}(%esp), %xmm1 # 16-byte Folded Reload
 ; X32-NEXT:    addl $44, %esp
 ; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    retl

Modified: llvm/trunk/test/CodeGen/X86/GlobalISel/ext-x86-64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/GlobalISel/ext-x86-64.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/ext-x86-64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/ext-x86-64.ll Wed Sep 19 11:59:08 2018
@@ -3,48 +3,45 @@
 
 ; TODO merge with ext.ll after i64 sext supported on 32bit platform
 
-define i64 @test_zext_i1(i8 %a) {
-; X64-LABEL: test_zext_i1:
-; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    andq $1, %rdi
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    retq
-  %val = trunc i8 %a to i1
-  %r = zext i1 %val to i64
+define i64 @test_zext_i1(i8 %a) {
+; X64-LABEL: test_zext_i1:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    andq $1, %rax
+; X64-NEXT:    retq
+  %val = trunc i8 %a to i1
+  %r = zext i1 %val to i64
   ret i64 %r
 }
 
-define i64 @test_sext_i8(i8 %val) {
-; X64-LABEL: test_sext_i8:
-; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    movq $56, %rcx
-; X64-NEXT:    # kill: def $cl killed $rcx
-; X64-NEXT:    shlq %cl, %rdi
-; X64-NEXT:    movq $56, %rcx
-; X64-NEXT:    # kill: def $cl killed $rcx
-; X64-NEXT:    sarq %cl, %rdi
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    retq
-  %r = sext i8 %val to i64
-  ret i64 %r
-}
-
-define i64 @test_sext_i16(i16 %val) {
-; X64-LABEL: test_sext_i16:
-; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    movq $48, %rcx
-; X64-NEXT:    # kill: def $cl killed $rcx
-; X64-NEXT:    shlq %cl, %rdi
-; X64-NEXT:    movq $48, %rcx
-; X64-NEXT:    # kill: def $cl killed $rcx
-; X64-NEXT:    sarq %cl, %rdi
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    retq
-  %r = sext i16 %val to i64
-  ret i64 %r
+define i64 @test_sext_i8(i8 %val) {
+; X64-LABEL: test_sext_i8:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movq $56, %rcx
+; X64-NEXT:    # kill: def $cl killed $rcx
+; X64-NEXT:    shlq %cl, %rax
+; X64-NEXT:    movq $56, %rcx
+; X64-NEXT:    # kill: def $cl killed $rcx
+; X64-NEXT:    sarq %cl, %rax
+; X64-NEXT:    retq
+  %r = sext i8 %val to i64
+  ret i64 %r
+}
+
+define i64 @test_sext_i16(i16 %val) {
+; X64-LABEL: test_sext_i16:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movq $48, %rcx
+; X64-NEXT:    # kill: def $cl killed $rcx
+; X64-NEXT:    shlq %cl, %rax
+; X64-NEXT:    movq $48, %rcx
+; X64-NEXT:    # kill: def $cl killed $rcx
+; X64-NEXT:    sarq %cl, %rax
+; X64-NEXT:    retq
+  %r = sext i16 %val to i64
+  ret i64 %r
 }
 
 ; TODO enable after selection supported

Modified: llvm/trunk/test/CodeGen/X86/GlobalISel/ext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/GlobalISel/ext.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/ext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/ext.ll Wed Sep 19 11:59:08 2018
@@ -2,14 +2,15 @@
 ; RUN: llc -mtriple=x86_64-linux-gnu    -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=X64
 ; RUN: llc -mtriple=i386-linux-gnu      -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=X32
 
-define i8 @test_zext_i1toi8(i32 %a) {
-; X64-LABEL: test_zext_i1toi8:
-; X64:       # %bb.0:
-; X64-NEXT:    andb $1, %dil
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-;
-; X32-LABEL: test_zext_i1toi8:
+define i8 @test_zext_i1toi8(i32 %a) {
+; X64-LABEL: test_zext_i1toi8:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    andb $1, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+;
+; X32-LABEL: test_zext_i1toi8:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    andb $1, %al
@@ -20,14 +21,15 @@ define i8 @test_zext_i1toi8(i32 %a) {
   ret i8 %r
 }
 
-define i16 @test_zext_i1toi16(i32 %a) {
-; X64-LABEL: test_zext_i1toi16:
-; X64:       # %bb.0:
-; X64-NEXT:    andw $1, %di
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-;
-; X32-LABEL: test_zext_i1toi16:
+define i16 @test_zext_i1toi16(i32 %a) {
+; X64-LABEL: test_zext_i1toi16:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    andw $1, %ax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    retq
+;
+; X32-LABEL: test_zext_i1toi16:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    andw $1, %ax
@@ -38,14 +40,14 @@ define i16 @test_zext_i1toi16(i32 %a) {
   ret i16 %r
 }
 
-define i32 @test_zext_i1(i32 %a) {
-; X64-LABEL: test_zext_i1:
-; X64:       # %bb.0:
-; X64-NEXT:    andl $1, %edi
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-;
-; X32-LABEL: test_zext_i1:
+define i32 @test_zext_i1(i32 %a) {
+; X64-LABEL: test_zext_i1:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    andl $1, %eax
+; X64-NEXT:    retq
+;
+; X32-LABEL: test_zext_i1:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    andl $1, %eax
@@ -83,19 +85,19 @@ define i32 @test_zext_i16(i16 %val) {
   ret i32 %r
 }
 
-define i32 @test_sext_i8(i8 %val) {
-; X64-LABEL: test_sext_i8:
-; X64:       # %bb.0:
-; X64-NEXT:    movl $24, %ecx
-; X64-NEXT:    # kill: def $cl killed $ecx
-; X64-NEXT:    shll %cl, %edi
-; X64-NEXT:    movl $24, %ecx
-; X64-NEXT:    # kill: def $cl killed $ecx
-; X64-NEXT:    sarl %cl, %edi
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-;
-; X32-LABEL: test_sext_i8:
+define i32 @test_sext_i8(i8 %val) {
+; X64-LABEL: test_sext_i8:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl $24, %ecx
+; X64-NEXT:    # kill: def $cl killed $ecx
+; X64-NEXT:    shll %cl, %eax
+; X64-NEXT:    movl $24, %ecx
+; X64-NEXT:    # kill: def $cl killed $ecx
+; X64-NEXT:    sarl %cl, %eax
+; X64-NEXT:    retq
+;
+; X32-LABEL: test_sext_i8:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    retl
@@ -103,19 +105,19 @@ define i32 @test_sext_i8(i8 %val) {
   ret i32 %r
 }
 
-define i32 @test_sext_i16(i16 %val) {
-; X64-LABEL: test_sext_i16:
-; X64:       # %bb.0:
-; X64-NEXT:    movl $16, %ecx
-; X64-NEXT:    # kill: def $cl killed $ecx
-; X64-NEXT:    shll %cl, %edi
-; X64-NEXT:    movl $16, %ecx
-; X64-NEXT:    # kill: def $cl killed $ecx
-; X64-NEXT:    sarl %cl, %edi
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-;
-; X32-LABEL: test_sext_i16:
+define i32 @test_sext_i16(i16 %val) {
+; X64-LABEL: test_sext_i16:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl $16, %ecx
+; X64-NEXT:    # kill: def $cl killed $ecx
+; X64-NEXT:    shll %cl, %eax
+; X64-NEXT:    movl $16, %ecx
+; X64-NEXT:    # kill: def $cl killed $ecx
+; X64-NEXT:    sarl %cl, %eax
+; X64-NEXT:    retq
+;
+; X32-LABEL: test_sext_i16:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movswl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    retl

Modified: llvm/trunk/test/CodeGen/X86/GlobalISel/lshr-scalar.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/GlobalISel/lshr-scalar.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/lshr-scalar.ll (original)
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/lshr-scalar.ll Wed Sep 19 11:59:08 2018
@@ -1,178 +1,189 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=X64
 
-define i64 @test_lshr_i64(i64 %arg1, i64 %arg2) {
-; X64-LABEL: test_lshr_i64:
-; X64:       # %bb.0:
-; X64-NEXT:    movq %rsi, %rcx
-; X64-NEXT:    # kill: def $cl killed $rcx
-; X64-NEXT:    shrq %cl, %rdi
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    retq
-  %res = lshr i64 %arg1, %arg2
-  ret i64 %res
-}
-
-define i64 @test_lshr_i64_imm(i64 %arg1) {
-; X64-LABEL: test_lshr_i64_imm:
-; X64:       # %bb.0:
-; X64-NEXT:    movq $5, %rcx
-; X64-NEXT:    # kill: def $cl killed $rcx
-; X64-NEXT:    shrq %cl, %rdi
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    retq
-  %res = lshr i64 %arg1, 5
-  ret i64 %res
-}
-
-define i64 @test_lshr_i64_imm1(i64 %arg1) {
-; X64-LABEL: test_lshr_i64_imm1:
-; X64:       # %bb.0:
-; X64-NEXT:    movq $1, %rcx
-; X64-NEXT:    # kill: def $cl killed $rcx
-; X64-NEXT:    shrq %cl, %rdi
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    retq
-  %res = lshr i64 %arg1, 1
-  ret i64 %res
-}
-
-define i32 @test_lshr_i32(i32 %arg1, i32 %arg2) {
-; X64-LABEL: test_lshr_i32:
-; X64:       # %bb.0:
-; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    # kill: def $cl killed $ecx
-; X64-NEXT:    shrl %cl, %edi
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %res = lshr i32 %arg1, %arg2
-  ret i32 %res
-}
-
-define i32 @test_lshr_i32_imm(i32 %arg1) {
-; X64-LABEL: test_lshr_i32_imm:
-; X64:       # %bb.0:
-; X64-NEXT:    movl $5, %ecx
-; X64-NEXT:    # kill: def $cl killed $ecx
-; X64-NEXT:    shrl %cl, %edi
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %res = lshr i32 %arg1, 5
-  ret i32 %res
-}
-
-define i32 @test_lshr_i32_imm1(i32 %arg1) {
-; X64-LABEL: test_lshr_i32_imm1:
-; X64:       # %bb.0:
-; X64-NEXT:    movl $1, %ecx
-; X64-NEXT:    # kill: def $cl killed $ecx
-; X64-NEXT:    shrl %cl, %edi
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %res = lshr i32 %arg1, 1
-  ret i32 %res
-}
-
-define i16 @test_lshr_i16(i32 %arg1, i32 %arg2) {
-; X64-LABEL: test_lshr_i16:
-; X64:       # %bb.0:
-; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    # kill: def $cl killed $cx
-; X64-NEXT:    shrw %cl, %di
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %a = trunc i32 %arg1 to i16
-  %a2 = trunc i32 %arg2 to i16
+define i64 @test_lshr_i64(i64 %arg1, i64 %arg2) {
+; X64-LABEL: test_lshr_i64:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq %rsi, %rcx
+; X64-NEXT:    # kill: def $cl killed $rcx
+; X64-NEXT:    shrq %cl, %rax
+; X64-NEXT:    retq
+  %res = lshr i64 %arg1, %arg2
+  ret i64 %res
+}
+
+define i64 @test_lshr_i64_imm(i64 %arg1) {
+; X64-LABEL: test_lshr_i64_imm:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq $5, %rcx
+; X64-NEXT:    # kill: def $cl killed $rcx
+; X64-NEXT:    shrq %cl, %rax
+; X64-NEXT:    retq
+  %res = lshr i64 %arg1, 5
+  ret i64 %res
+}
+
+define i64 @test_lshr_i64_imm1(i64 %arg1) {
+; X64-LABEL: test_lshr_i64_imm1:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq $1, %rcx
+; X64-NEXT:    # kill: def $cl killed $rcx
+; X64-NEXT:    shrq %cl, %rax
+; X64-NEXT:    retq
+  %res = lshr i64 %arg1, 1
+  ret i64 %res
+}
+
+define i32 @test_lshr_i32(i32 %arg1, i32 %arg2) {
+; X64-LABEL: test_lshr_i32:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    # kill: def $cl killed $ecx
+; X64-NEXT:    shrl %cl, %eax
+; X64-NEXT:    retq
+  %res = lshr i32 %arg1, %arg2
+  ret i32 %res
+}
+
+define i32 @test_lshr_i32_imm(i32 %arg1) {
+; X64-LABEL: test_lshr_i32_imm:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl $5, %ecx
+; X64-NEXT:    # kill: def $cl killed $ecx
+; X64-NEXT:    shrl %cl, %eax
+; X64-NEXT:    retq
+  %res = lshr i32 %arg1, 5
+  ret i32 %res
+}
+
+define i32 @test_lshr_i32_imm1(i32 %arg1) {
+; X64-LABEL: test_lshr_i32_imm1:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl $1, %ecx
+; X64-NEXT:    # kill: def $cl killed $ecx
+; X64-NEXT:    shrl %cl, %eax
+; X64-NEXT:    retq
+  %res = lshr i32 %arg1, 1
+  ret i32 %res
+}
+
+define i16 @test_lshr_i16(i32 %arg1, i32 %arg2) {
+; X64-LABEL: test_lshr_i16:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    # kill: def $cx killed $cx killed $ecx
+; X64-NEXT:    # kill: def $cl killed $cx
+; X64-NEXT:    shrw %cl, %ax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    retq
+  %a = trunc i32 %arg1 to i16
+  %a2 = trunc i32 %arg2 to i16
   %res = lshr i16 %a, %a2
   ret i16 %res
 }
 
-define i16 @test_lshr_i16_imm(i32 %arg1) {
-; X64-LABEL: test_lshr_i16_imm:
-; X64:       # %bb.0:
-; X64-NEXT:    movw $5, %cx
-; X64-NEXT:    # kill: def $cl killed $cx
-; X64-NEXT:    shrw %cl, %di
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %a = trunc i32 %arg1 to i16
-  %res = lshr i16 %a, 5
+define i16 @test_lshr_i16_imm(i32 %arg1) {
+; X64-LABEL: test_lshr_i16_imm:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movw $5, %cx
+; X64-NEXT:    # kill: def $cl killed $cx
+; X64-NEXT:    shrw %cl, %ax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    retq
+  %a = trunc i32 %arg1 to i16
+  %res = lshr i16 %a, 5
   ret i16 %res
 }
 
-define i16 @test_lshr_i16_imm1(i32 %arg1) {
-; X64-LABEL: test_lshr_i16_imm1:
-; X64:       # %bb.0:
-; X64-NEXT:    movw $1, %cx
-; X64-NEXT:    # kill: def $cl killed $cx
-; X64-NEXT:    shrw %cl, %di
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %a = trunc i32 %arg1 to i16
-  %res = lshr i16 %a, 1
+define i16 @test_lshr_i16_imm1(i32 %arg1) {
+; X64-LABEL: test_lshr_i16_imm1:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movw $1, %cx
+; X64-NEXT:    # kill: def $cl killed $cx
+; X64-NEXT:    shrw %cl, %ax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    retq
+  %a = trunc i32 %arg1 to i16
+  %res = lshr i16 %a, 1
   ret i16 %res
 }
 
-define i8 @test_lshr_i8(i32 %arg1, i32 %arg2) {
-; X64-LABEL: test_lshr_i8:
-; X64:       # %bb.0:
-; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    shrb %cl, %dil
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %a = trunc i32 %arg1 to i8
-  %a2 = trunc i32 %arg2 to i8
+define i8 @test_lshr_i8(i32 %arg1, i32 %arg2) {
+; X64-LABEL: test_lshr_i8:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shrb %cl, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  %a = trunc i32 %arg1 to i8
+  %a2 = trunc i32 %arg2 to i8
   %res = lshr i8 %a, %a2
   ret i8 %res
 }
 
-define i8 @test_lshr_i8_imm(i32 %arg1) {
-; X64-LABEL: test_lshr_i8_imm:
-; X64:       # %bb.0:
-; X64-NEXT:    shrb $5, %dil
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %a = trunc i32 %arg1 to i8
-  %res = lshr i8 %a, 5
+define i8 @test_lshr_i8_imm(i32 %arg1) {
+; X64-LABEL: test_lshr_i8_imm:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    shrb $5, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  %a = trunc i32 %arg1 to i8
+  %res = lshr i8 %a, 5
   ret i8 %res
 }
 
-define i8 @test_lshr_i8_imm1(i32 %arg1) {
-; X64-LABEL: test_lshr_i8_imm1:
-; X64:       # %bb.0:
-; X64-NEXT:    shrb %dil
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %a = trunc i32 %arg1 to i8
-  %res = lshr i8 %a, 1
+define i8 @test_lshr_i8_imm1(i32 %arg1) {
+; X64-LABEL: test_lshr_i8_imm1:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    shrb %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  %a = trunc i32 %arg1 to i8
+  %res = lshr i8 %a, 1
   ret i8 %res
 }
 
-define i1 @test_lshr_i1(i32 %arg1, i32 %arg2) {
-; X64-LABEL: test_lshr_i1:
-; X64:       # %bb.0:
-; X64-NEXT:    andb $1, %dil
-; X64-NEXT:    andb $1, %sil
-; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    shrb %cl, %dil
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %a = trunc i32 %arg1 to i1
-  %a2 = trunc i32 %arg2 to i1
+define i1 @test_lshr_i1(i32 %arg1, i32 %arg2) {
+; X64-LABEL: test_lshr_i1:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    andb $1, %al
+; X64-NEXT:    andb $1, %cl
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shrb %cl, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  %a = trunc i32 %arg1 to i1
+  %a2 = trunc i32 %arg2 to i1
   %res = lshr i1 %a, %a2
   ret i1 %res
 }
 
-define i1 @test_lshr_i1_imm1(i32 %arg1) {
-; X64-LABEL: test_lshr_i1_imm1:
-; X64:       # %bb.0:
-; X64-NEXT:    movb $-1, %cl
-; X64-NEXT:    andb $1, %dil
-; X64-NEXT:    andb $1, %cl
-; X64-NEXT:    shrb %cl, %dil
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %a = trunc i32 %arg1 to i1
-  %res = lshr i1 %a, 1
+define i1 @test_lshr_i1_imm1(i32 %arg1) {
+; X64-LABEL: test_lshr_i1_imm1:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movb $-1, %cl
+; X64-NEXT:    andb $1, %al
+; X64-NEXT:    andb $1, %cl
+; X64-NEXT:    shrb %cl, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  %a = trunc i32 %arg1 to i1
+  %res = lshr i1 %a, 1
   ret i1 %res
 }

Modified: llvm/trunk/test/CodeGen/X86/GlobalISel/memop-scalar.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/GlobalISel/memop-scalar.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/memop-scalar.ll (original)
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/memop-scalar.ll Wed Sep 19 11:59:08 2018
@@ -79,71 +79,71 @@ define double @test_load_double(double *
   ret double %r
 }
 
-define i1 * @test_store_i1(i1 %val, i1 * %p1) {
-; ALL-LABEL: test_store_i1:
-; ALL:       # %bb.0:
-; ALL-NEXT:    andb $1, %dil
-; ALL-NEXT:    movb %dil, (%rsi)
-; ALL-NEXT:    movq %rsi, %rax
-; ALL-NEXT:    retq
-  store i1 %val, i1* %p1
-  ret i1 * %p1;
-}
-
-define i32 * @test_store_i32(i32 %val, i32 * %p1) {
-; ALL-LABEL: test_store_i32:
-; ALL:       # %bb.0:
-; ALL-NEXT:    movl %edi, (%rsi)
-; ALL-NEXT:    movq %rsi, %rax
-; ALL-NEXT:    retq
-  store i32 %val, i32* %p1
-  ret i32 * %p1;
-}
-
-define i64 * @test_store_i64(i64 %val, i64 * %p1) {
-; ALL-LABEL: test_store_i64:
-; ALL:       # %bb.0:
-; ALL-NEXT:    movq %rdi, (%rsi)
-; ALL-NEXT:    movq %rsi, %rax
-; ALL-NEXT:    retq
-  store i64 %val, i64* %p1
-  ret i64 * %p1;
+define i1 * @test_store_i1(i1 %val, i1 * %p1) {
+; ALL-LABEL: test_store_i1:
+; ALL:       # %bb.0:
+; ALL-NEXT:    movq %rsi, %rax
+; ALL-NEXT:    andb $1, %dil
+; ALL-NEXT:    movb %dil, (%rsi)
+; ALL-NEXT:    retq
+  store i1 %val, i1* %p1
+  ret i1 * %p1;
+}
+
+define i32 * @test_store_i32(i32 %val, i32 * %p1) {
+; ALL-LABEL: test_store_i32:
+; ALL:       # %bb.0:
+; ALL-NEXT:    movq %rsi, %rax
+; ALL-NEXT:    movl %edi, (%rsi)
+; ALL-NEXT:    retq
+  store i32 %val, i32* %p1
+  ret i32 * %p1;
+}
+
+define i64 * @test_store_i64(i64 %val, i64 * %p1) {
+; ALL-LABEL: test_store_i64:
+; ALL:       # %bb.0:
+; ALL-NEXT:    movq %rsi, %rax
+; ALL-NEXT:    movq %rdi, (%rsi)
+; ALL-NEXT:    retq
+  store i64 %val, i64* %p1
+  ret i64 * %p1;
 }
 
 define float * @test_store_float(float %val, float * %p1) {
-;
-; SSE_FAST-LABEL: test_store_float:
-; SSE_FAST:       # %bb.0:
-; SSE_FAST-NEXT:    movd %xmm0, %eax
-; SSE_FAST-NEXT:    movl %eax, (%rdi)
-; SSE_FAST-NEXT:    movq %rdi, %rax
-; SSE_FAST-NEXT:    retq
-;
-; SSE_GREEDY-LABEL: test_store_float:
-; SSE_GREEDY:       # %bb.0:
-; SSE_GREEDY-NEXT:    movss %xmm0, (%rdi)
-; SSE_GREEDY-NEXT:    movq %rdi, %rax
-; SSE_GREEDY-NEXT:    retq
-  store float %val, float* %p1
-  ret float * %p1;
+;
+; SSE_FAST-LABEL: test_store_float:
+; SSE_FAST:       # %bb.0:
+; SSE_FAST-NEXT:    movq %rdi, %rax
+; SSE_FAST-NEXT:    movd %xmm0, %ecx
+; SSE_FAST-NEXT:    movl %ecx, (%rdi)
+; SSE_FAST-NEXT:    retq
+;
+; SSE_GREEDY-LABEL: test_store_float:
+; SSE_GREEDY:       # %bb.0:
+; SSE_GREEDY-NEXT:    movq %rdi, %rax
+; SSE_GREEDY-NEXT:    movss %xmm0, (%rdi)
+; SSE_GREEDY-NEXT:    retq
+  store float %val, float* %p1
+  ret float * %p1;
 }
 
 define double * @test_store_double(double %val, double * %p1) {
-;
-; SSE_FAST-LABEL: test_store_double:
-; SSE_FAST:       # %bb.0:
-; SSE_FAST-NEXT:    movq %xmm0, %rax
-; SSE_FAST-NEXT:    movq %rax, (%rdi)
-; SSE_FAST-NEXT:    movq %rdi, %rax
-; SSE_FAST-NEXT:    retq
-;
-; SSE_GREEDY-LABEL: test_store_double:
-; SSE_GREEDY:       # %bb.0:
-; SSE_GREEDY-NEXT:    movsd %xmm0, (%rdi)
-; SSE_GREEDY-NEXT:    movq %rdi, %rax
-; SSE_GREEDY-NEXT:    retq
-  store double %val, double* %p1
-  ret double * %p1;
+;
+; SSE_FAST-LABEL: test_store_double:
+; SSE_FAST:       # %bb.0:
+; SSE_FAST-NEXT:    movq %rdi, %rax
+; SSE_FAST-NEXT:    movq %xmm0, %rcx
+; SSE_FAST-NEXT:    movq %rcx, (%rdi)
+; SSE_FAST-NEXT:    retq
+;
+; SSE_GREEDY-LABEL: test_store_double:
+; SSE_GREEDY:       # %bb.0:
+; SSE_GREEDY-NEXT:    movq %rdi, %rax
+; SSE_GREEDY-NEXT:    movsd %xmm0, (%rdi)
+; SSE_GREEDY-NEXT:    retq
+  store double %val, double* %p1
+  ret double * %p1;
 }
 
 define i32* @test_load_ptr(i32** %ptr1) {

Modified: llvm/trunk/test/CodeGen/X86/GlobalISel/mul-scalar.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/GlobalISel/mul-scalar.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/mul-scalar.ll (original)
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/mul-scalar.ll Wed Sep 19 11:59:08 2018
@@ -5,35 +5,36 @@
 ;define i8 @test_mul_i8(i8 %arg1, i8 %arg2) {
 ;  %ret = mul i8 %arg1, %arg2
 ;  ret i8 %ret
-;}
-
-define i16 @test_mul_i16(i16 %arg1, i16 %arg2) {
-; X64-LABEL: test_mul_i16:
-; X64:       # %bb.0:
-; X64-NEXT:    imulw %di, %si
-; X64-NEXT:    movl %esi, %eax
-; X64-NEXT:    retq
-  %ret = mul i16 %arg1, %arg2
-  ret i16 %ret
-}
-
-define i32 @test_mul_i32(i32 %arg1, i32 %arg2) {
-; X64-LABEL: test_mul_i32:
-; X64:       # %bb.0:
-; X64-NEXT:    imull %edi, %esi
-; X64-NEXT:    movl %esi, %eax
-; X64-NEXT:    retq
-  %ret = mul i32 %arg1, %arg2
-  ret i32 %ret
-}
-
-define i64 @test_mul_i64(i64 %arg1, i64 %arg2) {
-; X64-LABEL: test_mul_i64:
-; X64:       # %bb.0:
-; X64-NEXT:    imulq %rdi, %rsi
-; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    retq
-  %ret = mul i64 %arg1, %arg2
-  ret i64 %ret
-}
+;}
+
+define i16 @test_mul_i16(i16 %arg1, i16 %arg2) {
+; ALL-LABEL: test_mul_i16:
+; ALL:       # %bb.0:
+; ALL-NEXT:    movl %esi, %eax
+; ALL-NEXT:    imulw %di, %ax
+; ALL-NEXT:    # kill: def $ax killed $ax killed $eax
+; ALL-NEXT:    retq
+  %ret = mul i16 %arg1, %arg2
+  ret i16 %ret
+}
+
+define i32 @test_mul_i32(i32 %arg1, i32 %arg2) {
+; ALL-LABEL: test_mul_i32:
+; ALL:       # %bb.0:
+; ALL-NEXT:    movl %esi, %eax
+; ALL-NEXT:    imull %edi, %eax
+; ALL-NEXT:    retq
+  %ret = mul i32 %arg1, %arg2
+  ret i32 %ret
+}
+
+define i64 @test_mul_i64(i64 %arg1, i64 %arg2) {
+; ALL-LABEL: test_mul_i64:
+; ALL:       # %bb.0:
+; ALL-NEXT:    movq %rsi, %rax
+; ALL-NEXT:    imulq %rdi, %rax
+; ALL-NEXT:    retq
+  %ret = mul i64 %arg1, %arg2
+  ret i64 %ret
+}
 

Modified: llvm/trunk/test/CodeGen/X86/GlobalISel/or-scalar.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/GlobalISel/or-scalar.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/or-scalar.ll (original)
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/or-scalar.ll Wed Sep 19 11:59:08 2018
@@ -16,43 +16,45 @@ define i32 @test_or_i1(i32 %arg1, i32 %a
   ret i32 %ret
 }
 
-define i8 @test_or_i8(i8 %arg1, i8 %arg2) {
-; ALL-LABEL: test_or_i8:
-; ALL:       # %bb.0:
-; ALL-NEXT:    orb %dil, %sil
-; ALL-NEXT:    movl %esi, %eax
-; ALL-NEXT:    retq
-  %ret = or i8 %arg1, %arg2
-  ret i8 %ret
-}
-
-define i16 @test_or_i16(i16 %arg1, i16 %arg2) {
-; ALL-LABEL: test_or_i16:
-; ALL:       # %bb.0:
-; ALL-NEXT:    orw %di, %si
-; ALL-NEXT:    movl %esi, %eax
-; ALL-NEXT:    retq
-  %ret = or i16 %arg1, %arg2
-  ret i16 %ret
-}
-
-define i32 @test_or_i32(i32 %arg1, i32 %arg2) {
-; ALL-LABEL: test_or_i32:
-; ALL:       # %bb.0:
-; ALL-NEXT:    orl %edi, %esi
-; ALL-NEXT:    movl %esi, %eax
-; ALL-NEXT:    retq
-  %ret = or i32 %arg1, %arg2
-  ret i32 %ret
-}
-
-define i64 @test_or_i64(i64 %arg1, i64 %arg2) {
-; ALL-LABEL: test_or_i64:
-; ALL:       # %bb.0:
-; ALL-NEXT:    orq %rdi, %rsi
-; ALL-NEXT:    movq %rsi, %rax
-; ALL-NEXT:    retq
-  %ret = or i64 %arg1, %arg2
-  ret i64 %ret
+define i8 @test_or_i8(i8 %arg1, i8 %arg2) {
+; ALL-LABEL: test_or_i8:
+; ALL:       # %bb.0:
+; ALL-NEXT:    movl %esi, %eax
+; ALL-NEXT:    orb %dil, %al
+; ALL-NEXT:    # kill: def $al killed $al killed $eax
+; ALL-NEXT:    retq
+  %ret = or i8 %arg1, %arg2
+  ret i8 %ret
+}
+
+define i16 @test_or_i16(i16 %arg1, i16 %arg2) {
+; ALL-LABEL: test_or_i16:
+; ALL:       # %bb.0:
+; ALL-NEXT:    movl %esi, %eax
+; ALL-NEXT:    orw %di, %ax
+; ALL-NEXT:    # kill: def $ax killed $ax killed $eax
+; ALL-NEXT:    retq
+  %ret = or i16 %arg1, %arg2
+  ret i16 %ret
+}
+
+define i32 @test_or_i32(i32 %arg1, i32 %arg2) {
+; ALL-LABEL: test_or_i32:
+; ALL:       # %bb.0:
+; ALL-NEXT:    movl %esi, %eax
+; ALL-NEXT:    orl %edi, %eax
+; ALL-NEXT:    retq
+  %ret = or i32 %arg1, %arg2
+  ret i32 %ret
+}
+
+define i64 @test_or_i64(i64 %arg1, i64 %arg2) {
+; ALL-LABEL: test_or_i64:
+; ALL:       # %bb.0:
+; ALL-NEXT:    movq %rsi, %rax
+; ALL-NEXT:    orq %rdi, %rax
+; ALL-NEXT:    retq
+  %ret = or i64 %arg1, %arg2
+  ret i64 %ret
 }
 

Modified: llvm/trunk/test/CodeGen/X86/GlobalISel/phi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/GlobalISel/phi.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/phi.ll (original)
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/phi.ll Wed Sep 19 11:59:08 2018
@@ -1,21 +1,24 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=ALL
 
-define i8 @test_i8(i32 %a, i8 %f, i8 %t) {
-; ALL-LABEL: test_i8:
-; ALL:       # %bb.0: # %entry
-; ALL-NEXT:    xorl %eax, %eax
-; ALL-NEXT:    cmpl %eax, %edi
-; ALL-NEXT:    setg %al
-; ALL-NEXT:    testb $1, %al
-; ALL-NEXT:    jne .LBB0_2
-; ALL-NEXT:  # %bb.1: # %cond.false
-; ALL-NEXT:    movl %edx, %esi
-; ALL-NEXT:  .LBB0_2: # %cond.end
-; ALL-NEXT:    movl %esi, %eax
-; ALL-NEXT:    retq
-entry:
-  %cmp = icmp sgt i32 %a, 0
+define i8 @test_i8(i32 %a, i8 %f, i8 %t) {
+; ALL-LABEL: test_i8:
+; ALL:       # %bb.0: # %entry
+; ALL-NEXT:    xorl %ecx, %ecx
+; ALL-NEXT:    cmpl %ecx, %edi
+; ALL-NEXT:    setg %cl
+; ALL-NEXT:    testb $1, %cl
+; ALL-NEXT:    je .LBB0_2
+; ALL-NEXT:  # %bb.1:
+; ALL-NEXT:    movl %esi, %eax
+; ALL-NEXT:    # kill: def $al killed $al killed $eax
+; ALL-NEXT:    retq
+; ALL-NEXT:  .LBB0_2: # %cond.false
+; ALL-NEXT:    movl %edx, %eax
+; ALL-NEXT:    # kill: def $al killed $al killed $eax
+; ALL-NEXT:    retq
+entry:
+  %cmp = icmp sgt i32 %a, 0
   br i1 %cmp, label %cond.true, label %cond.false
 
 cond.true:                                        ; preds = %entry
@@ -29,21 +32,24 @@ cond.end:
   ret i8 %cond
 }
 
-define i16 @test_i16(i32 %a, i16 %f, i16 %t) {
-; ALL-LABEL: test_i16:
-; ALL:       # %bb.0: # %entry
-; ALL-NEXT:    xorl %eax, %eax
-; ALL-NEXT:    cmpl %eax, %edi
-; ALL-NEXT:    setg %al
-; ALL-NEXT:    testb $1, %al
-; ALL-NEXT:    jne .LBB1_2
-; ALL-NEXT:  # %bb.1: # %cond.false
-; ALL-NEXT:    movl %edx, %esi
-; ALL-NEXT:  .LBB1_2: # %cond.end
-; ALL-NEXT:    movl %esi, %eax
-; ALL-NEXT:    retq
-entry:
-  %cmp = icmp sgt i32 %a, 0
+define i16 @test_i16(i32 %a, i16 %f, i16 %t) {
+; ALL-LABEL: test_i16:
+; ALL:       # %bb.0: # %entry
+; ALL-NEXT:    xorl %ecx, %ecx
+; ALL-NEXT:    cmpl %ecx, %edi
+; ALL-NEXT:    setg %cl
+; ALL-NEXT:    testb $1, %cl
+; ALL-NEXT:    je .LBB1_2
+; ALL-NEXT:  # %bb.1:
+; ALL-NEXT:    movl %esi, %eax
+; ALL-NEXT:    # kill: def $ax killed $ax killed $eax
+; ALL-NEXT:    retq
+; ALL-NEXT:  .LBB1_2: # %cond.false
+; ALL-NEXT:    movl %edx, %eax
+; ALL-NEXT:    # kill: def $ax killed $ax killed $eax
+; ALL-NEXT:    retq
+entry:
+  %cmp = icmp sgt i32 %a, 0
   br i1 %cmp, label %cond.true, label %cond.false
 
 cond.true:                                        ; preds = %entry
@@ -57,21 +63,21 @@ cond.end:
   ret i16 %cond
 }
 
-define i32 @test_i32(i32 %a, i32 %f, i32 %t) {
-; ALL-LABEL: test_i32:
-; ALL:       # %bb.0: # %entry
-; ALL-NEXT:    xorl %eax, %eax
-; ALL-NEXT:    cmpl %eax, %edi
-; ALL-NEXT:    setg %al
-; ALL-NEXT:    testb $1, %al
-; ALL-NEXT:    jne .LBB2_2
-; ALL-NEXT:  # %bb.1: # %cond.false
-; ALL-NEXT:    movl %edx, %esi
-; ALL-NEXT:  .LBB2_2: # %cond.end
-; ALL-NEXT:    movl %esi, %eax
-; ALL-NEXT:    retq
-entry:
-  %cmp = icmp sgt i32 %a, 0
+define i32 @test_i32(i32 %a, i32 %f, i32 %t) {
+; ALL-LABEL: test_i32:
+; ALL:       # %bb.0: # %entry
+; ALL-NEXT:    movl %esi, %eax
+; ALL-NEXT:    xorl %ecx, %ecx
+; ALL-NEXT:    cmpl %ecx, %edi
+; ALL-NEXT:    setg %cl
+; ALL-NEXT:    testb $1, %cl
+; ALL-NEXT:    jne .LBB2_2
+; ALL-NEXT:  # %bb.1: # %cond.false
+; ALL-NEXT:    movl %edx, %eax
+; ALL-NEXT:  .LBB2_2: # %cond.end
+; ALL-NEXT:    retq
+entry:
+  %cmp = icmp sgt i32 %a, 0
   br i1 %cmp, label %cond.true, label %cond.false
 
 cond.true:                                        ; preds = %entry
@@ -85,21 +91,21 @@ cond.end:
   ret i32 %cond
 }
 
-define i64 @test_i64(i32 %a, i64 %f, i64 %t) {
-; ALL-LABEL: test_i64:
-; ALL:       # %bb.0: # %entry
-; ALL-NEXT:    xorl %eax, %eax
-; ALL-NEXT:    cmpl %eax, %edi
-; ALL-NEXT:    setg %al
-; ALL-NEXT:    testb $1, %al
-; ALL-NEXT:    jne .LBB3_2
-; ALL-NEXT:  # %bb.1: # %cond.false
-; ALL-NEXT:    movq %rdx, %rsi
-; ALL-NEXT:  .LBB3_2: # %cond.end
-; ALL-NEXT:    movq %rsi, %rax
-; ALL-NEXT:    retq
-entry:
-  %cmp = icmp sgt i32 %a, 0
+define i64 @test_i64(i32 %a, i64 %f, i64 %t) {
+; ALL-LABEL: test_i64:
+; ALL:       # %bb.0: # %entry
+; ALL-NEXT:    movq %rsi, %rax
+; ALL-NEXT:    xorl %ecx, %ecx
+; ALL-NEXT:    cmpl %ecx, %edi
+; ALL-NEXT:    setg %cl
+; ALL-NEXT:    testb $1, %cl
+; ALL-NEXT:    jne .LBB3_2
+; ALL-NEXT:  # %bb.1: # %cond.false
+; ALL-NEXT:    movq %rdx, %rax
+; ALL-NEXT:  .LBB3_2: # %cond.end
+; ALL-NEXT:    retq
+entry:
+  %cmp = icmp sgt i32 %a, 0
   br i1 %cmp, label %cond.true, label %cond.false
 
 cond.true:                                        ; preds = %entry

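The phi.ll changes show the hint interacting with control flow. For the i8 and i16 cases both phi inputs now want to end up in %eax, so instead of the blocks merging into one shared result copy, each predecessor gets its own copy-and-return tail (and the branch sense flips from jne to je to match the new layout); the i32 and i64 cases keep the single-exit shape, with %eax preloaded from one incoming value and overwritten on the other path. Distilled, the i32 form is:

    movl %esi, %eax        # preload one phi input
    ...
    jne .LBB2_2
    movl %edx, %eax        # the other path overwrites
    .LBB2_2:
    retq

Either way, the join no longer needs a separate result copy after the phi.
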
Modified: llvm/trunk/test/CodeGen/X86/GlobalISel/ptrtoint.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/GlobalISel/ptrtoint.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/ptrtoint.ll (original)
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/ptrtoint.ll Wed Sep 19 11:59:08 2018
@@ -1,43 +1,47 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=CHECK
 
-define i1 @ptrtoint_s1_p0(i64* %p) {
-; CHECK-LABEL: ptrtoint_s1_p0:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    retq
-entry:
-  %0 = ptrtoint i64* %p to i1
+define i1 @ptrtoint_s1_p0(i64* %p) {
+; CHECK-LABEL: ptrtoint_s1_p0:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    # kill: def $al killed $al killed $rax
+; CHECK-NEXT:    retq
+entry:
+  %0 = ptrtoint i64* %p to i1
   ret i1 %0
 }
 
-define i8 @ptrtoint_s8_p0(i64* %p) {
-; CHECK-LABEL: ptrtoint_s8_p0:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    retq
-entry:
-  %0 = ptrtoint i64* %p to i8
+define i8 @ptrtoint_s8_p0(i64* %p) {
+; CHECK-LABEL: ptrtoint_s8_p0:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    # kill: def $al killed $al killed $rax
+; CHECK-NEXT:    retq
+entry:
+  %0 = ptrtoint i64* %p to i8
   ret i8 %0
 }
 
-define i16 @ptrtoint_s16_p0(i64* %p) {
-; CHECK-LABEL: ptrtoint_s16_p0:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    retq
-entry:
-  %0 = ptrtoint i64* %p to i16
+define i16 @ptrtoint_s16_p0(i64* %p) {
+; CHECK-LABEL: ptrtoint_s16_p0:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $rax
+; CHECK-NEXT:    retq
+entry:
+  %0 = ptrtoint i64* %p to i16
   ret i16 %0
 }
 
-define i32 @ptrtoint_s32_p0(i64* %p) {
-; CHECK-LABEL: ptrtoint_s32_p0:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    retq
-entry:
-  %0 = ptrtoint i64* %p to i32
+define i32 @ptrtoint_s32_p0(i64* %p) {
+; CHECK-LABEL: ptrtoint_s32_p0:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT:    retq
+entry:
+  %0 = ptrtoint i64* %p to i32
   ret i32 %0
 }
 

Modified: llvm/trunk/test/CodeGen/X86/GlobalISel/shl-scalar-widening.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/GlobalISel/shl-scalar-widening.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/shl-scalar-widening.ll (original)
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/shl-scalar-widening.ll Wed Sep 19 11:59:08 2018
@@ -1,67 +1,54 @@
-; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=X64
-
-define i16 @test_shl_i4(i16 %v, i16 %a, i16 %b) {
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=X64
+
+define i16 @test_shl_i4(i16 %v, i16 %a, i16 %b) {
 ; Let's say the arguments are the following unsigned
 ; integers in two’s complement representation:
 ;
-; %v: 77 (0000 0000  0100 1101)
-; %a: 74 (0000 0000  0100 1010)
-; %b: 72 (0000 0000  0100 1000)
-  %v.t = trunc i16 %v to i4  ; %v.t: 13 (1101)
-  %a.t = trunc i16 %a to i4  ; %a.t: 10 (1010)
-  %b.t = trunc i16 %b to i4  ; %b.t:  8 (1000)
+; %v: 77 (0000 0000  0100 1101)
+; %a: 74 (0000 0000  0100 1010)
+; %b: 72 (0000 0000  0100 1000)
+; X64-LABEL: test_shl_i4:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %edx, %ecx
+; X64-NEXT:    addb %sil, %cl
+; X64-NEXT:    andb $15, %cl
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shlb %cl, %al
+; X64-NEXT:    andw $15, %ax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    retq
+  %v.t = trunc i16 %v to i4  ; %v.t: 13 (1101)
+  %a.t = trunc i16 %a to i4  ; %a.t: 10 (1010)
+  %b.t = trunc i16 %b to i4  ; %b.t:  8 (1000)
   %n.t = add i4 %a.t, %b.t   ; %n.t:  2 (0010)
   %r.t = shl i4 %v.t, %n.t   ; %r.t:  4 (0100)
   %r = zext i4 %r.t to i16
-; %r:  4 (0000 0000 0000 0100)
-  ret i16 %r
-
-; X64-LABEL: test_shl_i4
-;
-; %di:  77 (0000 0000  0100 1101)
-; %si:  74 (0000 0000  0100 1010)
-; %dx:  72 (0000 0000  0100 1000)
-;
-; X64:       # %bb.0:
-;
-; X64-NEXT:    addb %sil, %dl
-; %dx: 146 (0000 0000  1001 0010)
-;
-; X64-NEXT:    andb $15, %dl
-; %dx:   2 (0000 0000  0000 0010)
-;
-; X64-NEXT:    movl %edx, %ecx
-; %cx:   2 (0000 0000  0000 0010)
-;
-; X64-NEXT:    shlb %cl, %dil
-; %di:  52 (0000 0000  0011 0100)
-;
-; X64-NEXT:    andw $15, %di
-; %di:   4 (0000 0000  0000 0100)
-;
-; X64-NEXT:    movl %edi, %eax
-; %ax:   4 (0000 0000  0000 0100)
-;
-; X64-NEXT:    retq
-;
-; Let's pretend that legalizing G_SHL by widening its second
-; source operand is done via G_ANYEXT rather than G_ZEXT and
-; see what happens:
-;
-;              addb %sil, %dl
-; %dx: 146 (0000 0000  1001 0010)
-;
-;              movl %edx, %ecx
-; %cx: 146 (0000 0000  1001 0010)
-;
-;              shlb %cl, %dil
-; %di:   0 (0000 0000  0000 0000)
-;
-;              andw $15, %di
-; %di:   0 (0000 0000  0000 0000)
-;
-;              movl %edi, %eax
-; %ax:   0 (0000 0000  0000 0000)
-;
-;              retq
-}
+; %r:  4 (0000 0000 0000 0100)
+  ret i16 %r
+
+; %di:  77 (0000 0000  0100 1101)
+; %si:  74 (0000 0000  0100 1010)
+; %dx:  72 (0000 0000  0100 1000)
+; %dx: 146 (0000 0000  1001 0010)
+; %dx:   2 (0000 0000  0000 0010)
+; %cx:   2 (0000 0000  0000 0010)
+; %di:  52 (0000 0000  0011 0100)
+; %di:   4 (0000 0000  0000 0100)
+; %ax:   4 (0000 0000  0000 0100)
+; Let's pretend that legalizing G_SHL by widening its second
+; source operand is done via G_ANYEXT rather than G_ZEXT and
+; see what happens:
+;              addb %sil, %dl
+; %dx: 146 (0000 0000  1001 0010)
+;              movl %edx, %ecx
+; %cx: 146 (0000 0000  1001 0010)
+;              shlb %cl, %dil
+; %di:   0 (0000 0000  0000 0000)
+;              andw $15, %di
+; %di:   0 (0000 0000  0000 0000)
+;              movl %edi, %eax
+; %ax:   0 (0000 0000  0000 0000)
+;              retq
+}

Modified: llvm/trunk/test/CodeGen/X86/GlobalISel/shl-scalar.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/GlobalISel/shl-scalar.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/shl-scalar.ll (original)
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/shl-scalar.ll Wed Sep 19 11:59:08 2018
@@ -1,176 +1,187 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=X64
 
-define i64 @test_shl_i64(i64 %arg1, i64 %arg2) {
-; X64-LABEL: test_shl_i64:
-; X64:       # %bb.0:
-; X64-NEXT:    movq %rsi, %rcx
-; X64-NEXT:    # kill: def $cl killed $rcx
-; X64-NEXT:    shlq %cl, %rdi
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    retq
-  %res = shl i64 %arg1, %arg2
-  ret i64 %res
-}
-
-define i64 @test_shl_i64_imm(i64 %arg1) {
-; X64-LABEL: test_shl_i64_imm:
-; X64:       # %bb.0:
-; X64-NEXT:    movq $5, %rcx
-; X64-NEXT:    # kill: def $cl killed $rcx
-; X64-NEXT:    shlq %cl, %rdi
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    retq
-  %res = shl i64 %arg1, 5
-  ret i64 %res
-}
-
-define i64 @test_shl_i64_imm1(i64 %arg1) {
-; X64-LABEL: test_shl_i64_imm1:
-; X64:       # %bb.0:
-; X64-NEXT:    movq $1, %rcx
-; X64-NEXT:    # kill: def $cl killed $rcx
-; X64-NEXT:    shlq %cl, %rdi
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    retq
-  %res = shl i64 %arg1, 1
-  ret i64 %res
-}
-
-define i32 @test_shl_i32(i32 %arg1, i32 %arg2) {
-; X64-LABEL: test_shl_i32:
-; X64:       # %bb.0:
-; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    # kill: def $cl killed $ecx
-; X64-NEXT:    shll %cl, %edi
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %res = shl i32 %arg1, %arg2
-  ret i32 %res
-}
-
-define i32 @test_shl_i32_imm(i32 %arg1) {
-; X64-LABEL: test_shl_i32_imm:
-; X64:       # %bb.0:
-; X64-NEXT:    movl $5, %ecx
-; X64-NEXT:    # kill: def $cl killed $ecx
-; X64-NEXT:    shll %cl, %edi
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %res = shl i32 %arg1, 5
-  ret i32 %res
-}
-
-define i32 @test_shl_i32_imm1(i32 %arg1) {
-; X64-LABEL: test_shl_i32_imm1:
-; X64:       # %bb.0:
-; X64-NEXT:    movl $1, %ecx
-; X64-NEXT:    # kill: def $cl killed $ecx
-; X64-NEXT:    shll %cl, %edi
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %res = shl i32 %arg1, 1
-  ret i32 %res
-}
-
-define i16 @test_shl_i16(i32 %arg1, i32 %arg2) {
-; X64-LABEL: test_shl_i16:
-; X64:       # %bb.0:
-; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    # kill: def $cl killed $cx
-; X64-NEXT:    shlw %cl, %di
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %a = trunc i32 %arg1 to i16
-  %a2 = trunc i32 %arg2 to i16
+define i64 @test_shl_i64(i64 %arg1, i64 %arg2) {
+; X64-LABEL: test_shl_i64:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq %rsi, %rcx
+; X64-NEXT:    # kill: def $cl killed $rcx
+; X64-NEXT:    shlq %cl, %rax
+; X64-NEXT:    retq
+  %res = shl i64 %arg1, %arg2
+  ret i64 %res
+}
+
+define i64 @test_shl_i64_imm(i64 %arg1) {
+; X64-LABEL: test_shl_i64_imm:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq $5, %rcx
+; X64-NEXT:    # kill: def $cl killed $rcx
+; X64-NEXT:    shlq %cl, %rax
+; X64-NEXT:    retq
+  %res = shl i64 %arg1, 5
+  ret i64 %res
+}
+
+define i64 @test_shl_i64_imm1(i64 %arg1) {
+; X64-LABEL: test_shl_i64_imm1:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq $1, %rcx
+; X64-NEXT:    # kill: def $cl killed $rcx
+; X64-NEXT:    shlq %cl, %rax
+; X64-NEXT:    retq
+  %res = shl i64 %arg1, 1
+  ret i64 %res
+}
+
+define i32 @test_shl_i32(i32 %arg1, i32 %arg2) {
+; X64-LABEL: test_shl_i32:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    # kill: def $cl killed $ecx
+; X64-NEXT:    shll %cl, %eax
+; X64-NEXT:    retq
+  %res = shl i32 %arg1, %arg2
+  ret i32 %res
+}
+
+define i32 @test_shl_i32_imm(i32 %arg1) {
+; X64-LABEL: test_shl_i32_imm:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl $5, %ecx
+; X64-NEXT:    # kill: def $cl killed $ecx
+; X64-NEXT:    shll %cl, %eax
+; X64-NEXT:    retq
+  %res = shl i32 %arg1, 5
+  ret i32 %res
+}
+
+define i32 @test_shl_i32_imm1(i32 %arg1) {
+; X64-LABEL: test_shl_i32_imm1:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl $1, %ecx
+; X64-NEXT:    # kill: def $cl killed $ecx
+; X64-NEXT:    shll %cl, %eax
+; X64-NEXT:    retq
+  %res = shl i32 %arg1, 1
+  ret i32 %res
+}
+
+define i16 @test_shl_i16(i32 %arg1, i32 %arg2) {
+; X64-LABEL: test_shl_i16:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    # kill: def $cx killed $cx killed $ecx
+; X64-NEXT:    # kill: def $cl killed $cx
+; X64-NEXT:    shlw %cl, %ax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    retq
+  %a = trunc i32 %arg1 to i16
+  %a2 = trunc i32 %arg2 to i16
   %res = shl i16 %a, %a2
   ret i16 %res
 }
 
-define i16 @test_shl_i16_imm(i32 %arg1) {
-; X64-LABEL: test_shl_i16_imm:
-; X64:       # %bb.0:
-; X64-NEXT:    movw $5, %cx
-; X64-NEXT:    # kill: def $cl killed $cx
-; X64-NEXT:    shlw %cl, %di
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %a = trunc i32 %arg1 to i16
-  %res = shl i16 %a, 5
+define i16 @test_shl_i16_imm(i32 %arg1) {
+; X64-LABEL: test_shl_i16_imm:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movw $5, %cx
+; X64-NEXT:    # kill: def $cl killed $cx
+; X64-NEXT:    shlw %cl, %ax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    retq
+  %a = trunc i32 %arg1 to i16
+  %res = shl i16 %a, 5
   ret i16 %res
 }
 
-define i16 @test_shl_i16_imm1(i32 %arg1) {
-; X64-LABEL: test_shl_i16_imm1:
-; X64:       # %bb.0:
-; X64-NEXT:    movw $1, %cx
-; X64-NEXT:    # kill: def $cl killed $cx
-; X64-NEXT:    shlw %cl, %di
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %a = trunc i32 %arg1 to i16
-  %res = shl i16 %a, 1
+define i16 @test_shl_i16_imm1(i32 %arg1) {
+; X64-LABEL: test_shl_i16_imm1:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movw $1, %cx
+; X64-NEXT:    # kill: def $cl killed $cx
+; X64-NEXT:    shlw %cl, %ax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    retq
+  %a = trunc i32 %arg1 to i16
+  %res = shl i16 %a, 1
   ret i16 %res
 }
 
-define i8 @test_shl_i8(i32 %arg1, i32 %arg2) {
-; X64-LABEL: test_shl_i8:
-; X64:       # %bb.0:
-; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    shlb %cl, %dil
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %a = trunc i32 %arg1 to i8
-  %a2 = trunc i32 %arg2 to i8
+define i8 @test_shl_i8(i32 %arg1, i32 %arg2) {
+; X64-LABEL: test_shl_i8:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shlb %cl, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  %a = trunc i32 %arg1 to i8
+  %a2 = trunc i32 %arg2 to i8
   %res = shl i8 %a, %a2
   ret i8 %res
 }
 
-define i8 @test_shl_i8_imm(i32 %arg1) {
-; X64-LABEL: test_shl_i8_imm:
-; X64:       # %bb.0:
-; X64-NEXT:    shlb $5, %dil
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %a = trunc i32 %arg1 to i8
-  %res = shl i8 %a, 5
+define i8 @test_shl_i8_imm(i32 %arg1) {
+; X64-LABEL: test_shl_i8_imm:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    shlb $5, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  %a = trunc i32 %arg1 to i8
+  %res = shl i8 %a, 5
   ret i8 %res
 }
 
-define i8 @test_shl_i8_imm1(i32 %arg1) {
-; X64-LABEL: test_shl_i8_imm1:
-; X64:       # %bb.0:
-; X64-NEXT:    addb %dil, %dil
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %a = trunc i32 %arg1 to i8
-  %res = shl i8 %a, 1
+define i8 @test_shl_i8_imm1(i32 %arg1) {
+; X64-LABEL: test_shl_i8_imm1:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    addb %al, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  %a = trunc i32 %arg1 to i8
+  %res = shl i8 %a, 1
   ret i8 %res
 }
 
-define i1 @test_shl_i1(i32 %arg1, i32 %arg2) {
-; X64-LABEL: test_shl_i1:
-; X64:       # %bb.0:
-; X64-NEXT:    andb $1, %sil
-; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    shlb %cl, %dil
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %a = trunc i32 %arg1 to i1
-  %a2 = trunc i32 %arg2 to i1
+define i1 @test_shl_i1(i32 %arg1, i32 %arg2) {
+; X64-LABEL: test_shl_i1:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    andb $1, %cl
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shlb %cl, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  %a = trunc i32 %arg1 to i1
+  %a2 = trunc i32 %arg2 to i1
   %res = shl i1 %a, %a2
   ret i1 %res
 }
 
-define i1 @test_shl_i1_imm1(i32 %arg1) {
-; X64-LABEL: test_shl_i1_imm1:
-; X64:       # %bb.0:
-; X64-NEXT:    movb $-1, %cl
-; X64-NEXT:    andb $1, %cl
-; X64-NEXT:    shlb %cl, %dil
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %a = trunc i32 %arg1 to i1
-  %res = shl i1 %a, 1
+define i1 @test_shl_i1_imm1(i32 %arg1) {
+; X64-LABEL: test_shl_i1_imm1:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movb $-1, %cl
+; X64-NEXT:    andb $1, %cl
+; X64-NEXT:    shlb %cl, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  %a = trunc i32 %arg1 to i1
+  %res = shl i1 %a, 1
   ret i1 %res
 }

Modified: llvm/trunk/test/CodeGen/X86/GlobalISel/sub-scalar.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/GlobalISel/sub-scalar.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/sub-scalar.ll (original)
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/sub-scalar.ll Wed Sep 19 11:59:08 2018
@@ -1,44 +1,46 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=X64
 
-define i64 @test_sub_i64(i64 %arg1, i64 %arg2) {
-; X64-LABEL: test_sub_i64:
-; X64:       # %bb.0:
-; X64-NEXT:    subq %rsi, %rdi
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    retq
-  %ret = sub i64 %arg1, %arg2
-  ret i64 %ret
-}
-
-define i32 @test_sub_i32(i32 %arg1, i32 %arg2) {
-; X64-LABEL: test_sub_i32:
-; X64:       # %bb.0:
-; X64-NEXT:    subl %esi, %edi
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %ret = sub i32 %arg1, %arg2
-  ret i32 %ret
-}
-
-define i16 @test_sub_i16(i16 %arg1, i16 %arg2) {
-; X64-LABEL: test_sub_i16:
-; X64:       # %bb.0:
-; X64-NEXT:    subw %si, %di
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %ret = sub i16 %arg1, %arg2
-  ret i16 %ret
-}
-
-define i8 @test_sub_i8(i8 %arg1, i8 %arg2) {
-; X64-LABEL: test_sub_i8:
-; X64:       # %bb.0:
-; X64-NEXT:    subb %sil, %dil
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
-  %ret = sub i8 %arg1, %arg2
-  ret i8 %ret
+define i64 @test_sub_i64(i64 %arg1, i64 %arg2) {
+; X64-LABEL: test_sub_i64:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    subq %rsi, %rax
+; X64-NEXT:    retq
+  %ret = sub i64 %arg1, %arg2
+  ret i64 %ret
+}
+
+define i32 @test_sub_i32(i32 %arg1, i32 %arg2) {
+; X64-LABEL: test_sub_i32:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    retq
+  %ret = sub i32 %arg1, %arg2
+  ret i32 %ret
+}
+
+define i16 @test_sub_i16(i16 %arg1, i16 %arg2) {
+; X64-LABEL: test_sub_i16:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    subw %si, %ax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    retq
+  %ret = sub i16 %arg1, %arg2
+  ret i16 %ret
+}
+
+define i8 @test_sub_i8(i8 %arg1, i8 %arg2) {
+; X64-LABEL: test_sub_i8:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    subb %sil, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  %ret = sub i8 %arg1, %arg2
+  ret i8 %ret
 }
 
 define i32 @test_sub_i1(i32 %arg1, i32 %arg2) {

Modified: llvm/trunk/test/CodeGen/X86/GlobalISel/trunc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/GlobalISel/trunc.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/trunc.ll (original)
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/trunc.ll Wed Sep 19 11:59:08 2018
@@ -2,56 +2,62 @@
 ; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=CHECK
 
 define i1 @trunc_i32toi1(i32 %a) {
-; CHECK-LABEL: trunc_i32toi1:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    retq
-  %r = trunc i32 %a to i1
-  ret i1 %r
+; CHECK-LABEL: trunc_i32toi1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    # kill: def $al killed $al killed $eax
+; CHECK-NEXT:    retq
+  %r = trunc i32 %a to i1
+  ret i1 %r
 }
 
 define i8 @trunc_i32toi8(i32 %a) {
-; CHECK-LABEL: trunc_i32toi8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    retq
-  %r = trunc i32 %a to i8
-  ret i8 %r
+; CHECK-LABEL: trunc_i32toi8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    # kill: def $al killed $al killed $eax
+; CHECK-NEXT:    retq
+  %r = trunc i32 %a to i8
+  ret i8 %r
 }
 
 define i16 @trunc_i32toi16(i32 %a) {
-; CHECK-LABEL: trunc_i32toi16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    retq
-  %r = trunc i32 %a to i16
-  ret i16 %r
-}
-
-define i8 @trunc_i64toi8(i64 %a) {
-; CHECK-LABEL: trunc_i64toi8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    retq
-  %r = trunc i64 %a to i8
-  ret i8 %r
-}
-
-define i16 @trunc_i64toi16(i64 %a) {
-; CHECK-LABEL: trunc_i64toi16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    retq
-  %r = trunc i64 %a to i16
-  ret i16 %r
-}
-
-define i32 @trunc_i64toi32(i64 %a) {
-; CHECK-LABEL: trunc_i64toi32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    retq
-  %r = trunc i64 %a to i32
-  ret i32 %r
+; CHECK-LABEL: trunc_i32toi16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT:    retq
+  %r = trunc i32 %a to i16
+  ret i16 %r
+}
+
+define i8 @trunc_i64toi8(i64 %a) {
+; CHECK-LABEL: trunc_i64toi8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    # kill: def $al killed $al killed $rax
+; CHECK-NEXT:    retq
+  %r = trunc i64 %a to i8
+  ret i8 %r
+}
+
+define i16 @trunc_i64toi16(i64 %a) {
+; CHECK-LABEL: trunc_i64toi16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $rax
+; CHECK-NEXT:    retq
+  %r = trunc i64 %a to i16
+  ret i16 %r
+}
+
+define i32 @trunc_i64toi32(i64 %a) {
+; CHECK-LABEL: trunc_i64toi32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT:    retq
+  %r = trunc i64 %a to i32
+  ret i32 %r
 }
 

Modified: llvm/trunk/test/CodeGen/X86/GlobalISel/undef.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/GlobalISel/undef.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/undef.ll (original)
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/undef.ll Wed Sep 19 11:59:08 2018
@@ -8,14 +8,15 @@ define i8 @test() {
   ret i8 undef
 }
 
-define i8 @test2(i8 %a) {
-; ALL-LABEL: test2:
-; ALL:       # %bb.0:
-; ALL-NEXT:    addb %al, %dil
-; ALL-NEXT:    movl %edi, %eax
-; ALL-NEXT:    retq
-  %r = add i8 %a, undef
-  ret i8 %r
+define i8 @test2(i8 %a) {
+; ALL-LABEL: test2:
+; ALL:       # %bb.0:
+; ALL-NEXT:    movl %edi, %eax
+; ALL-NEXT:    addb %al, %al
+; ALL-NEXT:    # kill: def $al killed $al killed $eax
+; ALL-NEXT:    retq
+  %r = add i8 %a, undef
+  ret i8 %r
 }
 
 

Modified: llvm/trunk/test/CodeGen/X86/GlobalISel/xor-scalar.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/GlobalISel/xor-scalar.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/xor-scalar.ll (original)
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/xor-scalar.ll Wed Sep 19 11:59:08 2018
@@ -16,43 +16,45 @@ define i32 @test_xor_i1(i32 %arg1, i32 %
   ret i32 %ret
 }
 
-define i8 @test_xor_i8(i8 %arg1, i8 %arg2) {
-; ALL-LABEL: test_xor_i8:
-; ALL:       # %bb.0:
-; ALL-NEXT:    xorb %dil, %sil
-; ALL-NEXT:    movl %esi, %eax
-; ALL-NEXT:    retq
-  %ret = xor i8 %arg1, %arg2
-  ret i8 %ret
-}
-
-define i16 @test_xor_i16(i16 %arg1, i16 %arg2) {
-; ALL-LABEL: test_xor_i16:
-; ALL:       # %bb.0:
-; ALL-NEXT:    xorw %di, %si
-; ALL-NEXT:    movl %esi, %eax
-; ALL-NEXT:    retq
-  %ret = xor i16 %arg1, %arg2
-  ret i16 %ret
-}
-
-define i32 @test_xor_i32(i32 %arg1, i32 %arg2) {
-; ALL-LABEL: test_xor_i32:
-; ALL:       # %bb.0:
-; ALL-NEXT:    xorl %edi, %esi
-; ALL-NEXT:    movl %esi, %eax
-; ALL-NEXT:    retq
-  %ret = xor i32 %arg1, %arg2
-  ret i32 %ret
-}
-
-define i64 @test_xor_i64(i64 %arg1, i64 %arg2) {
-; ALL-LABEL: test_xor_i64:
-; ALL:       # %bb.0:
-; ALL-NEXT:    xorq %rdi, %rsi
-; ALL-NEXT:    movq %rsi, %rax
-; ALL-NEXT:    retq
-  %ret = xor i64 %arg1, %arg2
-  ret i64 %ret
+define i8 @test_xor_i8(i8 %arg1, i8 %arg2) {
+; ALL-LABEL: test_xor_i8:
+; ALL:       # %bb.0:
+; ALL-NEXT:    movl %esi, %eax
+; ALL-NEXT:    xorb %dil, %al
+; ALL-NEXT:    # kill: def $al killed $al killed $eax
+; ALL-NEXT:    retq
+  %ret = xor i8 %arg1, %arg2
+  ret i8 %ret
+}
+
+define i16 @test_xor_i16(i16 %arg1, i16 %arg2) {
+; ALL-LABEL: test_xor_i16:
+; ALL:       # %bb.0:
+; ALL-NEXT:    movl %esi, %eax
+; ALL-NEXT:    xorw %di, %ax
+; ALL-NEXT:    # kill: def $ax killed $ax killed $eax
+; ALL-NEXT:    retq
+  %ret = xor i16 %arg1, %arg2
+  ret i16 %ret
+}
+
+define i32 @test_xor_i32(i32 %arg1, i32 %arg2) {
+; ALL-LABEL: test_xor_i32:
+; ALL:       # %bb.0:
+; ALL-NEXT:    movl %esi, %eax
+; ALL-NEXT:    xorl %edi, %eax
+; ALL-NEXT:    retq
+  %ret = xor i32 %arg1, %arg2
+  ret i32 %ret
+}
+
+define i64 @test_xor_i64(i64 %arg1, i64 %arg2) {
+; ALL-LABEL: test_xor_i64:
+; ALL:       # %bb.0:
+; ALL-NEXT:    movq %rsi, %rax
+; ALL-NEXT:    xorq %rdi, %rax
+; ALL-NEXT:    retq
+  %ret = xor i64 %arg1, %arg2
+  ret i64 %ret
 }
 

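A register-register move necessarily survives in every one of these two-operand tests: x86 integer ALU instructions like xorq are destructive (the last operand in AT&T syntax is both a source and the destination), so when the result must leave in %rax, one operand has to be copied there first. The hints only change which value gets copied: an input is moved into the return register before the operation, instead of the result being moved out after it. A non-destructive encoding can drop the copy entirely; for the add cases that follow, LEA is the classic escape hatch:

    # destructive form, two instructions
    movq %rsi, %rax
    addq %rdi, %rax
    # non-destructive alternative (adds only)
    leaq (%rdi,%rsi), %rax

The base ISA has no such three-operand form for plain or/xor/and of registers, so two instructions is the floor here.
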
Modified: llvm/trunk/test/CodeGen/X86/add.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/add.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/add.ll (original)
+++ llvm/trunk/test/CodeGen/X86/add.ll Wed Sep 19 11:59:08 2018
@@ -16,14 +16,14 @@ define i32 @test1(i32 inreg %a) nounwind
 ;
 ; X64-LINUX-LABEL: test1:
 ; X64-LINUX:       # %bb.0: # %entry
-; X64-LINUX-NEXT:    subl $-128, %edi
 ; X64-LINUX-NEXT:    movl %edi, %eax
+; X64-LINUX-NEXT:    subl $-128, %eax
 ; X64-LINUX-NEXT:    retq
 ;
 ; X64-WIN32-LABEL: test1:
 ; X64-WIN32:       # %bb.0: # %entry
-; X64-WIN32-NEXT:    subl $-128, %ecx
 ; X64-WIN32-NEXT:    movl %ecx, %eax
+; X64-WIN32-NEXT:    subl $-128, %eax
 ; X64-WIN32-NEXT:    retq
 entry:
   %b = add i32 %a, 128
@@ -38,14 +38,14 @@ define i64 @test2(i64 inreg %a) nounwind
 ;
 ; X64-LINUX-LABEL: test2:
 ; X64-LINUX:       # %bb.0: # %entry
-; X64-LINUX-NEXT:    subq $-2147483648, %rdi # imm = 0x80000000
 ; X64-LINUX-NEXT:    movq %rdi, %rax
+; X64-LINUX-NEXT:    subq $-2147483648, %rax # imm = 0x80000000
 ; X64-LINUX-NEXT:    retq
 ;
 ; X64-WIN32-LABEL: test2:
 ; X64-WIN32:       # %bb.0: # %entry
-; X64-WIN32-NEXT:    subq $-2147483648, %rcx # imm = 0x80000000
 ; X64-WIN32-NEXT:    movq %rcx, %rax
+; X64-WIN32-NEXT:    subq $-2147483648, %rax # imm = 0x80000000
 ; X64-WIN32-NEXT:    retq
 entry:
   %b = add i64 %a, 2147483648
@@ -60,14 +60,14 @@ define i64 @test3(i64 inreg %a) nounwind
 ;
 ; X64-LINUX-LABEL: test3:
 ; X64-LINUX:       # %bb.0: # %entry
-; X64-LINUX-NEXT:    subq $-128, %rdi
 ; X64-LINUX-NEXT:    movq %rdi, %rax
+; X64-LINUX-NEXT:    subq $-128, %rax
 ; X64-LINUX-NEXT:    retq
 ;
 ; X64-WIN32-LABEL: test3:
 ; X64-WIN32:       # %bb.0: # %entry
-; X64-WIN32-NEXT:    subq $-128, %rcx
 ; X64-WIN32-NEXT:    movq %rcx, %rax
+; X64-WIN32-NEXT:    subq $-128, %rax
 ; X64-WIN32-NEXT:    retq
 entry:
   %b = add i64 %a, 128
@@ -204,16 +204,16 @@ define {i32, i1} @test7(i32 %v1, i32 %v2
 ;
 ; X64-LINUX-LABEL: test7:
 ; X64-LINUX:       # %bb.0: # %entry
-; X64-LINUX-NEXT:    addl %esi, %edi
-; X64-LINUX-NEXT:    setb %dl
 ; X64-LINUX-NEXT:    movl %edi, %eax
+; X64-LINUX-NEXT:    addl %esi, %eax
+; X64-LINUX-NEXT:    setb %dl
 ; X64-LINUX-NEXT:    retq
 ;
 ; X64-WIN32-LABEL: test7:
 ; X64-WIN32:       # %bb.0: # %entry
-; X64-WIN32-NEXT:    addl %edx, %ecx
-; X64-WIN32-NEXT:    setb %dl
 ; X64-WIN32-NEXT:    movl %ecx, %eax
+; X64-WIN32-NEXT:    addl %edx, %eax
+; X64-WIN32-NEXT:    setb %dl
 ; X64-WIN32-NEXT:    retq
 entry:
   %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
@@ -233,16 +233,16 @@ define {i64, i1} @test8(i64 %left, i64 %
 ;
 ; X64-LINUX-LABEL: test8:
 ; X64-LINUX:       # %bb.0: # %entry
-; X64-LINUX-NEXT:    addq %rsi, %rdi
-; X64-LINUX-NEXT:    setb %dl
 ; X64-LINUX-NEXT:    movq %rdi, %rax
+; X64-LINUX-NEXT:    addq %rsi, %rax
+; X64-LINUX-NEXT:    setb %dl
 ; X64-LINUX-NEXT:    retq
 ;
 ; X64-WIN32-LABEL: test8:
 ; X64-WIN32:       # %bb.0: # %entry
-; X64-WIN32-NEXT:    addq %rdx, %rcx
-; X64-WIN32-NEXT:    setb %dl
 ; X64-WIN32-NEXT:    movq %rcx, %rax
+; X64-WIN32-NEXT:    addq %rdx, %rax
+; X64-WIN32-NEXT:    setb %dl
 ; X64-WIN32-NEXT:    retq
 entry:
   %extleft = zext i64 %left to i65
@@ -268,20 +268,20 @@ define i32 @test9(i32 %x, i32 %y) nounwi
 ;
 ; X64-LINUX-LABEL: test9:
 ; X64-LINUX:       # %bb.0: # %entry
-; X64-LINUX-NEXT:    xorl %eax, %eax
-; X64-LINUX-NEXT:    cmpl $10, %edi
-; X64-LINUX-NEXT:    sete %al
-; X64-LINUX-NEXT:    subl %eax, %esi
 ; X64-LINUX-NEXT:    movl %esi, %eax
+; X64-LINUX-NEXT:    xorl %ecx, %ecx
+; X64-LINUX-NEXT:    cmpl $10, %edi
+; X64-LINUX-NEXT:    sete %cl
+; X64-LINUX-NEXT:    subl %ecx, %eax
 ; X64-LINUX-NEXT:    retq
 ;
 ; X64-WIN32-LABEL: test9:
 ; X64-WIN32:       # %bb.0: # %entry
-; X64-WIN32-NEXT:    xorl %eax, %eax
-; X64-WIN32-NEXT:    cmpl $10, %ecx
-; X64-WIN32-NEXT:    sete %al
-; X64-WIN32-NEXT:    subl %eax, %edx
 ; X64-WIN32-NEXT:    movl %edx, %eax
+; X64-WIN32-NEXT:    xorl %edx, %edx
+; X64-WIN32-NEXT:    cmpl $10, %ecx
+; X64-WIN32-NEXT:    sete %dl
+; X64-WIN32-NEXT:    subl %edx, %eax
 ; X64-WIN32-NEXT:    retq
 entry:
   %cmp = icmp eq i32 %x, 10
@@ -392,14 +392,14 @@ define i32 @inc_not(i32 %a) {
 ;
 ; X64-LINUX-LABEL: inc_not:
 ; X64-LINUX:       # %bb.0:
-; X64-LINUX-NEXT:    negl %edi
 ; X64-LINUX-NEXT:    movl %edi, %eax
+; X64-LINUX-NEXT:    negl %eax
 ; X64-LINUX-NEXT:    retq
 ;
 ; X64-WIN32-LABEL: inc_not:
 ; X64-WIN32:       # %bb.0:
-; X64-WIN32-NEXT:    negl %ecx
 ; X64-WIN32-NEXT:    movl %ecx, %eax
+; X64-WIN32-NEXT:    negl %eax
 ; X64-WIN32-NEXT:    retq
   %nota = xor i32 %a, -1
   %r = add i32 %nota, 1

Modified: llvm/trunk/test/CodeGen/X86/addcarry.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/addcarry.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/addcarry.ll (original)
+++ llvm/trunk/test/CodeGen/X86/addcarry.ll Wed Sep 19 11:59:08 2018
@@ -4,9 +4,9 @@
 define i128 @add128(i128 %a, i128 %b) nounwind {
 ; CHECK-LABEL: add128:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addq %rdx, %rdi
-; CHECK-NEXT:    adcq %rcx, %rsi
 ; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    addq %rdx, %rax
+; CHECK-NEXT:    adcq %rcx, %rsi
 ; CHECK-NEXT:    movq %rsi, %rdx
 ; CHECK-NEXT:    retq
 entry:
@@ -43,6 +43,7 @@ entry:
 define i256 @add256(i256 %a, i256 %b) nounwind {
 ; CHECK-LABEL: add256:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    addq %r9, %rsi
 ; CHECK-NEXT:    adcq {{[0-9]+}}(%rsp), %rdx
 ; CHECK-NEXT:    adcq {{[0-9]+}}(%rsp), %rcx
@@ -51,7 +52,6 @@ define i256 @add256(i256 %a, i256 %b) no
 ; CHECK-NEXT:    movq %rsi, (%rdi)
 ; CHECK-NEXT:    movq %rcx, 16(%rdi)
 ; CHECK-NEXT:    movq %r8, 24(%rdi)
-; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    retq
 entry:
   %0 = add i256 %a, %b
@@ -197,6 +197,7 @@ define i8 @e(i32* nocapture %a, i32 %b)
 define %scalar @pr31719(%scalar* nocapture readonly %this, %scalar %arg.b) {
 ; CHECK-LABEL: pr31719:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    addq (%rsi), %rdx
 ; CHECK-NEXT:    adcq 8(%rsi), %rcx
 ; CHECK-NEXT:    adcq 16(%rsi), %r8
@@ -205,7 +206,6 @@ define %scalar @pr31719(%scalar* nocaptu
 ; CHECK-NEXT:    movq %rcx, 8(%rdi)
 ; CHECK-NEXT:    movq %r8, 16(%rdi)
 ; CHECK-NEXT:    movq %r9, 24(%rdi)
-; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    retq
 entry:
   %0 = extractvalue %scalar %arg.b, 0
@@ -292,9 +292,9 @@ entry:
 define i64 @shiftadd(i64 %a, i64 %b, i64 %c, i64 %d) {
 ; CHECK-LABEL: shiftadd:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addq %rsi, %rdi
-; CHECK-NEXT:    adcq %rcx, %rdx
 ; CHECK-NEXT:    movq %rdx, %rax
+; CHECK-NEXT:    addq %rsi, %rdi
+; CHECK-NEXT:    adcq %rcx, %rax
 ; CHECK-NEXT:    retq
 entry:
   %0 = zext i64 %a to i128
@@ -312,23 +312,23 @@ entry:
 define %S @readd(%S* nocapture readonly %this, %S %arg.b) {
 ; CHECK-LABEL: readd:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    addq (%rsi), %rdx
-; CHECK-NEXT:    movq 8(%rsi), %r10
-; CHECK-NEXT:    adcq $0, %r10
-; CHECK-NEXT:    setb %al
-; CHECK-NEXT:    movzbl %al, %eax
-; CHECK-NEXT:    addq %rcx, %r10
-; CHECK-NEXT:    adcq 16(%rsi), %rax
+; CHECK-NEXT:    movq 8(%rsi), %r11
+; CHECK-NEXT:    adcq $0, %r11
+; CHECK-NEXT:    setb %r10b
+; CHECK-NEXT:    movzbl %r10b, %edi
+; CHECK-NEXT:    addq %rcx, %r11
+; CHECK-NEXT:    adcq 16(%rsi), %rdi
 ; CHECK-NEXT:    setb %cl
 ; CHECK-NEXT:    movzbl %cl, %ecx
-; CHECK-NEXT:    addq %r8, %rax
+; CHECK-NEXT:    addq %r8, %rdi
 ; CHECK-NEXT:    adcq 24(%rsi), %rcx
 ; CHECK-NEXT:    addq %r9, %rcx
-; CHECK-NEXT:    movq %rdx, (%rdi)
-; CHECK-NEXT:    movq %r10, 8(%rdi)
-; CHECK-NEXT:    movq %rax, 16(%rdi)
-; CHECK-NEXT:    movq %rcx, 24(%rdi)
-; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    movq %rdx, (%rax)
+; CHECK-NEXT:    movq %r11, 8(%rax)
+; CHECK-NEXT:    movq %rdi, 16(%rax)
+; CHECK-NEXT:    movq %rcx, 24(%rax)
 ; CHECK-NEXT:    retq
 entry:
   %0 = extractvalue %S %arg.b, 0
@@ -377,10 +377,10 @@ entry:
 define i128 @addcarry1_not(i128 %n) {
 ; CHECK-LABEL: addcarry1_not:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    xorl %edx, %edx
-; CHECK-NEXT:    negq %rdi
+; CHECK-NEXT:    negq %rax
 ; CHECK-NEXT:    sbbq %rsi, %rdx
-; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    retq
   %1 = xor i128 %n, -1
   %2 = add i128 %1, 1

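The readd change in addcarry.ll shows the other side of taking the hint eagerly: %rax is claimed at entry by the copy of the sret pointer, so the temporaries formerly in %r10 and %rax move out to %r11 and %rdi (with the setb scratch landing in %r10b), and every store now addresses through %rax instead of %rdi. The instruction count is unchanged; the hint just reshuffles which registers carry the intermediates. The distilled sret pattern, visible in several tests in this diff, is:

    movq %rdi, %rax        # hidden sret pointer doubles as the return value
    ...                    # stores may go through %rax or %rdi
    retq

rather than deferring the same movq to just before retq.
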
Modified: llvm/trunk/test/CodeGen/X86/and-encoding.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/and-encoding.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/and-encoding.ll (original)
+++ llvm/trunk/test/CodeGen/X86/and-encoding.ll Wed Sep 19 11:59:08 2018
@@ -46,9 +46,9 @@ define void @f3(i32 %x, i1 *%y) nounwind
 define i32 @lopped32_32to8(i32 %x) {
 ; CHECK-LABEL: lopped32_32to8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    shrl $4, %edi # encoding: [0xc1,0xef,0x04]
-; CHECK-NEXT:    andl $-16, %edi # encoding: [0x83,0xe7,0xf0]
 ; CHECK-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
+; CHECK-NEXT:    shrl $4, %eax # encoding: [0xc1,0xe8,0x04]
+; CHECK-NEXT:    andl $-16, %eax # encoding: [0x83,0xe0,0xf0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %shr = lshr i32 %x, 4
   %and = and i32 %shr, 268435440
@@ -60,9 +60,9 @@ define i32 @lopped32_32to8(i32 %x) {
 define i64 @lopped64_32to8(i64 %x) {
 ; CHECK-LABEL: lopped64_32to8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    shrq $36, %rdi # encoding: [0x48,0xc1,0xef,0x24]
-; CHECK-NEXT:    andl $-16, %edi # encoding: [0x83,0xe7,0xf0]
 ; CHECK-NEXT:    movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
+; CHECK-NEXT:    shrq $36, %rax # encoding: [0x48,0xc1,0xe8,0x24]
+; CHECK-NEXT:    andl $-16, %eax # encoding: [0x83,0xe0,0xf0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %shr = lshr i64 %x, 36
   %and = and i64 %shr, 268435440
@@ -74,9 +74,9 @@ define i64 @lopped64_32to8(i64 %x) {
 define i64 @lopped64_64to8(i64 %x) {
 ; CHECK-LABEL: lopped64_64to8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    shrq $4, %rdi # encoding: [0x48,0xc1,0xef,0x04]
-; CHECK-NEXT:    andq $-16, %rdi # encoding: [0x48,0x83,0xe7,0xf0]
 ; CHECK-NEXT:    movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
+; CHECK-NEXT:    shrq $4, %rax # encoding: [0x48,0xc1,0xe8,0x04]
+; CHECK-NEXT:    andq $-16, %rax # encoding: [0x48,0x83,0xe0,0xf0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %shr = lshr i64 %x, 4
   %and = and i64 %shr, 1152921504606846960
@@ -88,10 +88,10 @@ define i64 @lopped64_64to8(i64 %x) {
 define i64 @lopped64_64to32(i64 %x) {
 ; CHECK-LABEL: lopped64_64to32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    shrq $4, %rdi # encoding: [0x48,0xc1,0xef,0x04]
-; CHECK-NEXT:    andq $-983056, %rdi # encoding: [0x48,0x81,0xe7,0xf0,0xff,0xf0,0xff]
-; CHECK-NEXT:    # imm = 0xFFF0FFF0
 ; CHECK-NEXT:    movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
+; CHECK-NEXT:    shrq $4, %rax # encoding: [0x48,0xc1,0xe8,0x04]
+; CHECK-NEXT:    andq $-983056, %rax # encoding: [0x48,0x25,0xf0,0xff,0xf0,0xff]
+; CHECK-NEXT:    # imm = 0xFFF0FFF0
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %shr = lshr i64 %x, 4
   %and = and i64 %shr, 1152921504605863920

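A small encoding win falls out as well, visible in the byte comments above: AND (like ADD, SUB, and the other classic ALU ops) has a short accumulator-specific immediate form, so once the operation targets %eax/%rax the assembler can drop the ModRM byte. In lopped64_64to32 the same and-with-imm32 shrinks from 7 bytes to 6:

    andq $-983056, %rdi    # 48 81 e7 f0 ff f0 ff   (ModRM form)
    andq $-983056, %rax    # 48 25 f0 ff f0 ff      (RAX short form)

andimm8.ll below shows the matching 32-bit shrink (opcode 81 /4 down to 25, 6 bytes to 5) in baz.
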
Modified: llvm/trunk/test/CodeGen/X86/andimm8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/andimm8.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/andimm8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/andimm8.ll Wed Sep 19 11:59:08 2018
@@ -14,9 +14,8 @@ define i64 @bra(i32 %zed) nounwind {
 ;
 ; X64-LABEL: bra:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    andl $-64, %edi # encoding: [0x83,0xe7,0xc0]
-; X64-NEXT:    movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
+; X64-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
+; X64-NEXT:    andl $-64, %eax # encoding: [0x83,0xe0,0xc0]
 ; X64-NEXT:    retq # encoding: [0xc3]
  %t1 = zext i32 %zed to i64
  %t2 = and i64  %t1, 4294967232
@@ -57,8 +56,8 @@ define i64 @bar(i64 %zed) nounwind {
 ;
 ; X64-LABEL: bar:
 ; X64:       # %bb.0:
-; X64-NEXT:    andl $42, %edi # encoding: [0x83,0xe7,0x2a]
 ; X64-NEXT:    movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
+; X64-NEXT:    andl $42, %eax # encoding: [0x83,0xe0,0x2a]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %t1 = and i64 %zed, 42
   ret i64 %t1
@@ -75,9 +74,9 @@ define i64 @baz(i64 %zed) nounwind {
 ;
 ; X64-LABEL: baz:
 ; X64:       # %bb.0:
-; X64-NEXT:    andl $2147483647, %edi # encoding: [0x81,0xe7,0xff,0xff,0xff,0x7f]
-; X64-NEXT:    # imm = 0x7FFFFFFF
 ; X64-NEXT:    movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
+; X64-NEXT:    andl $2147483647, %eax # encoding: [0x25,0xff,0xff,0xff,0x7f]
+; X64-NEXT:    # imm = 0x7FFFFFFF
 ; X64-NEXT:    retq # encoding: [0xc3]
   %t1 = and i64 %zed, 2147483647
   ret i64 %t1

Modified: llvm/trunk/test/CodeGen/X86/anyext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/anyext.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/anyext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/anyext.ll Wed Sep 19 11:59:08 2018
@@ -41,8 +41,9 @@ define i32 @bar(i32 %p, i16 zeroext %x)
 ;
 ; X64-LABEL: bar:
 ; X64:       # %bb.0:
-; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    xorl %edx, %edx
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    divw %si
 ; X64-NEXT:    # kill: def $ax killed $ax def $eax
 ; X64-NEXT:    andl $1, %eax

Modified: llvm/trunk/test/CodeGen/X86/apm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/apm.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/apm.ll (original)
+++ llvm/trunk/test/CodeGen/X86/apm.ll Wed Sep 19 11:59:08 2018
@@ -17,8 +17,8 @@ define void @foo(i8* %P, i32 %E, i32 %H)
 ;
 ; X64-LABEL: foo:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    leaq (%rdi), %rax
 ; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    leaq (%rdi), %rax
 ; X64-NEXT:    monitor
 ; X64-NEXT:    retq
 ;
@@ -46,8 +46,8 @@ define void @bar(i32 %E, i32 %H) nounwin
 ;
 ; X64-LABEL: bar:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    movl %edi, %ecx
 ; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    movl %edi, %ecx
 ; X64-NEXT:    mwait
 ; X64-NEXT:    retq
 ;

Modified: llvm/trunk/test/CodeGen/X86/atomic-eflags-reuse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/atomic-eflags-reuse.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/atomic-eflags-reuse.ll (original)
+++ llvm/trunk/test/CodeGen/X86/atomic-eflags-reuse.ll Wed Sep 19 11:59:08 2018
@@ -5,16 +5,16 @@
 define i32 @test_add_1_cmov_slt(i64* %p, i32 %a0, i32 %a1) #0 {
 ; FASTINCDEC-LABEL: test_add_1_cmov_slt:
 ; FASTINCDEC:       # %bb.0: # %entry
-; FASTINCDEC-NEXT:    lock incq (%rdi)
-; FASTINCDEC-NEXT:    cmovgl %edx, %esi
 ; FASTINCDEC-NEXT:    movl %esi, %eax
+; FASTINCDEC-NEXT:    lock incq (%rdi)
+; FASTINCDEC-NEXT:    cmovgl %edx, %eax
 ; FASTINCDEC-NEXT:    retq
 ;
 ; SLOWINCDEC-LABEL: test_add_1_cmov_slt:
 ; SLOWINCDEC:       # %bb.0: # %entry
-; SLOWINCDEC-NEXT:    lock addq $1, (%rdi)
-; SLOWINCDEC-NEXT:    cmovgl %edx, %esi
 ; SLOWINCDEC-NEXT:    movl %esi, %eax
+; SLOWINCDEC-NEXT:    lock addq $1, (%rdi)
+; SLOWINCDEC-NEXT:    cmovgl %edx, %eax
 ; SLOWINCDEC-NEXT:    retq
 entry:
   %tmp0 = atomicrmw add i64* %p, i64 1 seq_cst
@@ -26,16 +26,16 @@ entry:
 define i32 @test_add_1_cmov_sge(i64* %p, i32 %a0, i32 %a1) #0 {
 ; FASTINCDEC-LABEL: test_add_1_cmov_sge:
 ; FASTINCDEC:       # %bb.0: # %entry
-; FASTINCDEC-NEXT:    lock incq (%rdi)
-; FASTINCDEC-NEXT:    cmovlel %edx, %esi
 ; FASTINCDEC-NEXT:    movl %esi, %eax
+; FASTINCDEC-NEXT:    lock incq (%rdi)
+; FASTINCDEC-NEXT:    cmovlel %edx, %eax
 ; FASTINCDEC-NEXT:    retq
 ;
 ; SLOWINCDEC-LABEL: test_add_1_cmov_sge:
 ; SLOWINCDEC:       # %bb.0: # %entry
-; SLOWINCDEC-NEXT:    lock addq $1, (%rdi)
-; SLOWINCDEC-NEXT:    cmovlel %edx, %esi
 ; SLOWINCDEC-NEXT:    movl %esi, %eax
+; SLOWINCDEC-NEXT:    lock addq $1, (%rdi)
+; SLOWINCDEC-NEXT:    cmovlel %edx, %eax
 ; SLOWINCDEC-NEXT:    retq
 entry:
   %tmp0 = atomicrmw add i64* %p, i64 1 seq_cst
@@ -47,16 +47,16 @@ entry:
 define i32 @test_sub_1_cmov_sle(i64* %p, i32 %a0, i32 %a1) #0 {
 ; FASTINCDEC-LABEL: test_sub_1_cmov_sle:
 ; FASTINCDEC:       # %bb.0: # %entry
-; FASTINCDEC-NEXT:    lock decq (%rdi)
-; FASTINCDEC-NEXT:    cmovgel %edx, %esi
 ; FASTINCDEC-NEXT:    movl %esi, %eax
+; FASTINCDEC-NEXT:    lock decq (%rdi)
+; FASTINCDEC-NEXT:    cmovgel %edx, %eax
 ; FASTINCDEC-NEXT:    retq
 ;
 ; SLOWINCDEC-LABEL: test_sub_1_cmov_sle:
 ; SLOWINCDEC:       # %bb.0: # %entry
-; SLOWINCDEC-NEXT:    lock addq $-1, (%rdi)
-; SLOWINCDEC-NEXT:    cmovgel %edx, %esi
 ; SLOWINCDEC-NEXT:    movl %esi, %eax
+; SLOWINCDEC-NEXT:    lock addq $-1, (%rdi)
+; SLOWINCDEC-NEXT:    cmovgel %edx, %eax
 ; SLOWINCDEC-NEXT:    retq
 entry:
   %tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst
@@ -68,16 +68,16 @@ entry:
 define i32 @test_sub_1_cmov_sgt(i64* %p, i32 %a0, i32 %a1) #0 {
 ; FASTINCDEC-LABEL: test_sub_1_cmov_sgt:
 ; FASTINCDEC:       # %bb.0: # %entry
-; FASTINCDEC-NEXT:    lock decq (%rdi)
-; FASTINCDEC-NEXT:    cmovll %edx, %esi
 ; FASTINCDEC-NEXT:    movl %esi, %eax
+; FASTINCDEC-NEXT:    lock decq (%rdi)
+; FASTINCDEC-NEXT:    cmovll %edx, %eax
 ; FASTINCDEC-NEXT:    retq
 ;
 ; SLOWINCDEC-LABEL: test_sub_1_cmov_sgt:
 ; SLOWINCDEC:       # %bb.0: # %entry
-; SLOWINCDEC-NEXT:    lock addq $-1, (%rdi)
-; SLOWINCDEC-NEXT:    cmovll %edx, %esi
 ; SLOWINCDEC-NEXT:    movl %esi, %eax
+; SLOWINCDEC-NEXT:    lock addq $-1, (%rdi)
+; SLOWINCDEC-NEXT:    cmovll %edx, %eax
 ; SLOWINCDEC-NEXT:    retq
 entry:
   %tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst
@@ -159,11 +159,11 @@ f:
 define i32 @test_add_1_cmov_sle(i64* %p, i32 %a0, i32 %a1) #0 {
 ; CHECK-LABEL: test_add_1_cmov_sle:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl $1, %eax
-; CHECK-NEXT:    lock xaddq %rax, (%rdi)
-; CHECK-NEXT:    testq %rax, %rax
-; CHECK-NEXT:    cmovgl %edx, %esi
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    movl $1, %ecx
+; CHECK-NEXT:    lock xaddq %rcx, (%rdi)
+; CHECK-NEXT:    testq %rcx, %rcx
+; CHECK-NEXT:    cmovgl %edx, %eax
 ; CHECK-NEXT:    retq
 entry:
   %tmp0 = atomicrmw add i64* %p, i64 1 seq_cst
@@ -175,11 +175,11 @@ entry:
 define i32 @test_add_1_cmov_sgt(i64* %p, i32 %a0, i32 %a1) #0 {
 ; CHECK-LABEL: test_add_1_cmov_sgt:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl $1, %eax
-; CHECK-NEXT:    lock xaddq %rax, (%rdi)
-; CHECK-NEXT:    testq %rax, %rax
-; CHECK-NEXT:    cmovlel %edx, %esi
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    movl $1, %ecx
+; CHECK-NEXT:    lock xaddq %rcx, (%rdi)
+; CHECK-NEXT:    testq %rcx, %rcx
+; CHECK-NEXT:    cmovlel %edx, %eax
 ; CHECK-NEXT:    retq
 entry:
   %tmp0 = atomicrmw add i64* %p, i64 1 seq_cst

Modified: llvm/trunk/test/CodeGen/X86/atomic128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/atomic128.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/atomic128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/atomic128.ll Wed Sep 19 11:59:08 2018
@@ -12,10 +12,9 @@ define i128 @val_compare_and_swap(i128*
 ; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset %rbx, -16
-; CHECK-NEXT:    movq %rcx, %r9
+; CHECK-NEXT:    movq %rcx, %rbx
 ; CHECK-NEXT:    movq %rsi, %rax
 ; CHECK-NEXT:    movq %r8, %rcx
-; CHECK-NEXT:    movq %r9, %rbx
 ; CHECK-NEXT:    lock cmpxchg16b (%rdi)
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    retq

Modified: llvm/trunk/test/CodeGen/X86/avg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avg.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avg.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avg.ll Wed Sep 19 11:59:08 2018
@@ -1638,6 +1638,7 @@ define <64 x i8> @avg_v64i8_3(<64 x i8>
 define <512 x i8> @avg_v512i8_3(<512 x i8> %a, <512 x i8> %b) nounwind {
 ; SSE2-LABEL: avg_v512i8_3:
 ; SSE2:       # %bb.0:
+; SSE2-NEXT:    movq %rdi, %rax
 ; SSE2-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm8
 ; SSE2-NEXT:    pavgb {{[0-9]+}}(%rsp), %xmm8
 ; SSE2-NEXT:    movdqa %xmm8, 496(%rdi)
@@ -1726,7 +1727,6 @@ define <512 x i8> @avg_v512i8_3(<512 x i
 ; SSE2-NEXT:    movdqa %xmm1, 16(%rdi)
 ; SSE2-NEXT:    pavgb {{[0-9]+}}(%rsp), %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, (%rdi)
-; SSE2-NEXT:    movq %rdi, %rax
 ; SSE2-NEXT:    retq
 ;
 ; AVX1-LABEL: avg_v512i8_3:
@@ -1735,6 +1735,7 @@ define <512 x i8> @avg_v512i8_3(<512 x i
 ; AVX1-NEXT:    movq %rsp, %rbp
 ; AVX1-NEXT:    andq $-32, %rsp
 ; AVX1-NEXT:    subq $128, %rsp
+; AVX1-NEXT:    movq %rdi, %rax
 ; AVX1-NEXT:    vmovdqa 144(%rbp), %ymm8
 ; AVX1-NEXT:    vmovdqa 112(%rbp), %ymm9
 ; AVX1-NEXT:    vmovdqa 80(%rbp), %ymm10
@@ -1861,7 +1862,6 @@ define <512 x i8> @avg_v512i8_3(<512 x i
 ; AVX1-NEXT:    vmovaps %ymm0, 32(%rdi)
 ; AVX1-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
 ; AVX1-NEXT:    vmovaps %ymm0, (%rdi)
-; AVX1-NEXT:    movq %rdi, %rax
 ; AVX1-NEXT:    movq %rbp, %rsp
 ; AVX1-NEXT:    popq %rbp
 ; AVX1-NEXT:    vzeroupper
@@ -1873,6 +1873,7 @@ define <512 x i8> @avg_v512i8_3(<512 x i
 ; AVX2-NEXT:    movq %rsp, %rbp
 ; AVX2-NEXT:    andq $-32, %rsp
 ; AVX2-NEXT:    subq $32, %rsp
+; AVX2-NEXT:    movq %rdi, %rax
 ; AVX2-NEXT:    vmovdqa 240(%rbp), %ymm8
 ; AVX2-NEXT:    vmovdqa 208(%rbp), %ymm9
 ; AVX2-NEXT:    vmovdqa 176(%rbp), %ymm10
@@ -1913,7 +1914,6 @@ define <512 x i8> @avg_v512i8_3(<512 x i
 ; AVX2-NEXT:    vmovdqa %ymm2, 64(%rdi)
 ; AVX2-NEXT:    vmovdqa %ymm1, 32(%rdi)
 ; AVX2-NEXT:    vmovdqa %ymm0, (%rdi)
-; AVX2-NEXT:    movq %rdi, %rax
 ; AVX2-NEXT:    movq %rbp, %rsp
 ; AVX2-NEXT:    popq %rbp
 ; AVX2-NEXT:    vzeroupper
@@ -1925,6 +1925,7 @@ define <512 x i8> @avg_v512i8_3(<512 x i
 ; AVX512F-NEXT:    movq %rsp, %rbp
 ; AVX512F-NEXT:    andq $-32, %rsp
 ; AVX512F-NEXT:    subq $32, %rsp
+; AVX512F-NEXT:    movq %rdi, %rax
 ; AVX512F-NEXT:    vmovdqa 240(%rbp), %ymm8
 ; AVX512F-NEXT:    vmovdqa 208(%rbp), %ymm9
 ; AVX512F-NEXT:    vmovdqa 176(%rbp), %ymm10
@@ -1965,7 +1966,6 @@ define <512 x i8> @avg_v512i8_3(<512 x i
 ; AVX512F-NEXT:    vmovdqa %ymm2, 64(%rdi)
 ; AVX512F-NEXT:    vmovdqa %ymm1, 32(%rdi)
 ; AVX512F-NEXT:    vmovdqa %ymm0, (%rdi)
-; AVX512F-NEXT:    movq %rdi, %rax
 ; AVX512F-NEXT:    movq %rbp, %rsp
 ; AVX512F-NEXT:    popq %rbp
 ; AVX512F-NEXT:    vzeroupper
@@ -1977,6 +1977,7 @@ define <512 x i8> @avg_v512i8_3(<512 x i
 ; AVX512BW-NEXT:    movq %rsp, %rbp
 ; AVX512BW-NEXT:    andq $-64, %rsp
 ; AVX512BW-NEXT:    subq $64, %rsp
+; AVX512BW-NEXT:    movq %rdi, %rax
 ; AVX512BW-NEXT:    vpavgb 16(%rbp), %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpavgb 80(%rbp), %zmm1, %zmm1
 ; AVX512BW-NEXT:    vpavgb 144(%rbp), %zmm2, %zmm2
@@ -1993,7 +1994,6 @@ define <512 x i8> @avg_v512i8_3(<512 x i
 ; AVX512BW-NEXT:    vmovdqa64 %zmm2, 128(%rdi)
 ; AVX512BW-NEXT:    vmovdqa64 %zmm1, 64(%rdi)
 ; AVX512BW-NEXT:    vmovdqa64 %zmm0, (%rdi)
-; AVX512BW-NEXT:    movq %rdi, %rax
 ; AVX512BW-NEXT:    movq %rbp, %rsp
 ; AVX512BW-NEXT:    popq %rbp
 ; AVX512BW-NEXT:    vzeroupper

Modified: llvm/trunk/test/CodeGen/X86/avoid-sfb.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avoid-sfb.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avoid-sfb.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avoid-sfb.ll Wed Sep 19 11:59:08 2018
@@ -727,28 +727,29 @@ if.end:
 define void @test_stack(%struct.S6* noalias nocapture sret %agg.result, %struct.S6* byval nocapture readnone align 8 %s1, %struct.S6* byval nocapture align 8 %s2, i32 %x) local_unnamed_addr #0 {
 ; CHECK-LABEL: test_stack:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    movl %esi, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
 ; CHECK-NEXT:    movups %xmm0, (%rdi)
-; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT:    movq %rax, 16(%rdi)
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movl %eax, 24(%rdi)
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movl %eax, 28(%rdi)
-; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
-; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
+; CHECK-NEXT:    movq %rcx, 16(%rdi)
+; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
+; CHECK-NEXT:    movl %ecx, 24(%rdi)
 ; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
+; CHECK-NEXT:    movl %ecx, 28(%rdi)
+; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
 ; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %edx
+; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %esi
 ; CHECK-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movl %ecx, {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movl %edx, {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    movl %esi, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    retq
 ;
 ; DISABLED-LABEL: test_stack:
 ; DISABLED:       # %bb.0: # %entry
+; DISABLED-NEXT:    movq %rdi, %rax
 ; DISABLED-NEXT:    movl %esi, {{[0-9]+}}(%rsp)
 ; DISABLED-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
 ; DISABLED-NEXT:    movups %xmm0, (%rdi)
@@ -758,51 +759,50 @@ define void @test_stack(%struct.S6* noal
 ; DISABLED-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
 ; DISABLED-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; DISABLED-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
-; DISABLED-NEXT:    movq %rdi, %rax
 ; DISABLED-NEXT:    retq
 ;
 ; CHECK-AVX2-LABEL: test_stack:
 ; CHECK-AVX2:       # %bb.0: # %entry
+; CHECK-AVX2-NEXT:    movq %rdi, %rax
 ; CHECK-AVX2-NEXT:    movl %esi, {{[0-9]+}}(%rsp)
 ; CHECK-AVX2-NEXT:    vmovups {{[0-9]+}}(%rsp), %xmm0
 ; CHECK-AVX2-NEXT:    vmovups %xmm0, (%rdi)
-; CHECK-AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
-; CHECK-AVX2-NEXT:    movq %rax, 16(%rdi)
-; CHECK-AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-AVX2-NEXT:    movl %eax, 24(%rdi)
-; CHECK-AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-AVX2-NEXT:    movl %eax, 28(%rdi)
+; CHECK-AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
+; CHECK-AVX2-NEXT:    movq %rcx, 16(%rdi)
+; CHECK-AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
+; CHECK-AVX2-NEXT:    movl %ecx, 24(%rdi)
+; CHECK-AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
+; CHECK-AVX2-NEXT:    movl %ecx, 28(%rdi)
 ; CHECK-AVX2-NEXT:    vmovups {{[0-9]+}}(%rsp), %xmm0
 ; CHECK-AVX2-NEXT:    vmovups %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
-; CHECK-AVX2-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-AVX2-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
-; CHECK-AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-AVX2-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
-; CHECK-AVX2-NEXT:    movq %rdi, %rax
+; CHECK-AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
+; CHECK-AVX2-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
+; CHECK-AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
+; CHECK-AVX2-NEXT:    movl %ecx, {{[0-9]+}}(%rsp)
+; CHECK-AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
+; CHECK-AVX2-NEXT:    movl %ecx, {{[0-9]+}}(%rsp)
 ; CHECK-AVX2-NEXT:    retq
 ;
 ; CHECK-AVX512-LABEL: test_stack:
 ; CHECK-AVX512:       # %bb.0: # %entry
+; CHECK-AVX512-NEXT:    movq %rdi, %rax
 ; CHECK-AVX512-NEXT:    movl %esi, {{[0-9]+}}(%rsp)
 ; CHECK-AVX512-NEXT:    vmovups {{[0-9]+}}(%rsp), %xmm0
 ; CHECK-AVX512-NEXT:    vmovups %xmm0, (%rdi)
-; CHECK-AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rax
-; CHECK-AVX512-NEXT:    movq %rax, 16(%rdi)
-; CHECK-AVX512-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-AVX512-NEXT:    movl %eax, 24(%rdi)
-; CHECK-AVX512-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-AVX512-NEXT:    movl %eax, 28(%rdi)
+; CHECK-AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
+; CHECK-AVX512-NEXT:    movq %rcx, 16(%rdi)
+; CHECK-AVX512-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
+; CHECK-AVX512-NEXT:    movl %ecx, 24(%rdi)
+; CHECK-AVX512-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
+; CHECK-AVX512-NEXT:    movl %ecx, 28(%rdi)
 ; CHECK-AVX512-NEXT:    vmovups {{[0-9]+}}(%rsp), %xmm0
 ; CHECK-AVX512-NEXT:    vmovups %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rax
-; CHECK-AVX512-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-AVX512-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-AVX512-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
-; CHECK-AVX512-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-AVX512-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
-; CHECK-AVX512-NEXT:    movq %rdi, %rax
+; CHECK-AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
+; CHECK-AVX512-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
+; CHECK-AVX512-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
+; CHECK-AVX512-NEXT:    movl %ecx, {{[0-9]+}}(%rsp)
+; CHECK-AVX512-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
+; CHECK-AVX512-NEXT:    movl %ecx, {{[0-9]+}}(%rsp)
 ; CHECK-AVX512-NEXT:    retq
 entry:
   %s6.sroa.0.0..sroa_cast1 = bitcast %struct.S6* %s2 to i8*

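In test_stack the early claim on %rax has a knock-on effect: the scratch loads that previously cycled through %eax/%rax are renamed to %ecx, %edx and %esi, since %rax now carries the sret pointer for the whole body. The instruction count is unchanged; only the trailing copy moves to the front. A sketch of the sret shape involved (hypothetical function, typed in the style of the test):

  %struct.pair = type { i64, i64 }
  define void @copy_pair(%struct.pair* noalias sret %out, %struct.pair* %in) {
    %v = load %struct.pair, %struct.pair* %in
    store %struct.pair %v, %struct.pair* %out
    ret void
  }
  ; new: movq %rdi, %rax          ; return the sret pointer first
  ;      ...temporaries then use %rcx/%rdx rather than %rax...
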
Modified: llvm/trunk/test/CodeGen/X86/avx-intel-ocl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-intel-ocl.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-intel-ocl.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-intel-ocl.ll Wed Sep 19 11:59:08 2018
@@ -122,8 +122,8 @@ define intel_ocl_bicc <16 x float> @test
 
 ; pass parameters in registers for 64-bit platform
 ; X64-LABEL: test_int
-; X64: leal {{.*}}, %edi
 ; X64: movl {{.*}}, %esi
+; X64: leal {{.*}}, %edi
 ; X64: call
 ; X64: addl {{.*}}, %eax
 define i32 @test_int(i32 %a, i32 %b) nounwind {

Modified: llvm/trunk/test/CodeGen/X86/avx-vinsertf128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-vinsertf128.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-vinsertf128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-vinsertf128.ll Wed Sep 19 11:59:08 2018
@@ -75,8 +75,7 @@ define <8 x i32> @DAGCombineB(<8 x i32>
 define <4 x double> @insert_undef_pd(<4 x double> %a0, <2 x double> %a1) {
 ; CHECK-LABEL: insert_undef_pd:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    # kill: def $xmm1 killed $xmm1 def $ymm1
-; CHECK-NEXT:    vmovaps %ymm1, %ymm0
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
 ; CHECK-NEXT:    retq
 %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> undef, <2 x double> %a1, i8 0)
 ret <4 x double> %res
@@ -86,8 +85,7 @@ declare <4 x double> @llvm.x86.avx.vinse
 define <8 x float> @insert_undef_ps(<8 x float> %a0, <4 x float> %a1) {
 ; CHECK-LABEL: insert_undef_ps:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    # kill: def $xmm1 killed $xmm1 def $ymm1
-; CHECK-NEXT:    vmovaps %ymm1, %ymm0
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
 ; CHECK-NEXT:    retq
 %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %a1, i8 0)
 ret <8 x float> %res
@@ -97,8 +95,7 @@ declare <8 x float> @llvm.x86.avx.vinser
 define <8 x i32> @insert_undef_si(<8 x i32> %a0, <4 x i32> %a1) {
 ; CHECK-LABEL: insert_undef_si:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    # kill: def $xmm1 killed $xmm1 def $ymm1
-; CHECK-NEXT:    vmovaps %ymm1, %ymm0
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
 ; CHECK-NEXT:    retq
 %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> undef, <4 x i32> %a1, i8 0)
 ret <8 x i32> %res

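The vinsertf128 tests above show a second kind of win: inserting %a1 into an undef vector is just %a1 widened, and with better hints the coalescer keeps the result in an xmm-sized copy instead of an implicit-def ymm plus a 256-bit move, dropping an instruction. The shape, as in insert_undef_pd above:

  declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8)
  define <4 x double> @insert_undef(<4 x double> %a0, <2 x double> %a1) {
    %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> undef, <2 x double> %a1, i8 0)
    ret <4 x double> %res
  }
  ; old: # kill: def $xmm1 killed $xmm1 def $ymm1    new: vmovaps %xmm1, %xmm0
  ;      vmovaps %ymm1, %ymm0
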
Modified: llvm/trunk/test/CodeGen/X86/avx512-arith.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-arith.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-arith.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-arith.ll Wed Sep 19 11:59:08 2018
@@ -904,9 +904,9 @@ define <8 x double> @test_broadcast_vadd
 define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i,
 ; CHECK-LABEL: test_mask_broadcast_vaddpd:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vptestmq %zmm2, %zmm2, %k1
-; CHECK-NEXT:    vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1}
 ; CHECK-NEXT:    vmovapd %zmm1, %zmm0
+; CHECK-NEXT:    vptestmq %zmm2, %zmm2, %k1
+; CHECK-NEXT:    vaddpd (%rdi){1to8}, %zmm1, %zmm0 {%k1}
 ; CHECK-NEXT:    retq
                                       double* %j, <8 x i64> %mask1) nounwind {
   %mask = icmp ne <8 x i64> %mask1, zeroinitializer

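test_mask_broadcast_vaddpd is a merge-masking case: the passthrough copy vmovapd %zmm1, %zmm0 now precedes the masked add, so the add writes the return register %zmm0 directly and no copy remains after the high-latency FP op. The IR being compiled is roughly (assumed sketch, with the memory broadcast of the addend omitted):

  define <8 x double> @masked_add(<8 x double> %i, <8 x double> %x, <8 x i1> %m) {
    %sum = fadd <8 x double> %i, %x
    %res = select <8 x i1> %m, <8 x double> %sum, <8 x double> %i
    ret <8 x double> %res
  }
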
Modified: llvm/trunk/test/CodeGen/X86/avx512-calling-conv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-calling-conv.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-calling-conv.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-calling-conv.ll Wed Sep 19 11:59:08 2018
@@ -272,9 +272,9 @@ define i1 @test9(double %a, double %b) {
 define i32 @test10(i32 %a, i32 %b, i1 %cond) {
 ; ALL_X64-LABEL: test10:
 ; ALL_X64:       ## %bb.0:
-; ALL_X64-NEXT:    testb $1, %dl
-; ALL_X64-NEXT:    cmovel %esi, %edi
 ; ALL_X64-NEXT:    movl %edi, %eax
+; ALL_X64-NEXT:    testb $1, %dl
+; ALL_X64-NEXT:    cmovel %esi, %eax
 ; ALL_X64-NEXT:    retq
 ;
 ; KNL_X32-LABEL: test10:

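test10 shows the two-address cmov pattern in isolation: cmovel destroys its destination, so once %eax is hinted up front the conditional move can target it directly. The body is essentially (sketch; only the define line appears in the hunk):

  define i32 @sel(i32 %a, i32 %b, i1 %cond) {
    %r = select i1 %cond, i32 %a, i32 %b
    ret i32 %r
  }
  ; old: testb $1, %dl            new: movl %edi, %eax   ; hinted copy first
  ;      cmovel %esi, %edi             testb $1, %dl
  ;      movl %edi, %eax               cmovel %esi, %eax  ; writes the return reg
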
Modified: llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll Wed Sep 19 11:59:08 2018
@@ -195,21 +195,21 @@ define <16 x i32> @test11(<16 x i32>%a,
 define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) {
 ; KNL-LABEL: test12:
 ; KNL:       ## %bb.0:
-; KNL-NEXT:    vpcmpgtq %zmm0, %zmm2, %k0
-; KNL-NEXT:    kmovw %k0, %eax
-; KNL-NEXT:    testb $1, %al
-; KNL-NEXT:    cmoveq %rsi, %rdi
 ; KNL-NEXT:    movq %rdi, %rax
+; KNL-NEXT:    vpcmpgtq %zmm0, %zmm2, %k0
+; KNL-NEXT:    kmovw %k0, %ecx
+; KNL-NEXT:    testb $1, %cl
+; KNL-NEXT:    cmoveq %rsi, %rax
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test12:
 ; SKX:       ## %bb.0:
-; SKX-NEXT:    vpcmpgtq %zmm0, %zmm2, %k0
-; SKX-NEXT:    kmovd %k0, %eax
-; SKX-NEXT:    testb $1, %al
-; SKX-NEXT:    cmoveq %rsi, %rdi
 ; SKX-NEXT:    movq %rdi, %rax
+; SKX-NEXT:    vpcmpgtq %zmm0, %zmm2, %k0
+; SKX-NEXT:    kmovd %k0, %ecx
+; SKX-NEXT:    testb $1, %cl
+; SKX-NEXT:    cmoveq %rsi, %rax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
   %cmpvector_func.i = icmp slt <16 x i64> %a, %b
@@ -257,23 +257,23 @@ define i16 @test13(i32 %a, i32 %b) {
 define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) {
 ; KNL-LABEL: test14:
 ; KNL:       ## %bb.0:
+; KNL-NEXT:    movq %rdi, %rax
 ; KNL-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
 ; KNL-NEXT:    kshiftrw $4, %k0, %k0
-; KNL-NEXT:    kmovw %k0, %eax
-; KNL-NEXT:    testb $1, %al
-; KNL-NEXT:    cmoveq %rsi, %rdi
-; KNL-NEXT:    movq %rdi, %rax
+; KNL-NEXT:    kmovw %k0, %ecx
+; KNL-NEXT:    testb $1, %cl
+; KNL-NEXT:    cmoveq %rsi, %rax
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test14:
 ; SKX:       ## %bb.0:
+; SKX-NEXT:    movq %rdi, %rax
 ; SKX-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
 ; SKX-NEXT:    kshiftrw $4, %k0, %k0
-; SKX-NEXT:    kmovd %k0, %eax
-; SKX-NEXT:    testb $1, %al
-; SKX-NEXT:    cmoveq %rsi, %rdi
-; SKX-NEXT:    movq %rdi, %rax
+; SKX-NEXT:    kmovd %k0, %ecx
+; SKX-NEXT:    testb $1, %cl
+; SKX-NEXT:    cmoveq %rsi, %rax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
   %cmpvector_func.i = icmp slt <8 x i64> %a, %b

Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll Wed Sep 19 11:59:08 2018
@@ -5853,9 +5853,10 @@ define i16 @test_kand(i16 %a0, i16 %a1)
 ;
 ; X64-LABEL: test_kand:
 ; X64:       ## %bb.0:
-; X64-NEXT:    andl %esi, %edi ## encoding: [0x21,0xf7]
-; X64-NEXT:    andl $8, %edi ## encoding: [0x83,0xe7,0x08]
 ; X64-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
+; X64-NEXT:    andl %esi, %eax ## encoding: [0x21,0xf0]
+; X64-NEXT:    andl $8, %eax ## encoding: [0x83,0xe0,0x08]
+; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq ## encoding: [0xc3]
   %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8)
   %t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1)
@@ -5875,9 +5876,10 @@ define i16 @test_kandn(i16 %a0, i16 %a1)
 ;
 ; X64-LABEL: test_kandn:
 ; X64:       ## %bb.0:
-; X64-NEXT:    orl $-9, %edi ## encoding: [0x83,0xcf,0xf7]
-; X64-NEXT:    andl %esi, %edi ## encoding: [0x21,0xf7]
 ; X64-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
+; X64-NEXT:    orl $-9, %eax ## encoding: [0x83,0xc8,0xf7]
+; X64-NEXT:    andl %esi, %eax ## encoding: [0x21,0xf0]
+; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq ## encoding: [0xc3]
   %t1 = call i16 @llvm.x86.avx512.kandn.w(i16 %a0, i16 8)
   %t2 = call i16 @llvm.x86.avx512.kandn.w(i16 %t1, i16 %a1)
@@ -5895,8 +5897,9 @@ define i16 @test_knot(i16 %a0) {
 ;
 ; X64-LABEL: test_knot:
 ; X64:       ## %bb.0:
-; X64-NEXT:    notl %edi ## encoding: [0xf7,0xd7]
 ; X64-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
+; X64-NEXT:    notl %eax ## encoding: [0xf7,0xd0]
+; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq ## encoding: [0xc3]
   %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0)
   ret i16 %res
@@ -5914,9 +5917,10 @@ define i16 @test_kor(i16 %a0, i16 %a1) {
 ;
 ; X64-LABEL: test_kor:
 ; X64:       ## %bb.0:
-; X64-NEXT:    orl %esi, %edi ## encoding: [0x09,0xf7]
-; X64-NEXT:    orl $8, %edi ## encoding: [0x83,0xcf,0x08]
 ; X64-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
+; X64-NEXT:    orl %esi, %eax ## encoding: [0x09,0xf0]
+; X64-NEXT:    orl $8, %eax ## encoding: [0x83,0xc8,0x08]
+; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq ## encoding: [0xc3]
   %t1 = call i16 @llvm.x86.avx512.kor.w(i16 %a0, i16 8)
   %t2 = call i16 @llvm.x86.avx512.kor.w(i16 %t1, i16 %a1)
@@ -5937,9 +5941,10 @@ define i16 @test_kxnor(i16 %a0, i16 %a1)
 ;
 ; X64-LABEL: test_kxnor:
 ; X64:       ## %bb.0:
-; X64-NEXT:    xorl %esi, %edi ## encoding: [0x31,0xf7]
-; X64-NEXT:    xorl $8, %edi ## encoding: [0x83,0xf7,0x08]
 ; X64-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
+; X64-NEXT:    xorl %esi, %eax ## encoding: [0x31,0xf0]
+; X64-NEXT:    xorl $8, %eax ## encoding: [0x83,0xf0,0x08]
+; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq ## encoding: [0xc3]
   %t1 = call i16 @llvm.x86.avx512.kxnor.w(i16 %a0, i16 8)
   %t2 = call i16 @llvm.x86.avx512.kxnor.w(i16 %t1, i16 %a1)
@@ -5958,9 +5963,10 @@ define i16 @test_kxor(i16 %a0, i16 %a1)
 ;
 ; X64-LABEL: test_kxor:
 ; X64:       ## %bb.0:
-; X64-NEXT:    xorl %esi, %edi ## encoding: [0x31,0xf7]
-; X64-NEXT:    xorl $8, %edi ## encoding: [0x83,0xf7,0x08]
 ; X64-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
+; X64-NEXT:    xorl %esi, %eax ## encoding: [0x31,0xf0]
+; X64-NEXT:    xorl $8, %eax ## encoding: [0x83,0xf0,0x08]
+; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq ## encoding: [0xc3]
   %t1 = call i16 @llvm.x86.avx512.kxor.w(i16 %a0, i16 8)
   %t2 = call i16 @llvm.x86.avx512.kxor.w(i16 %t1, i16 %a1)

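The new "## kill: def $ax killed $ax killed $eax" lines in these tests are comments, not instructions: they mark that only the 16-bit (or 8-bit) sub-register of the value now computed in %eax is live out, and they expand to no machine code. A minimal analogue (not from the patch) that should produce the same marker:

  define i16 @not16(i16 %x) {
    %r = xor i16 %x, -1
    ret i16 %r
  }
  ; movl %edi, %eax
  ; notl %eax
  ; ## kill: def $ax killed $ax killed $eax   ; marker only: %ax is the live part
  ; retq
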
Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Wed Sep 19 11:59:08 2018
@@ -9,8 +9,9 @@
 define i16 @mask16(i16 %x) {
 ; CHECK-LABEL: mask16:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    notl %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    notl %eax
+; CHECK-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
 ;
 ; X86-LABEL: mask16:
@@ -47,8 +48,9 @@ define i32 @mask16_zext(i16 %x) {
 define i8 @mask8(i8 %x) {
 ; CHECK-LABEL: mask8:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    notb %dil
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    notb %al
+; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
 ;
 ; X86-LABEL: mask8:
@@ -149,10 +151,11 @@ define i16 @mand16(i16 %x, i16 %y) {
 ; CHECK-LABEL: mand16:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    xorl %esi, %eax
-; CHECK-NEXT:    andl %esi, %edi
-; CHECK-NEXT:    orl %eax, %edi
-; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    movl %edi, %ecx
+; CHECK-NEXT:    xorl %esi, %ecx
+; CHECK-NEXT:    andl %esi, %eax
+; CHECK-NEXT:    orl %ecx, %eax
+; CHECK-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
 ;
 ; X86-LABEL: mand16:

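mand16 is a case where the rewrite needs a second copy up front: both andl and xorl are destructive and both read %edi, so the value has to be duplicated either way (%ecx here). The trailing movl %edi, %eax is traded for that leading copy, the count stays at five instructions, and the final orl lands directly in %eax. The test works on <16 x i1> bitcasts; an equivalent scalar sketch:

  define i16 @mand(i16 %x, i16 %y) {
    %a = and i16 %x, %y
    %b = xor i16 %x, %y
    %r = or i16 %b, %a
    ret i16 %r
  }
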
Modified: llvm/trunk/test/CodeGen/X86/avx512-regcall-NoMask.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-regcall-NoMask.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-regcall-NoMask.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-regcall-NoMask.ll Wed Sep 19 11:59:08 2018
@@ -924,47 +924,46 @@ define x86_regcallcc i32 @testi32_inp(i3
 ; X32-NEXT:    pushl %ebx
 ; X32-NEXT:    subl $20, %esp
 ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    movl %edi, %esi
 ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    movl %edx, %ebx
 ; X32-NEXT:    movl %edx, (%esp) # 4-byte Spill
 ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    movl %eax, %edx
+; X32-NEXT:    movl %eax, %ebx
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    subl %ecx, %edx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X32-NEXT:    movl %edi, %ebp
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %ebp
-; X32-NEXT:    imull %ebp, %edx
-; X32-NEXT:    subl %esi, %ebx
+; X32-NEXT:    subl %ecx, %ebx
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    movl %esi, %ecx
+; X32-NEXT:    movl %esi, %ebp
+; X32-NEXT:    subl {{[0-9]+}}(%esp), %ebp
+; X32-NEXT:    imull %ebp, %ebx
+; X32-NEXT:    movl %edx, %ebp
+; X32-NEXT:    subl %edi, %ebp
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    movl %edx, %ecx
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    imull %ebx, %ecx
-; X32-NEXT:    addl %ecx, %edx
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT:    movl %ebx, %ebp
+; X32-NEXT:    imull %ebp, %ecx
+; X32-NEXT:    addl %ecx, %ebx
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT:    movl %edi, %ebp
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %ebp
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    movl %ecx, %eax
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    imull %ebp, %eax
-; X32-NEXT:    addl %eax, %edx
+; X32-NEXT:    addl %eax, %ebx
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
 ; X32-NEXT:    movl (%esp), %ebp # 4-byte Reload
 ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ebx
 ; X32-NEXT:    addl {{[0-9]+}}(%esp), %edi
-; X32-NEXT:    imull %eax, %edi
 ; X32-NEXT:    addl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    imull %ebp, %esi
-; X32-NEXT:    addl %edi, %esi
+; X32-NEXT:    imull %eax, %esi
+; X32-NEXT:    addl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    imull %ebp, %edx
+; X32-NEXT:    addl %esi, %edx
 ; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    imull %ebx, %ecx
-; X32-NEXT:    addl %esi, %ecx
-; X32-NEXT:    addl %ecx, %edx
-; X32-NEXT:    movl %edx, %eax
+; X32-NEXT:    imull %edi, %ecx
+; X32-NEXT:    addl %edx, %ecx
+; X32-NEXT:    addl %ecx, %ebx
+; X32-NEXT:    movl %ebx, %eax
 ; X32-NEXT:    addl $20, %esp
 ; X32-NEXT:    popl %ebx
 ; X32-NEXT:    popl %ebp

Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Wed Sep 19 11:59:08 2018
@@ -947,16 +947,16 @@ define <8 x double> @test_broadcast_vadd
 define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i, double* %j, <8 x i64> %mask1) nounwind {
 ; GENERIC-LABEL: test_mask_broadcast_vaddpd:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} # sched: [10:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
+; GENERIC-NEXT:    vaddpd (%rdi){1to8}, %zmm1, %zmm0 {%k1} # sched: [10:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_mask_broadcast_vaddpd:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vptestmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT:    vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} # sched: [11:0.50]
 ; SKX-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:0.33]
+; SKX-NEXT:    vptestmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
+; SKX-NEXT:    vaddpd (%rdi){1to8}, %zmm1, %zmm0 {%k1} # sched: [11:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %mask = icmp ne <8 x i64> %mask1, zeroinitializer
   %tmp = load double, double* %j
@@ -6669,14 +6669,16 @@ define <8 x double> @mov_test47(i8 * %ad
 define i16 @mask16(i16 %x) {
 ; GENERIC-LABEL: mask16:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    notl %edi # sched: [1:0.33]
 ; GENERIC-NEXT:    movl %edi, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    notl %eax # sched: [1:0.33]
+; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: mask16:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    notl %edi # sched: [1:0.25]
 ; SKX-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; SKX-NEXT:    notl %eax # sched: [1:0.25]
+; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %m0 = bitcast i16 %x to <16 x i1>
   %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
@@ -6706,14 +6708,16 @@ define i32 @mask16_zext(i16 %x) {
 define i8 @mask8(i8 %x) {
 ; GENERIC-LABEL: mask8:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    notb %dil # sched: [1:0.33]
 ; GENERIC-NEXT:    movl %edi, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    notb %al # sched: [1:0.33]
+; GENERIC-NEXT:    # kill: def $al killed $al killed $eax
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: mask8:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    notb %dil # sched: [1:0.25]
 ; SKX-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; SKX-NEXT:    notb %al # sched: [1:0.25]
+; SKX-NEXT:    # kill: def $al killed $al killed $eax
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %m0 = bitcast i8 %x to <8 x i1>
   %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
@@ -6788,19 +6792,21 @@ define i16 @mand16(i16 %x, i16 %y) {
 ; GENERIC-LABEL: mand16:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    movl %edi, %eax # sched: [1:0.33]
-; GENERIC-NEXT:    xorl %esi, %eax # sched: [1:0.33]
-; GENERIC-NEXT:    andl %esi, %edi # sched: [1:0.33]
-; GENERIC-NEXT:    orl %eax, %edi # sched: [1:0.33]
-; GENERIC-NEXT:    movl %edi, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    movl %edi, %ecx # sched: [1:0.33]
+; GENERIC-NEXT:    xorl %esi, %ecx # sched: [1:0.33]
+; GENERIC-NEXT:    andl %esi, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    orl %ecx, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: mand16:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    movl %edi, %eax # sched: [1:0.25]
-; SKX-NEXT:    xorl %esi, %eax # sched: [1:0.25]
-; SKX-NEXT:    andl %esi, %edi # sched: [1:0.25]
-; SKX-NEXT:    orl %eax, %edi # sched: [1:0.25]
-; SKX-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; SKX-NEXT:    movl %edi, %ecx # sched: [1:0.25]
+; SKX-NEXT:    xorl %esi, %ecx # sched: [1:0.25]
+; SKX-NEXT:    andl %esi, %eax # sched: [1:0.25]
+; SKX-NEXT:    orl %ecx, %eax # sched: [1:0.25]
+; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %ma = bitcast i16 %x to <16 x i1>
   %mb = bitcast i16 %y to <16 x i1>

Modified: llvm/trunk/test/CodeGen/X86/avx512-select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-select.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-select.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-select.ll Wed Sep 19 11:59:08 2018
@@ -135,8 +135,9 @@ define i8 @select05(i8 %a.0, i8 %m) {
 ;
 ; X64-LABEL: select05:
 ; X64:       # %bb.0:
-; X64-NEXT:    orl %esi, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    orl %esi, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
   %mask = bitcast i8 %m to <8 x i1>
   %a = bitcast i8 %a.0 to <8 x i1>
@@ -185,8 +186,9 @@ define i8 @select06(i8 %a.0, i8 %m) {
 ;
 ; X64-LABEL: select06:
 ; X64:       # %bb.0:
-; X64-NEXT:    andl %esi, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    andl %esi, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
   %mask = bitcast i8 %m to <8 x i1>
   %a = bitcast i8 %a.0 to <8 x i1>

Modified: llvm/trunk/test/CodeGen/X86/avx512bw-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-mask-op.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-mask-op.ll Wed Sep 19 11:59:08 2018
@@ -4,8 +4,8 @@
 define i32 @mask32(i32 %x) {
 ; CHECK-LABEL: mask32:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    notl %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    notl %eax
 ; CHECK-NEXT:    retq
   %m0 = bitcast i32 %x to <32 x i1>
   %m1 = xor <32 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
@@ -19,8 +19,8 @@ define i32 @mask32(i32 %x) {
 define i64 @mask64(i64 %x) {
 ; CHECK-LABEL: mask64:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    notq %rdi
 ; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    notq %rax
 ; CHECK-NEXT:    retq
   %m0 = bitcast i64 %x to <64 x i1>
   %m1 = xor <64 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
@@ -79,10 +79,10 @@ define i32 @mand32(i32 %x, i32 %y) {
 ; CHECK-LABEL: mand32:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    andl %esi, %eax
-; CHECK-NEXT:    xorl %esi, %edi
-; CHECK-NEXT:    orl %eax, %edi
-; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    movl %edi, %ecx
+; CHECK-NEXT:    andl %esi, %ecx
+; CHECK-NEXT:    xorl %esi, %eax
+; CHECK-NEXT:    orl %ecx, %eax
 ; CHECK-NEXT:    retq
   %ma = bitcast i32 %x to <32 x i1>
   %mb = bitcast i32 %y to <32 x i1>
@@ -116,10 +116,10 @@ define i64 @mand64(i64 %x, i64 %y) {
 ; CHECK-LABEL: mand64:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    movq %rdi, %rax
-; CHECK-NEXT:    andq %rsi, %rax
-; CHECK-NEXT:    xorq %rsi, %rdi
-; CHECK-NEXT:    orq %rax, %rdi
-; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    movq %rdi, %rcx
+; CHECK-NEXT:    andq %rsi, %rcx
+; CHECK-NEXT:    xorq %rsi, %rax
+; CHECK-NEXT:    orq %rcx, %rax
 ; CHECK-NEXT:    retq
   %ma = bitcast i64 %x to <64 x i1>
   %mb = bitcast i64 %y to <64 x i1>

Modified: llvm/trunk/test/CodeGen/X86/avx512dq-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512dq-mask-op.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512dq-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512dq-mask-op.ll Wed Sep 19 11:59:08 2018
@@ -4,8 +4,9 @@
 define i8 @mask8(i8 %x) {
 ; CHECK-LABEL: mask8:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    notb %dil
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    notb %al
+; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
   %m0 = bitcast i8 %x to <8 x i1>
   %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
@@ -32,10 +33,11 @@ define i8 @mand8(i8 %x, i8 %y) {
 ; CHECK-LABEL: mand8:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    xorl %esi, %eax
-; CHECK-NEXT:    andl %esi, %edi
-; CHECK-NEXT:    orl %eax, %edi
-; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    movl %edi, %ecx
+; CHECK-NEXT:    xorl %esi, %ecx
+; CHECK-NEXT:    andl %esi, %eax
+; CHECK-NEXT:    orl %ecx, %eax
+; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
   %ma = bitcast i8 %x to <8 x i1>
   %mb = bitcast i8 %y to <8 x i1>

Modified: llvm/trunk/test/CodeGen/X86/avx512vl-arith.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-arith.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-arith.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-arith.ll Wed Sep 19 11:59:08 2018
@@ -408,9 +408,9 @@ define <4 x double> @test_broadcast2_vad
 define <4 x double> @test_mask_broadcast_vaddpd_256(<4 x double> %dst, <4 x double> %i, double* %j, <4 x i64> %mask1) nounwind {
 ; CHECK-LABEL: test_mask_broadcast_vaddpd_256:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vptestmq %ymm2, %ymm2, %k1 ## encoding: [0x62,0xf2,0xed,0x28,0x27,0xca]
-; CHECK-NEXT:    vaddpd (%rdi){1to4}, %ymm1, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x39,0x58,0x0f]
 ; CHECK-NEXT:    vmovapd %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
+; CHECK-NEXT:    vptestmq %ymm2, %ymm2, %k1 ## encoding: [0x62,0xf2,0xed,0x28,0x27,0xca]
+; CHECK-NEXT:    vaddpd (%rdi){1to4}, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x39,0x58,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
   %tmp = load double, double* %j
@@ -835,9 +835,9 @@ define <2 x double> @test_broadcast2_vad
 define <2 x double> @test_mask_broadcast_vaddpd_128(<2 x double> %dst, <2 x double> %i, double* %j, <2 x i64> %mask1) nounwind {
 ; CHECK-LABEL: test_mask_broadcast_vaddpd_128:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vptestmq %xmm2, %xmm2, %k1 ## encoding: [0x62,0xf2,0xed,0x08,0x27,0xca]
-; CHECK-NEXT:    vaddpd (%rdi){1to2}, %xmm1, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x19,0x58,0x0f]
 ; CHECK-NEXT:    vmovapd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
+; CHECK-NEXT:    vptestmq %xmm2, %xmm2, %k1 ## encoding: [0x62,0xf2,0xed,0x08,0x27,0xca]
+; CHECK-NEXT:    vaddpd (%rdi){1to2}, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x19,0x58,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %mask = icmp ne <2 x i64> %mask1, zeroinitializer
   %tmp = load double, double* %j

Modified: llvm/trunk/test/CodeGen/X86/bigstructret.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bigstructret.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bigstructret.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bigstructret.ll Wed Sep 19 11:59:08 2018
@@ -8,20 +8,20 @@
 define fastcc %0 @ReturnBigStruct() nounwind readnone {
 ; X86-LABEL: ReturnBigStruct:
 ; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:    movl $24601, 12(%ecx) # imm = 0x6019
 ; X86-NEXT:    movl $48, 8(%ecx)
 ; X86-NEXT:    movl $24, 4(%ecx)
 ; X86-NEXT:    movl $12, (%ecx)
-; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: ReturnBigStruct:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    movabsq $105660490448944, %rax # imm = 0x601900000030
-; X64-NEXT:    movq %rax, 8(%rdi)
-; X64-NEXT:    movabsq $103079215116, %rax # imm = 0x180000000C
-; X64-NEXT:    movq %rax, (%rdi)
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movabsq $105660490448944, %rcx # imm = 0x601900000030
+; X64-NEXT:    movq %rcx, 8(%rdi)
+; X64-NEXT:    movabsq $103079215116, %rcx # imm = 0x180000000C
+; X64-NEXT:    movq %rcx, (%rdi)
 ; X64-NEXT:    retq
 entry:
   %0 = insertvalue %0 zeroinitializer, i32 12, 0
@@ -35,18 +35,18 @@ entry:
 define fastcc %1 @ReturnBigStruct2() nounwind readnone {
 ; X86-LABEL: ReturnBigStruct2:
 ; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:    movl $48, 4(%ecx)
 ; X86-NEXT:    movb $1, 2(%ecx)
 ; X86-NEXT:    movw $256, (%ecx) # imm = 0x100
-; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: ReturnBigStruct2:
 ; X64:       # %bb.0: # %entry
+; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    movl $48, 4(%rdi)
 ; X64-NEXT:    movb $1, 2(%rdi)
 ; X64-NEXT:    movw $256, (%rdi) # imm = 0x100
-; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    retq
 entry:
   %0 = insertvalue %1 zeroinitializer, i1 false, 0

Modified: llvm/trunk/test/CodeGen/X86/bitcast-i256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bitcast-i256.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bitcast-i256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bitcast-i256.ll Wed Sep 19 11:59:08 2018
@@ -5,16 +5,16 @@
 define i256 @foo(<8 x i32> %a) {
 ; FAST-LABEL: foo:
 ; FAST:       # %bb.0:
-; FAST-NEXT:    vmovups %ymm0, (%rdi)
 ; FAST-NEXT:    movq %rdi, %rax
+; FAST-NEXT:    vmovups %ymm0, (%rdi)
 ; FAST-NEXT:    vzeroupper
 ; FAST-NEXT:    retq
 ;
 ; SLOW-LABEL: foo:
 ; SLOW:       # %bb.0:
+; SLOW-NEXT:    movq %rdi, %rax
 ; SLOW-NEXT:    vextractf128 $1, %ymm0, 16(%rdi)
 ; SLOW-NEXT:    vmovups %xmm0, (%rdi)
-; SLOW-NEXT:    movq %rdi, %rax
 ; SLOW-NEXT:    vzeroupper
 ; SLOW-NEXT:    retq
   %r = bitcast <8 x i32> %a to i256

Modified: llvm/trunk/test/CodeGen/X86/bitcast-int-to-vector-bool.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bitcast-int-to-vector-bool.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bitcast-int-to-vector-bool.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bitcast-int-to-vector-bool.ll Wed Sep 19 11:59:08 2018
@@ -193,8 +193,8 @@ define <16 x i1> @bitcast_i16_16i1(i16 z
 define <32 x i1> @bitcast_i32_32i1(i32 %a0) {
 ; SSE2-SSSE3-LABEL: bitcast_i32_32i1:
 ; SSE2-SSSE3:       # %bb.0:
-; SSE2-SSSE3-NEXT:    movl %esi, (%rdi)
 ; SSE2-SSSE3-NEXT:    movq %rdi, %rax
+; SSE2-SSSE3-NEXT:    movl %esi, (%rdi)
 ; SSE2-SSSE3-NEXT:    retq
 ;
 ; AVX1-LABEL: bitcast_i32_32i1:
@@ -250,14 +250,14 @@ define <32 x i1> @bitcast_i32_32i1(i32 %
 define <64 x i1> @bitcast_i64_64i1(i64 %a0) {
 ; SSE2-SSSE3-LABEL: bitcast_i64_64i1:
 ; SSE2-SSSE3:       # %bb.0:
-; SSE2-SSSE3-NEXT:    movq %rsi, (%rdi)
 ; SSE2-SSSE3-NEXT:    movq %rdi, %rax
+; SSE2-SSSE3-NEXT:    movq %rsi, (%rdi)
 ; SSE2-SSSE3-NEXT:    retq
 ;
 ; AVX12-LABEL: bitcast_i64_64i1:
 ; AVX12:       # %bb.0:
-; AVX12-NEXT:    movq %rsi, (%rdi)
 ; AVX12-NEXT:    movq %rdi, %rax
+; AVX12-NEXT:    movq %rsi, (%rdi)
 ; AVX12-NEXT:    retq
 ;
 ; AVX512-LABEL: bitcast_i64_64i1:

Modified: llvm/trunk/test/CodeGen/X86/bitreverse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bitreverse.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bitreverse.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bitreverse.ll Wed Sep 19 11:59:08 2018
@@ -341,20 +341,21 @@ define i8 @test_bitreverse_i8(i8 %a) {
 ;
 ; X64-LABEL: test_bitreverse_i8:
 ; X64:       # %bb.0:
-; X64-NEXT:    rolb $4, %dil
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    andb $51, %al
-; X64-NEXT:    shlb $2, %al
-; X64-NEXT:    andb $-52, %dil
-; X64-NEXT:    shrb $2, %dil
-; X64-NEXT:    orb %al, %dil
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    andb $85, %al
-; X64-NEXT:    addb %al, %al
-; X64-NEXT:    andb $-86, %dil
-; X64-NEXT:    shrb %dil
-; X64-NEXT:    orb %al, %dil
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    rolb $4, %al
+; X64-NEXT:    movl %eax, %ecx
+; X64-NEXT:    andb $51, %cl
+; X64-NEXT:    shlb $2, %cl
+; X64-NEXT:    andb $-52, %al
+; X64-NEXT:    shrb $2, %al
+; X64-NEXT:    orb %cl, %al
+; X64-NEXT:    movl %eax, %ecx
+; X64-NEXT:    andb $85, %cl
+; X64-NEXT:    addb %cl, %cl
+; X64-NEXT:    andb $-86, %al
+; X64-NEXT:    shrb %al
+; X64-NEXT:    orb %cl, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
   %b = call i8 @llvm.bitreverse.i8(i8 %a)
   ret i8 %b
@@ -384,21 +385,22 @@ define i4 @test_bitreverse_i4(i4 %a) {
 ;
 ; X64-LABEL: test_bitreverse_i4:
 ; X64:       # %bb.0:
-; X64-NEXT:    rolb $4, %dil
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    andb $51, %al
-; X64-NEXT:    shlb $2, %al
-; X64-NEXT:    andb $-52, %dil
-; X64-NEXT:    shrb $2, %dil
-; X64-NEXT:    orb %al, %dil
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    andb $80, %al
-; X64-NEXT:    addb %al, %al
-; X64-NEXT:    andb $-96, %dil
-; X64-NEXT:    shrb %dil
-; X64-NEXT:    orb %al, %dil
-; X64-NEXT:    shrb $4, %dil
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    rolb $4, %al
+; X64-NEXT:    movl %eax, %ecx
+; X64-NEXT:    andb $51, %cl
+; X64-NEXT:    shlb $2, %cl
+; X64-NEXT:    andb $-52, %al
+; X64-NEXT:    shrb $2, %al
+; X64-NEXT:    orb %cl, %al
+; X64-NEXT:    movl %eax, %ecx
+; X64-NEXT:    andb $80, %cl
+; X64-NEXT:    addb %cl, %cl
+; X64-NEXT:    andb $-96, %al
+; X64-NEXT:    shrb %al
+; X64-NEXT:    orb %cl, %al
+; X64-NEXT:    shrb $4, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
   %b = call i4 @llvm.bitreverse.i4(i4 %a)
   ret i4 %b
@@ -474,6 +476,7 @@ define i8 @identity_i8(i8 %a) {
 ; X64-LABEL: identity_i8:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
   %b = call i8 @llvm.bitreverse.i8(i8 %a)
   %c = call i8 @llvm.bitreverse.i8(i8 %b)

Modified: llvm/trunk/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll Wed Sep 19 11:59:08 2018
@@ -10,9 +10,9 @@
 define i64 @test__andn_u64(i64 %a0, i64 %a1) {
 ; X64-LABEL: test__andn_u64:
 ; X64:       # %bb.0:
-; X64-NEXT:    xorq $-1, %rdi
-; X64-NEXT:    andq %rsi, %rdi
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    xorq $-1, %rax
+; X64-NEXT:    andq %rsi, %rax
 ; X64-NEXT:    retq
   %xor = xor i64 %a0, -1
   %res = and i64 %xor, %a1
@@ -84,9 +84,9 @@ define i64 @test__tzcnt_u64(i64 %a0) {
 define i64 @test_andn_u64(i64 %a0, i64 %a1) {
 ; X64-LABEL: test_andn_u64:
 ; X64:       # %bb.0:
-; X64-NEXT:    xorq $-1, %rdi
-; X64-NEXT:    andq %rsi, %rdi
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    xorq $-1, %rax
+; X64-NEXT:    andq %rsi, %rax
 ; X64-NEXT:    retq
   %xor = xor i64 %a0, -1
   %res = and i64 %xor, %a1

Modified: llvm/trunk/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll Wed Sep 19 11:59:08 2018
@@ -47,9 +47,9 @@ define i32 @test__andn_u32(i32 %a0, i32
 ;
 ; X64-LABEL: test__andn_u32:
 ; X64:       # %bb.0:
-; X64-NEXT:    xorl $-1, %edi
-; X64-NEXT:    andl %esi, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    xorl $-1, %eax
+; X64-NEXT:    andl %esi, %eax
 ; X64-NEXT:    retq
   %xor = xor i32 %a0, -1
   %res = and i32 %xor, %a1
@@ -199,9 +199,9 @@ define i32 @test_andn_u32(i32 %a0, i32 %
 ;
 ; X64-LABEL: test_andn_u32:
 ; X64:       # %bb.0:
-; X64-NEXT:    xorl $-1, %edi
-; X64-NEXT:    andl %esi, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    xorl $-1, %eax
+; X64-NEXT:    andl %esi, %eax
 ; X64-NEXT:    retq
   %xor = xor i32 %a0, -1
   %res = and i32 %xor, %a1

Modified: llvm/trunk/test/CodeGen/X86/bmi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bmi.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bmi.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bmi.ll Wed Sep 19 11:59:08 2018
@@ -421,9 +421,9 @@ define i32 @non_bextr32(i32 %x) {
 ;
 ; X64-LABEL: non_bextr32:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    shrl $2, %edi
-; X64-NEXT:    andl $111, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    shrl $2, %eax
+; X64-NEXT:    andl $111, %eax
 ; X64-NEXT:    retq
 entry:
   %shr = lshr i32 %x, 2

Modified: llvm/trunk/test/CodeGen/X86/bmi2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bmi2.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bmi2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bmi2.ll Wed Sep 19 11:59:08 2018
@@ -130,15 +130,15 @@ define i32 @mulx32(i32 %x, i32 %y, i32*
 ;
 ; X64-LABEL: mulx32:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    movl %esi, %eax
 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    addl %edi, %edi
-; X64-NEXT:    addl %esi, %esi
-; X64-NEXT:    imulq %rdi, %rsi
-; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    shrq $32, %rax
-; X64-NEXT:    movl %eax, (%rdx)
-; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    addl %eax, %eax
+; X64-NEXT:    imulq %rdi, %rax
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    shrq $32, %rcx
+; X64-NEXT:    movl %ecx, (%rdx)
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %x1 = add i32 %x, %x
   %y1 = add i32 %y, %y
@@ -165,14 +165,14 @@ define i32 @mulx32_load(i32 %x, i32* %y,
 ;
 ; X64-LABEL: mulx32_load:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    addl %edi, %edi
-; X64-NEXT:    movl (%rsi), %eax
-; X64-NEXT:    imulq %rax, %rdi
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    shrq $32, %rax
-; X64-NEXT:    movl %eax, (%rdx)
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    addl %eax, %eax
+; X64-NEXT:    movl (%rsi), %ecx
+; X64-NEXT:    imulq %rcx, %rax
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    shrq $32, %rcx
+; X64-NEXT:    movl %ecx, (%rdx)
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %x1 = add i32 %x, %x
   %y1 = load i32, i32* %y

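mulx32 shows the hint interacting with a widening multiply: %esi is copied into %eax at entry, the 64-bit product accumulates in %rax, the high half is routed through %rcx, and the old trailing movl %esi, %eax becomes just a kill marker because the low half already lives in %eax. The underlying IR is approximately (assumed sketch; the real test also doubles both inputs first):

  define i32 @mulhi_lo(i32 %x, i32 %y, i32* %hi) {
    %xw = zext i32 %x to i64
    %yw = zext i32 %y to i64
    %p  = mul i64 %xw, %yw
    %h  = lshr i64 %p, 32
    %ht = trunc i64 %h to i32
    store i32 %ht, i32* %hi
    %lo = trunc i64 %p to i32
    ret i32 %lo
  }
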
Modified: llvm/trunk/test/CodeGen/X86/bool-math.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bool-math.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bool-math.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bool-math.ll Wed Sep 19 11:59:08 2018
@@ -32,9 +32,10 @@ define i32 @sub_zext_cmp_mask_wider_resu
 define i8 @sub_zext_cmp_mask_narrower_result(i32 %x) {
 ; CHECK-LABEL: sub_zext_cmp_mask_narrower_result:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    andl $1, %edi
-; CHECK-NEXT:    orb $46, %dil
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    orb $46, %al
+; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
   %a = and i32 %x, 1
   %c = icmp eq i32 %a, 0
@@ -46,9 +47,10 @@ define i8 @sub_zext_cmp_mask_narrower_re
 define i8 @add_zext_cmp_mask_same_size_result(i8 %x) {
 ; CHECK-LABEL: add_zext_cmp_mask_same_size_result:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    andb $1, %dil
-; CHECK-NEXT:    xorb $27, %dil
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    andb $1, %al
+; CHECK-NEXT:    xorb $27, %al
+; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
   %a = and i8 %x, 1
   %c = icmp eq i8 %a, 0
@@ -60,9 +62,9 @@ define i8 @add_zext_cmp_mask_same_size_r
 define i32 @add_zext_cmp_mask_wider_result(i8 %x) {
 ; CHECK-LABEL: add_zext_cmp_mask_wider_result:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    andl $1, %edi
-; CHECK-NEXT:    xorl $27, %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    xorl $27, %eax
 ; CHECK-NEXT:    retq
   %a = and i8 %x, 1
   %c = icmp eq i8 %a, 0
@@ -74,9 +76,10 @@ define i32 @add_zext_cmp_mask_wider_resu
 define i8 @add_zext_cmp_mask_narrower_result(i32 %x) {
 ; CHECK-LABEL: add_zext_cmp_mask_narrower_result:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    andl $1, %edi
-; CHECK-NEXT:    xorb $43, %dil
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    xorb $43, %al
+; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
   %a = and i32 %x, 1
   %c = icmp eq i32 %a, 0
@@ -128,9 +131,10 @@ define i16 @low_bit_select_constants_big
 define i8 @low_bit_select_constants_bigger_true_same_size_result(i8 %x) {
 ; CHECK-LABEL: low_bit_select_constants_bigger_true_same_size_result:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    andb $1, %dil
-; CHECK-NEXT:    xorb $-29, %dil
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    andb $1, %al
+; CHECK-NEXT:    xorb $-29, %al
+; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
   %a = and i8 %x, 1
   %c = icmp eq i8 %a, 0
@@ -141,9 +145,9 @@ define i8 @low_bit_select_constants_bigg
 define i32 @low_bit_select_constants_bigger_true_wider_result(i8 %x) {
 ; CHECK-LABEL: low_bit_select_constants_bigger_true_wider_result:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    andl $1, %edi
-; CHECK-NEXT:    xorl $227, %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    xorl $227, %eax
 ; CHECK-NEXT:    retq
   %a = and i8 %x, 1
   %c = icmp eq i8 %a, 0
@@ -154,9 +158,10 @@ define i32 @low_bit_select_constants_big
 define i8 @low_bit_select_constants_bigger_true_narrower_result(i16 %x) {
 ; CHECK-LABEL: low_bit_select_constants_bigger_true_narrower_result:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    andl $1, %edi
-; CHECK-NEXT:    xorb $41, %dil
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    xorb $41, %al
+; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
   %a = and i16 %x, 1
   %c = icmp eq i16 %a, 0

Modified: llvm/trunk/test/CodeGen/X86/bool-simplify.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bool-simplify.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bool-simplify.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bool-simplify.ll Wed Sep 19 11:59:08 2018
@@ -4,9 +4,9 @@
 define i32 @foo(<2 x i64> %c, i32 %a, i32 %b) {
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    ptest %xmm0, %xmm0
-; CHECK-NEXT:    cmovnel %esi, %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    ptest %xmm0, %xmm0
+; CHECK-NEXT:    cmovnel %esi, %eax
 ; CHECK-NEXT:    retq
   %t1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> %c)
   %t2 = icmp ne i32 %t1, 0

Modified: llvm/trunk/test/CodeGen/X86/bswap-rotate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bswap-rotate.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bswap-rotate.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bswap-rotate.ll Wed Sep 19 11:59:08 2018
@@ -14,8 +14,9 @@ define i16 @combine_bswap_rotate(i16 %a0
 ;
 ; X64-LABEL: combine_bswap_rotate:
 ; X64:       # %bb.0:
-; X64-NEXT:    rolw $9, %di
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    rolw $9, %ax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
   %1 = call i16 @llvm.bswap.i16(i16 %a0)
   %2 = shl i16 %1, 1

Modified: llvm/trunk/test/CodeGen/X86/bswap-wide-int.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bswap-wide-int.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bswap-wide-int.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bswap-wide-int.ll Wed Sep 19 11:59:08 2018
@@ -25,14 +25,14 @@ define i64 @bswap_i64(i64 %a0) nounwind
 ;
 ; X64-LABEL: bswap_i64:
 ; X64:       # %bb.0:
-; X64-NEXT:    bswapq %rdi
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    bswapq %rax
 ; X64-NEXT:    retq
 ;
 ; X64-MOVBE-LABEL: bswap_i64:
 ; X64-MOVBE:       # %bb.0:
-; X64-MOVBE-NEXT:    bswapq %rdi
 ; X64-MOVBE-NEXT:    movq %rdi, %rax
+; X64-MOVBE-NEXT:    bswapq %rax
 ; X64-MOVBE-NEXT:    retq
   %1 = call i64 @llvm.bswap.i64(i64 %a0)
   ret i64 %1
@@ -79,17 +79,17 @@ define i128 @bswap_i128(i128 %a0) nounwi
 ;
 ; X64-LABEL: bswap_i128:
 ; X64:       # %bb.0:
-; X64-NEXT:    bswapq %rsi
-; X64-NEXT:    bswapq %rdi
 ; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    bswapq %rax
+; X64-NEXT:    bswapq %rdi
 ; X64-NEXT:    movq %rdi, %rdx
 ; X64-NEXT:    retq
 ;
 ; X64-MOVBE-LABEL: bswap_i128:
 ; X64-MOVBE:       # %bb.0:
-; X64-MOVBE-NEXT:    bswapq %rsi
-; X64-MOVBE-NEXT:    bswapq %rdi
 ; X64-MOVBE-NEXT:    movq %rsi, %rax
+; X64-MOVBE-NEXT:    bswapq %rax
+; X64-MOVBE-NEXT:    bswapq %rdi
 ; X64-MOVBE-NEXT:    movq %rdi, %rdx
 ; X64-MOVBE-NEXT:    retq
   %1 = call i128 @llvm.bswap.i128(i128 %a0)
@@ -149,6 +149,7 @@ define i256 @bswap_i256(i256 %a0) nounwi
 ;
 ; X64-LABEL: bswap_i256:
 ; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    bswapq %r8
 ; X64-NEXT:    bswapq %rcx
 ; X64-NEXT:    bswapq %rdx
@@ -157,16 +158,15 @@ define i256 @bswap_i256(i256 %a0) nounwi
 ; X64-NEXT:    movq %rdx, 16(%rdi)
 ; X64-NEXT:    movq %rcx, 8(%rdi)
 ; X64-NEXT:    movq %r8, (%rdi)
-; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    retq
 ;
 ; X64-MOVBE-LABEL: bswap_i256:
 ; X64-MOVBE:       # %bb.0:
+; X64-MOVBE-NEXT:    movq %rdi, %rax
 ; X64-MOVBE-NEXT:    movbeq %rsi, 24(%rdi)
 ; X64-MOVBE-NEXT:    movbeq %rdx, 16(%rdi)
 ; X64-MOVBE-NEXT:    movbeq %rcx, 8(%rdi)
 ; X64-MOVBE-NEXT:    movbeq %r8, (%rdi)
-; X64-MOVBE-NEXT:    movq %rdi, %rax
 ; X64-MOVBE-NEXT:    retq
   %1 = call i256 @llvm.bswap.i256(i256 %a0)
   ret i256 %1

Modified: llvm/trunk/test/CodeGen/X86/bswap.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bswap.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bswap.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bswap.ll Wed Sep 19 11:59:08 2018
@@ -19,8 +19,9 @@ define i16 @W(i16 %A) {
 ;
 ; CHECK64-LABEL: W:
 ; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    rolw $8, %di
 ; CHECK64-NEXT:    movl %edi, %eax
+; CHECK64-NEXT:    rolw $8, %ax
+; CHECK64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK64-NEXT:    retq
         %Z = call i16 @llvm.bswap.i16( i16 %A )         ; <i16> [#uses=1]
         ret i16 %Z
@@ -35,8 +36,8 @@ define i32 @X(i32 %A) {
 ;
 ; CHECK64-LABEL: X:
 ; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    bswapl %edi
 ; CHECK64-NEXT:    movl %edi, %eax
+; CHECK64-NEXT:    bswapl %eax
 ; CHECK64-NEXT:    retq
         %Z = call i32 @llvm.bswap.i32( i32 %A )         ; <i32> [#uses=1]
         ret i32 %Z
@@ -53,8 +54,8 @@ define i64 @Y(i64 %A) {
 ;
 ; CHECK64-LABEL: Y:
 ; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    bswapq %rdi
 ; CHECK64-NEXT:    movq %rdi, %rax
+; CHECK64-NEXT:    bswapq %rax
 ; CHECK64-NEXT:    retq
         %Z = call i64 @llvm.bswap.i64( i64 %A )         ; <i64> [#uses=1]
         ret i64 %Z
@@ -71,9 +72,9 @@ define i32 @test1(i32 %a) nounwind readn
 ;
 ; CHECK64-LABEL: test1:
 ; CHECK64:       # %bb.0: # %entry
-; CHECK64-NEXT:    bswapl %edi
-; CHECK64-NEXT:    shrl $16, %edi
 ; CHECK64-NEXT:    movl %edi, %eax
+; CHECK64-NEXT:    bswapl %eax
+; CHECK64-NEXT:    shrl $16, %eax
 ; CHECK64-NEXT:    retq
 entry:
 
@@ -95,9 +96,9 @@ define i32 @test2(i32 %a) nounwind readn
 ;
 ; CHECK64-LABEL: test2:
 ; CHECK64:       # %bb.0: # %entry
-; CHECK64-NEXT:    bswapl %edi
-; CHECK64-NEXT:    sarl $16, %edi
 ; CHECK64-NEXT:    movl %edi, %eax
+; CHECK64-NEXT:    bswapl %eax
+; CHECK64-NEXT:    sarl $16, %eax
 ; CHECK64-NEXT:    retq
 entry:
 

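The bswap tests reduce the whole change to its simplest form: a destructive unary op on an argument that is also the return value. Compiling the i32 case from the test file above:

  declare i32 @llvm.bswap.i32(i32)
  define i32 @X(i32 %A) {
    %Z = call i32 @llvm.bswap.i32(i32 %A)
    ret i32 %Z
  }
  ; old: bswapl %edi             new: movl %edi, %eax   ; copy to the hinted reg first
  ;      movl %edi, %eax              bswapl %eax       ; op runs in the return reg
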
Modified: llvm/trunk/test/CodeGen/X86/bswap_tree.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bswap_tree.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bswap_tree.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bswap_tree.ll Wed Sep 19 11:59:08 2018
@@ -20,9 +20,9 @@ define i32 @test1(i32 %x) nounwind {
 ;
 ; CHECK64-LABEL: test1:
 ; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    bswapl %edi
-; CHECK64-NEXT:    roll $16, %edi
 ; CHECK64-NEXT:    movl %edi, %eax
+; CHECK64-NEXT:    bswapl %eax
+; CHECK64-NEXT:    roll $16, %eax
 ; CHECK64-NEXT:    retq
   %byte0 = and i32 %x, 255        ; 0x000000ff
   %byte1 = and i32 %x, 65280      ; 0x0000ff00
@@ -53,9 +53,9 @@ define i32 @test2(i32 %x) nounwind {
 ;
 ; CHECK64-LABEL: test2:
 ; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    bswapl %edi
-; CHECK64-NEXT:    roll $16, %edi
 ; CHECK64-NEXT:    movl %edi, %eax
+; CHECK64-NEXT:    bswapl %eax
+; CHECK64-NEXT:    roll $16, %eax
 ; CHECK64-NEXT:    retq
   %byte1 = shl  i32 %x, 8
   %byte0 = lshr i32 %x, 8

Modified: llvm/trunk/test/CodeGen/X86/bswap_tree2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bswap_tree2.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bswap_tree2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bswap_tree2.ll Wed Sep 19 11:59:08 2018
@@ -25,16 +25,16 @@
 ; CHECK64-LABEL: test1:
 ; CHECK64:       # %bb.0:
 ; CHECK64-NEXT:    movl %edi, %eax
-; CHECK64-NEXT:    andl $16711680, %eax # imm = 0xFF0000
 ; CHECK64-NEXT:    movl %edi, %ecx
-; CHECK64-NEXT:    orl $-16777216, %ecx # imm = 0xFF000000
-; CHECK64-NEXT:    shll $8, %eax
-; CHECK64-NEXT:    shrl $8, %ecx
-; CHECK64-NEXT:    orl %eax, %ecx
-; CHECK64-NEXT:    bswapl %edi
-; CHECK64-NEXT:    shrl $16, %edi
-; CHECK64-NEXT:    orl %ecx, %edi
-; CHECK64-NEXT:    movl %edi, %eax
+; CHECK64-NEXT:    andl $16711680, %ecx # imm = 0xFF0000
+; CHECK64-NEXT:    movl %edi, %edx
+; CHECK64-NEXT:    orl $-16777216, %edx # imm = 0xFF000000
+; CHECK64-NEXT:    shll $8, %ecx
+; CHECK64-NEXT:    shrl $8, %edx
+; CHECK64-NEXT:    orl %ecx, %edx
+; CHECK64-NEXT:    bswapl %eax
+; CHECK64-NEXT:    shrl $16, %eax
+; CHECK64-NEXT:    orl %edx, %eax
 ; CHECK64-NEXT:    retq
   %byte0 = and i32 %x, 255        ; 0x000000ff
   %byte1 = and i32 %x, 65280      ; 0x0000ff00

Modified: llvm/trunk/test/CodeGen/X86/bt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bt.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bt.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bt.ll Wed Sep 19 11:59:08 2018
@@ -1112,16 +1112,16 @@ define void @demanded_i32(i32* nocapture
 ;
 ; X64-LABEL: demanded_i32:
 ; X64:       # %bb.0:
+; X64-NEXT:    movl %edx, %ecx
 ; X64-NEXT:    movl %edx, %eax
 ; X64-NEXT:    shrl $5, %eax
-; X64-NEXT:    movl (%rdi,%rax,4), %r8d
-; X64-NEXT:    movl $1, %edi
-; X64-NEXT:    movl %edx, %ecx
-; X64-NEXT:    shll %cl, %edi
-; X64-NEXT:    btl %edx, %r8d
+; X64-NEXT:    movl (%rdi,%rax,4), %edi
+; X64-NEXT:    movl $1, %edx
+; X64-NEXT:    shll %cl, %edx
+; X64-NEXT:    btl %ecx, %edi
 ; X64-NEXT:    jae .LBB30_2
 ; X64-NEXT:  # %bb.1:
-; X64-NEXT:    orl %edi, (%rsi,%rax,4)
+; X64-NEXT:    orl %edx, (%rsi,%rax,4)
 ; X64-NEXT:  .LBB30_2:
 ; X64-NEXT:    retq
   %4 = lshr i32 %2, 5

Modified: llvm/trunk/test/CodeGen/X86/btc_bts_btr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/btc_bts_btr.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/btc_bts_btr.ll (original)
+++ llvm/trunk/test/CodeGen/X86/btc_bts_btr.ll Wed Sep 19 11:59:08 2018
@@ -6,8 +6,9 @@
 define i16 @btr_16(i16 %x, i16 %n) {
 ; X64-LABEL: btr_16:
 ; X64:       # %bb.0:
-; X64-NEXT:    movw $-2, %ax
 ; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    movw $-2, %ax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    rolw %cl, %ax
 ; X64-NEXT:    andl %edi, %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -28,8 +29,9 @@ define i16 @btr_16(i16 %x, i16 %n) {
 define i16 @bts_16(i16 %x, i16 %n) {
 ; X64-LABEL: bts_16:
 ; X64:       # %bb.0:
-; X64-NEXT:    btsl %esi, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    btsl %esi, %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: bts_16:
@@ -48,8 +50,9 @@ define i16 @bts_16(i16 %x, i16 %n) {
 define i16 @btc_16(i16 %x, i16 %n) {
 ; X64-LABEL: btc_16:
 ; X64:       # %bb.0:
-; X64-NEXT:    btcl %esi, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    btcl %esi, %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: btc_16:
@@ -68,8 +71,8 @@ define i16 @btc_16(i16 %x, i16 %n) {
 define i32 @btr_32(i32 %x, i32 %n) {
 ; X64-LABEL: btr_32:
 ; X64:       # %bb.0:
-; X64-NEXT:    btrl %esi, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    btrl %esi, %eax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: btr_32:
@@ -87,8 +90,8 @@ define i32 @btr_32(i32 %x, i32 %n) {
 define i32 @bts_32(i32 %x, i32 %n) {
 ; X64-LABEL: bts_32:
 ; X64:       # %bb.0:
-; X64-NEXT:    btsl %esi, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    btsl %esi, %eax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: bts_32:
@@ -105,8 +108,8 @@ define i32 @bts_32(i32 %x, i32 %n) {
 define i32 @btc_32(i32 %x, i32 %n) {
 ; X64-LABEL: btc_32:
 ; X64:       # %bb.0:
-; X64-NEXT:    btcl %esi, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    btcl %esi, %eax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: btc_32:
@@ -123,8 +126,8 @@ define i32 @btc_32(i32 %x, i32 %n) {
 define i64 @btr_64(i64 %x, i64 %n) {
 ; X64-LABEL: btr_64:
 ; X64:       # %bb.0:
-; X64-NEXT:    btrq %rsi, %rdi
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    btrq %rsi, %rax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: btr_64:
@@ -154,8 +157,8 @@ define i64 @btr_64(i64 %x, i64 %n) {
 define i64 @bts_64(i64 %x, i64 %n) {
 ; X64-LABEL: bts_64:
 ; X64:       # %bb.0:
-; X64-NEXT:    btsq %rsi, %rdi
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    btsq %rsi, %rax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: bts_64:
@@ -182,8 +185,8 @@ define i64 @bts_64(i64 %x, i64 %n) {
 define i64 @btc_64(i64 %x, i64 %n) {
 ; X64-LABEL: btc_64:
 ; X64:       # %bb.0:
-; X64-NEXT:    btcq %rsi, %rdi
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    btcq %rsi, %rax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: btc_64:
@@ -210,8 +213,9 @@ define i64 @btc_64(i64 %x, i64 %n) {
 define i16 @btr_16_mask(i16 %x, i16 %n) {
 ; X64-LABEL: btr_16_mask:
 ; X64:       # %bb.0:
-; X64-NEXT:    movw $-2, %ax
 ; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    movw $-2, %ax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    rolw %cl, %ax
 ; X64-NEXT:    andl %edi, %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -233,9 +237,10 @@ define i16 @btr_16_mask(i16 %x, i16 %n)
 define i16 @bts_16_mask(i16 %x, i16 %n) {
 ; X64-LABEL: bts_16_mask:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $15, %sil
-; X64-NEXT:    btsl %esi, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    andb $15, %sil
+; X64-NEXT:    btsl %esi, %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: bts_16_mask:
@@ -256,9 +261,10 @@ define i16 @bts_16_mask(i16 %x, i16 %n)
 define i16 @btc_16_mask(i16 %x, i16 %n) {
 ; X64-LABEL: btc_16_mask:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $15, %sil
-; X64-NEXT:    btcl %esi, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    andb $15, %sil
+; X64-NEXT:    btcl %esi, %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: btc_16_mask:
@@ -279,8 +285,8 @@ define i16 @btc_16_mask(i16 %x, i16 %n)
 define i32 @btr_32_mask(i32 %x, i32 %n) {
 ; X64-LABEL: btr_32_mask:
 ; X64:       # %bb.0:
-; X64-NEXT:    btrl %esi, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    btrl %esi, %eax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: btr_32_mask:
@@ -299,8 +305,8 @@ define i32 @btr_32_mask(i32 %x, i32 %n)
 define i32 @bts_32_mask(i32 %x, i32 %n) {
 ; X64-LABEL: bts_32_mask:
 ; X64:       # %bb.0:
-; X64-NEXT:    btsl %esi, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    btsl %esi, %eax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: bts_32_mask:
@@ -318,8 +324,8 @@ define i32 @bts_32_mask(i32 %x, i32 %n)
 define i32 @btc_32_mask(i32 %x, i32 %n) {
 ; X64-LABEL: btc_32_mask:
 ; X64:       # %bb.0:
-; X64-NEXT:    btcl %esi, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    btcl %esi, %eax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: btc_32_mask:
@@ -337,8 +343,8 @@ define i32 @btc_32_mask(i32 %x, i32 %n)
 define i64 @btr_64_mask(i64 %x, i64 %n) {
 ; X64-LABEL: btr_64_mask:
 ; X64:       # %bb.0:
-; X64-NEXT:    btrq %rsi, %rdi
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    btrq %rsi, %rax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: btr_64_mask:
@@ -369,8 +375,8 @@ define i64 @btr_64_mask(i64 %x, i64 %n)
 define i64 @bts_64_mask(i64 %x, i64 %n) {
 ; X64-LABEL: bts_64_mask:
 ; X64:       # %bb.0:
-; X64-NEXT:    btsq %rsi, %rdi
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    btsq %rsi, %rax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: bts_64_mask:
@@ -398,8 +404,8 @@ define i64 @bts_64_mask(i64 %x, i64 %n)
 define i64 @btc_64_mask(i64 %x, i64 %n) {
 ; X64-LABEL: btc_64_mask:
 ; X64:       # %bb.0:
-; X64-NEXT:    btcq %rsi, %rdi
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    btcq %rsi, %rax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: btc_64_mask:
@@ -450,8 +456,9 @@ define i16 @btr_16_load(i16* %x, i16 %n)
 define i16 @bts_16_load(i16* %x, i16 %n) {
 ; X64-LABEL: bts_16_load:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl $1, %eax
 ; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    movl $1, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shll %cl, %eax
 ; X64-NEXT:    orw (%rdi), %ax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -475,8 +482,9 @@ define i16 @bts_16_load(i16* %x, i16 %n)
 define i16 @btc_16_load(i16* %x, i16 %n) {
 ; X64-LABEL: btc_16_load:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl $1, %eax
 ; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    movl $1, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shll %cl, %eax
 ; X64-NEXT:    xorw (%rdi), %ax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -673,8 +681,9 @@ define i64 @btc_64_load(i64* %x, i64 %n)
 define void @btr_16_dont_fold(i16* %x, i16 %n) {
 ; X64-LABEL: btr_16_dont_fold:
 ; X64:       # %bb.0:
-; X64-NEXT:    movw $-2, %ax
 ; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    movw $-2, %ax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    rolw %cl, %ax
 ; X64-NEXT:    andw %ax, (%rdi)
 ; X64-NEXT:    retq
@@ -698,8 +707,9 @@ define void @btr_16_dont_fold(i16* %x, i
 define void @bts_16_dont_fold(i16* %x, i16 %n) {
 ; X64-LABEL: bts_16_dont_fold:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl $1, %eax
 ; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    movl $1, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shll %cl, %eax
 ; X64-NEXT:    orw %ax, (%rdi)
 ; X64-NEXT:    retq
@@ -722,8 +732,9 @@ define void @bts_16_dont_fold(i16* %x, i
 define void @btc_16_dont_fold(i16* %x, i16 %n) {
 ; X64-LABEL: btc_16_dont_fold:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl $1, %eax
 ; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    movl $1, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shll %cl, %eax
 ; X64-NEXT:    xorw %ax, (%rdi)
 ; X64-NEXT:    retq
@@ -746,8 +757,9 @@ define void @btc_16_dont_fold(i16* %x, i
 define void @btr_32_dont_fold(i32* %x, i32 %n) {
 ; X64-LABEL: btr_32_dont_fold:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl $-2, %eax
 ; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    movl $-2, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    roll %cl, %eax
 ; X64-NEXT:    andl %eax, (%rdi)
 ; X64-NEXT:    retq
@@ -771,8 +783,9 @@ define void @btr_32_dont_fold(i32* %x, i
 define void @bts_32_dont_fold(i32* %x, i32 %n) {
 ; X64-LABEL: bts_32_dont_fold:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl $1, %eax
 ; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    movl $1, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shll %cl, %eax
 ; X64-NEXT:    orl %eax, (%rdi)
 ; X64-NEXT:    retq
@@ -795,8 +808,9 @@ define void @bts_32_dont_fold(i32* %x, i
 define void @btc_32_dont_fold(i32* %x, i32 %n) {
 ; X64-LABEL: btc_32_dont_fold:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl $1, %eax
 ; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    movl $1, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shll %cl, %eax
 ; X64-NEXT:    xorl %eax, (%rdi)
 ; X64-NEXT:    retq
@@ -819,8 +833,9 @@ define void @btc_32_dont_fold(i32* %x, i
 define void @btr_64_dont_fold(i64* %x, i64 %n) {
 ; X64-LABEL: btr_64_dont_fold:
 ; X64:       # %bb.0:
+; X64-NEXT:    movq %rsi, %rcx
 ; X64-NEXT:    movq $-2, %rax
-; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NEXT:    rolq %cl, %rax
 ; X64-NEXT:    andq %rax, (%rdi)
 ; X64-NEXT:    retq
@@ -860,8 +875,9 @@ define void @btr_64_dont_fold(i64* %x, i
 define void @bts_64_dont_fold(i64* %x, i64 %n) {
 ; X64-LABEL: bts_64_dont_fold:
 ; X64:       # %bb.0:
+; X64-NEXT:    movq %rsi, %rcx
 ; X64-NEXT:    movl $1, %eax
-; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NEXT:    shlq %cl, %rax
 ; X64-NEXT:    orq %rax, (%rdi)
 ; X64-NEXT:    retq
@@ -898,8 +914,9 @@ define void @bts_64_dont_fold(i64* %x, i
 define void @btc_64_dont_fold(i64* %x, i64 %n) {
 ; X64-LABEL: btc_64_dont_fold:
 ; X64:       # %bb.0:
+; X64-NEXT:    movq %rsi, %rcx
 ; X64-NEXT:    movl $1, %eax
-; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NEXT:    shlq %cl, %rax
 ; X64-NEXT:    xorq %rax, (%rdi)
 ; X64-NEXT:    retq
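(Several hunks above also pick up `# kill: def $cl killed $cl killed $ecx` lines. These are
liveness annotations printed as assembly comments, not instructions — they assemble to no
bytes — and record that once the shift/rotate count has been copied into %ecx (or %rcx),
only the low sub-register %cl stays live for the variable shift. The updated btr_16 boils
down to this shape; a sketch, not verbatim patch output:

    movl  %esi, %ecx                          # count hinted into %ecx up front
    movw  $-2, %ax
    # kill: def $cl killed $cl killed $ecx    # comment only; no code emitted
    rolw  %cl, %ax                            # rotate consumes the %cl sub-register
    andl  %edi, %eax
)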

Modified: llvm/trunk/test/CodeGen/X86/bypass-slow-division-64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bypass-slow-division-64.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bypass-slow-division-64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bypass-slow-division-64.ll Wed Sep 19 11:59:08 2018
@@ -8,17 +8,17 @@ define i64 @Test_get_quotient(i64 %a, i6
 ; CHECK-LABEL: Test_get_quotient:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rdi, %rax
-; CHECK-NEXT:    orq %rsi, %rax
-; CHECK-NEXT:    shrq $32, %rax
+; CHECK-NEXT:    movq %rdi, %rcx
+; CHECK-NEXT:    orq %rsi, %rcx
+; CHECK-NEXT:    shrq $32, %rcx
 ; CHECK-NEXT:    je .LBB0_1
 ; CHECK-NEXT:  # %bb.2:
-; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    cqto
 ; CHECK-NEXT:    idivq %rsi
 ; CHECK-NEXT:    retq
 ; CHECK-NEXT:  .LBB0_1:
 ; CHECK-NEXT:    xorl %edx, %edx
-; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
 ; CHECK-NEXT:    divl %esi
 ; CHECK-NEXT:    # kill: def $eax killed $eax def $rax
 ; CHECK-NEXT:    retq
@@ -30,21 +30,20 @@ define i64 @Test_get_remainder(i64 %a, i
 ; CHECK-LABEL: Test_get_remainder:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rdi, %rax
-; CHECK-NEXT:    orq %rsi, %rax
-; CHECK-NEXT:    shrq $32, %rax
+; CHECK-NEXT:    movq %rdi, %rcx
+; CHECK-NEXT:    orq %rsi, %rcx
+; CHECK-NEXT:    shrq $32, %rcx
 ; CHECK-NEXT:    je .LBB1_1
 ; CHECK-NEXT:  # %bb.2:
-; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    cqto
 ; CHECK-NEXT:    idivq %rsi
 ; CHECK-NEXT:    movq %rdx, %rax
 ; CHECK-NEXT:    retq
 ; CHECK-NEXT:  .LBB1_1:
 ; CHECK-NEXT:    xorl %edx, %edx
-; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
 ; CHECK-NEXT:    divl %esi
-; CHECK-NEXT:    # kill: def $edx killed $edx def $rdx
-; CHECK-NEXT:    movq %rdx, %rax
+; CHECK-NEXT:    movl %edx, %eax
 ; CHECK-NEXT:    retq
   %result = srem i64 %a, %b
   ret i64 %result
@@ -54,18 +53,18 @@ define i64 @Test_get_quotient_and_remain
 ; CHECK-LABEL: Test_get_quotient_and_remainder:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rdi, %rax
-; CHECK-NEXT:    orq %rsi, %rax
-; CHECK-NEXT:    shrq $32, %rax
+; CHECK-NEXT:    movq %rdi, %rcx
+; CHECK-NEXT:    orq %rsi, %rcx
+; CHECK-NEXT:    shrq $32, %rcx
 ; CHECK-NEXT:    je .LBB2_1
 ; CHECK-NEXT:  # %bb.2:
-; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    cqto
 ; CHECK-NEXT:    idivq %rsi
 ; CHECK-NEXT:    addq %rdx, %rax
 ; CHECK-NEXT:    retq
 ; CHECK-NEXT:  .LBB2_1:
 ; CHECK-NEXT:    xorl %edx, %edx
-; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
 ; CHECK-NEXT:    divl %esi
 ; CHECK-NEXT:    # kill: def $edx killed $edx def $rdx
 ; CHECK-NEXT:    # kill: def $eax killed $eax def $rax
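(For the slow-division bypass the hint pays off twice: divl and idivq both require the
dividend in %rax, so hoisting `movq %rdi, %rax` above the width test lets the fast 32-bit
and slow 64-bit paths share one copy, deleting the per-branch `movq %rdi, %rax` /
`movl %edi, %eax`. The `# kill: def $eax killed $eax killed $rax` left on the fast path just
documents that divl reads only the low 32 bits of the shared %rax. Sketch of the new
prologue, per the Test_get_quotient hunk above:

    movq %rdi, %rax        # dividend already in place for either division
    movq %rdi, %rcx
    orq  %rsi, %rcx
    shrq $32, %rcx         # do both operands fit in 32 bits?
    je   .Lfast_div32      # hypothetical label; the test uses .LBB0_1
)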

Modified: llvm/trunk/test/CodeGen/X86/clear-highbits.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/clear-highbits.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/clear-highbits.ll (original)
+++ llvm/trunk/test/CodeGen/X86/clear-highbits.ll Wed Sep 19 11:59:08 2018
@@ -33,10 +33,11 @@ define i8 @clear_highbits8_c0(i8 %val, i
 ; X64-LABEL: clear_highbits8_c0:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    shlb %cl, %dil
-; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    shrb %cl, %dil
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    shlb %cl, %al
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shrb %cl, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
   %mask = lshr i8 -1, %numhighbits
   %masked = and i8 %mask, %val
@@ -79,10 +80,11 @@ define i8 @clear_highbits8_c4_commutativ
 ; X64-LABEL: clear_highbits8_c4_commutative:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    shlb %cl, %dil
-; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    shrb %cl, %dil
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    shlb %cl, %al
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shrb %cl, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
   %mask = lshr i8 -1, %numhighbits
   %masked = and i8 %val, %mask ; swapped order
@@ -340,10 +342,10 @@ define i32 @clear_highbits32_c0(i32 %val
 ; X64-NOBMI2-LABEL: clear_highbits32_c0:
 ; X64-NOBMI2:       # %bb.0:
 ; X64-NOBMI2-NEXT:    movl %esi, %ecx
-; X64-NOBMI2-NEXT:    shll %cl, %edi
-; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI2-NEXT:    shrl %cl, %edi
 ; X64-NOBMI2-NEXT:    movl %edi, %eax
+; X64-NOBMI2-NEXT:    shll %cl, %eax
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI2-NEXT:    shrl %cl, %eax
 ; X64-NOBMI2-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: clear_highbits32_c0:
@@ -375,10 +377,10 @@ define i32 @clear_highbits32_c1_indexzex
 ; X64-NOBMI2-LABEL: clear_highbits32_c1_indexzext:
 ; X64-NOBMI2:       # %bb.0:
 ; X64-NOBMI2-NEXT:    movl %esi, %ecx
-; X64-NOBMI2-NEXT:    shll %cl, %edi
-; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI2-NEXT:    shrl %cl, %edi
 ; X64-NOBMI2-NEXT:    movl %edi, %eax
+; X64-NOBMI2-NEXT:    shll %cl, %eax
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI2-NEXT:    shrl %cl, %eax
 ; X64-NOBMI2-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: clear_highbits32_c1_indexzext:
@@ -488,10 +490,10 @@ define i32 @clear_highbits32_c4_commutat
 ; X64-NOBMI2-LABEL: clear_highbits32_c4_commutative:
 ; X64-NOBMI2:       # %bb.0:
 ; X64-NOBMI2-NEXT:    movl %esi, %ecx
-; X64-NOBMI2-NEXT:    shll %cl, %edi
-; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI2-NEXT:    shrl %cl, %edi
 ; X64-NOBMI2-NEXT:    movl %edi, %eax
+; X64-NOBMI2-NEXT:    shll %cl, %eax
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI2-NEXT:    shrl %cl, %eax
 ; X64-NOBMI2-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: clear_highbits32_c4_commutative:
@@ -545,10 +547,10 @@ define i64 @clear_highbits64_c0(i64 %val
 ; X64-NOBMI2-LABEL: clear_highbits64_c0:
 ; X64-NOBMI2:       # %bb.0:
 ; X64-NOBMI2-NEXT:    movq %rsi, %rcx
-; X64-NOBMI2-NEXT:    shlq %cl, %rdi
-; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-NOBMI2-NEXT:    shrq %cl, %rdi
 ; X64-NOBMI2-NEXT:    movq %rdi, %rax
+; X64-NOBMI2-NEXT:    shlq %cl, %rax
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI2-NEXT:    shrq %cl, %rax
 ; X64-NOBMI2-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: clear_highbits64_c0:
@@ -598,10 +600,10 @@ define i64 @clear_highbits64_c1_indexzex
 ; X64-NOBMI2-LABEL: clear_highbits64_c1_indexzext:
 ; X64-NOBMI2:       # %bb.0:
 ; X64-NOBMI2-NEXT:    movl %esi, %ecx
-; X64-NOBMI2-NEXT:    shlq %cl, %rdi
-; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI2-NEXT:    shrq %cl, %rdi
 ; X64-NOBMI2-NEXT:    movq %rdi, %rax
+; X64-NOBMI2-NEXT:    shlq %cl, %rax
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI2-NEXT:    shrq %cl, %rax
 ; X64-NOBMI2-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: clear_highbits64_c1_indexzext:
@@ -775,10 +777,10 @@ define i64 @clear_highbits64_c4_commutat
 ; X64-NOBMI2-LABEL: clear_highbits64_c4_commutative:
 ; X64-NOBMI2:       # %bb.0:
 ; X64-NOBMI2-NEXT:    movq %rsi, %rcx
-; X64-NOBMI2-NEXT:    shlq %cl, %rdi
-; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-NOBMI2-NEXT:    shrq %cl, %rdi
 ; X64-NOBMI2-NEXT:    movq %rdi, %rax
+; X64-NOBMI2-NEXT:    shlq %cl, %rax
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI2-NEXT:    shrq %cl, %rax
 ; X64-NOBMI2-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: clear_highbits64_c4_commutative:
@@ -834,9 +836,10 @@ define i32 @oneuse32(i32 %val, i32 %numh
 ; X64-NOBMI2-NEXT:    pushq %rbp
 ; X64-NOBMI2-NEXT:    pushq %rbx
 ; X64-NOBMI2-NEXT:    pushq %rax
+; X64-NOBMI2-NEXT:    movl %esi, %ecx
 ; X64-NOBMI2-NEXT:    movl %edi, %ebx
 ; X64-NOBMI2-NEXT:    movl $-1, %ebp
-; X64-NOBMI2-NEXT:    movl %esi, %ecx
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI2-NEXT:    shrl %cl, %ebp
 ; X64-NOBMI2-NEXT:    movl %ebp, %edi
 ; X64-NOBMI2-NEXT:    callq use32
@@ -934,9 +937,10 @@ define i64 @oneuse64(i64 %val, i64 %numh
 ; X64-NOBMI2-NEXT:    pushq %r14
 ; X64-NOBMI2-NEXT:    pushq %rbx
 ; X64-NOBMI2-NEXT:    pushq %rax
+; X64-NOBMI2-NEXT:    movq %rsi, %rcx
 ; X64-NOBMI2-NEXT:    movq %rdi, %r14
 ; X64-NOBMI2-NEXT:    movq $-1, %rbx
-; X64-NOBMI2-NEXT:    movl %esi, %ecx
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NOBMI2-NEXT:    shrq %cl, %rbx
 ; X64-NOBMI2-NEXT:    movq %rbx, %rdi
 ; X64-NOBMI2-NEXT:    callq use64

Modified: llvm/trunk/test/CodeGen/X86/clear-lowbits.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/clear-lowbits.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/clear-lowbits.ll (original)
+++ llvm/trunk/test/CodeGen/X86/clear-lowbits.ll Wed Sep 19 11:59:08 2018
@@ -35,10 +35,11 @@ define i8 @clear_lowbits8_c0(i8 %val, i8
 ; X64-LABEL: clear_lowbits8_c0:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    shrb %cl, %dil
-; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    shlb %cl, %dil
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    shrb %cl, %al
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shlb %cl, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
   %mask = shl i8 -1, %numlowbits
   %masked = and i8 %mask, %val
@@ -81,10 +82,11 @@ define i8 @clear_lowbits8_c4_commutative
 ; X64-LABEL: clear_lowbits8_c4_commutative:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    shrb %cl, %dil
-; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    shlb %cl, %dil
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    shrb %cl, %al
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shlb %cl, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
   %mask = shl i8 -1, %numlowbits
   %masked = and i8 %val, %mask ; swapped order
@@ -327,10 +329,10 @@ define i32 @clear_lowbits32_c0(i32 %val,
 ; X64-NOBMI2-LABEL: clear_lowbits32_c0:
 ; X64-NOBMI2:       # %bb.0:
 ; X64-NOBMI2-NEXT:    movl %esi, %ecx
-; X64-NOBMI2-NEXT:    shrl %cl, %edi
-; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI2-NEXT:    shll %cl, %edi
 ; X64-NOBMI2-NEXT:    movl %edi, %eax
+; X64-NOBMI2-NEXT:    shrl %cl, %eax
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI2-NEXT:    shll %cl, %eax
 ; X64-NOBMI2-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: clear_lowbits32_c0:
@@ -362,10 +364,10 @@ define i32 @clear_lowbits32_c1_indexzext
 ; X64-NOBMI2-LABEL: clear_lowbits32_c1_indexzext:
 ; X64-NOBMI2:       # %bb.0:
 ; X64-NOBMI2-NEXT:    movl %esi, %ecx
-; X64-NOBMI2-NEXT:    shrl %cl, %edi
-; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI2-NEXT:    shll %cl, %edi
 ; X64-NOBMI2-NEXT:    movl %edi, %eax
+; X64-NOBMI2-NEXT:    shrl %cl, %eax
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI2-NEXT:    shll %cl, %eax
 ; X64-NOBMI2-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: clear_lowbits32_c1_indexzext:
@@ -475,10 +477,10 @@ define i32 @clear_lowbits32_c4_commutati
 ; X64-NOBMI2-LABEL: clear_lowbits32_c4_commutative:
 ; X64-NOBMI2:       # %bb.0:
 ; X64-NOBMI2-NEXT:    movl %esi, %ecx
-; X64-NOBMI2-NEXT:    shrl %cl, %edi
-; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI2-NEXT:    shll %cl, %edi
 ; X64-NOBMI2-NEXT:    movl %edi, %eax
+; X64-NOBMI2-NEXT:    shrl %cl, %eax
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI2-NEXT:    shll %cl, %eax
 ; X64-NOBMI2-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: clear_lowbits32_c4_commutative:
@@ -530,10 +532,10 @@ define i64 @clear_lowbits64_c0(i64 %val,
 ; X64-NOBMI2-LABEL: clear_lowbits64_c0:
 ; X64-NOBMI2:       # %bb.0:
 ; X64-NOBMI2-NEXT:    movq %rsi, %rcx
-; X64-NOBMI2-NEXT:    shrq %cl, %rdi
-; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-NOBMI2-NEXT:    shlq %cl, %rdi
 ; X64-NOBMI2-NEXT:    movq %rdi, %rax
+; X64-NOBMI2-NEXT:    shrq %cl, %rax
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI2-NEXT:    shlq %cl, %rax
 ; X64-NOBMI2-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: clear_lowbits64_c0:
@@ -583,10 +585,10 @@ define i64 @clear_lowbits64_c1_indexzext
 ; X64-NOBMI2-LABEL: clear_lowbits64_c1_indexzext:
 ; X64-NOBMI2:       # %bb.0:
 ; X64-NOBMI2-NEXT:    movl %esi, %ecx
-; X64-NOBMI2-NEXT:    shrq %cl, %rdi
-; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI2-NEXT:    shlq %cl, %rdi
 ; X64-NOBMI2-NEXT:    movq %rdi, %rax
+; X64-NOBMI2-NEXT:    shrq %cl, %rax
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI2-NEXT:    shlq %cl, %rax
 ; X64-NOBMI2-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: clear_lowbits64_c1_indexzext:
@@ -760,10 +762,10 @@ define i64 @clear_lowbits64_c4_commutati
 ; X64-NOBMI2-LABEL: clear_lowbits64_c4_commutative:
 ; X64-NOBMI2:       # %bb.0:
 ; X64-NOBMI2-NEXT:    movq %rsi, %rcx
-; X64-NOBMI2-NEXT:    shrq %cl, %rdi
-; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-NOBMI2-NEXT:    shlq %cl, %rdi
 ; X64-NOBMI2-NEXT:    movq %rdi, %rax
+; X64-NOBMI2-NEXT:    shrq %cl, %rax
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI2-NEXT:    shlq %cl, %rax
 ; X64-NOBMI2-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: clear_lowbits64_c4_commutative:
@@ -794,11 +796,12 @@ define i8 @clear_lowbits8_ic0(i8 %val, i
 ;
 ; X64-LABEL: clear_lowbits8_ic0:
 ; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    movb $8, %cl
 ; X64-NEXT:    subb %sil, %cl
-; X64-NEXT:    shrb %cl, %dil
-; X64-NEXT:    shlb %cl, %dil
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    shrb %cl, %al
+; X64-NEXT:    shlb %cl, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
   %numhighbits = sub i8 8, %numlowbits
   %mask = shl i8 -1, %numhighbits
@@ -844,11 +847,12 @@ define i8 @clear_lowbits8_ic4_commutativ
 ;
 ; X64-LABEL: clear_lowbits8_ic4_commutative:
 ; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    movb $8, %cl
 ; X64-NEXT:    subb %sil, %cl
-; X64-NEXT:    shrb %cl, %dil
-; X64-NEXT:    shlb %cl, %dil
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    shrb %cl, %al
+; X64-NEXT:    shlb %cl, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
   %numhighbits = sub i8 8, %numlowbits
   %mask = shl i8 -1, %numhighbits
@@ -1126,11 +1130,11 @@ define i32 @clear_lowbits32_ic0(i32 %val
 ; X64-NOBMI2-LABEL: clear_lowbits32_ic0:
 ; X64-NOBMI2:       # %bb.0:
 ; X64-NOBMI2-NEXT:    movl %esi, %ecx
+; X64-NOBMI2-NEXT:    movl %edi, %eax
 ; X64-NOBMI2-NEXT:    negl %ecx
-; X64-NOBMI2-NEXT:    shrl %cl, %edi
+; X64-NOBMI2-NEXT:    shrl %cl, %eax
 ; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI2-NEXT:    shll %cl, %edi
-; X64-NOBMI2-NEXT:    movl %edi, %eax
+; X64-NOBMI2-NEXT:    shll %cl, %eax
 ; X64-NOBMI2-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: clear_lowbits32_ic0:
@@ -1167,11 +1171,11 @@ define i32 @clear_lowbits32_ic1_indexzex
 ; X64-NOBMI2-LABEL: clear_lowbits32_ic1_indexzext:
 ; X64-NOBMI2:       # %bb.0:
 ; X64-NOBMI2-NEXT:    movl %esi, %ecx
+; X64-NOBMI2-NEXT:    movl %edi, %eax
 ; X64-NOBMI2-NEXT:    negb %cl
-; X64-NOBMI2-NEXT:    shrl %cl, %edi
+; X64-NOBMI2-NEXT:    shrl %cl, %eax
 ; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI2-NEXT:    shll %cl, %edi
-; X64-NOBMI2-NEXT:    movl %edi, %eax
+; X64-NOBMI2-NEXT:    shll %cl, %eax
 ; X64-NOBMI2-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: clear_lowbits32_ic1_indexzext:
@@ -1298,11 +1302,11 @@ define i32 @clear_lowbits32_ic4_commutat
 ; X64-NOBMI2-LABEL: clear_lowbits32_ic4_commutative:
 ; X64-NOBMI2:       # %bb.0:
 ; X64-NOBMI2-NEXT:    movl %esi, %ecx
+; X64-NOBMI2-NEXT:    movl %edi, %eax
 ; X64-NOBMI2-NEXT:    negl %ecx
-; X64-NOBMI2-NEXT:    shrl %cl, %edi
+; X64-NOBMI2-NEXT:    shrl %cl, %eax
 ; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI2-NEXT:    shll %cl, %edi
-; X64-NOBMI2-NEXT:    movl %edi, %eax
+; X64-NOBMI2-NEXT:    shll %cl, %eax
 ; X64-NOBMI2-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: clear_lowbits32_ic4_commutative:
@@ -1358,11 +1362,11 @@ define i64 @clear_lowbits64_ic0(i64 %val
 ; X64-NOBMI2-LABEL: clear_lowbits64_ic0:
 ; X64-NOBMI2:       # %bb.0:
 ; X64-NOBMI2-NEXT:    movq %rsi, %rcx
+; X64-NOBMI2-NEXT:    movq %rdi, %rax
 ; X64-NOBMI2-NEXT:    negl %ecx
-; X64-NOBMI2-NEXT:    shrq %cl, %rdi
+; X64-NOBMI2-NEXT:    shrq %cl, %rax
 ; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-NOBMI2-NEXT:    shlq %cl, %rdi
-; X64-NOBMI2-NEXT:    movq %rdi, %rax
+; X64-NOBMI2-NEXT:    shlq %cl, %rax
 ; X64-NOBMI2-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: clear_lowbits64_ic0:
@@ -1416,11 +1420,11 @@ define i64 @clear_lowbits64_ic1_indexzex
 ; X64-NOBMI2-LABEL: clear_lowbits64_ic1_indexzext:
 ; X64-NOBMI2:       # %bb.0:
 ; X64-NOBMI2-NEXT:    movl %esi, %ecx
+; X64-NOBMI2-NEXT:    movq %rdi, %rax
 ; X64-NOBMI2-NEXT:    negb %cl
-; X64-NOBMI2-NEXT:    shrq %cl, %rdi
+; X64-NOBMI2-NEXT:    shrq %cl, %rax
 ; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI2-NEXT:    shlq %cl, %rdi
-; X64-NOBMI2-NEXT:    movq %rdi, %rax
+; X64-NOBMI2-NEXT:    shlq %cl, %rax
 ; X64-NOBMI2-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: clear_lowbits64_ic1_indexzext:
@@ -1608,11 +1612,11 @@ define i64 @clear_lowbits64_ic4_commutat
 ; X64-NOBMI2-LABEL: clear_lowbits64_ic4_commutative:
 ; X64-NOBMI2:       # %bb.0:
 ; X64-NOBMI2-NEXT:    movq %rsi, %rcx
+; X64-NOBMI2-NEXT:    movq %rdi, %rax
 ; X64-NOBMI2-NEXT:    negl %ecx
-; X64-NOBMI2-NEXT:    shrq %cl, %rdi
+; X64-NOBMI2-NEXT:    shrq %cl, %rax
 ; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-NOBMI2-NEXT:    shlq %cl, %rdi
-; X64-NOBMI2-NEXT:    movq %rdi, %rax
+; X64-NOBMI2-NEXT:    shlq %cl, %rax
 ; X64-NOBMI2-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: clear_lowbits64_ic4_commutative:
@@ -1670,9 +1674,10 @@ define i32 @oneuse32(i32 %val, i32 %numl
 ; X64-NOBMI2-NEXT:    pushq %rbp
 ; X64-NOBMI2-NEXT:    pushq %rbx
 ; X64-NOBMI2-NEXT:    pushq %rax
+; X64-NOBMI2-NEXT:    movl %esi, %ecx
 ; X64-NOBMI2-NEXT:    movl %edi, %ebx
 ; X64-NOBMI2-NEXT:    movl $-1, %ebp
-; X64-NOBMI2-NEXT:    movl %esi, %ecx
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI2-NEXT:    shll %cl, %ebp
 ; X64-NOBMI2-NEXT:    movl %ebp, %edi
 ; X64-NOBMI2-NEXT:    callq use32
@@ -1770,9 +1775,10 @@ define i64 @oneuse64(i64 %val, i64 %numl
 ; X64-NOBMI2-NEXT:    pushq %r14
 ; X64-NOBMI2-NEXT:    pushq %rbx
 ; X64-NOBMI2-NEXT:    pushq %rax
+; X64-NOBMI2-NEXT:    movq %rsi, %rcx
 ; X64-NOBMI2-NEXT:    movq %rdi, %r14
 ; X64-NOBMI2-NEXT:    movq $-1, %rbx
-; X64-NOBMI2-NEXT:    movl %esi, %ecx
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NOBMI2-NEXT:    shlq %cl, %rbx
 ; X64-NOBMI2-NEXT:    movq %rbx, %rdi
 ; X64-NOBMI2-NEXT:    callq use64

Modified: llvm/trunk/test/CodeGen/X86/cmov-into-branch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/cmov-into-branch.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/cmov-into-branch.ll (original)
+++ llvm/trunk/test/CodeGen/X86/cmov-into-branch.ll Wed Sep 19 11:59:08 2018
@@ -5,9 +5,9 @@
 define i32 @test1(double %a, double* nocapture %b, i32 %x, i32 %y)  {
 ; CHECK-LABEL: test1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    ucomisd (%rdi), %xmm0
-; CHECK-NEXT:    cmovbel %edx, %esi
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    ucomisd (%rdi), %xmm0
+; CHECK-NEXT:    cmovbel %edx, %eax
 ; CHECK-NEXT:    retq
   %load = load double, double* %b, align 8
   %cmp = fcmp olt double %load, %a
@@ -19,9 +19,9 @@ define i32 @test1(double %a, double* noc
 define i32 @test2(double %a, double %b, i32 %x, i32 %y)  {
 ; CHECK-LABEL: test2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    ucomisd %xmm1, %xmm0
-; CHECK-NEXT:    cmovbel %esi, %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    ucomisd %xmm1, %xmm0
+; CHECK-NEXT:    cmovbel %esi, %eax
 ; CHECK-NEXT:    retq
   %cmp = fcmp ogt double %a, %b
   %cond = select i1 %cmp, i32 %x, i32 %y
@@ -48,10 +48,10 @@ define i32 @test4(i32 %a, i32* nocapture
 define i32 @test5(i32 %a, i32* nocapture %b, i32 %x, i32 %y) {
 ; CHECK-LABEL: test5:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    cmpl %edi, (%rsi)
-; CHECK-NEXT:    cmoval %edi, %ecx
-; CHECK-NEXT:    cmovael %edx, %ecx
 ; CHECK-NEXT:    movl %ecx, %eax
+; CHECK-NEXT:    cmpl %edi, (%rsi)
+; CHECK-NEXT:    cmoval %edi, %eax
+; CHECK-NEXT:    cmovael %edx, %eax
 ; CHECK-NEXT:    retq
   %load = load i32, i32* %b, align 4
   %cmp = icmp ult i32 %load, %a
@@ -83,9 +83,9 @@ entry:
 define i32 @weighted_select1(i32 %a, i32 %b) {
 ; CHECK-LABEL: weighted_select1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    cmovnel %edi, %esi
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    testl %edi, %edi
+; CHECK-NEXT:    cmovnel %edi, %eax
 ; CHECK-NEXT:    retq
   %cmp = icmp ne i32 %a, 0
   %sel = select i1 %cmp, i32 %a, i32 %b, !prof !0
@@ -96,12 +96,12 @@ define i32 @weighted_select1(i32 %a, i32
 define i32 @weighted_select2(i32 %a, i32 %b) {
 ; CHECK-LABEL: weighted_select2:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
 ; CHECK-NEXT:    testl %edi, %edi
 ; CHECK-NEXT:    jne .LBB6_2
 ; CHECK-NEXT:  # %bb.1: # %select.false
-; CHECK-NEXT:    movl %esi, %edi
+; CHECK-NEXT:    movl %esi, %eax
 ; CHECK-NEXT:  .LBB6_2: # %select.end
-; CHECK-NEXT:    movl %edi, %eax
 ; CHECK-NEXT:    retq
   %cmp = icmp ne i32 %a, 0
   %sel = select i1 %cmp, i32 %a, i32 %b, !prof !1
@@ -115,14 +115,13 @@ define i32 @weighted_select2(i32 %a, i32
 define i32 @weighted_select3(i32 %a, i32 %b) {
 ; CHECK-LABEL: weighted_select3:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
 ; CHECK-NEXT:    testl %edi, %edi
 ; CHECK-NEXT:    je .LBB7_1
 ; CHECK-NEXT:  # %bb.2: # %select.end
-; CHECK-NEXT:    movl %edi, %eax
 ; CHECK-NEXT:    retq
 ; CHECK-NEXT:  .LBB7_1: # %select.false
-; CHECK-NEXT:    movl %esi, %edi
-; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    movl %esi, %eax
 ; CHECK-NEXT:    retq
   %cmp = icmp ne i32 %a, 0
   %sel = select i1 %cmp, i32 %a, i32 %b, !prof !2
@@ -133,9 +132,9 @@ define i32 @weighted_select3(i32 %a, i32
 define i32 @unweighted_select(i32 %a, i32 %b) {
 ; CHECK-LABEL: unweighted_select:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    cmovnel %edi, %esi
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    testl %edi, %edi
+; CHECK-NEXT:    cmovnel %edi, %eax
 ; CHECK-NEXT:    retq
   %cmp = icmp ne i32 %a, 0
   %sel = select i1 %cmp, i32 %a, i32 %b, !prof !3
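(The cmov tests show the same hint applied to selects: the flag-setting compare keeps
reading the original argument registers while the conditionally-selected value is assembled
directly in %eax. The updated test5 above is the clearest instance, reproduced here as a
sketch:

    movl    %ecx, %eax       # start from one select operand, in the return register
    cmpl    %edi, (%rsi)     # compare still uses the untouched arguments
    cmoval  %edi, %eax       # conditional moves overwrite %eax in place
    cmovael %edx, %eax
    retq
)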

Modified: llvm/trunk/test/CodeGen/X86/cmov.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/cmov.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/cmov.ll (original)
+++ llvm/trunk/test/CodeGen/X86/cmov.ll Wed Sep 19 11:59:08 2018
@@ -194,11 +194,14 @@ define i8 @test7(i1 inreg %c, i8 inreg %
 ; CHECK-LABEL: test7:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    testb $1, %dil
-; CHECK-NEXT:    jne .LBB6_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    movl %edx, %esi
-; CHECK-NEXT:  .LBB6_2:
+; CHECK-NEXT:    jne .LBB6_1
+; CHECK-NEXT:  # %bb.2:
+; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    # kill: def $al killed $al killed $eax
+; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB6_1:
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
   %d = select i1 %c, i8 %a, i8 %b
   ret i8 %d

Modified: llvm/trunk/test/CodeGen/X86/cmovcmov.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/cmovcmov.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/cmovcmov.ll (original)
+++ llvm/trunk/test/CodeGen/X86/cmovcmov.ll Wed Sep 19 11:59:08 2018
@@ -9,10 +9,10 @@ target datalayout = "e-m:o-i64:64-f80:12
 
 ; CHECK-LABEL: test_select_fcmp_oeq_i32:
 
-; CMOV-NEXT: ucomiss  %xmm1, %xmm0
-; CMOV-NEXT: cmovnel  %esi, %edi
-; CMOV-NEXT: cmovpl  %esi, %edi
 ; CMOV-NEXT: movl  %edi, %eax
+; CMOV-NEXT: ucomiss  %xmm1, %xmm0
+; CMOV-NEXT: cmovnel  %esi, %eax
+; CMOV-NEXT: cmovpl  %esi, %eax
 ; CMOV-NEXT: retq
 
 ; NOCMOV-NEXT:  flds  8(%esp)
@@ -36,10 +36,10 @@ entry:
 
 ; CHECK-LABEL: test_select_fcmp_oeq_i64:
 
-; CMOV-NEXT:   ucomiss  %xmm1, %xmm0
-; CMOV-NEXT:   cmovneq  %rsi, %rdi
-; CMOV-NEXT:   cmovpq  %rsi, %rdi
 ; CMOV-NEXT:   movq  %rdi, %rax
+; CMOV-NEXT:   ucomiss  %xmm1, %xmm0
+; CMOV-NEXT:   cmovneq  %rsi, %rax
+; CMOV-NEXT:   cmovpq  %rsi, %rax
 ; CMOV-NEXT:   retq
 
 ; NOCMOV-NEXT:   flds  8(%esp)
@@ -64,10 +64,10 @@ entry:
 
 ; CHECK-LABEL: test_select_fcmp_une_i64:
 
-; CMOV-NEXT:   ucomiss  %xmm1, %xmm0
-; CMOV-NEXT:   cmovneq  %rdi, %rsi
-; CMOV-NEXT:   cmovpq  %rdi, %rsi
 ; CMOV-NEXT:   movq  %rsi, %rax
+; CMOV-NEXT:   ucomiss  %xmm1, %xmm0
+; CMOV-NEXT:   cmovneq  %rdi, %rax
+; CMOV-NEXT:   cmovpq  %rdi, %rax
 ; CMOV-NEXT:   retq
 
 ; NOCMOV-NEXT:   flds  8(%esp)

Modified: llvm/trunk/test/CodeGen/X86/cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/cmp.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/cmp.ll Wed Sep 19 11:59:08 2018
@@ -271,9 +271,9 @@ declare zeroext i1 @test12b()
 define i32 @test13(i32 %mask, i32 %base, i32 %intra) {
 ; CHECK-LABEL: test13:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    testb $8, %dil # encoding: [0x40,0xf6,0xc7,0x08]
-; CHECK-NEXT:    cmovnel %edx, %esi # encoding: [0x0f,0x45,0xf2]
 ; CHECK-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
+; CHECK-NEXT:    testb $8, %dil # encoding: [0x40,0xf6,0xc7,0x08]
+; CHECK-NEXT:    cmovnel %edx, %eax # encoding: [0x0f,0x45,0xc2]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 entry:
   %and = and i32 %mask, 8
@@ -286,9 +286,9 @@ entry:
 define i32 @test14(i32 %mask, i32 %base, i32 %intra) {
 ; CHECK-LABEL: test14:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    shrl $7, %edi # encoding: [0xc1,0xef,0x07]
-; CHECK-NEXT:    cmovnsl %edx, %esi # encoding: [0x0f,0x49,0xf2]
 ; CHECK-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
+; CHECK-NEXT:    shrl $7, %edi # encoding: [0xc1,0xef,0x07]
+; CHECK-NEXT:    cmovnsl %edx, %eax # encoding: [0x0f,0x49,0xc2]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 entry:
   %s = lshr i32 %mask, 7

Modified: llvm/trunk/test/CodeGen/X86/cmpxchg-clobber-flags.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/cmpxchg-clobber-flags.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/cmpxchg-clobber-flags.ll (original)
+++ llvm/trunk/test/CodeGen/X86/cmpxchg-clobber-flags.ll Wed Sep 19 11:59:08 2018
@@ -151,6 +151,7 @@ define i32 @test_control_flow(i32* %p, i
 ;
 ; 64-ALL-LABEL: test_control_flow:
 ; 64-ALL:       # %bb.0: # %entry
+; 64-ALL-NEXT:    movl %esi, %eax
 ; 64-ALL-NEXT:    cmpl %edx, %esi
 ; 64-ALL-NEXT:    jle .LBB1_5
 ; 64-ALL-NEXT:    .p2align 4, 0x90
@@ -171,9 +172,8 @@ define i32 @test_control_flow(i32* %p, i
 ; 64-ALL-NEXT:    lock cmpxchgl %eax, (%rdi)
 ; 64-ALL-NEXT:    jne .LBB1_1
 ; 64-ALL-NEXT:  # %bb.4:
-; 64-ALL-NEXT:    xorl %esi, %esi
+; 64-ALL-NEXT:    xorl %eax, %eax
 ; 64-ALL-NEXT:  .LBB1_5: # %cond.end
-; 64-ALL-NEXT:    movl %esi, %eax
 ; 64-ALL-NEXT:    retq
 entry:
   %cmp = icmp sgt i32 %i, %j

Modified: llvm/trunk/test/CodeGen/X86/cmpxchg-i128-i1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/cmpxchg-i128-i1.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/cmpxchg-i128-i1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/cmpxchg-i128-i1.ll Wed Sep 19 11:59:08 2018
@@ -7,10 +7,9 @@ define i1 @try_cmpxchg(i128* %addr, i128
 ; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset %rbx, -16
-; CHECK-NEXT:    movq %rcx, %r9
+; CHECK-NEXT:    movq %rcx, %rbx
 ; CHECK-NEXT:    movq %rsi, %rax
 ; CHECK-NEXT:    movq %r8, %rcx
-; CHECK-NEXT:    movq %r9, %rbx
 ; CHECK-NEXT:    lock cmpxchg16b (%rdi)
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    popq %rbx
@@ -27,10 +26,9 @@ define void @cmpxchg_flow(i128* %addr, i
 ; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset %rbx, -16
-; CHECK-NEXT:    movq %rcx, %r9
+; CHECK-NEXT:    movq %rcx, %rbx
 ; CHECK-NEXT:    movq %rsi, %rax
 ; CHECK-NEXT:    movq %r8, %rcx
-; CHECK-NEXT:    movq %r9, %rbx
 ; CHECK-NEXT:    lock cmpxchg16b (%rdi)
 ; CHECK-NEXT:    jne .LBB1_2
 ; CHECK-NEXT:  # %bb.1: # %true
@@ -64,14 +62,13 @@ define i1 @cmpxchg_arithcmp(i128* %addr,
 ; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset %rbx, -16
-; CHECK-NEXT:    movq %rcx, %r9
-; CHECK-NEXT:    movq %rdx, %r10
+; CHECK-NEXT:    movq %rcx, %rbx
+; CHECK-NEXT:    movq %rdx, %r9
 ; CHECK-NEXT:    movq %rsi, %rax
 ; CHECK-NEXT:    movq %r8, %rcx
-; CHECK-NEXT:    movq %r9, %rbx
 ; CHECK-NEXT:    lock cmpxchg16b (%rdi)
 ; CHECK-NEXT:    cmpq %rsi, %rax
-; CHECK-NEXT:    sbbq %r10, %rdx
+; CHECK-NEXT:    sbbq %r9, %rdx
 ; CHECK-NEXT:    setge %al
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
@@ -88,15 +85,14 @@ define i128 @cmpxchg_zext(i128* %addr, i
 ; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset %rbx, -16
-; CHECK-NEXT:    movq %rcx, %r9
-; CHECK-NEXT:    xorl %r10d, %r10d
+; CHECK-NEXT:    movq %rcx, %rbx
 ; CHECK-NEXT:    movq %rsi, %rax
+; CHECK-NEXT:    xorl %esi, %esi
 ; CHECK-NEXT:    movq %r8, %rcx
-; CHECK-NEXT:    movq %r9, %rbx
 ; CHECK-NEXT:    lock cmpxchg16b (%rdi)
-; CHECK-NEXT:    sete %r10b
+; CHECK-NEXT:    sete %sil
 ; CHECK-NEXT:    xorl %edx, %edx
-; CHECK-NEXT:    movq %r10, %rax
+; CHECK-NEXT:    movq %rsi, %rax
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq

Modified: llvm/trunk/test/CodeGen/X86/combine-add.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-add.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-add.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-add.ll Wed Sep 19 11:59:08 2018
@@ -103,8 +103,8 @@ define <4 x i32> @combine_vec_add_sub1(<
 define <4 x i32> @combine_vec_add_sub_add0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; SSE-LABEL: combine_vec_add_sub_add0:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    psubd %xmm2, %xmm1
 ; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    psubd %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_add_sub_add0:
@@ -121,8 +121,8 @@ define <4 x i32> @combine_vec_add_sub_ad
 define <4 x i32> @combine_vec_add_sub_add1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; SSE-LABEL: combine_vec_add_sub_add1:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    psubd %xmm2, %xmm1
 ; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    psubd %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_add_sub_add1:
@@ -139,8 +139,8 @@ define <4 x i32> @combine_vec_add_sub_ad
 define <4 x i32> @combine_vec_add_sub_add2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; SSE-LABEL: combine_vec_add_sub_add2:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    paddd %xmm2, %xmm1
 ; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    paddd %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_add_sub_add2:
@@ -157,8 +157,8 @@ define <4 x i32> @combine_vec_add_sub_ad
 define <4 x i32> @combine_vec_add_sub_add3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; SSE-LABEL: combine_vec_add_sub_add3:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    psubd %xmm2, %xmm1
 ; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    psubd %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_add_sub_add3:
@@ -203,9 +203,9 @@ define <4 x i32> @combine_vec_add_unique
 ;
 ; AVX-LABEL: combine_vec_add_uniquebits:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm2
+; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [61680,61680,61680,61680]
 ; AVX-NEXT:    vandps %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm2
+; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [3855,3855,3855,3855]
 ; AVX-NEXT:    vandps %xmm2, %xmm1, %xmm1
 ; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq

Modified: llvm/trunk/test/CodeGen/X86/combine-rotates.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-rotates.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-rotates.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-rotates.ll Wed Sep 19 11:59:08 2018
@@ -98,8 +98,8 @@ define <4 x i32> @combine_vec_rot_rot_sp
 define i32 @combine_rot_select_zero(i32, i32) {
 ; CHECK-LABEL: combine_rot_select_zero:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax
 ; CHECK-NEXT:    movl %esi, %ecx
+; CHECK-NEXT:    movl %edi, %eax
 ; CHECK-NEXT:    roll %cl, %eax
 ; CHECK-NEXT:    testl %esi, %esi
 ; CHECK-NEXT:    cmovel %edi, %eax

Modified: llvm/trunk/test/CodeGen/X86/combine-sdiv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-sdiv.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-sdiv.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-sdiv.ll Wed Sep 19 11:59:08 2018
@@ -29,8 +29,8 @@ define <4 x i32> @combine_vec_sdiv_by_on
 define i32 @combine_sdiv_by_negone(i32 %x) {
 ; CHECK-LABEL: combine_sdiv_by_negone:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    negl %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    negl %eax
 ; CHECK-NEXT:    retq
   %1 = sdiv i32 %x, -1
   ret i32 %1

Modified: llvm/trunk/test/CodeGen/X86/combine-udiv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-udiv.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-udiv.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-udiv.ll Wed Sep 19 11:59:08 2018
@@ -64,8 +64,8 @@ define <4 x i32> @combine_vec_udiv_by_ne
 define i32 @combine_udiv_by_minsigned(i32 %x) {
 ; CHECK-LABEL: combine_udiv_by_minsigned:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    shrl $31, %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    shrl $31, %eax
 ; CHECK-NEXT:    retq
   %1 = udiv i32 %x, -2147483648
   ret i32 %1

Modified: llvm/trunk/test/CodeGen/X86/combine-urem.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-urem.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-urem.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-urem.ll Wed Sep 19 11:59:08 2018
@@ -62,8 +62,8 @@ define <4 x i32> @combine_vec_urem_by_ne
 define i32 @combine_urem_by_minsigned(i32 %x) {
 ; CHECK-LABEL: combine_urem_by_minsigned:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    andl $2147483647, %edi # imm = 0x7FFFFFFF
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    andl $2147483647, %eax # imm = 0x7FFFFFFF
 ; CHECK-NEXT:    retq
   %1 = urem i32 %x, -2147483648
   ret i32 %1

Modified: llvm/trunk/test/CodeGen/X86/conditional-indecrement.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/conditional-indecrement.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/conditional-indecrement.ll (original)
+++ llvm/trunk/test/CodeGen/X86/conditional-indecrement.ll Wed Sep 19 11:59:08 2018
@@ -4,9 +4,9 @@
 define i32 @test1(i32 %a, i32 %b) nounwind readnone {
 ; CHECK-LABEL: test1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    cmpl $1, %edi
-; CHECK-NEXT:    sbbl $-1, %esi
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    cmpl $1, %edi
+; CHECK-NEXT:    sbbl $-1, %eax
 ; CHECK-NEXT:    retq
   %not.cmp = icmp ne i32 %a, 0
   %inc = zext i1 %not.cmp to i32
@@ -17,9 +17,9 @@ define i32 @test1(i32 %a, i32 %b) nounwi
 define i32 @test1_commute(i32 %a, i32 %b) nounwind readnone {
 ; CHECK-LABEL: test1_commute:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    cmpl $1, %edi
-; CHECK-NEXT:    sbbl $-1, %esi
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    cmpl $1, %edi
+; CHECK-NEXT:    sbbl $-1, %eax
 ; CHECK-NEXT:    retq
   %cmp = icmp ne i32 %a, 0
   %inc = zext i1 %cmp to i32
@@ -30,9 +30,9 @@ define i32 @test1_commute(i32 %a, i32 %b
 define i32 @test2(i32 %a, i32 %b) nounwind readnone {
 ; CHECK-LABEL: test2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    cmpl $1, %edi
-; CHECK-NEXT:    adcl $0, %esi
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    cmpl $1, %edi
+; CHECK-NEXT:    adcl $0, %eax
 ; CHECK-NEXT:    retq
   %cmp = icmp eq i32 %a, 0
   %inc = zext i1 %cmp to i32
@@ -43,9 +43,9 @@ define i32 @test2(i32 %a, i32 %b) nounwi
 define i32 @test3(i32 %a, i32 %b) nounwind readnone {
 ; CHECK-LABEL: test3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    cmpl $1, %edi
-; CHECK-NEXT:    adcl $0, %esi
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    cmpl $1, %edi
+; CHECK-NEXT:    adcl $0, %eax
 ; CHECK-NEXT:    retq
   %cmp = icmp eq i32 %a, 0
   %inc = zext i1 %cmp to i32
@@ -56,9 +56,9 @@ define i32 @test3(i32 %a, i32 %b) nounwi
 define i32 @test4(i32 %a, i32 %b) nounwind readnone {
 ; CHECK-LABEL: test4:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    cmpl $1, %edi
-; CHECK-NEXT:    sbbl $-1, %esi
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    cmpl $1, %edi
+; CHECK-NEXT:    sbbl $-1, %eax
 ; CHECK-NEXT:    retq
   %not.cmp = icmp ne i32 %a, 0
   %inc = zext i1 %not.cmp to i32
@@ -69,9 +69,9 @@ define i32 @test4(i32 %a, i32 %b) nounwi
 define i32 @test5(i32 %a, i32 %b) nounwind readnone {
 ; CHECK-LABEL: test5:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    cmpl $1, %edi
-; CHECK-NEXT:    adcl $-1, %esi
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    cmpl $1, %edi
+; CHECK-NEXT:    adcl $-1, %eax
 ; CHECK-NEXT:    retq
   %not.cmp = icmp ne i32 %a, 0
   %inc = zext i1 %not.cmp to i32
@@ -82,9 +82,9 @@ define i32 @test5(i32 %a, i32 %b) nounwi
 define i32 @test6(i32 %a, i32 %b) nounwind readnone {
 ; CHECK-LABEL: test6:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    cmpl $1, %edi
-; CHECK-NEXT:    sbbl $0, %esi
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    cmpl $1, %edi
+; CHECK-NEXT:    sbbl $0, %eax
 ; CHECK-NEXT:    retq
   %cmp = icmp eq i32 %a, 0
   %inc = zext i1 %cmp to i32
@@ -95,9 +95,9 @@ define i32 @test6(i32 %a, i32 %b) nounwi
 define i32 @test7(i32 %a, i32 %b) nounwind readnone {
 ; CHECK-LABEL: test7:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    cmpl $1, %edi
-; CHECK-NEXT:    sbbl $0, %esi
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    cmpl $1, %edi
+; CHECK-NEXT:    sbbl $0, %eax
 ; CHECK-NEXT:    retq
   %cmp = icmp eq i32 %a, 0
   %inc = zext i1 %cmp to i32
@@ -108,9 +108,9 @@ define i32 @test7(i32 %a, i32 %b) nounwi
 define i32 @test8(i32 %a, i32 %b) nounwind readnone {
 ; CHECK-LABEL: test8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    cmpl $1, %edi
-; CHECK-NEXT:    adcl $-1, %esi
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    cmpl $1, %edi
+; CHECK-NEXT:    adcl $-1, %eax
 ; CHECK-NEXT:    retq
   %not.cmp = icmp ne i32 %a, 0
   %inc = zext i1 %not.cmp to i32

Modified: llvm/trunk/test/CodeGen/X86/dagcombine-select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/dagcombine-select.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/dagcombine-select.ll (original)
+++ llvm/trunk/test/CodeGen/X86/dagcombine-select.ll Wed Sep 19 11:59:08 2018
@@ -194,10 +194,11 @@ define i32 @sel_constants_shl_constant(i
 define i32 @shl_constant_sel_constants(i1 %cond) {
 ; CHECK-LABEL: shl_constant_sel_constants:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    andb $1, %dil
-; CHECK-NEXT:    xorb $3, %dil
-; CHECK-NEXT:    movl $1, %eax
 ; CHECK-NEXT:    movl %edi, %ecx
+; CHECK-NEXT:    andb $1, %cl
+; CHECK-NEXT:    xorb $3, %cl
+; CHECK-NEXT:    movl $1, %eax
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; CHECK-NEXT:    shll %cl, %eax
 ; CHECK-NEXT:    retq
   %sel = select i1 %cond, i32 2, i32 3
@@ -208,10 +209,11 @@ define i32 @shl_constant_sel_constants(i
 define i32 @lshr_constant_sel_constants(i1 %cond) {
 ; CHECK-LABEL: lshr_constant_sel_constants:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    andb $1, %dil
-; CHECK-NEXT:    xorb $3, %dil
-; CHECK-NEXT:    movl $64, %eax
 ; CHECK-NEXT:    movl %edi, %ecx
+; CHECK-NEXT:    andb $1, %cl
+; CHECK-NEXT:    xorb $3, %cl
+; CHECK-NEXT:    movl $64, %eax
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; CHECK-NEXT:    shrl %cl, %eax
 ; CHECK-NEXT:    retq
   %sel = select i1 %cond, i32 2, i32 3
@@ -222,10 +224,11 @@ define i32 @lshr_constant_sel_constants(
 define i32 @ashr_constant_sel_constants(i1 %cond) {
 ; CHECK-LABEL: ashr_constant_sel_constants:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    andb $1, %dil
-; CHECK-NEXT:    xorb $3, %dil
-; CHECK-NEXT:    movl $128, %eax
 ; CHECK-NEXT:    movl %edi, %ecx
+; CHECK-NEXT:    andb $1, %cl
+; CHECK-NEXT:    xorb $3, %cl
+; CHECK-NEXT:    movl $128, %eax
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; CHECK-NEXT:    shrl %cl, %eax
 ; CHECK-NEXT:    retq
   %sel = select i1 %cond, i32 2, i32 3

Modified: llvm/trunk/test/CodeGen/X86/divide-by-constant.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/divide-by-constant.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/divide-by-constant.ll (original)
+++ llvm/trunk/test/CodeGen/X86/divide-by-constant.ll Wed Sep 19 11:59:08 2018
@@ -95,8 +95,8 @@ define i32 @test5(i32 %A) nounwind {
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl $365384439, %eax # imm = 0x15C752F7
 ; X32-NEXT:    mull {{[0-9]+}}(%esp)
-; X32-NEXT:    shrl $27, %edx
 ; X32-NEXT:    movl %edx, %eax
+; X32-NEXT:    shrl $27, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test5:
@@ -217,9 +217,9 @@ define i32 @testsize1(i32 %x) minsize no
 ;
 ; X64-LABEL: testsize1:
 ; X64:       # %bb.0: # %entry
+; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    pushq $32
 ; X64-NEXT:    popq %rcx
-; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    cltd
 ; X64-NEXT:    idivl %ecx
 ; X64-NEXT:    retq
@@ -240,9 +240,9 @@ define i32 @testsize2(i32 %x) minsize no
 ;
 ; X64-LABEL: testsize2:
 ; X64:       # %bb.0: # %entry
+; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    pushq $33
 ; X64-NEXT:    popq %rcx
-; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    cltd
 ; X64-NEXT:    idivl %ecx
 ; X64-NEXT:    retq
@@ -260,8 +260,8 @@ define i32 @testsize3(i32 %x) minsize no
 ;
 ; X64-LABEL: testsize3:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    shrl $5, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    shrl $5, %eax
 ; X64-NEXT:    retq
 entry:
 	%div = udiv i32 %x, 32
@@ -280,10 +280,10 @@ define i32 @testsize4(i32 %x) minsize no
 ;
 ; X64-LABEL: testsize4:
 ; X64:       # %bb.0: # %entry
+; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    pushq $33
 ; X64-NEXT:    popq %rcx
 ; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    divl %ecx
 ; X64-NEXT:    retq
 entry:
@@ -311,38 +311,36 @@ define i64 @PR23590(i64 %x) nounwind {
 ;
 ; X64-FAST-LABEL: PR23590:
 ; X64-FAST:       # %bb.0: # %entry
-; X64-FAST-NEXT:    movq %rdi, %rcx
-; X64-FAST-NEXT:    movabsq $6120523590596543007, %rdx # imm = 0x54F077C718E7C21F
+; X64-FAST-NEXT:    movabsq $6120523590596543007, %rcx # imm = 0x54F077C718E7C21F
 ; X64-FAST-NEXT:    movq %rdi, %rax
-; X64-FAST-NEXT:    mulq %rdx
+; X64-FAST-NEXT:    mulq %rcx
 ; X64-FAST-NEXT:    shrq $12, %rdx
 ; X64-FAST-NEXT:    imulq $12345, %rdx, %rax # imm = 0x3039
-; X64-FAST-NEXT:    subq %rax, %rcx
-; X64-FAST-NEXT:    movabsq $2635249153387078803, %rdx # imm = 0x2492492492492493
-; X64-FAST-NEXT:    movq %rcx, %rax
-; X64-FAST-NEXT:    mulq %rdx
-; X64-FAST-NEXT:    subq %rdx, %rcx
-; X64-FAST-NEXT:    shrq %rcx
-; X64-FAST-NEXT:    leaq (%rcx,%rdx), %rax
+; X64-FAST-NEXT:    subq %rax, %rdi
+; X64-FAST-NEXT:    movabsq $2635249153387078803, %rcx # imm = 0x2492492492492493
+; X64-FAST-NEXT:    movq %rdi, %rax
+; X64-FAST-NEXT:    mulq %rcx
+; X64-FAST-NEXT:    subq %rdx, %rdi
+; X64-FAST-NEXT:    shrq %rdi
+; X64-FAST-NEXT:    leaq (%rdi,%rdx), %rax
 ; X64-FAST-NEXT:    shrq $2, %rax
 ; X64-FAST-NEXT:    retq
 ;
 ; X64-SLOW-LABEL: PR23590:
 ; X64-SLOW:       # %bb.0: # %entry
-; X64-SLOW-NEXT:    movq %rdi, %rcx
-; X64-SLOW-NEXT:    movabsq $6120523590596543007, %rdx # imm = 0x54F077C718E7C21F
+; X64-SLOW-NEXT:    movabsq $6120523590596543007, %rcx # imm = 0x54F077C718E7C21F
 ; X64-SLOW-NEXT:    movq %rdi, %rax
-; X64-SLOW-NEXT:    mulq %rdx
+; X64-SLOW-NEXT:    mulq %rcx
 ; X64-SLOW-NEXT:    shrq $12, %rdx
 ; X64-SLOW-NEXT:    imulq $12345, %rdx, %rax # imm = 0x3039
-; X64-SLOW-NEXT:    subq %rax, %rcx
-; X64-SLOW-NEXT:    imulq $613566757, %rcx, %rax # imm = 0x24924925
+; X64-SLOW-NEXT:    subq %rax, %rdi
+; X64-SLOW-NEXT:    imulq $613566757, %rdi, %rax # imm = 0x24924925
 ; X64-SLOW-NEXT:    shrq $32, %rax
-; X64-SLOW-NEXT:    subl %eax, %ecx
-; X64-SLOW-NEXT:    shrl %ecx
-; X64-SLOW-NEXT:    addl %eax, %ecx
-; X64-SLOW-NEXT:    shrl $2, %ecx
-; X64-SLOW-NEXT:    movq %rcx, %rax
+; X64-SLOW-NEXT:    subl %eax, %edi
+; X64-SLOW-NEXT:    shrl %edi
+; X64-SLOW-NEXT:    addl %eax, %edi
+; X64-SLOW-NEXT:    shrl $2, %edi
+; X64-SLOW-NEXT:    movq %rdi, %rax
 ; X64-SLOW-NEXT:    retq
 entry:
 	%rem = urem i64 %x, 12345
@@ -390,10 +388,10 @@ define { i64, i32 } @PR38622(i64) nounwi
 ; X64-NEXT:    shrq $11, %rax
 ; X64-NEXT:    movabsq $4835703278458517, %rcx # imm = 0x112E0BE826D695
 ; X64-NEXT:    mulq %rcx
-; X64-NEXT:    shrq $9, %rdx
-; X64-NEXT:    imull $-294967296, %edx, %eax # imm = 0xEE6B2800
-; X64-NEXT:    subl %eax, %edi
 ; X64-NEXT:    movq %rdx, %rax
+; X64-NEXT:    shrq $9, %rax
+; X64-NEXT:    imull $-294967296, %eax, %ecx # imm = 0xEE6B2800
+; X64-NEXT:    subl %ecx, %edi
 ; X64-NEXT:    movl %edi, %edx
 ; X64-NEXT:    retq
   %2 = udiv i64 %0, 4000000000
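
The hunks above all show one reordering: the copy of the argument into the
return register (%eax/%rax) is now emitted first, and the arithmetic happens
in place on the return register, instead of on the argument register followed
by a trailing copy. A minimal standalone reproducer (illustrative only, not
part of this commit's test suite) equivalent to the udiv-by-32 in testsize3:

define i32 @shr5(i32 %x) {
  %r = lshr i32 %x, 5
  ret i32 %r
}

With llc -mtriple=x86_64-- this used to emit "shrl $5, %edi" followed by
"movl %edi, %eax"; it now emits "movl %edi, %eax" then "shrl $5, %eax".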

Modified: llvm/trunk/test/CodeGen/X86/divrem.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/divrem.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/divrem.ll (original)
+++ llvm/trunk/test/CodeGen/X86/divrem.ll Wed Sep 19 11:59:08 2018
@@ -101,6 +101,7 @@ define void @si16(i16 %x, i16 %y, i16* %
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdx, %r8
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    cwtd
 ; X64-NEXT:    idivw %si
 ; X64-NEXT:    movw %ax, (%r8)
@@ -131,6 +132,7 @@ define void @si8(i8 %x, i8 %y, i8* %p, i
 ; X64-LABEL: si8:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    cbtw
 ; X64-NEXT:    idivb %sil
 ; X64-NEXT:    movsbl %ah, %esi
@@ -182,8 +184,8 @@ define void @ui64(i64 %x, i64 %y, i64* %
 ; X64-LABEL: ui64:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdx, %r8
-; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    divq %rsi
 ; X64-NEXT:    movq %rax, (%r8)
 ; X64-NEXT:    movq %rdx, (%rcx)
@@ -212,8 +214,8 @@ define void @ui32(i32 %x, i32 %y, i32* %
 ; X64-LABEL: ui32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdx, %r8
-; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    divl %esi
 ; X64-NEXT:    movl %eax, (%r8)
 ; X64-NEXT:    movl %edx, (%rcx)
@@ -242,8 +244,9 @@ define void @ui16(i16 %x, i16 %y, i16* %
 ; X64-LABEL: ui16:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdx, %r8
-; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    xorl %edx, %edx
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    divw %si
 ; X64-NEXT:    movw %ax, (%r8)
 ; X64-NEXT:    movw %dx, (%rcx)

Modified: llvm/trunk/test/CodeGen/X86/divrem8_ext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/divrem8_ext.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/divrem8_ext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/divrem8_ext.ll Wed Sep 19 11:59:08 2018
@@ -112,6 +112,7 @@ define signext i8 @test_sdivrem_sext_ah(
 ; X64-LABEL: test_sdivrem_sext_ah:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    cbtw
 ; X64-NEXT:    idivb %sil
 ; X64-NEXT:    movsbl %ah, %ecx
@@ -137,6 +138,7 @@ define signext i8 @test_srem_sext_ah(i8
 ; X64-LABEL: test_srem_sext_ah:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    cbtw
 ; X64-NEXT:    idivb %sil
 ; X64-NEXT:    movsbl %ah, %eax
@@ -161,6 +163,7 @@ define i8 @test_srem_noext_ah(i8 %x, i8
 ; X64-LABEL: test_srem_noext_ah:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    cbtw
 ; X64-NEXT:    idivb %sil
 ; X64-NEXT:    movsbl %ah, %eax
@@ -186,6 +189,7 @@ define i64 @test_srem_sext64_ah(i8 %x, i
 ; X64-LABEL: test_srem_sext64_ah:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    cbtw
 ; X64-NEXT:    idivb %sil
 ; X64-NEXT:    movsbl %ah, %eax

Modified: llvm/trunk/test/CodeGen/X86/extract-lowbits.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/extract-lowbits.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/extract-lowbits.ll (original)
+++ llvm/trunk/test/CodeGen/X86/extract-lowbits.ll Wed Sep 19 11:59:08 2018
@@ -45,8 +45,9 @@ define i32 @bzhi32_a0(i32 %val, i32 %num
 ;
 ; X64-NOBMI-LABEL: bzhi32_a0:
 ; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movl $1, %eax
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movl $1, %eax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shll %cl, %eax
 ; X64-NOBMI-NEXT:    decl %eax
 ; X64-NOBMI-NEXT:    andl %edi, %eax
@@ -80,8 +81,9 @@ define i32 @bzhi32_a1_indexzext(i32 %val
 ;
 ; X64-NOBMI-LABEL: bzhi32_a1_indexzext:
 ; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movl $1, %eax
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movl $1, %eax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shll %cl, %eax
 ; X64-NOBMI-NEXT:    decl %eax
 ; X64-NOBMI-NEXT:    andl %edi, %eax
@@ -118,8 +120,9 @@ define i32 @bzhi32_a2_load(i32* %w, i32
 ;
 ; X64-NOBMI-LABEL: bzhi32_a2_load:
 ; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movl $1, %eax
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movl $1, %eax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shll %cl, %eax
 ; X64-NOBMI-NEXT:    decl %eax
 ; X64-NOBMI-NEXT:    andl (%rdi), %eax
@@ -156,8 +159,9 @@ define i32 @bzhi32_a3_load_indexzext(i32
 ;
 ; X64-NOBMI-LABEL: bzhi32_a3_load_indexzext:
 ; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movl $1, %eax
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movl $1, %eax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shll %cl, %eax
 ; X64-NOBMI-NEXT:    decl %eax
 ; X64-NOBMI-NEXT:    andl (%rdi), %eax
@@ -193,8 +197,9 @@ define i32 @bzhi32_a4_commutative(i32 %v
 ;
 ; X64-NOBMI-LABEL: bzhi32_a4_commutative:
 ; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movl $1, %eax
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movl $1, %eax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shll %cl, %eax
 ; X64-NOBMI-NEXT:    decl %eax
 ; X64-NOBMI-NEXT:    andl %edi, %eax
@@ -253,8 +258,9 @@ define i64 @bzhi64_a0(i64 %val, i64 %num
 ;
 ; X64-NOBMI-LABEL: bzhi64_a0:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq %rsi, %rcx
 ; X64-NOBMI-NEXT:    movl $1, %eax
-; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NOBMI-NEXT:    shlq %cl, %rax
 ; X64-NOBMI-NEXT:    decq %rax
 ; X64-NOBMI-NEXT:    andq %rdi, %rax
@@ -311,8 +317,9 @@ define i64 @bzhi64_a1_indexzext(i64 %val
 ;
 ; X64-NOBMI-LABEL: bzhi64_a1_indexzext:
 ; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movl $1, %eax
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movl $1, %eax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shlq %cl, %rax
 ; X64-NOBMI-NEXT:    decq %rax
 ; X64-NOBMI-NEXT:    andq %rdi, %rax
@@ -377,8 +384,9 @@ define i64 @bzhi64_a2_load(i64* %w, i64
 ;
 ; X64-NOBMI-LABEL: bzhi64_a2_load:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq %rsi, %rcx
 ; X64-NOBMI-NEXT:    movl $1, %eax
-; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NOBMI-NEXT:    shlq %cl, %rax
 ; X64-NOBMI-NEXT:    decq %rax
 ; X64-NOBMI-NEXT:    andq (%rdi), %rax
@@ -442,8 +450,9 @@ define i64 @bzhi64_a3_load_indexzext(i64
 ;
 ; X64-NOBMI-LABEL: bzhi64_a3_load_indexzext:
 ; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movl $1, %eax
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movl $1, %eax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shlq %cl, %rax
 ; X64-NOBMI-NEXT:    decq %rax
 ; X64-NOBMI-NEXT:    andq (%rdi), %rax
@@ -503,8 +512,9 @@ define i64 @bzhi64_a4_commutative(i64 %v
 ;
 ; X64-NOBMI-LABEL: bzhi64_a4_commutative:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq %rsi, %rcx
 ; X64-NOBMI-NEXT:    movl $1, %eax
-; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NOBMI-NEXT:    shlq %cl, %rax
 ; X64-NOBMI-NEXT:    decq %rax
 ; X64-NOBMI-NEXT:    andq %rdi, %rax
@@ -542,8 +552,9 @@ define i32 @bzhi32_b0(i32 %val, i32 %num
 ;
 ; X64-NOBMI-LABEL: bzhi32_b0:
 ; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movl $-1, %eax
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movl $-1, %eax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shll %cl, %eax
 ; X64-NOBMI-NEXT:    notl %eax
 ; X64-NOBMI-NEXT:    andl %edi, %eax
@@ -577,8 +588,9 @@ define i32 @bzhi32_b1_indexzext(i32 %val
 ;
 ; X64-NOBMI-LABEL: bzhi32_b1_indexzext:
 ; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movl $-1, %eax
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movl $-1, %eax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shll %cl, %eax
 ; X64-NOBMI-NEXT:    notl %eax
 ; X64-NOBMI-NEXT:    andl %edi, %eax
@@ -615,8 +627,9 @@ define i32 @bzhi32_b2_load(i32* %w, i32
 ;
 ; X64-NOBMI-LABEL: bzhi32_b2_load:
 ; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movl $-1, %eax
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movl $-1, %eax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shll %cl, %eax
 ; X64-NOBMI-NEXT:    notl %eax
 ; X64-NOBMI-NEXT:    andl (%rdi), %eax
@@ -653,8 +666,9 @@ define i32 @bzhi32_b3_load_indexzext(i32
 ;
 ; X64-NOBMI-LABEL: bzhi32_b3_load_indexzext:
 ; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movl $-1, %eax
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movl $-1, %eax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shll %cl, %eax
 ; X64-NOBMI-NEXT:    notl %eax
 ; X64-NOBMI-NEXT:    andl (%rdi), %eax
@@ -690,8 +704,9 @@ define i32 @bzhi32_b4_commutative(i32 %v
 ;
 ; X64-NOBMI-LABEL: bzhi32_b4_commutative:
 ; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movl $-1, %eax
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movl $-1, %eax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shll %cl, %eax
 ; X64-NOBMI-NEXT:    notl %eax
 ; X64-NOBMI-NEXT:    andl %edi, %eax
@@ -749,8 +764,9 @@ define i64 @bzhi64_b0(i64 %val, i64 %num
 ;
 ; X64-NOBMI-LABEL: bzhi64_b0:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq %rsi, %rcx
 ; X64-NOBMI-NEXT:    movq $-1, %rax
-; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NOBMI-NEXT:    shlq %cl, %rax
 ; X64-NOBMI-NEXT:    notq %rax
 ; X64-NOBMI-NEXT:    andq %rdi, %rax
@@ -806,8 +822,9 @@ define i64 @bzhi64_b1_indexzext(i64 %val
 ;
 ; X64-NOBMI-LABEL: bzhi64_b1_indexzext:
 ; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movq $-1, %rax
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movq $-1, %rax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shlq %cl, %rax
 ; X64-NOBMI-NEXT:    notq %rax
 ; X64-NOBMI-NEXT:    andq %rdi, %rax
@@ -869,8 +886,9 @@ define i64 @bzhi64_b2_load(i64* %w, i64
 ;
 ; X64-NOBMI-LABEL: bzhi64_b2_load:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq %rsi, %rcx
 ; X64-NOBMI-NEXT:    movq $-1, %rax
-; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NOBMI-NEXT:    shlq %cl, %rax
 ; X64-NOBMI-NEXT:    notq %rax
 ; X64-NOBMI-NEXT:    andq (%rdi), %rax
@@ -931,8 +949,9 @@ define i64 @bzhi64_b3_load_indexzext(i64
 ;
 ; X64-NOBMI-LABEL: bzhi64_b3_load_indexzext:
 ; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movq $-1, %rax
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movq $-1, %rax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shlq %cl, %rax
 ; X64-NOBMI-NEXT:    notq %rax
 ; X64-NOBMI-NEXT:    andq (%rdi), %rax
@@ -991,8 +1010,9 @@ define i64 @bzhi64_b4_commutative(i64 %v
 ;
 ; X64-NOBMI-LABEL: bzhi64_b4_commutative:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq %rsi, %rcx
 ; X64-NOBMI-NEXT:    movq $-1, %rax
-; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NOBMI-NEXT:    shlq %cl, %rax
 ; X64-NOBMI-NEXT:    notq %rax
 ; X64-NOBMI-NEXT:    andq %rdi, %rax
@@ -1032,11 +1052,11 @@ define i32 @bzhi32_c0(i32 %val, i32 %num
 ; X64-NOBMI-LABEL: bzhi32_c0:
 ; X64-NOBMI:       # %bb.0:
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movl %edi, %eax
 ; X64-NOBMI-NEXT:    negl %ecx
-; X64-NOBMI-NEXT:    shll %cl, %edi
+; X64-NOBMI-NEXT:    shll %cl, %eax
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT:    shrl %cl, %edi
-; X64-NOBMI-NEXT:    movl %edi, %eax
+; X64-NOBMI-NEXT:    shrl %cl, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
 ; X64-BMI1BMI2-LABEL: bzhi32_c0:
@@ -1069,11 +1089,11 @@ define i32 @bzhi32_c1_indexzext(i32 %val
 ; X64-NOBMI-LABEL: bzhi32_c1_indexzext:
 ; X64-NOBMI:       # %bb.0:
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movl %edi, %eax
 ; X64-NOBMI-NEXT:    negb %cl
-; X64-NOBMI-NEXT:    shll %cl, %edi
+; X64-NOBMI-NEXT:    shll %cl, %eax
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT:    shrl %cl, %edi
-; X64-NOBMI-NEXT:    movl %edi, %eax
+; X64-NOBMI-NEXT:    shrl %cl, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
 ; X64-BMI1BMI2-LABEL: bzhi32_c1_indexzext:
@@ -1188,11 +1208,11 @@ define i32 @bzhi32_c4_commutative(i32 %v
 ; X64-NOBMI-LABEL: bzhi32_c4_commutative:
 ; X64-NOBMI:       # %bb.0:
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movl %edi, %eax
 ; X64-NOBMI-NEXT:    negl %ecx
-; X64-NOBMI-NEXT:    shll %cl, %edi
+; X64-NOBMI-NEXT:    shll %cl, %eax
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT:    shrl %cl, %edi
-; X64-NOBMI-NEXT:    movl %edi, %eax
+; X64-NOBMI-NEXT:    shrl %cl, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
 ; X64-BMI1BMI2-LABEL: bzhi32_c4_commutative:
@@ -1246,11 +1266,11 @@ define i64 @bzhi64_c0(i64 %val, i64 %num
 ; X64-NOBMI-LABEL: bzhi64_c0:
 ; X64-NOBMI:       # %bb.0:
 ; X64-NOBMI-NEXT:    movq %rsi, %rcx
+; X64-NOBMI-NEXT:    movq %rdi, %rax
 ; X64-NOBMI-NEXT:    negl %ecx
-; X64-NOBMI-NEXT:    shlq %cl, %rdi
+; X64-NOBMI-NEXT:    shlq %cl, %rax
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-NOBMI-NEXT:    shrq %cl, %rdi
-; X64-NOBMI-NEXT:    movq %rdi, %rax
+; X64-NOBMI-NEXT:    shrq %cl, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
 ; X64-BMI1BMI2-LABEL: bzhi64_c0:
@@ -1302,11 +1322,11 @@ define i64 @bzhi64_c1_indexzext(i64 %val
 ; X64-NOBMI-LABEL: bzhi64_c1_indexzext:
 ; X64-NOBMI:       # %bb.0:
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movq %rdi, %rax
 ; X64-NOBMI-NEXT:    negb %cl
-; X64-NOBMI-NEXT:    shlq %cl, %rdi
+; X64-NOBMI-NEXT:    shlq %cl, %rax
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT:    shrq %cl, %rdi
-; X64-NOBMI-NEXT:    movq %rdi, %rax
+; X64-NOBMI-NEXT:    shrq %cl, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
 ; X64-BMI1BMI2-LABEL: bzhi64_c1_indexzext:
@@ -1488,11 +1508,11 @@ define i64 @bzhi64_c4_commutative(i64 %v
 ; X64-NOBMI-LABEL: bzhi64_c4_commutative:
 ; X64-NOBMI:       # %bb.0:
 ; X64-NOBMI-NEXT:    movq %rsi, %rcx
+; X64-NOBMI-NEXT:    movq %rdi, %rax
 ; X64-NOBMI-NEXT:    negl %ecx
-; X64-NOBMI-NEXT:    shlq %cl, %rdi
+; X64-NOBMI-NEXT:    shlq %cl, %rax
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-NOBMI-NEXT:    shrq %cl, %rdi
-; X64-NOBMI-NEXT:    movq %rdi, %rax
+; X64-NOBMI-NEXT:    shrq %cl, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
 ; X64-BMI1BMI2-LABEL: bzhi64_c4_commutative:
@@ -1529,11 +1549,11 @@ define i32 @bzhi32_d0(i32 %val, i32 %num
 ; X64-NOBMI-LABEL: bzhi32_d0:
 ; X64-NOBMI:       # %bb.0:
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movl %edi, %eax
 ; X64-NOBMI-NEXT:    negl %ecx
-; X64-NOBMI-NEXT:    shll %cl, %edi
+; X64-NOBMI-NEXT:    shll %cl, %eax
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT:    shrl %cl, %edi
-; X64-NOBMI-NEXT:    movl %edi, %eax
+; X64-NOBMI-NEXT:    shrl %cl, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
 ; X64-BMI1BMI2-LABEL: bzhi32_d0:
@@ -1566,11 +1586,11 @@ define i32 @bzhi32_d1_indexzext(i32 %val
 ; X64-NOBMI-LABEL: bzhi32_d1_indexzext:
 ; X64-NOBMI:       # %bb.0:
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movl %edi, %eax
 ; X64-NOBMI-NEXT:    negb %cl
-; X64-NOBMI-NEXT:    shll %cl, %edi
+; X64-NOBMI-NEXT:    shll %cl, %eax
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT:    shrl %cl, %edi
-; X64-NOBMI-NEXT:    movl %edi, %eax
+; X64-NOBMI-NEXT:    shrl %cl, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
 ; X64-BMI1BMI2-LABEL: bzhi32_d1_indexzext:
@@ -1742,11 +1762,11 @@ define i64 @bzhi64_d0(i64 %val, i64 %num
 ; X64-NOBMI-LABEL: bzhi64_d0:
 ; X64-NOBMI:       # %bb.0:
 ; X64-NOBMI-NEXT:    movq %rsi, %rcx
+; X64-NOBMI-NEXT:    movq %rdi, %rax
 ; X64-NOBMI-NEXT:    negl %ecx
-; X64-NOBMI-NEXT:    shlq %cl, %rdi
+; X64-NOBMI-NEXT:    shlq %cl, %rax
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-NOBMI-NEXT:    shrq %cl, %rdi
-; X64-NOBMI-NEXT:    movq %rdi, %rax
+; X64-NOBMI-NEXT:    shrq %cl, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
 ; X64-BMI1BMI2-LABEL: bzhi64_d0:
@@ -1834,11 +1854,11 @@ define i64 @bzhi64_d1_indexzext(i64 %val
 ; X64-NOBMI-LABEL: bzhi64_d1_indexzext:
 ; X64-NOBMI:       # %bb.0:
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movq %rdi, %rax
 ; X64-NOBMI-NEXT:    negb %cl
-; X64-NOBMI-NEXT:    shlq %cl, %rdi
+; X64-NOBMI-NEXT:    shlq %cl, %rax
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT:    shrq %cl, %rdi
-; X64-NOBMI-NEXT:    movq %rdi, %rax
+; X64-NOBMI-NEXT:    shrq %cl, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
 ; X64-BMI1BMI2-LABEL: bzhi64_d1_indexzext:
@@ -2060,8 +2080,8 @@ define i32 @bzhi32_constant_mask32(i32 %
 ;
 ; X64-LABEL: bzhi32_constant_mask32:
 ; X64:       # %bb.0:
-; X64-NEXT:    andl $2147483647, %edi # imm = 0x7FFFFFFF
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    andl $2147483647, %eax # imm = 0x7FFFFFFF
 ; X64-NEXT:    retq
   %masked = and i32 %val, 2147483647
   ret i32 %masked
@@ -2094,8 +2114,8 @@ define i32 @bzhi32_constant_mask16(i32 %
 ;
 ; X64-LABEL: bzhi32_constant_mask16:
 ; X64:       # %bb.0:
-; X64-NEXT:    andl $32767, %edi # imm = 0x7FFF
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    andl $32767, %eax # imm = 0x7FFF
 ; X64-NEXT:    retq
   %masked = and i32 %val, 32767
   ret i32 %masked
@@ -2128,8 +2148,8 @@ define i32 @bzhi32_constant_mask8(i32 %v
 ;
 ; X64-LABEL: bzhi32_constant_mask8:
 ; X64:       # %bb.0:
-; X64-NEXT:    andl $127, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    andl $127, %eax
 ; X64-NEXT:    retq
   %masked = and i32 %val, 127
   ret i32 %masked
@@ -2223,8 +2243,8 @@ define i64 @bzhi64_constant_mask32(i64 %
 ;
 ; X64-LABEL: bzhi64_constant_mask32:
 ; X64:       # %bb.0:
-; X64-NEXT:    andl $2147483647, %edi # imm = 0x7FFFFFFF
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    andl $2147483647, %eax # imm = 0x7FFFFFFF
 ; X64-NEXT:    retq
   %masked = and i64 %val, 2147483647
   ret i64 %masked
@@ -2259,8 +2279,8 @@ define i64 @bzhi64_constant_mask16(i64 %
 ;
 ; X64-LABEL: bzhi64_constant_mask16:
 ; X64:       # %bb.0:
-; X64-NEXT:    andl $32767, %edi # imm = 0x7FFF
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    andl $32767, %eax # imm = 0x7FFF
 ; X64-NEXT:    retq
   %masked = and i64 %val, 32767
   ret i64 %masked
@@ -2295,8 +2315,8 @@ define i64 @bzhi64_constant_mask8(i64 %v
 ;
 ; X64-LABEL: bzhi64_constant_mask8:
 ; X64:       # %bb.0:
-; X64-NEXT:    andl $127, %edi
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    andl $127, %eax
 ; X64-NEXT:    retq
   %masked = and i64 %val, 127
   ret i64 %masked
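
The "# kill:" lines added throughout these shift tests are assembly comments,
not instructions: the printer emits them when an instruction reads only a
sub-register (here $cl) of a live wider register (%ecx or %rcx), and they
contribute no bytes to the output. They appear now because the shift amount
is copied into %rcx/%ecx at full width while the shift itself reads only %cl.
A sketch of an input that produces one (the function name is illustrative):

define i64 @shl_var(i64 %val, i64 %amt) {
  %r = shl i64 %val, %amt
  ret i64 %r
}

which now lowers to roughly "movq %rsi, %rcx", "movq %rdi, %rax", a
"# kill: def $cl killed $cl killed $rcx" comment, and "shlq %cl, %rax".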

Modified: llvm/trunk/test/CodeGen/X86/fast-isel-fold-mem.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fast-isel-fold-mem.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fast-isel-fold-mem.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fast-isel-fold-mem.ll Wed Sep 19 11:59:08 2018
@@ -5,8 +5,8 @@
 define i64 @fold_load(i64* %a, i64 %b) {
 ; CHECK-LABEL: fold_load:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    addq (%rdi), %rsi
 ; CHECK-NEXT:    movq %rsi, %rax
+; CHECK-NEXT:    addq (%rdi), %rax
 ; CHECK-NEXT:    retq
   %1 = load i64, i64* %a, align 8
   %2 = add i64 %1, %b

Modified: llvm/trunk/test/CodeGen/X86/fast-isel-select-cmov.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fast-isel-select-cmov.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fast-isel-select-cmov.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fast-isel-select-cmov.ll Wed Sep 19 11:59:08 2018
@@ -31,9 +31,9 @@ define zeroext i16 @select_cmp_cmov_i16(
 define i32 @select_cmov_i32(i1 zeroext %cond, i32 %a, i32 %b) {
 ; CHECK-LABEL: select_cmov_i32:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    testb $1, %dil
-; CHECK-NEXT:    cmovel %edx, %esi
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    testb $1, %dil
+; CHECK-NEXT:    cmovel %edx, %eax
 ; CHECK-NEXT:    retq
   %1 = select i1 %cond, i32 %a, i32 %b
   ret i32 %1
@@ -42,9 +42,9 @@ define i32 @select_cmov_i32(i1 zeroext %
 define i32 @select_cmp_cmov_i32(i32 %a, i32 %b) {
 ; CHECK-LABEL: select_cmp_cmov_i32:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    cmpl %esi, %edi
-; CHECK-NEXT:    cmovbl %edi, %esi
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    cmovbl %edi, %eax
 ; CHECK-NEXT:    retq
   %1 = icmp ult i32 %a, %b
   %2 = select i1 %1, i32 %a, i32 %b
@@ -54,9 +54,9 @@ define i32 @select_cmp_cmov_i32(i32 %a,
 define i64 @select_cmov_i64(i1 zeroext %cond, i64 %a, i64 %b) {
 ; CHECK-LABEL: select_cmov_i64:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    testb $1, %dil
-; CHECK-NEXT:    cmoveq %rdx, %rsi
 ; CHECK-NEXT:    movq %rsi, %rax
+; CHECK-NEXT:    testb $1, %dil
+; CHECK-NEXT:    cmoveq %rdx, %rax
 ; CHECK-NEXT:    retq
   %1 = select i1 %cond, i64 %a, i64 %b
   ret i64 %1
@@ -65,9 +65,9 @@ define i64 @select_cmov_i64(i1 zeroext %
 define i64 @select_cmp_cmov_i64(i64 %a, i64 %b) {
 ; CHECK-LABEL: select_cmp_cmov_i64:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    cmpq %rsi, %rdi
-; CHECK-NEXT:    cmovbq %rdi, %rsi
 ; CHECK-NEXT:    movq %rsi, %rax
+; CHECK-NEXT:    cmpq %rsi, %rdi
+; CHECK-NEXT:    cmovbq %rdi, %rax
 ; CHECK-NEXT:    retq
   %1 = icmp ult i64 %a, %b
   %2 = select i1 %1, i64 %a, i64 %b

Modified: llvm/trunk/test/CodeGen/X86/fast-isel-select-cmov2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fast-isel-select-cmov2.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fast-isel-select-cmov2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fast-isel-select-cmov2.ll Wed Sep 19 11:59:08 2018
@@ -19,30 +19,30 @@ define i64 @select_fcmp_false_cmov(doubl
 define i64 @select_fcmp_oeq_cmov(double %a, double %b, i64 %c, i64 %d) {
 ; SDAG-LABEL: select_fcmp_oeq_cmov:
 ; SDAG:       ## %bb.0:
-; SDAG-NEXT:    ucomisd %xmm1, %xmm0
-; SDAG-NEXT:    cmovneq %rsi, %rdi
-; SDAG-NEXT:    cmovpq %rsi, %rdi
 ; SDAG-NEXT:    movq %rdi, %rax
+; SDAG-NEXT:    ucomisd %xmm1, %xmm0
+; SDAG-NEXT:    cmovneq %rsi, %rax
+; SDAG-NEXT:    cmovpq %rsi, %rax
 ; SDAG-NEXT:    retq
 ;
 ; FAST-LABEL: select_fcmp_oeq_cmov:
 ; FAST:       ## %bb.0:
-; FAST-NEXT:    ucomisd %xmm1, %xmm0
-; FAST-NEXT:    setnp %al
-; FAST-NEXT:    sete %cl
-; FAST-NEXT:    testb %al, %cl
-; FAST-NEXT:    cmoveq %rsi, %rdi
 ; FAST-NEXT:    movq %rdi, %rax
+; FAST-NEXT:    ucomisd %xmm1, %xmm0
+; FAST-NEXT:    setnp %cl
+; FAST-NEXT:    sete %dl
+; FAST-NEXT:    testb %cl, %dl
+; FAST-NEXT:    cmoveq %rsi, %rax
 ; FAST-NEXT:    retq
 ;
 ; FAST_AVX-LABEL: select_fcmp_oeq_cmov:
 ; FAST_AVX:       ## %bb.0:
-; FAST_AVX-NEXT:    vucomisd %xmm1, %xmm0
-; FAST_AVX-NEXT:    setnp %al
-; FAST_AVX-NEXT:    sete %cl
-; FAST_AVX-NEXT:    testb %al, %cl
-; FAST_AVX-NEXT:    cmoveq %rsi, %rdi
 ; FAST_AVX-NEXT:    movq %rdi, %rax
+; FAST_AVX-NEXT:    vucomisd %xmm1, %xmm0
+; FAST_AVX-NEXT:    setnp %cl
+; FAST_AVX-NEXT:    sete %dl
+; FAST_AVX-NEXT:    testb %cl, %dl
+; FAST_AVX-NEXT:    cmoveq %rsi, %rax
 ; FAST_AVX-NEXT:    retq
   %1 = fcmp oeq double %a, %b
   %2 = select i1 %1, i64 %c, i64 %d
@@ -52,16 +52,16 @@ define i64 @select_fcmp_oeq_cmov(double
 define i64 @select_fcmp_ogt_cmov(double %a, double %b, i64 %c, i64 %d) {
 ; NOAVX-LABEL: select_fcmp_ogt_cmov:
 ; NOAVX:       ## %bb.0:
-; NOAVX-NEXT:    ucomisd %xmm1, %xmm0
-; NOAVX-NEXT:    cmovbeq %rsi, %rdi
 ; NOAVX-NEXT:    movq %rdi, %rax
+; NOAVX-NEXT:    ucomisd %xmm1, %xmm0
+; NOAVX-NEXT:    cmovbeq %rsi, %rax
 ; NOAVX-NEXT:    retq
 ;
 ; FAST_AVX-LABEL: select_fcmp_ogt_cmov:
 ; FAST_AVX:       ## %bb.0:
-; FAST_AVX-NEXT:    vucomisd %xmm1, %xmm0
-; FAST_AVX-NEXT:    cmovbeq %rsi, %rdi
 ; FAST_AVX-NEXT:    movq %rdi, %rax
+; FAST_AVX-NEXT:    vucomisd %xmm1, %xmm0
+; FAST_AVX-NEXT:    cmovbeq %rsi, %rax
 ; FAST_AVX-NEXT:    retq
   %1 = fcmp ogt double %a, %b
   %2 = select i1 %1, i64 %c, i64 %d
@@ -71,16 +71,16 @@ define i64 @select_fcmp_ogt_cmov(double
 define i64 @select_fcmp_oge_cmov(double %a, double %b, i64 %c, i64 %d) {
 ; NOAVX-LABEL: select_fcmp_oge_cmov:
 ; NOAVX:       ## %bb.0:
-; NOAVX-NEXT:    ucomisd %xmm1, %xmm0
-; NOAVX-NEXT:    cmovbq %rsi, %rdi
 ; NOAVX-NEXT:    movq %rdi, %rax
+; NOAVX-NEXT:    ucomisd %xmm1, %xmm0
+; NOAVX-NEXT:    cmovbq %rsi, %rax
 ; NOAVX-NEXT:    retq
 ;
 ; FAST_AVX-LABEL: select_fcmp_oge_cmov:
 ; FAST_AVX:       ## %bb.0:
-; FAST_AVX-NEXT:    vucomisd %xmm1, %xmm0
-; FAST_AVX-NEXT:    cmovbq %rsi, %rdi
 ; FAST_AVX-NEXT:    movq %rdi, %rax
+; FAST_AVX-NEXT:    vucomisd %xmm1, %xmm0
+; FAST_AVX-NEXT:    cmovbq %rsi, %rax
 ; FAST_AVX-NEXT:    retq
   %1 = fcmp oge double %a, %b
   %2 = select i1 %1, i64 %c, i64 %d
@@ -90,16 +90,16 @@ define i64 @select_fcmp_oge_cmov(double
 define i64 @select_fcmp_olt_cmov(double %a, double %b, i64 %c, i64 %d) {
 ; NOAVX-LABEL: select_fcmp_olt_cmov:
 ; NOAVX:       ## %bb.0:
-; NOAVX-NEXT:    ucomisd %xmm0, %xmm1
-; NOAVX-NEXT:    cmovbeq %rsi, %rdi
 ; NOAVX-NEXT:    movq %rdi, %rax
+; NOAVX-NEXT:    ucomisd %xmm0, %xmm1
+; NOAVX-NEXT:    cmovbeq %rsi, %rax
 ; NOAVX-NEXT:    retq
 ;
 ; FAST_AVX-LABEL: select_fcmp_olt_cmov:
 ; FAST_AVX:       ## %bb.0:
-; FAST_AVX-NEXT:    vucomisd %xmm0, %xmm1
-; FAST_AVX-NEXT:    cmovbeq %rsi, %rdi
 ; FAST_AVX-NEXT:    movq %rdi, %rax
+; FAST_AVX-NEXT:    vucomisd %xmm0, %xmm1
+; FAST_AVX-NEXT:    cmovbeq %rsi, %rax
 ; FAST_AVX-NEXT:    retq
   %1 = fcmp olt double %a, %b
   %2 = select i1 %1, i64 %c, i64 %d
@@ -109,16 +109,16 @@ define i64 @select_fcmp_olt_cmov(double
 define i64 @select_fcmp_ole_cmov(double %a, double %b, i64 %c, i64 %d) {
 ; NOAVX-LABEL: select_fcmp_ole_cmov:
 ; NOAVX:       ## %bb.0:
-; NOAVX-NEXT:    ucomisd %xmm0, %xmm1
-; NOAVX-NEXT:    cmovbq %rsi, %rdi
 ; NOAVX-NEXT:    movq %rdi, %rax
+; NOAVX-NEXT:    ucomisd %xmm0, %xmm1
+; NOAVX-NEXT:    cmovbq %rsi, %rax
 ; NOAVX-NEXT:    retq
 ;
 ; FAST_AVX-LABEL: select_fcmp_ole_cmov:
 ; FAST_AVX:       ## %bb.0:
-; FAST_AVX-NEXT:    vucomisd %xmm0, %xmm1
-; FAST_AVX-NEXT:    cmovbq %rsi, %rdi
 ; FAST_AVX-NEXT:    movq %rdi, %rax
+; FAST_AVX-NEXT:    vucomisd %xmm0, %xmm1
+; FAST_AVX-NEXT:    cmovbq %rsi, %rax
 ; FAST_AVX-NEXT:    retq
   %1 = fcmp ole double %a, %b
   %2 = select i1 %1, i64 %c, i64 %d
@@ -128,16 +128,16 @@ define i64 @select_fcmp_ole_cmov(double
 define i64 @select_fcmp_one_cmov(double %a, double %b, i64 %c, i64 %d) {
 ; NOAVX-LABEL: select_fcmp_one_cmov:
 ; NOAVX:       ## %bb.0:
-; NOAVX-NEXT:    ucomisd %xmm1, %xmm0
-; NOAVX-NEXT:    cmoveq %rsi, %rdi
 ; NOAVX-NEXT:    movq %rdi, %rax
+; NOAVX-NEXT:    ucomisd %xmm1, %xmm0
+; NOAVX-NEXT:    cmoveq %rsi, %rax
 ; NOAVX-NEXT:    retq
 ;
 ; FAST_AVX-LABEL: select_fcmp_one_cmov:
 ; FAST_AVX:       ## %bb.0:
-; FAST_AVX-NEXT:    vucomisd %xmm1, %xmm0
-; FAST_AVX-NEXT:    cmoveq %rsi, %rdi
 ; FAST_AVX-NEXT:    movq %rdi, %rax
+; FAST_AVX-NEXT:    vucomisd %xmm1, %xmm0
+; FAST_AVX-NEXT:    cmoveq %rsi, %rax
 ; FAST_AVX-NEXT:    retq
   %1 = fcmp one double %a, %b
   %2 = select i1 %1, i64 %c, i64 %d
@@ -147,16 +147,16 @@ define i64 @select_fcmp_one_cmov(double
 define i64 @select_fcmp_ord_cmov(double %a, double %b, i64 %c, i64 %d) {
 ; NOAVX-LABEL: select_fcmp_ord_cmov:
 ; NOAVX:       ## %bb.0:
-; NOAVX-NEXT:    ucomisd %xmm1, %xmm0
-; NOAVX-NEXT:    cmovpq %rsi, %rdi
 ; NOAVX-NEXT:    movq %rdi, %rax
+; NOAVX-NEXT:    ucomisd %xmm1, %xmm0
+; NOAVX-NEXT:    cmovpq %rsi, %rax
 ; NOAVX-NEXT:    retq
 ;
 ; FAST_AVX-LABEL: select_fcmp_ord_cmov:
 ; FAST_AVX:       ## %bb.0:
-; FAST_AVX-NEXT:    vucomisd %xmm1, %xmm0
-; FAST_AVX-NEXT:    cmovpq %rsi, %rdi
 ; FAST_AVX-NEXT:    movq %rdi, %rax
+; FAST_AVX-NEXT:    vucomisd %xmm1, %xmm0
+; FAST_AVX-NEXT:    cmovpq %rsi, %rax
 ; FAST_AVX-NEXT:    retq
   %1 = fcmp ord double %a, %b
   %2 = select i1 %1, i64 %c, i64 %d
@@ -166,16 +166,16 @@ define i64 @select_fcmp_ord_cmov(double
 define i64 @select_fcmp_uno_cmov(double %a, double %b, i64 %c, i64 %d) {
 ; NOAVX-LABEL: select_fcmp_uno_cmov:
 ; NOAVX:       ## %bb.0:
-; NOAVX-NEXT:    ucomisd %xmm1, %xmm0
-; NOAVX-NEXT:    cmovnpq %rsi, %rdi
 ; NOAVX-NEXT:    movq %rdi, %rax
+; NOAVX-NEXT:    ucomisd %xmm1, %xmm0
+; NOAVX-NEXT:    cmovnpq %rsi, %rax
 ; NOAVX-NEXT:    retq
 ;
 ; FAST_AVX-LABEL: select_fcmp_uno_cmov:
 ; FAST_AVX:       ## %bb.0:
-; FAST_AVX-NEXT:    vucomisd %xmm1, %xmm0
-; FAST_AVX-NEXT:    cmovnpq %rsi, %rdi
 ; FAST_AVX-NEXT:    movq %rdi, %rax
+; FAST_AVX-NEXT:    vucomisd %xmm1, %xmm0
+; FAST_AVX-NEXT:    cmovnpq %rsi, %rax
 ; FAST_AVX-NEXT:    retq
   %1 = fcmp uno double %a, %b
   %2 = select i1 %1, i64 %c, i64 %d
@@ -185,16 +185,16 @@ define i64 @select_fcmp_uno_cmov(double
 define i64 @select_fcmp_ueq_cmov(double %a, double %b, i64 %c, i64 %d) {
 ; NOAVX-LABEL: select_fcmp_ueq_cmov:
 ; NOAVX:       ## %bb.0:
-; NOAVX-NEXT:    ucomisd %xmm1, %xmm0
-; NOAVX-NEXT:    cmovneq %rsi, %rdi
 ; NOAVX-NEXT:    movq %rdi, %rax
+; NOAVX-NEXT:    ucomisd %xmm1, %xmm0
+; NOAVX-NEXT:    cmovneq %rsi, %rax
 ; NOAVX-NEXT:    retq
 ;
 ; FAST_AVX-LABEL: select_fcmp_ueq_cmov:
 ; FAST_AVX:       ## %bb.0:
-; FAST_AVX-NEXT:    vucomisd %xmm1, %xmm0
-; FAST_AVX-NEXT:    cmovneq %rsi, %rdi
 ; FAST_AVX-NEXT:    movq %rdi, %rax
+; FAST_AVX-NEXT:    vucomisd %xmm1, %xmm0
+; FAST_AVX-NEXT:    cmovneq %rsi, %rax
 ; FAST_AVX-NEXT:    retq
   %1 = fcmp ueq double %a, %b
   %2 = select i1 %1, i64 %c, i64 %d
@@ -204,16 +204,16 @@ define i64 @select_fcmp_ueq_cmov(double
 define i64 @select_fcmp_ugt_cmov(double %a, double %b, i64 %c, i64 %d) {
 ; NOAVX-LABEL: select_fcmp_ugt_cmov:
 ; NOAVX:       ## %bb.0:
-; NOAVX-NEXT:    ucomisd %xmm0, %xmm1
-; NOAVX-NEXT:    cmovaeq %rsi, %rdi
 ; NOAVX-NEXT:    movq %rdi, %rax
+; NOAVX-NEXT:    ucomisd %xmm0, %xmm1
+; NOAVX-NEXT:    cmovaeq %rsi, %rax
 ; NOAVX-NEXT:    retq
 ;
 ; FAST_AVX-LABEL: select_fcmp_ugt_cmov:
 ; FAST_AVX:       ## %bb.0:
-; FAST_AVX-NEXT:    vucomisd %xmm0, %xmm1
-; FAST_AVX-NEXT:    cmovaeq %rsi, %rdi
 ; FAST_AVX-NEXT:    movq %rdi, %rax
+; FAST_AVX-NEXT:    vucomisd %xmm0, %xmm1
+; FAST_AVX-NEXT:    cmovaeq %rsi, %rax
 ; FAST_AVX-NEXT:    retq
   %1 = fcmp ugt double %a, %b
   %2 = select i1 %1, i64 %c, i64 %d
@@ -223,16 +223,16 @@ define i64 @select_fcmp_ugt_cmov(double
 define i64 @select_fcmp_uge_cmov(double %a, double %b, i64 %c, i64 %d) {
 ; NOAVX-LABEL: select_fcmp_uge_cmov:
 ; NOAVX:       ## %bb.0:
-; NOAVX-NEXT:    ucomisd %xmm0, %xmm1
-; NOAVX-NEXT:    cmovaq %rsi, %rdi
 ; NOAVX-NEXT:    movq %rdi, %rax
+; NOAVX-NEXT:    ucomisd %xmm0, %xmm1
+; NOAVX-NEXT:    cmovaq %rsi, %rax
 ; NOAVX-NEXT:    retq
 ;
 ; FAST_AVX-LABEL: select_fcmp_uge_cmov:
 ; FAST_AVX:       ## %bb.0:
-; FAST_AVX-NEXT:    vucomisd %xmm0, %xmm1
-; FAST_AVX-NEXT:    cmovaq %rsi, %rdi
 ; FAST_AVX-NEXT:    movq %rdi, %rax
+; FAST_AVX-NEXT:    vucomisd %xmm0, %xmm1
+; FAST_AVX-NEXT:    cmovaq %rsi, %rax
 ; FAST_AVX-NEXT:    retq
   %1 = fcmp uge double %a, %b
   %2 = select i1 %1, i64 %c, i64 %d
@@ -242,16 +242,16 @@ define i64 @select_fcmp_uge_cmov(double
 define i64 @select_fcmp_ult_cmov(double %a, double %b, i64 %c, i64 %d) {
 ; NOAVX-LABEL: select_fcmp_ult_cmov:
 ; NOAVX:       ## %bb.0:
-; NOAVX-NEXT:    ucomisd %xmm1, %xmm0
-; NOAVX-NEXT:    cmovaeq %rsi, %rdi
 ; NOAVX-NEXT:    movq %rdi, %rax
+; NOAVX-NEXT:    ucomisd %xmm1, %xmm0
+; NOAVX-NEXT:    cmovaeq %rsi, %rax
 ; NOAVX-NEXT:    retq
 ;
 ; FAST_AVX-LABEL: select_fcmp_ult_cmov:
 ; FAST_AVX:       ## %bb.0:
-; FAST_AVX-NEXT:    vucomisd %xmm1, %xmm0
-; FAST_AVX-NEXT:    cmovaeq %rsi, %rdi
 ; FAST_AVX-NEXT:    movq %rdi, %rax
+; FAST_AVX-NEXT:    vucomisd %xmm1, %xmm0
+; FAST_AVX-NEXT:    cmovaeq %rsi, %rax
 ; FAST_AVX-NEXT:    retq
   %1 = fcmp ult double %a, %b
   %2 = select i1 %1, i64 %c, i64 %d
@@ -261,16 +261,16 @@ define i64 @select_fcmp_ult_cmov(double
 define i64 @select_fcmp_ule_cmov(double %a, double %b, i64 %c, i64 %d) {
 ; NOAVX-LABEL: select_fcmp_ule_cmov:
 ; NOAVX:       ## %bb.0:
-; NOAVX-NEXT:    ucomisd %xmm1, %xmm0
-; NOAVX-NEXT:    cmovaq %rsi, %rdi
 ; NOAVX-NEXT:    movq %rdi, %rax
+; NOAVX-NEXT:    ucomisd %xmm1, %xmm0
+; NOAVX-NEXT:    cmovaq %rsi, %rax
 ; NOAVX-NEXT:    retq
 ;
 ; FAST_AVX-LABEL: select_fcmp_ule_cmov:
 ; FAST_AVX:       ## %bb.0:
-; FAST_AVX-NEXT:    vucomisd %xmm1, %xmm0
-; FAST_AVX-NEXT:    cmovaq %rsi, %rdi
 ; FAST_AVX-NEXT:    movq %rdi, %rax
+; FAST_AVX-NEXT:    vucomisd %xmm1, %xmm0
+; FAST_AVX-NEXT:    cmovaq %rsi, %rax
 ; FAST_AVX-NEXT:    retq
   %1 = fcmp ule double %a, %b
   %2 = select i1 %1, i64 %c, i64 %d
@@ -280,30 +280,30 @@ define i64 @select_fcmp_ule_cmov(double
 define i64 @select_fcmp_une_cmov(double %a, double %b, i64 %c, i64 %d) {
 ; SDAG-LABEL: select_fcmp_une_cmov:
 ; SDAG:       ## %bb.0:
-; SDAG-NEXT:    ucomisd %xmm1, %xmm0
-; SDAG-NEXT:    cmovneq %rdi, %rsi
-; SDAG-NEXT:    cmovpq %rdi, %rsi
 ; SDAG-NEXT:    movq %rsi, %rax
+; SDAG-NEXT:    ucomisd %xmm1, %xmm0
+; SDAG-NEXT:    cmovneq %rdi, %rax
+; SDAG-NEXT:    cmovpq %rdi, %rax
 ; SDAG-NEXT:    retq
 ;
 ; FAST-LABEL: select_fcmp_une_cmov:
 ; FAST:       ## %bb.0:
-; FAST-NEXT:    ucomisd %xmm1, %xmm0
-; FAST-NEXT:    setp %al
-; FAST-NEXT:    setne %cl
-; FAST-NEXT:    orb %al, %cl
-; FAST-NEXT:    cmoveq %rsi, %rdi
 ; FAST-NEXT:    movq %rdi, %rax
+; FAST-NEXT:    ucomisd %xmm1, %xmm0
+; FAST-NEXT:    setp %cl
+; FAST-NEXT:    setne %dl
+; FAST-NEXT:    orb %cl, %dl
+; FAST-NEXT:    cmoveq %rsi, %rax
 ; FAST-NEXT:    retq
 ;
 ; FAST_AVX-LABEL: select_fcmp_une_cmov:
 ; FAST_AVX:       ## %bb.0:
-; FAST_AVX-NEXT:    vucomisd %xmm1, %xmm0
-; FAST_AVX-NEXT:    setp %al
-; FAST_AVX-NEXT:    setne %cl
-; FAST_AVX-NEXT:    orb %al, %cl
-; FAST_AVX-NEXT:    cmoveq %rsi, %rdi
 ; FAST_AVX-NEXT:    movq %rdi, %rax
+; FAST_AVX-NEXT:    vucomisd %xmm1, %xmm0
+; FAST_AVX-NEXT:    setp %cl
+; FAST_AVX-NEXT:    setne %dl
+; FAST_AVX-NEXT:    orb %cl, %dl
+; FAST_AVX-NEXT:    cmoveq %rsi, %rax
 ; FAST_AVX-NEXT:    retq
   %1 = fcmp une double %a, %b
   %2 = select i1 %1, i64 %c, i64 %d
@@ -323,9 +323,9 @@ define i64 @select_fcmp_true_cmov(double
 define i64 @select_icmp_eq_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
 ; CHECK-LABEL: select_icmp_eq_cmov:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    cmpq %rsi, %rdi
-; CHECK-NEXT:    cmovneq %rcx, %rdx
 ; CHECK-NEXT:    movq %rdx, %rax
+; CHECK-NEXT:    cmpq %rsi, %rdi
+; CHECK-NEXT:    cmovneq %rcx, %rax
 ; CHECK-NEXT:    retq
   %1 = icmp eq i64 %a, %b
   %2 = select i1 %1, i64 %c, i64 %d
@@ -335,9 +335,9 @@ define i64 @select_icmp_eq_cmov(i64 %a,
 define i64 @select_icmp_ne_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
 ; CHECK-LABEL: select_icmp_ne_cmov:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    cmpq %rsi, %rdi
-; CHECK-NEXT:    cmoveq %rcx, %rdx
 ; CHECK-NEXT:    movq %rdx, %rax
+; CHECK-NEXT:    cmpq %rsi, %rdi
+; CHECK-NEXT:    cmoveq %rcx, %rax
 ; CHECK-NEXT:    retq
   %1 = icmp ne i64 %a, %b
   %2 = select i1 %1, i64 %c, i64 %d
@@ -347,9 +347,9 @@ define i64 @select_icmp_ne_cmov(i64 %a,
 define i64 @select_icmp_ugt_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
 ; CHECK-LABEL: select_icmp_ugt_cmov:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    cmpq %rsi, %rdi
-; CHECK-NEXT:    cmovbeq %rcx, %rdx
 ; CHECK-NEXT:    movq %rdx, %rax
+; CHECK-NEXT:    cmpq %rsi, %rdi
+; CHECK-NEXT:    cmovbeq %rcx, %rax
 ; CHECK-NEXT:    retq
   %1 = icmp ugt i64 %a, %b
   %2 = select i1 %1, i64 %c, i64 %d
@@ -360,9 +360,9 @@ define i64 @select_icmp_ugt_cmov(i64 %a,
 define i64 @select_icmp_uge_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
 ; CHECK-LABEL: select_icmp_uge_cmov:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    cmpq %rsi, %rdi
-; CHECK-NEXT:    cmovbq %rcx, %rdx
 ; CHECK-NEXT:    movq %rdx, %rax
+; CHECK-NEXT:    cmpq %rsi, %rdi
+; CHECK-NEXT:    cmovbq %rcx, %rax
 ; CHECK-NEXT:    retq
   %1 = icmp uge i64 %a, %b
   %2 = select i1 %1, i64 %c, i64 %d
@@ -372,9 +372,9 @@ define i64 @select_icmp_uge_cmov(i64 %a,
 define i64 @select_icmp_ult_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
 ; CHECK-LABEL: select_icmp_ult_cmov:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    cmpq %rsi, %rdi
-; CHECK-NEXT:    cmovaeq %rcx, %rdx
 ; CHECK-NEXT:    movq %rdx, %rax
+; CHECK-NEXT:    cmpq %rsi, %rdi
+; CHECK-NEXT:    cmovaeq %rcx, %rax
 ; CHECK-NEXT:    retq
   %1 = icmp ult i64 %a, %b
   %2 = select i1 %1, i64 %c, i64 %d
@@ -384,9 +384,9 @@ define i64 @select_icmp_ult_cmov(i64 %a,
 define i64 @select_icmp_ule_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
 ; CHECK-LABEL: select_icmp_ule_cmov:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    cmpq %rsi, %rdi
-; CHECK-NEXT:    cmovaq %rcx, %rdx
 ; CHECK-NEXT:    movq %rdx, %rax
+; CHECK-NEXT:    cmpq %rsi, %rdi
+; CHECK-NEXT:    cmovaq %rcx, %rax
 ; CHECK-NEXT:    retq
   %1 = icmp ule i64 %a, %b
   %2 = select i1 %1, i64 %c, i64 %d
@@ -396,9 +396,9 @@ define i64 @select_icmp_ule_cmov(i64 %a,
 define i64 @select_icmp_sgt_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
 ; CHECK-LABEL: select_icmp_sgt_cmov:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    cmpq %rsi, %rdi
-; CHECK-NEXT:    cmovleq %rcx, %rdx
 ; CHECK-NEXT:    movq %rdx, %rax
+; CHECK-NEXT:    cmpq %rsi, %rdi
+; CHECK-NEXT:    cmovleq %rcx, %rax
 ; CHECK-NEXT:    retq
   %1 = icmp sgt i64 %a, %b
   %2 = select i1 %1, i64 %c, i64 %d
@@ -408,9 +408,9 @@ define i64 @select_icmp_sgt_cmov(i64 %a,
 define i64 @select_icmp_sge_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
 ; CHECK-LABEL: select_icmp_sge_cmov:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    cmpq %rsi, %rdi
-; CHECK-NEXT:    cmovlq %rcx, %rdx
 ; CHECK-NEXT:    movq %rdx, %rax
+; CHECK-NEXT:    cmpq %rsi, %rdi
+; CHECK-NEXT:    cmovlq %rcx, %rax
 ; CHECK-NEXT:    retq
   %1 = icmp sge i64 %a, %b
   %2 = select i1 %1, i64 %c, i64 %d
@@ -420,9 +420,9 @@ define i64 @select_icmp_sge_cmov(i64 %a,
 define i64 @select_icmp_slt_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
 ; CHECK-LABEL: select_icmp_slt_cmov:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    cmpq %rsi, %rdi
-; CHECK-NEXT:    cmovgeq %rcx, %rdx
 ; CHECK-NEXT:    movq %rdx, %rax
+; CHECK-NEXT:    cmpq %rsi, %rdi
+; CHECK-NEXT:    cmovgeq %rcx, %rax
 ; CHECK-NEXT:    retq
   %1 = icmp slt i64 %a, %b
   %2 = select i1 %1, i64 %c, i64 %d
@@ -432,9 +432,9 @@ define i64 @select_icmp_slt_cmov(i64 %a,
 define i64 @select_icmp_sle_cmov(i64 %a, i64 %b, i64 %c, i64 %d) {
 ; CHECK-LABEL: select_icmp_sle_cmov:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    cmpq %rsi, %rdi
-; CHECK-NEXT:    cmovgq %rcx, %rdx
 ; CHECK-NEXT:    movq %rdx, %rax
+; CHECK-NEXT:    cmpq %rsi, %rdi
+; CHECK-NEXT:    cmovgq %rcx, %rax
 ; CHECK-NEXT:    retq
   %1 = icmp sle i64 %a, %b
   %2 = select i1 %1, i64 %c, i64 %d
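
The cmov tests change shape the same way: the copy into %rax is hoisted above
the compare, and the conditional move then targets %rax directly. For example
(illustrative, mirroring select_cmov_i64 above):

define i64 @sel(i1 zeroext %cond, i64 %a, i64 %b) {
  %r = select i1 %cond, i64 %a, i64 %b
  ret i64 %r
}

previously compiled to "testb $1, %dil", "cmoveq %rdx, %rsi",
"movq %rsi, %rax"; it now compiles to "movq %rsi, %rax", "testb $1, %dil",
"cmoveq %rdx, %rax".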

Modified: llvm/trunk/test/CodeGen/X86/fast-isel-select-pseudo-cmov.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fast-isel-select-pseudo-cmov.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fast-isel-select-pseudo-cmov.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fast-isel-select-pseudo-cmov.ll Wed Sep 19 11:59:08 2018
@@ -281,11 +281,14 @@ define i8 @select_icmp_sle_i8(i64 %a, i6
 ; CHECK-LABEL: select_icmp_sle_i8:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    cmpq %rsi, %rdi
-; CHECK-NEXT:    jle LBB12_2
-; CHECK-NEXT:  ## %bb.1:
-; CHECK-NEXT:    movl %ecx, %edx
-; CHECK-NEXT:  LBB12_2:
+; CHECK-NEXT:    jle LBB12_1
+; CHECK-NEXT:  ## %bb.2:
+; CHECK-NEXT:    movl %ecx, %eax
+; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
+; CHECK-NEXT:    retq
+; CHECK-NEXT:  LBB12_1:
 ; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
   %1 = icmp sle i64 %a, %b
   %2 = select i1 %1, i8 %c, i8 %d

Modified: llvm/trunk/test/CodeGen/X86/fast-isel-sext-zext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fast-isel-sext-zext.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fast-isel-sext-zext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fast-isel-sext-zext.ll Wed Sep 19 11:59:08 2018
@@ -12,9 +12,10 @@ define i8 @test1(i8 %x) nounwind {
 ;
 ; X64-LABEL: test1:
 ; X64:       ## %bb.0:
-; X64-NEXT:    andb $1, %dil
-; X64-NEXT:    negb %dil
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    andb $1, %al
+; X64-NEXT:    negb %al
+; X64-NEXT:    ## kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
   %z = trunc i8 %x to i1
   %u = sext i1 %z to i8
@@ -92,8 +93,9 @@ define i8 @test5(i8 %x) nounwind {
 ;
 ; X64-LABEL: test5:
 ; X64:       ## %bb.0:
-; X64-NEXT:    andb $1, %dil
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    andb $1, %al
+; X64-NEXT:    ## kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
   %z = trunc i8 %x to i1
   %u = zext i1 %z to i8

Modified: llvm/trunk/test/CodeGen/X86/fast-isel-shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fast-isel-shift.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fast-isel-shift.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fast-isel-shift.ll Wed Sep 19 11:59:08 2018
@@ -5,8 +5,10 @@ define i8 @shl_i8(i8 %a, i8 %b) {
 ; CHECK-LABEL: shl_i8:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    movl %esi, %ecx
-; CHECK-NEXT:    shlb %cl, %dil
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    ## kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shlb %cl, %al
+; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
   %c = shl i8 %a, %b
   ret i8 %c
@@ -16,9 +18,11 @@ define i16 @shl_i16(i16 %a, i16 %b) {
 ; CHECK-LABEL: shl_i16:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    movl %esi, %ecx
-; CHECK-NEXT:    ## kill: def $cl killed $cx
-; CHECK-NEXT:    shlw %cl, %di
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    ## kill: def $cx killed $cx killed $ecx
+; CHECK-NEXT:    ## kill: def $cl killed $cx
+; CHECK-NEXT:    shlw %cl, %ax
+; CHECK-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
   %c = shl i16 %a, %b
   ret i16 %c
@@ -28,9 +32,9 @@ define i32 @shl_i32(i32 %a, i32 %b) {
 ; CHECK-LABEL: shl_i32:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    movl %esi, %ecx
-; CHECK-NEXT:    ## kill: def $cl killed $ecx
-; CHECK-NEXT:    shll %cl, %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    ## kill: def $cl killed $ecx
+; CHECK-NEXT:    shll %cl, %eax
 ; CHECK-NEXT:    retq
   %c = shl i32 %a, %b
   ret i32 %c
@@ -40,9 +44,9 @@ define i64 @shl_i64(i64 %a, i64 %b) {
 ; CHECK-LABEL: shl_i64:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    movq %rsi, %rcx
-; CHECK-NEXT:    ## kill: def $cl killed $rcx
-; CHECK-NEXT:    shlq %cl, %rdi
 ; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    ## kill: def $cl killed $rcx
+; CHECK-NEXT:    shlq %cl, %rax
 ; CHECK-NEXT:    retq
   %c = shl i64 %a, %b
   ret i64 %c
@@ -52,8 +56,10 @@ define i8 @lshr_i8(i8 %a, i8 %b) {
 ; CHECK-LABEL: lshr_i8:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    movl %esi, %ecx
-; CHECK-NEXT:    shrb %cl, %dil
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    ## kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shrb %cl, %al
+; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
   %c = lshr i8 %a, %b
   ret i8 %c
@@ -63,9 +69,11 @@ define i16 @lshr_i16(i16 %a, i16 %b) {
 ; CHECK-LABEL: lshr_i16:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    movl %esi, %ecx
-; CHECK-NEXT:    ## kill: def $cl killed $cx
-; CHECK-NEXT:    shrw %cl, %di
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    ## kill: def $cx killed $cx killed $ecx
+; CHECK-NEXT:    ## kill: def $cl killed $cx
+; CHECK-NEXT:    shrw %cl, %ax
+; CHECK-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
   %c = lshr i16 %a, %b
   ret i16 %c
@@ -75,9 +83,9 @@ define i32 @lshr_i32(i32 %a, i32 %b) {
 ; CHECK-LABEL: lshr_i32:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    movl %esi, %ecx
-; CHECK-NEXT:    ## kill: def $cl killed $ecx
-; CHECK-NEXT:    shrl %cl, %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    ## kill: def $cl killed $ecx
+; CHECK-NEXT:    shrl %cl, %eax
 ; CHECK-NEXT:    retq
   %c = lshr i32 %a, %b
   ret i32 %c
@@ -87,9 +95,9 @@ define i64 @lshr_i64(i64 %a, i64 %b) {
 ; CHECK-LABEL: lshr_i64:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    movq %rsi, %rcx
-; CHECK-NEXT:    ## kill: def $cl killed $rcx
-; CHECK-NEXT:    shrq %cl, %rdi
 ; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    ## kill: def $cl killed $rcx
+; CHECK-NEXT:    shrq %cl, %rax
 ; CHECK-NEXT:    retq
   %c = lshr i64 %a, %b
   ret i64 %c
@@ -99,8 +107,10 @@ define i8 @ashr_i8(i8 %a, i8 %b) {
 ; CHECK-LABEL: ashr_i8:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    movl %esi, %ecx
-; CHECK-NEXT:    sarb %cl, %dil
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    ## kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    sarb %cl, %al
+; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
   %c = ashr i8 %a, %b
   ret i8 %c
@@ -110,9 +120,11 @@ define i16 @ashr_i16(i16 %a, i16 %b) {
 ; CHECK-LABEL: ashr_i16:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    movl %esi, %ecx
-; CHECK-NEXT:    ## kill: def $cl killed $cx
-; CHECK-NEXT:    sarw %cl, %di
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    ## kill: def $cx killed $cx killed $ecx
+; CHECK-NEXT:    ## kill: def $cl killed $cx
+; CHECK-NEXT:    sarw %cl, %ax
+; CHECK-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
   %c = ashr i16 %a, %b
   ret i16 %c
@@ -122,9 +134,9 @@ define i32 @ashr_i32(i32 %a, i32 %b) {
 ; CHECK-LABEL: ashr_i32:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    movl %esi, %ecx
-; CHECK-NEXT:    ## kill: def $cl killed $ecx
-; CHECK-NEXT:    sarl %cl, %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    ## kill: def $cl killed $ecx
+; CHECK-NEXT:    sarl %cl, %eax
 ; CHECK-NEXT:    retq
   %c = ashr i32 %a, %b
   ret i32 %c
@@ -134,9 +146,9 @@ define i64 @ashr_i64(i64 %a, i64 %b) {
 ; CHECK-LABEL: ashr_i64:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    movq %rsi, %rcx
-; CHECK-NEXT:    ## kill: def $cl killed $rcx
-; CHECK-NEXT:    sarq %cl, %rdi
 ; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    ## kill: def $cl killed $rcx
+; CHECK-NEXT:    sarq %cl, %rax
 ; CHECK-NEXT:    retq
   %c = ashr i64 %a, %b
   ret i64 %c
@@ -145,8 +157,9 @@ define i64 @ashr_i64(i64 %a, i64 %b) {
 define i8 @shl_imm1_i8(i8 %a) {
 ; CHECK-LABEL: shl_imm1_i8:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    shlb $1, %dil
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    shlb $1, %al
+; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
   %c = shl i8 %a, 1
   ret i8 %c
@@ -185,8 +198,9 @@ define i64 @shl_imm1_i64(i64 %a) {
 define i8 @lshr_imm1_i8(i8 %a) {
 ; CHECK-LABEL: lshr_imm1_i8:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    shrb $1, %dil
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    shrb $1, %al
+; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
   %c = lshr i8 %a, 1
   ret i8 %c
@@ -195,8 +209,9 @@ define i8 @lshr_imm1_i8(i8 %a) {
 define i16 @lshr_imm1_i16(i16 %a) {
 ; CHECK-LABEL: lshr_imm1_i16:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    shrw $1, %di
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    shrw $1, %ax
+; CHECK-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
   %c = lshr i16 %a, 1
   ret i16 %c
@@ -205,8 +220,8 @@ define i16 @lshr_imm1_i16(i16 %a) {
 define i32 @lshr_imm1_i32(i32 %a) {
 ; CHECK-LABEL: lshr_imm1_i32:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    shrl $1, %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    shrl $1, %eax
 ; CHECK-NEXT:    retq
   %c = lshr i32 %a, 1
   ret i32 %c
@@ -215,8 +230,8 @@ define i32 @lshr_imm1_i32(i32 %a) {
 define i64 @lshr_imm1_i64(i64 %a) {
 ; CHECK-LABEL: lshr_imm1_i64:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    shrq $1, %rdi
 ; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    shrq $1, %rax
 ; CHECK-NEXT:    retq
   %c = lshr i64 %a, 1
   ret i64 %c
@@ -225,8 +240,9 @@ define i64 @lshr_imm1_i64(i64 %a) {
 define i8 @ashr_imm1_i8(i8 %a) {
 ; CHECK-LABEL: ashr_imm1_i8:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    sarb $1, %dil
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    sarb $1, %al
+; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
   %c = ashr i8 %a, 1
   ret i8 %c
@@ -235,8 +251,9 @@ define i8 @ashr_imm1_i8(i8 %a) {
 define i16 @ashr_imm1_i16(i16 %a) {
 ; CHECK-LABEL: ashr_imm1_i16:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    sarw $1, %di
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    sarw $1, %ax
+; CHECK-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
   %c = ashr i16 %a, 1
   ret i16 %c
@@ -245,8 +262,8 @@ define i16 @ashr_imm1_i16(i16 %a) {
 define i32 @ashr_imm1_i32(i32 %a) {
 ; CHECK-LABEL: ashr_imm1_i32:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    sarl $1, %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    sarl $1, %eax
 ; CHECK-NEXT:    retq
   %c = ashr i32 %a, 1
   ret i32 %c
@@ -255,8 +272,8 @@ define i32 @ashr_imm1_i32(i32 %a) {
 define i64 @ashr_imm1_i64(i64 %a) {
 ; CHECK-LABEL: ashr_imm1_i64:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    sarq $1, %rdi
 ; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    sarq $1, %rax
 ; CHECK-NEXT:    retq
   %c = ashr i64 %a, 1
   ret i64 %c
@@ -265,8 +282,9 @@ define i64 @ashr_imm1_i64(i64 %a) {
 define i8 @shl_imm4_i8(i8 %a) {
 ; CHECK-LABEL: shl_imm4_i8:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    shlb $4, %dil
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    shlb $4, %al
+; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
   %c = shl i8 %a, 4
   ret i8 %c
@@ -275,8 +293,9 @@ define i8 @shl_imm4_i8(i8 %a) {
 define i16 @shl_imm4_i16(i16 %a) {
 ; CHECK-LABEL: shl_imm4_i16:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    shlw $4, %di
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    shlw $4, %ax
+; CHECK-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
   %c = shl i16 %a, 4
   ret i16 %c
@@ -285,8 +304,8 @@ define i16 @shl_imm4_i16(i16 %a) {
 define i32 @shl_imm4_i32(i32 %a) {
 ; CHECK-LABEL: shl_imm4_i32:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    shll $4, %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    shll $4, %eax
 ; CHECK-NEXT:    retq
   %c = shl i32 %a, 4
   ret i32 %c
@@ -295,8 +314,8 @@ define i32 @shl_imm4_i32(i32 %a) {
 define i64 @shl_imm4_i64(i64 %a) {
 ; CHECK-LABEL: shl_imm4_i64:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    shlq $4, %rdi
 ; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    shlq $4, %rax
 ; CHECK-NEXT:    retq
   %c = shl i64 %a, 4
   ret i64 %c
@@ -305,8 +324,9 @@ define i64 @shl_imm4_i64(i64 %a) {
 define i8 @lshr_imm4_i8(i8 %a) {
 ; CHECK-LABEL: lshr_imm4_i8:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    shrb $4, %dil
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    shrb $4, %al
+; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
   %c = lshr i8 %a, 4
   ret i8 %c
@@ -315,8 +335,9 @@ define i8 @lshr_imm4_i8(i8 %a) {
 define i16 @lshr_imm4_i16(i16 %a) {
 ; CHECK-LABEL: lshr_imm4_i16:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    shrw $4, %di
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    shrw $4, %ax
+; CHECK-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
   %c = lshr i16 %a, 4
   ret i16 %c
@@ -325,8 +346,8 @@ define i16 @lshr_imm4_i16(i16 %a) {
 define i32 @lshr_imm4_i32(i32 %a) {
 ; CHECK-LABEL: lshr_imm4_i32:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    shrl $4, %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    shrl $4, %eax
 ; CHECK-NEXT:    retq
   %c = lshr i32 %a, 4
   ret i32 %c
@@ -335,8 +356,8 @@ define i32 @lshr_imm4_i32(i32 %a) {
 define i64 @lshr_imm4_i64(i64 %a) {
 ; CHECK-LABEL: lshr_imm4_i64:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    shrq $4, %rdi
 ; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    shrq $4, %rax
 ; CHECK-NEXT:    retq
   %c = lshr i64 %a, 4
   ret i64 %c
@@ -345,8 +366,9 @@ define i64 @lshr_imm4_i64(i64 %a) {
 define i8 @ashr_imm4_i8(i8 %a) {
 ; CHECK-LABEL: ashr_imm4_i8:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    sarb $4, %dil
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    sarb $4, %al
+; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
   %c = ashr i8 %a, 4
   ret i8 %c
@@ -355,8 +377,9 @@ define i8 @ashr_imm4_i8(i8 %a) {
 define i16 @ashr_imm4_i16(i16 %a) {
 ; CHECK-LABEL: ashr_imm4_i16:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    sarw $4, %di
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    sarw $4, %ax
+; CHECK-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
   %c = ashr i16 %a, 4
   ret i16 %c
@@ -365,8 +388,8 @@ define i16 @ashr_imm4_i16(i16 %a) {
 define i32 @ashr_imm4_i32(i32 %a) {
 ; CHECK-LABEL: ashr_imm4_i32:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    sarl $4, %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    sarl $4, %eax
 ; CHECK-NEXT:    retq
   %c = ashr i32 %a, 4
   ret i32 %c
@@ -375,8 +398,8 @@ define i32 @ashr_imm4_i32(i32 %a) {
 define i64 @ashr_imm4_i64(i64 %a) {
 ; CHECK-LABEL: ashr_imm4_i64:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    sarq $4, %rdi
 ; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    sarq $4, %rax
 ; CHECK-NEXT:    retq
   %c = ashr i64 %a, 4
   ret i64 %c
@@ -386,9 +409,10 @@ define i64 @ashr_imm4_i64(i64 %a) {
 define i8 @PR36731(i8 %a) {
 ; CHECK-LABEL: PR36731:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    movb $255, %cl
-; CHECK-NEXT:    shlb %cl, %dil
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    movb $255, %cl
+; CHECK-NEXT:    shlb %cl, %al
+; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
   %b = shl i8 %a, -1
   ret i8 %b

Modified: llvm/trunk/test/CodeGen/X86/fast-isel-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fast-isel-store.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fast-isel-store.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fast-isel-store.ll Wed Sep 19 11:59:08 2018
@@ -11,8 +11,8 @@
 define i32 @test_store_32(i32* nocapture %addr, i32 %value) {
 ; ALL32-LABEL: test_store_32:
 ; ALL32:       # %bb.0: # %entry
-; ALL32-NEXT:    movl %esi, (%rdi)
 ; ALL32-NEXT:    movl %esi, %eax
+; ALL32-NEXT:    movl %esi, (%rdi)
 ; ALL32-NEXT:    retq
 ;
 ; ALL64-LABEL: test_store_32:
@@ -29,8 +29,9 @@ entry:
 define i16 @test_store_16(i16* nocapture %addr, i16 %value) {
 ; ALL32-LABEL: test_store_16:
 ; ALL32:       # %bb.0: # %entry
-; ALL32-NEXT:    movw %si, (%rdi)
 ; ALL32-NEXT:    movl %esi, %eax
+; ALL32-NEXT:    movw %ax, (%rdi)
+; ALL32-NEXT:    # kill: def $ax killed $ax killed $eax
 ; ALL32-NEXT:    retq
 ;
 ; ALL64-LABEL: test_store_16:

Modified: llvm/trunk/test/CodeGen/X86/fixup-bw-copy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fixup-bw-copy.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fixup-bw-copy.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fixup-bw-copy.ll Wed Sep 19 11:59:08 2018
@@ -7,15 +7,11 @@
 target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
 
 define i8 @test_movb(i8 %a0) {
-; BWON64-LABEL: test_movb:
-; BWON64:       # %bb.0:
-; BWON64-NEXT:    movl %edi, %eax
-; BWON64-NEXT:    retq
-;
-; BWOFF64-LABEL: test_movb:
-; BWOFF64:       # %bb.0:
-; BWOFF64-NEXT:    movb %dil, %al
-; BWOFF64-NEXT:    retq
+; X64-LABEL: test_movb:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
 ;
 ; X32-LABEL: test_movb:
 ; X32:       # %bb.0:
@@ -25,15 +21,11 @@ define i8 @test_movb(i8 %a0) {
 }
 
 define i16 @test_movw(i16 %a0) {
-; BWON64-LABEL: test_movw:
-; BWON64:       # %bb.0:
-; BWON64-NEXT:    movl %edi, %eax
-; BWON64-NEXT:    retq
-;
-; BWOFF64-LABEL: test_movw:
-; BWOFF64:       # %bb.0:
-; BWOFF64-NEXT:    movw %di, %ax
-; BWOFF64-NEXT:    retq
+; X64-LABEL: test_movw:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    retq
 ;
 ; BWON32-LABEL: test_movw:
 ; BWON32:       # %bb.0:

Modified: llvm/trunk/test/CodeGen/X86/fma.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fma.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fma.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fma.ll Wed Sep 19 11:59:08 2018
@@ -1351,14 +1351,13 @@ define <16 x float> @test_v16f32(<16 x f
 ; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1,2,3]
 ; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
 ; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
-; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
-; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x40]
-; FMACALL64-NEXT:    unpcklps %xmm0, %xmm1 ## encoding: [0x0f,0x14,0xc8]
-; FMACALL64-NEXT:    ## xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; FMACALL64-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Folded Reload
-; FMACALL64-NEXT:    ## encoding: [0x66,0x0f,0x14,0x4c,0x24,0x20]
-; FMACALL64-NEXT:    ## xmm1 = xmm1[0],mem[0]
-; FMACALL64-NEXT:    movaps %xmm1, %xmm3 ## encoding: [0x0f,0x28,0xd9]
+; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 ## 16-byte Reload
+; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x5c,0x24,0x40]
+; FMACALL64-NEXT:    unpcklps %xmm0, %xmm3 ## encoding: [0x0f,0x14,0xd8]
+; FMACALL64-NEXT:    ## xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
+; FMACALL64-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 ## 16-byte Folded Reload
+; FMACALL64-NEXT:    ## encoding: [0x66,0x0f,0x14,0x5c,0x24,0x20]
+; FMACALL64-NEXT:    ## xmm3 = xmm3[0],mem[0]
 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
 ; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x60]
 ; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload

Modified: llvm/trunk/test/CodeGen/X86/fold-vector-sext-crash2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fold-vector-sext-crash2.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fold-vector-sext-crash2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fold-vector-sext-crash2.ll Wed Sep 19 11:59:08 2018
@@ -28,6 +28,7 @@ define <2 x i256> @test_sext1() {
 ;
 ; X64-LABEL: test_sext1:
 ; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    xorps %xmm0, %xmm0
 ; X64-NEXT:    movaps %xmm0, 16(%rdi)
 ; X64-NEXT:    movaps %xmm0, (%rdi)
@@ -35,7 +36,6 @@ define <2 x i256> @test_sext1() {
 ; X64-NEXT:    movq $-1, 48(%rdi)
 ; X64-NEXT:    movq $-1, 40(%rdi)
 ; X64-NEXT:    movq $-99, 32(%rdi)
-; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    retq
   %Se = sext <2 x i8> <i8 -100, i8 -99> to <2 x i256>
   %Shuff = shufflevector <2 x i256> zeroinitializer, <2 x i256> %Se, <2 x i32> <i32 1, i32 3>
@@ -66,6 +66,7 @@ define <2 x i256> @test_sext2() {
 ;
 ; X64-LABEL: test_sext2:
 ; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    xorps %xmm0, %xmm0
 ; X64-NEXT:    movaps %xmm0, 16(%rdi)
 ; X64-NEXT:    movaps %xmm0, (%rdi)
@@ -73,7 +74,6 @@ define <2 x i256> @test_sext2() {
 ; X64-NEXT:    movq $-1, 48(%rdi)
 ; X64-NEXT:    movq $-1, 40(%rdi)
 ; X64-NEXT:    movq $-1999, 32(%rdi) # imm = 0xF831
-; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    retq
   %Se = sext <2 x i128> <i128 -2000, i128 -1999> to <2 x i256>
   %Shuff = shufflevector <2 x i256> zeroinitializer, <2 x i256> %Se, <2 x i32> <i32 1, i32 3>
@@ -104,13 +104,13 @@ define <2 x i256> @test_zext1() {
 ;
 ; X64-LABEL: test_zext1:
 ; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    xorps %xmm0, %xmm0
 ; X64-NEXT:    movaps %xmm0, 48(%rdi)
 ; X64-NEXT:    movaps %xmm0, 16(%rdi)
 ; X64-NEXT:    movaps %xmm0, (%rdi)
 ; X64-NEXT:    movq $0, 40(%rdi)
 ; X64-NEXT:    movq $254, 32(%rdi)
-; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    retq
   %Se = zext <2 x i8> <i8 -1, i8 -2> to <2 x i256>
   %Shuff = shufflevector <2 x i256> zeroinitializer, <2 x i256> %Se, <2 x i32> <i32 1, i32 3>
@@ -141,13 +141,13 @@ define <2 x i256> @test_zext2() {
 ;
 ; X64-LABEL: test_zext2:
 ; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    xorps %xmm0, %xmm0
 ; X64-NEXT:    movaps %xmm0, 48(%rdi)
 ; X64-NEXT:    movaps %xmm0, 16(%rdi)
 ; X64-NEXT:    movaps %xmm0, (%rdi)
 ; X64-NEXT:    movq $-1, 40(%rdi)
 ; X64-NEXT:    movq $-2, 32(%rdi)
-; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    retq
   %Se = zext <2 x i128> <i128 -1, i128 -2> to <2 x i256>
   %Shuff = shufflevector <2 x i256> zeroinitializer, <2 x i256> %Se, <2 x i32> <i32 1, i32 3>

Modified: llvm/trunk/test/CodeGen/X86/funnel-shift-rot.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/funnel-shift-rot.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/funnel-shift-rot.ll (original)
+++ llvm/trunk/test/CodeGen/X86/funnel-shift-rot.ll Wed Sep 19 11:59:08 2018
@@ -25,8 +25,9 @@ define i8 @rotl_i8_const_shift(i8 %x) no
 ;
 ; X64-AVX2-LABEL: rotl_i8_const_shift:
 ; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    rolb $3, %dil
 ; X64-AVX2-NEXT:    movl %edi, %eax
+; X64-AVX2-NEXT:    rolb $3, %al
+; X64-AVX2-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-AVX2-NEXT:    retq
   %f = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 3)
   ret i8 %f
@@ -44,8 +45,8 @@ define i64 @rotl_i64_const_shift(i64 %x)
 ;
 ; X64-AVX2-LABEL: rotl_i64_const_shift:
 ; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    rolq $3, %rdi
 ; X64-AVX2-NEXT:    movq %rdi, %rax
+; X64-AVX2-NEXT:    rolq $3, %rax
 ; X64-AVX2-NEXT:    retq
   %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 3)
   ret i64 %f
@@ -62,8 +63,10 @@ define i16 @rotl_i16(i16 %x, i16 %z) nou
 ; X64-AVX2-LABEL: rotl_i16:
 ; X64-AVX2:       # %bb.0:
 ; X64-AVX2-NEXT:    movl %esi, %ecx
-; X64-AVX2-NEXT:    rolw %cl, %di
 ; X64-AVX2-NEXT:    movl %edi, %eax
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    rolw %cl, %ax
+; X64-AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-AVX2-NEXT:    retq
   %f = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 %z)
   ret i16 %f
@@ -80,8 +83,9 @@ define i32 @rotl_i32(i32 %x, i32 %z) nou
 ; X64-AVX2-LABEL: rotl_i32:
 ; X64-AVX2:       # %bb.0:
 ; X64-AVX2-NEXT:    movl %esi, %ecx
-; X64-AVX2-NEXT:    roll %cl, %edi
 ; X64-AVX2-NEXT:    movl %edi, %eax
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    roll %cl, %eax
 ; X64-AVX2-NEXT:    retq
   %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %z)
   ret i32 %f
@@ -174,8 +178,9 @@ define i8 @rotr_i8_const_shift(i8 %x) no
 ;
 ; X64-AVX2-LABEL: rotr_i8_const_shift:
 ; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    rorb $3, %dil
 ; X64-AVX2-NEXT:    movl %edi, %eax
+; X64-AVX2-NEXT:    rorb $3, %al
+; X64-AVX2-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-AVX2-NEXT:    retq
   %f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 3)
   ret i8 %f
@@ -190,8 +195,8 @@ define i32 @rotr_i32_const_shift(i32 %x)
 ;
 ; X64-AVX2-LABEL: rotr_i32_const_shift:
 ; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    rorl $3, %edi
 ; X64-AVX2-NEXT:    movl %edi, %eax
+; X64-AVX2-NEXT:    rorl $3, %eax
 ; X64-AVX2-NEXT:    retq
   %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 3)
   ret i32 %f
@@ -210,8 +215,10 @@ define i16 @rotr_i16(i16 %x, i16 %z) nou
 ; X64-AVX2-LABEL: rotr_i16:
 ; X64-AVX2:       # %bb.0:
 ; X64-AVX2-NEXT:    movl %esi, %ecx
-; X64-AVX2-NEXT:    rorw %cl, %di
 ; X64-AVX2-NEXT:    movl %edi, %eax
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    rorw %cl, %ax
+; X64-AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-AVX2-NEXT:    retq
   %f = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 %z)
   ret i16 %f
@@ -257,9 +264,10 @@ define i64 @rotr_i64(i64 %x, i64 %z) nou
 ;
 ; X64-AVX2-LABEL: rotr_i64:
 ; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %esi, %ecx
-; X64-AVX2-NEXT:    rorq %cl, %rdi
+; X64-AVX2-NEXT:    movq %rsi, %rcx
 ; X64-AVX2-NEXT:    movq %rdi, %rax
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-AVX2-NEXT:    rorq %cl, %rax
 ; X64-AVX2-NEXT:    retq
   %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z)
   ret i64 %f
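
Variable-count rotates show the hinting applied to both operands: the
amount is copied into %rcx so the instruction can use %cl, and the
value into %rax, leaving the argument registers untouched. A minimal
sketch, assuming the same llvm.fshl lowering as the tests above
(hypothetical function name):

  ; hypothetical example, not taken from the test suite
  declare i64 @llvm.fshl.i64(i64, i64, i64)

  define i64 @rotl_i64_var(i64 %x, i64 %z) {
    %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %z)
    ret i64 %f
  }

  ;   movq %rsi, %rcx
  ;   movq %rdi, %rax
  ;   # kill: def $cl killed $cl killed $rcx
  ;   rolq %cl, %rax
  ;   retq

Note how rotr_i64 above widens "movl %esi, %ecx" to "movq %rsi, %rcx":
the hint now refers to the full 64-bit amount, and the kill comment
records that the rotate only consumes $cl.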

Modified: llvm/trunk/test/CodeGen/X86/funnel-shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/funnel-shift.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/funnel-shift.ll (original)
+++ llvm/trunk/test/CodeGen/X86/funnel-shift.ll Wed Sep 19 11:59:08 2018
@@ -33,11 +33,11 @@ define i32 @fshl_i32(i32 %x, i32 %y, i32
 ;
 ; X64-AVX2-LABEL: fshl_i32:
 ; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    andl $31, %edx
-; X64-AVX2-NEXT:    movl %edi, %eax
 ; X64-AVX2-NEXT:    movl %edx, %ecx
+; X64-AVX2-NEXT:    andl $31, %ecx
+; X64-AVX2-NEXT:    movl %edi, %eax
 ; X64-AVX2-NEXT:    shldl %cl, %esi, %eax
-; X64-AVX2-NEXT:    testl %edx, %edx
+; X64-AVX2-NEXT:    testl %ecx, %ecx
 ; X64-AVX2-NEXT:    cmovel %edi, %eax
 ; X64-AVX2-NEXT:    retq
   %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
@@ -152,8 +152,8 @@ define i32 @fshl_i32_const_shift(i32 %x,
 ;
 ; X64-AVX2-LABEL: fshl_i32_const_shift:
 ; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    shldl $9, %esi, %edi
 ; X64-AVX2-NEXT:    movl %edi, %eax
+; X64-AVX2-NEXT:    shldl $9, %esi, %eax
 ; X64-AVX2-NEXT:    retq
   %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
   ret i32 %f
@@ -171,8 +171,8 @@ define i32 @fshl_i32_const_overshift(i32
 ;
 ; X64-AVX2-LABEL: fshl_i32_const_overshift:
 ; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    shldl $9, %esi, %edi
 ; X64-AVX2-NEXT:    movl %edi, %eax
+; X64-AVX2-NEXT:    shldl $9, %esi, %eax
 ; X64-AVX2-NEXT:    retq
   %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41)
   ret i32 %f
@@ -192,8 +192,8 @@ define i64 @fshl_i64_const_overshift(i64
 ;
 ; X64-AVX2-LABEL: fshl_i64_const_overshift:
 ; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    shldq $41, %rsi, %rdi
 ; X64-AVX2-NEXT:    movq %rdi, %rax
+; X64-AVX2-NEXT:    shldq $41, %rsi, %rax
 ; X64-AVX2-NEXT:    retq
   %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105)
   ret i64 %f
@@ -231,11 +231,11 @@ define i32 @fshr_i32(i32 %x, i32 %y, i32
 ;
 ; X64-AVX2-LABEL: fshr_i32:
 ; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    andl $31, %edx
-; X64-AVX2-NEXT:    movl %esi, %eax
 ; X64-AVX2-NEXT:    movl %edx, %ecx
+; X64-AVX2-NEXT:    andl $31, %ecx
+; X64-AVX2-NEXT:    movl %esi, %eax
 ; X64-AVX2-NEXT:    shrdl %cl, %edi, %eax
-; X64-AVX2-NEXT:    testl %edx, %edx
+; X64-AVX2-NEXT:    testl %ecx, %ecx
 ; X64-AVX2-NEXT:    cmovel %esi, %eax
 ; X64-AVX2-NEXT:    retq
   %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
@@ -346,8 +346,8 @@ define i32 @fshr_i32_const_shift(i32 %x,
 ;
 ; X64-AVX2-LABEL: fshr_i32_const_shift:
 ; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    shldl $23, %esi, %edi
 ; X64-AVX2-NEXT:    movl %edi, %eax
+; X64-AVX2-NEXT:    shldl $23, %esi, %eax
 ; X64-AVX2-NEXT:    retq
   %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
   ret i32 %f
@@ -365,8 +365,8 @@ define i32 @fshr_i32_const_overshift(i32
 ;
 ; X64-AVX2-LABEL: fshr_i32_const_overshift:
 ; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    shldl $23, %esi, %edi
 ; X64-AVX2-NEXT:    movl %edi, %eax
+; X64-AVX2-NEXT:    shldl $23, %esi, %eax
 ; X64-AVX2-NEXT:    retq
   %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41)
   ret i32 %f
@@ -386,8 +386,8 @@ define i64 @fshr_i64_const_overshift(i64
 ;
 ; X64-AVX2-LABEL: fshr_i64_const_overshift:
 ; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    shldq $23, %rsi, %rdi
 ; X64-AVX2-NEXT:    movq %rdi, %rax
+; X64-AVX2-NEXT:    shldq $23, %rsi, %rax
 ; X64-AVX2-NEXT:    retq
   %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105)
   ret i64 %f

Modified: llvm/trunk/test/CodeGen/X86/ghc-cc64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/ghc-cc64.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/ghc-cc64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/ghc-cc64.ll Wed Sep 19 11:59:08 2018
@@ -22,8 +22,8 @@
 
 define void @zap(i64 %a, i64 %b) nounwind {
 entry:
-  ; CHECK:      movq %rdi, %r13
-  ; CHECK-NEXT: movq %rsi, %rbp
+  ; CHECK:      movq %rsi, %rbp
+  ; CHECK-NEXT: movq %rdi, %r13
   ; CHECK-NEXT: callq addtwo
   %0 = call ghccc i64 @addtwo(i64 %a, i64 %b)
   ; CHECK:      callq foo

Modified: llvm/trunk/test/CodeGen/X86/hipe-cc64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/hipe-cc64.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/hipe-cc64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/hipe-cc64.ll Wed Sep 19 11:59:08 2018
@@ -4,11 +4,10 @@
 
 define void @zap(i64 %a, i64 %b) nounwind {
 entry:
-  ; CHECK:      movq %rsi, %rax
+  ; CHECK:      movq %rsi, %rdx
   ; CHECK-NEXT: movl $8, %ecx
   ; CHECK-NEXT: movl $9, %r8d
   ; CHECK-NEXT: movq %rdi, %rsi
-  ; CHECK-NEXT: movq %rax, %rdx
   ; CHECK-NEXT: callq addfour
   %0 = call cc 11 {i64, i64, i64} @addfour(i64 undef, i64 undef, i64 %a, i64 %b, i64 8, i64 9)
   %res = extractvalue {i64, i64, i64} %0, 2

Modified: llvm/trunk/test/CodeGen/X86/i128-mul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/i128-mul.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/i128-mul.ll (original)
+++ llvm/trunk/test/CodeGen/X86/i128-mul.ll Wed Sep 19 11:59:08 2018
@@ -336,17 +336,17 @@ define i64 @mul1(i64 %n, i64* nocapture
 ; X64-BMI-NEXT:    movq %rcx, %r8
 ; X64-BMI-NEXT:    movq %rdx, %r9
 ; X64-BMI-NEXT:    xorl %r10d, %r10d
-; X64-BMI-NEXT:    xorl %eax, %eax
+; X64-BMI-NEXT:    xorl %ecx, %ecx
 ; X64-BMI-NEXT:    .p2align 4, 0x90
 ; X64-BMI-NEXT:  .LBB1_2: # %for.body
 ; X64-BMI-NEXT:    # =>This Inner Loop Header: Depth=1
 ; X64-BMI-NEXT:    movq %r8, %rdx
-; X64-BMI-NEXT:    mulxq (%r9,%rax,8), %rcx, %rdx
-; X64-BMI-NEXT:    addq %r10, %rcx
+; X64-BMI-NEXT:    mulxq (%r9,%rcx,8), %rax, %rdx
+; X64-BMI-NEXT:    addq %r10, %rax
 ; X64-BMI-NEXT:    adcq $0, %rdx
-; X64-BMI-NEXT:    movq %rcx, (%rsi,%rax,8)
-; X64-BMI-NEXT:    incq %rax
-; X64-BMI-NEXT:    cmpq %rax, %rdi
+; X64-BMI-NEXT:    movq %rax, (%rsi,%rcx,8)
+; X64-BMI-NEXT:    incq %rcx
+; X64-BMI-NEXT:    cmpq %rcx, %rdi
 ; X64-BMI-NEXT:    movq %rdx, %r10
 ; X64-BMI-NEXT:    jne .LBB1_2
 ; X64-BMI-NEXT:  .LBB1_3: # %for.end

Modified: llvm/trunk/test/CodeGen/X86/iabs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/iabs.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/iabs.ll (original)
+++ llvm/trunk/test/CodeGen/X86/iabs.ll Wed Sep 19 11:59:08 2018
@@ -22,10 +22,11 @@ define i8 @test_i8(i8 %a) nounwind {
 ; X64-LABEL: test_i8:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    sarb $7, %al
-; X64-NEXT:    addb %al, %dil
-; X64-NEXT:    xorb %al, %dil
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %eax, %ecx
+; X64-NEXT:    sarb $7, %cl
+; X64-NEXT:    addb %cl, %al
+; X64-NEXT:    xorb %cl, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
   %tmp1neg = sub i8 0, %a
   %b = icmp sgt i8 %a, -1

Modified: llvm/trunk/test/CodeGen/X86/imul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/imul.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/imul.ll (original)
+++ llvm/trunk/test/CodeGen/X86/imul.ll Wed Sep 19 11:59:08 2018
@@ -39,8 +39,8 @@ define i64 @mul4_64(i64 %A) {
 define i32 @mul4096_32(i32 %A) {
 ; X64-LABEL: mul4096_32:
 ; X64:       # %bb.0:
-; X64-NEXT:    shll $12, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    shll $12, %eax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: mul4096_32:
@@ -55,8 +55,8 @@ define i32 @mul4096_32(i32 %A) {
 define i64 @mul4096_64(i64 %A) {
 ; X64-LABEL: mul4096_64:
 ; X64:       # %bb.0:
-; X64-NEXT:    shlq $12, %rdi
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    shlq $12, %rax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: mul4096_64:
@@ -73,9 +73,9 @@ define i64 @mul4096_64(i64 %A) {
 define i32 @mulmin4096_32(i32 %A) {
 ; X64-LABEL: mulmin4096_32:
 ; X64:       # %bb.0:
-; X64-NEXT:    shll $12, %edi
-; X64-NEXT:    negl %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    shll $12, %eax
+; X64-NEXT:    negl %eax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: mulmin4096_32:
@@ -91,9 +91,9 @@ define i32 @mulmin4096_32(i32 %A) {
 define i64 @mulmin4096_64(i64 %A) {
 ; X64-LABEL: mulmin4096_64:
 ; X64:       # %bb.0:
-; X64-NEXT:    shlq $12, %rdi
-; X64-NEXT:    negq %rdi
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    shlq $12, %rax
+; X64-NEXT:    negq %rax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: mulmin4096_64:
@@ -268,8 +268,8 @@ define i32 @mul0_32(i32 %A) {
 define i32 @mul4294967295_32(i32 %A) {
 ; X64-LABEL: mul4294967295_32:
 ; X64:       # %bb.0:
-; X64-NEXT:    negl %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    negl %eax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: mul4294967295_32:
@@ -284,8 +284,8 @@ define i32 @mul4294967295_32(i32 %A) {
 define i64 @mul18446744073709551615_64(i64 %A) {
 ; X64-LABEL: mul18446744073709551615_64:
 ; X64:       # %bb.0:
-; X64-NEXT:    negq %rdi
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    negq %rax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: mul18446744073709551615_64:
@@ -323,9 +323,9 @@ define i32 @test1(i32 %a) {
 ; X64-LABEL: test1:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    shll $5, %eax
-; X64-NEXT:    subl %eax, %edi
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %edi, %ecx
+; X64-NEXT:    shll $5, %ecx
+; X64-NEXT:    subl %ecx, %eax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: test1:
@@ -412,9 +412,9 @@ define i64 @test5(i64 %a) {
 ; X64-LABEL: test5:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    shlq $5, %rax
-; X64-NEXT:    subq %rax, %rdi
-; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq %rdi, %rcx
+; X64-NEXT:    shlq $5, %rcx
+; X64-NEXT:    subq %rcx, %rax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: test5:
@@ -530,9 +530,9 @@ define i64 @testNegOverflow(i64 %a) {
 ; X64-LABEL: testNegOverflow:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    shlq $63, %rax
-; X64-NEXT:    subq %rax, %rdi
-; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq %rdi, %rcx
+; X64-NEXT:    shlq $63, %rcx
+; X64-NEXT:    subq %rcx, %rax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: testNegOverflow:
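
When both the original value and a derived value are live, as in
test1/test5/testNegOverflow above (x - (x << n)), the leading copy
costs a scratch register instead: the argument is copied into %rax for
the result and into %ecx for the shifted term, replacing the old
trailing "movl %edi, %eax". A minimal sketch equivalent to test1
(hypothetical name):

  ; hypothetical example, same computation as test1 above
  define i32 @mul_by_neg31(i32 %a) {
    %t = mul nsw i32 %a, -31    ; lowered as a - (a << 5)
    ret i32 %t
  }

  ;   movl %edi, %eax
  ;   movl %edi, %ecx
  ;   shll $5, %ecx
  ;   subl %ecx, %eax
  ;   retq

The extra copy can also shift register pressure; the spill count in
misched-matmul.ll below moving from 23 to 25 appears to be the same
trade-off, paid for by fewer moves overall.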

Modified: llvm/trunk/test/CodeGen/X86/lea-opt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lea-opt.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/lea-opt.ll (original)
+++ llvm/trunk/test/CodeGen/X86/lea-opt.ll Wed Sep 19 11:59:08 2018
@@ -311,9 +311,9 @@ sw.epilog:
 define  i32 @test5(i32 %x, i32 %y)  #0 {
 ; CHECK-LABEL: test5:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addl %esi, %esi
-; CHECK-NEXT:    subl %esi, %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    addl %esi, %esi
+; CHECK-NEXT:    subl %esi, %eax
 ; CHECK-NEXT:    retq
 entry:
   %mul = mul nsw i32 %y, -2
@@ -325,9 +325,9 @@ define  i32 @test6(i32 %x, i32 %y)  #0 {
 ; CHECK-LABEL: test6:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    # kill: def $esi killed $esi def $rsi
-; CHECK-NEXT:    leal (%rsi,%rsi,2), %eax
-; CHECK-NEXT:    subl %eax, %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    leal (%rsi,%rsi,2), %ecx
+; CHECK-NEXT:    subl %ecx, %eax
 ; CHECK-NEXT:    retq
 entry:
   %mul = mul nsw i32 %y, -3
@@ -338,9 +338,9 @@ entry:
 define  i32 @test7(i32 %x, i32 %y)  #0 {
 ; CHECK-LABEL: test7:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    shll $2, %esi
-; CHECK-NEXT:    subl %esi, %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    shll $2, %esi
+; CHECK-NEXT:    subl %esi, %eax
 ; CHECK-NEXT:    retq
 entry:
   %mul = mul nsw i32 %y, -4
@@ -365,9 +365,9 @@ entry:
 define  i32 @test9(i32 %x, i32 %y) #0 {
 ; CHECK-LABEL: test9:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addl %esi, %esi
-; CHECK-NEXT:    subl %esi, %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    addl %esi, %esi
+; CHECK-NEXT:    subl %esi, %eax
 ; CHECK-NEXT:    retq
 entry:
   %mul = mul nsw i32 -2, %y
@@ -379,9 +379,9 @@ define  i32 @test10(i32 %x, i32 %y) #0 {
 ; CHECK-LABEL: test10:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    # kill: def $esi killed $esi def $rsi
-; CHECK-NEXT:    leal (%rsi,%rsi,2), %eax
-; CHECK-NEXT:    subl %eax, %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    leal (%rsi,%rsi,2), %ecx
+; CHECK-NEXT:    subl %ecx, %eax
 ; CHECK-NEXT:    retq
 entry:
   %mul = mul nsw i32 -3, %y
@@ -392,9 +392,9 @@ entry:
 define  i32 @test11(i32 %x, i32 %y) #0 {
 ; CHECK-LABEL: test11:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    shll $2, %esi
-; CHECK-NEXT:    subl %esi, %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    shll $2, %esi
+; CHECK-NEXT:    subl %esi, %eax
 ; CHECK-NEXT:    retq
 entry:
   %mul = mul nsw i32 -4, %y
@@ -418,9 +418,9 @@ entry:
 define  i64 @test13(i64 %x, i64 %y) #0 {
 ; CHECK-LABEL: test13:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    shlq $2, %rsi
-; CHECK-NEXT:    subq %rsi, %rdi
 ; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    shlq $2, %rsi
+; CHECK-NEXT:    subq %rsi, %rax
 ; CHECK-NEXT:    retq
 entry:
   %mul = mul nsw i64 -4, %y
@@ -444,9 +444,10 @@ entry:
 define  zeroext i16 @test15(i16 zeroext %x, i16 zeroext %y) #0 {
 ; CHECK-LABEL: test15:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    shll $3, %esi
-; CHECK-NEXT:    subl %esi, %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    shll $3, %esi
+; CHECK-NEXT:    subl %esi, %eax
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
 entry:
   %conv = zext i16 %x to i32

Modified: llvm/trunk/test/CodeGen/X86/legalize-shift-64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/legalize-shift-64.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/legalize-shift-64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/legalize-shift-64.ll Wed Sep 19 11:59:08 2018
@@ -88,6 +88,8 @@ define <2 x i64> @test5(<2 x i64> %A, <2
 ; CHECK-NEXT:    .cfi_offset %ebx, -12
 ; CHECK-NEXT:    .cfi_offset %ebp, -8
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; CHECK-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebx
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
@@ -101,12 +103,11 @@ define <2 x i64> @test5(<2 x i64> %A, <2
 ; CHECK-NEXT:    movl %edi, %esi
 ; CHECK-NEXT:    xorl %edi, %edi
 ; CHECK-NEXT:  .LBB4_2:
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; CHECK-NEXT:    movl %edx, %ebx
-; CHECK-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; CHECK-NEXT:    movb %ch, %cl
 ; CHECK-NEXT:    shll %cl, %ebx
 ; CHECK-NEXT:    shldl %cl, %edx, %ebp
-; CHECK-NEXT:    testb $32, %cl
+; CHECK-NEXT:    testb $32, %ch
 ; CHECK-NEXT:    je .LBB4_4
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    movl %ebx, %ebp

Modified: llvm/trunk/test/CodeGen/X86/legalize-shl-vec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/legalize-shl-vec.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/legalize-shl-vec.ll (original)
+++ llvm/trunk/test/CodeGen/X86/legalize-shl-vec.ll Wed Sep 19 11:59:08 2018
@@ -42,21 +42,21 @@ define <2 x i256> @test_shl(<2 x i256> %
 ;
 ; X64-LABEL: test_shl:
 ; X64:       # %bb.0:
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
-; X64-NEXT:    shldq $2, %rax, %rcx
-; X64-NEXT:    shldq $2, %rdx, %rax
-; X64-NEXT:    shldq $2, %r9, %rdx
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
+; X64-NEXT:    shldq $2, %rcx, %rdx
+; X64-NEXT:    shldq $2, %rsi, %rcx
+; X64-NEXT:    shldq $2, %r9, %rsi
 ; X64-NEXT:    shlq $2, %r9
-; X64-NEXT:    movq %rcx, 56(%rdi)
-; X64-NEXT:    movq %rax, 48(%rdi)
-; X64-NEXT:    movq %rdx, 40(%rdi)
+; X64-NEXT:    movq %rdx, 56(%rdi)
+; X64-NEXT:    movq %rcx, 48(%rdi)
+; X64-NEXT:    movq %rsi, 40(%rdi)
 ; X64-NEXT:    movq %r9, 32(%rdi)
 ; X64-NEXT:    xorps %xmm0, %xmm0
 ; X64-NEXT:    movaps %xmm0, 16(%rdi)
 ; X64-NEXT:    movaps %xmm0, (%rdi)
-; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    retq
   %Amt = insertelement <2 x i256> <i256 1, i256 2>, i256 -1, i32 0
   %Out = shl <2 x i256> %In, %Amt
@@ -88,7 +88,7 @@ define <2 x i256> @test_srl(<2 x i256> %
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
 ; X32-NEXT:    movl %edx, %ecx
 ; X32-NEXT:    shldl $28, %eax, %ecx
-; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    shldl $28, %esi, %eax
 ; X32-NEXT:    movl %eax, (%esp) # 4-byte Spill
 ; X32-NEXT:    shldl $28, %edi, %esi
@@ -101,7 +101,7 @@ define <2 x i256> @test_srl(<2 x i256> %
 ; X32-NEXT:    shrl $4, %edx
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl %edx, 60(%eax)
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
 ; X32-NEXT:    movl %edx, 56(%eax)
 ; X32-NEXT:    movl (%esp), %edx # 4-byte Reload
 ; X32-NEXT:    movl %edx, 52(%eax)
@@ -132,21 +132,21 @@ define <2 x i256> @test_srl(<2 x i256> %
 ;
 ; X64-LABEL: test_srl:
 ; X64:       # %bb.0:
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
-; X64-NEXT:    shrdq $4, %rdx, %r9
-; X64-NEXT:    shrdq $4, %rax, %rdx
-; X64-NEXT:    shrdq $4, %rcx, %rax
-; X64-NEXT:    shrq $4, %rcx
-; X64-NEXT:    movq %rcx, 56(%rdi)
-; X64-NEXT:    movq %rax, 48(%rdi)
-; X64-NEXT:    movq %rdx, 40(%rdi)
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
+; X64-NEXT:    shrdq $4, %rsi, %r9
+; X64-NEXT:    shrdq $4, %rcx, %rsi
+; X64-NEXT:    shrdq $4, %rdx, %rcx
+; X64-NEXT:    shrq $4, %rdx
+; X64-NEXT:    movq %rdx, 56(%rdi)
+; X64-NEXT:    movq %rcx, 48(%rdi)
+; X64-NEXT:    movq %rsi, 40(%rdi)
 ; X64-NEXT:    movq %r9, 32(%rdi)
 ; X64-NEXT:    xorps %xmm0, %xmm0
 ; X64-NEXT:    movaps %xmm0, 16(%rdi)
 ; X64-NEXT:    movaps %xmm0, (%rdi)
-; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    retq
   %Amt = insertelement <2 x i256> <i256 3, i256 4>, i256 -1, i32 0
   %Out = lshr <2 x i256> %In, %Amt
@@ -178,7 +178,7 @@ define <2 x i256> @test_sra(<2 x i256> %
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
 ; X32-NEXT:    movl %edx, %ecx
 ; X32-NEXT:    shldl $26, %eax, %ecx
-; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    shldl $26, %esi, %eax
 ; X32-NEXT:    movl %eax, (%esp) # 4-byte Spill
 ; X32-NEXT:    shldl $26, %edi, %esi
@@ -191,7 +191,7 @@ define <2 x i256> @test_sra(<2 x i256> %
 ; X32-NEXT:    sarl $6, %edx
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl %edx, 60(%eax)
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
 ; X32-NEXT:    movl %edx, 56(%eax)
 ; X32-NEXT:    movl (%esp), %edx # 4-byte Reload
 ; X32-NEXT:    movl %edx, 52(%eax)
@@ -224,23 +224,23 @@ define <2 x i256> @test_sra(<2 x i256> %
 ;
 ; X64-LABEL: test_sra:
 ; X64:       # %bb.0:
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
-; X64-NEXT:    shrdq $6, %rdx, %r9
-; X64-NEXT:    shrdq $6, %rax, %rdx
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
+; X64-NEXT:    shrdq $6, %rsi, %r9
+; X64-NEXT:    shrdq $6, %rcx, %rsi
 ; X64-NEXT:    sarq $63, %r8
-; X64-NEXT:    shrdq $6, %rcx, %rax
-; X64-NEXT:    sarq $6, %rcx
-; X64-NEXT:    movq %rcx, 56(%rdi)
-; X64-NEXT:    movq %rax, 48(%rdi)
-; X64-NEXT:    movq %rdx, 40(%rdi)
+; X64-NEXT:    shrdq $6, %rdx, %rcx
+; X64-NEXT:    sarq $6, %rdx
+; X64-NEXT:    movq %rdx, 56(%rdi)
+; X64-NEXT:    movq %rcx, 48(%rdi)
+; X64-NEXT:    movq %rsi, 40(%rdi)
 ; X64-NEXT:    movq %r9, 32(%rdi)
 ; X64-NEXT:    movq %r8, 24(%rdi)
 ; X64-NEXT:    movq %r8, 16(%rdi)
 ; X64-NEXT:    movq %r8, 8(%rdi)
 ; X64-NEXT:    movq %r8, (%rdi)
-; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    retq
   %Amt = insertelement <2 x i256> <i256 5, i256 6>, i256 -1, i32 0
   %Out = ashr <2 x i256> %In, %Amt
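
Functions returning a large value in memory (<2 x i256>) show the same
hoisting applied to the hidden sret pointer: "movq %rdi, %rax" now
comes first, the stores keep addressing through %rdi, and the
stack-loaded operands shift to other registers (%rsi here). A minimal
sketch (hypothetical; the exact store sequence depends on the
lowering):

  ; hypothetical example, not taken from the test suite
  define <2 x i256> @ret_zero() {
    ret <2 x i256> zeroinitializer
  }

  ;   movq %rdi, %rax            # forward the sret pointer up front
  ;   xorps %xmm0, %xmm0
  ;   movaps %xmm0, 48(%rdi)
  ;   movaps %xmm0, 32(%rdi)
  ;   movaps %xmm0, 16(%rdi)
  ;   movaps %xmm0, (%rdi)
  ;   retq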

Modified: llvm/trunk/test/CodeGen/X86/machine-combiner-int.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/machine-combiner-int.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/machine-combiner-int.ll (original)
+++ llvm/trunk/test/CodeGen/X86/machine-combiner-int.ll Wed Sep 19 11:59:08 2018
@@ -62,10 +62,11 @@ define i64 @reassociate_muls_i64(i64 %x0
 define i8 @reassociate_ands_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) {
 ; CHECK-LABEL: reassociate_ands_i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    subb  %sil, %dil
-; CHECK-NEXT:    andb  %cl, %dl
-; CHECK-NEXT:    andb  %dil, %dl
 ; CHECK-NEXT:    movl  %edx, %eax
+; CHECK-NEXT:    subb  %sil, %dil
+; CHECK-NEXT:    andb  %cl, %al
+; CHECK-NEXT:    andb  %dil, %al
+; CHECK-NEXT:    # kill
 ; CHECK-NEXT:    retq
   %t0 = sub i8 %x0, %x1
   %t1 = and i8 %x2, %t0
@@ -78,10 +79,10 @@ define i8 @reassociate_ands_i8(i8 %x0, i
 define i32 @reassociate_ands_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
 ; CHECK-LABEL: reassociate_ands_i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    subl  %esi, %edi
-; CHECK-NEXT:    andl  %ecx, %edx
-; CHECK-NEXT:    andl  %edi, %edx
 ; CHECK-NEXT:    movl  %edx, %eax
+; CHECK-NEXT:    subl  %esi, %edi
+; CHECK-NEXT:    andl  %ecx, %eax
+; CHECK-NEXT:    andl  %edi, %eax
 ; CHECK-NEXT:    retq
   %t0 = sub i32 %x0, %x1
   %t1 = and i32 %x2, %t0
@@ -92,10 +93,10 @@ define i32 @reassociate_ands_i32(i32 %x0
 define i64 @reassociate_ands_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) {
 ; CHECK-LABEL: reassociate_ands_i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    subq  %rsi, %rdi
-; CHECK-NEXT:    andq  %rcx, %rdx
-; CHECK-NEXT:    andq  %rdi, %rdx
 ; CHECK-NEXT:    movq  %rdx, %rax
+; CHECK-NEXT:    subq  %rsi, %rdi
+; CHECK-NEXT:    andq  %rcx, %rax
+; CHECK-NEXT:    andq  %rdi, %rax
 ; CHECK-NEXT:    retq
   %t0 = sub i64 %x0, %x1
   %t1 = and i64 %x2, %t0
@@ -109,10 +110,11 @@ define i64 @reassociate_ands_i64(i64 %x0
 define i8 @reassociate_ors_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) {
 ; CHECK-LABEL: reassociate_ors_i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    subb  %sil, %dil
-; CHECK-NEXT:    orb   %cl, %dl
-; CHECK-NEXT:    orb   %dil, %dl
 ; CHECK-NEXT:    movl  %edx, %eax
+; CHECK-NEXT:    subb  %sil, %dil
+; CHECK-NEXT:    orb   %cl, %al
+; CHECK-NEXT:    orb   %dil, %al
+; CHECK-NEXT:    # kill
 ; CHECK-NEXT:    retq
   %t0 = sub i8 %x0, %x1
   %t1 = or i8 %x2, %t0
@@ -125,10 +127,10 @@ define i8 @reassociate_ors_i8(i8 %x0, i8
 define i32 @reassociate_ors_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
 ; CHECK-LABEL: reassociate_ors_i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    subl  %esi, %edi
-; CHECK-NEXT:    orl   %ecx, %edx
-; CHECK-NEXT:    orl   %edi, %edx
 ; CHECK-NEXT:    movl  %edx, %eax
+; CHECK-NEXT:    subl  %esi, %edi
+; CHECK-NEXT:    orl   %ecx, %eax
+; CHECK-NEXT:    orl   %edi, %eax
 ; CHECK-NEXT:    retq
   %t0 = sub i32 %x0, %x1
   %t1 = or i32 %x2, %t0
@@ -139,10 +141,10 @@ define i32 @reassociate_ors_i32(i32 %x0,
 define i64 @reassociate_ors_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) {
 ; CHECK-LABEL: reassociate_ors_i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    subq  %rsi, %rdi
-; CHECK-NEXT:    orq   %rcx, %rdx
-; CHECK-NEXT:    orq   %rdi, %rdx
 ; CHECK-NEXT:    movq  %rdx, %rax
+; CHECK-NEXT:    subq  %rsi, %rdi
+; CHECK-NEXT:    orq   %rcx, %rax
+; CHECK-NEXT:    orq   %rdi, %rax
 ; CHECK-NEXT:    retq
   %t0 = sub i64 %x0, %x1
   %t1 = or i64 %x2, %t0
@@ -156,10 +158,11 @@ define i64 @reassociate_ors_i64(i64 %x0,
 define i8 @reassociate_xors_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) {
 ; CHECK-LABEL: reassociate_xors_i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    subb  %sil, %dil
-; CHECK-NEXT:    xorb  %cl, %dl
-; CHECK-NEXT:    xorb  %dil, %dl
 ; CHECK-NEXT:    movl  %edx, %eax
+; CHECK-NEXT:    subb  %sil, %dil
+; CHECK-NEXT:    xorb  %cl, %al
+; CHECK-NEXT:    xorb  %dil, %al
+; CHECK-NEXT:    # kill
 ; CHECK-NEXT:    retq
   %t0 = sub i8 %x0, %x1
   %t1 = xor i8 %x2, %t0
@@ -172,10 +175,10 @@ define i8 @reassociate_xors_i8(i8 %x0, i
 define i32 @reassociate_xors_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
 ; CHECK-LABEL: reassociate_xors_i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    subl  %esi, %edi
-; CHECK-NEXT:    xorl  %ecx, %edx
-; CHECK-NEXT:    xorl  %edi, %edx
 ; CHECK-NEXT:    movl  %edx, %eax
+; CHECK-NEXT:    subl  %esi, %edi
+; CHECK-NEXT:    xorl  %ecx, %eax
+; CHECK-NEXT:    xorl  %edi, %eax
 ; CHECK-NEXT:    retq
   %t0 = sub i32 %x0, %x1
   %t1 = xor i32 %x2, %t0
@@ -186,10 +189,10 @@ define i32 @reassociate_xors_i32(i32 %x0
 define i64 @reassociate_xors_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) {
 ; CHECK-LABEL: reassociate_xors_i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    subq  %rsi, %rdi
-; CHECK-NEXT:    xorq  %rcx, %rdx
-; CHECK-NEXT:    xorq  %rdi, %rdx
 ; CHECK-NEXT:    movq  %rdx, %rax
+; CHECK-NEXT:    subq  %rsi, %rdi
+; CHECK-NEXT:    xorq  %rcx, %rax
+; CHECK-NEXT:    xorq  %rdi, %rax
 ; CHECK-NEXT:    retq
   %t0 = sub i64 %x0, %x1
   %t1 = xor i64 %x2, %t0

Modified: llvm/trunk/test/CodeGen/X86/machine-cp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/machine-cp.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/machine-cp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/machine-cp.ll Wed Sep 19 11:59:08 2018
@@ -103,30 +103,29 @@ define <16 x float> @foo(<16 x float> %x
 ; CHECK:       ## %bb.0: ## %bb
 ; CHECK-NEXT:    movaps %xmm3, %xmm9
 ; CHECK-NEXT:    movaps %xmm2, %xmm8
-; CHECK-NEXT:    movaps %xmm1, %xmm6
 ; CHECK-NEXT:    movaps %xmm0, %xmm7
 ; CHECK-NEXT:    xorps %xmm0, %xmm0
-; CHECK-NEXT:    movaps %xmm3, %xmm1
-; CHECK-NEXT:    cmpltps %xmm0, %xmm1
-; CHECK-NEXT:    movaps %xmm1, %xmm4
+; CHECK-NEXT:    movaps %xmm3, %xmm2
+; CHECK-NEXT:    cmpltps %xmm0, %xmm2
+; CHECK-NEXT:    movaps %xmm2, %xmm4
 ; CHECK-NEXT:    orps {{.*}}(%rip), %xmm4
 ; CHECK-NEXT:    movaps %xmm4, %xmm10
-; CHECK-NEXT:    andnps %xmm1, %xmm10
-; CHECK-NEXT:    movaps %xmm2, %xmm1
-; CHECK-NEXT:    cmpltps %xmm0, %xmm1
+; CHECK-NEXT:    andnps %xmm2, %xmm10
+; CHECK-NEXT:    movaps %xmm8, %xmm5
+; CHECK-NEXT:    cmpltps %xmm0, %xmm5
 ; CHECK-NEXT:    movaps {{.*#+}} xmm11 = [9,10,11,12]
-; CHECK-NEXT:    movaps %xmm1, %xmm3
-; CHECK-NEXT:    orps %xmm11, %xmm3
-; CHECK-NEXT:    movaps %xmm3, %xmm14
-; CHECK-NEXT:    andnps %xmm1, %xmm14
-; CHECK-NEXT:    cvttps2dq %xmm6, %xmm12
-; CHECK-NEXT:    cmpltps %xmm0, %xmm6
+; CHECK-NEXT:    movaps %xmm5, %xmm2
+; CHECK-NEXT:    orps %xmm11, %xmm2
+; CHECK-NEXT:    movaps %xmm2, %xmm14
+; CHECK-NEXT:    andnps %xmm5, %xmm14
+; CHECK-NEXT:    cvttps2dq %xmm1, %xmm12
+; CHECK-NEXT:    cmpltps %xmm0, %xmm1
 ; CHECK-NEXT:    movaps {{.*#+}} xmm13 = [5,6,7,8]
-; CHECK-NEXT:    movaps %xmm6, %xmm2
-; CHECK-NEXT:    orps %xmm13, %xmm2
-; CHECK-NEXT:    movaps %xmm2, %xmm5
-; CHECK-NEXT:    andnps %xmm6, %xmm5
-; CHECK-NEXT:    cvttps2dq %xmm7, %xmm6
+; CHECK-NEXT:    movaps %xmm1, %xmm6
+; CHECK-NEXT:    orps %xmm13, %xmm6
+; CHECK-NEXT:    movaps %xmm6, %xmm5
+; CHECK-NEXT:    andnps %xmm1, %xmm5
+; CHECK-NEXT:    cvttps2dq %xmm7, %xmm3
 ; CHECK-NEXT:    cmpltps %xmm0, %xmm7
 ; CHECK-NEXT:    movaps {{.*#+}} xmm15 = [1,2,3,4]
 ; CHECK-NEXT:    movaps %xmm7, %xmm0
@@ -134,30 +133,29 @@ define <16 x float> @foo(<16 x float> %x
 ; CHECK-NEXT:    movaps %xmm0, %xmm1
 ; CHECK-NEXT:    andnps %xmm7, %xmm1
 ; CHECK-NEXT:    andps %xmm15, %xmm0
-; CHECK-NEXT:    cvtdq2ps %xmm6, %xmm6
-; CHECK-NEXT:    andps %xmm6, %xmm0
-; CHECK-NEXT:    movaps {{.*#+}} xmm6 = [1,1,1,1]
-; CHECK-NEXT:    andps %xmm6, %xmm1
+; CHECK-NEXT:    cvtdq2ps %xmm3, %xmm3
+; CHECK-NEXT:    andps %xmm3, %xmm0
+; CHECK-NEXT:    movaps {{.*#+}} xmm3 = [1,1,1,1]
+; CHECK-NEXT:    andps %xmm3, %xmm1
 ; CHECK-NEXT:    orps %xmm1, %xmm0
-; CHECK-NEXT:    andps %xmm13, %xmm2
+; CHECK-NEXT:    andps %xmm13, %xmm6
 ; CHECK-NEXT:    cvtdq2ps %xmm12, %xmm1
-; CHECK-NEXT:    andps %xmm1, %xmm2
-; CHECK-NEXT:    andps %xmm6, %xmm5
-; CHECK-NEXT:    orps %xmm5, %xmm2
-; CHECK-NEXT:    andps %xmm11, %xmm3
+; CHECK-NEXT:    andps %xmm1, %xmm6
+; CHECK-NEXT:    andps %xmm3, %xmm5
+; CHECK-NEXT:    orps %xmm5, %xmm6
+; CHECK-NEXT:    andps %xmm11, %xmm2
 ; CHECK-NEXT:    cvttps2dq %xmm8, %xmm1
 ; CHECK-NEXT:    cvtdq2ps %xmm1, %xmm1
-; CHECK-NEXT:    andps %xmm1, %xmm3
-; CHECK-NEXT:    andps %xmm6, %xmm14
-; CHECK-NEXT:    orps %xmm14, %xmm3
-; CHECK-NEXT:    andps %xmm6, %xmm10
+; CHECK-NEXT:    andps %xmm1, %xmm2
+; CHECK-NEXT:    andps %xmm3, %xmm14
+; CHECK-NEXT:    orps %xmm14, %xmm2
+; CHECK-NEXT:    andps %xmm3, %xmm10
 ; CHECK-NEXT:    andps {{.*}}(%rip), %xmm4
 ; CHECK-NEXT:    cvttps2dq %xmm9, %xmm1
 ; CHECK-NEXT:    cvtdq2ps %xmm1, %xmm1
 ; CHECK-NEXT:    andps %xmm1, %xmm4
 ; CHECK-NEXT:    orps %xmm10, %xmm4
-; CHECK-NEXT:    movaps %xmm2, %xmm1
-; CHECK-NEXT:    movaps %xmm3, %xmm2
+; CHECK-NEXT:    movaps %xmm6, %xmm1
 ; CHECK-NEXT:    movaps %xmm4, %xmm3
 ; CHECK-NEXT:    retq
 bb:

Modified: llvm/trunk/test/CodeGen/X86/machine-cse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/machine-cse.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/machine-cse.ll (original)
+++ llvm/trunk/test/CodeGen/X86/machine-cse.ll Wed Sep 19 11:59:08 2018
@@ -136,21 +136,21 @@ define i8* @bsd_memchr(i8* %s, i32 %a, i
 ; CHECK-NEXT:    testq %rcx, %rcx
 ; CHECK-NEXT:    je .LBB3_4
 ; CHECK-NEXT:  # %bb.1: # %preheader
-; CHECK-NEXT:    movzbl %dl, %eax
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    movzbl %dl, %edx
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB3_2: # %do.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    cmpl %eax, %esi
+; CHECK-NEXT:    cmpl %edx, %esi
 ; CHECK-NEXT:    je .LBB3_5
 ; CHECK-NEXT:  # %bb.3: # %do.cond
 ; CHECK-NEXT:    # in Loop: Header=BB3_2 Depth=1
-; CHECK-NEXT:    incq %rdi
+; CHECK-NEXT:    incq %rax
 ; CHECK-NEXT:    decq %rcx
 ; CHECK-NEXT:    jne .LBB3_2
 ; CHECK-NEXT:  .LBB3_4:
-; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:  .LBB3_5: # %return
-; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    retq
 entry:
   %cmp = icmp eq i64 %n, 0

Modified: llvm/trunk/test/CodeGen/X86/madd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/madd.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/madd.ll (original)
+++ llvm/trunk/test/CodeGen/X86/madd.ll Wed Sep 19 11:59:08 2018
@@ -2219,6 +2219,7 @@ define <16 x i32> @jumbled_indices16(<32
 define <32 x i32> @jumbled_indices32(<64 x i16> %A, <64 x i16> %B) {
 ; SSE2-LABEL: jumbled_indices32:
 ; SSE2:       # %bb.0:
+; SSE2-NEXT:    movq %rdi, %rax
 ; SSE2-NEXT:    pmaddwd {{[0-9]+}}(%rsp), %xmm0
 ; SSE2-NEXT:    pmaddwd {{[0-9]+}}(%rsp), %xmm1
 ; SSE2-NEXT:    pmaddwd {{[0-9]+}}(%rsp), %xmm2
@@ -2235,7 +2236,6 @@ define <32 x i32> @jumbled_indices32(<64
 ; SSE2-NEXT:    movdqa %xmm2, 32(%rdi)
 ; SSE2-NEXT:    movdqa %xmm1, 16(%rdi)
 ; SSE2-NEXT:    movdqa %xmm0, (%rdi)
-; SSE2-NEXT:    movq %rdi, %rax
 ; SSE2-NEXT:    retq
 ;
 ; AVX1-LABEL: jumbled_indices32:
@@ -2437,6 +2437,7 @@ define <16 x i32> @pmaddwd_512(<32 x i16
 define <32 x i32> @pmaddwd_1024(<64 x i16>* %Aptr, <64 x i16>* %Bptr) {
 ; SSE2-LABEL: pmaddwd_1024:
 ; SSE2:       # %bb.0:
+; SSE2-NEXT:    movq %rdi, %rax
 ; SSE2-NEXT:    movdqa 112(%rsi), %xmm0
 ; SSE2-NEXT:    movdqa 96(%rsi), %xmm1
 ; SSE2-NEXT:    movdqa 80(%rsi), %xmm2
@@ -2461,7 +2462,6 @@ define <32 x i32> @pmaddwd_1024(<64 x i1
 ; SSE2-NEXT:    movdqa %xmm6, 32(%rdi)
 ; SSE2-NEXT:    movdqa %xmm5, 16(%rdi)
 ; SSE2-NEXT:    movdqa %xmm4, (%rdi)
-; SSE2-NEXT:    movq %rdi, %rax
 ; SSE2-NEXT:    retq
 ;
 ; AVX1-LABEL: pmaddwd_1024:

Modified: llvm/trunk/test/CodeGen/X86/mask-negated-bool.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mask-negated-bool.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mask-negated-bool.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mask-negated-bool.ll Wed Sep 19 11:59:08 2018
@@ -4,8 +4,8 @@
 define i32 @mask_negated_zext_bool1(i1 %x) {
 ; CHECK-LABEL: mask_negated_zext_bool1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    andl $1, %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    andl $1, %eax
 ; CHECK-NEXT:    retq
   %ext = zext i1 %x to i32
   %neg = sub i32 0, %ext
@@ -38,8 +38,8 @@ define <4 x i32> @mask_negated_zext_bool
 define i32 @mask_negated_sext_bool1(i1 %x) {
 ; CHECK-LABEL: mask_negated_sext_bool1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    andl $1, %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    andl $1, %eax
 ; CHECK-NEXT:    retq
   %ext = sext i1 %x to i32
   %neg = sub i32 0, %ext

Modified: llvm/trunk/test/CodeGen/X86/misched-matmul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/misched-matmul.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/misched-matmul.ll (original)
+++ llvm/trunk/test/CodeGen/X86/misched-matmul.ll Wed Sep 19 11:59:08 2018
@@ -10,7 +10,7 @@
 ; more complex cases.
 ;
 ; CHECK: @wrap_mul4
-; CHECK: 23 regalloc - Number of spills inserted
+; CHECK: 25 regalloc - Number of spills inserted
 
 define void @wrap_mul4(double* nocapture %Out, [4 x double]* nocapture %A, [4 x double]* nocapture %B) #0 {
 entry:

Modified: llvm/trunk/test/CodeGen/X86/mul-constant-i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mul-constant-i16.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mul-constant-i16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mul-constant-i16.ll Wed Sep 19 11:59:08 2018
@@ -11,6 +11,7 @@ define i16 @test_mul_by_1(i16 %x) {
 ; X64-LABEL: test_mul_by_1:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
   %mul = mul nsw i16 %x, 1
   ret i16 %mul
@@ -297,8 +298,9 @@ define i16 @test_mul_by_16(i16 %x) {
 ;
 ; X64-LABEL: test_mul_by_16:
 ; X64:       # %bb.0:
-; X64-NEXT:    shll $4, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    shll $4, %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
   %mul = mul nsw i16 %x, 16
   ret i16 %mul
@@ -632,8 +634,9 @@ define i16 @test_mul_by_32(i16 %x) {
 ;
 ; X64-LABEL: test_mul_by_32:
 ; X64:       # %bb.0:
-; X64-NEXT:    shll $5, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    shll $5, %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
   %mul = mul nsw i16 %x, 32
   ret i16 %mul

Modified: llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll Wed Sep 19 11:59:08 2018
@@ -787,14 +787,14 @@ define i32 @test_mul_by_16(i32 %x) {
 ;
 ; X64-HSW-LABEL: test_mul_by_16:
 ; X64-HSW:       # %bb.0:
-; X64-HSW-NEXT:    shll $4, %edi # sched: [1:0.50]
 ; X64-HSW-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT:    shll $4, %eax # sched: [1:0.50]
 ; X64-HSW-NEXT:    retq # sched: [7:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_16:
 ; X64-JAG:       # %bb.0:
-; X64-JAG-NEXT:    shll $4, %edi # sched: [1:0.50]
 ; X64-JAG-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    shll $4, %eax # sched: [1:0.50]
 ; X64-JAG-NEXT:    retq # sched: [4:1.00]
 ;
 ; X86-NOOPT-LABEL: test_mul_by_16:
@@ -805,26 +805,26 @@ define i32 @test_mul_by_16(i32 %x) {
 ;
 ; HSW-NOOPT-LABEL: test_mul_by_16:
 ; HSW-NOOPT:       # %bb.0:
-; HSW-NOOPT-NEXT:    shll $4, %edi # sched: [1:0.50]
 ; HSW-NOOPT-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; HSW-NOOPT-NEXT:    shll $4, %eax # sched: [1:0.50]
 ; HSW-NOOPT-NEXT:    retq # sched: [7:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_16:
 ; JAG-NOOPT:       # %bb.0:
-; JAG-NOOPT-NEXT:    shll $4, %edi # sched: [1:0.50]
 ; JAG-NOOPT-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; JAG-NOOPT-NEXT:    shll $4, %eax # sched: [1:0.50]
 ; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
 ;
 ; X64-SLM-LABEL: test_mul_by_16:
 ; X64-SLM:       # %bb.0:
-; X64-SLM-NEXT:    shll $4, %edi # sched: [1:1.00]
 ; X64-SLM-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; X64-SLM-NEXT:    shll $4, %eax # sched: [1:1.00]
 ; X64-SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SLM-NOOPT-LABEL: test_mul_by_16:
 ; SLM-NOOPT:       # %bb.0:
-; SLM-NOOPT-NEXT:    shll $4, %edi # sched: [1:1.00]
 ; SLM-NOOPT-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; SLM-NOOPT-NEXT:    shll $4, %eax # sched: [1:1.00]
 ; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i32 %x, 16
   ret i32 %mul
@@ -1633,14 +1633,14 @@ define i32 @test_mul_by_32(i32 %x) {
 ;
 ; X64-HSW-LABEL: test_mul_by_32:
 ; X64-HSW:       # %bb.0:
-; X64-HSW-NEXT:    shll $5, %edi # sched: [1:0.50]
 ; X64-HSW-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT:    shll $5, %eax # sched: [1:0.50]
 ; X64-HSW-NEXT:    retq # sched: [7:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_32:
 ; X64-JAG:       # %bb.0:
-; X64-JAG-NEXT:    shll $5, %edi # sched: [1:0.50]
 ; X64-JAG-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    shll $5, %eax # sched: [1:0.50]
 ; X64-JAG-NEXT:    retq # sched: [4:1.00]
 ;
 ; X86-NOOPT-LABEL: test_mul_by_32:
@@ -1651,26 +1651,26 @@ define i32 @test_mul_by_32(i32 %x) {
 ;
 ; HSW-NOOPT-LABEL: test_mul_by_32:
 ; HSW-NOOPT:       # %bb.0:
-; HSW-NOOPT-NEXT:    shll $5, %edi # sched: [1:0.50]
 ; HSW-NOOPT-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; HSW-NOOPT-NEXT:    shll $5, %eax # sched: [1:0.50]
 ; HSW-NOOPT-NEXT:    retq # sched: [7:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_32:
 ; JAG-NOOPT:       # %bb.0:
-; JAG-NOOPT-NEXT:    shll $5, %edi # sched: [1:0.50]
 ; JAG-NOOPT-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; JAG-NOOPT-NEXT:    shll $5, %eax # sched: [1:0.50]
 ; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
 ;
 ; X64-SLM-LABEL: test_mul_by_32:
 ; X64-SLM:       # %bb.0:
-; X64-SLM-NEXT:    shll $5, %edi # sched: [1:1.00]
 ; X64-SLM-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; X64-SLM-NEXT:    shll $5, %eax # sched: [1:1.00]
 ; X64-SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SLM-NOOPT-LABEL: test_mul_by_32:
 ; SLM-NOOPT:       # %bb.0:
-; SLM-NOOPT-NEXT:    shll $5, %edi # sched: [1:1.00]
 ; SLM-NOOPT-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; SLM-NOOPT-NEXT:    shll $5, %eax # sched: [1:1.00]
 ; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i32 %x, 32
   ret i32 %mul
@@ -2200,18 +2200,18 @@ define i32 @mul_neg_fold(i32 %a, i32 %b)
 ;
 ; X64-HSW-LABEL: mul_neg_fold:
 ; X64-HSW:       # %bb.0:
-; X64-HSW-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
-; X64-HSW-NEXT:    subl %eax, %esi # sched: [1:0.25]
 ; X64-HSW-NEXT:    movl %esi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-HSW-NEXT:    leal (%rdi,%rdi,8), %ecx # sched: [1:0.50]
+; X64-HSW-NEXT:    subl %ecx, %eax # sched: [1:0.25]
 ; X64-HSW-NEXT:    retq # sched: [7:1.00]
 ;
 ; X64-JAG-LABEL: mul_neg_fold:
 ; X64-JAG:       # %bb.0:
 ; X64-JAG-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [2:1.00]
-; X64-JAG-NEXT:    subl %eax, %esi # sched: [1:0.50]
+; X64-JAG-NEXT:    leal (%rdi,%rdi,8), %ecx # sched: [2:1.00]
 ; X64-JAG-NEXT:    movl %esi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    subl %ecx, %eax # sched: [1:0.50]
 ; X64-JAG-NEXT:    retq # sched: [4:1.00]
 ;
 ; X86-NOOPT-LABEL: mul_neg_fold:
@@ -2235,9 +2235,9 @@ define i32 @mul_neg_fold(i32 %a, i32 %b)
 ; X64-SLM-LABEL: mul_neg_fold:
 ; X64-SLM:       # %bb.0:
 ; X64-SLM-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-SLM-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:1.00]
-; X64-SLM-NEXT:    subl %eax, %esi # sched: [1:0.50]
 ; X64-SLM-NEXT:    movl %esi, %eax # sched: [1:0.50]
+; X64-SLM-NEXT:    leal (%rdi,%rdi,8), %ecx # sched: [1:1.00]
+; X64-SLM-NEXT:    subl %ecx, %eax # sched: [1:0.50]
 ; X64-SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SLM-NOOPT-LABEL: mul_neg_fold:

Modified: llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll Wed Sep 19 11:59:08 2018
@@ -811,14 +811,14 @@ define i64 @test_mul_by_16(i64 %x) {
 ;
 ; X64-HSW-LABEL: test_mul_by_16:
 ; X64-HSW:       # %bb.0:
-; X64-HSW-NEXT:    shlq $4, %rdi # sched: [1:0.50]
 ; X64-HSW-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT:    shlq $4, %rax # sched: [1:0.50]
 ; X64-HSW-NEXT:    retq # sched: [7:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_16:
 ; X64-JAG:       # %bb.0:
-; X64-JAG-NEXT:    shlq $4, %rdi # sched: [1:0.50]
 ; X64-JAG-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    shlq $4, %rax # sched: [1:0.50]
 ; X64-JAG-NEXT:    retq # sched: [4:1.00]
 ;
 ; X86-NOOPT-LABEL: test_mul_by_16:
@@ -831,26 +831,26 @@ define i64 @test_mul_by_16(i64 %x) {
 ;
 ; HSW-NOOPT-LABEL: test_mul_by_16:
 ; HSW-NOOPT:       # %bb.0:
-; HSW-NOOPT-NEXT:    shlq $4, %rdi # sched: [1:0.50]
 ; HSW-NOOPT-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; HSW-NOOPT-NEXT:    shlq $4, %rax # sched: [1:0.50]
 ; HSW-NOOPT-NEXT:    retq # sched: [7:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_16:
 ; JAG-NOOPT:       # %bb.0:
-; JAG-NOOPT-NEXT:    shlq $4, %rdi # sched: [1:0.50]
 ; JAG-NOOPT-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; JAG-NOOPT-NEXT:    shlq $4, %rax # sched: [1:0.50]
 ; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
 ;
 ; X64-SLM-LABEL: test_mul_by_16:
 ; X64-SLM:       # %bb.0:
-; X64-SLM-NEXT:    shlq $4, %rdi # sched: [1:1.00]
 ; X64-SLM-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT:    shlq $4, %rax # sched: [1:1.00]
 ; X64-SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SLM-NOOPT-LABEL: test_mul_by_16:
 ; SLM-NOOPT:       # %bb.0:
-; SLM-NOOPT-NEXT:    shlq $4, %rdi # sched: [1:1.00]
 ; SLM-NOOPT-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; SLM-NOOPT-NEXT:    shlq $4, %rax # sched: [1:1.00]
 ; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i64 %x, 16
   ret i64 %mul
@@ -1716,14 +1716,14 @@ define i64 @test_mul_by_32(i64 %x) {
 ;
 ; X64-HSW-LABEL: test_mul_by_32:
 ; X64-HSW:       # %bb.0:
-; X64-HSW-NEXT:    shlq $5, %rdi # sched: [1:0.50]
 ; X64-HSW-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT:    shlq $5, %rax # sched: [1:0.50]
 ; X64-HSW-NEXT:    retq # sched: [7:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_32:
 ; X64-JAG:       # %bb.0:
-; X64-JAG-NEXT:    shlq $5, %rdi # sched: [1:0.50]
 ; X64-JAG-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    shlq $5, %rax # sched: [1:0.50]
 ; X64-JAG-NEXT:    retq # sched: [4:1.00]
 ;
 ; X86-NOOPT-LABEL: test_mul_by_32:
@@ -1736,26 +1736,26 @@ define i64 @test_mul_by_32(i64 %x) {
 ;
 ; HSW-NOOPT-LABEL: test_mul_by_32:
 ; HSW-NOOPT:       # %bb.0:
-; HSW-NOOPT-NEXT:    shlq $5, %rdi # sched: [1:0.50]
 ; HSW-NOOPT-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; HSW-NOOPT-NEXT:    shlq $5, %rax # sched: [1:0.50]
 ; HSW-NOOPT-NEXT:    retq # sched: [7:1.00]
 ;
 ; JAG-NOOPT-LABEL: test_mul_by_32:
 ; JAG-NOOPT:       # %bb.0:
-; JAG-NOOPT-NEXT:    shlq $5, %rdi # sched: [1:0.50]
 ; JAG-NOOPT-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; JAG-NOOPT-NEXT:    shlq $5, %rax # sched: [1:0.50]
 ; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
 ;
 ; X64-SLM-LABEL: test_mul_by_32:
 ; X64-SLM:       # %bb.0:
-; X64-SLM-NEXT:    shlq $5, %rdi # sched: [1:1.00]
 ; X64-SLM-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT:    shlq $5, %rax # sched: [1:1.00]
 ; X64-SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SLM-NOOPT-LABEL: test_mul_by_32:
 ; SLM-NOOPT:       # %bb.0:
-; SLM-NOOPT-NEXT:    shlq $5, %rdi # sched: [1:1.00]
 ; SLM-NOOPT-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; SLM-NOOPT-NEXT:    shlq $5, %rax # sched: [1:1.00]
 ; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i64 %x, 32
   ret i64 %mul

Modified: llvm/trunk/test/CodeGen/X86/mul-i1024.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mul-i1024.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mul-i1024.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mul-i1024.ll Wed Sep 19 11:59:08 2018
@@ -774,14 +774,15 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    addl %ebx, %esi
 ; X32-NEXT:    movl %edx, %ecx
 ; X32-NEXT:    adcl $0, %ecx
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT:    addl %edi, %esi
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT:    addl %ebp, %esi
 ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %ebx, %ecx
-; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT:    movl %ebx, %esi
+; X32-NEXT:    setb %bl
 ; X32-NEXT:    addl %eax, %ecx
-; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 1-byte Folded Reload
-; X32-NEXT:    adcl %edx, %ebp
+; X32-NEXT:    movzbl %bl, %edi
+; X32-NEXT:    adcl %edx, %edi
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl 88(%eax), %eax
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -789,34 +790,35 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    mull %edx
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    movl %edi, %esi
-; X32-NEXT:    addl %eax, %esi
-; X32-NEXT:    adcl %edx, %ebx
-; X32-NEXT:    addl %ecx, %esi
-; X32-NEXT:    adcl %ebp, %ebx
+; X32-NEXT:    movl %ebp, %ebx
+; X32-NEXT:    addl %eax, %ebx
+; X32-NEXT:    adcl %edx, %esi
+; X32-NEXT:    addl %ecx, %ebx
+; X32-NEXT:    adcl %edi, %esi
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT:    addl %edi, %eax
+; X32-NEXT:    movl %ebp, %ecx
+; X32-NEXT:    addl %ebp, %eax
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
 ; X32-NEXT:    adcl %ebp, %eax
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT:    adcl %ebx, %eax
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    adcl %esi, %eax
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT:    adcl %ebx, %edx
-; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT:    addl %edi, %esi
+; X32-NEXT:    addl %ecx, %esi
 ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
 ; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
 ; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl 84(%eax), %eax
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -860,34 +862,35 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
 ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl 68(%ecx), %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X32-NEXT:    movl 68(%ebp), %eax
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    xorl %ecx, %ecx
 ; X32-NEXT:    mull %ecx
 ; X32-NEXT:    movl %eax, %esi
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT:    addl %ebx, %esi
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT:    addl %edi, %esi
 ; X32-NEXT:    movl %edx, %ecx
 ; X32-NEXT:    adcl $0, %ecx
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT:    addl %ebp, %esi
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT:    addl %ebx, %esi
 ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    adcl %ebx, %ecx
+; X32-NEXT:    adcl %edi, %ecx
 ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
 ; X32-NEXT:    addl %eax, %ecx
 ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
 ; X32-NEXT:    adcl %edx, %esi
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl 72(%eax), %eax
+; X32-NEXT:    movl 72(%ebp), %eax
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    xorl %edx, %edx
 ; X32-NEXT:    mull %edx
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %edx, %edi
 ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    movl %ebp, %edx
+; X32-NEXT:    movl %ebx, %edx
+; X32-NEXT:    movl %ebx, %ebp
 ; X32-NEXT:    addl %eax, %ebp
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
 ; X32-NEXT:    adcl %edi, %ebx
 ; X32-NEXT:    addl %ecx, %ebp
 ; X32-NEXT:    adcl %esi, %ebx
@@ -1167,14 +1170,13 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    xorl %ecx, %ecx
 ; X32-NEXT:    mull %ecx
-; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    movl %eax, %ebx
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT:    addl %esi, %ecx
+; X32-NEXT:    addl %esi, %ebx
 ; X32-NEXT:    movl %edx, %ebp
 ; X32-NEXT:    adcl $0, %ebp
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT:    addl %edi, %ecx
-; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    addl %edi, %ebx
 ; X32-NEXT:    adcl %esi, %ebp
 ; X32-NEXT:    setb %cl
 ; X32-NEXT:    addl %eax, %ebp
@@ -1182,7 +1184,6 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    adcl %edx, %eax
 ; X32-NEXT:    addl %edi, %ebp
 ; X32-NEXT:    adcl %esi, %eax
-; X32-NEXT:    movl %eax, %ebx
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
@@ -1191,17 +1192,17 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
 ; X32-NEXT:    addl %eax, %edi
 ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT:    movl %ebx, %edx
 ; X32-NEXT:    adcl %ecx, %edx
 ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %ebp, %edx
 ; X32-NEXT:    adcl $0, %edx
-; X32-NEXT:    movl %ebx, %edi
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    adcl $0, %edi
 ; X32-NEXT:    addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl $0, %eax
 ; X32-NEXT:    adcl $0, %ecx
 ; X32-NEXT:    addl %edx, %eax
@@ -1209,12 +1210,12 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    setb %dl
 ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT:    adcl %ebx, %ecx
 ; X32-NEXT:    movzbl %dl, %eax
 ; X32-NEXT:    adcl %ebp, %eax
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
 ; X32-NEXT:    adcl $0, %ebp
-; X32-NEXT:    movl %esi, %ebx
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
 ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
 ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
@@ -1437,29 +1438,29 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    movl %ecx, (%esp) # 4-byte Spill
 ; X32-NEXT:    adcl %edx, %esi
 ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT:    mull %ecx
-; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
 ; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    mull %ecx
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT:    mull %edi
+; X32-NEXT:    movl %edx, %esi
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    mull %edi
 ; X32-NEXT:    movl %edx, %edi
 ; X32-NEXT:    movl %eax, %ebp
-; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT:    addl %esi, %ebp
 ; X32-NEXT:    adcl $0, %edi
-; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT:    mull %ecx
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT:    mull %ebx
 ; X32-NEXT:    movl %edx, %esi
 ; X32-NEXT:    addl %ebp, %eax
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %edi, %esi
 ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    mull %ecx
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    mull %ebx
 ; X32-NEXT:    movl %eax, %ebp
 ; X32-NEXT:    addl %esi, %ebp
 ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
@@ -1477,7 +1478,7 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %edx, %ecx
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %edx, %esi
 ; X32-NEXT:    movl %eax, %ebx
@@ -2431,6 +2432,7 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    adcl %ebp, %ecx
 ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
 ; X32-NEXT:    movl %edi, %eax
+; X32-NEXT:    movl %ebx, %edi
 ; X32-NEXT:    mull %ebx
 ; X32-NEXT:    addl %ecx, %eax
 ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
@@ -2449,27 +2451,25 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %edx, %ecx
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT:    movl %edi, %eax
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %edx, %esi
 ; X32-NEXT:    movl %eax, %ebp
 ; X32-NEXT:    addl %ecx, %ebp
 ; X32-NEXT:    adcl $0, %esi
 ; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT:    mull %ebx
+; X32-NEXT:    mull %edi
 ; X32-NEXT:    movl %edx, %ecx
 ; X32-NEXT:    addl %ebp, %eax
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %esi, %ecx
-; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT:    movl %edi, %esi
-; X32-NEXT:    movl %edi, %eax
-; X32-NEXT:    mull %ebx
+; X32-NEXT:    setb %bl
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT:    movl %esi, %eax
+; X32-NEXT:    mull %edi
 ; X32-NEXT:    movl %eax, %ebp
 ; X32-NEXT:    addl %ecx, %ebp
-; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NEXT:    movzbl %bl, %eax
 ; X32-NEXT:    adcl %eax, %edx
 ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
 ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
@@ -2647,30 +2647,30 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %edx, %esi
 ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT:    movl %esi, %eax
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT:    mull %ecx
-; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT:    mull %ebx
+; X32-NEXT:    movl %edx, %edi
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT:    movl %edi, %eax
-; X32-NEXT:    mull %ecx
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT:    movl %esi, %eax
+; X32-NEXT:    mull %ebx
 ; X32-NEXT:    movl %edx, %ebx
 ; X32-NEXT:    movl %eax, %ebp
-; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT:    addl %edi, %ebp
 ; X32-NEXT:    adcl $0, %ebx
-; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT:    mull %esi
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT:    mull %edi
 ; X32-NEXT:    movl %edx, %ecx
 ; X32-NEXT:    addl %ebp, %eax
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %ebx, %ecx
 ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT:    movl %edi, %ebp
-; X32-NEXT:    movl %edi, %eax
-; X32-NEXT:    mull %esi
+; X32-NEXT:    movl %esi, %ebp
+; X32-NEXT:    movl %esi, %eax
+; X32-NEXT:    mull %edi
 ; X32-NEXT:    movl %eax, %ebx
 ; X32-NEXT:    addl %ecx, %ebx
 ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
@@ -2682,8 +2682,7 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    movl %edx, (%esp) # 4-byte Spill
 ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT:    movl %edi, %eax
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %edx, %ecx
@@ -2694,7 +2693,7 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    movl %eax, %ebp
 ; X32-NEXT:    addl %ecx, %ebp
 ; X32-NEXT:    adcl $0, %esi
-; X32-NEXT:    movl %edi, %eax
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    mull %ecx
 ; X32-NEXT:    movl %edx, %edi
@@ -3360,29 +3359,29 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %edx, %esi
 ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT:    movl %esi, %eax
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT:    movl %edi, %eax
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    mull %ecx
 ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT:    movl %edi, %eax
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT:    movl %esi, %eax
 ; X32-NEXT:    mull %ecx
 ; X32-NEXT:    movl %edx, %ebx
 ; X32-NEXT:    movl %eax, %ebp
 ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
 ; X32-NEXT:    adcl $0, %ebx
-; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT:    mull %esi
+; X32-NEXT:    movl %edi, %eax
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT:    mull %edi
 ; X32-NEXT:    movl %edx, %ecx
 ; X32-NEXT:    addl %ebp, %eax
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %ebx, %ecx
 ; X32-NEXT:    setb %bl
-; X32-NEXT:    movl %edi, %eax
-; X32-NEXT:    mull %esi
+; X32-NEXT:    movl %esi, %eax
+; X32-NEXT:    mull %edi
 ; X32-NEXT:    movl %eax, %ebp
 ; X32-NEXT:    addl %ecx, %ebp
 ; X32-NEXT:    movzbl %bl, %eax
@@ -3479,20 +3478,20 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
 ; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT:    movl %ebp, %eax
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT:    mull %edi
-; X32-NEXT:    movl %edx, %ebp
+; X32-NEXT:    mull %ecx
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    mull %edi
+; X32-NEXT:    mull %ecx
 ; X32-NEXT:    movl %edx, %edi
 ; X32-NEXT:    movl %eax, %ebx
-; X32-NEXT:    addl %ebp, %ebx
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
 ; X32-NEXT:    adcl $0, %edi
-; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    movl %ebp, %eax
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    mull %ecx
 ; X32-NEXT:    movl %edx, %ebp
@@ -3513,30 +3512,30 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %edx, %esi
 ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT:    movl %esi, %eax
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT:    mull %ecx
-; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT:    mull %ebx
+; X32-NEXT:    movl %edx, %edi
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT:    movl %edi, %eax
-; X32-NEXT:    mull %ecx
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT:    movl %esi, %eax
+; X32-NEXT:    mull %ebx
 ; X32-NEXT:    movl %edx, %ebx
 ; X32-NEXT:    movl %eax, %ebp
-; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT:    addl %edi, %ebp
 ; X32-NEXT:    adcl $0, %ebx
-; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT:    mull %esi
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT:    mull %edi
 ; X32-NEXT:    movl %edx, %ecx
 ; X32-NEXT:    addl %ebp, %eax
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %ebx, %ecx
 ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT:    movl %edi, %ebp
-; X32-NEXT:    movl %edi, %eax
-; X32-NEXT:    mull %esi
+; X32-NEXT:    movl %esi, %ebp
+; X32-NEXT:    movl %esi, %eax
+; X32-NEXT:    mull %edi
 ; X32-NEXT:    movl %eax, %ebx
 ; X32-NEXT:    addl %ecx, %ebx
 ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
@@ -4603,36 +4602,37 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    addl %ebp, %eax
 ; X32-NEXT:    movzbl %cl, %ecx
 ; X32-NEXT:    adcl %ecx, %edx
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT:    addl %eax, %esi
-; X32-NEXT:    adcl %edx, %ecx
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT:    addl %eax, %ecx
+; X32-NEXT:    adcl %edx, %esi
 ; X32-NEXT:    addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT:    adcl $0, %esi
 ; X32-NEXT:    adcl $0, %ecx
-; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT:    adcl $0, %esi
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
 ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    movl %ecx, %eax
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    mull %esi
-; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl %edx, %ebx
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
 ; X32-NEXT:    movl %ebp, %eax
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %edx, %esi
 ; X32-NEXT:    movl %eax, %edi
-; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT:    addl %ebx, %edi
 ; X32-NEXT:    adcl $0, %esi
 ; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
 ; X32-NEXT:    mull %ebx
 ; X32-NEXT:    movl %edx, %ecx
 ; X32-NEXT:    addl %edi, %eax
@@ -4666,8 +4666,7 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
@@ -4735,31 +4734,30 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    adcl %ebx, %edi
 ; X32-NEXT:    setb %bl
 ; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    movl %esi, %ebp
 ; X32-NEXT:    mull %ecx
 ; X32-NEXT:    addl %edi, %eax
 ; X32-NEXT:    movzbl %bl, %edi
 ; X32-NEXT:    adcl %edi, %edx
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
 ; X32-NEXT:    addl %eax, %edi
-; X32-NEXT:    adcl %edx, %esi
+; X32-NEXT:    adcl %edx, %ecx
 ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
 ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
 ; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT:    mull %esi
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT:    mull %ebp
 ; X32-NEXT:    movl %edx, %ecx
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    movl %ebp, %eax
-; X32-NEXT:    mull %esi
+; X32-NEXT:    movl %esi, %eax
+; X32-NEXT:    mull %ebp
 ; X32-NEXT:    movl %edx, %esi
 ; X32-NEXT:    movl %eax, %edi
 ; X32-NEXT:    addl %ecx, %edi
@@ -4772,7 +4770,7 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    movl %eax, %edi
 ; X32-NEXT:    adcl %esi, %ecx
 ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT:    movl %ebp, %eax
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    mull %ebx
 ; X32-NEXT:    addl %ecx, %eax
 ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
@@ -4796,8 +4794,8 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT:    mull %edi
+; X32-NEXT:    movl %ebp, %edi
+; X32-NEXT:    mull %ebp
 ; X32-NEXT:    movl %edx, %ecx
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
@@ -4817,7 +4815,7 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    setb %bl
 ; X32-NEXT:    movl %ebp, %eax
 ; X32-NEXT:    mull %esi
-; X32-NEXT:    movl %esi, %ebp
+; X32-NEXT:    movl %esi, %edi
 ; X32-NEXT:    addl %ecx, %eax
 ; X32-NEXT:    movzbl %bl, %ecx
 ; X32-NEXT:    adcl %ecx, %edx
@@ -4836,41 +4834,41 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl $0, %ecx
 ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT:    imull %eax, %ebp
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT:    imull %ebp, %edi
+; X32-NEXT:    movl %ebp, %eax
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    mull %ecx
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    addl %ebp, %edx
+; X32-NEXT:    addl %edi, %edx
 ; X32-NEXT:    imull {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
 ; X32-NEXT:    addl %edx, %ecx
-; X32-NEXT:    movl %ecx, %ebp
+; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    movl %eax, %esi
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
 ; X32-NEXT:    imull %ebx, %esi
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    mull %edi
-; X32-NEXT:    movl %eax, %ecx
 ; X32-NEXT:    addl %esi, %edx
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT:    imull %edi, %esi
-; X32-NEXT:    addl %edx, %esi
-; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT:    imull %edi, %ecx
+; X32-NEXT:    addl %edx, %ecx
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
 ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    adcl %ebp, %esi
-; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %edi, %eax
 ; X32-NEXT:    movl %edi, %esi
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT:    mull %edi
-; X32-NEXT:    movl %edx, %ecx
+; X32-NEXT:    mull %ebp
+; X32-NEXT:    movl %edx, %edi
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    mull %edi
+; X32-NEXT:    movl %ebx, %ecx
+; X32-NEXT:    mull %ebp
 ; X32-NEXT:    movl %edx, %ebp
 ; X32-NEXT:    movl %eax, %ebx
-; X32-NEXT:    addl %ecx, %ebx
+; X32-NEXT:    addl %edi, %ebx
 ; X32-NEXT:    adcl $0, %ebp
 ; X32-NEXT:    movl %esi, %eax
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
@@ -4880,7 +4878,7 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %ebp, %esi
 ; X32-NEXT:    setb %bl
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT:    movl %ecx, %eax
 ; X32-NEXT:    mull %edi
 ; X32-NEXT:    addl %esi, %eax
 ; X32-NEXT:    movzbl %bl, %ecx
@@ -5647,7 +5645,6 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    adcl %esi, %ecx
 ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
 ; X32-NEXT:    movl %ebp, %eax
-; X32-NEXT:    movl %ebp, %esi
 ; X32-NEXT:    mull %ebx
 ; X32-NEXT:    movl %eax, %edi
 ; X32-NEXT:    addl %ecx, %edi
@@ -5660,19 +5657,19 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT:    movl %ebp, %eax
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT:    movl %ebx, %eax
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    mull %ecx
-; X32-NEXT:    movl %edx, %ebx
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    movl %esi, %eax
+; X32-NEXT:    movl %ebp, %eax
 ; X32-NEXT:    mull %ecx
 ; X32-NEXT:    movl %edx, %esi
 ; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    addl %ebx, %ecx
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
 ; X32-NEXT:    adcl $0, %esi
-; X32-NEXT:    movl %ebp, %eax
+; X32-NEXT:    movl %ebx, %eax
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
 ; X32-NEXT:    mull %ebp
 ; X32-NEXT:    movl %edx, %ebx
@@ -5818,7 +5815,6 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    adcl %esi, %ecx
 ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
 ; X32-NEXT:    movl %edi, %eax
-; X32-NEXT:    movl %edi, %esi
 ; X32-NEXT:    mull %ebp
 ; X32-NEXT:    movl %eax, %ebx
 ; X32-NEXT:    addl %ecx, %ebx
@@ -5831,19 +5827,19 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT:    movl %edi, %eax
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT:    movl %ebp, %eax
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    mull %ecx
-; X32-NEXT:    movl %edx, %ebp
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    movl %esi, %eax
+; X32-NEXT:    movl %edi, %eax
 ; X32-NEXT:    mull %ecx
 ; X32-NEXT:    movl %edx, %esi
 ; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    addl %ebp, %ecx
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
 ; X32-NEXT:    adcl $0, %esi
-; X32-NEXT:    movl %edi, %eax
+; X32-NEXT:    movl %ebp, %eax
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    mull %edi
 ; X32-NEXT:    movl %edx, %ebp
@@ -6003,20 +5999,19 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    addl %ecx, %eax
 ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
 ; X32-NEXT:    adcl %ecx, %edx
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT:    addl %eax, %edi
+; X32-NEXT:    addl %eax, %ebp
 ; X32-NEXT:    adcl %edx, %ecx
-; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
 ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
 ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; X32-NEXT:    movl 104(%ebp), %ecx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl 104(%eax), %ecx
 ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    movl %edi, %eax
@@ -6029,7 +6024,8 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    movl %eax, %ecx
 ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
 ; X32-NEXT:    adcl $0, %ebx
-; X32-NEXT:    movl 108(%ebp), %esi
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl 108(%eax), %esi
 ; X32-NEXT:    movl %edi, %eax
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -6057,8 +6053,7 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    adcl %edx, %eax
 ; X32-NEXT:    addl %esi, %edi
 ; X32-NEXT:    adcl %ecx, %eax
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X32-NEXT:    adcl $0, %edi
@@ -6110,12 +6105,11 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    adcl $0, %ecx
 ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT:    movl %ebx, %ecx
-; X32-NEXT:    imull %eax, %ecx
+; X32-NEXT:    imull %eax, %ebx
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    addl %ecx, %edx
+; X32-NEXT:    addl %ebx, %edx
 ; X32-NEXT:    imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
 ; X32-NEXT:    addl %edx, %esi
 ; X32-NEXT:    movl %esi, %ebx
@@ -6161,12 +6155,11 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
 ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT:    movl 124(%edx), %ecx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X32-NEXT:    movl 124(%edi), %ecx
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    imull %eax, %ecx
-; X32-NEXT:    movl 120(%edx), %esi
-; X32-NEXT:    movl %edx, %edi
+; X32-NEXT:    movl 120(%edi), %esi
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    addl %ecx, %edx
@@ -6230,25 +6223,25 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    movl %ebp, %eax
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    mull %ecx
-; X32-NEXT:    movl %edx, %edi
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    movl %esi, %eax
 ; X32-NEXT:    mull %ecx
-; X32-NEXT:    movl %edx, %ecx
+; X32-NEXT:    movl %edx, %edi
 ; X32-NEXT:    movl %eax, %ebx
-; X32-NEXT:    addl %edi, %ebx
-; X32-NEXT:    adcl $0, %ecx
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT:    adcl $0, %edi
 ; X32-NEXT:    movl %ebp, %eax
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT:    mull %edi
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT:    mull %ecx
 ; X32-NEXT:    movl %edx, %ebp
 ; X32-NEXT:    addl %ebx, %eax
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    adcl %ecx, %ebp
+; X32-NEXT:    adcl %edi, %ebp
 ; X32-NEXT:    setb %bl
 ; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    mull %edi
+; X32-NEXT:    mull %ecx
 ; X32-NEXT:    addl %ebp, %eax
 ; X32-NEXT:    movzbl %bl, %edi
 ; X32-NEXT:    adcl %edi, %edx
@@ -6264,26 +6257,26 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    movl %edi, %eax
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
 ; X32-NEXT:    mull %ebx
-; X32-NEXT:    movl %edx, %ecx
+; X32-NEXT:    movl %edx, %esi
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT:    movl %esi, %eax
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT:    movl %ecx, %eax
 ; X32-NEXT:    mull %ebx
 ; X32-NEXT:    movl %edx, %ebx
 ; X32-NEXT:    movl %eax, %ebp
-; X32-NEXT:    addl %ecx, %ebp
+; X32-NEXT:    addl %esi, %ebp
 ; X32-NEXT:    adcl $0, %ebx
 ; X32-NEXT:    movl %edi, %eax
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT:    mull %ecx
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %edx, %edi
 ; X32-NEXT:    addl %ebp, %eax
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %ebx, %edi
 ; X32-NEXT:    setb %bl
-; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    movl %esi, %ebp
-; X32-NEXT:    mull %ecx
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    movl %ecx, %ebp
+; X32-NEXT:    mull %esi
 ; X32-NEXT:    addl %edi, %eax
 ; X32-NEXT:    movzbl %bl, %edi
 ; X32-NEXT:    adcl %edi, %edx
@@ -6321,51 +6314,51 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
 ; X32-NEXT:    movl %ebp, %eax
 ; X32-NEXT:    mull %ebx
+; X32-NEXT:    movl %ebx, %ebp
 ; X32-NEXT:    addl %ecx, %eax
 ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
 ; X32-NEXT:    adcl %ecx, %edx
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-NEXT:    addl %eax, %esi
-; X32-NEXT:    adcl %edx, %ecx
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT:    addl %eax, %ecx
+; X32-NEXT:    adcl %edx, %esi
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
 ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    adcl $0, %esi
 ; X32-NEXT:    adcl $0, %ecx
-; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT:    adcl $0, %esi
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
 ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT:    mull %esi
-; X32-NEXT:    movl %edx, %ebp
-; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
 ; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    mull %esi
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT:    mull %ecx
+; X32-NEXT:    movl %edx, %edi
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT:    mull %ecx
 ; X32-NEXT:    movl %edx, %esi
-; X32-NEXT:    movl %eax, %edi
-; X32-NEXT:    addl %ebp, %edi
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    addl %edi, %ecx
 ; X32-NEXT:    adcl $0, %esi
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X32-NEXT:    mull %ebp
-; X32-NEXT:    movl %edx, %ecx
-; X32-NEXT:    addl %edi, %eax
-; X32-NEXT:    movl %eax, %edi
-; X32-NEXT:    adcl %esi, %ecx
-; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
 ; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    movl %ebp, %edi
 ; X32-NEXT:    mull %ebp
-; X32-NEXT:    addl %ecx, %eax
-; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-NEXT:    movl %edx, %ebx
+; X32-NEXT:    movl %eax, %ebp
+; X32-NEXT:    addl %ecx, %ebp
+; X32-NEXT:    adcl %esi, %ebx
+; X32-NEXT:    setb %cl
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT:    mull %edi
+; X32-NEXT:    addl %ebx, %eax
+; X32-NEXT:    movzbl %cl, %ecx
 ; X32-NEXT:    adcl %ecx, %edx
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
@@ -6375,20 +6368,19 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    adcl %edx, %ecx
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
 ; X32-NEXT:    adcl %eax, %esi
 ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl $0, %ecx
 ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-NEXT:    movl %ebp, %ecx
-; X32-NEXT:    imull %eax, %ecx
+; X32-NEXT:    imull %eax, %edi
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    addl %ecx, %edx
+; X32-NEXT:    addl %edi, %edx
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
 ; X32-NEXT:    imull %ebp, %esi
 ; X32-NEXT:    addl %edx, %esi
@@ -6533,12 +6525,12 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
 ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
 ; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -6572,12 +6564,12 @@ define void @test_1024(i1024* %a, i1024*
 ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
 ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
 ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
 ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
 ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
 ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -6704,6 +6696,7 @@ define void @test_1024(i1024* %a, i1024*
 ; X64-NEXT:    adcq $0, %rbp
 ; X64-NEXT:    addq %rcx, %rbx
 ; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rcx, %r11
 ; X64-NEXT:    adcq %rdi, %rbp
 ; X64-NEXT:    setb %bl
 ; X64-NEXT:    movzbl %bl, %ebx
@@ -6713,17 +6706,16 @@ define void @test_1024(i1024* %a, i1024*
 ; X64-NEXT:    mulq %r8
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    addq %rax, %rcx
+; X64-NEXT:    movq %rdi, %r14
+; X64-NEXT:    movq %rdi, %r8
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq %rdx, %r14
+; X64-NEXT:    addq %rbp, %rcx
 ; X64-NEXT:    movq %rcx, %r12
-; X64-NEXT:    movq %rcx, %r8
-; X64-NEXT:    addq %rax, %r12
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    movq %rdi, %r9
-; X64-NEXT:    movq %rdi, (%rsp) # 8-byte Spill
-; X64-NEXT:    adcq %rdx, %rax
-; X64-NEXT:    addq %rbp, %r12
-; X64-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    adcq %rbx, %rax
-; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq %rbx, %r14
+; X64-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq (%rsi), %rax
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    xorl %ebp, %ebp
@@ -6733,7 +6725,7 @@ define void @test_1024(i1024* %a, i1024*
 ; X64-NEXT:    movq 8(%rsi), %rax
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    mulq %rbp
-; X64-NEXT:    xorl %r11d, %r11d
+; X64-NEXT:    xorl %r9d, %r9d
 ; X64-NEXT:    movq %rax, %r15
 ; X64-NEXT:    addq %rcx, %r15
 ; X64-NEXT:    movq %rdx, %rbp
@@ -6749,22 +6741,21 @@ define void @test_1024(i1024* %a, i1024*
 ; X64-NEXT:    movq %rsi, %r13
 ; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    mulq %r11
+; X64-NEXT:    mulq %r9
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    movq %rdi, %r14
-; X64-NEXT:    addq %rax, %r14
-; X64-NEXT:    movq %rcx, %r11
-; X64-NEXT:    adcq %rdx, %r11
-; X64-NEXT:    addq %rbp, %r14
-; X64-NEXT:    adcq %rbx, %r11
-; X64-NEXT:    movq %r8, %rax
-; X64-NEXT:    movq %r8, %rbp
-; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rdi, %r9
+; X64-NEXT:    addq %rax, %r9
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    adcq %rdx, %rax
+; X64-NEXT:    addq %rbp, %r9
+; X64-NEXT:    adcq %rbx, %rax
+; X64-NEXT:    movq %rax, %rbp
+; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    addq %rdi, %rax
-; X64-NEXT:    movq %r9, %rax
-; X64-NEXT:    adcq %rcx, %rax
-; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq %rcx, %r8
+; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq (%r10), %rax
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    xorl %r8d, %r8d
@@ -6772,44 +6763,44 @@ define void @test_1024(i1024* %a, i1024*
 ; X64-NEXT:    movq %rdx, %rsi
 ; X64-NEXT:    movq %rax, %rbx
 ; X64-NEXT:    addq %rdi, %rax
-; X64-NEXT:    movq %rdi, %r9
 ; X64-NEXT:    movq %rdx, %rax
 ; X64-NEXT:    adcq %rcx, %rax
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq 32(%r13), %rax
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    mulq %r8
-; X64-NEXT:    xorl %r8d, %r8d
+; X64-NEXT:    xorl %ecx, %ecx
 ; X64-NEXT:    movq %rax, %r13
 ; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq %rbx, %rax
-; X64-NEXT:    movq %rbx, %rcx
+; X64-NEXT:    movq %rbx, %r8
 ; X64-NEXT:    addq %r13, %rax
 ; X64-NEXT:    movq %rsi, %rax
 ; X64-NEXT:    adcq %rdx, %rax
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    movq %r11, %rax
 ; X64-NEXT:    addq %rdi, %rax
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rdi, %r11
 ; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
 ; X64-NEXT:    adcq %r15, %rax
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    adcq %r14, %r12
-; X64-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT:    adcq %r11, %rax
+; X64-NEXT:    movq %r12, %rax
+; X64-NEXT:    adcq %r9, %rax
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    movq %r11, %rdi
+; X64-NEXT:    adcq %rbp, %r14
+; X64-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rbp, %rdi
 ; X64-NEXT:    movq 8(%r10), %rax
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    mulq %r8
-; X64-NEXT:    movq %rax, %r11
-; X64-NEXT:    addq %rsi, %r11
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    movq %rax, %r12
+; X64-NEXT:    addq %rsi, %r12
 ; X64-NEXT:    movq %rdx, %rbp
 ; X64-NEXT:    adcq $0, %rbp
-; X64-NEXT:    addq %rbx, %r11
+; X64-NEXT:    addq %rbx, %r12
 ; X64-NEXT:    adcq %rsi, %rbp
 ; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    setb %bl
@@ -6818,92 +6809,91 @@ define void @test_1024(i1024* %a, i1024*
 ; X64-NEXT:    adcq %rdx, %rbx
 ; X64-NEXT:    movq 16(%r10), %rax
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    mulq %r8
+; X64-NEXT:    mulq %rcx
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    movq %rcx, %r8
-; X64-NEXT:    addq %rax, %r8
+; X64-NEXT:    movq %r8, %rcx
+; X64-NEXT:    addq %rax, %rcx
 ; X64-NEXT:    movq %rsi, %r10
 ; X64-NEXT:    adcq %rdx, %r10
-; X64-NEXT:    addq %rbp, %r8
-; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    addq %rbp, %rcx
 ; X64-NEXT:    adcq %rbx, %r10
-; X64-NEXT:    movq %rcx, %rdx
-; X64-NEXT:    movq %rcx, %r12
-; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    addq %r9, %rdx
+; X64-NEXT:    movq %r8, %rdx
+; X64-NEXT:    movq %r8, %r14
+; X64-NEXT:    movq %r8, (%rsp) # 8-byte Spill
+; X64-NEXT:    addq %r11, %rdx
 ; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    movq %r11, %r8
-; X64-NEXT:    adcq %r11, %r15
+; X64-NEXT:    movq %r12, %rsi
+; X64-NEXT:    adcq %r12, %r15
 ; X64-NEXT:    movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    adcq %rax, %r14
-; X64-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    adcq %rcx, %r9
+; X64-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rcx, %r8
 ; X64-NEXT:    adcq %r10, %rdi
 ; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; X64-NEXT:    movq 40(%rsi), %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    movq 40(%rdi), %rax
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    xorl %r14d, %r14d
-; X64-NEXT:    mulq %r14
-; X64-NEXT:    movq %rax, %rdi
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
-; X64-NEXT:    addq %r9, %rdi
+; X64-NEXT:    xorl %r9d, %r9d
+; X64-NEXT:    mulq %r9
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:    addq %r11, %rcx
 ; X64-NEXT:    movq %rdx, %rbp
 ; X64-NEXT:    adcq $0, %rbp
-; X64-NEXT:    addq %r13, %rdi
-; X64-NEXT:    adcq %r9, %rbp
+; X64-NEXT:    addq %r13, %rcx
+; X64-NEXT:    adcq %r11, %rbp
 ; X64-NEXT:    setb %bl
 ; X64-NEXT:    addq %rax, %rbp
-; X64-NEXT:    movzbl %bl, %r11d
-; X64-NEXT:    adcq %rdx, %r11
-; X64-NEXT:    movq 48(%rsi), %rax
+; X64-NEXT:    movzbl %bl, %ebx
+; X64-NEXT:    adcq %rdx, %rbx
+; X64-NEXT:    movq 48(%rdi), %rax
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    mulq %r14
+; X64-NEXT:    mulq %r9
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    movq %r13, %rbx
-; X64-NEXT:    addq %rax, %rbx
-; X64-NEXT:    movq %r9, %rsi
-; X64-NEXT:    adcq %rdx, %rsi
-; X64-NEXT:    addq %rbp, %rbx
-; X64-NEXT:    adcq %r11, %rsi
+; X64-NEXT:    movq %r13, %r12
+; X64-NEXT:    addq %rax, %r12
+; X64-NEXT:    movq %r11, %rdi
+; X64-NEXT:    adcq %rdx, %rdi
+; X64-NEXT:    addq %rbp, %r12
+; X64-NEXT:    adcq %rbx, %rdi
 ; X64-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    addq %r13, %r12
-; X64-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    adcq %rdi, %r8
+; X64-NEXT:    addq %r13, %r14
+; X64-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq %rcx, %rsi
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq %r12, %r8
 ; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    adcq %rbx, %rcx
-; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    adcq %rsi, %r10
+; X64-NEXT:    adcq %rdi, %r10
 ; X64-NEXT:    movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
-; X64-NEXT:    movq %rdx, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    movq %r8, %rax
 ; X64-NEXT:    addq %r13, %rax
-; X64-NEXT:    movq (%rsp), %rax # 8-byte Reload
-; X64-NEXT:    adcq %r9, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    adcq %r11, %rax
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    movq %rdx, %rax
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    movq %r8, %r10
 ; X64-NEXT:    addq %r13, %rax
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
+; X64-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
 ; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
-; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
-; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
 ; X64-NEXT:    movq %rcx, %rax
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; X64-NEXT:    mulq %rdi
-; X64-NEXT:    movq %rax, %r9
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    mulq %r9
+; X64-NEXT:    movq %rax, %r14
 ; X64-NEXT:    movq %rdx, %rsi
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
 ; X64-NEXT:    movq 56(%rax), %r11
 ; X64-NEXT:    movq %r11, %rax
 ; X64-NEXT:    movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    mulq %rdi
-; X64-NEXT:    movq %rdi, %r10
+; X64-NEXT:    mulq %r9
 ; X64-NEXT:    movq %rdx, %rbp
 ; X64-NEXT:    movq %rax, %rbx
 ; X64-NEXT:    addq %rsi, %rbx
@@ -6918,7 +6908,7 @@ define void @test_1024(i1024* %a, i1024*
 ; X64-NEXT:    setb %cl
 ; X64-NEXT:    movq %r11, %rax
 ; X64-NEXT:    mulq %rdi
-; X64-NEXT:    movq %rdi, %r11
+; X64-NEXT:    movq %rdi, %r13
 ; X64-NEXT:    addq %rsi, %rax
 ; X64-NEXT:    movzbl %cl, %ecx
 ; X64-NEXT:    adcq %rcx, %rdx
@@ -6930,26 +6920,26 @@ define void @test_1024(i1024* %a, i1024*
 ; X64-NEXT:    adcq %rdx, %r12
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
 ; X64-NEXT:    movq %rcx, %rax
-; X64-NEXT:    mulq %r10
+; X64-NEXT:    mulq %r9
 ; X64-NEXT:    movq %rdx, %rsi
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    mulq %r10
+; X64-NEXT:    mulq %r9
 ; X64-NEXT:    movq %rdx, %rbp
 ; X64-NEXT:    movq %rax, %rbx
 ; X64-NEXT:    addq %rsi, %rbx
 ; X64-NEXT:    adcq $0, %rbp
 ; X64-NEXT:    movq %rcx, %rax
-; X64-NEXT:    movq %rcx, %r10
-; X64-NEXT:    mulq %r11
+; X64-NEXT:    movq %rcx, %r11
+; X64-NEXT:    mulq %r13
 ; X64-NEXT:    movq %rdx, %rcx
 ; X64-NEXT:    addq %rbx, %rax
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    adcq %rbp, %rcx
 ; X64-NEXT:    setb %bl
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    mulq %r11
+; X64-NEXT:    mulq %r13
 ; X64-NEXT:    movq %rdx, %r13
 ; X64-NEXT:    movq %rax, %rsi
 ; X64-NEXT:    addq %rcx, %rsi
@@ -6957,49 +6947,47 @@ define void @test_1024(i1024* %a, i1024*
 ; X64-NEXT:    adcq %rax, %r13
 ; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
 ; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
-; X64-NEXT:    addq %r9, %rsi
+; X64-NEXT:    addq %r14, %rsi
 ; X64-NEXT:    adcq %r8, %r13
 ; X64-NEXT:    adcq $0, %r15
 ; X64-NEXT:    adcq $0, %r12
-; X64-NEXT:    movq %r10, %rbx
-; X64-NEXT:    movq %r10, %rax
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
-; X64-NEXT:    mulq %r11
+; X64-NEXT:    movq %r11, %rbx
+; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    mulq %rbp
 ; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    movq %rax, %r10
+; X64-NEXT:    movq %rax, %r14
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    movq %rdi, %r9
-; X64-NEXT:    mulq %r11
+; X64-NEXT:    movq %rdi, %r11
+; X64-NEXT:    mulq %rbp
 ; X64-NEXT:    movq %rdx, %rdi
 ; X64-NEXT:    movq %rax, %rbp
 ; X64-NEXT:    addq %rcx, %rbp
 ; X64-NEXT:    adcq $0, %rdi
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT:    movq 24(%rax), %rcx
+; X64-NEXT:    movq 24(%rax), %r9
 ; X64-NEXT:    movq %rbx, %rax
-; X64-NEXT:    mulq %rcx
-; X64-NEXT:    movq %rcx, %rbx
-; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    mulq %r9
 ; X64-NEXT:    movq %rdx, %rcx
 ; X64-NEXT:    movq %rax, %r8
 ; X64-NEXT:    addq %rbp, %r8
 ; X64-NEXT:    adcq %rdi, %rcx
-; X64-NEXT:    setb %dil
-; X64-NEXT:    movq %r9, %rax
-; X64-NEXT:    mulq %rbx
+; X64-NEXT:    setb %bl
+; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    mulq %r9
 ; X64-NEXT:    addq %rcx, %rax
-; X64-NEXT:    movzbl %dil, %ecx
+; X64-NEXT:    movzbl %bl, %ecx
 ; X64-NEXT:    adcq %rcx, %rdx
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
-; X64-NEXT:    addq %r14, %rbp
-; X64-NEXT:    movq (%rsp), %rbx # 8-byte Reload
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
-; X64-NEXT:    adcq %r9, %rbx
+; X64-NEXT:    movq %r10, %rbp
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:    addq %r11, %rbp
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT:    adcq %r10, %rbx
 ; X64-NEXT:    addq %rax, %rbp
 ; X64-NEXT:    adcq %rdx, %rbx
-; X64-NEXT:    addq %rsi, %r10
-; X64-NEXT:    movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    addq %rsi, %r14
+; X64-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    adcq %r13, %r8
 ; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    adcq $0, %rbp
@@ -7009,76 +6997,74 @@ define void @test_1024(i1024* %a, i1024*
 ; X64-NEXT:    setb %r15b
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
 ; X64-NEXT:    movq %rcx, %rax
-; X64-NEXT:    movq %r11, %rsi
-; X64-NEXT:    mulq %r11
-; X64-NEXT:    movq %rdx, %r11
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rdx, %r14
 ; X64-NEXT:    movq %rax, %r13
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
 ; X64-NEXT:    movq %r12, %rax
 ; X64-NEXT:    mulq %rsi
 ; X64-NEXT:    movq %rdx, %rsi
 ; X64-NEXT:    movq %rax, %rdi
-; X64-NEXT:    addq %r11, %rdi
+; X64-NEXT:    addq %r14, %rdi
 ; X64-NEXT:    adcq $0, %rsi
 ; X64-NEXT:    movq %rcx, %rax
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
-; X64-NEXT:    mulq %r8
+; X64-NEXT:    mulq %r9
 ; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    movq %rax, %r11
-; X64-NEXT:    addq %rdi, %r11
+; X64-NEXT:    movq %rax, %r14
+; X64-NEXT:    addq %rdi, %r14
 ; X64-NEXT:    adcq %rsi, %rcx
 ; X64-NEXT:    setb %sil
 ; X64-NEXT:    movq %r12, %rax
-; X64-NEXT:    mulq %r8
-; X64-NEXT:    movq %r8, %r12
+; X64-NEXT:    mulq %r9
+; X64-NEXT:    movq %r9, %r12
 ; X64-NEXT:    addq %rcx, %rax
 ; X64-NEXT:    movzbl %sil, %ecx
 ; X64-NEXT:    adcq %rcx, %rdx
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT:    addq %r14, %rcx
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
-; X64-NEXT:    adcq %r9, %r14
+; X64-NEXT:    addq %r11, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    adcq %r10, %r9
 ; X64-NEXT:    addq %rax, %rcx
-; X64-NEXT:    adcq %rdx, %r14
+; X64-NEXT:    adcq %rdx, %r9
 ; X64-NEXT:    addq %rbp, %r13
-; X64-NEXT:    adcq %rbx, %r11
+; X64-NEXT:    adcq %rbx, %r14
 ; X64-NEXT:    movzbl %r15b, %eax
 ; X64-NEXT:    adcq %rax, %rcx
-; X64-NEXT:    adcq $0, %r14
+; X64-NEXT:    adcq $0, %r9
 ; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
 ; X64-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
-; X64-NEXT:    movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
-; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
 ; X64-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
+; X64-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    mulq %r9
 ; X64-NEXT:    movq %rax, %r14
 ; X64-NEXT:    movq %rdx, %rbx
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
 ; X64-NEXT:    movq 24(%rax), %rcx
 ; X64-NEXT:    movq %rcx, %rax
 ; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    mulq %rsi
-; X64-NEXT:    movq %rsi, %r11
+; X64-NEXT:    mulq %r9
 ; X64-NEXT:    movq %rdx, %rsi
 ; X64-NEXT:    movq %rax, %rbp
 ; X64-NEXT:    addq %rbx, %rbp
 ; X64-NEXT:    adcq $0, %rsi
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
-; X64-NEXT:    mulq %r9
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:    mulq %r11
 ; X64-NEXT:    movq %rdx, %rbx
 ; X64-NEXT:    movq %rax, %r15
 ; X64-NEXT:    addq %rbp, %r15
 ; X64-NEXT:    adcq %rsi, %rbx
 ; X64-NEXT:    setb %sil
 ; X64-NEXT:    movq %rcx, %rax
-; X64-NEXT:    mulq %r9
+; X64-NEXT:    mulq %r11
 ; X64-NEXT:    addq %rbx, %rax
 ; X64-NEXT:    movzbl %sil, %ecx
 ; X64-NEXT:    adcq %rcx, %rdx
@@ -7090,19 +7076,19 @@ define void @test_1024(i1024* %a, i1024*
 ; X64-NEXT:    adcq %rdx, %r10
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
 ; X64-NEXT:    movq %rcx, %rax
-; X64-NEXT:    mulq %r11
+; X64-NEXT:    mulq %r9
 ; X64-NEXT:    movq %rdx, %rdi
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
 ; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    mulq %r11
+; X64-NEXT:    mulq %r9
 ; X64-NEXT:    movq %rdx, %rbp
 ; X64-NEXT:    movq %rax, %rbx
 ; X64-NEXT:    addq %rdi, %rbx
 ; X64-NEXT:    adcq $0, %rbp
 ; X64-NEXT:    movq %rcx, %rax
-; X64-NEXT:    movq %rcx, %r11
-; X64-NEXT:    mulq %r9
+; X64-NEXT:    movq %rcx, %r9
+; X64-NEXT:    mulq %r11
 ; X64-NEXT:    movq %rdx, %rdi
 ; X64-NEXT:    addq %rbx, %rax
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@@ -7110,7 +7096,7 @@ define void @test_1024(i1024* %a, i1024*
 ; X64-NEXT:    setb %cl
 ; X64-NEXT:    movq %rsi, %rax
 ; X64-NEXT:    movq %rsi, %rbp
-; X64-NEXT:    mulq %r9
+; X64-NEXT:    mulq %r11
 ; X64-NEXT:    movq %rdx, %rsi
 ; X64-NEXT:    movq %rax, %rbx
 ; X64-NEXT:    addq %rdi, %rbx
@@ -7122,11 +7108,11 @@ define void @test_1024(i1024* %a, i1024*
 ; X64-NEXT:    adcq %r15, %rsi
 ; X64-NEXT:    adcq $0, %r8
 ; X64-NEXT:    adcq $0, %r10
-; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    movq %r9, %rax
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
 ; X64-NEXT:    mulq %rdi
 ; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    movq %rax, %r9
+; X64-NEXT:    movq %rax, %r11
 ; X64-NEXT:    movq %rbp, %rax
 ; X64-NEXT:    movq %rbp, %r14
 ; X64-NEXT:    mulq %rdi
@@ -7135,11 +7121,11 @@ define void @test_1024(i1024* %a, i1024*
 ; X64-NEXT:    movq %rax, %rbp
 ; X64-NEXT:    addq %rcx, %rbp
 ; X64-NEXT:    adcq $0, %rdi
-; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    movq %r9, %rax
 ; X64-NEXT:    mulq %r12
 ; X64-NEXT:    movq %rdx, %rcx
 ; X64-NEXT:    addq %rbp, %rax
-; X64-NEXT:    movq %rax, %r11
+; X64-NEXT:    movq %rax, %r9
 ; X64-NEXT:    adcq %rdi, %rcx
 ; X64-NEXT:    setb %dil
 ; X64-NEXT:    movq %r14, %rax
@@ -7147,7 +7133,7 @@ define void @test_1024(i1024* %a, i1024*
 ; X64-NEXT:    addq %rcx, %rax
 ; X64-NEXT:    movzbl %dil, %ecx
 ; X64-NEXT:    adcq %rcx, %rdx
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    movq (%rsp), %rdi # 8-byte Reload
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
 ; X64-NEXT:    addq %r13, %rdi
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
@@ -7155,65 +7141,63 @@ define void @test_1024(i1024* %a, i1024*
 ; X64-NEXT:    adcq %r14, %rbp
 ; X64-NEXT:    addq %rax, %rdi
 ; X64-NEXT:    adcq %rdx, %rbp
-; X64-NEXT:    addq %rbx, %r9
-; X64-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    adcq %rsi, %r11
+; X64-NEXT:    addq %rbx, %r11
 ; X64-NEXT:    movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq %rsi, %r9
+; X64-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    adcq $0, %rdi
 ; X64-NEXT:    adcq $0, %rbp
 ; X64-NEXT:    addq %r8, %rdi
 ; X64-NEXT:    adcq %r10, %rbp
-; X64-NEXT:    setb %r9b
+; X64-NEXT:    setb %r10b
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
 ; X64-NEXT:    movq %rcx, %rax
 ; X64-NEXT:    mulq %r15
-; X64-NEXT:    movq %rdx, %r10
+; X64-NEXT:    movq %rdx, %r8
 ; X64-NEXT:    movq %rax, %r11
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
-; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    movq %r9, %rax
 ; X64-NEXT:    mulq %r15
 ; X64-NEXT:    movq %rdx, %rsi
 ; X64-NEXT:    movq %rax, %rbx
-; X64-NEXT:    addq %r10, %rbx
+; X64-NEXT:    addq %r8, %rbx
 ; X64-NEXT:    adcq $0, %rsi
 ; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    mulq %r12
 ; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    movq %rax, %r15
-; X64-NEXT:    addq %rbx, %r15
+; X64-NEXT:    movq %rax, %r8
+; X64-NEXT:    addq %rbx, %r8
 ; X64-NEXT:    adcq %rsi, %rcx
 ; X64-NEXT:    setb %bl
-; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    movq %r9, %rax
 ; X64-NEXT:    mulq %r12
 ; X64-NEXT:    addq %rcx, %rax
 ; X64-NEXT:    movzbl %bl, %ecx
 ; X64-NEXT:    adcq %rcx, %rdx
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
-; X64-NEXT:    movq %r10, %rcx
-; X64-NEXT:    addq %r13, %rcx
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
-; X64-NEXT:    movq %rbx, %rsi
-; X64-NEXT:    movq %rbx, %r12
-; X64-NEXT:    adcq %r14, %rsi
-; X64-NEXT:    addq %rax, %rcx
-; X64-NEXT:    adcq %rdx, %rsi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    movq %r9, %r15
+; X64-NEXT:    addq %r13, %r15
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; X64-NEXT:    movq %r12, %r13
+; X64-NEXT:    adcq %r14, %r13
+; X64-NEXT:    addq %rax, %r15
+; X64-NEXT:    adcq %rdx, %r13
 ; X64-NEXT:    addq %rdi, %r11
-; X64-NEXT:    adcq %rbp, %r15
-; X64-NEXT:    movzbl %r9b, %eax
-; X64-NEXT:    adcq %rax, %rcx
-; X64-NEXT:    adcq $0, %rsi
+; X64-NEXT:    adcq %rbp, %r8
+; X64-NEXT:    movzbl %r10b, %eax
+; X64-NEXT:    adcq %rax, %r15
+; X64-NEXT:    adcq $0, %r13
 ; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
 ; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
-; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
-; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
 ; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
 ; X64-NEXT:    movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
+; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
-; X64-NEXT:    movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
-; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
-; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
 ; X64-NEXT:    adcq $0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
 ; X64-NEXT:    adcq $0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
 ; X64-NEXT:    adcq $0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
@@ -7224,104 +7208,106 @@ define void @test_1024(i1024* %a, i1024*
 ; X64-NEXT:    mulq %rcx
 ; X64-NEXT:    movq %rdx, %rsi
 ; X64-NEXT:    movq %rax, %r14
-; X64-NEXT:    movq %r8, %rbp
-; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT:    movq %r10, %rax
 ; X64-NEXT:    mulq %rcx
-; X64-NEXT:    movq %rcx, %r11
+; X64-NEXT:    movq %rcx, %rbp
 ; X64-NEXT:    movq %rdx, %rbx
 ; X64-NEXT:    movq %rax, %rcx
 ; X64-NEXT:    addq %rsi, %rcx
 ; X64-NEXT:    adcq $0, %rbx
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:    mulq %r11
 ; X64-NEXT:    movq %rdx, %rsi
 ; X64-NEXT:    movq %rax, %r8
 ; X64-NEXT:    addq %rcx, %r8
 ; X64-NEXT:    adcq %rbx, %rsi
 ; X64-NEXT:    setb %cl
-; X64-NEXT:    movq %rbp, %rax
-; X64-NEXT:    mulq %rdi
-; X64-NEXT:    movq %rdi, %r15
+; X64-NEXT:    movq %r10, %rax
+; X64-NEXT:    mulq %r11
 ; X64-NEXT:    addq %rsi, %rax
 ; X64-NEXT:    movzbl %cl, %ecx
 ; X64-NEXT:    adcq %rcx, %rdx
-; X64-NEXT:    movq %r10, %r9
 ; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
 ; X64-NEXT:    movq %r12, %r10
 ; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload
 ; X64-NEXT:    addq %rax, %r9
 ; X64-NEXT:    adcq %rdx, %r10
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; X64-NEXT:    movq %rbp, %rax
-; X64-NEXT:    mulq %r11
-; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    mulq %rbp
+; X64-NEXT:    movq %rdx, %r12
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    mulq %r11
+; X64-NEXT:    mulq %rbp
 ; X64-NEXT:    movq %rdx, %rsi
 ; X64-NEXT:    movq %rax, %rbx
-; X64-NEXT:    addq %rcx, %rbx
+; X64-NEXT:    addq %r12, %rbx
 ; X64-NEXT:    adcq $0, %rsi
-; X64-NEXT:    movq %rbp, %rax
-; X64-NEXT:    mulq %r15
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    movq %rcx, %r12
+; X64-NEXT:    mulq %r11
 ; X64-NEXT:    movq %rdx, %rcx
 ; X64-NEXT:    addq %rbx, %rax
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    adcq %rsi, %rcx
 ; X64-NEXT:    setb %sil
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    mulq %r15
-; X64-NEXT:    movq %rdx, %r15
+; X64-NEXT:    movq %rdi, %rbp
+; X64-NEXT:    mulq %r11
+; X64-NEXT:    movq %rdx, %rdi
 ; X64-NEXT:    movq %rax, %rbx
 ; X64-NEXT:    addq %rcx, %rbx
 ; X64-NEXT:    movzbl %sil, %eax
-; X64-NEXT:    adcq %rax, %r15
+; X64-NEXT:    adcq %rax, %rdi
 ; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
-; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
 ; X64-NEXT:    addq %r14, %rbx
-; X64-NEXT:    adcq %r8, %r15
+; X64-NEXT:    adcq %r8, %rdi
 ; X64-NEXT:    adcq $0, %r9
 ; X64-NEXT:    adcq $0, %r10
-; X64-NEXT:    movq %rbp, %rsi
-; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    movq %r12, %r11
+; X64-NEXT:    movq %r12, %rax
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
 ; X64-NEXT:    mulq %rcx
 ; X64-NEXT:    movq %rdx, %r14
 ; X64-NEXT:    movq %rax, %r12
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    movq %rdi, %r8
+; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    movq %rbp, %r8
 ; X64-NEXT:    mulq %rcx
 ; X64-NEXT:    movq %rdx, %rbp
 ; X64-NEXT:    movq %rax, %rcx
 ; X64-NEXT:    addq %r14, %rcx
 ; X64-NEXT:    adcq $0, %rbp
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT:    movq 56(%rax), %rdi
-; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq 56(%rax), %rsi
+; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rsi, %r11
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq %rdx, %rsi
 ; X64-NEXT:    movq %rax, %r14
 ; X64-NEXT:    addq %rcx, %r14
 ; X64-NEXT:    adcq %rbp, %rsi
 ; X64-NEXT:    setb %cl
 ; X64-NEXT:    movq %r8, %rax
-; X64-NEXT:    mulq %rdi
-; X64-NEXT:    movq %rdi, %r8
+; X64-NEXT:    mulq %r11
 ; X64-NEXT:    addq %rsi, %rax
 ; X64-NEXT:    movzbl %cl, %ecx
 ; X64-NEXT:    adcq %rcx, %rdx
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
-; X64-NEXT:    addq %r11, %rcx
+; X64-NEXT:    movq (%rsp), %rcx # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    addq %r8, %rcx
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
-; X64-NEXT:    adcq %r13, %rsi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; X64-NEXT:    adcq %r11, %rsi
 ; X64-NEXT:    addq %rax, %rcx
 ; X64-NEXT:    adcq %rdx, %rsi
 ; X64-NEXT:    addq %rbx, %r12
-; X64-NEXT:    adcq %r15, %r14
+; X64-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq %rdi, %r14
 ; X64-NEXT:    adcq $0, %rcx
 ; X64-NEXT:    adcq $0, %rsi
 ; X64-NEXT:    addq %r9, %rcx
@@ -7336,69 +7322,65 @@ define void @test_1024(i1024* %a, i1024*
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
 ; X64-NEXT:    movq %r10, %rax
 ; X64-NEXT:    mulq %rdi
-; X64-NEXT:    movq %rdx, %r15
+; X64-NEXT:    movq %rdx, %r12
 ; X64-NEXT:    movq %rax, %rbx
 ; X64-NEXT:    addq %r9, %rbx
-; X64-NEXT:    adcq $0, %r15
+; X64-NEXT:    adcq $0, %r12
 ; X64-NEXT:    movq %rbp, %rax
-; X64-NEXT:    movq %r8, %rdi
-; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    mulq %r8
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    mulq %rdi
 ; X64-NEXT:    movq %rdx, %r9
-; X64-NEXT:    movq %rax, %r8
-; X64-NEXT:    addq %rbx, %r8
-; X64-NEXT:    adcq %r15, %r9
+; X64-NEXT:    movq %rax, %rbp
+; X64-NEXT:    addq %rbx, %rbp
+; X64-NEXT:    adcq %r12, %r9
 ; X64-NEXT:    setb %bl
 ; X64-NEXT:    movq %r10, %rax
 ; X64-NEXT:    mulq %rdi
 ; X64-NEXT:    addq %r9, %rax
 ; X64-NEXT:    movzbl %bl, %edi
 ; X64-NEXT:    adcq %rdi, %rdx
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
-; X64-NEXT:    addq %r11, %r15
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; X64-NEXT:    adcq %r13, %rbp
-; X64-NEXT:    addq %rax, %r15
-; X64-NEXT:    adcq %rdx, %rbp
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; X64-NEXT:    addq %r8, %r12
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT:    adcq %r11, %r10
+; X64-NEXT:    addq %rax, %r12
+; X64-NEXT:    adcq %rdx, %r10
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
 ; X64-NEXT:    addq %rcx, %rdx
-; X64-NEXT:    adcq %rsi, %r8
+; X64-NEXT:    adcq %rsi, %rbp
 ; X64-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
-; X64-NEXT:    adcq %rax, %r15
-; X64-NEXT:    adcq $0, %rbp
+; X64-NEXT:    adcq %rax, %r12
+; X64-NEXT:    adcq $0, %r10
 ; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
-; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
-; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
 ; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
 ; X64-NEXT:    addq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
 ; X64-NEXT:    adcq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
-; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
-; X64-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
+; X64-NEXT:    adcq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; X64-NEXT:    adcq %r13, %r14
 ; X64-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    adcq $0, %rdx
-; X64-NEXT:    adcq $0, %r8
-; X64-NEXT:    adcq $0, %r15
 ; X64-NEXT:    adcq $0, %rbp
+; X64-NEXT:    adcq $0, %r12
+; X64-NEXT:    adcq $0, %r10
 ; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
 ; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
-; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
 ; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload
 ; X64-NEXT:    setb {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Spill
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
 ; X64-NEXT:    movq %rcx, %rax
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT:    mulq %r14
 ; X64-NEXT:    movq %rdx, %r11
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
-; X64-NEXT:    movq %r9, %rax
-; X64-NEXT:    mulq %rsi
-; X64-NEXT:    movq %rsi, %r10
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT:    movq %r15, %rax
+; X64-NEXT:    mulq %r14
 ; X64-NEXT:    movq %rdx, %rdi
 ; X64-NEXT:    movq %rax, %rbx
 ; X64-NEXT:    addq %r11, %rbx
@@ -7407,176 +7389,171 @@ define void @test_1024(i1024* %a, i1024*
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
 ; X64-NEXT:    mulq %rsi
 ; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    movq %rax, %r12
-; X64-NEXT:    addq %rbx, %r12
+; X64-NEXT:    movq %rax, %r9
+; X64-NEXT:    addq %rbx, %r9
 ; X64-NEXT:    adcq %rdi, %rcx
 ; X64-NEXT:    setb %bl
-; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    movq %r15, %rax
 ; X64-NEXT:    mulq %rsi
-; X64-NEXT:    movq %rsi, %r9
+; X64-NEXT:    movq %rsi, %r13
 ; X64-NEXT:    addq %rcx, %rax
 ; X64-NEXT:    movzbl %bl, %ecx
 ; X64-NEXT:    adcq %rcx, %rdx
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
 ; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
 ; X64-NEXT:    addq %rax, %r8
-; X64-NEXT:    adcq %rdx, %rcx
-; X64-NEXT:    movq %rcx, %r14
+; X64-NEXT:    adcq %rdx, %r15
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
 ; X64-NEXT:    movq %rcx, %rax
-; X64-NEXT:    mulq %r10
+; X64-NEXT:    mulq %r14
 ; X64-NEXT:    movq %rdx, %r11
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
 ; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    mulq %r10
+; X64-NEXT:    mulq %r14
 ; X64-NEXT:    movq %rdx, %rdi
 ; X64-NEXT:    movq %rax, %rbx
 ; X64-NEXT:    addq %r11, %rbx
 ; X64-NEXT:    adcq $0, %rdi
 ; X64-NEXT:    movq %rcx, %rax
-; X64-NEXT:    movq %rcx, %r13
-; X64-NEXT:    mulq %r9
+; X64-NEXT:    movq %rcx, %r14
+; X64-NEXT:    mulq %r13
 ; X64-NEXT:    movq %rdx, %rcx
 ; X64-NEXT:    addq %rbx, %rax
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    adcq %rdi, %rcx
 ; X64-NEXT:    setb %bl
 ; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    mulq %r9
+; X64-NEXT:    mulq %r13
 ; X64-NEXT:    movq %rdx, %r11
-; X64-NEXT:    movq %rax, %rdi
-; X64-NEXT:    addq %rcx, %rdi
+; X64-NEXT:    movq %rax, %r13
+; X64-NEXT:    addq %rcx, %r13
 ; X64-NEXT:    movzbl %bl, %eax
 ; X64-NEXT:    adcq %rax, %r11
-; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
+; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
 ; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
-; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
-; X64-NEXT:    adcq %r12, %r11
+; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
+; X64-NEXT:    adcq %r9, %r11
 ; X64-NEXT:    adcq $0, %r8
 ; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    adcq $0, %r14
-; X64-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    movq %r13, %rax
+; X64-NEXT:    adcq $0, %r15
+; X64-NEXT:    movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %r14, %rax
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
 ; X64-NEXT:    mulq %rcx
-; X64-NEXT:    movq %rdx, %r8
-; X64-NEXT:    movq %rax, %r12
+; X64-NEXT:    movq %rdx, %rbx
+; X64-NEXT:    movq %rax, %r9
 ; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    movq %rsi, %r9
+; X64-NEXT:    movq %rsi, %r15
 ; X64-NEXT:    mulq %rcx
-; X64-NEXT:    movq %rcx, %r10
 ; X64-NEXT:    movq %rdx, %rsi
 ; X64-NEXT:    movq %rax, %rcx
-; X64-NEXT:    addq %r8, %rcx
+; X64-NEXT:    addq %rbx, %rcx
 ; X64-NEXT:    adcq $0, %rsi
-; X64-NEXT:    movq %r13, %rax
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
-; X64-NEXT:    mulq %r13
+; X64-NEXT:    movq %r14, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    mulq %rdi
 ; X64-NEXT:    movq %rdx, %rbx
-; X64-NEXT:    addq %rcx, %rax
 ; X64-NEXT:    movq %rax, %r8
+; X64-NEXT:    addq %rcx, %r8
 ; X64-NEXT:    adcq %rsi, %rbx
 ; X64-NEXT:    setb %cl
-; X64-NEXT:    movq %r9, %rax
-; X64-NEXT:    mulq %r13
-; X64-NEXT:    movq %r13, %r9
+; X64-NEXT:    movq %r15, %rax
+; X64-NEXT:    mulq %rdi
 ; X64-NEXT:    addq %rbx, %rax
 ; X64-NEXT:    movzbl %cl, %ecx
 ; X64-NEXT:    adcq %rcx, %rdx
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
-; X64-NEXT:    addq %r13, %rsi
-; X64-NEXT:    movq (%rsp), %rcx # 8-byte Reload
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
-; X64-NEXT:    adcq %r14, %rcx
-; X64-NEXT:    addq %rax, %rsi
+; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    addq %rax, %r14
 ; X64-NEXT:    adcq %rdx, %rcx
-; X64-NEXT:    addq %rdi, %r12
+; X64-NEXT:    addq %r13, %r9
+; X64-NEXT:    movq %r9, %r13
 ; X64-NEXT:    adcq %r11, %r8
-; X64-NEXT:    movq %r8, %r11
-; X64-NEXT:    adcq $0, %rsi
+; X64-NEXT:    movq %r8, %r15
+; X64-NEXT:    adcq $0, %r14
 ; X64-NEXT:    adcq $0, %rcx
-; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
-; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
 ; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
-; X64-NEXT:    movq %rcx, (%rsp) # 8-byte Spill
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    setb {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Spill
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
 ; X64-NEXT:    movq %rbx, %rax
-; X64-NEXT:    mulq %r10
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    mulq %rsi
 ; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rax, %r11
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
 ; X64-NEXT:    movq %r8, %rax
-; X64-NEXT:    mulq %r10
+; X64-NEXT:    mulq %rsi
 ; X64-NEXT:    movq %rdx, %rsi
 ; X64-NEXT:    movq %rax, %rdi
 ; X64-NEXT:    addq %rcx, %rdi
 ; X64-NEXT:    adcq $0, %rsi
 ; X64-NEXT:    movq %rbx, %rax
-; X64-NEXT:    mulq %r9
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    mulq %rbx
 ; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    movq %rax, %r10
-; X64-NEXT:    addq %rdi, %r10
+; X64-NEXT:    movq %rax, %r9
+; X64-NEXT:    addq %rdi, %r9
 ; X64-NEXT:    adcq %rsi, %rcx
-; X64-NEXT:    setb %bl
+; X64-NEXT:    setb %sil
 ; X64-NEXT:    movq %r8, %rax
-; X64-NEXT:    mulq %r9
+; X64-NEXT:    mulq %rbx
 ; X64-NEXT:    addq %rcx, %rax
-; X64-NEXT:    movzbl %bl, %ecx
+; X64-NEXT:    movzbl %sil, %ecx
 ; X64-NEXT:    adcq %rcx, %rdx
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; X64-NEXT:    addq %r13, %rsi
+; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT:    adcq %r14, %rcx
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
 ; X64-NEXT:    addq %rax, %rsi
 ; X64-NEXT:    adcq %rdx, %rcx
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
-; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
-; X64-NEXT:    adcq (%rsp), %r10 # 8-byte Folded Reload
+; X64-NEXT:    addq %r14, %r11
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
 ; X64-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
 ; X64-NEXT:    adcq %rax, %rsi
 ; X64-NEXT:    adcq $0, %rcx
-; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
-; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload
+; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
 ; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
 ; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT:    addq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT:    adcq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
-; X64-NEXT:    adcq %r15, %r12
-; X64-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    adcq %rbp, %r11
-; X64-NEXT:    movq %r11, (%rsp) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    addq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; X64-NEXT:    adcq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; X64-NEXT:    adcq %r12, %r13
+; X64-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq %r10, %r15
+; X64-NEXT:    movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
-; X64-NEXT:    adcq %rax, %r14
-; X64-NEXT:    movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    adcq $0, %r10
-; X64-NEXT:    movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq %rax, %r11
+; X64-NEXT:    movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq $0, %r9
+; X64-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    adcq $0, %rsi
 ; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    adcq $0, %rcx
 ; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    movq 64(%r9), %r11
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT:    movq 64(%rcx), %r11
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq %rcx, %rax
 ; X64-NEXT:    mulq %r11
 ; X64-NEXT:    movq %rdx, %rsi
-; X64-NEXT:    movq %rax, %r13
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
-; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    mulq %r11
 ; X64-NEXT:    movq %rdx, %rbp
 ; X64-NEXT:    movq %rax, %rbx
 ; X64-NEXT:    addq %rsi, %rbx
 ; X64-NEXT:    adcq $0, %rbp
-; X64-NEXT:    movq 72(%rcx), %rsi
-; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq 72(%r9), %rsi
+; X64-NEXT:    movq %rcx, %rax
 ; X64-NEXT:    mulq %rsi
 ; X64-NEXT:    movq %rsi, %rcx
 ; X64-NEXT:    movq %rdx, %rsi
@@ -7584,9 +7561,9 @@ define void @test_1024(i1024* %a, i1024*
 ; X64-NEXT:    addq %rbx, %r8
 ; X64-NEXT:    adcq %rbp, %rsi
 ; X64-NEXT:    setb %bl
-; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    mulq %rcx
-; X64-NEXT:    movq %rcx, %r10
+; X64-NEXT:    movq %rcx, %r13
 ; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq %rdx, %rcx
 ; X64-NEXT:    movq %rax, %rdi
@@ -7598,141 +7575,138 @@ define void @test_1024(i1024* %a, i1024*
 ; X64-NEXT:    mulq %rdx
 ; X64-NEXT:    movq %rax, %rbx
 ; X64-NEXT:    movq %rdx, %r14
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
-; X64-NEXT:    addq %rax, %r12
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT:    addq %rax, %r10
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
 ; X64-NEXT:    adcq %rdx, %r15
-; X64-NEXT:    addq %rdi, %r12
+; X64-NEXT:    addq %rdi, %r10
 ; X64-NEXT:    adcq %rcx, %r15
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; X64-NEXT:    movq %r12, %rax
 ; X64-NEXT:    movq %r11, %rsi
 ; X64-NEXT:    movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    mulq %r11
 ; X64-NEXT:    movq %rdx, %r11
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
-; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    movq %rbp, %rax
 ; X64-NEXT:    mulq %rsi
 ; X64-NEXT:    movq %rdx, %rsi
 ; X64-NEXT:    movq %rax, %rdi
 ; X64-NEXT:    addq %r11, %rdi
 ; X64-NEXT:    adcq $0, %rsi
-; X64-NEXT:    movq %rcx, %rax
-; X64-NEXT:    movq %rcx, %r11
-; X64-NEXT:    mulq %r10
+; X64-NEXT:    movq %r12, %rax
+; X64-NEXT:    mulq %r13
 ; X64-NEXT:    movq %rdx, %rcx
 ; X64-NEXT:    addq %rdi, %rax
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    adcq %rsi, %rcx
 ; X64-NEXT:    setb %sil
-; X64-NEXT:    movq %r9, %rax
-; X64-NEXT:    mulq %r10
+; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    movq %rbp, %r11
+; X64-NEXT:    mulq %r13
 ; X64-NEXT:    addq %rcx, %rax
 ; X64-NEXT:    movzbl %sil, %ecx
 ; X64-NEXT:    adcq %rcx, %rdx
 ; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
-; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; X64-NEXT:    adcq %r13, %r14
 ; X64-NEXT:    addq %rax, %rbx
 ; X64-NEXT:    adcq %rdx, %r14
-; X64-NEXT:    addq %r13, %rbx
+; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
 ; X64-NEXT:    adcq %r8, %r14
-; X64-NEXT:    adcq $0, %r12
+; X64-NEXT:    adcq $0, %r10
 ; X64-NEXT:    adcq $0, %r15
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; X64-NEXT:    movq 80(%rbp), %rdi
-; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    movq 80(%r9), %rdi
+; X64-NEXT:    movq %r12, %rax
 ; X64-NEXT:    mulq %rdi
 ; X64-NEXT:    movq %rdx, %r8
-; X64-NEXT:    movq %rax, %r13
-; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rbp, %rax
 ; X64-NEXT:    mulq %rdi
 ; X64-NEXT:    movq %rdx, %rsi
 ; X64-NEXT:    movq %rax, %rcx
 ; X64-NEXT:    addq %r8, %rcx
 ; X64-NEXT:    adcq $0, %rsi
-; X64-NEXT:    movq 88(%rbp), %r10
-; X64-NEXT:    movq %r11, %rax
-; X64-NEXT:    mulq %r10
+; X64-NEXT:    movq 88(%r9), %r9
+; X64-NEXT:    movq %r12, %rax
+; X64-NEXT:    mulq %r9
 ; X64-NEXT:    movq %rdx, %rbp
 ; X64-NEXT:    movq %rax, %r8
 ; X64-NEXT:    addq %rcx, %r8
 ; X64-NEXT:    adcq %rsi, %rbp
-; X64-NEXT:    setb %r11b
-; X64-NEXT:    movq %r9, %rax
-; X64-NEXT:    mulq %r10
+; X64-NEXT:    setb %r12b
+; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    mulq %r9
 ; X64-NEXT:    movq %rdx, %rcx
 ; X64-NEXT:    movq %rax, %rsi
 ; X64-NEXT:    addq %rbp, %rsi
-; X64-NEXT:    movzbl %r11b, %eax
+; X64-NEXT:    movzbl %r12b, %eax
 ; X64-NEXT:    adcq %rax, %rcx
 ; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    mulq %rdx
-; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    movq %rax, %r9
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; X64-NEXT:    addq %rax, %rbp
+; X64-NEXT:    movq %rdx, %r12
+; X64-NEXT:    movq %rax, %r11
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT:    adcq %rdx, %rax
-; X64-NEXT:    addq %rsi, %rbp
-; X64-NEXT:    adcq %rcx, %rax
-; X64-NEXT:    addq %rbx, %r13
-; X64-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    addq %r11, %rax
+; X64-NEXT:    adcq %rdx, %r13
+; X64-NEXT:    addq %rsi, %rax
+; X64-NEXT:    adcq %rcx, %r13
+; X64-NEXT:    addq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
 ; X64-NEXT:    adcq %r14, %r8
 ; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    adcq $0, %rbp
 ; X64-NEXT:    adcq $0, %rax
-; X64-NEXT:    addq %r12, %rbp
-; X64-NEXT:    movq %rbp, %r8
-; X64-NEXT:    adcq %r15, %rax
-; X64-NEXT:    movq %rax, %r11
+; X64-NEXT:    adcq $0, %r13
+; X64-NEXT:    addq %r10, %rax
+; X64-NEXT:    movq %rax, %r8
+; X64-NEXT:    adcq %r15, %r13
 ; X64-NEXT:    setb %r14b
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT:    movq %rcx, %rax
-; X64-NEXT:    mulq %rdi
-; X64-NEXT:    movq %rdx, %r15
-; X64-NEXT:    movq %rax, %r12
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
 ; X64-NEXT:    movq %rbp, %rax
 ; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, %r15
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; X64-NEXT:    movq %r10, %rax
+; X64-NEXT:    mulq %rdi
 ; X64-NEXT:    movq %rdx, %rsi
 ; X64-NEXT:    movq %rax, %rbx
-; X64-NEXT:    addq %r15, %rbx
+; X64-NEXT:    addq %rcx, %rbx
 ; X64-NEXT:    adcq $0, %rsi
-; X64-NEXT:    movq %rcx, %rax
-; X64-NEXT:    mulq %r10
+; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    mulq %r9
 ; X64-NEXT:    movq %rdx, %rcx
 ; X64-NEXT:    addq %rbx, %rax
-; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    movq %rax, %rbp
 ; X64-NEXT:    adcq %rsi, %rcx
-; X64-NEXT:    setb %sil
-; X64-NEXT:    movq %rbp, %rax
-; X64-NEXT:    mulq %r10
+; X64-NEXT:    setb %bl
+; X64-NEXT:    movq %r10, %rax
+; X64-NEXT:    mulq %r9
 ; X64-NEXT:    addq %rcx, %rax
-; X64-NEXT:    movzbl %sil, %ecx
+; X64-NEXT:    movzbl %bl, %ecx
 ; X64-NEXT:    adcq %rcx, %rdx
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; X64-NEXT:    addq %r9, %rsi
+; X64-NEXT:    addq %r11, %rsi
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; X64-NEXT:    adcq %r12, %rcx
 ; X64-NEXT:    addq %rax, %rsi
 ; X64-NEXT:    adcq %rdx, %rcx
-; X64-NEXT:    addq %r8, %r12
-; X64-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    adcq %r11, %rbx
-; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    addq %r8, %r15
+; X64-NEXT:    movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq %r13, %rbp
+; X64-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movzbl %r14b, %eax
 ; X64-NEXT:    adcq %rax, %rsi
 ; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    adcq $0, %rcx
 ; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT:    imulq %rax, %r10
-; X64-NEXT:    movq %rax, %r14
+; X64-NEXT:    imulq %rax, %r9
+; X64-NEXT:    movq %rax, %r10
 ; X64-NEXT:    mulq %rdi
 ; X64-NEXT:    movq %rax, %r8
-; X64-NEXT:    addq %r10, %rdx
+; X64-NEXT:    addq %r9, %rdx
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
 ; X64-NEXT:    imulq %rbp, %rdi
 ; X64-NEXT:    addq %rdx, %rdi
@@ -7752,11 +7726,11 @@ define void @test_1024(i1024* %a, i1024*
 ; X64-NEXT:    movq %rax, %r8
 ; X64-NEXT:    movq %rcx, %rax
 ; X64-NEXT:    movq %rcx, %rdi
-; X64-NEXT:    mulq %r14
+; X64-NEXT:    mulq %r10
 ; X64-NEXT:    movq %rdx, %rcx
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq %r11, %rax
-; X64-NEXT:    mulq %r14
+; X64-NEXT:    mulq %r10
 ; X64-NEXT:    movq %rdx, %rsi
 ; X64-NEXT:    movq %rax, %rbx
 ; X64-NEXT:    addq %rcx, %rbx
@@ -7777,12 +7751,11 @@ define void @test_1024(i1024* %a, i1024*
 ; X64-NEXT:    adcq %rax, %r12
 ; X64-NEXT:    addq %r9, %r13
 ; X64-NEXT:    adcq %r8, %r12
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
-; X64-NEXT:    movq 120(%rdx), %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    movq 120(%rbp), %rcx
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
 ; X64-NEXT:    imulq %r10, %rcx
-; X64-NEXT:    movq 112(%rdx), %rsi
-; X64-NEXT:    movq %rdx, %rbp
+; X64-NEXT:    movq 112(%rbp), %rsi
 ; X64-NEXT:    movq %r10, %rax
 ; X64-NEXT:    mulq %rsi
 ; X64-NEXT:    movq %rax, %r11
@@ -7840,46 +7813,45 @@ define void @test_1024(i1024* %a, i1024*
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
 ; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; X64-NEXT:    movq 80(%rsi), %rdi
-; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    movq 80(%r9), %rsi
+; X64-NEXT:    movq %rsi, %rax
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
 ; X64-NEXT:    mulq %rcx
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    movq %rdx, %r8
-; X64-NEXT:    movq 88(%rsi), %rax
-; X64-NEXT:    movq %rsi, %r9
-; X64-NEXT:    movq %rax, %rsi
-; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rdx, %rdi
+; X64-NEXT:    movq 88(%r9), %r8
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    mulq %rcx
 ; X64-NEXT:    movq %rcx, %r11
 ; X64-NEXT:    movq %rdx, %rbp
 ; X64-NEXT:    movq %rax, %rbx
-; X64-NEXT:    addq %r8, %rbx
+; X64-NEXT:    addq %rdi, %rbx
 ; X64-NEXT:    adcq $0, %rbp
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
 ; X64-NEXT:    mulq %r15
 ; X64-NEXT:    movq %rdx, %rcx
 ; X64-NEXT:    movq %rax, %r14
 ; X64-NEXT:    addq %rbx, %r14
 ; X64-NEXT:    adcq %rbp, %rcx
-; X64-NEXT:    setb %r8b
-; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    setb %r10b
+; X64-NEXT:    movq %r8, %rax
 ; X64-NEXT:    mulq %r15
 ; X64-NEXT:    movq %rdx, %rbp
 ; X64-NEXT:    movq %rax, %rbx
 ; X64-NEXT:    addq %rcx, %rbx
-; X64-NEXT:    movzbl %r8b, %eax
+; X64-NEXT:    movzbl %r10b, %eax
 ; X64-NEXT:    adcq %rax, %rbp
-; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq %rsi, %rax
 ; X64-NEXT:    xorl %ecx, %ecx
 ; X64-NEXT:    mulq %rcx
 ; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq %rax, %rsi
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; X64-NEXT:    movq (%rsp), %r12 # 8-byte Reload
 ; X64-NEXT:    addq %r12, %rsi
 ; X64-NEXT:    movq %rdx, %r10
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
@@ -7891,8 +7863,8 @@ define void @test_1024(i1024* %a, i1024*
 ; X64-NEXT:    mulq %r11
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    movq 72(%r9), %r9
-; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    movq 72(%r9), %rdi
+; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    mulq %r11
 ; X64-NEXT:    movq %rdx, %rbp
 ; X64-NEXT:    movq %rax, %rbx
@@ -7905,8 +7877,7 @@ define void @test_1024(i1024* %a, i1024*
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    adcq %rbp, %rcx
 ; X64-NEXT:    setb %r11b
-; X64-NEXT:    movq %r9, %rax
-; X64-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    mulq %r15
 ; X64-NEXT:    movq %rdx, %rbx
 ; X64-NEXT:    movq %rax, %rbp
@@ -7924,20 +7895,20 @@ define void @test_1024(i1024* %a, i1024*
 ; X64-NEXT:    addq %rbp, %rcx
 ; X64-NEXT:    adcq %rbx, %r8
 ; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
-; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rcx, (%rsp) # 8-byte Spill
 ; X64-NEXT:    adcq %r14, %r8
 ; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    adcq $0, %rsi
 ; X64-NEXT:    adcq $0, %r10
-; X64-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq %r13, %rax
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT:    mulq %r14
 ; X64-NEXT:    movq %rdx, %rcx
 ; X64-NEXT:    movq %rax, %r12
-; X64-NEXT:    movq %r9, %rax
-; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    movq %rdi, %r8
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    mulq %r14
 ; X64-NEXT:    movq %rdx, %rdi
 ; X64-NEXT:    movq %rax, %rbp
 ; X64-NEXT:    addq %rcx, %rbp
@@ -7950,74 +7921,73 @@ define void @test_1024(i1024* %a, i1024*
 ; X64-NEXT:    movq %rax, %rbp
 ; X64-NEXT:    adcq %rdi, %rcx
 ; X64-NEXT:    setb %dil
-; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    movq %r8, %rax
 ; X64-NEXT:    mulq %rbx
 ; X64-NEXT:    addq %rcx, %rax
 ; X64-NEXT:    movzbl %dil, %ecx
 ; X64-NEXT:    adcq %rcx, %rdx
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
-; X64-NEXT:    addq %r14, %r15
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
-; X64-NEXT:    adcq %r13, %r11
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; X64-NEXT:    addq %r9, %r15
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    adcq %r8, %r11
 ; X64-NEXT:    addq %rax, %r15
 ; X64-NEXT:    adcq %rdx, %r11
-; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
-; X64-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    addq (%rsp), %r12 # 8-byte Folded Reload
+; X64-NEXT:    movq %r12, (%rsp) # 8-byte Spill
 ; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
 ; X64-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    adcq $0, %r15
 ; X64-NEXT:    adcq $0, %r11
 ; X64-NEXT:    addq %rsi, %r15
 ; X64-NEXT:    adcq %r10, %r11
-; X64-NEXT:    setb %r10b
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    mulq %r8
-; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    movq %rax, %r9
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
-; X64-NEXT:    movq %rbp, %rax
-; X64-NEXT:    mulq %r8
-; X64-NEXT:    movq %r8, %r12
+; X64-NEXT:    setb {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    movq %r14, %rsi
+; X64-NEXT:    mulq %r14
+; X64-NEXT:    movq %rdx, %r10
+; X64-NEXT:    movq %rax, %rbp
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; X64-NEXT:    movq %r14, %rax
+; X64-NEXT:    mulq %rsi
 ; X64-NEXT:    movq %rdx, %rdi
 ; X64-NEXT:    movq %rax, %rbx
-; X64-NEXT:    addq %rcx, %rbx
+; X64-NEXT:    addq %r10, %rbx
 ; X64-NEXT:    adcq $0, %rdi
-; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; X64-NEXT:    mulq %rsi
-; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    movq %rdx, %r10
 ; X64-NEXT:    addq %rbx, %rax
-; X64-NEXT:    movq %rax, %rbx
-; X64-NEXT:    adcq %rdi, %rcx
-; X64-NEXT:    setb %r8b
-; X64-NEXT:    movq %rbp, %rax
-; X64-NEXT:    mulq %rsi
-; X64-NEXT:    movq %rsi, %rdi
-; X64-NEXT:    addq %rcx, %rax
-; X64-NEXT:    movzbl %r8b, %ecx
+; X64-NEXT:    movq %rax, %r12
+; X64-NEXT:    adcq %rdi, %r10
+; X64-NEXT:    setb %bl
+; X64-NEXT:    movq %r14, %rax
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    movq %rcx, %rdi
+; X64-NEXT:    addq %r10, %rax
+; X64-NEXT:    movzbl %bl, %ecx
 ; X64-NEXT:    adcq %rcx, %rdx
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; X64-NEXT:    addq %r14, %rsi
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; X64-NEXT:    addq %r9, %rbx
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT:    adcq %r13, %rcx
-; X64-NEXT:    addq %rax, %rsi
+; X64-NEXT:    adcq %r8, %rcx
+; X64-NEXT:    addq %rax, %rbx
 ; X64-NEXT:    adcq %rdx, %rcx
-; X64-NEXT:    addq %r15, %r9
-; X64-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    adcq %r11, %rbx
+; X64-NEXT:    addq %r15, %rbp
+; X64-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq %r11, %r12
+; X64-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; X64-NEXT:    adcq %rax, %rbx
 ; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    movzbl %r10b, %eax
-; X64-NEXT:    adcq %rax, %rsi
-; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    adcq $0, %rcx
 ; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
 ; X64-NEXT:    movq 96(%rbp), %rcx
 ; X64-NEXT:    imulq %rcx, %rdi
 ; X64-NEXT:    movq %rcx, %rax
-; X64-NEXT:    movq %r12, %rsi
-; X64-NEXT:    mulq %r12
+; X64-NEXT:    mulq %rsi
 ; X64-NEXT:    movq %rax, %r9
 ; X64-NEXT:    addq %rdi, %rdx
 ; X64-NEXT:    movq 104(%rbp), %r8
@@ -8067,32 +8037,31 @@ define void @test_1024(i1024* %a, i1024*
 ; X64-NEXT:    addq %r10, %rbp
 ; X64-NEXT:    adcq %rdi, %rbx
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT:    imulq %rax, %rsi
-; X64-NEXT:    movq %rax, %r13
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT:    mulq %rcx
-; X64-NEXT:    movq %rax, %r8
+; X64-NEXT:    imulq %r13, %rsi
+; X64-NEXT:    movq %r13, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; X64-NEXT:    mulq %r8
+; X64-NEXT:    movq %rax, %r9
 ; X64-NEXT:    addq %rsi, %rdx
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
-; X64-NEXT:    imulq %r11, %rcx
-; X64-NEXT:    addq %rdx, %rcx
-; X64-NEXT:    movq %rcx, %r9
+; X64-NEXT:    imulq %r11, %r8
+; X64-NEXT:    addq %rdx, %r8
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
 ; X64-NEXT:    movq %rax, %rcx
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
 ; X64-NEXT:    imulq %r15, %rcx
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
-; X64-NEXT:    mulq %r14
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    mulq %rdi
 ; X64-NEXT:    movq %rax, %r10
 ; X64-NEXT:    addq %rcx, %rdx
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT:    imulq %r14, %rax
+; X64-NEXT:    movq %r14, %rax
+; X64-NEXT:    imulq %rdi, %rax
 ; X64-NEXT:    addq %rdx, %rax
-; X64-NEXT:    addq %r8, %r10
-; X64-NEXT:    adcq %r9, %rax
+; X64-NEXT:    addq %r9, %r10
+; X64-NEXT:    adcq %r8, %rax
 ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT:    movq %r14, %rax
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq %rdi, %r14
 ; X64-NEXT:    mulq %r13
 ; X64-NEXT:    movq %rdx, %rdi
 ; X64-NEXT:    movq %rax, %r8
@@ -8128,7 +8097,7 @@ define void @test_1024(i1024* %a, i1024*
 ; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
 ; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    movq (%rsp), %rbp # 8-byte Reload
 ; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
 ; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
@@ -8141,7 +8110,7 @@ define void @test_1024(i1024* %a, i1024*
 ; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
 ; X64-NEXT:    movq %rdi, %r10
 ; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
-; X64-NEXT:    adcq (%rsp), %rbx # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
 ; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
 ; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
 ; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
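
A note on the X64 hunks above for @test_1024: these are regenerated CHECK
lines, and the differences appear to amount to different physical-register
assignments (e.g. %rsi and %rbx giving way to %rcx and %r12) plus the
matching reshuffle of 8-byte spill slots; the second line count in each @@
header shows the regenerated blocks come out the same length or slightly
shorter.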

Modified: llvm/trunk/test/CodeGen/X86/mul-i256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mul-i256.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mul-i256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mul-i256.ll Wed Sep 19 11:59:08 2018
@@ -25,15 +25,15 @@ define void @test(i256* %a, i256* %b, i2
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    movl 12(%ecx), %ebp
 ; X32-NEXT:    movl 8(%ecx), %edi
-; X32-NEXT:    movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl (%eax), %ebx
-; X32-NEXT:    movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %edi, %eax
 ; X32-NEXT:    mull %ebx
 ; X32-NEXT:    movl %edx, %ecx
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %ebp, %eax
-; X32-NEXT:    movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    mull %ebx
 ; X32-NEXT:    movl %edx, %esi
 ; X32-NEXT:    movl %eax, %ebx
@@ -44,60 +44,60 @@ define void @test(i256* %a, i256* %b, i2
 ; X32-NEXT:    movl %edi, %eax
 ; X32-NEXT:    mull %ecx
 ; X32-NEXT:    movl %ecx, %edi
-; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %edx, %ecx
 ; X32-NEXT:    addl %ebx, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %esi, %ecx
-; X32-NEXT:    setb {{[0-9]+}}(%esp) # 1-byte Folded Spill
+; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
 ; X32-NEXT:    movl %ebp, %eax
 ; X32-NEXT:    mull %edi
 ; X32-NEXT:    movl %edx, %ebx
 ; X32-NEXT:    movl %eax, %edi
 ; X32-NEXT:    addl %ecx, %edi
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload
+; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
 ; X32-NEXT:    adcl %eax, %ebx
 ; X32-NEXT:    xorl %edx, %edx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    movl %ecx, %eax
 ; X32-NEXT:    mull %edx
 ; X32-NEXT:    movl %edx, %ebp
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %eax, %esi
 ; X32-NEXT:    movl %eax, (%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    xorl %edx, %edx
 ; X32-NEXT:    mull %edx
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    addl %esi, %eax
 ; X32-NEXT:    adcl %ebp, %edx
 ; X32-NEXT:    addl %edi, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %ebx, %edx
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X32-NEXT:    movl (%esi), %ebp
 ; X32-NEXT:    movl %ebp, %eax
-; X32-NEXT:    movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    mull %ecx
 ; X32-NEXT:    movl %ecx, %edi
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %edx, %ecx
 ; X32-NEXT:    movl 4(%esi), %esi
 ; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    mull %edi
 ; X32-NEXT:    movl %edx, %edi
 ; X32-NEXT:    movl %eax, %ebx
 ; X32-NEXT:    addl %ecx, %ebx
 ; X32-NEXT:    adcl $0, %edi
 ; X32-NEXT:    movl %ebp, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
 ; X32-NEXT:    mull %ebp
 ; X32-NEXT:    movl %edx, %ecx
 ; X32-NEXT:    addl %ebx, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %edi, %ecx
 ; X32-NEXT:    setb %bl
 ; X32-NEXT:    movl %esi, %eax
@@ -107,84 +107,84 @@ define void @test(i256* %a, i256* %b, i2
 ; X32-NEXT:    addl %ecx, %ebp
 ; X32-NEXT:    movzbl %bl, %eax
 ; X32-NEXT:    adcl %eax, %edi
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    movl %esi, %eax
 ; X32-NEXT:    xorl %edx, %edx
 ; X32-NEXT:    mull %edx
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl (%esp), %ecx # 4-byte Reload
 ; X32-NEXT:    addl %eax, %ecx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    adcl %edx, %eax
 ; X32-NEXT:    addl %ebp, %ecx
 ; X32-NEXT:    adcl %edi, %eax
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
 ; X32-NEXT:    movl %ecx, (%esp) # 4-byte Spill
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    movl 8(%eax), %ebx
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X32-NEXT:    movl 8(%edi), %ebx
 ; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    movl %esi, %edi
+; X32-NEXT:    movl %esi, %ecx
 ; X32-NEXT:    mull %ebx
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    mull %ebx
 ; X32-NEXT:    movl %edx, %ebp
 ; X32-NEXT:    movl %eax, %esi
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
 ; X32-NEXT:    adcl $0, %ebp
-; X32-NEXT:    movl 12(%ecx), %ecx
-; X32-NEXT:    movl %edi, %eax
-; X32-NEXT:    mull %ecx
-; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl 12(%edi), %edi
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    mull %edi
+; X32-NEXT:    movl %edi, %ecx
+; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %edx, %edi
 ; X32-NEXT:    addl %esi, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %ebp, %edi
-; X32-NEXT:    setb {{[0-9]+}}(%esp) # 1-byte Folded Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    mull %ecx
 ; X32-NEXT:    movl %edx, %esi
 ; X32-NEXT:    movl %eax, %ebp
 ; X32-NEXT:    addl %edi, %ebp
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload
+; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
 ; X32-NEXT:    adcl %eax, %esi
 ; X32-NEXT:    movl %ebx, %edi
 ; X32-NEXT:    movl %ebx, %eax
 ; X32-NEXT:    xorl %ecx, %ecx
 ; X32-NEXT:    mull %ecx
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
 ; X32-NEXT:    addl %eax, %ebx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    adcl %edx, %eax
 ; X32-NEXT:    addl %ebp, %ebx
 ; X32-NEXT:    adcl %esi, %eax
 ; X32-NEXT:    movl (%esp), %ecx # 4-byte Reload
-; X32-NEXT:    addl %ecx, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
-; X32-NEXT:    adcl %ecx, {{[0-9]+}}(%esp) # 4-byte Folded Spill
+; X32-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT:    adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X32-NEXT:    adcl $0, %ebx
 ; X32-NEXT:    adcl $0, %eax
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload
-; X32-NEXT:    movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    setb (%esp) # 1-byte Folded Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    mull %edi
 ; X32-NEXT:    movl %edx, %ebx
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    movl %ecx, %eax
 ; X32-NEXT:    mull %edi
 ; X32-NEXT:    movl %edx, %edi
@@ -192,80 +192,80 @@ define void @test(i256* %a, i256* %b, i2
 ; X32-NEXT:    addl %ebx, %ebp
 ; X32-NEXT:    adcl $0, %edi
 ; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
 ; X32-NEXT:    mull %ebx
 ; X32-NEXT:    movl %edx, %esi
 ; X32-NEXT:    addl %ebp, %eax
 ; X32-NEXT:    movl %eax, %ebp
 ; X32-NEXT:    adcl %edi, %esi
-; X32-NEXT:    setb {{[0-9]+}}(%esp) # 1-byte Folded Spill
+; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
 ; X32-NEXT:    movl %ecx, %eax
 ; X32-NEXT:    mull %ebx
 ; X32-NEXT:    addl %esi, %eax
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %esi # 1-byte Folded Reload
+; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
 ; X32-NEXT:    adcl %esi, %edx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
 ; X32-NEXT:    addl %eax, %esi
 ; X32-NEXT:    adcl %edx, %ecx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X32-NEXT:    addl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload
-; X32-NEXT:    movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movzbl (%esp), %eax # 1-byte Folded Reload
 ; X32-NEXT:    adcl %eax, %esi
-; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl $0, %ecx
-; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    movl 16(%ecx), %esi
 ; X32-NEXT:    imull %esi, %ebx
 ; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    mull %edi
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    addl %ebx, %edx
 ; X32-NEXT:    movl 20(%ecx), %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    imull %eax, %edi
 ; X32-NEXT:    addl %edx, %edi
-; X32-NEXT:    movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl 24(%ecx), %eax
 ; X32-NEXT:    movl %ecx, %ebp
 ; X32-NEXT:    movl %eax, %edi
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    imull %ecx, %edi
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
 ; X32-NEXT:    mull %ebx
 ; X32-NEXT:    movl %eax, (%esp) # 4-byte Spill
 ; X32-NEXT:    addl %edi, %edx
 ; X32-NEXT:    movl 28(%ebp), %ebp
 ; X32-NEXT:    imull %ebx, %ebp
 ; X32-NEXT:    addl %edx, %ebp
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
 ; X32-NEXT:    addl %edx, (%esp) # 4-byte Folded Spill
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
 ; X32-NEXT:    movl %ebx, %eax
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %edx, %ebx
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %ecx, %eax
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %edx, %ecx
 ; X32-NEXT:    movl %eax, %edi
 ; X32-NEXT:    addl %ebx, %edi
 ; X32-NEXT:    adcl $0, %ecx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
 ; X32-NEXT:    mull %ebx
 ; X32-NEXT:    movl %edx, %esi
 ; X32-NEXT:    addl %edi, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %ecx, %esi
 ; X32-NEXT:    setb %cl
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    mull %ebx
 ; X32-NEXT:    addl %esi, %eax
 ; X32-NEXT:    movzbl %cl, %ecx
@@ -273,37 +273,37 @@ define void @test(i256* %a, i256* %b, i2
 ; X32-NEXT:    addl (%esp), %eax # 4-byte Folded Reload
 ; X32-NEXT:    movl %eax, (%esp) # 4-byte Spill
 ; X32-NEXT:    adcl %ebp, %edx
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
 ; X32-NEXT:    movl 28(%ebx), %ecx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    imull %esi, %ecx
 ; X32-NEXT:    movl 24(%ebx), %edi
 ; X32-NEXT:    movl %esi, %eax
 ; X32-NEXT:    mull %edi
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    addl %ecx, %edx
-; X32-NEXT:    imull {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload
+; X32-NEXT:    imull {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
 ; X32-NEXT:    addl %edx, %edi
 ; X32-NEXT:    movl 16(%ebx), %ebp
 ; X32-NEXT:    movl 20(%ebx), %ebx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    movl %eax, %ecx
 ; X32-NEXT:    imull %ebx, %ecx
-; X32-NEXT:    movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    mull %ebp
 ; X32-NEXT:    addl %ecx, %edx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    imull %ebp, %ecx
 ; X32-NEXT:    addl %edx, %ecx
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %edi, %ecx
-; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %ebp, %eax
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %edx, %edi
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %ebx, %eax
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %edx, %ecx
@@ -311,38 +311,38 @@ define void @test(i256* %a, i256* %b, i2
 ; X32-NEXT:    addl %edi, %ebx
 ; X32-NEXT:    adcl $0, %ecx
 ; X32-NEXT:    movl %ebp, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
 ; X32-NEXT:    mull %ebp
 ; X32-NEXT:    movl %edx, %edi
 ; X32-NEXT:    movl %eax, %esi
 ; X32-NEXT:    addl %ebx, %esi
 ; X32-NEXT:    adcl %ecx, %edi
 ; X32-NEXT:    setb %cl
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    mull %ebp
 ; X32-NEXT:    addl %edi, %eax
 ; X32-NEXT:    movzbl %cl, %ecx
 ; X32-NEXT:    adcl %ecx, %edx
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
 ; X32-NEXT:    adcl (%esp), %eax # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
 ; X32-NEXT:    movl %ecx, %ebx
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    movl %edi, (%ecx)
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    movl %edi, 4(%ecx)
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    movl %edi, 8(%ecx)
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    movl %edi, 12(%ecx)
 ; X32-NEXT:    movl %ebx, 16(%ecx)
 ; X32-NEXT:    movl %esi, 20(%ecx)
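
A note on the recurring pattern change in the X32 checks: the regenerated
spill/reload checks use {{[-0-9]+}}(%e{{[sb]}}p) in place of
{{[0-9]+}}(%esp), so they accept stack slots addressed off either %esp or
%ebp, with a possibly negative offset. A minimal sketch (the offsets here
are hypothetical, for illustration only); both lines match the new pattern,
while only the second matched the old one:

; X32-NEXT:    movl %eax, -44(%ebp) # 4-byte Spill
; X32-NEXT:    movl %eax, 28(%esp) # 4-byte Spill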

Modified: llvm/trunk/test/CodeGen/X86/mul-i512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mul-i512.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mul-i512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mul-i512.ll Wed Sep 19 11:59:08 2018
@@ -12,9 +12,9 @@ define void @test_512(i512* %a, i512* %b
 ; X32-NEXT:    subl $244, %esp
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    movl 20(%ecx), %edi
-; X32-NEXT:    movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl 16(%ecx), %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %ecx, %ebp
 ; X32-NEXT:    xorl %ebx, %ebx
 ; X32-NEXT:    mull %ebx
@@ -27,37 +27,37 @@ define void @test_512(i512* %a, i512* %b
 ; X32-NEXT:    movl %edx, %ebx
 ; X32-NEXT:    adcl $0, %ebx
 ; X32-NEXT:    addl %esi, %edi
-; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl %edi, (%esp) # 4-byte Spill
+; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %ecx, %ebx
 ; X32-NEXT:    movl %ecx, %edi
-; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    setb %cl
 ; X32-NEXT:    addl %eax, %ebx
 ; X32-NEXT:    movzbl %cl, %ecx
 ; X32-NEXT:    adcl %edx, %ecx
 ; X32-NEXT:    movl 24(%ebp), %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    xorl %edx, %edx
 ; X32-NEXT:    mull %edx
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    addl %eax, %esi
 ; X32-NEXT:    adcl %edx, %edi
 ; X32-NEXT:    addl %ebx, %esi
-; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %ecx, %edi
-; X32-NEXT:    movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    movl (%ecx), %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    xorl %ebx, %ebx
 ; X32-NEXT:    mull %ebx
 ; X32-NEXT:    movl %eax, %ebp
 ; X32-NEXT:    movl %edx, %edi
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl 4(%ecx), %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %ecx, %esi
 ; X32-NEXT:    mull %ebx
 ; X32-NEXT:    movl %eax, %ebx
@@ -65,73 +65,73 @@ define void @test_512(i512* %a, i512* %b
 ; X32-NEXT:    movl %edx, %ecx
 ; X32-NEXT:    adcl $0, %ecx
 ; X32-NEXT:    addl %ebp, %ebx
-; X32-NEXT:    movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %ebx, (%esp) # 4-byte Spill
 ; X32-NEXT:    adcl %edi, %ecx
 ; X32-NEXT:    setb %bl
 ; X32-NEXT:    addl %eax, %ecx
 ; X32-NEXT:    movzbl %bl, %ebx
 ; X32-NEXT:    adcl %edx, %ebx
 ; X32-NEXT:    movl 8(%esi), %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    xorl %edx, %edx
 ; X32-NEXT:    mull %edx
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %ebp, %esi
 ; X32-NEXT:    addl %eax, %esi
 ; X32-NEXT:    movl %edi, %eax
 ; X32-NEXT:    adcl %edx, %eax
 ; X32-NEXT:    addl %ecx, %esi
-; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %ebx, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    addl %ebp, %eax
 ; X32-NEXT:    movl %ebp, %esi
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    adcl %edi, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    movl (%ecx), %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    xorl %ebp, %ebp
 ; X32-NEXT:    mull %ebp
 ; X32-NEXT:    movl %edx, %ebx
 ; X32-NEXT:    movl %eax, %ecx
 ; X32-NEXT:    movl %eax, %edx
 ; X32-NEXT:    addl %esi, %edx
-; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %ebx, %eax
 ; X32-NEXT:    adcl %edi, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl 16(%eax), %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    mull %ebp
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %ecx, %edi
 ; X32-NEXT:    movl %ecx, %ebp
-; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    addl %eax, %edi
 ; X32-NEXT:    movl %ebx, %eax
 ; X32-NEXT:    adcl %edx, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    addl %esi, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl (%esp), %eax # 4-byte Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT:    adcl (%esp), %eax # 4-byte Folded Reload
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl 4(%eax), %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    xorl %edi, %edi
 ; X32-NEXT:    mull %edi
 ; X32-NEXT:    movl %eax, %esi
@@ -139,107 +139,107 @@ define void @test_512(i512* %a, i512* %b
 ; X32-NEXT:    movl %edx, %ecx
 ; X32-NEXT:    adcl $0, %ecx
 ; X32-NEXT:    addl %ebp, %esi
-; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %ebx, %ecx
 ; X32-NEXT:    movl %ebx, %esi
-; X32-NEXT:    movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    setb %bl
 ; X32-NEXT:    addl %eax, %ecx
 ; X32-NEXT:    movzbl %bl, %ebx
 ; X32-NEXT:    adcl %edx, %ebx
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl 8(%eax), %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    mull %edi
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %ebp, %edi
 ; X32-NEXT:    addl %eax, %edi
 ; X32-NEXT:    adcl %edx, %esi
 ; X32-NEXT:    addl %ecx, %edi
-; X32-NEXT:    movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %ebx, %esi
-; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload
-; X32-NEXT:    movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X32-NEXT:    adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    adcl %edi, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    adcl %esi, {{[0-9]+}}(%esp) # 4-byte Folded Spill
+; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT:    adcl %eax, (%esp) # 4-byte Folded Spill
+; X32-NEXT:    adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    adcl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X32-NEXT:    movl 20(%esi), %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    xorl %ecx, %ecx
 ; X32-NEXT:    mull %ecx
 ; X32-NEXT:    movl %eax, %ebx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    addl %edi, %ebx
 ; X32-NEXT:    movl %edx, %ecx
 ; X32-NEXT:    adcl $0, %ecx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
 ; X32-NEXT:    addl %ebp, %ebx
-; X32-NEXT:    movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %edi, %ecx
 ; X32-NEXT:    setb %bl
 ; X32-NEXT:    addl %eax, %ecx
 ; X32-NEXT:    movzbl %bl, %ebx
 ; X32-NEXT:    adcl %edx, %ebx
 ; X32-NEXT:    movl 24(%esi), %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    xorl %edx, %edx
 ; X32-NEXT:    mull %edx
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %ebp, %esi
 ; X32-NEXT:    addl %eax, %esi
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    adcl %edx, %edi
 ; X32-NEXT:    addl %ecx, %esi
 ; X32-NEXT:    adcl %ebx, %edi
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    addl %ebp, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %ebp, %edx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload
-; X32-NEXT:    adcl %ebp, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    adcl %esi, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    adcl %edi, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT:    adcl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    adcl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    movl %ecx, %eax
 ; X32-NEXT:    addl %edx, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %ecx, %eax
 ; X32-NEXT:    addl %edx, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl (%esp), %ebp # 4-byte Folded Reload
-; X32-NEXT:    movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
-; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload
-; X32-NEXT:    movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    mull %edi
-; X32-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %edx, %ebp
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl 28(%eax), %esi
 ; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    mull %edi
 ; X32-NEXT:    movl %edx, %edi
 ; X32-NEXT:    movl %eax, %ebx
 ; X32-NEXT:    addl %ebp, %ebx
 ; X32-NEXT:    adcl $0, %edi
 ; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    mull %ecx
 ; X32-NEXT:    movl %edx, %ebp
 ; X32-NEXT:    addl %ebx, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %edi, %ebp
 ; X32-NEXT:    setb %bl
 ; X32-NEXT:    movl %esi, %eax
@@ -247,54 +247,54 @@ define void @test_512(i512* %a, i512* %b
 ; X32-NEXT:    addl %ebp, %eax
 ; X32-NEXT:    movzbl %bl, %ecx
 ; X32-NEXT:    adcl %ecx, %edx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
 ; X32-NEXT:    addl %eax, %ecx
-; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %edx, %esi
-; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
+; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    mull %esi
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    movl %edi, %eax
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %edx, %ebx
 ; X32-NEXT:    movl %eax, %ebp
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
 ; X32-NEXT:    adcl $0, %ebx
 ; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %edx, %ecx
 ; X32-NEXT:    addl %ebp, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %ebx, %ecx
-; X32-NEXT:    setb {{[0-9]+}}(%esp) # 1-byte Folded Spill
+; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
 ; X32-NEXT:    movl %edi, %ebp
 ; X32-NEXT:    movl %edi, %eax
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %edx, %edi
 ; X32-NEXT:    movl %eax, %ebx
 ; X32-NEXT:    addl %ecx, %ebx
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload
+; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
 ; X32-NEXT:    adcl %eax, %edi
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload
-; X32-NEXT:    addl (%esp), %ebx # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload
-; X32-NEXT:    adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %edx, %ecx
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %ebp, %eax
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %edx, %esi
@@ -303,154 +303,154 @@ define void @test_512(i512* %a, i512* %b
 ; X32-NEXT:    adcl $0, %esi
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl 12(%eax), %ecx
-; X32-NEXT:    movl %ecx, (%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    mull %ecx
 ; X32-NEXT:    movl %edx, %ecx
 ; X32-NEXT:    addl %ebp, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %esi, %ecx
-; X32-NEXT:    setb {{[0-9]+}}(%esp) # 1-byte Folded Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X32-NEXT:    mull (%esp) # 4-byte Folded Reload
+; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT:    mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
 ; X32-NEXT:    addl %ecx, %eax
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # 1-byte Folded Reload
+; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
 ; X32-NEXT:    adcl %ecx, %edx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
 ; X32-NEXT:    addl %eax, %ebp
 ; X32-NEXT:    adcl %edx, %ecx
-; X32-NEXT:    addl %ebx, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    adcl %edi, {{[0-9]+}}(%esp) # 4-byte Folded Spill
+; X32-NEXT:    addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X32-NEXT:    adcl $0, %ebp
 ; X32-NEXT:    adcl $0, %ecx
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
-; X32-NEXT:    setb {{[0-9]+}}(%esp) # 1-byte Folded Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
-; X32-NEXT:    mull %esi
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT:    mull %esi
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %eax, %edi
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
 ; X32-NEXT:    adcl $0, %edx
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    movl (%esp), %esi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %edx, %ebx
 ; X32-NEXT:    addl %edi, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload
-; X32-NEXT:    setb {{[0-9]+}}(%esp) # 1-byte Folded Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    addl %ebx, %eax
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %esi # 1-byte Folded Reload
+; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
 ; X32-NEXT:    adcl %esi, %edx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
 ; X32-NEXT:    addl %eax, %edi
 ; X32-NEXT:    adcl %edx, %ebx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
 ; X32-NEXT:    addl %ebp, %edx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    adcl %ecx, %esi
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload
+; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
 ; X32-NEXT:    adcl %eax, %edi
 ; X32-NEXT:    adcl $0, %ebx
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
-; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload
-; X32-NEXT:    movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload
-; X32-NEXT:    movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    mull %edi
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %edx, %esi
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl 12(%eax), %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    mull %edi
 ; X32-NEXT:    movl %edx, %edi
 ; X32-NEXT:    movl %eax, %ebp
 ; X32-NEXT:    addl %esi, %ebp
 ; X32-NEXT:    adcl $0, %edi
 ; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
 ; X32-NEXT:    mull %ebx
 ; X32-NEXT:    movl %edx, %esi
 ; X32-NEXT:    addl %ebp, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %edi, %esi
 ; X32-NEXT:    setb %cl
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    mull %ebx
 ; X32-NEXT:    addl %esi, %eax
 ; X32-NEXT:    movzbl %cl, %ecx
 ; X32-NEXT:    adcl %ecx, %edx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
 ; X32-NEXT:    addl %eax, %ecx
-; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %edx, %esi
-; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
+; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    mull %ecx
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
 ; X32-NEXT:    movl %ebx, %eax
 ; X32-NEXT:    mull %ecx
 ; X32-NEXT:    movl %edx, %edi
 ; X32-NEXT:    movl %eax, %ebp
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
 ; X32-NEXT:    adcl $0, %edi
 ; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %edx, %ecx
 ; X32-NEXT:    addl %ebp, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %edi, %ecx
-; X32-NEXT:    setb {{[0-9]+}}(%esp) # 1-byte Folded Spill
+; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
 ; X32-NEXT:    movl %ebx, %edi
 ; X32-NEXT:    movl %ebx, %eax
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %eax, %ebp
 ; X32-NEXT:    addl %ecx, %ebp
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload
+; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
 ; X32-NEXT:    adcl %eax, %edx
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
 ; X32-NEXT:    mull %ebx
 ; X32-NEXT:    movl %edx, %ecx
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %edi, %eax
 ; X32-NEXT:    mull %ebx
 ; X32-NEXT:    movl %edx, %edi
@@ -458,93 +458,93 @@ define void @test_512(i512* %a, i512* %b
 ; X32-NEXT:    addl %ecx, %ebx
 ; X32-NEXT:    adcl $0, %edi
 ; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    mull %ecx
 ; X32-NEXT:    movl %edx, %esi
 ; X32-NEXT:    addl %ebx, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %edi, %esi
 ; X32-NEXT:    setb %bl
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    mull %ecx
 ; X32-NEXT:    addl %esi, %eax
 ; X32-NEXT:    movzbl %bl, %ecx
 ; X32-NEXT:    adcl %ecx, %edx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
 ; X32-NEXT:    addl %eax, %ebx
 ; X32-NEXT:    adcl %edx, %ecx
-; X32-NEXT:    addl %ebp, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X32-NEXT:    adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill
+; X32-NEXT:    addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X32-NEXT:    adcl $0, %ebx
 ; X32-NEXT:    adcl $0, %ecx
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
-; X32-NEXT:    setb {{[0-9]+}}(%esp) # 1-byte Folded Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload
-; X32-NEXT:    movl %ebp, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
-; X32-NEXT:    mull %esi
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT:    movl %ebp, %eax
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT:    mull %esi
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %eax, %edi
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
 ; X32-NEXT:    adcl $0, %edx
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %ebp, %eax
-; X32-NEXT:    movl (%esp), %esi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %edx, %ebp
 ; X32-NEXT:    addl %edi, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload
-; X32-NEXT:    setb {{[0-9]+}}(%esp) # 1-byte Folded Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    addl %ebp, %eax
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %esi # 1-byte Folded Reload
+; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
 ; X32-NEXT:    adcl %esi, %edx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
 ; X32-NEXT:    addl %eax, %ebp
 ; X32-NEXT:    adcl %edx, %edi
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
 ; X32-NEXT:    addl %ebx, %edx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    adcl %ecx, %esi
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload
+; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
 ; X32-NEXT:    adcl %eax, %ebp
 ; X32-NEXT:    adcl $0, %edi
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
-; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload
-; X32-NEXT:    movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload
-; X32-NEXT:    movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT:    adcl (%esp), %esi # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
 ; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    mull %ecx
 ; X32-NEXT:    movl %edx, %esi
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload
+; X32-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
 ; X32-NEXT:    movl %ebp, %eax
 ; X32-NEXT:    mull %ecx
 ; X32-NEXT:    movl %edx, %edi
@@ -552,11 +552,11 @@ define void @test_512(i512* %a, i512* %b
 ; X32-NEXT:    addl %esi, %ecx
 ; X32-NEXT:    adcl $0, %edi
 ; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
 ; X32-NEXT:    mull %ebx
 ; X32-NEXT:    movl %edx, %esi
 ; X32-NEXT:    addl %ecx, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %edi, %esi
 ; X32-NEXT:    setb %cl
 ; X32-NEXT:    movl %ebp, %eax
@@ -564,21 +564,21 @@ define void @test_512(i512* %a, i512* %b
 ; X32-NEXT:    addl %esi, %eax
 ; X32-NEXT:    movzbl %cl, %ecx
 ; X32-NEXT:    adcl %ecx, %edx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
 ; X32-NEXT:    addl %eax, %ecx
-; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %edx, %esi
-; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload
+; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
 ; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %edx, %ecx
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
 ; X32-NEXT:    movl %ebp, %eax
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %edx, %esi
@@ -586,137 +586,137 @@ define void @test_512(i512* %a, i512* %b
 ; X32-NEXT:    addl %ecx, %edi
 ; X32-NEXT:    adcl $0, %esi
 ; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
 ; X32-NEXT:    mull %ebx
 ; X32-NEXT:    movl %edx, %ecx
 ; X32-NEXT:    addl %edi, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %esi, %ecx
-; X32-NEXT:    setb {{[0-9]+}}(%esp) # 1-byte Folded Spill
+; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
 ; X32-NEXT:    movl %ebp, %eax
 ; X32-NEXT:    mull %ebx
 ; X32-NEXT:    movl %eax, %edi
 ; X32-NEXT:    addl %ecx, %edi
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload
+; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
 ; X32-NEXT:    adcl %eax, %edx
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT:    addl (%esp), %edi # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
 ; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    mull %ecx
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %edx, (%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %ebp, %eax
 ; X32-NEXT:    mull %ecx
 ; X32-NEXT:    movl %edx, %esi
 ; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
+; X32-NEXT:    addl (%esp), %ecx # 4-byte Folded Reload
 ; X32-NEXT:    adcl $0, %esi
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl 28(%eax), %ebp
 ; X32-NEXT:    movl %ebx, %eax
 ; X32-NEXT:    mull %ebp
-; X32-NEXT:    movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %ebp, (%esp) # 4-byte Spill
 ; X32-NEXT:    movl %edx, %ebx
 ; X32-NEXT:    addl %ecx, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %esi, %ebx
 ; X32-NEXT:    setb %cl
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    mull %ebp
 ; X32-NEXT:    addl %ebx, %eax
 ; X32-NEXT:    movzbl %cl, %ecx
 ; X32-NEXT:    adcl %ecx, %edx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
 ; X32-NEXT:    addl %eax, %ebp
 ; X32-NEXT:    adcl %edx, %ecx
-; X32-NEXT:    addl %edi, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X32-NEXT:    adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill
+; X32-NEXT:    addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X32-NEXT:    adcl $0, %ebp
 ; X32-NEXT:    adcl $0, %ecx
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
-; X32-NEXT:    setb {{[0-9]+}}(%esp) # 1-byte Folded Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    mull %edi
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    mull %edi
 ; X32-NEXT:    movl %edx, %edi
 ; X32-NEXT:    movl %eax, %ebx
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
 ; X32-NEXT:    adcl $0, %edi
 ; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
+; X32-NEXT:    movl (%esp), %esi # 4-byte Reload
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    addl %ebx, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %edi, %edx
 ; X32-NEXT:    movl %edx, %edi
 ; X32-NEXT:    setb %bl
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    addl %edi, %eax
 ; X32-NEXT:    movzbl %bl, %esi
 ; X32-NEXT:    adcl %esi, %edx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
 ; X32-NEXT:    addl %eax, %edi
 ; X32-NEXT:    adcl %edx, %ebx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
 ; X32-NEXT:    addl %ebp, %edx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    adcl %ecx, %esi
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload
+; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
 ; X32-NEXT:    adcl %eax, %edi
 ; X32-NEXT:    adcl $0, %ebx
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X32-NEXT:    addl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X32-NEXT:    adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X32-NEXT:    adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X32-NEXT:    adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X32-NEXT:    adcl $0, %edx
 ; X32-NEXT:    adcl $0, %esi
 ; X32-NEXT:    adcl $0, %edi
 ; X32-NEXT:    adcl $0, %ebx
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
-; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload
-; X32-NEXT:    movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload
-; X32-NEXT:    movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    setb {{[0-9]+}}(%esp) # 1-byte Folded Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    mull %edi
 ; X32-NEXT:    movl %edx, %ecx
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
 ; X32-NEXT:    movl %ebp, %eax
 ; X32-NEXT:    mull %edi
 ; X32-NEXT:    movl %edx, %ebx
@@ -724,11 +724,11 @@ define void @test_512(i512* %a, i512* %b
 ; X32-NEXT:    addl %ecx, %edi
 ; X32-NEXT:    adcl $0, %ebx
 ; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %edx, %ecx
 ; X32-NEXT:    addl %edi, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %ebx, %ecx
 ; X32-NEXT:    setb %bl
 ; X32-NEXT:    movl %ebp, %eax
@@ -736,21 +736,21 @@ define void @test_512(i512* %a, i512* %b
 ; X32-NEXT:    addl %ecx, %eax
 ; X32-NEXT:    movzbl %bl, %ecx
 ; X32-NEXT:    adcl %ecx, %edx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
 ; X32-NEXT:    addl %eax, %esi
-; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %edx, %ecx
-; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
+; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    mull %edi
 ; X32-NEXT:    movl %edx, %esi
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
 ; X32-NEXT:    movl %ebp, %eax
 ; X32-NEXT:    mull %edi
 ; X32-NEXT:    movl %edx, %edi
@@ -758,33 +758,33 @@ define void @test_512(i512* %a, i512* %b
 ; X32-NEXT:    addl %esi, %ebx
 ; X32-NEXT:    adcl $0, %edi
 ; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %edx, %ecx
 ; X32-NEXT:    addl %ebx, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %edi, %ecx
-; X32-NEXT:    setb {{[0-9]+}}(%esp) # 1-byte Folded Spill
+; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
 ; X32-NEXT:    movl %ebp, %eax
 ; X32-NEXT:    movl %ebp, %edi
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %eax, %ebx
 ; X32-NEXT:    addl %ecx, %ebx
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload
+; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
 ; X32-NEXT:    adcl %eax, %edx
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
 ; X32-NEXT:    movl %ebp, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    mull %ecx
 ; X32-NEXT:    movl %edx, %esi
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %edi, %eax
 ; X32-NEXT:    mull %ecx
 ; X32-NEXT:    movl %edx, %edi
@@ -792,101 +792,101 @@ define void @test_512(i512* %a, i512* %b
 ; X32-NEXT:    addl %esi, %ecx
 ; X32-NEXT:    adcl $0, %edi
 ; X32-NEXT:    movl %ebp, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
+; X32-NEXT:    movl (%esp), %esi # 4-byte Reload
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %edx, %ebp
 ; X32-NEXT:    addl %ecx, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %edi, %ebp
 ; X32-NEXT:    setb %cl
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    addl %ebp, %eax
 ; X32-NEXT:    movzbl %cl, %ecx
 ; X32-NEXT:    adcl %ecx, %edx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
 ; X32-NEXT:    addl %eax, %esi
 ; X32-NEXT:    adcl %edx, %ecx
-; X32-NEXT:    addl %ebx, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X32-NEXT:    adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill
+; X32-NEXT:    addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X32-NEXT:    adcl $0, %esi
 ; X32-NEXT:    adcl $0, %ecx
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
-; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
-; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    setb {{[0-9]+}}(%esp) # 1-byte Folded Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
 ; X32-NEXT:    movl %ebp, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %edx, %ecx
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %edx, %esi
 ; X32-NEXT:    movl %eax, %edi
 ; X32-NEXT:    addl %ecx, %edi
 ; X32-NEXT:    adcl $0, %esi
 ; X32-NEXT:    movl %ebp, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload
+; X32-NEXT:    movl (%esp), %ebx # 4-byte Reload
 ; X32-NEXT:    mull %ebx
 ; X32-NEXT:    movl %edx, %ecx
 ; X32-NEXT:    movl %eax, %ebp
 ; X32-NEXT:    addl %edi, %ebp
 ; X32-NEXT:    adcl %esi, %ecx
-; X32-NEXT:    setb {{[0-9]+}}(%esp) # 1-byte Folded Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    mull %ebx
 ; X32-NEXT:    addl %ecx, %eax
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # 1-byte Folded Reload
+; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
 ; X32-NEXT:    adcl %ecx, %edx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
 ; X32-NEXT:    addl %eax, %esi
 ; X32-NEXT:    adcl %edx, %ecx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx # 4-byte Reload
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
 ; X32-NEXT:    adcl %eax, %esi
 ; X32-NEXT:    adcl $0, %ecx
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X32-NEXT:    addl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X32-NEXT:    adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X32-NEXT:    adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X32-NEXT:    adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
 ; X32-NEXT:    adcl %eax, %edx
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl $0, %ebp
-; X32-NEXT:    movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl $0, %esi
-; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl $0, %ecx
-; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    movl 32(%ecx), %edi
-; X32-NEXT:    movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
+; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    movl %ecx, %eax
 ; X32-NEXT:    mull %edi
 ; X32-NEXT:    movl %edx, %esi
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl (%esp), %ebp # 4-byte Reload
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
 ; X32-NEXT:    movl %ebp, %eax
 ; X32-NEXT:    mull %edi
 ; X32-NEXT:    movl %edx, %edi
@@ -898,10 +898,10 @@ define void @test_512(i512* %a, i512* %b
 ; X32-NEXT:    movl %ecx, %eax
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %esi, %ecx
-; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %edx, %esi
 ; X32-NEXT:    addl %ebx, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %edi, %esi
 ; X32-NEXT:    setb %bl
 ; X32-NEXT:    movl %ebp, %eax
@@ -911,27 +911,27 @@ define void @test_512(i512* %a, i512* %b
 ; X32-NEXT:    addl %esi, %ebp
 ; X32-NEXT:    movzbl %bl, %eax
 ; X32-NEXT:    adcl %eax, %ecx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
 ; X32-NEXT:    movl %ebx, %eax
 ; X32-NEXT:    xorl %edx, %edx
 ; X32-NEXT:    mull %edx
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    addl %eax, %edi
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    adcl %edx, %esi
 ; X32-NEXT:    addl %ebp, %edi
-; X32-NEXT:    movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %ecx, %esi
-; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
+; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    movl %ecx, %eax
 ; X32-NEXT:    movl %ebx, %esi
 ; X32-NEXT:    mull %ebx
 ; X32-NEXT:    movl %edx, %edi
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
 ; X32-NEXT:    movl %ebx, %eax
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %edx, %esi
@@ -939,102 +939,101 @@ define void @test_512(i512* %a, i512* %b
 ; X32-NEXT:    addl %edi, %ebp
 ; X32-NEXT:    adcl $0, %esi
 ; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    mull %edi
 ; X32-NEXT:    movl %edx, %ecx
 ; X32-NEXT:    addl %ebp, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %esi, %ecx
-; X32-NEXT:    setb {{[0-9]+}}(%esp) # 1-byte Folded Spill
+; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
 ; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    movl %ebx, %esi
 ; X32-NEXT:    mull %edi
 ; X32-NEXT:    addl %ecx, %eax
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # 1-byte Folded Reload
+; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
 ; X32-NEXT:    adcl %ecx, %edx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
 ; X32-NEXT:    addl %eax, %edi
 ; X32-NEXT:    adcl %edx, %ecx
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload
-; X32-NEXT:    movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
-; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl 40(%eax), %ebp
-; X32-NEXT:    movl %eax, %ebx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    movl %edi, %eax
 ; X32-NEXT:    mull %ebp
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl %esi, %eax
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl %ebx, %eax
 ; X32-NEXT:    mull %ebp
 ; X32-NEXT:    movl %edx, %esi
 ; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
 ; X32-NEXT:    adcl $0, %esi
-; X32-NEXT:    movl 44(%ebx), %ebx
-; X32-NEXT:    movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl 44(%eax), %ebx
+; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %edi, %eax
 ; X32-NEXT:    mull %ebx
 ; X32-NEXT:    movl %edx, %edi
 ; X32-NEXT:    addl %ecx, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %esi, %edi
-; X32-NEXT:    setb {{[0-9]+}}(%esp) # 1-byte Folded Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    mull %ebx
 ; X32-NEXT:    movl %edx, %ecx
 ; X32-NEXT:    movl %eax, %esi
 ; X32-NEXT:    addl %edi, %esi
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload
+; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
 ; X32-NEXT:    adcl %eax, %ecx
 ; X32-NEXT:    movl %ebp, %eax
 ; X32-NEXT:    xorl %edx, %edx
 ; X32-NEXT:    mull %edx
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    addl %eax, %edi
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    adcl %edx, %eax
 ; X32-NEXT:    addl %esi, %edi
 ; X32-NEXT:    adcl %ecx, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
-; X32-NEXT:    addl %ecx, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
-; X32-NEXT:    adcl %ecx, {{[0-9]+}}(%esp) # 4-byte Folded Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT:    adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X32-NEXT:    adcl $0, %edi
 ; X32-NEXT:    adcl $0, %eax
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload
-; X32-NEXT:    movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    setb {{[0-9]+}}(%esp) # 1-byte Folded Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    mull %ebp
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl (%esp), %edi # 4-byte Reload
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    movl %edi, %eax
 ; X32-NEXT:    mull %ebp
 ; X32-NEXT:    movl %edx, %esi
 ; X32-NEXT:    movl %eax, %ebx
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
 ; X32-NEXT:    adcl $0, %esi
 ; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
 ; X32-NEXT:    mull %ebp
 ; X32-NEXT:    movl %edx, %ecx
 ; X32-NEXT:    addl %ebx, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %esi, %ecx
 ; X32-NEXT:    setb %bl
 ; X32-NEXT:    movl %edi, %eax
@@ -1042,104 +1041,103 @@ define void @test_512(i512* %a, i512* %b
 ; X32-NEXT:    addl %ecx, %eax
 ; X32-NEXT:    movzbl %bl, %ecx
 ; X32-NEXT:    adcl %ecx, %edx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
 ; X32-NEXT:    addl %eax, %esi
 ; X32-NEXT:    adcl %edx, %ecx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X32-NEXT:    addl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X32-NEXT:    adcl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
 ; X32-NEXT:    adcl %eax, %esi
-; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl $0, %ecx
-; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    imull %eax, %ebp
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    mull %esi
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    addl %ebp, %edx
-; X32-NEXT:    imull {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
+; X32-NEXT:    imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
 ; X32-NEXT:    addl %edx, %esi
 ; X32-NEXT:    movl %esi, %ebp
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    movl %eax, %esi
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
 ; X32-NEXT:    imull %ebx, %esi
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    mull %edi
 ; X32-NEXT:    movl %eax, %ecx
 ; X32-NEXT:    addl %esi, %edx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
+; X32-NEXT:    movl (%esp), %esi # 4-byte Reload
 ; X32-NEXT:    movl %edi, %eax
 ; X32-NEXT:    imull %edi, %esi
 ; X32-NEXT:    addl %edx, %esi
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
-; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT:    movl %ecx, %edi
 ; X32-NEXT:    adcl %ebp, %esi
-; X32-NEXT:    movl %esi, %edi
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
+; X32-NEXT:    movl %esi, (%esp) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    mull %ecx
 ; X32-NEXT:    movl %edx, %ebp
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %ebx, %eax
 ; X32-NEXT:    mull %ecx
 ; X32-NEXT:    movl %edx, %esi
 ; X32-NEXT:    movl %eax, %ebx
 ; X32-NEXT:    addl %ebp, %ebx
 ; X32-NEXT:    adcl $0, %esi
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
 ; X32-NEXT:    mull %ebp
 ; X32-NEXT:    movl %edx, %ecx
 ; X32-NEXT:    addl %ebx, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %esi, %ecx
 ; X32-NEXT:    setb %bl
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    mull %ebp
 ; X32-NEXT:    addl %ecx, %eax
 ; X32-NEXT:    movzbl %bl, %ecx
 ; X32-NEXT:    adcl %ecx, %edx
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl %edi, %edx
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT:    movl 60(%edx), %ecx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    addl %edi, %eax
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl (%esp), %edx # 4-byte Folded Reload
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X32-NEXT:    movl 60(%edi), %ecx
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    imull %eax, %ecx
-; X32-NEXT:    movl 56(%edx), %esi
-; X32-NEXT:    movl %edx, %edi
+; X32-NEXT:    movl 56(%edi), %esi
 ; X32-NEXT:    mull %esi
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    addl %ecx, %edx
-; X32-NEXT:    imull {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
+; X32-NEXT:    imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
 ; X32-NEXT:    addl %edx, %esi
 ; X32-NEXT:    movl 48(%edi), %ebx
 ; X32-NEXT:    movl 52(%edi), %ebp
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    movl %eax, %edi
 ; X32-NEXT:    imull %ebp, %edi
-; X32-NEXT:    movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    mull %ebx
 ; X32-NEXT:    addl %edi, %edx
-; X32-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    imull %ebx, %ecx
 ; X32-NEXT:    addl %edx, %ecx
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %esi, %ecx
-; X32-NEXT:    movl %ecx, (%esp) # 4-byte Spill
+; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    mull %ecx
 ; X32-NEXT:    movl %edx, %esi
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %ebp, %eax
 ; X32-NEXT:    mull %ecx
 ; X32-NEXT:    movl %edx, %ebp
@@ -1147,98 +1145,98 @@ define void @test_512(i512* %a, i512* %b
 ; X32-NEXT:    addl %esi, %edi
 ; X32-NEXT:    adcl $0, %ebp
 ; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %edx, %ecx
 ; X32-NEXT:    movl %eax, %ebx
 ; X32-NEXT:    addl %edi, %ebx
 ; X32-NEXT:    adcl %ebp, %ecx
-; X32-NEXT:    setb {{[0-9]+}}(%esp) # 1-byte Folded Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    addl %ecx, %eax
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # 1-byte Folded Reload
+; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
 ; X32-NEXT:    adcl %ecx, %edx
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
-; X32-NEXT:    adcl (%esp), %edx # 4-byte Folded Reload
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
-; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload
-; X32-NEXT:    movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X32-NEXT:    movl %esi, %ecx
 ; X32-NEXT:    movl 40(%esi), %ebx
 ; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
+; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    mull %esi
-; X32-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %edx, %ebp
 ; X32-NEXT:    movl 44(%ecx), %ecx
 ; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %edx, %esi
 ; X32-NEXT:    movl %eax, %edi
 ; X32-NEXT:    addl %ebp, %edi
 ; X32-NEXT:    adcl $0, %esi
 ; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
 ; X32-NEXT:    mull %ebp
 ; X32-NEXT:    movl %edx, %ebx
 ; X32-NEXT:    addl %edi, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %esi, %ebx
-; X32-NEXT:    setb {{[0-9]+}}(%esp) # 1-byte Folded Spill
+; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
 ; X32-NEXT:    movl %ecx, %eax
 ; X32-NEXT:    mull %ebp
 ; X32-NEXT:    movl %edx, %esi
 ; X32-NEXT:    movl %eax, %edi
 ; X32-NEXT:    addl %ebx, %edi
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload
+; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
 ; X32-NEXT:    adcl %eax, %esi
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    xorl %ecx, %ecx
 ; X32-NEXT:    mull %ecx
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
 ; X32-NEXT:    addl %edi, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %esi, %edx
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %edx, (%esp) # 4-byte Spill
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X32-NEXT:    movl 32(%esi), %edi
 ; X32-NEXT:    movl %edi, %eax
-; X32-NEXT:    movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
+; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    mull %ecx
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl 36(%esi), %esi
 ; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    mull %ecx
 ; X32-NEXT:    movl %edx, %ebx
 ; X32-NEXT:    movl %eax, %ebp
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
 ; X32-NEXT:    adcl $0, %ebx
 ; X32-NEXT:    movl %edi, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    mull %ecx
 ; X32-NEXT:    movl %edx, %edi
 ; X32-NEXT:    addl %ebp, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %ebx, %edi
 ; X32-NEXT:    setb %bl
 ; X32-NEXT:    movl %esi, %eax
@@ -1248,30 +1246,30 @@ define void @test_512(i512* %a, i512* %b
 ; X32-NEXT:    addl %edi, %esi
 ; X32-NEXT:    movzbl %bl, %eax
 ; X32-NEXT:    adcl %eax, %ebp
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    movl %edi, %eax
 ; X32-NEXT:    xorl %ecx, %ecx
 ; X32-NEXT:    mull %ecx
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    addl %eax, %ecx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    adcl %edx, %eax
 ; X32-NEXT:    addl %esi, %ecx
 ; X32-NEXT:    adcl %ebp, %eax
-; X32-NEXT:    addl (%esp), %ecx # 4-byte Folded Reload
-; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    adcl $0, (%esp) # 4-byte Folded Spill
 ; X32-NEXT:    movl %edi, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %edx, %ecx
-; X32-NEXT:    movl %eax, (%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
 ; X32-NEXT:    movl %ebx, %eax
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %edx, %esi
@@ -1279,48 +1277,48 @@ define void @test_512(i512* %a, i512* %b
 ; X32-NEXT:    addl %ecx, %ebp
 ; X32-NEXT:    adcl $0, %esi
 ; X32-NEXT:    movl %edi, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    mull %ecx
 ; X32-NEXT:    movl %edx, %edi
 ; X32-NEXT:    addl %ebp, %eax
 ; X32-NEXT:    movl %eax, %ebp
 ; X32-NEXT:    adcl %esi, %edi
-; X32-NEXT:    setb {{[0-9]+}}(%esp) # 1-byte Folded Spill
+; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
 ; X32-NEXT:    movl %ebx, %eax
 ; X32-NEXT:    mull %ecx
 ; X32-NEXT:    movl %ecx, %ebx
 ; X32-NEXT:    addl %edi, %eax
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # 1-byte Folded Reload
+; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
 ; X32-NEXT:    adcl %ecx, %edx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
 ; X32-NEXT:    addl %eax, %esi
 ; X32-NEXT:    adcl %edx, %ecx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X32-NEXT:    addl %eax, (%esp) # 4-byte Folded Spill
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload
-; X32-NEXT:    movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl $0, %esi
 ; X32-NEXT:    adcl $0, %ecx
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
-; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
-; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    setb {{[0-9]+}}(%esp) # 1-byte Folded Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
-; X32-NEXT:    mull %esi
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl (%esp), %ecx # 4-byte Folded Reload
+; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT:    mull %esi
+; X32-NEXT:    movl %edx, (%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    movl %edi, %eax
 ; X32-NEXT:    mull %esi
 ; X32-NEXT:    movl %edx, %esi
 ; X32-NEXT:    movl %eax, %ebp
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload
+; X32-NEXT:    addl (%esp), %ebp # 4-byte Folded Reload
 ; X32-NEXT:    adcl $0, %esi
 ; X32-NEXT:    movl %ecx, %eax
 ; X32-NEXT:    mull %ebx
@@ -1328,193 +1326,193 @@ define void @test_512(i512* %a, i512* %b
 ; X32-NEXT:    addl %ebp, %eax
 ; X32-NEXT:    movl %eax, %ebp
 ; X32-NEXT:    adcl %esi, %ecx
-; X32-NEXT:    setb {{[0-9]+}}(%esp) # 1-byte Folded Spill
+; X32-NEXT:    setb (%esp) # 1-byte Folded Spill
 ; X32-NEXT:    movl %edi, %eax
 ; X32-NEXT:    mull %ebx
 ; X32-NEXT:    addl %ecx, %eax
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # 1-byte Folded Reload
+; X32-NEXT:    movzbl (%esp), %ecx # 1-byte Folded Reload
 ; X32-NEXT:    adcl %ecx, %edx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
 ; X32-NEXT:    addl %eax, %edi
 ; X32-NEXT:    adcl %edx, %ecx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X32-NEXT:    addl %eax, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload
-; X32-NEXT:    movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
 ; X32-NEXT:    adcl %eax, %edi
-; X32-NEXT:    movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl $0, %ecx
-; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    movl 48(%ecx), %ebp
 ; X32-NEXT:    imull %ebp, %ebx
 ; X32-NEXT:    movl %ebp, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    mull %edi
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    addl %ebx, %edx
 ; X32-NEXT:    movl 52(%ecx), %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    imull %eax, %edi
 ; X32-NEXT:    addl %edx, %edi
-; X32-NEXT:    movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl 56(%ecx), %eax
 ; X32-NEXT:    movl %ecx, %ebx
 ; X32-NEXT:    movl %eax, %edi
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X32-NEXT:    imull %esi, %edi
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    mull %ecx
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    addl %edi, %edx
 ; X32-NEXT:    movl 60(%ebx), %ebx
 ; X32-NEXT:    movl %ecx, %eax
 ; X32-NEXT:    imull %ecx, %ebx
 ; X32-NEXT:    addl %edx, %ebx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
-; X32-NEXT:    addl %ecx, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
 ; X32-NEXT:    mull %ebp
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %esi, %eax
 ; X32-NEXT:    mull %ebp
 ; X32-NEXT:    movl %edx, %ecx
 ; X32-NEXT:    movl %eax, %edi
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
 ; X32-NEXT:    adcl $0, %ecx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
 ; X32-NEXT:    mull %ebp
 ; X32-NEXT:    movl %edx, %esi
 ; X32-NEXT:    addl %edi, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %ecx, %esi
 ; X32-NEXT:    setb %cl
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    mull %ebp
 ; X32-NEXT:    addl %esi, %eax
 ; X32-NEXT:    movzbl %cl, %ecx
 ; X32-NEXT:    adcl %ecx, %edx
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    adcl %ebx, %edx
-; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
 ; X32-NEXT:    imull %ebp, %edi
 ; X32-NEXT:    movl %ebp, %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    mull %ecx
 ; X32-NEXT:    movl %eax, %esi
 ; X32-NEXT:    addl %edi, %edx
-; X32-NEXT:    imull {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
+; X32-NEXT:    imull {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
 ; X32-NEXT:    addl %edx, %ecx
-; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
 ; X32-NEXT:    imull %ebx, %ecx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    mull %edi
 ; X32-NEXT:    addl %ecx, %edx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X32-NEXT:    imull %edi, %ecx
 ; X32-NEXT:    addl %edx, %ecx
 ; X32-NEXT:    addl %esi, %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
-; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %edi, %eax
 ; X32-NEXT:    mull %ebp
 ; X32-NEXT:    movl %edx, %esi
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-NEXT:    movl %ebx, %eax
 ; X32-NEXT:    mull %ebp
 ; X32-NEXT:    movl %edx, %edi
 ; X32-NEXT:    movl %eax, %ecx
 ; X32-NEXT:    addl %esi, %ecx
 ; X32-NEXT:    adcl $0, %edi
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
 ; X32-NEXT:    mull %ebx
 ; X32-NEXT:    movl %edx, %esi
 ; X32-NEXT:    movl %eax, %ebp
 ; X32-NEXT:    addl %ecx, %ebp
 ; X32-NEXT:    adcl %edi, %esi
 ; X32-NEXT:    setb %cl
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X32-NEXT:    mull %ebx
 ; X32-NEXT:    movl %edx, %ebx
 ; X32-NEXT:    addl %esi, %eax
 ; X32-NEXT:    movzbl %cl, %ecx
 ; X32-NEXT:    adcl %ecx, %ebx
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
-; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload
-; X32-NEXT:    movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload
-; X32-NEXT:    movl (%esp), %edx # 4-byte Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload
-; X32-NEXT:    movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload
-; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload
-; X32-NEXT:    movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload
-; X32-NEXT:    movl %edx, (%esp) # 4-byte Spill
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx # 4-byte Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
-; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ebx # 4-byte Folded Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    movl %edi, (%ecx)
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    movl %edi, 4(%ecx)
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    movl %edi, 8(%ecx)
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    movl %edi, 12(%ecx)
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    movl %edi, 16(%ecx)
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    movl %edi, 20(%ecx)
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    movl %edi, 24(%ecx)
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    movl %edi, 28(%ecx)
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    movl %edi, 32(%ecx)
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    movl %edi, 36(%ecx)
-; X32-NEXT:    movl (%esp), %edi # 4-byte Reload
+; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X32-NEXT:    movl %edi, 40(%ecx)
 ; X32-NEXT:    movl %esi, 44(%ecx)
 ; X32-NEXT:    movl %edx, 48(%ecx)
@@ -1540,7 +1538,7 @@ define void @test_512(i512* %a, i512* %b
 ; X64-NEXT:    movq %rdx, (%rsp) # 8-byte Spill
 ; X64-NEXT:    movq 24(%rdi), %r11
 ; X64-NEXT:    movq 16(%rdi), %r15
-; X64-NEXT:    movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq (%rsi), %rdx
 ; X64-NEXT:    movq 8(%rsi), %rbp
 ; X64-NEXT:    movq %r15, %rax
@@ -1549,7 +1547,7 @@ define void @test_512(i512* %a, i512* %b
 ; X64-NEXT:    movq %rdx, %r9
 ; X64-NEXT:    movq %rax, %r8
 ; X64-NEXT:    movq %r11, %rax
-; X64-NEXT:    movq %r11, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    mulq %rsi
 ; X64-NEXT:    movq %rsi, %r10
 ; X64-NEXT:    movq %rdx, %rbx
@@ -1557,7 +1555,7 @@ define void @test_512(i512* %a, i512* %b
 ; X64-NEXT:    addq %r9, %rsi
 ; X64-NEXT:    adcq $0, %rbx
 ; X64-NEXT:    movq %r15, %rax
-; X64-NEXT:    movq %r15, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    mulq %rbp
 ; X64-NEXT:    movq %rdx, %rcx
 ; X64-NEXT:    movq %rax, %r9
@@ -1568,37 +1566,37 @@ define void @test_512(i512* %a, i512* %b
 ; X64-NEXT:    movq %r11, %rax
 ; X64-NEXT:    mulq %rbp
 ; X64-NEXT:    movq %rbp, %r14
-; X64-NEXT:    movq %rbp, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq %rdx, %rsi
 ; X64-NEXT:    movq %rax, %rbp
 ; X64-NEXT:    addq %rcx, %rbp
 ; X64-NEXT:    adcq %rbx, %rsi
 ; X64-NEXT:    xorl %ecx, %ecx
 ; X64-NEXT:    movq %r10, %rbx
-; X64-NEXT:    movq %r10, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq %r10, %rax
 ; X64-NEXT:    mulq %rcx
 ; X64-NEXT:    movq %rdx, %r13
 ; X64-NEXT:    movq %rax, %r10
 ; X64-NEXT:    movq %r15, %rax
 ; X64-NEXT:    mulq %rcx
-; X64-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq %rax, %r15
-; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    addq %r10, %r15
 ; X64-NEXT:    adcq %r13, %rdx
 ; X64-NEXT:    addq %rbp, %r15
 ; X64-NEXT:    adcq %rsi, %rdx
 ; X64-NEXT:    movq %rdx, %r12
-; X64-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq (%rdi), %rcx
 ; X64-NEXT:    movq %rcx, %rax
 ; X64-NEXT:    mulq %rbx
-; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq %rdx, %r11
 ; X64-NEXT:    movq 8(%rdi), %rdi
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    mulq %rbx
 ; X64-NEXT:    movq %rdx, %rbp
 ; X64-NEXT:    movq %rax, %rsi
@@ -1608,7 +1606,7 @@ define void @test_512(i512* %a, i512* %b
 ; X64-NEXT:    mulq %r14
 ; X64-NEXT:    movq %rdx, %rbx
 ; X64-NEXT:    addq %rsi, %rax
-; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    adcq %rbp, %rbx
 ; X64-NEXT:    setb %r11b
 ; X64-NEXT:    movq %rdi, %rax
@@ -1631,16 +1629,16 @@ define void @test_512(i512* %a, i512* %b
 ; X64-NEXT:    adcq %r9, %r13
 ; X64-NEXT:    adcq $0, %r15
 ; X64-NEXT:    adcq $0, %r12
-; X64-NEXT:    movq %r12, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload
+; X64-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
 ; X64-NEXT:    movq 16(%rsi), %r8
 ; X64-NEXT:    movq %rcx, %rax
 ; X64-NEXT:    movq %rcx, %r9
-; X64-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    mulq %r8
 ; X64-NEXT:    movq %rdx, %rdi
 ; X64-NEXT:    movq %rax, %r12
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
 ; X64-NEXT:    movq %rcx, %rax
 ; X64-NEXT:    mulq %r8
 ; X64-NEXT:    movq %rdx, %rbp
@@ -1652,7 +1650,7 @@ define void @test_512(i512* %a, i512* %b
 ; X64-NEXT:    mulq %rdi
 ; X64-NEXT:    movq %rdx, %rsi
 ; X64-NEXT:    addq %rbx, %rax
-; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    adcq %rbp, %rsi
 ; X64-NEXT:    setb %bpl
 ; X64-NEXT:    movq %rcx, %rax
@@ -1665,31 +1663,31 @@ define void @test_512(i512* %a, i512* %b
 ; X64-NEXT:    movq %r8, %rax
 ; X64-NEXT:    xorl %ecx, %ecx
 ; X64-NEXT:    mulq %rcx
-; X64-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq %rax, %rbp
 ; X64-NEXT:    addq %rax, %r11
 ; X64-NEXT:    adcq %rdx, %r14
 ; X64-NEXT:    addq %r9, %r11
 ; X64-NEXT:    adcq %rbx, %r14
 ; X64-NEXT:    addq %r10, %r12
-; X64-NEXT:    movq %r12, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq %r13, -{{[0-9]+}}(%rsp) # 8-byte Folded Spill
+; X64-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    adcq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
 ; X64-NEXT:    adcq $0, %r11
 ; X64-NEXT:    adcq $0, %r14
 ; X64-NEXT:    addq %r15, %r11
-; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %r14 # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
 ; X64-NEXT:    setb %r9b
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
 ; X64-NEXT:    movq %rcx, %rax
 ; X64-NEXT:    mulq %r8
-; X64-NEXT:    movq %rdx, %r15
+; X64-NEXT:    movq %rdx, %r10
 ; X64-NEXT:    movq %rax, %r12
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r10 # 8-byte Reload
-; X64-NEXT:    movq %r10, %rax
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; X64-NEXT:    movq %r15, %rax
 ; X64-NEXT:    mulq %r8
 ; X64-NEXT:    movq %rdx, %rsi
 ; X64-NEXT:    movq %rax, %rbx
-; X64-NEXT:    addq %r15, %rbx
+; X64-NEXT:    addq %r10, %rbx
 ; X64-NEXT:    adcq $0, %rsi
 ; X64-NEXT:    movq %rcx, %rax
 ; X64-NEXT:    mulq %rdi
@@ -1698,27 +1696,27 @@ define void @test_512(i512* %a, i512* %b
 ; X64-NEXT:    movq %rax, %rbx
 ; X64-NEXT:    adcq %rsi, %rcx
 ; X64-NEXT:    setb %sil
-; X64-NEXT:    movq %r10, %rax
+; X64-NEXT:    movq %r15, %rax
 ; X64-NEXT:    mulq %rdi
 ; X64-NEXT:    addq %rcx, %rax
 ; X64-NEXT:    movzbl %sil, %ecx
 ; X64-NEXT:    adcq %rcx, %rdx
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
 ; X64-NEXT:    addq %rbp, %rsi
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload
-; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
 ; X64-NEXT:    addq %rax, %rsi
 ; X64-NEXT:    adcq %rdx, %rcx
 ; X64-NEXT:    addq %r11, %r12
-; X64-NEXT:    movq %r12, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    adcq %r14, %rbx
-; X64-NEXT:    movq %rbx, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movzbl %r9b, %eax
 ; X64-NEXT:    adcq %rax, %rsi
-; X64-NEXT:    movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    adcq $0, %rcx
-; X64-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
 ; X64-NEXT:    movq 32(%rcx), %rsi
 ; X64-NEXT:    imulq %rsi, %rdi
 ; X64-NEXT:    movq %rsi, %rax
@@ -1731,9 +1729,9 @@ define void @test_512(i512* %a, i512* %b
 ; X64-NEXT:    movq 48(%rcx), %rax
 ; X64-NEXT:    movq %rcx, %rbx
 ; X64-NEXT:    movq %rax, %rdi
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
 ; X64-NEXT:    imulq %rcx, %rdi
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
 ; X64-NEXT:    mulq %rbp
 ; X64-NEXT:    movq %rax, %r14
 ; X64-NEXT:    addq %rdi, %rdx
@@ -1746,7 +1744,7 @@ define void @test_512(i512* %a, i512* %b
 ; X64-NEXT:    movq %rbp, %r10
 ; X64-NEXT:    mulq %rsi
 ; X64-NEXT:    movq %rdx, %rdi
-; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq %rcx, %rax
 ; X64-NEXT:    movq %rcx, %r8
 ; X64-NEXT:    mulq %rsi
@@ -1770,33 +1768,32 @@ define void @test_512(i512* %a, i512* %b
 ; X64-NEXT:    adcq %rax, %r11
 ; X64-NEXT:    addq %r14, %r9
 ; X64-NEXT:    adcq %rbx, %r11
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rdx # 8-byte Reload
-; X64-NEXT:    movq 56(%rdx), %rcx
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r10 # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; X64-NEXT:    movq 56(%rbp), %rcx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
 ; X64-NEXT:    imulq %r10, %rcx
-; X64-NEXT:    movq 48(%rdx), %rbx
-; X64-NEXT:    movq %rdx, %rbp
+; X64-NEXT:    movq 48(%rbp), %rbx
 ; X64-NEXT:    movq %r10, %rax
 ; X64-NEXT:    mulq %rbx
 ; X64-NEXT:    movq %rax, %rsi
 ; X64-NEXT:    addq %rcx, %rdx
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r15 # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
 ; X64-NEXT:    imulq %r15, %rbx
 ; X64-NEXT:    addq %rdx, %rbx
 ; X64-NEXT:    movq 32(%rbp), %rdi
 ; X64-NEXT:    movq 40(%rbp), %r8
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
 ; X64-NEXT:    movq %rax, %rcx
 ; X64-NEXT:    imulq %r8, %rcx
 ; X64-NEXT:    mulq %rdi
 ; X64-NEXT:    movq %rax, %r14
 ; X64-NEXT:    addq %rcx, %rdx
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
 ; X64-NEXT:    imulq %rdi, %rax
 ; X64-NEXT:    addq %rdx, %rax
 ; X64-NEXT:    addq %rsi, %r14
 ; X64-NEXT:    adcq %rbx, %rax
-; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    mulq %r10
 ; X64-NEXT:    movq %rdx, %r12
@@ -1820,23 +1817,23 @@ define void @test_512(i512* %a, i512* %b
 ; X64-NEXT:    movzbl %cl, %ecx
 ; X64-NEXT:    adcq %rcx, %rdx
 ; X64-NEXT:    addq %r14, %rax
-; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload
-; X64-NEXT:    addq -{{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
 ; X64-NEXT:    adcq %r13, %rdi
 ; X64-NEXT:    adcq %r9, %rax
 ; X64-NEXT:    adcq %r11, %rdx
-; X64-NEXT:    addq -{{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload
-; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload
-; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rax # 8-byte Folded Reload
-; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload
+; X64-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
 ; X64-NEXT:    movq (%rsp), %rcx # 8-byte Reload
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
 ; X64-NEXT:    movq %rbp, (%rcx)
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
 ; X64-NEXT:    movq %rbp, 8(%rcx)
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
 ; X64-NEXT:    movq %rbp, 16(%rcx)
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
 ; X64-NEXT:    movq %rbp, 24(%rcx)
 ; X64-NEXT:    movq %rsi, 32(%rcx)
 ; X64-NEXT:    movq %rdi, 40(%rcx)

Modified: llvm/trunk/test/CodeGen/X86/mul128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mul128.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mul128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mul128.ll Wed Sep 19 11:59:08 2018
@@ -6,8 +6,8 @@ define i128 @foo(i128 %t, i128 %u) {
 ; X64-LABEL: foo:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdx, %r8
-; X64-NEXT:    imulq %rdi, %rcx
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    imulq %rdi, %rcx
 ; X64-NEXT:    mulq %rdx
 ; X64-NEXT:    addq %rcx, %rdx
 ; X64-NEXT:    imulq %r8, %rsi
@@ -51,7 +51,7 @@ define i128 @foo(i128 %t, i128 %u) {
 ; X86-NEXT:    imull %ebp, %edi
 ; X86-NEXT:    addl %edx, %edi
 ; X86-NEXT:    addl %ebx, %eax
-; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    adcl %ecx, %edi
 ; X86-NEXT:    movl %ebp, %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -76,7 +76,7 @@ define i128 @foo(i128 %t, i128 %u) {
 ; X86-NEXT:    addl %ebx, %eax
 ; X86-NEXT:    movzbl %cl, %ecx
 ; X86-NEXT:    adcl %ecx, %edx
-; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
 ; X86-NEXT:    adcl %edi, %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl (%esp), %esi # 4-byte Reload

Modified: llvm/trunk/test/CodeGen/X86/mul64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mul64.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mul64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mul64.ll Wed Sep 19 11:59:08 2018
@@ -19,8 +19,8 @@ define i64 @foo(i64 %t, i64 %u) nounwind
 ;
 ; X64-LABEL: foo:
 ; X64:       # %bb.0:
-; X64-NEXT:    imulq %rsi, %rdi
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    imulq %rsi, %rax
 ; X64-NEXT:    retq
   %k = mul i64 %t, %u
   ret i64 %k

Modified: llvm/trunk/test/CodeGen/X86/mwaitx-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mwaitx-schedule.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mwaitx-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mwaitx-schedule.ll Wed Sep 19 11:59:08 2018
@@ -6,22 +6,22 @@
 define void @foo(i8* %P, i32 %E, i32 %H) nounwind {
 ; GENERIC-LABEL: foo:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
 ; GENERIC-NEXT:    movl %esi, %ecx # sched: [1:0.33]
+; GENERIC-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
 ; GENERIC-NEXT:    monitorx # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BDVER4-LABEL: foo:
 ; BDVER4:       # %bb.0:
-; BDVER4-NEXT:    leaq (%rdi), %rax
 ; BDVER4-NEXT:    movl %esi, %ecx
+; BDVER4-NEXT:    leaq (%rdi), %rax
 ; BDVER4-NEXT:    monitorx
 ; BDVER4-NEXT:    retq
 ;
 ; ZNVER1-LABEL: foo:
 ; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    leaq (%rdi), %rax # sched: [1:0.25]
 ; ZNVER1-NEXT:    movl %esi, %ecx # sched: [1:0.25]
+; ZNVER1-NEXT:    leaq (%rdi), %rax # sched: [1:0.25]
 ; ZNVER1-NEXT:    monitorx # sched: [100:0.25]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   tail call void @llvm.x86.monitorx(i8* %P, i32 %E, i32 %H)
@@ -33,9 +33,9 @@ define void @bar(i32 %E, i32 %H, i32 %C)
 ; GENERIC-LABEL: bar:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pushq %rbx # sched: [5:1.00]
-; GENERIC-NEXT:    movl %edi, %ecx # sched: [1:0.33]
-; GENERIC-NEXT:    movl %esi, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    movl %edx, %ebx # sched: [1:0.33]
+; GENERIC-NEXT:    movl %esi, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    movl %edi, %ecx # sched: [1:0.33]
 ; GENERIC-NEXT:    mwaitx # sched: [100:0.33]
 ; GENERIC-NEXT:    popq %rbx # sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -43,9 +43,9 @@ define void @bar(i32 %E, i32 %H, i32 %C)
 ; BDVER4-LABEL: bar:
 ; BDVER4:       # %bb.0:
 ; BDVER4-NEXT:    pushq %rbx
-; BDVER4-NEXT:    movl %edi, %ecx
-; BDVER4-NEXT:    movl %esi, %eax
 ; BDVER4-NEXT:    movl %edx, %ebx
+; BDVER4-NEXT:    movl %esi, %eax
+; BDVER4-NEXT:    movl %edi, %ecx
 ; BDVER4-NEXT:    mwaitx
 ; BDVER4-NEXT:    popq %rbx
 ; BDVER4-NEXT:    retq
@@ -53,9 +53,9 @@ define void @bar(i32 %E, i32 %H, i32 %C)
 ; ZNVER1-LABEL: bar:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    pushq %rbx # sched: [1:0.50]
-; ZNVER1-NEXT:    movl %edi, %ecx # sched: [1:0.25]
-; ZNVER1-NEXT:    movl %esi, %eax # sched: [1:0.25]
 ; ZNVER1-NEXT:    movl %edx, %ebx # sched: [1:0.25]
+; ZNVER1-NEXT:    movl %esi, %eax # sched: [1:0.25]
+; ZNVER1-NEXT:    movl %edi, %ecx # sched: [1:0.25]
 ; ZNVER1-NEXT:    mwaitx # sched: [100:0.25]
 ; ZNVER1-NEXT:    popq %rbx # sched: [8:0.50]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]

Modified: llvm/trunk/test/CodeGen/X86/mwaitx.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mwaitx.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mwaitx.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mwaitx.ll Wed Sep 19 11:59:08 2018
@@ -4,8 +4,9 @@
 ; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=bdver4 | FileCheck %s -check-prefix=WIN64
 
 ; CHECK-LABEL: foo:
-; CHECK: leaq    (%rdi), %rax
-; CHECK-NEXT: movl    %esi, %ecx
+; CHECK-LABEL: # %bb.0:
+; CHECK-DAG: leaq    (%rdi), %rax
+; CHECK-DAG: movl    %esi, %ecx
 ; CHECK-NEXT: monitorx
 ; WIN64-LABEL: foo:
 ; WIN64:      leaq    (%rcx), %rax
@@ -21,13 +22,15 @@ entry:
 declare void @llvm.x86.monitorx(i8*, i32, i32) nounwind
 
 ; CHECK-LABEL: bar:
-; CHECK: movl    %edi, %ecx
-; CHECK-NEXT: movl    %esi, %eax
-; CHECK-NEXT: movl    %edx, %ebx
+; CHECK: pushq
+; CHECK-DAG: movl    %edi, %ecx
+; CHECK-DAG: movl    %esi, %eax
+; CHECK-DAG: movl    %edx, %ebx
 ; CHECK-NEXT: mwaitx
 ; WIN64-LABEL: bar:
-; WIN64:      movl    %edx, %eax
-; WIN64:      movl    %r8d, %ebx
+; WIN64: pushq
+; WIN64-DAG:      movl    %edx, %eax
+; WIN64-DAG:      movl    %r8d, %ebx
 ; WIN64-NEXT: mwaitx
 define void @bar(i32 %E, i32 %H, i32 %C) nounwind {
 entry:

Modified: llvm/trunk/test/CodeGen/X86/negate-i1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/negate-i1.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/negate-i1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/negate-i1.ll Wed Sep 19 11:59:08 2018
@@ -5,9 +5,10 @@
 define i8 @select_i8_neg1_or_0(i1 %a) {
 ; X64-LABEL: select_i8_neg1_or_0:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $1, %dil
-; X64-NEXT:    negb %dil
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    andb $1, %al
+; X64-NEXT:    negb %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
 ;
 ; X32-LABEL: select_i8_neg1_or_0:
@@ -23,8 +24,9 @@ define i8 @select_i8_neg1_or_0(i1 %a) {
 define i8 @select_i8_neg1_or_0_zeroext(i1 zeroext %a) {
 ; X64-LABEL: select_i8_neg1_or_0_zeroext:
 ; X64:       # %bb.0:
-; X64-NEXT:    negb %dil
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    negb %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
 ;
 ; X32-LABEL: select_i8_neg1_or_0_zeroext:
@@ -39,9 +41,10 @@ define i8 @select_i8_neg1_or_0_zeroext(i
 define i16 @select_i16_neg1_or_0(i1 %a) {
 ; X64-LABEL: select_i16_neg1_or_0:
 ; X64:       # %bb.0:
-; X64-NEXT:    andl $1, %edi
-; X64-NEXT:    negl %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    andl $1, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 ;
 ; X32-LABEL: select_i16_neg1_or_0:
@@ -58,8 +61,9 @@ define i16 @select_i16_neg1_or_0(i1 %a)
 define i16 @select_i16_neg1_or_0_zeroext(i1 zeroext %a) {
 ; X64-LABEL: select_i16_neg1_or_0_zeroext:
 ; X64:       # %bb.0:
-; X64-NEXT:    negl %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 ;
 ; X32-LABEL: select_i16_neg1_or_0_zeroext:
@@ -75,9 +79,9 @@ define i16 @select_i16_neg1_or_0_zeroext
 define i32 @select_i32_neg1_or_0(i1 %a) {
 ; X64-LABEL: select_i32_neg1_or_0:
 ; X64:       # %bb.0:
-; X64-NEXT:    andl $1, %edi
-; X64-NEXT:    negl %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    andl $1, %eax
+; X64-NEXT:    negl %eax
 ; X64-NEXT:    retq
 ;
 ; X32-LABEL: select_i32_neg1_or_0:
@@ -93,8 +97,8 @@ define i32 @select_i32_neg1_or_0(i1 %a)
 define i32 @select_i32_neg1_or_0_zeroext(i1 zeroext %a) {
 ; X64-LABEL: select_i32_neg1_or_0_zeroext:
 ; X64:       # %bb.0:
-; X64-NEXT:    negl %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    negl %eax
 ; X64-NEXT:    retq
 ;
 ; X32-LABEL: select_i32_neg1_or_0_zeroext:
@@ -109,10 +113,9 @@ define i32 @select_i32_neg1_or_0_zeroext
 define i64 @select_i64_neg1_or_0(i1 %a) {
 ; X64-LABEL: select_i64_neg1_or_0:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    andl $1, %edi
-; X64-NEXT:    negq %rdi
-; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    andl $1, %eax
+; X64-NEXT:    negq %rax
 ; X64-NEXT:    retq
 ;
 ; X32-LABEL: select_i64_neg1_or_0:

Modified: llvm/trunk/test/CodeGen/X86/negate-shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/negate-shift.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/negate-shift.ll (original)
+++ llvm/trunk/test/CodeGen/X86/negate-shift.ll Wed Sep 19 11:59:08 2018
@@ -4,8 +4,8 @@
 define i32 @neg_lshr_signbit(i32 %x) {
 ; X64-LABEL: neg_lshr_signbit:
 ; X64:       # %bb.0:
-; X64-NEXT:    sarl $31, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    sarl $31, %eax
 ; X64-NEXT:    retq
   %sh = lshr i32 %x, 31
   %neg = sub i32 0, %sh
@@ -15,8 +15,8 @@ define i32 @neg_lshr_signbit(i32 %x) {
 define i64 @neg_ashr_signbit(i64 %x) {
 ; X64-LABEL: neg_ashr_signbit:
 ; X64:       # %bb.0:
-; X64-NEXT:    shrq $63, %rdi
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    shrq $63, %rax
 ; X64-NEXT:    retq
   %sh = ashr i64 %x, 63
   %neg = sub i64 0, %sh

Modified: llvm/trunk/test/CodeGen/X86/negate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/negate.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/negate.ll (original)
+++ llvm/trunk/test/CodeGen/X86/negate.ll Wed Sep 19 11:59:08 2018
@@ -42,8 +42,9 @@ define <4 x i32> @negate_zero_or_minsign
 define i8 @negate_zero_or_minsigned(i8 %x) {
 ; CHECK-LABEL: negate_zero_or_minsigned:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    shlb $7, %dil
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    shlb $7, %al
+; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
   %signbit = shl i8 %x, 7
   %neg = sub i8 0, %signbit

Modified: llvm/trunk/test/CodeGen/X86/no-sse2-avg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/no-sse2-avg.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/no-sse2-avg.ll (original)
+++ llvm/trunk/test/CodeGen/X86/no-sse2-avg.ll Wed Sep 19 11:59:08 2018
@@ -5,9 +5,9 @@
 define <16 x i8> @PR27973() {
 ; CHECK-LABEL: PR27973:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    movq $0, 8(%rdi)
 ; CHECK-NEXT:    movq $0, (%rdi)
-; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    retq
   %t0 = zext <16 x i8> zeroinitializer to <16 x i32>
   %t1 = add nuw nsw <16 x i32> %t0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>

Modified: llvm/trunk/test/CodeGen/X86/not-and-simplify.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/not-and-simplify.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/not-and-simplify.ll (original)
+++ llvm/trunk/test/CodeGen/X86/not-and-simplify.ll Wed Sep 19 11:59:08 2018
@@ -7,9 +7,9 @@
 define i32 @shrink_xor_constant1(i32 %x) {
 ; ALL-LABEL: shrink_xor_constant1:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    shrl $31, %edi
-; ALL-NEXT:    xorl $1, %edi
 ; ALL-NEXT:    movl %edi, %eax
+; ALL-NEXT:    shrl $31, %eax
+; ALL-NEXT:    xorl $1, %eax
 ; ALL-NEXT:    retq
   %sh = lshr i32 %x, 31
   %not = xor i32 %sh, -1
@@ -34,9 +34,10 @@ define <4 x i32> @shrink_xor_constant1_s
 define i8 @shrink_xor_constant2(i8 %x) {
 ; ALL-LABEL: shrink_xor_constant2:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    shlb $5, %dil
-; ALL-NEXT:    xorb $-32, %dil
 ; ALL-NEXT:    movl %edi, %eax
+; ALL-NEXT:    shlb $5, %al
+; ALL-NEXT:    xorb $-32, %al
+; ALL-NEXT:    # kill: def $al killed $al killed $eax
 ; ALL-NEXT:    retq
   %sh = shl i8 %x, 5
   %not = xor i8 %sh, -1

Modified: llvm/trunk/test/CodeGen/X86/palignr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/palignr.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/palignr.ll (original)
+++ llvm/trunk/test/CodeGen/X86/palignr.ll Wed Sep 19 11:59:08 2018
@@ -167,16 +167,15 @@ define <8 x i16> @test9(<8 x i16> %A, <8
 ; CHECK-SSE2-LABEL: test9:
 ; CHECK-SSE2:       # %bb.0:
 ; CHECK-SSE2-NEXT:    movdqa %xmm1, %xmm0
-; CHECK-SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
-; CHECK-SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
-; CHECK-SSE2-NEXT:    por %xmm0, %xmm1
-; CHECK-SSE2-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
+; CHECK-SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
+; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
 ; CHECK-SSE2-NEXT:    retl
 ;
 ; CHECK-SSSE3-LABEL: test9:
 ; CHECK-SSSE3:       # %bb.0:
-; CHECK-SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
 ; CHECK-SSSE3-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
 ; CHECK-SSSE3-NEXT:    retl
 ;
 ; CHECK-AVX-LABEL: test9:

Modified: llvm/trunk/test/CodeGen/X86/peep-setb.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/peep-setb.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/peep-setb.ll (original)
+++ llvm/trunk/test/CodeGen/X86/peep-setb.ll Wed Sep 19 11:59:08 2018
@@ -7,9 +7,10 @@
 define i8 @test1(i8 %a, i8 %b) nounwind {
 ; CHECK-LABEL: test1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    cmpb %sil, %dil
-; CHECK-NEXT:    adcb $0, %sil
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    cmpb %al, %dil
+; CHECK-NEXT:    adcb $0, %al
+; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
   %cmp = icmp ult i8 %a, %b
   %cond = zext i1 %cmp to i8
@@ -20,9 +21,9 @@ define i8 @test1(i8 %a, i8 %b) nounwind
 define i32 @test2(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: test2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    cmpl %esi, %edi
-; CHECK-NEXT:    adcl $0, %esi
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    adcl $0, %eax
 ; CHECK-NEXT:    retq
   %cmp = icmp ult i32 %a, %b
   %cond = zext i1 %cmp to i32
@@ -33,9 +34,9 @@ define i32 @test2(i32 %a, i32 %b) nounwi
 define i64 @test3(i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: test3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    cmpq %rsi, %rdi
-; CHECK-NEXT:    adcq $0, %rsi
 ; CHECK-NEXT:    movq %rsi, %rax
+; CHECK-NEXT:    cmpq %rsi, %rdi
+; CHECK-NEXT:    adcq $0, %rax
 ; CHECK-NEXT:    retq
   %cmp = icmp ult i64 %a, %b
   %conv = zext i1 %cmp to i64
@@ -46,9 +47,10 @@ define i64 @test3(i64 %a, i64 %b) nounwi
 define i8 @test4(i8 %a, i8 %b) nounwind {
 ; CHECK-LABEL: test4:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    cmpb %sil, %dil
-; CHECK-NEXT:    sbbb $0, %sil
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    cmpb %al, %dil
+; CHECK-NEXT:    sbbb $0, %al
+; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
   %cmp = icmp ult i8 %a, %b
   %cond = zext i1 %cmp to i8
@@ -59,9 +61,9 @@ define i8 @test4(i8 %a, i8 %b) nounwind
 define i32 @test5(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: test5:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    cmpl %esi, %edi
-; CHECK-NEXT:    sbbl $0, %esi
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    sbbl $0, %eax
 ; CHECK-NEXT:    retq
   %cmp = icmp ult i32 %a, %b
   %cond = zext i1 %cmp to i32
@@ -72,9 +74,9 @@ define i32 @test5(i32 %a, i32 %b) nounwi
 define i64 @test6(i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: test6:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    cmpq %rsi, %rdi
-; CHECK-NEXT:    sbbq $0, %rsi
 ; CHECK-NEXT:    movq %rsi, %rax
+; CHECK-NEXT:    cmpq %rsi, %rdi
+; CHECK-NEXT:    sbbq $0, %rax
 ; CHECK-NEXT:    retq
   %cmp = icmp ult i64 %a, %b
   %conv = zext i1 %cmp to i64
@@ -85,9 +87,10 @@ define i64 @test6(i64 %a, i64 %b) nounwi
 define i8 @test7(i8 %a, i8 %b) nounwind {
 ; CHECK-LABEL: test7:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    cmpb %sil, %dil
-; CHECK-NEXT:    adcb $0, %sil
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    cmpb %al, %dil
+; CHECK-NEXT:    adcb $0, %al
+; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
   %cmp = icmp ult i8 %a, %b
   %cond = sext i1 %cmp to i8
@@ -98,9 +101,9 @@ define i8 @test7(i8 %a, i8 %b) nounwind
 define i32 @test8(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: test8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    cmpl %esi, %edi
-; CHECK-NEXT:    adcl $0, %esi
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    adcl $0, %eax
 ; CHECK-NEXT:    retq
   %cmp = icmp ult i32 %a, %b
   %cond = sext i1 %cmp to i32
@@ -111,9 +114,9 @@ define i32 @test8(i32 %a, i32 %b) nounwi
 define i64 @test9(i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: test9:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    cmpq %rsi, %rdi
-; CHECK-NEXT:    adcq $0, %rsi
 ; CHECK-NEXT:    movq %rsi, %rax
+; CHECK-NEXT:    cmpq %rsi, %rdi
+; CHECK-NEXT:    adcq $0, %rax
 ; CHECK-NEXT:    retq
   %cmp = icmp ult i64 %a, %b
   %conv = sext i1 %cmp to i64
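
A note on the recurring shape of these peep-setb.ll updates (and of most scalar tests below): with multiple copy hints available, the allocator now places the result in the ABI return register up front, so the movl %esi, %eax copy leads the sequence and the cmp/adc/sbb write %al or %eax directly instead of clobbering the incoming argument register. The trailing "# kill: def $al killed $al killed $eax" lines are comments emitted by the AsmPrinter to mark the implicit sub-register extraction; they are not instructions. A reduced i8 sketch (hand-written for illustration, mirroring test1 above; the name @adc_sketch is hypothetical) that reproduces the new sequence under llc -mtriple=x86_64-unknown-unknown:

  ; illustrative reduction, not part of this commit
  define i8 @adc_sketch(i8 %a, i8 %b) nounwind {
    %cmp = icmp ult i8 %a, %b       ; lowers to cmpb, setting CF when a < b
    %carry = zext i1 %cmp to i8
    %sum = add i8 %b, %carry        ; folds with the compare into adcb $0
    ret i8 %sum
  }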

Modified: llvm/trunk/test/CodeGen/X86/pku.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pku.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pku.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pku.ll Wed Sep 19 11:59:08 2018
@@ -16,9 +16,9 @@ define void @test_x86_wrpkru(i32 %src) {
 ;
 ; X64-LABEL: test_x86_wrpkru:
 ; X64:       ## %bb.0:
+; X64-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
 ; X64-NEXT:    xorl %ecx, %ecx ## encoding: [0x31,0xc9]
 ; X64-NEXT:    xorl %edx, %edx ## encoding: [0x31,0xd2]
-; X64-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
 ; X64-NEXT:    wrpkru ## encoding: [0x0f,0x01,0xef]
 ; X64-NEXT:    retq ## encoding: [0xc3]
   call void @llvm.x86.wrpkru(i32 %src)

Modified: llvm/trunk/test/CodeGen/X86/pmaddubsw.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pmaddubsw.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pmaddubsw.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pmaddubsw.ll Wed Sep 19 11:59:08 2018
@@ -89,6 +89,7 @@ define <16 x i16> @pmaddubsw_256(<32 x i
 define <64 x i16> @pmaddubsw_512(<128 x i8>* %Aptr, <128 x i8>* %Bptr) {
 ; SSE-LABEL: pmaddubsw_512:
 ; SSE:       # %bb.0:
+; SSE-NEXT:    movq %rdi, %rax
 ; SSE-NEXT:    movdqa 112(%rdx), %xmm0
 ; SSE-NEXT:    movdqa 96(%rdx), %xmm1
 ; SSE-NEXT:    movdqa 80(%rdx), %xmm2
@@ -113,7 +114,6 @@ define <64 x i16> @pmaddubsw_512(<128 x
 ; SSE-NEXT:    movdqa %xmm6, 32(%rdi)
 ; SSE-NEXT:    movdqa %xmm5, 16(%rdi)
 ; SSE-NEXT:    movdqa %xmm4, (%rdi)
-; SSE-NEXT:    movq %rdi, %rax
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: pmaddubsw_512:

Modified: llvm/trunk/test/CodeGen/X86/pmulh.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pmulh.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pmulh.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pmulh.ll Wed Sep 19 11:59:08 2018
@@ -228,6 +228,7 @@ define <32 x i16> @mulhw_v32i16(<32 x i1
 define <64 x i16> @mulhuw_v64i16(<64 x i16> %a, <64 x i16> %b) {
 ; SSE-LABEL: mulhuw_v64i16:
 ; SSE:       # %bb.0:
+; SSE-NEXT:    movq %rdi, %rax
 ; SSE-NEXT:    pmulhuw {{[0-9]+}}(%rsp), %xmm0
 ; SSE-NEXT:    pmulhuw {{[0-9]+}}(%rsp), %xmm1
 ; SSE-NEXT:    pmulhuw {{[0-9]+}}(%rsp), %xmm2
@@ -244,7 +245,6 @@ define <64 x i16> @mulhuw_v64i16(<64 x i
 ; SSE-NEXT:    movdqa %xmm2, 32(%rdi)
 ; SSE-NEXT:    movdqa %xmm1, 16(%rdi)
 ; SSE-NEXT:    movdqa %xmm0, (%rdi)
-; SSE-NEXT:    movq %rdi, %rax
 ; SSE-NEXT:    retq
 ;
 ; AVX2-LABEL: mulhuw_v64i16:
@@ -279,6 +279,7 @@ define <64 x i16> @mulhuw_v64i16(<64 x i
 define <64 x i16> @mulhw_v64i16(<64 x i16> %a, <64 x i16> %b) {
 ; SSE-LABEL: mulhw_v64i16:
 ; SSE:       # %bb.0:
+; SSE-NEXT:    movq %rdi, %rax
 ; SSE-NEXT:    pmulhw {{[0-9]+}}(%rsp), %xmm0
 ; SSE-NEXT:    pmulhw {{[0-9]+}}(%rsp), %xmm1
 ; SSE-NEXT:    pmulhw {{[0-9]+}}(%rsp), %xmm2
@@ -295,7 +296,6 @@ define <64 x i16> @mulhw_v64i16(<64 x i1
 ; SSE-NEXT:    movdqa %xmm2, 32(%rdi)
 ; SSE-NEXT:    movdqa %xmm1, 16(%rdi)
 ; SSE-NEXT:    movdqa %xmm0, (%rdi)
-; SSE-NEXT:    movq %rdi, %rax
 ; SSE-NEXT:    retq
 ;
 ; AVX2-LABEL: mulhw_v64i16:
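
The pmaddubsw.ll and pmulh.ll hunks show the same hint applied to in-memory returns: a <64 x i16> value does not fit in registers, so under the SysV x86-64 convention the caller passes a hidden result pointer in %rdi and the callee must hand that pointer back in %rax. The movq %rdi, %rax copy now issues first, ahead of the stores that still address through %rdi. A minimal sketch of that ABI shape (illustrative only, not one of the tests verbatim; @wide_ret_sketch is a made-up name):

  define <64 x i16> @wide_ret_sketch(<64 x i16> %a, <64 x i16> %b) nounwind {
    ; returned via a hidden pointer: lowered to stores through %rdi,
    ; with %rdi copied into %rax as the return value
    %m = mul <64 x i16> %a, %b
    ret <64 x i16> %m
  }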

Modified: llvm/trunk/test/CodeGen/X86/pr12360.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr12360.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr12360.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr12360.ll Wed Sep 19 11:59:08 2018
@@ -32,8 +32,9 @@ entry:
 define zeroext i1 @f3(i1 %x) {
 ; CHECK-LABEL: f3:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    andb $1, %dil
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    andb $1, %al
+; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
 
 entry:

Modified: llvm/trunk/test/CodeGen/X86/pr15705.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr15705.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr15705.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr15705.ll Wed Sep 19 11:59:08 2018
@@ -22,14 +22,14 @@ define i32 @PR15705(i32 %x, i32 %a, i32
 ;
 ; X64-LABEL: PR15705:
 ; X64:       # %bb.0: # %entry
+; X64-NEXT:    movl %edx, %eax
 ; X64-NEXT:    cmpl %esi, %edi
 ; X64-NEXT:    je .LBB0_2
 ; X64-NEXT:  # %bb.1: # %if.end
-; X64-NEXT:    cmpl %edx, %edi
+; X64-NEXT:    cmpl %eax, %edi
 ; X64-NEXT:    cmovel %ecx, %esi
-; X64-NEXT:    movl %esi, %edx
+; X64-NEXT:    movl %esi, %eax
 ; X64-NEXT:  .LBB0_2: # %return
-; X64-NEXT:    movl %edx, %eax
 ; X64-NEXT:    retq
 entry:
   %cmp = icmp eq i32 %x, %a

Modified: llvm/trunk/test/CodeGen/X86/pr15981.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr15981.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr15981.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr15981.ll Wed Sep 19 11:59:08 2018
@@ -19,9 +19,9 @@ define i32 @fn1(i32, i32) {
 ;
 ; X64-LABEL: fn1:
 ; X64:       # %bb.0:
-; X64-NEXT:    testl %esi, %esi
-; X64-NEXT:    cmovel %esi, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    testl %esi, %esi
+; X64-NEXT:    cmovel %esi, %eax
 ; X64-NEXT:    retq
   %3 = icmp ne i32 %1, 0
   %4 = select i1 %3, i32 %0, i32 0

Modified: llvm/trunk/test/CodeGen/X86/pr23664.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr23664.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr23664.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr23664.ll Wed Sep 19 11:59:08 2018
@@ -7,8 +7,9 @@ define i2 @f(i32 %arg) {
   ret i2 %or
 
 ; CHECK-LABEL: f:
-; CHECK:      addb    %dil, %dil
-; CHECK-NEXT: orb     $1, %dil
-; CHECK-NEXT: movl    %edi, %eax
+; CHECK:      movl    %edi, %eax
+; CHECK-NEXT: addb    %al, %al
+; CHECK-NEXT: orb     $1, %al
+; CHECK-NEXT: # kill
 ; CHECK-NEXT: retq
 }

Modified: llvm/trunk/test/CodeGen/X86/pr28173.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr28173.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr28173.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr28173.ll Wed Sep 19 11:59:08 2018
@@ -78,8 +78,9 @@ end:
 define i8 @foo8(i1 zeroext %i) #0 {
 ; CHECK-LABEL: foo8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    orb $-2, %dil
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    orb $-2, %al
+; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
   br label %bb
 

Modified: llvm/trunk/test/CodeGen/X86/pr34653.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr34653.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr34653.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr34653.ll Wed Sep 19 11:59:08 2018
@@ -33,170 +33,170 @@ define void @pr34653() {
 ; CHECK-NEXT:    vmovaps %xmm13, %xmm14
 ; CHECK-NEXT:    vmovaps %xmm10, %xmm15
 ; CHECK-NEXT:    vmovaps %xmm15, %xmm2
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; CHECK-NEXT:    vextractf32x4 $3, %zmm9, %xmm0
-; CHECK-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
+; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; CHECK-NEXT:    vextractf32x4 $2, %zmm9, %xmm0
-; CHECK-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
+; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; CHECK-NEXT:    vmovaps %xmm9, %xmm0
-; CHECK-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
+; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; CHECK-NEXT:    vextractf32x4 $3, %zmm8, %xmm0
-; CHECK-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
+; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; CHECK-NEXT:    vextractf32x4 $2, %zmm8, %xmm0
-; CHECK-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
+; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; CHECK-NEXT:    vmovaps %xmm8, %xmm0
-; CHECK-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
+; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; CHECK-NEXT:    vextractf32x4 $3, %zmm7, %xmm0
-; CHECK-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
+; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; CHECK-NEXT:    vextractf32x4 $2, %zmm7, %xmm0
-; CHECK-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
+; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; CHECK-NEXT:    vmovaps %xmm7, %xmm0
-; CHECK-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
+; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; CHECK-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
 ; CHECK-NEXT:    vpermilpd {{.*#+}} xmm5 = xmm5[1,0]
 ; CHECK-NEXT:    vpermilpd {{.*#+}} xmm11 = xmm11[1,0]
 ; CHECK-NEXT:    vpermilpd {{.*#+}} xmm13 = xmm13[1,0]
 ; CHECK-NEXT:    # kill: def $ymm10 killed $ymm10 killed $zmm10
 ; CHECK-NEXT:    vextractf128 $1, %ymm10, %xmm10
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; CHECK-NEXT:    vmovaps %xmm10, %xmm0
 ; CHECK-NEXT:    vpermilpd {{.*#+}} xmm15 = xmm15[1,0]
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
 ; CHECK-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
 ; CHECK-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; CHECK-NEXT:    # kill: def $ymm9 killed $ymm9 killed $zmm9
 ; CHECK-NEXT:    vextractf128 $1, %ymm9, %xmm9
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; CHECK-NEXT:    vmovaps %xmm9, %xmm0
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
 ; CHECK-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
 ; CHECK-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
 ; CHECK-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; CHECK-NEXT:    # kill: def $ymm8 killed $ymm8 killed $zmm8
 ; CHECK-NEXT:    vextractf128 $1, %ymm8, %xmm8
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; CHECK-NEXT:    vmovaps %xmm8, %xmm0
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
 ; CHECK-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
 ; CHECK-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
 ; CHECK-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; CHECK-NEXT:    # kill: def $ymm7 killed $ymm7 killed $zmm7
 ; CHECK-NEXT:    vextractf128 $1, %ymm7, %xmm7
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; CHECK-NEXT:    vmovaps %xmm7, %xmm0
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
 ; CHECK-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; CHECK-NEXT:    vpermilpd {{.*#+}} xmm10 = xmm10[1,0]
 ; CHECK-NEXT:    vpermilpd {{.*#+}} xmm9 = xmm9[1,0]
 ; CHECK-NEXT:    vpermilpd {{.*#+}} xmm8 = xmm8[1,0]
 ; CHECK-NEXT:    vpermilpd {{.*#+}} xmm7 = xmm7[1,0]
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
 ; CHECK-NEXT:    # xmm0 = mem[0],zero
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
 ; CHECK-NEXT:    # xmm0 = mem[0],zero
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
 ; CHECK-NEXT:    # xmm0 = mem[0],zero
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
 ; CHECK-NEXT:    # xmm0 = mem[0],zero
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
 ; CHECK-NEXT:    # xmm0 = mem[0],zero
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
 ; CHECK-NEXT:    # xmm0 = mem[0],zero
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
 ; CHECK-NEXT:    # xmm0 = mem[0],zero
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
 ; CHECK-NEXT:    # xmm0 = mem[0],zero
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
 ; CHECK-NEXT:    # xmm0 = mem[0],zero
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
 ; CHECK-NEXT:    # xmm0 = mem[0],zero
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
 ; CHECK-NEXT:    # xmm0 = mem[0],zero
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
 ; CHECK-NEXT:    # xmm0 = mem[0],zero
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
 ; CHECK-NEXT:    # xmm0 = mem[0],zero
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
 ; CHECK-NEXT:    # xmm0 = mem[0],zero
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
 ; CHECK-NEXT:    # xmm0 = mem[0],zero
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
 ; CHECK-NEXT:    # xmm0 = mem[0],zero
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
 ; CHECK-NEXT:    # xmm0 = mem[0],zero
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
 ; CHECK-NEXT:    # xmm0 = mem[0],zero
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
 ; CHECK-NEXT:    # xmm0 = mem[0],zero
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
 ; CHECK-NEXT:    # xmm0 = mem[0],zero
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
 ; CHECK-NEXT:    # xmm0 = mem[0],zero
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
 ; CHECK-NEXT:    # xmm0 = mem[0],zero
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd {{[0-9]+}}(%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
 ; CHECK-NEXT:    # xmm0 = mem[0],zero
-; CHECK-NEXT:    vmovsd %xmm8, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd %xmm13, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd %xmm1, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd %xmm14, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd %xmm2, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd %xmm4, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd %xmm9, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd %xmm10, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd %xmm15, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd %xmm11, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd %xmm3, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd %xmm6, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd %xmm5, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd %xmm12, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-NEXT:    vmovsd %xmm7, {{[0-9]+}}(%rsp) # 8-byte Spill
+; CHECK-NEXT:    vmovsd %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    vmovsd %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; CHECK-NEXT:    movq %rbp, %rsp
 ; CHECK-NEXT:    popq %rbp
 ; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
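
In pr34653.ll the substantive change is to the CHECK patterns themselves: {{[0-9]+}}(%rsp) becomes {{[-0-9]+}}(%r{{[sb]}}p), so the spill/reload assertions now accept negative offsets and either a %rsp or %rbp base. The copy reordering perturbs spill-slot assignment, and the looser regex (the scrubbed form update_llc_test_checks.py emits) keeps the test stable against that. Both of the following lines would satisfy the updated pattern (hypothetical operands, shown only to illustrate the match):

  vmovsd %xmm0, -128(%rbp) # 8-byte Spill
  vmovsd %xmm0, 24(%rsp) # 8-byte Spill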

Modified: llvm/trunk/test/CodeGen/X86/pr34657.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr34657.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr34657.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr34657.ll Wed Sep 19 11:59:08 2018
@@ -1,9 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc %s -O2 -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw -o - | FileCheck %s 
+; RUN: llc %s -O2 -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw -o - | FileCheck %s
 
 define <112 x i8> @pr34657() local_unnamed_addr {
-; CHECK-LABEL: pr34657
+; CHECK-LABEL: pr34657:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    vmovups (%rax), %xmm0
 ; CHECK-NEXT:    vmovups (%rax), %ymm1
 ; CHECK-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
@@ -11,7 +12,6 @@ define <112 x i8> @pr34657() local_unnam
 ; CHECK-NEXT:    vmovaps %ymm1, 64(%rdi)
 ; CHECK-NEXT:    vmovaps %zmm2, (%rdi)
 ; CHECK-NEXT:    vextractf32x4 $2, %zmm0, 96(%rdi)
-; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
 entry:

Modified: llvm/trunk/test/CodeGen/X86/promote-i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/promote-i16.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/promote-i16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/promote-i16.ll Wed Sep 19 11:59:08 2018
@@ -12,8 +12,9 @@ define signext i16 @foo(i16 signext %x)
 ;
 ; X64-LABEL: foo:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    xorl $21998, %edi # imm = 0x55EE
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    xorl $21998, %eax # imm = 0x55EE
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 entry:
   %0 = xor i16 %x, 21998
@@ -30,8 +31,9 @@ define signext i16 @bar(i16 signext %x)
 ;
 ; X64-LABEL: bar:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    xorl $54766, %edi # imm = 0xD5EE
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    xorl $54766, %eax # imm = 0xD5EE
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 entry:
   %0 = xor i16 %x, 54766

Modified: llvm/trunk/test/CodeGen/X86/ptest.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/ptest.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/ptest.ll (original)
+++ llvm/trunk/test/CodeGen/X86/ptest.ll Wed Sep 19 11:59:08 2018
@@ -233,16 +233,16 @@ define i32 @vectest512(<16 x i32> %input
 define i32 @vecsel128(<4 x i32> %input, i32 %a, i32 %b) {
 ; SSE41-LABEL: vecsel128:
 ; SSE41:       # %bb.0:
-; SSE41-NEXT:    ptest %xmm0, %xmm0
-; SSE41-NEXT:    cmovel %esi, %edi
 ; SSE41-NEXT:    movl %edi, %eax
+; SSE41-NEXT:    ptest %xmm0, %xmm0
+; SSE41-NEXT:    cmovel %esi, %eax
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: vecsel128:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vptest %xmm0, %xmm0
-; AVX-NEXT:    cmovel %esi, %edi
 ; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    vptest %xmm0, %xmm0
+; AVX-NEXT:    cmovel %esi, %eax
 ; AVX-NEXT:    retq
   %t0 = bitcast <4 x i32> %input to i128
   %t1 = icmp ne i128 %t0, 0
@@ -253,17 +253,17 @@ define i32 @vecsel128(<4 x i32> %input,
 define i32 @vecsel256(<8 x i32> %input, i32 %a, i32 %b) {
 ; SSE41-LABEL: vecsel256:
 ; SSE41:       # %bb.0:
+; SSE41-NEXT:    movl %edi, %eax
 ; SSE41-NEXT:    por %xmm1, %xmm0
 ; SSE41-NEXT:    ptest %xmm0, %xmm0
-; SSE41-NEXT:    cmovel %esi, %edi
-; SSE41-NEXT:    movl %edi, %eax
+; SSE41-NEXT:    cmovel %esi, %eax
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: vecsel256:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vptest %ymm0, %ymm0
-; AVX-NEXT:    cmovel %esi, %edi
 ; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    vptest %ymm0, %ymm0
+; AVX-NEXT:    cmovel %esi, %eax
 ; AVX-NEXT:    vzeroupper
 ; AVX-NEXT:    retq
   %t0 = bitcast <8 x i32> %input to i256
@@ -275,45 +275,45 @@ define i32 @vecsel256(<8 x i32> %input,
 define i32 @vecsel512(<16 x i32> %input, i32 %a, i32 %b) {
 ; SSE41-LABEL: vecsel512:
 ; SSE41:       # %bb.0:
+; SSE41-NEXT:    movl %edi, %eax
 ; SSE41-NEXT:    por %xmm3, %xmm1
 ; SSE41-NEXT:    por %xmm2, %xmm1
 ; SSE41-NEXT:    por %xmm0, %xmm1
 ; SSE41-NEXT:    ptest %xmm1, %xmm1
-; SSE41-NEXT:    cmovel %esi, %edi
-; SSE41-NEXT:    movl %edi, %eax
+; SSE41-NEXT:    cmovel %esi, %eax
 ; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: vecsel512:
 ; AVX1:       # %bb.0:
+; AVX1-NEXT:    movl %edi, %eax
 ; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
 ; AVX1-NEXT:    vptest %ymm0, %ymm0
-; AVX1-NEXT:    cmovel %esi, %edi
-; AVX1-NEXT:    movl %edi, %eax
+; AVX1-NEXT:    cmovel %esi, %eax
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX512-LABEL: vecsel512:
 ; AVX512:       # %bb.0:
+; AVX512-NEXT:    movl %edi, %eax
 ; AVX512-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
-; AVX512-NEXT:    vmovq %xmm1, %rax
+; AVX512-NEXT:    vmovq %xmm1, %rcx
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm2
-; AVX512-NEXT:    vmovq %xmm2, %rcx
-; AVX512-NEXT:    orq %rax, %rcx
+; AVX512-NEXT:    vmovq %xmm2, %rdx
+; AVX512-NEXT:    orq %rcx, %rdx
 ; AVX512-NEXT:    vextracti32x4 $2, %zmm0, %xmm3
-; AVX512-NEXT:    vmovq %xmm3, %rax
-; AVX512-NEXT:    orq %rcx, %rax
-; AVX512-NEXT:    vmovq %xmm0, %rcx
-; AVX512-NEXT:    orq %rax, %rcx
-; AVX512-NEXT:    vpextrq $1, %xmm1, %rax
-; AVX512-NEXT:    vpextrq $1, %xmm2, %rdx
-; AVX512-NEXT:    orq %rax, %rdx
-; AVX512-NEXT:    vpextrq $1, %xmm3, %rax
-; AVX512-NEXT:    orq %rdx, %rax
-; AVX512-NEXT:    vpextrq $1, %xmm0, %rdx
-; AVX512-NEXT:    orq %rax, %rdx
+; AVX512-NEXT:    vmovq %xmm3, %rcx
+; AVX512-NEXT:    orq %rdx, %rcx
+; AVX512-NEXT:    vmovq %xmm0, %rdx
 ; AVX512-NEXT:    orq %rcx, %rdx
-; AVX512-NEXT:    cmovel %esi, %edi
-; AVX512-NEXT:    movl %edi, %eax
+; AVX512-NEXT:    vpextrq $1, %xmm1, %rcx
+; AVX512-NEXT:    vpextrq $1, %xmm2, %rdi
+; AVX512-NEXT:    orq %rcx, %rdi
+; AVX512-NEXT:    vpextrq $1, %xmm3, %rcx
+; AVX512-NEXT:    orq %rdi, %rcx
+; AVX512-NEXT:    vpextrq $1, %xmm0, %rdi
+; AVX512-NEXT:    orq %rcx, %rdi
+; AVX512-NEXT:    orq %rdx, %rdi
+; AVX512-NEXT:    cmovel %esi, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
   %t0 = bitcast <16 x i32> %input to i512
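
The AVX512 block of vecsel512 is the one spot in ptest.ll where the hint forces a rename chain rather than a simple reorder: %eax is now claimed at the top for the eventual return value, so the scalar or-reduction that previously accumulated through %rax/%rcx/%rdx is renumbered onto %rcx/%rdx/%rdi. The instruction count is unchanged; only the allocation differs. The IR shape being reduced, paraphrased from the test body above (@vecsel512_sketch is a stand-in name):

  define i32 @vecsel512_sketch(<16 x i32> %input, i32 %a, i32 %b) nounwind {
    %t0 = bitcast <16 x i32> %input to i512
    %t1 = icmp ne i512 %t0, 0
    %t2 = select i1 %t1, i32 %a, i32 %b
    ret i32 %t2
  }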

Modified: llvm/trunk/test/CodeGen/X86/rot16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/rot16.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/rot16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/rot16.ll Wed Sep 19 11:59:08 2018
@@ -13,8 +13,10 @@ define i16 @foo(i16 %x, i16 %y, i16 %z)
 ; X64-LABEL: foo:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edx, %ecx
-; X64-NEXT:    shldw %cl, %di, %di
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shldw %cl, %ax, %ax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 	%t0 = shl i16 %x, %z
 	%t1 = sub i16 16, %z
@@ -35,8 +37,10 @@ define i16 @bar(i16 %x, i16 %y, i16 %z)
 ; X64-LABEL: bar:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edx, %ecx
-; X64-NEXT:    shldw %cl, %di, %si
 ; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shldw %cl, %di, %ax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 	%t0 = shl i16 %y, %z
 	%t1 = sub i16 16, %z
@@ -56,8 +60,10 @@ define i16 @un(i16 %x, i16 %y, i16 %z) n
 ; X64-LABEL: un:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edx, %ecx
-; X64-NEXT:    shrdw %cl, %di, %di
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shrdw %cl, %ax, %ax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 	%t0 = lshr i16 %x, %z
 	%t1 = sub i16 16, %z
@@ -78,8 +84,10 @@ define i16 @bu(i16 %x, i16 %y, i16 %z) n
 ; X64-LABEL: bu:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edx, %ecx
-; X64-NEXT:    shrdw %cl, %di, %si
 ; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shrdw %cl, %di, %ax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 	%t0 = lshr i16 %y, %z
 	%t1 = sub i16 16, %z
@@ -97,8 +105,9 @@ define i16 @xfoo(i16 %x, i16 %y, i16 %z)
 ;
 ; X64-LABEL: xfoo:
 ; X64:       # %bb.0:
-; X64-NEXT:    rolw $5, %di
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    rolw $5, %ax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 	%t0 = lshr i16 %x, 11
 	%t1 = shl i16 %x, 5
@@ -116,8 +125,9 @@ define i16 @xbar(i16 %x, i16 %y, i16 %z)
 ;
 ; X64-LABEL: xbar:
 ; X64:       # %bb.0:
-; X64-NEXT:    shldw $5, %di, %si
 ; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    shldw $5, %di, %ax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 	%t0 = shl i16 %y, 5
 	%t1 = lshr i16 %x, 11
@@ -134,8 +144,9 @@ define i16 @xun(i16 %x, i16 %y, i16 %z)
 ;
 ; X64-LABEL: xun:
 ; X64:       # %bb.0:
-; X64-NEXT:    rolw $11, %di
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    rolw $11, %ax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 	%t0 = lshr i16 %x, 5
 	%t1 = shl i16 %x, 11
@@ -153,8 +164,9 @@ define i16 @xbu(i16 %x, i16 %y, i16 %z)
 ;
 ; X64-LABEL: xbu:
 ; X64:       # %bb.0:
-; X64-NEXT:    shldw $11, %si, %di
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    shldw $11, %si, %ax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 	%t0 = lshr i16 %y, 5
 	%t1 = shl i16 %x, 11
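
Throughout the rotate tests the extra "# kill: def $cl killed $cl killed $ecx" comment appears because the variable rotate amount now reaches %ecx (or %rcx in the 64-bit cases) through a full-width copy, while the rotate itself consumes only %cl. As with the $al/$ax cases above, this is a liveness annotation printed by the AsmPrinter, not an executed instruction. A reduced i16 sketch of the pattern being checked (hand-written, mirroring foo in rot16.ll; @rot16_sketch is a made-up name):

  ; illustrative reduction, not part of this commit
  define i16 @rot16_sketch(i16 %x, i16 %z) nounwind {
    %t0 = shl i16 %x, %z
    %t1 = sub i16 16, %z
    %t2 = lshr i16 %x, %t1
    %t3 = or i16 %t2, %t0       ; recognized as a 16-bit rotate left by %z
    ret i16 %t3
  }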

Modified: llvm/trunk/test/CodeGen/X86/rot64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/rot64.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/rot64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/rot64.ll Wed Sep 19 11:59:08 2018
@@ -6,9 +6,10 @@
 define i64 @foo(i64 %x, i64 %y, i64 %z) nounwind readnone {
 ; ALL-LABEL: foo:
 ; ALL:       # %bb.0: # %entry
-; ALL-NEXT:    movl %edx, %ecx
-; ALL-NEXT:    rolq %cl, %rdi
+; ALL-NEXT:    movq %rdx, %rcx
 ; ALL-NEXT:    movq %rdi, %rax
+; ALL-NEXT:    # kill: def $cl killed $cl killed $rcx
+; ALL-NEXT:    rolq %cl, %rax
 ; ALL-NEXT:    retq
 entry:
 	%0 = shl i64 %x, %z
@@ -21,9 +22,10 @@ entry:
 define i64 @bar(i64 %x, i64 %y, i64 %z) nounwind readnone {
 ; ALL-LABEL: bar:
 ; ALL:       # %bb.0: # %entry
-; ALL-NEXT:    movl %edx, %ecx
-; ALL-NEXT:    shldq %cl, %rdi, %rsi
+; ALL-NEXT:    movq %rdx, %rcx
 ; ALL-NEXT:    movq %rsi, %rax
+; ALL-NEXT:    # kill: def $cl killed $cl killed $rcx
+; ALL-NEXT:    shldq %cl, %rdi, %rax
 ; ALL-NEXT:    retq
 entry:
 	%0 = shl i64 %y, %z
@@ -36,9 +38,10 @@ entry:
 define i64 @un(i64 %x, i64 %y, i64 %z) nounwind readnone {
 ; ALL-LABEL: un:
 ; ALL:       # %bb.0: # %entry
-; ALL-NEXT:    movl %edx, %ecx
-; ALL-NEXT:    rorq %cl, %rdi
+; ALL-NEXT:    movq %rdx, %rcx
 ; ALL-NEXT:    movq %rdi, %rax
+; ALL-NEXT:    # kill: def $cl killed $cl killed $rcx
+; ALL-NEXT:    rorq %cl, %rax
 ; ALL-NEXT:    retq
 entry:
 	%0 = lshr i64 %x, %z
@@ -51,9 +54,10 @@ entry:
 define i64 @bu(i64 %x, i64 %y, i64 %z) nounwind readnone {
 ; ALL-LABEL: bu:
 ; ALL:       # %bb.0: # %entry
-; ALL-NEXT:    movl %edx, %ecx
-; ALL-NEXT:    shrdq %cl, %rdi, %rsi
+; ALL-NEXT:    movq %rdx, %rcx
 ; ALL-NEXT:    movq %rsi, %rax
+; ALL-NEXT:    # kill: def $cl killed $cl killed $rcx
+; ALL-NEXT:    shrdq %cl, %rdi, %rax
 ; ALL-NEXT:    retq
 entry:
 	%0 = lshr i64 %y, %z
@@ -66,14 +70,14 @@ entry:
 define i64 @xfoo(i64 %x, i64 %y, i64 %z) nounwind readnone {
 ; X64-LABEL: xfoo:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    rolq $7, %rdi
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    rolq $7, %rax
 ; X64-NEXT:    retq
 ;
 ; SHLD-LABEL: xfoo:
 ; SHLD:       # %bb.0: # %entry
-; SHLD-NEXT:    shldq $7, %rdi, %rdi
 ; SHLD-NEXT:    movq %rdi, %rax
+; SHLD-NEXT:    shldq $7, %rdi, %rax
 ; SHLD-NEXT:    retq
 ;
 ; BMI2-LABEL: xfoo:
@@ -115,8 +119,8 @@ entry:
 define i64 @xbar(i64 %x, i64 %y, i64 %z) nounwind readnone {
 ; ALL-LABEL: xbar:
 ; ALL:       # %bb.0: # %entry
-; ALL-NEXT:    shrdq $57, %rsi, %rdi
 ; ALL-NEXT:    movq %rdi, %rax
+; ALL-NEXT:    shrdq $57, %rsi, %rax
 ; ALL-NEXT:    retq
 entry:
 	%0 = shl i64 %y, 7
@@ -128,14 +132,14 @@ entry:
 define i64 @xun(i64 %x, i64 %y, i64 %z) nounwind readnone {
 ; X64-LABEL: xun:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    rolq $57, %rdi
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    rolq $57, %rax
 ; X64-NEXT:    retq
 ;
 ; SHLD-LABEL: xun:
 ; SHLD:       # %bb.0: # %entry
-; SHLD-NEXT:    shldq $57, %rdi, %rdi
 ; SHLD-NEXT:    movq %rdi, %rax
+; SHLD-NEXT:    shldq $57, %rdi, %rax
 ; SHLD-NEXT:    retq
 ;
 ; BMI2-LABEL: xun:
@@ -177,8 +181,8 @@ entry:
 define i64 @xbu(i64 %x, i64 %y, i64 %z) nounwind readnone {
 ; ALL-LABEL: xbu:
 ; ALL:       # %bb.0: # %entry
-; ALL-NEXT:    shldq $57, %rsi, %rdi
 ; ALL-NEXT:    movq %rdi, %rax
+; ALL-NEXT:    shldq $57, %rsi, %rax
 ; ALL-NEXT:    retq
 entry:
 	%0 = lshr i64 %y, 7

Modified: llvm/trunk/test/CodeGen/X86/rotate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/rotate.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/rotate.ll (original)
+++ llvm/trunk/test/CodeGen/X86/rotate.ll Wed Sep 19 11:59:08 2018
@@ -43,8 +43,9 @@ define i64 @rotl64(i64 %A, i8 %Amt) noun
 ; X64-LABEL: rotl64:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    rolq %cl, %rdi
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    rolq %cl, %rax
 ; X64-NEXT:    retq
 	%shift.upgrd.1 = zext i8 %Amt to i64
 	%B = shl i64 %A, %shift.upgrd.1
@@ -96,8 +97,9 @@ define i64 @rotr64(i64 %A, i8 %Amt) noun
 ; X64-LABEL: rotr64:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    rorq %cl, %rdi
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    rorq %cl, %rax
 ; X64-NEXT:    retq
 	%shift.upgrd.3 = zext i8 %Amt to i64
 	%B = lshr i64 %A, %shift.upgrd.3
@@ -120,8 +122,8 @@ define i64 @rotli64(i64 %A) nounwind {
 ;
 ; X64-LABEL: rotli64:
 ; X64:       # %bb.0:
-; X64-NEXT:    rolq $5, %rdi
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    rolq $5, %rax
 ; X64-NEXT:    retq
 	%B = shl i64 %A, 5
 	%C = lshr i64 %A, 59
@@ -141,8 +143,8 @@ define i64 @rotri64(i64 %A) nounwind {
 ;
 ; X64-LABEL: rotri64:
 ; X64:       # %bb.0:
-; X64-NEXT:    rolq $59, %rdi
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    rolq $59, %rax
 ; X64-NEXT:    retq
 	%B = lshr i64 %A, 5
 	%C = shl i64 %A, 59
@@ -162,8 +164,8 @@ define i64 @rotl1_64(i64 %A) nounwind {
 ;
 ; X64-LABEL: rotl1_64:
 ; X64:       # %bb.0:
-; X64-NEXT:    rolq %rdi
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    rolq %rax
 ; X64-NEXT:    retq
 	%B = shl i64 %A, 1
 	%C = lshr i64 %A, 63
@@ -183,8 +185,8 @@ define i64 @rotr1_64(i64 %A) nounwind {
 ;
 ; X64-LABEL: rotr1_64:
 ; X64:       # %bb.0:
-; X64-NEXT:    rorq %rdi
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    rorq %rax
 ; X64-NEXT:    retq
 	%B = shl i64 %A, 63
 	%C = lshr i64 %A, 1
@@ -203,8 +205,9 @@ define i32 @rotl32(i32 %A, i8 %Amt) noun
 ; X64-LABEL: rotl32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    roll %cl, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    roll %cl, %eax
 ; X64-NEXT:    retq
 	%shift.upgrd.1 = zext i8 %Amt to i32
 	%B = shl i32 %A, %shift.upgrd.1
@@ -226,8 +229,9 @@ define i32 @rotr32(i32 %A, i8 %Amt) noun
 ; X64-LABEL: rotr32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    rorl %cl, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    rorl %cl, %eax
 ; X64-NEXT:    retq
 	%shift.upgrd.3 = zext i8 %Amt to i32
 	%B = lshr i32 %A, %shift.upgrd.3
@@ -247,8 +251,8 @@ define i32 @rotli32(i32 %A) nounwind {
 ;
 ; X64-LABEL: rotli32:
 ; X64:       # %bb.0:
-; X64-NEXT:    roll $5, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    roll $5, %eax
 ; X64-NEXT:    retq
 	%B = shl i32 %A, 5
 	%C = lshr i32 %A, 27
@@ -265,8 +269,8 @@ define i32 @rotri32(i32 %A) nounwind {
 ;
 ; X64-LABEL: rotri32:
 ; X64:       # %bb.0:
-; X64-NEXT:    roll $27, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    roll $27, %eax
 ; X64-NEXT:    retq
 	%B = lshr i32 %A, 5
 	%C = shl i32 %A, 27
@@ -283,8 +287,8 @@ define i32 @rotl1_32(i32 %A) nounwind {
 ;
 ; X64-LABEL: rotl1_32:
 ; X64:       # %bb.0:
-; X64-NEXT:    roll %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    roll %eax
 ; X64-NEXT:    retq
 	%B = shl i32 %A, 1
 	%C = lshr i32 %A, 31
@@ -301,8 +305,8 @@ define i32 @rotr1_32(i32 %A) nounwind {
 ;
 ; X64-LABEL: rotr1_32:
 ; X64:       # %bb.0:
-; X64-NEXT:    rorl %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    rorl %eax
 ; X64-NEXT:    retq
 	%B = shl i32 %A, 31
 	%C = lshr i32 %A, 1
@@ -321,8 +325,10 @@ define i16 @rotl16(i16 %A, i8 %Amt) noun
 ; X64-LABEL: rotl16:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    rolw %cl, %di
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    rolw %cl, %ax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 	%shift.upgrd.5 = zext i8 %Amt to i16
 	%B = shl i16 %A, %shift.upgrd.5
@@ -344,8 +350,10 @@ define i16 @rotr16(i16 %A, i8 %Amt) noun
 ; X64-LABEL: rotr16:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    rorw %cl, %di
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    rorw %cl, %ax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 	%shift.upgrd.7 = zext i8 %Amt to i16
 	%B = lshr i16 %A, %shift.upgrd.7
@@ -365,8 +373,9 @@ define i16 @rotli16(i16 %A) nounwind {
 ;
 ; X64-LABEL: rotli16:
 ; X64:       # %bb.0:
-; X64-NEXT:    rolw $5, %di
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    rolw $5, %ax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 	%B = shl i16 %A, 5
 	%C = lshr i16 %A, 11
@@ -383,8 +392,9 @@ define i16 @rotri16(i16 %A) nounwind {
 ;
 ; X64-LABEL: rotri16:
 ; X64:       # %bb.0:
-; X64-NEXT:    rolw $11, %di
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    rolw $11, %ax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 	%B = lshr i16 %A, 5
 	%C = shl i16 %A, 11
@@ -401,8 +411,9 @@ define i16 @rotl1_16(i16 %A) nounwind {
 ;
 ; X64-LABEL: rotl1_16:
 ; X64:       # %bb.0:
-; X64-NEXT:    rolw %di
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    rolw %ax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 	%B = shl i16 %A, 1
 	%C = lshr i16 %A, 15
@@ -419,8 +430,9 @@ define i16 @rotr1_16(i16 %A) nounwind {
 ;
 ; X64-LABEL: rotr1_16:
 ; X64:       # %bb.0:
-; X64-NEXT:    rorw %di
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    rorw %ax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 	%B = lshr i16 %A, 1
 	%C = shl i16 %A, 15
@@ -439,8 +451,10 @@ define i8 @rotl8(i8 %A, i8 %Amt) nounwin
 ; X64-LABEL: rotl8:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    rolb %cl, %dil
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    rolb %cl, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
 	%B = shl i8 %A, %Amt
 	%Amt2 = sub i8 8, %Amt
@@ -460,8 +474,10 @@ define i8 @rotr8(i8 %A, i8 %Amt) nounwin
 ; X64-LABEL: rotr8:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    rorb %cl, %dil
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    rorb %cl, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
 	%B = lshr i8 %A, %Amt
 	%Amt2 = sub i8 8, %Amt
@@ -479,8 +495,9 @@ define i8 @rotli8(i8 %A) nounwind {
 ;
 ; X64-LABEL: rotli8:
 ; X64:       # %bb.0:
-; X64-NEXT:    rolb $5, %dil
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    rolb $5, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
 	%B = shl i8 %A, 5
 	%C = lshr i8 %A, 3
@@ -497,8 +514,9 @@ define i8 @rotri8(i8 %A) nounwind {
 ;
 ; X64-LABEL: rotri8:
 ; X64:       # %bb.0:
-; X64-NEXT:    rolb $3, %dil
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    rolb $3, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
 	%B = lshr i8 %A, 5
 	%C = shl i8 %A, 3
@@ -515,8 +533,9 @@ define i8 @rotl1_8(i8 %A) nounwind {
 ;
 ; X64-LABEL: rotl1_8:
 ; X64:       # %bb.0:
-; X64-NEXT:    rolb %dil
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    rolb %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
 	%B = shl i8 %A, 1
 	%C = lshr i8 %A, 7
@@ -533,8 +552,9 @@ define i8 @rotr1_8(i8 %A) nounwind {
 ;
 ; X64-LABEL: rotr1_8:
 ; X64:       # %bb.0:
-; X64-NEXT:    rorb %dil
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    rorb %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
 	%B = lshr i8 %A, 1
 	%C = shl i8 %A, 7
@@ -665,6 +685,7 @@ define i64 @truncated_rot(i64 %x, i32 %a
 ; X64-LABEL: truncated_rot:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    rolq %cl, %rdi
 ; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    retq

Modified: llvm/trunk/test/CodeGen/X86/rotate2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/rotate2.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/rotate2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/rotate2.ll Wed Sep 19 11:59:08 2018
@@ -14,8 +14,8 @@ define i64 @test1(i64 %x) nounwind  {
 ;
 ; X64-LABEL: test1:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    rolq $9, %rdi
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    rolq $9, %rax
 ; X64-NEXT:    retq
 entry:
 	%tmp2 = lshr i64 %x, 55		; <i64> [#uses=1]
@@ -34,9 +34,8 @@ define i64 @test2(i32 %x) nounwind  {
 ;
 ; X64-LABEL: test2:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    roll $10, %edi
-; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    roll $10, %eax
 ; X64-NEXT:    retq
 entry:
 	%tmp2 = lshr i32 %x, 22		; <i32> [#uses=1]
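
rotate2.ll test2 is a genuine improvement rather than a neutral reshuffle: the old output needed an implicit-def kill comment and a 64-bit movq because the rotate wrote %edi while the return left through %rdi. Hinting the copy first lets everything run in %eax, and a 32-bit write implicitly zeroes the upper half of %rax, which is exactly what the i64 return needs. A sketch of the shape (hand-written from the test body; @rot_zext_sketch is a made-up name):

  define i64 @rot_zext_sketch(i32 %x) nounwind {
    %lo = lshr i32 %x, 22
    %hi = shl i32 %x, 10
    %rot = or i32 %hi, %lo      ; becomes roll $10
    %ext = zext i32 %rot to i64 ; free: 32-bit ops zero-extend on x86-64
    ret i64 %ext
  }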

Modified: llvm/trunk/test/CodeGen/X86/rotate4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/rotate4.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/rotate4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/rotate4.ll Wed Sep 19 11:59:08 2018
@@ -16,8 +16,9 @@ define i32 @rotate_left_32(i32 %a, i32 %
 ; X64-LABEL: rotate_left_32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    roll %cl, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    roll %cl, %eax
 ; X64-NEXT:    retq
   %and = and i32 %b, 31
   %shl = shl i32 %a, %and
@@ -39,8 +40,9 @@ define i32 @rotate_right_32(i32 %a, i32
 ; X64-LABEL: rotate_right_32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    rorl %cl, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    rorl %cl, %eax
 ; X64-NEXT:    retq
   %and = and i32 %b, 31
   %shl = lshr i32 %a, %and
@@ -98,9 +100,10 @@ define i64 @rotate_left_64(i64 %a, i64 %
 ;
 ; X64-LABEL: rotate_left_64:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    rolq %cl, %rdi
+; X64-NEXT:    movq %rsi, %rcx
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NEXT:    rolq %cl, %rax
 ; X64-NEXT:    retq
   %and = and i64 %b, 63
   %shl = shl i64 %a, %and
@@ -158,9 +161,10 @@ define i64 @rotate_right_64(i64 %a, i64
 ;
 ; X64-LABEL: rotate_right_64:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    rorq %cl, %rdi
+; X64-NEXT:    movq %rsi, %rcx
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NEXT:    rorq %cl, %rax
 ; X64-NEXT:    retq
   %and = and i64 %b, 63
   %shl = lshr i64 %a, %and
@@ -184,6 +188,7 @@ define void @rotate_left_m32(i32 *%pa, i
 ; X64-LABEL: rotate_left_m32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    roll %cl, (%rdi)
 ; X64-NEXT:    retq
   %a = load i32, i32* %pa, align 16
@@ -208,6 +213,7 @@ define void @rotate_right_m32(i32 *%pa,
 ; X64-LABEL: rotate_right_m32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    rorl %cl, (%rdi)
 ; X64-NEXT:    retq
   %a = load i32, i32* %pa, align 16
@@ -276,7 +282,8 @@ define void @rotate_left_m64(i64 *%pa, i
 ;
 ; X64-LABEL: rotate_left_m64:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    movq %rsi, %rcx
+; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NEXT:    rolq %cl, (%rdi)
 ; X64-NEXT:    retq
   %a = load i64, i64* %pa, align 16
@@ -345,7 +352,8 @@ define void @rotate_right_m64(i64 *%pa,
 ;
 ; X64-LABEL: rotate_right_m64:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    movq %rsi, %rcx
+; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NEXT:    rorq %cl, (%rdi)
 ; X64-NEXT:    retq
   %a = load i64, i64* %pa, align 16
@@ -373,8 +381,10 @@ define i8 @rotate_left_8(i8 %x, i32 %amo
 ; X64-LABEL: rotate_left_8:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    rolb %cl, %dil
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    rolb %cl, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
   %amt = trunc i32 %amount to i8
   %sub = sub i8 0, %amt
@@ -397,8 +407,10 @@ define i8 @rotate_right_8(i8 %x, i32 %am
 ; X64-LABEL: rotate_right_8:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    rorb %cl, %dil
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    rorb %cl, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
   %amt = trunc i32 %amount to i8
   %sub = sub i8 0, %amt
@@ -421,8 +433,10 @@ define i16 @rotate_left_16(i16 %x, i32 %
 ; X64-LABEL: rotate_left_16:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    rolw %cl, %di
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    rolw %cl, %ax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
   %amt = trunc i32 %amount to i16
   %sub = sub i16 0, %amt
@@ -445,8 +459,10 @@ define i16 @rotate_right_16(i16 %x, i32
 ; X64-LABEL: rotate_right_16:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    rorw %cl, %di
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    rorw %cl, %ax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
   %amt = trunc i32 %amount to i16
   %sub = sub i16 0, %amt
@@ -469,6 +485,7 @@ define void @rotate_left_m8(i8* %p, i32
 ; X64-LABEL: rotate_left_m8:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    rolb %cl, (%rdi)
 ; X64-NEXT:    retq
   %x = load i8, i8* %p, align 1
@@ -494,6 +511,7 @@ define void @rotate_right_m8(i8* %p, i32
 ; X64-LABEL: rotate_right_m8:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    rorb %cl, (%rdi)
 ; X64-NEXT:    retq
   %x = load i8, i8* %p, align 1
@@ -519,6 +537,7 @@ define void @rotate_left_m16(i16* %p, i3
 ; X64-LABEL: rotate_left_m16:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    rolw %cl, (%rdi)
 ; X64-NEXT:    retq
   %x = load i16, i16* %p, align 1
@@ -544,6 +563,7 @@ define void @rotate_right_m16(i16* %p, i
 ; X64-LABEL: rotate_right_m16:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    rorw %cl, (%rdi)
 ; X64-NEXT:    retq
   %x = load i16, i16* %p, align 1
@@ -569,10 +589,11 @@ define i32 @rotate_demanded_bits(i32, i3
 ;
 ; X64-LABEL: rotate_demanded_bits:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $30, %sil
 ; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    roll %cl, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    andb $30, %cl
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    roll %cl, %eax
 ; X64-NEXT:    retq
   %3 = and i32 %1, 30
   %4 = shl i32 %0, %3
@@ -594,10 +615,11 @@ define i32 @rotate_demanded_bits_2(i32,
 ;
 ; X64-LABEL: rotate_demanded_bits_2:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $23, %sil
 ; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    roll %cl, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    andb $23, %cl
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    roll %cl, %eax
 ; X64-NEXT:    retq
   %3 = and i32 %1, 23
   %4 = shl i32 %0, %3
@@ -620,11 +642,12 @@ define i32 @rotate_demanded_bits_3(i32,
 ;
 ; X64-LABEL: rotate_demanded_bits_3:
 ; X64:       # %bb.0:
-; X64-NEXT:    addb %sil, %sil
-; X64-NEXT:    andb $30, %sil
 ; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    roll %cl, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    addb %cl, %cl
+; X64-NEXT:    andb $30, %cl
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    roll %cl, %eax
 ; X64-NEXT:    retq
   %3 = shl i32 %1, 1
   %4 = and i32 %3, 30

Modified: llvm/trunk/test/CodeGen/X86/sar_fold64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sar_fold64.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sar_fold64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sar_fold64.ll Wed Sep 19 11:59:08 2018
@@ -56,9 +56,10 @@ define i32 @shl56sar57(i64 %a) #0 {
 define i8 @all_sign_bit_ashr(i8 %x) {
 ; CHECK-LABEL: all_sign_bit_ashr:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    andb $1, %dil
-; CHECK-NEXT:    negb %dil
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    andb $1, %al
+; CHECK-NEXT:    negb %al
+; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
   %and = and i8 %x, 1
   %neg = sub i8 0, %and

Modified: llvm/trunk/test/CodeGen/X86/sat-add.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sat-add.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sat-add.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sat-add.ll Wed Sep 19 11:59:08 2018
@@ -10,13 +10,14 @@
 define i8 @unsigned_sat_constant_i8_using_min(i8 %x) {
 ; ANY-LABEL: unsigned_sat_constant_i8_using_min:
 ; ANY:       # %bb.0:
-; ANY-NEXT:    cmpb $-43, %dil
+; ANY-NEXT:    movl %edi, %eax
+; ANY-NEXT:    cmpb $-43, %al
 ; ANY-NEXT:    jb .LBB0_2
 ; ANY-NEXT:  # %bb.1:
-; ANY-NEXT:    movb $-43, %dil
+; ANY-NEXT:    movb $-43, %al
 ; ANY-NEXT:  .LBB0_2:
-; ANY-NEXT:    addb $42, %dil
-; ANY-NEXT:    movl %edi, %eax
+; ANY-NEXT:    addb $42, %al
+; ANY-NEXT:    # kill: def $al killed $al killed $eax
 ; ANY-NEXT:    retq
   %c = icmp ult i8 %x, -43
   %s = select i1 %c, i8 %x, i8 -43
@@ -190,15 +191,16 @@ define i64 @unsigned_sat_constant_i64_us
 define i8 @unsigned_sat_variable_i8_using_min(i8 %x, i8 %y) {
 ; ANY-LABEL: unsigned_sat_variable_i8_using_min:
 ; ANY:       # %bb.0:
-; ANY-NEXT:    movl %esi, %eax
-; ANY-NEXT:    notb %al
-; ANY-NEXT:    cmpb %al, %dil
+; ANY-NEXT:    movl %edi, %eax
+; ANY-NEXT:    movl %esi, %ecx
+; ANY-NEXT:    notb %cl
+; ANY-NEXT:    cmpb %cl, %al
 ; ANY-NEXT:    jb .LBB12_2
 ; ANY-NEXT:  # %bb.1:
-; ANY-NEXT:    movl %eax, %edi
+; ANY-NEXT:    movl %ecx, %eax
 ; ANY-NEXT:  .LBB12_2:
-; ANY-NEXT:    addb %sil, %dil
-; ANY-NEXT:    movl %edi, %eax
+; ANY-NEXT:    addb %sil, %al
+; ANY-NEXT:    # kill: def $al killed $al killed $eax
 ; ANY-NEXT:    retq
   %noty = xor i8 %y, -1
   %c = icmp ult i8 %x, %noty
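
The sat-add hunks show the same rewrite for i8 values: the byte computation is hinted into $al (inside the return register $eax), the trailing "movl %edi, %eax" disappears, and a final kill marker narrows $eax to $al at the return. A small sketch of the byte form (hypothetical name; output inferred from the hunks above):

    define i8 @copyhint_addb(i8 %x) {
      %r = add i8 %x, 42
      ret i8 %r
    }
    ; Expected roughly:
    ;   movl %edi, %eax
    ;   addb $42, %al
    ;   # kill: def $al killed $al killed $eax
    ;   retq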

Modified: llvm/trunk/test/CodeGen/X86/scalar_widen_div.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/scalar_widen_div.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/scalar_widen_div.ll (original)
+++ llvm/trunk/test/CodeGen/X86/scalar_widen_div.ll Wed Sep 19 11:59:08 2018
@@ -56,20 +56,21 @@ entry:
 define <3 x i8> @test_char_div(<3 x i8> %num, <3 x i8> %div) {
 ; CHECK-LABEL: test_char_div:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edx, %r10d
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    cbtw
 ; CHECK-NEXT:    idivb %cl
 ; CHECK-NEXT:    movl %eax, %edi
 ; CHECK-NEXT:    movl %esi, %eax
 ; CHECK-NEXT:    cbtw
 ; CHECK-NEXT:    idivb %r8b
-; CHECK-NEXT:    movl %eax, %esi
-; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    movl %eax, %edx
+; CHECK-NEXT:    movl %r10d, %eax
 ; CHECK-NEXT:    cbtw
 ; CHECK-NEXT:    idivb %r9b
 ; CHECK-NEXT:    movl %eax, %ecx
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    movl %esi, %edx
 ; CHECK-NEXT:    retq
   %div.r = sdiv <3 x i8> %num, %div
   ret <3 x i8>  %div.r
@@ -232,8 +233,8 @@ define <3 x i64> @test_ulong_div(<3 x i6
 ; CHECK-LABEL: test_ulong_div:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rdx, %r10
-; CHECK-NEXT:    xorl %edx, %edx
 ; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    xorl %edx, %edx
 ; CHECK-NEXT:    divq %rcx
 ; CHECK-NEXT:    movq %rax, %rcx
 ; CHECK-NEXT:    xorl %edx, %edx
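
In test_char_div the reshuffle is driven by fixed-register constraints rather than by saved copies: the second quotient is now hinted directly into %edx (its return slot, $dl), so the third numerator that arrives in %edx must vacate early and is staged in %r10d at the top instead of taking a detour through %esi. Sketch of a single scalar division (hypothetical name; output inferred from the hunk):

    define i8 @copyhint_sdiv(i8 %x, i8 %y) {
      %r = sdiv i8 %x, %y
      ret i8 %r
    }
    ; Expected roughly:
    ;   movl %edi, %eax
    ;   cbtw
    ;   idivb %sil
    ;   retq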

Modified: llvm/trunk/test/CodeGen/X86/schedule-x86-64-shld.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/schedule-x86-64-shld.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/schedule-x86-64-shld.ll (original)
+++ llvm/trunk/test/CodeGen/X86/schedule-x86-64-shld.ll Wed Sep 19 11:59:08 2018
@@ -12,20 +12,20 @@
 define i64 @lshift10_optsize(i64 %a, i64 %b) nounwind readnone optsize {
 ; GENERIC-LABEL: lshift10_optsize:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    shldq $10, %rsi, %rdi # sched: [2:0.67]
 ; GENERIC-NEXT:    movq %rdi, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    shldq $10, %rsi, %rax # sched: [2:0.67]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BTVER2-LABEL: lshift10_optsize:
 ; BTVER2:       # %bb.0: # %entry
-; BTVER2-NEXT:    shldq $10, %rsi, %rdi # sched: [3:3.00]
 ; BTVER2-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    shldq $10, %rsi, %rax # sched: [3:3.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; BDVER1-LABEL: lshift10_optsize:
 ; BDVER1:       # %bb.0: # %entry
-; BDVER1-NEXT:    shldq $10, %rsi, %rdi
 ; BDVER1-NEXT:    movq %rdi, %rax
+; BDVER1-NEXT:    shldq $10, %rsi, %rax
 ; BDVER1-NEXT:    retq
 entry:
   %shl = shl i64 %a, 10
@@ -37,8 +37,8 @@ entry:
 define i64 @lshift10(i64 %a, i64 %b) nounwind readnone {
 ; GENERIC-LABEL: lshift10:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    shldq $10, %rsi, %rdi # sched: [2:0.67]
 ; GENERIC-NEXT:    movq %rdi, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    shldq $10, %rsi, %rax # sched: [2:0.67]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BTVER2-LABEL: lshift10:
@@ -70,20 +70,20 @@ entry:
 define i64 @rshift10_optsize(i64 %a, i64 %b) nounwind readnone optsize {
 ; GENERIC-LABEL: rshift10_optsize:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    shrdq $62, %rsi, %rdi # sched: [2:0.67]
 ; GENERIC-NEXT:    movq %rdi, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    shrdq $62, %rsi, %rax # sched: [2:0.67]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BTVER2-LABEL: rshift10_optsize:
 ; BTVER2:       # %bb.0: # %entry
-; BTVER2-NEXT:    shrdq $62, %rsi, %rdi # sched: [3:3.00]
 ; BTVER2-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    shrdq $62, %rsi, %rax # sched: [3:3.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; BDVER1-LABEL: rshift10_optsize:
 ; BDVER1:       # %bb.0: # %entry
-; BDVER1-NEXT:    shrdq $62, %rsi, %rdi
 ; BDVER1-NEXT:    movq %rdi, %rax
+; BDVER1-NEXT:    shrdq $62, %rsi, %rax
 ; BDVER1-NEXT:    retq
 entry:
   %shl = lshr i64 %a, 62
@@ -96,8 +96,8 @@ entry:
 define i64 @rshift10(i64 %a, i64 %b) nounwind readnone {
 ; GENERIC-LABEL: rshift10:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    shrdq $62, %rsi, %rdi # sched: [2:0.67]
 ; GENERIC-NEXT:    movq %rdi, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    shrdq $62, %rsi, %rax # sched: [2:0.67]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BTVER2-LABEL: rshift10:
@@ -126,23 +126,26 @@ entry:
 define i64 @lshift_cl_optsize(i64 %a, i64 %b, i64 %c) nounwind readnone optsize {
 ; GENERIC-LABEL: lshift_cl_optsize:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    movl %edx, %ecx # sched: [1:0.33]
-; GENERIC-NEXT:    shldq %cl, %rsi, %rdi # sched: [4:1.50]
+; GENERIC-NEXT:    movq %rdx, %rcx # sched: [1:0.33]
 ; GENERIC-NEXT:    movq %rdi, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    # kill: def $cl killed $cl killed $rcx
+; GENERIC-NEXT:    shldq %cl, %rsi, %rax # sched: [4:1.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BTVER2-LABEL: lshift_cl_optsize:
 ; BTVER2:       # %bb.0: # %entry
-; BTVER2-NEXT:    movl %edx, %ecx # sched: [1:0.50]
-; BTVER2-NEXT:    shldq %cl, %rsi, %rdi # sched: [4:4.00]
+; BTVER2-NEXT:    movq %rdx, %rcx # sched: [1:0.50]
 ; BTVER2-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; BTVER2-NEXT:    shldq %cl, %rsi, %rax # sched: [4:4.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; BDVER1-LABEL: lshift_cl_optsize:
 ; BDVER1:       # %bb.0: # %entry
-; BDVER1-NEXT:    movl %edx, %ecx
-; BDVER1-NEXT:    shldq %cl, %rsi, %rdi
+; BDVER1-NEXT:    movq %rdx, %rcx
 ; BDVER1-NEXT:    movq %rdi, %rax
+; BDVER1-NEXT:    # kill: def $cl killed $cl killed $rcx
+; BDVER1-NEXT:    shldq %cl, %rsi, %rax
 ; BDVER1-NEXT:    retq
 entry:
   %shl = shl i64 %a, %c
@@ -155,31 +158,32 @@ entry:
 define i64 @lshift_cl(i64 %a, i64 %b, i64 %c) nounwind readnone {
 ; GENERIC-LABEL: lshift_cl:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    movl %edx, %ecx # sched: [1:0.33]
-; GENERIC-NEXT:    shldq %cl, %rsi, %rdi # sched: [4:1.50]
+; GENERIC-NEXT:    movq %rdx, %rcx # sched: [1:0.33]
 ; GENERIC-NEXT:    movq %rdi, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    # kill: def $cl killed $cl killed $rcx
+; GENERIC-NEXT:    shldq %cl, %rsi, %rax # sched: [4:1.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BTVER2-LABEL: lshift_cl:
 ; BTVER2:       # %bb.0: # %entry
 ; BTVER2-NEXT:    movq %rdx, %rcx # sched: [1:0.50]
+; BTVER2-NEXT:    movq %rsi, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    shlq %cl, %rdi # sched: [1:0.50]
 ; BTVER2-NEXT:    negl %ecx # sched: [1:0.50]
 ; BTVER2-NEXT:    # kill: def $cl killed $cl killed $rcx
-; BTVER2-NEXT:    shrq %cl, %rsi # sched: [1:0.50]
-; BTVER2-NEXT:    orq %rdi, %rsi # sched: [1:0.50]
-; BTVER2-NEXT:    movq %rsi, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    shrq %cl, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    orq %rdi, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; BDVER1-LABEL: lshift_cl:
 ; BDVER1:       # %bb.0: # %entry
 ; BDVER1-NEXT:    movq %rdx, %rcx
+; BDVER1-NEXT:    movq %rsi, %rax
 ; BDVER1-NEXT:    shlq %cl, %rdi
 ; BDVER1-NEXT:    negl %ecx
 ; BDVER1-NEXT:    # kill: def $cl killed $cl killed $rcx
-; BDVER1-NEXT:    shrq %cl, %rsi
-; BDVER1-NEXT:    orq %rdi, %rsi
-; BDVER1-NEXT:    movq %rsi, %rax
+; BDVER1-NEXT:    shrq %cl, %rax
+; BDVER1-NEXT:    orq %rdi, %rax
 ; BDVER1-NEXT:    retq
 entry:
   %shl = shl i64 %a, %c
@@ -198,23 +202,26 @@ entry:
 define i64 @rshift_cl_optsize(i64 %a, i64 %b, i64 %c) nounwind readnone optsize {
 ; GENERIC-LABEL: rshift_cl_optsize:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    movl %edx, %ecx # sched: [1:0.33]
-; GENERIC-NEXT:    shrdq %cl, %rsi, %rdi # sched: [4:1.50]
+; GENERIC-NEXT:    movq %rdx, %rcx # sched: [1:0.33]
 ; GENERIC-NEXT:    movq %rdi, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    # kill: def $cl killed $cl killed $rcx
+; GENERIC-NEXT:    shrdq %cl, %rsi, %rax # sched: [4:1.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BTVER2-LABEL: rshift_cl_optsize:
 ; BTVER2:       # %bb.0: # %entry
-; BTVER2-NEXT:    movl %edx, %ecx # sched: [1:0.50]
-; BTVER2-NEXT:    shrdq %cl, %rsi, %rdi # sched: [4:4.00]
+; BTVER2-NEXT:    movq %rdx, %rcx # sched: [1:0.50]
 ; BTVER2-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; BTVER2-NEXT:    shrdq %cl, %rsi, %rax # sched: [4:4.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; BDVER1-LABEL: rshift_cl_optsize:
 ; BDVER1:       # %bb.0: # %entry
-; BDVER1-NEXT:    movl %edx, %ecx
-; BDVER1-NEXT:    shrdq %cl, %rsi, %rdi
+; BDVER1-NEXT:    movq %rdx, %rcx
 ; BDVER1-NEXT:    movq %rdi, %rax
+; BDVER1-NEXT:    # kill: def $cl killed $cl killed $rcx
+; BDVER1-NEXT:    shrdq %cl, %rsi, %rax
 ; BDVER1-NEXT:    retq
 entry:
   %shr = lshr i64 %a, %c
@@ -227,31 +234,32 @@ entry:
 define i64 @rshift_cl(i64 %a, i64 %b, i64 %c) nounwind readnone {
 ; GENERIC-LABEL: rshift_cl:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    movl %edx, %ecx # sched: [1:0.33]
-; GENERIC-NEXT:    shrdq %cl, %rsi, %rdi # sched: [4:1.50]
+; GENERIC-NEXT:    movq %rdx, %rcx # sched: [1:0.33]
 ; GENERIC-NEXT:    movq %rdi, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    # kill: def $cl killed $cl killed $rcx
+; GENERIC-NEXT:    shrdq %cl, %rsi, %rax # sched: [4:1.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BTVER2-LABEL: rshift_cl:
 ; BTVER2:       # %bb.0: # %entry
 ; BTVER2-NEXT:    movq %rdx, %rcx # sched: [1:0.50]
+; BTVER2-NEXT:    movq %rsi, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    shrq %cl, %rdi # sched: [1:0.50]
 ; BTVER2-NEXT:    negl %ecx # sched: [1:0.50]
 ; BTVER2-NEXT:    # kill: def $cl killed $cl killed $rcx
-; BTVER2-NEXT:    shlq %cl, %rsi # sched: [1:0.50]
-; BTVER2-NEXT:    orq %rdi, %rsi # sched: [1:0.50]
-; BTVER2-NEXT:    movq %rsi, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    shlq %cl, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    orq %rdi, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; BDVER1-LABEL: rshift_cl:
 ; BDVER1:       # %bb.0: # %entry
 ; BDVER1-NEXT:    movq %rdx, %rcx
+; BDVER1-NEXT:    movq %rsi, %rax
 ; BDVER1-NEXT:    shrq %cl, %rdi
 ; BDVER1-NEXT:    negl %ecx
 ; BDVER1-NEXT:    # kill: def $cl killed $cl killed $rcx
-; BDVER1-NEXT:    shlq %cl, %rsi
-; BDVER1-NEXT:    orq %rdi, %rsi
-; BDVER1-NEXT:    movq %rsi, %rax
+; BDVER1-NEXT:    shlq %cl, %rax
+; BDVER1-NEXT:    orq %rdi, %rax
 ; BDVER1-NEXT:    retq
 entry:
   %shr = lshr i64 %a, %c
@@ -271,19 +279,22 @@ entry:
 define void @lshift_mem_cl_optsize(i64 %a, i64 %c) nounwind readnone optsize {
 ; GENERIC-LABEL: lshift_mem_cl_optsize:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    movl %esi, %ecx # sched: [1:0.33]
+; GENERIC-NEXT:    movq %rsi, %rcx # sched: [1:0.33]
+; GENERIC-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; GENERIC-NEXT:    shldq %cl, %rdi, {{.*}}(%rip) # sched: [10:1.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; BTVER2-LABEL: lshift_mem_cl_optsize:
 ; BTVER2:       # %bb.0: # %entry
-; BTVER2-NEXT:    movl %esi, %ecx # sched: [1:0.50]
+; BTVER2-NEXT:    movq %rsi, %rcx # sched: [1:0.50]
+; BTVER2-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; BTVER2-NEXT:    shldq %cl, %rdi, {{.*}}(%rip) # sched: [9:11.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; BDVER1-LABEL: lshift_mem_cl_optsize:
 ; BDVER1:       # %bb.0: # %entry
-; BDVER1-NEXT:    movl %esi, %ecx
+; BDVER1-NEXT:    movq %rsi, %rcx
+; BDVER1-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; BDVER1-NEXT:    shldq %cl, %rdi, {{.*}}(%rip)
 ; BDVER1-NEXT:    retq
 entry:
@@ -299,7 +310,8 @@ entry:
 define void @lshift_mem_cl(i64 %a, i64 %c) nounwind readnone {
 ; GENERIC-LABEL: lshift_mem_cl:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    movl %esi, %ecx # sched: [1:0.33]
+; GENERIC-NEXT:    movq %rsi, %rcx # sched: [1:0.33]
+; GENERIC-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; GENERIC-NEXT:    shldq %cl, %rdi, {{.*}}(%rip) # sched: [10:1.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
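
Two details stand out in the schedule-x86-64-shld hunks. First, the amount copy becomes "movq %rdx, %rcx" rather than "movl %edx, %ecx": the full-width copy matches the hinted register, and the kill comment then narrows it to $cl. Second, in the expanded (non-shld) BTVER2/BDVER1 sequences the second shift operand is hinted into %rax up front, deleting the trailing "movq %rsi, %rax". Sketch of the double-shift form (hypothetical name; output inferred from the GENERIC hunks, sched comments omitted):

    define i64 @copyhint_shldq(i64 %a, i64 %b, i64 %c) {
      %shl = shl i64 %a, %c
      %sub = sub i64 64, %c
      %shr = lshr i64 %b, %sub
      %or = or i64 %shl, %shr
      ret i64 %or
    }
    ; Expected roughly:
    ;   movq %rdx, %rcx
    ;   movq %rdi, %rax
    ;   # kill: def $cl killed $cl killed $rcx
    ;   shldq %cl, %rsi, %rax
    ;   retq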

Modified: llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll Wed Sep 19 11:59:08 2018
@@ -2541,62 +2541,62 @@ define i64 @test_bsr64(i64 %a0, i64* %a1
 define i32 @test_bswap32(i32 %a0) optsize {
 ; GENERIC-LABEL: test_bswap32:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    bswapl %edi # sched: [1:1.00]
 ; GENERIC-NEXT:    movl %edi, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    bswapl %eax # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_bswap32:
 ; ATOM:       # %bb.0:
-; ATOM-NEXT:    bswapl %edi # sched: [1:1.00]
 ; ATOM-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; ATOM-NEXT:    bswapl %eax # sched: [1:1.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_bswap32:
 ; SLM:       # %bb.0:
-; SLM-NEXT:    bswapl %edi # sched: [1:0.50]
 ; SLM-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; SLM-NEXT:    bswapl %eax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_bswap32:
 ; SANDY:       # %bb.0:
-; SANDY-NEXT:    bswapl %edi # sched: [1:1.00]
 ; SANDY-NEXT:    movl %edi, %eax # sched: [1:0.33]
+; SANDY-NEXT:    bswapl %eax # sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_bswap32:
 ; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    bswapl %edi # sched: [1:0.50]
 ; HASWELL-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; HASWELL-NEXT:    bswapl %eax # sched: [1:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_bswap32:
 ; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    bswapl %edi # sched: [1:0.50]
 ; BROADWELL-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; BROADWELL-NEXT:    bswapl %eax # sched: [1:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_bswap32:
 ; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    bswapl %edi # sched: [1:0.50]
 ; SKYLAKE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; SKYLAKE-NEXT:    bswapl %eax # sched: [1:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_bswap32:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    bswapl %edi # sched: [1:0.50]
 ; SKX-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; SKX-NEXT:    bswapl %eax # sched: [1:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_bswap32:
 ; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    bswapl %edi # sched: [1:0.50]
 ; BTVER2-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    bswapl %eax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_bswap32:
 ; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    bswapl %edi # sched: [1:1.00]
 ; ZNVER1-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; ZNVER1-NEXT:    bswapl %eax # sched: [1:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = tail call i32 asm "bswap $0", "=r,0"(i32 %a0) nounwind
   ret i32 %1
@@ -2604,62 +2604,62 @@ define i32 @test_bswap32(i32 %a0) optsiz
 define i64 @test_bswap64(i64 %a0) optsize {
 ; GENERIC-LABEL: test_bswap64:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    bswapq %rdi # sched: [2:1.00]
 ; GENERIC-NEXT:    movq %rdi, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    bswapq %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_bswap64:
 ; ATOM:       # %bb.0:
-; ATOM-NEXT:    bswapq %rdi # sched: [1:1.00]
 ; ATOM-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; ATOM-NEXT:    bswapq %rax # sched: [1:1.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_bswap64:
 ; SLM:       # %bb.0:
-; SLM-NEXT:    bswapq %rdi # sched: [1:0.50]
 ; SLM-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; SLM-NEXT:    bswapq %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_bswap64:
 ; SANDY:       # %bb.0:
-; SANDY-NEXT:    bswapq %rdi # sched: [2:1.00]
 ; SANDY-NEXT:    movq %rdi, %rax # sched: [1:0.33]
+; SANDY-NEXT:    bswapq %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_bswap64:
 ; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    bswapq %rdi # sched: [2:0.50]
 ; HASWELL-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; HASWELL-NEXT:    bswapq %rax # sched: [2:0.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_bswap64:
 ; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    bswapq %rdi # sched: [2:0.50]
 ; BROADWELL-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; BROADWELL-NEXT:    bswapq %rax # sched: [2:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_bswap64:
 ; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    bswapq %rdi # sched: [2:0.50]
 ; SKYLAKE-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; SKYLAKE-NEXT:    bswapq %rax # sched: [2:0.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_bswap64:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    bswapq %rdi # sched: [2:0.50]
 ; SKX-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; SKX-NEXT:    bswapq %rax # sched: [2:0.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_bswap64:
 ; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    bswapq %rdi # sched: [1:0.50]
 ; BTVER2-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    bswapq %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_bswap64:
 ; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    bswapq %rdi # sched: [1:1.00]
 ; ZNVER1-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; ZNVER1-NEXT:    bswapq %rax # sched: [1:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = tail call i64 asm "bswap $0", "=r,0"(i64 %a0) nounwind
   ret i64 %1
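
bswap is a two-address (read-modify-write) instruction with a single operand, so the only change available here is ordering: the copy into the return register now comes first and bswap rewrites %eax/%rax in place, instead of byte-swapping the argument register and copying afterwards. Sketch using the intrinsic rather than the inline asm above (hypothetical name):

    declare i32 @llvm.bswap.i32(i32)

    define i32 @copyhint_bswap(i32 %x) {
      %r = call i32 @llvm.bswap.i32(i32 %x)
      ret i32 %r
    }
    ; Expected roughly:
    ;   movl %edi, %eax
    ;   bswapl %eax
    ;   retq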

Modified: llvm/trunk/test/CodeGen/X86/select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/select.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/select.ll (original)
+++ llvm/trunk/test/CodeGen/X86/select.ll Wed Sep 19 11:59:08 2018
@@ -1062,16 +1062,18 @@ entry:
 define i8 @test18(i32 %x, i8 zeroext %a, i8 zeroext %b) nounwind {
 ; GENERIC-LABEL: test18:
 ; GENERIC:       ## %bb.0:
-; GENERIC-NEXT:    cmpl $15, %edi
-; GENERIC-NEXT:    cmovgel %edx, %esi
 ; GENERIC-NEXT:    movl %esi, %eax
+; GENERIC-NEXT:    cmpl $15, %edi
+; GENERIC-NEXT:    cmovgel %edx, %eax
+; GENERIC-NEXT:    ## kill: def $al killed $al killed $eax
 ; GENERIC-NEXT:    retq
 ;
 ; ATOM-LABEL: test18:
 ; ATOM:       ## %bb.0:
-; ATOM-NEXT:    cmpl $15, %edi
-; ATOM-NEXT:    cmovgel %edx, %esi
 ; ATOM-NEXT:    movl %esi, %eax
+; ATOM-NEXT:    cmpl $15, %edi
+; ATOM-NEXT:    cmovgel %edx, %eax
+; ATOM-NEXT:    ## kill: def $al killed $al killed $eax
 ; ATOM-NEXT:    nop
 ; ATOM-NEXT:    nop
 ; ATOM-NEXT:    retq
@@ -1102,10 +1104,11 @@ define i8 @test18(i32 %x, i8 zeroext %a,
 define i32 @trunc_select_miscompile(i32 %a, i1 zeroext %cc) {
 ; CHECK-LABEL: trunc_select_miscompile:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    orb $2, %sil
 ; CHECK-NEXT:    movl %esi, %ecx
-; CHECK-NEXT:    shll %cl, %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    orb $2, %cl
+; CHECK-NEXT:    ## kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shll %cl, %eax
 ; CHECK-NEXT:    retq
 ;
 ; ATHLON-LABEL: trunc_select_miscompile:
@@ -1118,8 +1121,9 @@ define i32 @trunc_select_miscompile(i32
 ;
 ; MCU-LABEL: trunc_select_miscompile:
 ; MCU:       # %bb.0:
-; MCU-NEXT:    orb $2, %dl
 ; MCU-NEXT:    movl %edx, %ecx
+; MCU-NEXT:    orb $2, %cl
+; MCU-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; MCU-NEXT:    shll %cl, %eax
 ; MCU-NEXT:    retl
   %tmp1 = select i1 %cc, i32 3, i32 2
@@ -1438,10 +1442,10 @@ entry:
 define i32 @select_xor_2(i32 %A, i32 %B, i8 %cond) {
 ; CHECK-LABEL: select_xor_2:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    xorl %edi, %esi
-; CHECK-NEXT:    testb $1, %dl
-; CHECK-NEXT:    cmovel %edi, %esi
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    xorl %edi, %eax
+; CHECK-NEXT:    testb $1, %dl
+; CHECK-NEXT:    cmovel %edi, %eax
 ; CHECK-NEXT:    retq
 ;
 ; ATHLON-LABEL: select_xor_2:
@@ -1473,10 +1477,10 @@ entry:
 define i32 @select_xor_2b(i32 %A, i32 %B, i8 %cond) {
 ; CHECK-LABEL: select_xor_2b:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    xorl %edi, %esi
-; CHECK-NEXT:    testb $1, %dl
-; CHECK-NEXT:    cmovel %edi, %esi
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    xorl %edi, %eax
+; CHECK-NEXT:    testb $1, %dl
+; CHECK-NEXT:    cmovel %edi, %eax
 ; CHECK-NEXT:    retq
 ;
 ; ATHLON-LABEL: select_xor_2b:
@@ -1507,10 +1511,10 @@ entry:
 define i32 @select_or(i32 %A, i32 %B, i8 %cond) {
 ; CHECK-LABEL: select_or:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    orl %edi, %esi
-; CHECK-NEXT:    testb $1, %dl
-; CHECK-NEXT:    cmovel %edi, %esi
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    orl %edi, %eax
+; CHECK-NEXT:    testb $1, %dl
+; CHECK-NEXT:    cmovel %edi, %eax
 ; CHECK-NEXT:    retq
 ;
 ; ATHLON-LABEL: select_or:
@@ -1542,10 +1546,10 @@ entry:
 define i32 @select_or_b(i32 %A, i32 %B, i8 %cond) {
 ; CHECK-LABEL: select_or_b:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    orl %edi, %esi
-; CHECK-NEXT:    testb $1, %dl
-; CHECK-NEXT:    cmovel %edi, %esi
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    orl %edi, %eax
+; CHECK-NEXT:    testb $1, %dl
+; CHECK-NEXT:    cmovel %edi, %eax
 ; CHECK-NEXT:    retq
 ;
 ; ATHLON-LABEL: select_or_b:
@@ -1576,10 +1580,10 @@ entry:
 define i32 @select_or_1(i32 %A, i32 %B, i32 %cond) {
 ; CHECK-LABEL: select_or_1:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    orl %edi, %esi
-; CHECK-NEXT:    testb $1, %dl
-; CHECK-NEXT:    cmovel %edi, %esi
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    orl %edi, %eax
+; CHECK-NEXT:    testb $1, %dl
+; CHECK-NEXT:    cmovel %edi, %eax
 ; CHECK-NEXT:    retq
 ;
 ; ATHLON-LABEL: select_or_1:
@@ -1611,10 +1615,10 @@ entry:
 define i32 @select_or_1b(i32 %A, i32 %B, i32 %cond) {
 ; CHECK-LABEL: select_or_1b:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    orl %edi, %esi
-; CHECK-NEXT:    testb $1, %dl
-; CHECK-NEXT:    cmovel %edi, %esi
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    orl %edi, %eax
+; CHECK-NEXT:    testb $1, %dl
+; CHECK-NEXT:    cmovel %edi, %eax
 ; CHECK-NEXT:    retq
 ;
 ; ATHLON-LABEL: select_or_1b:
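
For the cmov-based selects the hint places the initially-chosen operand in %eax before the compare, so cmov conditionally overwrites the return register in place and the old trailing "movl %esi, %eax" disappears; the instruction count is unchanged, but the result no longer needs a post-select copy. Sketch (hypothetical name; output inferred from the select_xor/select_or hunks):

    define i32 @copyhint_cmov(i32 %a, i32 %b, i8 %cond) {
      %t = trunc i8 %cond to i1
      %sel = select i1 %t, i32 %a, i32 %b
      ret i32 %sel
    }
    ; Expected roughly:
    ;   movl %esi, %eax
    ;   testb $1, %dl
    ;   cmovnel %edi, %eax
    ;   retq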

Modified: llvm/trunk/test/CodeGen/X86/select_const.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/select_const.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/select_const.ll (original)
+++ llvm/trunk/test/CodeGen/X86/select_const.ll Wed Sep 19 11:59:08 2018
@@ -43,8 +43,8 @@ define i32 @select_0_or_1_signext(i1 sig
 define i32 @select_1_or_0(i1 %cond) {
 ; CHECK-LABEL: select_1_or_0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    andl $1, %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    andl $1, %eax
 ; CHECK-NEXT:    retq
   %sel = select i1 %cond, i32 1, i32 0
   ret i32 %sel
@@ -62,8 +62,8 @@ define i32 @select_1_or_0_zeroext(i1 zer
 define i32 @select_1_or_0_signext(i1 signext %cond) {
 ; CHECK-LABEL: select_1_or_0_signext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    andl $1, %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    andl $1, %eax
 ; CHECK-NEXT:    retq
   %sel = select i1 %cond, i32 1, i32 0
   ret i32 %sel
@@ -95,8 +95,8 @@ define i32 @select_0_or_neg1_zeroext(i1
 define i32 @select_0_or_neg1_signext(i1 signext %cond) {
 ; CHECK-LABEL: select_0_or_neg1_signext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    notl %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    notl %eax
 ; CHECK-NEXT:    retq
   %sel = select i1 %cond, i32 0, i32 -1
   ret i32 %sel
@@ -107,9 +107,9 @@ define i32 @select_0_or_neg1_signext(i1
 define i32 @select_neg1_or_0(i1 %cond) {
 ; CHECK-LABEL: select_neg1_or_0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    andl $1, %edi
-; CHECK-NEXT:    negl %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    negl %eax
 ; CHECK-NEXT:    retq
   %sel = select i1 %cond, i32 -1, i32 0
   ret i32 %sel
@@ -118,8 +118,8 @@ define i32 @select_neg1_or_0(i1 %cond) {
 define i32 @select_neg1_or_0_zeroext(i1 zeroext %cond) {
 ; CHECK-LABEL: select_neg1_or_0_zeroext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    negl %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    negl %eax
 ; CHECK-NEXT:    retq
   %sel = select i1 %cond, i32 -1, i32 0
   ret i32 %sel
@@ -329,9 +329,10 @@ define i32 @sel_neg1_1_32(i32 %x) {
 define i8 @select_pow2_diff(i1 zeroext %cond) {
 ; CHECK-LABEL: select_pow2_diff:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    shlb $4, %dil
-; CHECK-NEXT:    orb $3, %dil
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    shlb $4, %al
+; CHECK-NEXT:    orb $3, %al
+; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
   %sel = select i1 %cond, i8 19, i8 3
   ret i8 %sel

Modified: llvm/trunk/test/CodeGen/X86/selectcc-to-shiftand.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/selectcc-to-shiftand.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/selectcc-to-shiftand.ll (original)
+++ llvm/trunk/test/CodeGen/X86/selectcc-to-shiftand.ll Wed Sep 19 11:59:08 2018
@@ -7,16 +7,16 @@
 define i32 @neg_sel_constants(i32 %a) {
 ; CHECK-NOBMI-LABEL: neg_sel_constants:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    sarl $31, %edi
-; CHECK-NOBMI-NEXT:    andl $5, %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    sarl $31, %eax
+; CHECK-NOBMI-NEXT:    andl $5, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: neg_sel_constants:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    sarl $31, %edi
-; CHECK-BMI-NEXT:    andl $5, %edi
 ; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    sarl $31, %eax
+; CHECK-BMI-NEXT:    andl $5, %eax
 ; CHECK-BMI-NEXT:    retq
   %tmp.1 = icmp slt i32 %a, 0
   %retval = select i1 %tmp.1, i32 5, i32 0
@@ -28,16 +28,16 @@ define i32 @neg_sel_constants(i32 %a) {
 define i32 @neg_sel_special_constant(i32 %a) {
 ; CHECK-NOBMI-LABEL: neg_sel_special_constant:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    shrl $22, %edi
-; CHECK-NOBMI-NEXT:    andl $512, %edi # imm = 0x200
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    shrl $22, %eax
+; CHECK-NOBMI-NEXT:    andl $512, %eax # imm = 0x200
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: neg_sel_special_constant:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    shrl $22, %edi
-; CHECK-BMI-NEXT:    andl $512, %edi # imm = 0x200
 ; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    shrl $22, %eax
+; CHECK-BMI-NEXT:    andl $512, %eax # imm = 0x200
 ; CHECK-BMI-NEXT:    retq
   %tmp.1 = icmp slt i32 %a, 0
   %retval = select i1 %tmp.1, i32 512, i32 0
@@ -49,16 +49,16 @@ define i32 @neg_sel_special_constant(i32
 define i32 @neg_sel_variable_and_zero(i32 %a, i32 %b) {
 ; CHECK-NOBMI-LABEL: neg_sel_variable_and_zero:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    sarl $31, %edi
-; CHECK-NOBMI-NEXT:    andl %esi, %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    sarl $31, %eax
+; CHECK-NOBMI-NEXT:    andl %esi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: neg_sel_variable_and_zero:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    sarl $31, %edi
-; CHECK-BMI-NEXT:    andl %esi, %edi
 ; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    sarl $31, %eax
+; CHECK-BMI-NEXT:    andl %esi, %eax
 ; CHECK-BMI-NEXT:    retq
   %tmp.1 = icmp slt i32 %a, 0
   %retval = select i1 %tmp.1, i32 %b, i32 0
@@ -116,18 +116,18 @@ define i32 @pos_sel_constants(i32 %a) {
 define i32 @pos_sel_special_constant(i32 %a) {
 ; CHECK-NOBMI-LABEL: pos_sel_special_constant:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    notl %edi
-; CHECK-NOBMI-NEXT:    shrl $22, %edi
-; CHECK-NOBMI-NEXT:    andl $512, %edi # imm = 0x200
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    notl %eax
+; CHECK-NOBMI-NEXT:    shrl $22, %eax
+; CHECK-NOBMI-NEXT:    andl $512, %eax # imm = 0x200
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: pos_sel_special_constant:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    notl %edi
-; CHECK-BMI-NEXT:    shrl $22, %edi
-; CHECK-BMI-NEXT:    andl $512, %edi # imm = 0x200
 ; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    notl %eax
+; CHECK-BMI-NEXT:    shrl $22, %eax
+; CHECK-BMI-NEXT:    andl $512, %eax # imm = 0x200
 ; CHECK-BMI-NEXT:    retq
   %tmp.1 = icmp sgt i32 %a, -1
   %retval = select i1 %tmp.1, i32 512, i32 0

Modified: llvm/trunk/test/CodeGen/X86/setcc-logic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/setcc-logic.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/setcc-logic.ll (original)
+++ llvm/trunk/test/CodeGen/X86/setcc-logic.ll Wed Sep 19 11:59:08 2018
@@ -41,9 +41,10 @@ define zeroext i1 @all_bits_set(i32 %P,
 define zeroext i1 @all_sign_bits_set(i32 %P, i32 %Q) nounwind {
 ; CHECK-LABEL: all_sign_bits_set:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    andl %esi, %edi
-; CHECK-NEXT:    shrl $31, %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    andl %esi, %eax
+; CHECK-NEXT:    shrl $31, %eax
+; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
   %a = icmp slt i32 %P, 0
   %b = icmp slt i32 %Q, 0
@@ -66,9 +67,10 @@ define zeroext i1 @any_bits_set(i32 %P,
 define zeroext i1 @any_sign_bits_set(i32 %P, i32 %Q) nounwind {
 ; CHECK-LABEL: any_sign_bits_set:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    orl %esi, %edi
-; CHECK-NEXT:    shrl $31, %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    orl %esi, %eax
+; CHECK-NEXT:    shrl $31, %eax
+; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
   %a = icmp slt i32 %P, 0
   %b = icmp slt i32 %Q, 0
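
The zeroext i1 returns combine both effects: the logic and the shift are sunk below the copy so the bit is computed in %eax, and a final kill marker records that only $al is live at the return. A reduced sketch (hypothetical name; the output is a guess in the spirit of the hunks above):

    define zeroext i1 @copyhint_signbit(i32 %p) {
      %b = icmp slt i32 %p, 0
      ret i1 %b
    }
    ; Expected roughly:
    ;   movl %edi, %eax
    ;   shrl $31, %eax
    ;   # kill: def $al killed $al killed $eax
    ;   retq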

Modified: llvm/trunk/test/CodeGen/X86/sext-i1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sext-i1.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sext-i1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sext-i1.ll Wed Sep 19 11:59:08 2018
@@ -164,8 +164,8 @@ define i32 @select_0_or_1s_signext(i1 si
 ;
 ; X64-LABEL: select_0_or_1s_signext:
 ; X64:       # %bb.0:
-; X64-NEXT:    notl %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    notl %eax
 ; X64-NEXT:    retq
   %not = xor i1 %cond, 1
   %sext = sext i1 %not to i32

Modified: llvm/trunk/test/CodeGen/X86/shift-and.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/shift-and.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/shift-and.ll (original)
+++ llvm/trunk/test/CodeGen/X86/shift-and.ll Wed Sep 19 11:59:08 2018
@@ -12,9 +12,10 @@ define i32 @t1(i32 %t, i32 %val) nounwin
 ;
 ; X64-LABEL: t1:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %ecx
-; X64-NEXT:    shll %cl, %esi
 ; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    movl %edi, %ecx
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shll %cl, %eax
 ; X64-NEXT:    retq
        %shamt = and i32 %t, 31
        %res = shl i32 %val, %shamt
@@ -31,9 +32,10 @@ define i32 @t2(i32 %t, i32 %val) nounwin
 ;
 ; X64-LABEL: t2:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %ecx
-; X64-NEXT:    shll %cl, %esi
 ; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    movl %edi, %ecx
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shll %cl, %eax
 ; X64-NEXT:    retq
        %shamt = and i32 %t, 63
        %res = shl i32 %val, %shamt
@@ -52,6 +54,7 @@ define void @t3(i16 %t) nounwind {
 ; X64-LABEL: t3:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edi, %ecx
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    sarw %cl, {{.*}}(%rip)
 ; X64-NEXT:    retq
        %shamt = and i16 %t, 31
@@ -82,9 +85,10 @@ define i64 @t4(i64 %t, i64 %val) nounwin
 ;
 ; X64-LABEL: t4:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %ecx
-; X64-NEXT:    shrq %cl, %rsi
 ; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    movq %rdi, %rcx
+; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NEXT:    shrq %cl, %rax
 ; X64-NEXT:    retq
        %shamt = and i64 %t, 63
        %res = lshr i64 %val, %shamt
@@ -112,9 +116,10 @@ define i64 @t5(i64 %t, i64 %val) nounwin
 ;
 ; X64-LABEL: t5:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %ecx
-; X64-NEXT:    shrq %cl, %rsi
 ; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    movq %rdi, %rcx
+; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NEXT:    shrq %cl, %rax
 ; X64-NEXT:    retq
        %shamt = and i64 %t, 191
        %res = lshr i64 %val, %shamt
@@ -147,7 +152,8 @@ define void @t5ptr(i64 %t, i64* %ptr) no
 ;
 ; X64-LABEL: t5ptr:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %ecx
+; X64-NEXT:    movq %rdi, %rcx
+; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NEXT:    shrq %cl, (%rsi)
 ; X64-NEXT:    retq
        %shamt = and i64 %t, 191
@@ -205,9 +211,9 @@ define i64 @big_mask_constant(i64 %x) no
 ;
 ; X64-LABEL: big_mask_constant:
 ; X64:       # %bb.0:
-; X64-NEXT:    shrq $7, %rdi
-; X64-NEXT:    andl $134217728, %edi # imm = 0x8000000
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    shrq $7, %rax
+; X64-NEXT:    andl $134217728, %eax # imm = 0x8000000
 ; X64-NEXT:    retq
   %and = and i64 %x, 17179869184 ; 0x400000000
   %sh = lshr i64 %and, 7
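
The shift-and hunks make the width rule visible on its own: for i64 shift amounts the copy is now the full "movq %rdi, %rcx" (previously "movl %edi, %ecx"), and the memory-destination tests (t3, t5ptr) gain only the kill comment or the wider copy, with no change in instruction count. Sketch of the 64-bit register form (hypothetical name; output inferred from t4/t5):

    define i64 @copyhint_shrq(i64 %amt, i64 %val) {
      %m = and i64 %amt, 63
      %r = lshr i64 %val, %m
      ret i64 %r
    }
    ; Expected roughly:
    ;   movq %rsi, %rax
    ;   movq %rdi, %rcx
    ;   # kill: def $cl killed $cl killed $rcx
    ;   shrq %cl, %rax
    ;   retq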

Modified: llvm/trunk/test/CodeGen/X86/shift-bmi2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/shift-bmi2.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/shift-bmi2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/shift-bmi2.ll Wed Sep 19 11:59:08 2018
@@ -26,8 +26,8 @@ define i32 @shl32i(i32 %x) nounwind uwta
 ;
 ; BMI264-LABEL: shl32i:
 ; BMI264:       # %bb.0:
-; BMI264-NEXT:    shll $5, %edi
 ; BMI264-NEXT:    movl %edi, %eax
+; BMI264-NEXT:    shll $5, %eax
 ; BMI264-NEXT:    retq
   %shl = shl i32 %x, 5
   ret i32 %shl
@@ -69,6 +69,24 @@ define i32 @shl32pi(i32* %p) nounwind uw
 }
 
 define i64 @shl64(i64 %x, i64 %shamt) nounwind uwtable readnone {
+; BMI2-LABEL: shl64:
+; BMI2:       # %bb.0:
+; BMI2-NEXT:    pushl %esi
+; BMI2-NEXT:    .cfi_def_cfa_offset 8
+; BMI2-NEXT:    .cfi_offset %esi, -8
+; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; BMI2-NEXT:    shldl %cl, %eax, %edx
+; BMI2-NEXT:    shlxl %ecx, %eax, %esi
+; BMI2-NEXT:    xorl %eax, %eax
+; BMI2-NEXT:    testb $32, %cl
+; BMI2-NEXT:    cmovnel %esi, %edx
+; BMI2-NEXT:    cmovel %esi, %eax
+; BMI2-NEXT:    popl %esi
+; BMI2-NEXT:    .cfi_def_cfa_offset 4
+; BMI2-NEXT:    retl
+;
 ; BMI264-LABEL: shl64:
 ; BMI264:       # %bb.0:
 ; BMI264-NEXT:    shlxq %rsi, %rdi, %rax
@@ -78,16 +96,43 @@ define i64 @shl64(i64 %x, i64 %shamt) no
 }
 
 define i64 @shl64i(i64 %x) nounwind uwtable readnone {
+; BMI2-LABEL: shl64i:
+; BMI2:       # %bb.0:
+; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; BMI2-NEXT:    shldl $7, %eax, %edx
+; BMI2-NEXT:    shll $7, %eax
+; BMI2-NEXT:    retl
+;
 ; BMI264-LABEL: shl64i:
 ; BMI264:       # %bb.0:
-; BMI264-NEXT:    shlq $7, %rdi
 ; BMI264-NEXT:    movq %rdi, %rax
+; BMI264-NEXT:    shlq $7, %rax
 ; BMI264-NEXT:    retq
   %shl = shl i64 %x, 7
   ret i64 %shl
 }
 
 define i64 @shl64p(i64* %p, i64 %shamt) nounwind uwtable readnone {
+; BMI2-LABEL: shl64p:
+; BMI2:       # %bb.0:
+; BMI2-NEXT:    pushl %esi
+; BMI2-NEXT:    .cfi_def_cfa_offset 8
+; BMI2-NEXT:    .cfi_offset %esi, -8
+; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; BMI2-NEXT:    movl (%eax), %esi
+; BMI2-NEXT:    movl 4(%eax), %edx
+; BMI2-NEXT:    shldl %cl, %esi, %edx
+; BMI2-NEXT:    shlxl %ecx, %esi, %esi
+; BMI2-NEXT:    xorl %eax, %eax
+; BMI2-NEXT:    testb $32, %cl
+; BMI2-NEXT:    cmovnel %esi, %edx
+; BMI2-NEXT:    cmovel %esi, %eax
+; BMI2-NEXT:    popl %esi
+; BMI2-NEXT:    .cfi_def_cfa_offset 4
+; BMI2-NEXT:    retl
+;
 ; BMI264-LABEL: shl64p:
 ; BMI264:       # %bb.0:
 ; BMI264-NEXT:    shlxq %rsi, (%rdi), %rax
@@ -98,6 +143,15 @@ define i64 @shl64p(i64* %p, i64 %shamt)
 }
 
 define i64 @shl64pi(i64* %p) nounwind uwtable readnone {
+; BMI2-LABEL: shl64pi:
+; BMI2:       # %bb.0:
+; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; BMI2-NEXT:    movl (%ecx), %eax
+; BMI2-NEXT:    movl 4(%ecx), %edx
+; BMI2-NEXT:    shldl $7, %eax, %edx
+; BMI2-NEXT:    shll $7, %eax
+; BMI2-NEXT:    retl
+;
 ; BMI264-LABEL: shl64pi:
 ; BMI264:       # %bb.0:
 ; BMI264-NEXT:    movq (%rdi), %rax
@@ -141,6 +195,24 @@ define i32 @lshr32p(i32* %p, i32 %shamt)
 }
 
 define i64 @lshr64(i64 %x, i64 %shamt) nounwind uwtable readnone {
+; BMI2-LABEL: lshr64:
+; BMI2:       # %bb.0:
+; BMI2-NEXT:    pushl %esi
+; BMI2-NEXT:    .cfi_def_cfa_offset 8
+; BMI2-NEXT:    .cfi_offset %esi, -8
+; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; BMI2-NEXT:    shrdl %cl, %edx, %eax
+; BMI2-NEXT:    shrxl %ecx, %edx, %esi
+; BMI2-NEXT:    xorl %edx, %edx
+; BMI2-NEXT:    testb $32, %cl
+; BMI2-NEXT:    cmovnel %esi, %eax
+; BMI2-NEXT:    cmovel %esi, %edx
+; BMI2-NEXT:    popl %esi
+; BMI2-NEXT:    .cfi_def_cfa_offset 4
+; BMI2-NEXT:    retl
+;
 ; BMI264-LABEL: lshr64:
 ; BMI264:       # %bb.0:
 ; BMI264-NEXT:    shrxq %rsi, %rdi, %rax
@@ -150,6 +222,25 @@ define i64 @lshr64(i64 %x, i64 %shamt) n
 }
 
 define i64 @lshr64p(i64* %p, i64 %shamt) nounwind uwtable readnone {
+; BMI2-LABEL: lshr64p:
+; BMI2:       # %bb.0:
+; BMI2-NEXT:    pushl %esi
+; BMI2-NEXT:    .cfi_def_cfa_offset 8
+; BMI2-NEXT:    .cfi_offset %esi, -8
+; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; BMI2-NEXT:    movl (%edx), %eax
+; BMI2-NEXT:    movl 4(%edx), %edx
+; BMI2-NEXT:    shrdl %cl, %edx, %eax
+; BMI2-NEXT:    shrxl %ecx, %edx, %esi
+; BMI2-NEXT:    xorl %edx, %edx
+; BMI2-NEXT:    testb $32, %cl
+; BMI2-NEXT:    cmovnel %esi, %eax
+; BMI2-NEXT:    cmovel %esi, %edx
+; BMI2-NEXT:    popl %esi
+; BMI2-NEXT:    .cfi_def_cfa_offset 4
+; BMI2-NEXT:    retl
+;
 ; BMI264-LABEL: lshr64p:
 ; BMI264:       # %bb.0:
 ; BMI264-NEXT:    shrxq %rsi, (%rdi), %rax
@@ -192,6 +283,24 @@ define i32 @ashr32p(i32* %p, i32 %shamt)
 }
 
 define i64 @ashr64(i64 %x, i64 %shamt) nounwind uwtable readnone {
+; BMI2-LABEL: ashr64:
+; BMI2:       # %bb.0:
+; BMI2-NEXT:    pushl %esi
+; BMI2-NEXT:    .cfi_def_cfa_offset 8
+; BMI2-NEXT:    .cfi_offset %esi, -8
+; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; BMI2-NEXT:    shrdl %cl, %edx, %eax
+; BMI2-NEXT:    sarxl %ecx, %edx, %esi
+; BMI2-NEXT:    sarl $31, %edx
+; BMI2-NEXT:    testb $32, %cl
+; BMI2-NEXT:    cmovnel %esi, %eax
+; BMI2-NEXT:    cmovel %esi, %edx
+; BMI2-NEXT:    popl %esi
+; BMI2-NEXT:    .cfi_def_cfa_offset 4
+; BMI2-NEXT:    retl
+;
 ; BMI264-LABEL: ashr64:
 ; BMI264:       # %bb.0:
 ; BMI264-NEXT:    sarxq %rsi, %rdi, %rax
@@ -201,6 +310,25 @@ define i64 @ashr64(i64 %x, i64 %shamt) n
 }
 
 define i64 @ashr64p(i64* %p, i64 %shamt) nounwind uwtable readnone {
+; BMI2-LABEL: ashr64p:
+; BMI2:       # %bb.0:
+; BMI2-NEXT:    pushl %esi
+; BMI2-NEXT:    .cfi_def_cfa_offset 8
+; BMI2-NEXT:    .cfi_offset %esi, -8
+; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; BMI2-NEXT:    movl (%edx), %eax
+; BMI2-NEXT:    movl 4(%edx), %edx
+; BMI2-NEXT:    shrdl %cl, %edx, %eax
+; BMI2-NEXT:    sarxl %ecx, %edx, %esi
+; BMI2-NEXT:    sarl $31, %edx
+; BMI2-NEXT:    testb $32, %cl
+; BMI2-NEXT:    cmovnel %esi, %eax
+; BMI2-NEXT:    cmovel %esi, %edx
+; BMI2-NEXT:    popl %esi
+; BMI2-NEXT:    .cfi_def_cfa_offset 4
+; BMI2-NEXT:    retl
+;
 ; BMI264-LABEL: ashr64p:
 ; BMI264:       # %bb.0:
 ; BMI264-NEXT:    sarxq %rsi, (%rdi), %rax
@@ -227,6 +355,21 @@ define i32 @shl32and(i32 %t, i32 %val) n
 }
 
 define i64 @shl64and(i64 %t, i64 %val) nounwind {
+; BMI2-LABEL: shl64and:
+; BMI2:       # %bb.0:
+; BMI2-NEXT:    pushl %esi
+; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; BMI2-NEXT:    shldl %cl, %eax, %edx
+; BMI2-NEXT:    shlxl %ecx, %eax, %esi
+; BMI2-NEXT:    xorl %eax, %eax
+; BMI2-NEXT:    testb $32, %cl
+; BMI2-NEXT:    cmovnel %esi, %edx
+; BMI2-NEXT:    cmovel %esi, %eax
+; BMI2-NEXT:    popl %esi
+; BMI2-NEXT:    retl
+;
 ; BMI264-LABEL: shl64and:
 ; BMI264:       # %bb.0:
 ; BMI264-NEXT:    shlxq %rdi, %rsi, %rax
@@ -253,6 +396,21 @@ define i32 @lshr32and(i32 %t, i32 %val)
 }
 
 define i64 @lshr64and(i64 %t, i64 %val) nounwind {
+; BMI2-LABEL: lshr64and:
+; BMI2:       # %bb.0:
+; BMI2-NEXT:    pushl %esi
+; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; BMI2-NEXT:    shrdl %cl, %edx, %eax
+; BMI2-NEXT:    shrxl %ecx, %edx, %esi
+; BMI2-NEXT:    xorl %edx, %edx
+; BMI2-NEXT:    testb $32, %cl
+; BMI2-NEXT:    cmovnel %esi, %eax
+; BMI2-NEXT:    cmovel %esi, %edx
+; BMI2-NEXT:    popl %esi
+; BMI2-NEXT:    retl
+;
 ; BMI264-LABEL: lshr64and:
 ; BMI264:       # %bb.0:
 ; BMI264-NEXT:    shrxq %rdi, %rsi, %rax
@@ -279,6 +437,21 @@ define i32 @ashr32and(i32 %t, i32 %val)
 }
 
 define i64 @ashr64and(i64 %t, i64 %val) nounwind {
+; BMI2-LABEL: ashr64and:
+; BMI2:       # %bb.0:
+; BMI2-NEXT:    pushl %esi
+; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; BMI2-NEXT:    shrdl %cl, %edx, %eax
+; BMI2-NEXT:    sarxl %ecx, %edx, %esi
+; BMI2-NEXT:    sarl $31, %edx
+; BMI2-NEXT:    testb $32, %cl
+; BMI2-NEXT:    cmovnel %esi, %eax
+; BMI2-NEXT:    cmovel %esi, %edx
+; BMI2-NEXT:    popl %esi
+; BMI2-NEXT:    retl
+;
 ; BMI264-LABEL: ashr64and:
 ; BMI264:       # %bb.0:
 ; BMI264-NEXT:    sarxq %rdi, %rsi, %rax
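
Most of the shift-bmi2 churn appears to come from regenerating the file rather than from the hint change itself: the 32-bit BMI2 RUN line seems to have had no CHECK blocks for the i64 cases, and they are added here as shldl/shrdl plus cmov on the two halves. The BMI264 hunks also show why the rewrite rarely fires under BMI2: shlx/shrx/sarx read the amount from any register and write a third, so only immediate shifts still need the mov-first form. Sketch (hypothetical name; the attribute stands in for an -mattr=+bmi2 RUN line):

    define i64 @copyhint_shlx(i64 %v, i64 %amt) #0 {
      %r = shl i64 %v, %amt
      ret i64 %r
    }
    attributes #0 = { "target-features"="+bmi2" }
    ; Expected on x86-64 roughly:
    ;   shlxq %rsi, %rdi, %rax
    ;   retq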

Modified: llvm/trunk/test/CodeGen/X86/shift-double-x86_64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/shift-double-x86_64.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/shift-double-x86_64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/shift-double-x86_64.ll Wed Sep 19 11:59:08 2018
@@ -6,10 +6,11 @@
 define i64 @test1(i64 %hi, i64 %lo, i64 %bits) nounwind {
 ; CHECK-LABEL: test1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    andl $63, %edx
-; CHECK-NEXT:    movl %edx, %ecx
-; CHECK-NEXT:    shldq %cl, %rsi, %rdi
+; CHECK-NEXT:    movq %rdx, %rcx
 ; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    andl $63, %ecx
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-NEXT:    shldq %cl, %rsi, %rax
 ; CHECK-NEXT:    retq
   %and = and i64 %bits, 63
   %and64 = sub i64 64, %and
@@ -22,10 +23,11 @@ define i64 @test1(i64 %hi, i64 %lo, i64
 define i64 @test2(i64 %hi, i64 %lo, i64 %bits) nounwind {
 ; CHECK-LABEL: test2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    andl $63, %edx
-; CHECK-NEXT:    movl %edx, %ecx
-; CHECK-NEXT:    shrdq %cl, %rdi, %rsi
+; CHECK-NEXT:    movq %rdx, %rcx
 ; CHECK-NEXT:    movq %rsi, %rax
+; CHECK-NEXT:    andl $63, %ecx
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-NEXT:    shrdq %cl, %rdi, %rax
 ; CHECK-NEXT:    retq
   %and = and i64 %bits, 63
   %and64 = sub i64 64, %and
@@ -38,9 +40,10 @@ define i64 @test2(i64 %hi, i64 %lo, i64
 define i64 @test3(i64 %hi, i64 %lo, i64 %bits) nounwind {
 ; CHECK-LABEL: test3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edx, %ecx
-; CHECK-NEXT:    shldq %cl, %rsi, %rdi
+; CHECK-NEXT:    movq %rdx, %rcx
 ; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-NEXT:    shldq %cl, %rsi, %rax
 ; CHECK-NEXT:    retq
   %bits64 = sub i64 64, %bits
   %sh_lo = lshr i64 %lo, %bits64
@@ -52,9 +55,10 @@ define i64 @test3(i64 %hi, i64 %lo, i64
 define i64 @test4(i64 %hi, i64 %lo, i64 %bits) nounwind {
 ; CHECK-LABEL: test4:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edx, %ecx
-; CHECK-NEXT:    shrdq %cl, %rdi, %rsi
+; CHECK-NEXT:    movq %rdx, %rcx
 ; CHECK-NEXT:    movq %rsi, %rax
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-NEXT:    shrdq %cl, %rdi, %rax
 ; CHECK-NEXT:    retq
   %bits64 = sub i64 64, %bits
   %sh_lo = shl i64 %hi, %bits64
@@ -66,9 +70,10 @@ define i64 @test4(i64 %hi, i64 %lo, i64
 define i64 @test5(i64 %hi, i64 %lo, i64 %bits) nounwind {
 ; CHECK-LABEL: test5:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edx, %ecx
-; CHECK-NEXT:    shldq %cl, %rsi, %rdi
+; CHECK-NEXT:    movq %rdx, %rcx
 ; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-NEXT:    shldq %cl, %rsi, %rax
 ; CHECK-NEXT:    retq
   %bits64 = xor i64 %bits, 63
   %lo2 = lshr i64 %lo, 1
@@ -81,9 +86,10 @@ define i64 @test5(i64 %hi, i64 %lo, i64
 define i64 @test6(i64 %hi, i64 %lo, i64 %bits) nounwind {
 ; CHECK-LABEL: test6:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edx, %ecx
-; CHECK-NEXT:    shrdq %cl, %rsi, %rdi
+; CHECK-NEXT:    movq %rdx, %rcx
 ; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-NEXT:    shrdq %cl, %rsi, %rax
 ; CHECK-NEXT:    retq
   %bits64 = xor i64 %bits, 63
   %lo2 = shl i64 %lo, 1
@@ -96,9 +102,10 @@ define i64 @test6(i64 %hi, i64 %lo, i64
 define i64 @test7(i64 %hi, i64 %lo, i64 %bits) nounwind {
 ; CHECK-LABEL: test7:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edx, %ecx
-; CHECK-NEXT:    shrdq %cl, %rsi, %rdi
+; CHECK-NEXT:    movq %rdx, %rcx
 ; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-NEXT:    shrdq %cl, %rsi, %rax
 ; CHECK-NEXT:    retq
   %bits64 = xor i64 %bits, 63
   %lo2 = add i64 %lo, %lo

Modified: llvm/trunk/test/CodeGen/X86/shift-double.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/shift-double.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/shift-double.ll (original)
+++ llvm/trunk/test/CodeGen/X86/shift-double.ll Wed Sep 19 11:59:08 2018
@@ -26,8 +26,9 @@ define i64 @test1(i64 %X, i8 %C) nounwin
 ; X64-LABEL: test1:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    shlq %cl, %rdi
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shlq %cl, %rax
 ; X64-NEXT:    retq
         %shift.upgrd.1 = zext i8 %C to i64              ; <i64> [#uses=1]
         %Y = shl i64 %X, %shift.upgrd.1         ; <i64> [#uses=1]
@@ -57,8 +58,9 @@ define i64 @test2(i64 %X, i8 %C) nounwin
 ; X64-LABEL: test2:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    sarq %cl, %rdi
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    sarq %cl, %rax
 ; X64-NEXT:    retq
         %shift.upgrd.2 = zext i8 %C to i64              ; <i64> [#uses=1]
         %Y = ashr i64 %X, %shift.upgrd.2                ; <i64> [#uses=1]
@@ -87,8 +89,9 @@ define i64 @test3(i64 %X, i8 %C) nounwin
 ; X64-LABEL: test3:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    shrq %cl, %rdi
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shrq %cl, %rax
 ; X64-NEXT:    retq
         %shift.upgrd.3 = zext i8 %C to i64              ; <i64> [#uses=1]
         %Y = lshr i64 %X, %shift.upgrd.3                ; <i64> [#uses=1]
@@ -109,8 +112,9 @@ define i32 @test4(i32 %A, i32 %B, i8 %C)
 ; X64-LABEL: test4:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edx, %ecx
-; X64-NEXT:    shldl %cl, %esi, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shldl %cl, %esi, %eax
 ; X64-NEXT:    retq
         %shift.upgrd.4 = zext i8 %C to i32              ; <i32> [#uses=1]
         %X = shl i32 %A, %shift.upgrd.4         ; <i32> [#uses=1]
@@ -133,8 +137,10 @@ define i16 @test5(i16 %A, i16 %B, i8 %C)
 ; X64-LABEL: test5:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edx, %ecx
-; X64-NEXT:    shldw %cl, %si, %di
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shldw %cl, %si, %ax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
         %shift.upgrd.6 = zext i8 %C to i16              ; <i16> [#uses=1]
         %X = shl i16 %A, %shift.upgrd.6         ; <i16> [#uses=1]
@@ -159,8 +165,9 @@ define i32 @test6(i32 %A, i32 %B, i8 %C)
 ; X64-LABEL: test6:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edx, %ecx
-; X64-NEXT:    shrdl %cl, %esi, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shrdl %cl, %esi, %eax
 ; X64-NEXT:    retq
         %shift.upgrd.4 = zext i8 %C to i32              ; <i32> [#uses=1]
         %X = lshr i32 %A, %shift.upgrd.4         ; <i32> [#uses=1]
@@ -183,8 +190,10 @@ define i16 @test7(i16 %A, i16 %B, i8 %C)
 ; X64-LABEL: test7:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edx, %ecx
-; X64-NEXT:    shrdw %cl, %si, %di
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shrdw %cl, %si, %ax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
         %shift.upgrd.6 = zext i8 %C to i16              ; <i16> [#uses=1]
         %X = lshr i16 %A, %shift.upgrd.6         ; <i16> [#uses=1]
@@ -212,10 +221,11 @@ define i64 @test8(i64 %val, i32 %bits) n
 ;
 ; X64-LABEL: test8:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $31, %sil
 ; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    shlq %cl, %rdi
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    andb $31, %cl
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shlq %cl, %rax
 ; X64-NEXT:    retq
   %and = and i32 %bits, 31
   %sh_prom = zext i32 %and to i64
@@ -235,10 +245,11 @@ define i64 @test9(i64 %val, i32 %bits) n
 ;
 ; X64-LABEL: test9:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $31, %sil
 ; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    sarq %cl, %rdi
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    andb $31, %cl
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    sarq %cl, %rax
 ; X64-NEXT:    retq
   %and = and i32 %bits, 31
   %sh_prom = zext i32 %and to i64
@@ -258,10 +269,11 @@ define i64 @test10(i64 %val, i32 %bits)
 ;
 ; X64-LABEL: test10:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $31, %sil
 ; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    shrq %cl, %rdi
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    andb $31, %cl
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shrq %cl, %rax
 ; X64-NEXT:    retq
   %and = and i32 %bits, 31
   %sh_prom = zext i32 %and to i64
@@ -284,10 +296,11 @@ define i32 @test11(i32 %hi, i32 %lo, i32
 ;
 ; X64-LABEL: test11:
 ; X64:       # %bb.0:
-; X64-NEXT:    andl $31, %edx
 ; X64-NEXT:    movl %edx, %ecx
-; X64-NEXT:    shldl %cl, %esi, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    andl $31, %ecx
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shldl %cl, %esi, %eax
 ; X64-NEXT:    retq
   %and = and i32 %bits, 31
   %and32 = sub i32 32, %and
@@ -310,10 +323,11 @@ define i32 @test12(i32 %hi, i32 %lo, i32
 ;
 ; X64-LABEL: test12:
 ; X64:       # %bb.0:
-; X64-NEXT:    andl $31, %edx
 ; X64-NEXT:    movl %edx, %ecx
-; X64-NEXT:    shrdl %cl, %edi, %esi
 ; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    andl $31, %ecx
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shrdl %cl, %edi, %eax
 ; X64-NEXT:    retq
   %and = and i32 %bits, 31
   %and32 = sub i32 32, %and
@@ -335,8 +349,9 @@ define i32 @test13(i32 %hi, i32 %lo, i32
 ; X64-LABEL: test13:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edx, %ecx
-; X64-NEXT:    shldl %cl, %esi, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shldl %cl, %esi, %eax
 ; X64-NEXT:    retq
   %bits32 = sub i32 32, %bits
   %sh_lo = lshr i32 %lo, %bits32
@@ -357,8 +372,9 @@ define i32 @test14(i32 %hi, i32 %lo, i32
 ; X64-LABEL: test14:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edx, %ecx
-; X64-NEXT:    shrdl %cl, %edi, %esi
 ; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shrdl %cl, %edi, %eax
 ; X64-NEXT:    retq
   %bits32 = sub i32 32, %bits
   %sh_lo = shl i32 %hi, %bits32
@@ -379,8 +395,9 @@ define i32 @test15(i32 %hi, i32 %lo, i32
 ; X64-LABEL: test15:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edx, %ecx
-; X64-NEXT:    shldl %cl, %esi, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shldl %cl, %esi, %eax
 ; X64-NEXT:    retq
   %bits32 = xor i32 %bits, 31
   %lo2 = lshr i32 %lo, 1
@@ -402,8 +419,9 @@ define i32 @test16(i32 %hi, i32 %lo, i32
 ; X64-LABEL: test16:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edx, %ecx
-; X64-NEXT:    shrdl %cl, %esi, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shrdl %cl, %esi, %eax
 ; X64-NEXT:    retq
   %bits32 = xor i32 %bits, 31
   %lo2 = shl i32 %lo, 1
@@ -425,8 +443,9 @@ define i32 @test17(i32 %hi, i32 %lo, i32
 ; X64-LABEL: test17:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edx, %ecx
-; X64-NEXT:    shrdl %cl, %esi, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shrdl %cl, %esi, %eax
 ; X64-NEXT:    retq
   %bits32 = xor i32 %bits, 31
   %lo2 = add i32 %lo, %lo

Modified: llvm/trunk/test/CodeGen/X86/shift-pair.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/shift-pair.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/shift-pair.ll (original)
+++ llvm/trunk/test/CodeGen/X86/shift-pair.ll Wed Sep 19 11:59:08 2018
@@ -4,9 +4,9 @@
 define i64 @test(i64 %A) {
 ; CHECK-LABEL: test:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    shrq $54, %rdi
-; CHECK-NEXT:    andl $-4, %edi
 ; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    shrq $54, %rax
+; CHECK-NEXT:    andl $-4, %eax
 ; CHECK-NEXT:    retq
     %B = lshr i64 %A, 56
     %C = shl i64 %B, 2

Modified: llvm/trunk/test/CodeGen/X86/shuffle-of-insert.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/shuffle-of-insert.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/shuffle-of-insert.ll (original)
+++ llvm/trunk/test/CodeGen/X86/shuffle-of-insert.ll Wed Sep 19 11:59:08 2018
@@ -6,15 +6,15 @@
 define <4 x i32> @ins_elt_0(i32 %x, <4 x i32> %v1, <4 x i32> %v2) {
 ; SSE2-LABEL: ins_elt_0:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movd %edi, %xmm0
-; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
 ; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    movd %edi, %xmm1
+; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
 ; SSE2-NEXT:    retq
 ;
 ; SSE4-LABEL: ins_elt_0:
 ; SSE4:       # %bb.0:
-; SSE4-NEXT:    pinsrd $0, %edi, %xmm1
 ; SSE4-NEXT:    movdqa %xmm1, %xmm0
+; SSE4-NEXT:    pinsrd $0, %edi, %xmm0
 ; SSE4-NEXT:    retq
 ;
 ; AVX-LABEL: ins_elt_0:
@@ -36,8 +36,8 @@ define <4 x i32> @ins_elt_1(i32 %x, <4 x
 ;
 ; SSE4-LABEL: ins_elt_1:
 ; SSE4:       # %bb.0:
-; SSE4-NEXT:    pinsrd $1, %edi, %xmm1
 ; SSE4-NEXT:    movdqa %xmm1, %xmm0
+; SSE4-NEXT:    pinsrd $1, %edi, %xmm0
 ; SSE4-NEXT:    retq
 ;
 ; AVX-LABEL: ins_elt_1:
@@ -54,16 +54,16 @@ define <4 x i32> @ins_elt_1(i32 %x, <4 x
 define <4 x i32> @ins_elt_2_commute(i32 %x, <4 x i32> %v1, <4 x i32> %v2) {
 ; SSE2-LABEL: ins_elt_2_commute:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movd %edi, %xmm0
-; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
-; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
 ; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    movd %edi, %xmm1
+; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
 ; SSE2-NEXT:    retq
 ;
 ; SSE4-LABEL: ins_elt_2_commute:
 ; SSE4:       # %bb.0:
-; SSE4-NEXT:    pinsrd $2, %edi, %xmm1
 ; SSE4-NEXT:    movdqa %xmm1, %xmm0
+; SSE4-NEXT:    pinsrd $2, %edi, %xmm0
 ; SSE4-NEXT:    retq
 ;
 ; AVX-LABEL: ins_elt_2_commute:
@@ -78,16 +78,16 @@ define <4 x i32> @ins_elt_2_commute(i32
 define <4 x i32> @ins_elt_3_commute(i32 %x, <4 x i32> %v1, <4 x i32> %v2) {
 ; SSE2-LABEL: ins_elt_3_commute:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movd %edi, %xmm0
-; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0]
-; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
 ; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    movd %edi, %xmm1
+; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
 ; SSE2-NEXT:    retq
 ;
 ; SSE4-LABEL: ins_elt_3_commute:
 ; SSE4:       # %bb.0:
-; SSE4-NEXT:    pinsrd $3, %edi, %xmm1
 ; SSE4-NEXT:    movdqa %xmm1, %xmm0
+; SSE4-NEXT:    pinsrd $3, %edi, %xmm0
 ; SSE4-NEXT:    retq
 ;
 ; AVX-LABEL: ins_elt_3_commute:
@@ -104,16 +104,16 @@ define <4 x i32> @ins_elt_3_commute(i32
 define <4 x i32> @ins_elt_0_to_2(i32 %x, <4 x i32> %v1, <4 x i32> %v2) {
 ; SSE2-LABEL: ins_elt_0_to_2:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movd %edi, %xmm0
-; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
-; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
 ; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    movd %edi, %xmm1
+; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
 ; SSE2-NEXT:    retq
 ;
 ; SSE4-LABEL: ins_elt_0_to_2:
 ; SSE4:       # %bb.0:
-; SSE4-NEXT:    pinsrd $2, %edi, %xmm1
 ; SSE4-NEXT:    movdqa %xmm1, %xmm0
+; SSE4-NEXT:    pinsrd $2, %edi, %xmm0
 ; SSE4-NEXT:    retq
 ;
 ; AVX-LABEL: ins_elt_0_to_2:
@@ -128,15 +128,15 @@ define <4 x i32> @ins_elt_0_to_2(i32 %x,
 define <4 x i32> @ins_elt_1_to_0(i32 %x, <4 x i32> %v1, <4 x i32> %v2) {
 ; SSE2-LABEL: ins_elt_1_to_0:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movd %edi, %xmm0
-; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
 ; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    movd %edi, %xmm1
+; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
 ; SSE2-NEXT:    retq
 ;
 ; SSE4-LABEL: ins_elt_1_to_0:
 ; SSE4:       # %bb.0:
-; SSE4-NEXT:    pinsrd $0, %edi, %xmm1
 ; SSE4-NEXT:    movdqa %xmm1, %xmm0
+; SSE4-NEXT:    pinsrd $0, %edi, %xmm0
 ; SSE4-NEXT:    retq
 ;
 ; AVX-LABEL: ins_elt_1_to_0:
@@ -151,16 +151,16 @@ define <4 x i32> @ins_elt_1_to_0(i32 %x,
 define <4 x i32> @ins_elt_2_to_3(i32 %x, <4 x i32> %v1, <4 x i32> %v2) {
 ; SSE2-LABEL: ins_elt_2_to_3:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movd %edi, %xmm0
-; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0]
-; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
 ; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    movd %edi, %xmm1
+; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
 ; SSE2-NEXT:    retq
 ;
 ; SSE4-LABEL: ins_elt_2_to_3:
 ; SSE4:       # %bb.0:
-; SSE4-NEXT:    pinsrd $3, %edi, %xmm1
 ; SSE4-NEXT:    movdqa %xmm1, %xmm0
+; SSE4-NEXT:    pinsrd $3, %edi, %xmm0
 ; SSE4-NEXT:    retq
 ;
 ; AVX-LABEL: ins_elt_2_to_3:
@@ -182,8 +182,8 @@ define <4 x i32> @ins_elt_3_to_1(i32 %x,
 ;
 ; SSE4-LABEL: ins_elt_3_to_1:
 ; SSE4:       # %bb.0:
-; SSE4-NEXT:    pinsrd $1, %edi, %xmm1
 ; SSE4-NEXT:    movdqa %xmm1, %xmm0
+; SSE4-NEXT:    pinsrd $1, %edi, %xmm0
 ; SSE4-NEXT:    retq
 ;
 ; AVX-LABEL: ins_elt_3_to_1:

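The shuffle-of-insert.ll changes show the same hints working for vector
registers: the movaps/movdqa copy of the second source into the return
register %xmm0 is now emitted first, the pinsrd/shufps sequence builds the
result in %xmm0 directly, and the just-freed %xmm1 serves as scratch for the
inserted scalar in the SSE2 versions. A minimal sketch (hypothetical
function, not one of the tests above):

  define <4 x i32> @ins_sketch(i32 %x, <4 x i32> %v1, <4 x i32> %v2) {
    ; expected SSE4.1 codegen after this patch (illustrative):
    ;   movdqa %xmm1, %xmm0        ; hinted copy into the return register
    ;   pinsrd $0, %edi, %xmm0     ; insert lands in %xmm0 directly
    %ins = insertelement <4 x i32> %v2, i32 %x, i32 0
    ret <4 x i32> %ins
  }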
Modified: llvm/trunk/test/CodeGen/X86/signbit-shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/signbit-shift.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/signbit-shift.ll (original)
+++ llvm/trunk/test/CodeGen/X86/signbit-shift.ll Wed Sep 19 11:59:08 2018
@@ -6,9 +6,9 @@
 define i32 @zext_ifpos(i32 %x) {
 ; CHECK-LABEL: zext_ifpos:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    notl %edi
-; CHECK-NEXT:    shrl $31, %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    notl %eax
+; CHECK-NEXT:    shrl $31, %eax
 ; CHECK-NEXT:    retq
   %c = icmp sgt i32 %x, -1
   %e = zext i1 %c to i32
@@ -57,9 +57,9 @@ define i32 @sel_ifpos_tval_bigger(i32 %x
 define i32 @sext_ifpos(i32 %x) {
 ; CHECK-LABEL: sext_ifpos:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    notl %edi
-; CHECK-NEXT:    sarl $31, %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    notl %eax
+; CHECK-NEXT:    sarl $31, %eax
 ; CHECK-NEXT:    retq
   %c = icmp sgt i32 %x, -1
   %e = sext i1 %c to i32
@@ -109,8 +109,8 @@ define i32 @sel_ifpos_fval_bigger(i32 %x
 define i32 @zext_ifneg(i32 %x) {
 ; CHECK-LABEL: zext_ifneg:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    shrl $31, %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    shrl $31, %eax
 ; CHECK-NEXT:    retq
   %c = icmp slt i32 %x, 0
   %r = zext i1 %c to i32
@@ -145,8 +145,8 @@ define i32 @sel_ifneg_tval_bigger(i32 %x
 define i32 @sext_ifneg(i32 %x) {
 ; CHECK-LABEL: sext_ifneg:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    sarl $31, %edi
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    sarl $31, %eax
 ; CHECK-NEXT:    retq
   %c = icmp slt i32 %x, 0
   %r = sext i1 %c to i32

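In signbit-shift.ll, icmp sgt %x, -1 tests that the sign bit is clear, so
zext of it already lowers to notl + shrl $31 (and sext to notl + sarl $31);
that fold predates this commit. The only change here is that the copy into
%eax comes first so the ALU ops target the return register. The identity
behind the fold, in the shape of zext_ifpos (hypothetical function):

  define i32 @ifpos_sketch(i32 %x) {
    ; (x > -1) as i32 == (~x) >> 31 with a logical shift
    %c = icmp sgt i32 %x, -1
    %e = zext i1 %c to i32
    ret i32 %e
  }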
Modified: llvm/trunk/test/CodeGen/X86/sret-implicit.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sret-implicit.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sret-implicit.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sret-implicit.ll Wed Sep 19 11:59:08 2018
@@ -10,8 +10,8 @@ define void @sret_void(i32* sret %p) {
 }
 
 ; X64-LABEL: sret_void
-; X64-DAG: movl $0, (%rdi)
 ; X64-DAG: movq %rdi, %rax
+; X64-DAG: movl $0, (%rdi)
 ; X64: retq
 
 ; X86-LABEL: sret_void
@@ -24,8 +24,8 @@ define i256 @sret_demoted() {
 }
 
 ; X64-LABEL: sret_demoted
-; X64-DAG: movq $0, (%rdi)
 ; X64-DAG: movq %rdi, %rax
+; X64-DAG: movq $0, (%rdi)
 ; X64: retq
 
 ; X86-LABEL: sret_demoted

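sret-implicit.ll covers the ABI requirement that an sret function return its
hidden pointer argument (passed in %rdi on x86-64 SysV) in %rax; the checks
use -DAG because the hinted copy and the store are independent and may now
be emitted in either order. Minimal form (hypothetical function, mirroring
sret_void above):

  define void @sret_sketch(i32* sret %p) {
    ; expected X64 codegen (the two instructions may appear in either order):
    ;   movq %rdi, %rax     ; sret pointer is also the return value
    ;   movl $0, (%rdi)
    store i32 0, i32* %p
    ret void
  }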
Modified: llvm/trunk/test/CodeGen/X86/sse1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse1.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse1.ll Wed Sep 19 11:59:08 2018
@@ -190,26 +190,27 @@ define <4 x i32> @PR30512(<4 x i32> %x,
 ;
 ; X64-LABEL: PR30512:
 ; X64:       # %bb.0:
-; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    xorl %edi, %edi
 ; X64-NEXT:    cmpl {{[0-9]+}}(%rsp), %r8d
-; X64-NEXT:    sete %al
-; X64-NEXT:    negl %eax
-; X64-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    sete %dil
+; X64-NEXT:    negl %edi
+; X64-NEXT:    movl %edi, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    xorl %edi, %edi
 ; X64-NEXT:    cmpl {{[0-9]+}}(%rsp), %ecx
-; X64-NEXT:    sete %al
-; X64-NEXT:    negl %eax
-; X64-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    sete %dil
+; X64-NEXT:    negl %edi
+; X64-NEXT:    movl %edi, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    xorl %ecx, %ecx
 ; X64-NEXT:    cmpl {{[0-9]+}}(%rsp), %edx
-; X64-NEXT:    sete %al
-; X64-NEXT:    negl %eax
-; X64-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    sete %cl
+; X64-NEXT:    negl %ecx
+; X64-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    xorl %ecx, %ecx
 ; X64-NEXT:    cmpl %r9d, %esi
-; X64-NEXT:    sete %al
-; X64-NEXT:    negl %eax
-; X64-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    sete %cl
+; X64-NEXT:    negl %ecx
+; X64-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X64-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X64-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
@@ -218,8 +219,7 @@ define <4 x i32> @PR30512(<4 x i32> %x,
 ; X64-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
 ; X64-NEXT:    movlhps {{.*#+}} xmm2 = xmm2[0],xmm1[0]
 ; X64-NEXT:    andps {{.*}}(%rip), %xmm2
-; X64-NEXT:    movaps %xmm2, (%rdi)
-; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movaps %xmm2, (%rax)
 ; X64-NEXT:    retq
   %cmp = icmp eq <4 x i32> %x, %y
   %zext = zext <4 x i1> %cmp to <4 x i32>

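The PR30512 change is a knock-on effect: once %rdi is copied into %rax up
front (so the final movaps stores through %rax), %edi and %ecx become the
scratch registers for the sete/negl lanes instead of %eax. Each lane
materializes a compare result as an all-ones/all-zeros i32, roughly like
this hypothetical single-lane function:

  define i32 @setcc_lane(i32 %a, i32 %b) {
    ; expected codegen (illustrative):
    ;   xorl %eax, %eax     ; zero the scratch so sete yields a full i32
    ;   cmpl %esi, %edi
    ;   sete %al
    ;   negl %eax           ; 0/1 -> 0/-1 mask
    %c = icmp eq i32 %a, %b
    %z = zext i1 %c to i32
    %n = sub i32 0, %z
    ret i32 %n
  }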
Modified: llvm/trunk/test/CodeGen/X86/sse3-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse3-intrinsics-x86.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse3-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse3-intrinsics-x86.ll Wed Sep 19 11:59:08 2018
@@ -143,8 +143,8 @@ define void @monitor(i8* %P, i32 %E, i32
 ;
 ; X64-LABEL: monitor:
 ; X64:       ## %bb.0:
-; X64-NEXT:    leaq (%rdi), %rax ## encoding: [0x48,0x8d,0x07]
 ; X64-NEXT:    movl %esi, %ecx ## encoding: [0x89,0xf1]
+; X64-NEXT:    leaq (%rdi), %rax ## encoding: [0x48,0x8d,0x07]
 ; X64-NEXT:    monitor ## encoding: [0x0f,0x01,0xc8]
 ; X64-NEXT:    retq ## encoding: [0xc3]
   tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H)
@@ -162,8 +162,8 @@ define void @mwait(i32 %E, i32 %H) nounw
 ;
 ; X64-LABEL: mwait:
 ; X64:       ## %bb.0:
-; X64-NEXT:    movl %edi, %ecx ## encoding: [0x89,0xf9]
 ; X64-NEXT:    movl %esi, %eax ## encoding: [0x89,0xf0]
+; X64-NEXT:    movl %edi, %ecx ## encoding: [0x89,0xf9]
 ; X64-NEXT:    mwait ## encoding: [0x0f,0x01,0xc9]
 ; X64-NEXT:    retq ## encoding: [0xc3]
   tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H)

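monitor and mwait take all their operands in fixed registers (monitor reads
%rax/%ecx/%edx, mwait reads %ecx/%eax), so nothing can be coalesced away
here; the hint sorting merely changes the order in which the argument copies
are emitted, which is why only swapped lines appear in this file and in the
per-CPU sse3-schedule.ll checks below. For reference (hypothetical wrapper;
the intrinsic is declared exactly as used in the test above):

  declare void @llvm.x86.sse3.mwait(i32, i32)

  define void @mwait_sketch(i32 %e, i32 %h) {
    ; expected X64 codegen: movl %esi, %eax / movl %edi, %ecx / mwait
    tail call void @llvm.x86.sse3.mwait(i32 %e, i32 %h)
    ret void
  }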
Modified: llvm/trunk/test/CodeGen/X86/sse3-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse3-schedule.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse3-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse3-schedule.ll Wed Sep 19 11:59:08 2018
@@ -768,120 +768,120 @@ declare <16 x i8> @llvm.x86.sse3.ldu.dq(
 define void @test_monitor(i8* %a0, i32 %a1, i32 %a2) {
 ; GENERIC-LABEL: test_monitor:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
 ; GENERIC-NEXT:    movl %esi, %ecx # sched: [1:0.33]
+; GENERIC-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
 ; GENERIC-NEXT:    monitor # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_monitor:
 ; ATOM:       # %bb.0:
-; ATOM-NEXT:    leaq (%rdi), %rax # sched: [1:1.00]
 ; ATOM-NEXT:    movl %esi, %ecx # sched: [1:0.50]
+; ATOM-NEXT:    leaq (%rdi), %rax # sched: [1:1.00]
 ; ATOM-NEXT:    monitor # sched: [45:22.50]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_monitor:
 ; SLM:       # %bb.0:
-; SLM-NEXT:    leaq (%rdi), %rax # sched: [1:1.00]
 ; SLM-NEXT:    movl %esi, %ecx # sched: [1:0.50]
+; SLM-NEXT:    leaq (%rdi), %rax # sched: [1:1.00]
 ; SLM-NEXT:    monitor # sched: [100:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-SSE-LABEL: test_monitor:
 ; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
 ; SANDY-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.33]
+; SANDY-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
 ; SANDY-SSE-NEXT:    monitor # sched: [100:0.33]
 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_monitor:
 ; SANDY:       # %bb.0:
-; SANDY-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
 ; SANDY-NEXT:    movl %esi, %ecx # sched: [1:0.33]
+; SANDY-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
 ; SANDY-NEXT:    monitor # sched: [100:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-SSE-LABEL: test_monitor:
 ; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
 ; HASWELL-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
 ; HASWELL-SSE-NEXT:    monitor # sched: [100:0.25]
 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
 ;
 ; HASWELL-LABEL: test_monitor:
 ; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
 ; HASWELL-NEXT:    movl %esi, %ecx # sched: [1:0.25]
+; HASWELL-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
 ; HASWELL-NEXT:    monitor # sched: [100:0.25]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-SSE-LABEL: test_monitor:
 ; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
 ; BROADWELL-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
 ; BROADWELL-SSE-NEXT:    monitor # sched: [100:0.25]
 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_monitor:
 ; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
 ; BROADWELL-NEXT:    movl %esi, %ecx # sched: [1:0.25]
+; BROADWELL-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
 ; BROADWELL-NEXT:    monitor # sched: [100:0.25]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-SSE-LABEL: test_monitor:
 ; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
 ; SKYLAKE-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
 ; SKYLAKE-SSE-NEXT:    monitor # sched: [100:0.25]
 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_monitor:
 ; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
 ; SKYLAKE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
+; SKYLAKE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
 ; SKYLAKE-NEXT:    monitor # sched: [100:0.25]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-SSE-LABEL: test_monitor:
 ; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
 ; SKX-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
+; SKX-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
 ; SKX-SSE-NEXT:    monitor # sched: [100:0.25]
 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_monitor:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
 ; SKX-NEXT:    movl %esi, %ecx # sched: [1:0.25]
+; SKX-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
 ; SKX-NEXT:    monitor # sched: [100:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-SSE-LABEL: test_monitor:
 ; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
 ; BTVER2-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
 ; BTVER2-SSE-NEXT:    monitor # sched: [100:0.50]
 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
 ;
 ; BTVER2-LABEL: test_monitor:
 ; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    movl %esi, %ecx # sched: [1:0.50]
+; BTVER2-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    monitor # sched: [100:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-SSE-LABEL: test_monitor:
 ; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.25]
 ; ZNVER1-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.25]
 ; ZNVER1-SSE-NEXT:    monitor # sched: [100:0.25]
 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
 ;
 ; ZNVER1-LABEL: test_monitor:
 ; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    leaq (%rdi), %rax # sched: [1:0.25]
 ; ZNVER1-NEXT:    movl %esi, %ecx # sched: [1:0.25]
+; ZNVER1-NEXT:    leaq (%rdi), %rax # sched: [1:0.25]
 ; ZNVER1-NEXT:    monitor # sched: [100:0.25]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   tail call void @llvm.x86.sse3.monitor(i8* %a0, i32 %a1, i32 %a2)
@@ -1273,120 +1273,120 @@ define <4 x float> @test_movsldup(<4 x f
 define void @test_mwait(i32 %a0, i32 %a1) {
 ; GENERIC-LABEL: test_mwait:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    movl %edi, %ecx # sched: [1:0.33]
 ; GENERIC-NEXT:    movl %esi, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    movl %edi, %ecx # sched: [1:0.33]
 ; GENERIC-NEXT:    mwait # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_mwait:
 ; ATOM:       # %bb.0:
-; ATOM-NEXT:    movl %edi, %ecx # sched: [1:0.50]
 ; ATOM-NEXT:    movl %esi, %eax # sched: [1:0.50]
+; ATOM-NEXT:    movl %edi, %ecx # sched: [1:0.50]
 ; ATOM-NEXT:    mwait # sched: [46:23.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_mwait:
 ; SLM:       # %bb.0:
-; SLM-NEXT:    movl %edi, %ecx # sched: [1:0.50]
 ; SLM-NEXT:    movl %esi, %eax # sched: [1:0.50]
+; SLM-NEXT:    movl %edi, %ecx # sched: [1:0.50]
 ; SLM-NEXT:    mwait # sched: [100:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-SSE-LABEL: test_mwait:
 ; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.33]
 ; SANDY-SSE-NEXT:    movl %esi, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.33]
 ; SANDY-SSE-NEXT:    mwait # sched: [100:0.33]
 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_mwait:
 ; SANDY:       # %bb.0:
-; SANDY-NEXT:    movl %edi, %ecx # sched: [1:0.33]
 ; SANDY-NEXT:    movl %esi, %eax # sched: [1:0.33]
+; SANDY-NEXT:    movl %edi, %ecx # sched: [1:0.33]
 ; SANDY-NEXT:    mwait # sched: [100:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-SSE-LABEL: test_mwait:
 ; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
 ; HASWELL-SSE-NEXT:    movl %esi, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
 ; HASWELL-SSE-NEXT:    mwait # sched: [20:2.50]
 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
 ;
 ; HASWELL-LABEL: test_mwait:
 ; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    movl %edi, %ecx # sched: [1:0.25]
 ; HASWELL-NEXT:    movl %esi, %eax # sched: [1:0.25]
+; HASWELL-NEXT:    movl %edi, %ecx # sched: [1:0.25]
 ; HASWELL-NEXT:    mwait # sched: [20:2.50]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-SSE-LABEL: test_mwait:
 ; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
 ; BROADWELL-SSE-NEXT:    movl %esi, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
 ; BROADWELL-SSE-NEXT:    mwait # sched: [100:0.25]
 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_mwait:
 ; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    movl %edi, %ecx # sched: [1:0.25]
 ; BROADWELL-NEXT:    movl %esi, %eax # sched: [1:0.25]
+; BROADWELL-NEXT:    movl %edi, %ecx # sched: [1:0.25]
 ; BROADWELL-NEXT:    mwait # sched: [100:0.25]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-SSE-LABEL: test_mwait:
 ; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
 ; SKYLAKE-SSE-NEXT:    movl %esi, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
 ; SKYLAKE-SSE-NEXT:    mwait # sched: [20:2.50]
 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_mwait:
 ; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
 ; SKYLAKE-NEXT:    movl %esi, %eax # sched: [1:0.25]
+; SKYLAKE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
 ; SKYLAKE-NEXT:    mwait # sched: [20:2.50]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-SSE-LABEL: test_mwait:
 ; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
 ; SKX-SSE-NEXT:    movl %esi, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
 ; SKX-SSE-NEXT:    mwait # sched: [20:2.50]
 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_mwait:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    movl %edi, %ecx # sched: [1:0.25]
 ; SKX-NEXT:    movl %esi, %eax # sched: [1:0.25]
+; SKX-NEXT:    movl %edi, %ecx # sched: [1:0.25]
 ; SKX-NEXT:    mwait # sched: [20:2.50]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-SSE-LABEL: test_mwait:
 ; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.50]
 ; BTVER2-SSE-NEXT:    movl %esi, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.50]
 ; BTVER2-SSE-NEXT:    mwait # sched: [100:0.50]
 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
 ;
 ; BTVER2-LABEL: test_mwait:
 ; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    movl %edi, %ecx # sched: [1:0.50]
 ; BTVER2-NEXT:    movl %esi, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    movl %edi, %ecx # sched: [1:0.50]
 ; BTVER2-NEXT:    mwait # sched: [100:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-SSE-LABEL: test_mwait:
 ; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
 ; ZNVER1-SSE-NEXT:    movl %esi, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
 ; ZNVER1-SSE-NEXT:    mwait # sched: [100:0.25]
 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
 ;
 ; ZNVER1-LABEL: test_mwait:
 ; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    movl %edi, %ecx # sched: [1:0.25]
 ; ZNVER1-NEXT:    movl %esi, %eax # sched: [1:0.25]
+; ZNVER1-NEXT:    movl %edi, %ecx # sched: [1:0.25]
 ; ZNVER1-NEXT:    mwait # sched: [100:0.25]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   tail call void @llvm.x86.sse3.mwait(i32 %a0, i32 %a1)

Modified: llvm/trunk/test/CodeGen/X86/sse42-intrinsics-fast-isel-x86_64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse42-intrinsics-fast-isel-x86_64.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse42-intrinsics-fast-isel-x86_64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse42-intrinsics-fast-isel-x86_64.ll Wed Sep 19 11:59:08 2018
@@ -19,8 +19,8 @@ declare i64 @llvm.x86.sse42.crc32.64.8(i
 define i64 @test_mm_crc64_u64(i64 %a0, i64 %a1) nounwind{
 ; CHECK-LABEL: test_mm_crc64_u64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    crc32q %rsi, %rdi
 ; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    crc32q %rsi, %rax
 ; CHECK-NEXT:    retq
   %res = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1)
   ret i64 %res

Modified: llvm/trunk/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll Wed Sep 19 11:59:08 2018
@@ -35,22 +35,22 @@ define i32 @test_mm_cmpestra(<2 x i64> %
 ;
 ; X64-SSE-LABEL: test_mm_cmpestra:
 ; X64-SSE:       # %bb.0:
-; X64-SSE-NEXT:    xorl %r8d, %r8d
-; X64-SSE-NEXT:    movl %edi, %eax
 ; X64-SSE-NEXT:    movl %esi, %edx
+; X64-SSE-NEXT:    movl %edi, %eax
+; X64-SSE-NEXT:    xorl %esi, %esi
 ; X64-SSE-NEXT:    pcmpestri $7, %xmm1, %xmm0
-; X64-SSE-NEXT:    seta %r8b
-; X64-SSE-NEXT:    movl %r8d, %eax
+; X64-SSE-NEXT:    seta %sil
+; X64-SSE-NEXT:    movl %esi, %eax
 ; X64-SSE-NEXT:    retq
 ;
 ; X64-AVX-LABEL: test_mm_cmpestra:
 ; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    xorl %r8d, %r8d
-; X64-AVX-NEXT:    movl %edi, %eax
 ; X64-AVX-NEXT:    movl %esi, %edx
+; X64-AVX-NEXT:    movl %edi, %eax
+; X64-AVX-NEXT:    xorl %esi, %esi
 ; X64-AVX-NEXT:    vpcmpestri $7, %xmm1, %xmm0
-; X64-AVX-NEXT:    seta %r8b
-; X64-AVX-NEXT:    movl %r8d, %eax
+; X64-AVX-NEXT:    seta %sil
+; X64-AVX-NEXT:    movl %esi, %eax
 ; X64-AVX-NEXT:    retq
   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
   %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
@@ -86,22 +86,22 @@ define i32 @test_mm_cmpestrc(<2 x i64> %
 ;
 ; X64-SSE-LABEL: test_mm_cmpestrc:
 ; X64-SSE:       # %bb.0:
-; X64-SSE-NEXT:    xorl %r8d, %r8d
-; X64-SSE-NEXT:    movl %edi, %eax
 ; X64-SSE-NEXT:    movl %esi, %edx
+; X64-SSE-NEXT:    movl %edi, %eax
+; X64-SSE-NEXT:    xorl %esi, %esi
 ; X64-SSE-NEXT:    pcmpestri $7, %xmm1, %xmm0
-; X64-SSE-NEXT:    setb %r8b
-; X64-SSE-NEXT:    movl %r8d, %eax
+; X64-SSE-NEXT:    setb %sil
+; X64-SSE-NEXT:    movl %esi, %eax
 ; X64-SSE-NEXT:    retq
 ;
 ; X64-AVX-LABEL: test_mm_cmpestrc:
 ; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    xorl %r8d, %r8d
-; X64-AVX-NEXT:    movl %edi, %eax
 ; X64-AVX-NEXT:    movl %esi, %edx
+; X64-AVX-NEXT:    movl %edi, %eax
+; X64-AVX-NEXT:    xorl %esi, %esi
 ; X64-AVX-NEXT:    vpcmpestri $7, %xmm1, %xmm0
-; X64-AVX-NEXT:    setb %r8b
-; X64-AVX-NEXT:    movl %r8d, %eax
+; X64-AVX-NEXT:    setb %sil
+; X64-AVX-NEXT:    movl %esi, %eax
 ; X64-AVX-NEXT:    retq
   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
   %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
@@ -129,16 +129,16 @@ define i32 @test_mm_cmpestri(<2 x i64> %
 ;
 ; X64-SSE-LABEL: test_mm_cmpestri:
 ; X64-SSE:       # %bb.0:
-; X64-SSE-NEXT:    movl %edi, %eax
 ; X64-SSE-NEXT:    movl %esi, %edx
+; X64-SSE-NEXT:    movl %edi, %eax
 ; X64-SSE-NEXT:    pcmpestri $7, %xmm1, %xmm0
 ; X64-SSE-NEXT:    movl %ecx, %eax
 ; X64-SSE-NEXT:    retq
 ;
 ; X64-AVX-LABEL: test_mm_cmpestri:
 ; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    movl %edi, %eax
 ; X64-AVX-NEXT:    movl %esi, %edx
+; X64-AVX-NEXT:    movl %edi, %eax
 ; X64-AVX-NEXT:    vpcmpestri $7, %xmm1, %xmm0
 ; X64-AVX-NEXT:    movl %ecx, %eax
 ; X64-AVX-NEXT:    retq
@@ -166,15 +166,15 @@ define <2 x i64> @test_mm_cmpestrm(<2 x
 ;
 ; X64-SSE-LABEL: test_mm_cmpestrm:
 ; X64-SSE:       # %bb.0:
-; X64-SSE-NEXT:    movl %edi, %eax
 ; X64-SSE-NEXT:    movl %esi, %edx
+; X64-SSE-NEXT:    movl %edi, %eax
 ; X64-SSE-NEXT:    pcmpestrm $7, %xmm1, %xmm0
 ; X64-SSE-NEXT:    retq
 ;
 ; X64-AVX-LABEL: test_mm_cmpestrm:
 ; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    movl %edi, %eax
 ; X64-AVX-NEXT:    movl %esi, %edx
+; X64-AVX-NEXT:    movl %edi, %eax
 ; X64-AVX-NEXT:    vpcmpestrm $7, %xmm1, %xmm0
 ; X64-AVX-NEXT:    retq
   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
@@ -212,22 +212,22 @@ define i32 @test_mm_cmpestro(<2 x i64> %
 ;
 ; X64-SSE-LABEL: test_mm_cmpestro:
 ; X64-SSE:       # %bb.0:
-; X64-SSE-NEXT:    xorl %r8d, %r8d
-; X64-SSE-NEXT:    movl %edi, %eax
 ; X64-SSE-NEXT:    movl %esi, %edx
+; X64-SSE-NEXT:    movl %edi, %eax
+; X64-SSE-NEXT:    xorl %esi, %esi
 ; X64-SSE-NEXT:    pcmpestri $7, %xmm1, %xmm0
-; X64-SSE-NEXT:    seto %r8b
-; X64-SSE-NEXT:    movl %r8d, %eax
+; X64-SSE-NEXT:    seto %sil
+; X64-SSE-NEXT:    movl %esi, %eax
 ; X64-SSE-NEXT:    retq
 ;
 ; X64-AVX-LABEL: test_mm_cmpestro:
 ; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    xorl %r8d, %r8d
-; X64-AVX-NEXT:    movl %edi, %eax
 ; X64-AVX-NEXT:    movl %esi, %edx
+; X64-AVX-NEXT:    movl %edi, %eax
+; X64-AVX-NEXT:    xorl %esi, %esi
 ; X64-AVX-NEXT:    vpcmpestri $7, %xmm1, %xmm0
-; X64-AVX-NEXT:    seto %r8b
-; X64-AVX-NEXT:    movl %r8d, %eax
+; X64-AVX-NEXT:    seto %sil
+; X64-AVX-NEXT:    movl %esi, %eax
 ; X64-AVX-NEXT:    retq
   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
   %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
@@ -263,22 +263,22 @@ define i32 @test_mm_cmpestrs(<2 x i64> %
 ;
 ; X64-SSE-LABEL: test_mm_cmpestrs:
 ; X64-SSE:       # %bb.0:
-; X64-SSE-NEXT:    xorl %r8d, %r8d
-; X64-SSE-NEXT:    movl %edi, %eax
 ; X64-SSE-NEXT:    movl %esi, %edx
+; X64-SSE-NEXT:    movl %edi, %eax
+; X64-SSE-NEXT:    xorl %esi, %esi
 ; X64-SSE-NEXT:    pcmpestri $7, %xmm1, %xmm0
-; X64-SSE-NEXT:    sets %r8b
-; X64-SSE-NEXT:    movl %r8d, %eax
+; X64-SSE-NEXT:    sets %sil
+; X64-SSE-NEXT:    movl %esi, %eax
 ; X64-SSE-NEXT:    retq
 ;
 ; X64-AVX-LABEL: test_mm_cmpestrs:
 ; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    xorl %r8d, %r8d
-; X64-AVX-NEXT:    movl %edi, %eax
 ; X64-AVX-NEXT:    movl %esi, %edx
+; X64-AVX-NEXT:    movl %edi, %eax
+; X64-AVX-NEXT:    xorl %esi, %esi
 ; X64-AVX-NEXT:    vpcmpestri $7, %xmm1, %xmm0
-; X64-AVX-NEXT:    sets %r8b
-; X64-AVX-NEXT:    movl %r8d, %eax
+; X64-AVX-NEXT:    sets %sil
+; X64-AVX-NEXT:    movl %esi, %eax
 ; X64-AVX-NEXT:    retq
   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
   %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
@@ -314,22 +314,22 @@ define i32 @test_mm_cmpestrz(<2 x i64> %
 ;
 ; X64-SSE-LABEL: test_mm_cmpestrz:
 ; X64-SSE:       # %bb.0:
-; X64-SSE-NEXT:    xorl %r8d, %r8d
-; X64-SSE-NEXT:    movl %edi, %eax
 ; X64-SSE-NEXT:    movl %esi, %edx
+; X64-SSE-NEXT:    movl %edi, %eax
+; X64-SSE-NEXT:    xorl %esi, %esi
 ; X64-SSE-NEXT:    pcmpestri $7, %xmm1, %xmm0
-; X64-SSE-NEXT:    sete %r8b
-; X64-SSE-NEXT:    movl %r8d, %eax
+; X64-SSE-NEXT:    sete %sil
+; X64-SSE-NEXT:    movl %esi, %eax
 ; X64-SSE-NEXT:    retq
 ;
 ; X64-AVX-LABEL: test_mm_cmpestrz:
 ; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    xorl %r8d, %r8d
-; X64-AVX-NEXT:    movl %edi, %eax
 ; X64-AVX-NEXT:    movl %esi, %edx
+; X64-AVX-NEXT:    movl %edi, %eax
+; X64-AVX-NEXT:    xorl %esi, %esi
 ; X64-AVX-NEXT:    vpcmpestri $7, %xmm1, %xmm0
-; X64-AVX-NEXT:    sete %r8b
-; X64-AVX-NEXT:    movl %r8d, %eax
+; X64-AVX-NEXT:    sete %sil
+; X64-AVX-NEXT:    movl %esi, %eax
 ; X64-AVX-NEXT:    retq
   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
   %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
@@ -510,8 +510,8 @@ define i32 @test_mm_crc32_u8(i32 %a0, i8
 ;
 ; X64-LABEL: test_mm_crc32_u8:
 ; X64:       # %bb.0:
-; X64-NEXT:    crc32b %sil, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    crc32b %sil, %eax
 ; X64-NEXT:    retq
   %res = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %a1)
   ret i32 %res
@@ -527,8 +527,8 @@ define i32 @test_mm_crc32_u16(i32 %a0, i
 ;
 ; X64-LABEL: test_mm_crc32_u16:
 ; X64:       # %bb.0:
-; X64-NEXT:    crc32w %si, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    crc32w %si, %eax
 ; X64-NEXT:    retq
   %res = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %a1)
   ret i32 %res
@@ -544,8 +544,8 @@ define i32 @test_mm_crc32_u32(i32 %a0, i
 ;
 ; X64-LABEL: test_mm_crc32_u32:
 ; X64:       # %bb.0:
-; X64-NEXT:    crc32l %esi, %edi
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    crc32l %esi, %eax
 ; X64-NEXT:    retq
   %res = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1)
   ret i32 %res

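The pcmpestri changes are the interesting ones in this file: the instruction
implicitly reads the string lengths from %eax and %edx, so the arguments
must be copied into those fixed registers first. With the new hints the
setcc scratch becomes %esi, which the copy into %edx has just vacated,
instead of reaching for %r8d. Reduced form (hypothetical function; the
intrinsic name and signature follow the tests above):

  declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8)

  define i32 @cmpestra_sketch(<16 x i8> %a, i32 %la, <16 x i8> %b, i32 %lb) {
    ; expected X64-SSE codegen after this patch (illustrative):
    ;   movl %esi, %edx / movl %edi, %eax   ; lengths into fixed regs
    ;   xorl %esi, %esi                     ; reuse the freed %esi
    ;   pcmpestri $7, %xmm1, %xmm0
    ;   seta %sil / movl %esi, %eax
    %r = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %a, i32 %la, <16 x i8> %b, i32 %lb, i8 7)
    ret i32 %r
  }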
Modified: llvm/trunk/test/CodeGen/X86/sse42-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse42-intrinsics-x86.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse42-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse42-intrinsics-x86.ll Wed Sep 19 11:59:08 2018
@@ -626,8 +626,8 @@ define i32 @crc32_32_8(i32 %a, i8 %b) no
 ;
 ; X64-LABEL: crc32_32_8:
 ; X64:       ## %bb.0:
-; X64-NEXT:    crc32b %sil, %edi ## encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xfe]
 ; X64-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
+; X64-NEXT:    crc32b %sil, %eax ## encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6]
 ; X64-NEXT:    retq ## encoding: [0xc3]
   %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
   ret i32 %tmp
@@ -643,8 +643,8 @@ define i32 @crc32_32_16(i32 %a, i16 %b)
 ;
 ; X64-LABEL: crc32_32_16:
 ; X64:       ## %bb.0:
-; X64-NEXT:    crc32w %si, %edi ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xfe]
 ; X64-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
+; X64-NEXT:    crc32w %si, %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6]
 ; X64-NEXT:    retq ## encoding: [0xc3]
   %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
   ret i32 %tmp
@@ -660,8 +660,8 @@ define i32 @crc32_32_32(i32 %a, i32 %b)
 ;
 ; X64-LABEL: crc32_32_32:
 ; X64:       ## %bb.0:
-; X64-NEXT:    crc32l %esi, %edi ## encoding: [0xf2,0x0f,0x38,0xf1,0xfe]
 ; X64-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
+; X64-NEXT:    crc32l %esi, %eax ## encoding: [0xf2,0x0f,0x38,0xf1,0xc6]
 ; X64-NEXT:    retq ## encoding: [0xc3]
   %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
   ret i32 %tmp

Modified: llvm/trunk/test/CodeGen/X86/sse42-intrinsics-x86_64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse42-intrinsics-x86_64.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse42-intrinsics-x86_64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse42-intrinsics-x86_64.ll Wed Sep 19 11:59:08 2018
@@ -9,8 +9,8 @@ declare i64 @llvm.x86.sse42.crc32.64.64(
 define i64 @crc32_64_8(i64 %a, i8 %b) nounwind {
 ; CHECK-LABEL: crc32_64_8:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    crc32b %sil, %edi ## encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xfe]
 ; CHECK-NEXT:    movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
+; CHECK-NEXT:    crc32b %sil, %eax ## encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %tmp = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a, i8 %b)
   ret i64 %tmp
@@ -19,8 +19,8 @@ define i64 @crc32_64_8(i64 %a, i8 %b) no
 define i64 @crc32_64_64(i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: crc32_64_64:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    crc32q %rsi, %rdi ## encoding: [0xf2,0x48,0x0f,0x38,0xf1,0xfe]
 ; CHECK-NEXT:    movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
+; CHECK-NEXT:    crc32q %rsi, %rax ## encoding: [0xf2,0x48,0x0f,0x38,0xf1,0xc6]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %tmp = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a, i64 %b)
   ret i64 %tmp

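crc32 is a two-address read-modify-write op, so emitting the hinted copy
first lets it accumulate directly in the return register; the printed
encodings confirm the rewrite, with the ModRM byte changing from 0xfe
(%esi -> %edi) to 0xc6 (%esi -> %eax). Reduced form (hypothetical function;
the intrinsic is declared as in these tests):

  declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32)

  define i32 @crc32_sketch(i32 %crc, i32 %v) {
    ; expected X64 codegen: movl %edi, %eax / crc32l %esi, %eax / retq
    %r = call i32 @llvm.x86.sse42.crc32.32.32(i32 %crc, i32 %v)
    ret i32 %r
  }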
Modified: llvm/trunk/test/CodeGen/X86/sse42-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse42-schedule.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse42-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse42-schedule.ll Wed Sep 19 11:59:08 2018
@@ -21,114 +21,114 @@
 define i32 @crc32_32_8(i32 %a0, i8 %a1, i8 *%a2) {
 ; GENERIC-LABEL: crc32_32_8:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
-; GENERIC-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
 ; GENERIC-NEXT:    movl %edi, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    crc32b %sil, %eax # sched: [3:1.00]
+; GENERIC-NEXT:    crc32b (%rdx), %eax # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: crc32_32_8:
 ; SLM:       # %bb.0:
-; SLM-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
-; SLM-NEXT:    crc32b (%rdx), %edi # sched: [6:1.00]
 ; SLM-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; SLM-NEXT:    crc32b %sil, %eax # sched: [3:1.00]
+; SLM-NEXT:    crc32b (%rdx), %eax # sched: [6:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-SSE-LABEL: crc32_32_8:
 ; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
-; SANDY-SSE-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
 ; SANDY-SSE-NEXT:    movl %edi, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT:    crc32b %sil, %eax # sched: [3:1.00]
+; SANDY-SSE-NEXT:    crc32b (%rdx), %eax # sched: [8:1.00]
 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: crc32_32_8:
 ; SANDY:       # %bb.0:
-; SANDY-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
-; SANDY-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
 ; SANDY-NEXT:    movl %edi, %eax # sched: [1:0.33]
+; SANDY-NEXT:    crc32b %sil, %eax # sched: [3:1.00]
+; SANDY-NEXT:    crc32b (%rdx), %eax # sched: [8:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-SSE-LABEL: crc32_32_8:
 ; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
 ; HASWELL-SSE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    crc32b %sil, %eax # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    crc32b (%rdx), %eax # sched: [8:1.00]
 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
 ;
 ; HASWELL-LABEL: crc32_32_8:
 ; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
-; HASWELL-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
 ; HASWELL-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; HASWELL-NEXT:    crc32b %sil, %eax # sched: [3:1.00]
+; HASWELL-NEXT:    crc32b (%rdx), %eax # sched: [8:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-SSE-LABEL: crc32_32_8:
 ; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
 ; BROADWELL-SSE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    crc32b %sil, %eax # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    crc32b (%rdx), %eax # sched: [8:1.00]
 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: crc32_32_8:
 ; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
-; BROADWELL-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
 ; BROADWELL-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; BROADWELL-NEXT:    crc32b %sil, %eax # sched: [3:1.00]
+; BROADWELL-NEXT:    crc32b (%rdx), %eax # sched: [8:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-SSE-LABEL: crc32_32_8:
 ; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
-; SKYLAKE-SSE-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
 ; SKYLAKE-SSE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    crc32b %sil, %eax # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT:    crc32b (%rdx), %eax # sched: [8:1.00]
 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: crc32_32_8:
 ; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
-; SKYLAKE-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
 ; SKYLAKE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; SKYLAKE-NEXT:    crc32b %sil, %eax # sched: [3:1.00]
+; SKYLAKE-NEXT:    crc32b (%rdx), %eax # sched: [8:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-SSE-LABEL: crc32_32_8:
 ; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
-; SKX-SSE-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
 ; SKX-SSE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT:    crc32b %sil, %eax # sched: [3:1.00]
+; SKX-SSE-NEXT:    crc32b (%rdx), %eax # sched: [8:1.00]
 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: crc32_32_8:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
-; SKX-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
 ; SKX-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; SKX-NEXT:    crc32b %sil, %eax # sched: [3:1.00]
+; SKX-NEXT:    crc32b (%rdx), %eax # sched: [8:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-SSE-LABEL: crc32_32_8:
 ; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    crc32b %sil, %edi # sched: [3:2.00]
-; BTVER2-SSE-NEXT:    crc32b (%rdx), %edi # sched: [6:2.00]
 ; BTVER2-SSE-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    crc32b %sil, %eax # sched: [3:2.00]
+; BTVER2-SSE-NEXT:    crc32b (%rdx), %eax # sched: [6:2.00]
 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
 ;
 ; BTVER2-LABEL: crc32_32_8:
 ; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    crc32b %sil, %edi # sched: [3:2.00]
-; BTVER2-NEXT:    crc32b (%rdx), %edi # sched: [6:2.00]
 ; BTVER2-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    crc32b %sil, %eax # sched: [3:2.00]
+; BTVER2-NEXT:    crc32b (%rdx), %eax # sched: [6:2.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-SSE-LABEL: crc32_32_8:
 ; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    crc32b (%rdx), %edi # sched: [10:1.00]
 ; ZNVER1-SSE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    crc32b %sil, %eax # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    crc32b (%rdx), %eax # sched: [10:1.00]
 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
 ;
 ; ZNVER1-LABEL: crc32_32_8:
 ; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
-; ZNVER1-NEXT:    crc32b (%rdx), %edi # sched: [10:1.00]
 ; ZNVER1-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; ZNVER1-NEXT:    crc32b %sil, %eax # sched: [3:1.00]
+; ZNVER1-NEXT:    crc32b (%rdx), %eax # sched: [10:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %a1)
   %2 = load i8, i8 *%a2
@@ -140,114 +140,114 @@ declare i32 @llvm.x86.sse42.crc32.32.8(i
 define i32 @crc32_32_16(i32 %a0, i16 %a1, i16 *%a2) {
 ; GENERIC-LABEL: crc32_32_16:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    crc32w %si, %edi # sched: [3:1.00]
-; GENERIC-NEXT:    crc32w (%rdx), %edi # sched: [8:1.00]
 ; GENERIC-NEXT:    movl %edi, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    crc32w %si, %eax # sched: [3:1.00]
+; GENERIC-NEXT:    crc32w (%rdx), %eax # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: crc32_32_16:
 ; SLM:       # %bb.0:
-; SLM-NEXT:    crc32w %si, %edi # sched: [3:1.00]
-; SLM-NEXT:    crc32w (%rdx), %edi # sched: [6:1.00]
 ; SLM-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; SLM-NEXT:    crc32w %si, %eax # sched: [3:1.00]
+; SLM-NEXT:    crc32w (%rdx), %eax # sched: [6:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-SSE-LABEL: crc32_32_16:
 ; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    crc32w %si, %edi # sched: [3:1.00]
-; SANDY-SSE-NEXT:    crc32w (%rdx), %edi # sched: [8:1.00]
 ; SANDY-SSE-NEXT:    movl %edi, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT:    crc32w %si, %eax # sched: [3:1.00]
+; SANDY-SSE-NEXT:    crc32w (%rdx), %eax # sched: [8:1.00]
 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: crc32_32_16:
 ; SANDY:       # %bb.0:
-; SANDY-NEXT:    crc32w %si, %edi # sched: [3:1.00]
-; SANDY-NEXT:    crc32w (%rdx), %edi # sched: [8:1.00]
 ; SANDY-NEXT:    movl %edi, %eax # sched: [1:0.33]
+; SANDY-NEXT:    crc32w %si, %eax # sched: [3:1.00]
+; SANDY-NEXT:    crc32w (%rdx), %eax # sched: [8:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-SSE-LABEL: crc32_32_16:
 ; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    crc32w %si, %edi # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    crc32w (%rdx), %edi # sched: [8:1.00]
 ; HASWELL-SSE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    crc32w %si, %eax # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    crc32w (%rdx), %eax # sched: [8:1.00]
 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
 ;
 ; HASWELL-LABEL: crc32_32_16:
 ; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    crc32w %si, %edi # sched: [3:1.00]
-; HASWELL-NEXT:    crc32w (%rdx), %edi # sched: [8:1.00]
 ; HASWELL-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; HASWELL-NEXT:    crc32w %si, %eax # sched: [3:1.00]
+; HASWELL-NEXT:    crc32w (%rdx), %eax # sched: [8:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-SSE-LABEL: crc32_32_16:
 ; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    crc32w %si, %edi # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    crc32w (%rdx), %edi # sched: [8:1.00]
 ; BROADWELL-SSE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    crc32w %si, %eax # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    crc32w (%rdx), %eax # sched: [8:1.00]
 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: crc32_32_16:
 ; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    crc32w %si, %edi # sched: [3:1.00]
-; BROADWELL-NEXT:    crc32w (%rdx), %edi # sched: [8:1.00]
 ; BROADWELL-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; BROADWELL-NEXT:    crc32w %si, %eax # sched: [3:1.00]
+; BROADWELL-NEXT:    crc32w (%rdx), %eax # sched: [8:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-SSE-LABEL: crc32_32_16:
 ; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    crc32w %si, %edi # sched: [3:1.00]
-; SKYLAKE-SSE-NEXT:    crc32w (%rdx), %edi # sched: [8:1.00]
 ; SKYLAKE-SSE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    crc32w %si, %eax # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT:    crc32w (%rdx), %eax # sched: [8:1.00]
 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: crc32_32_16:
 ; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    crc32w %si, %edi # sched: [3:1.00]
-; SKYLAKE-NEXT:    crc32w (%rdx), %edi # sched: [8:1.00]
 ; SKYLAKE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; SKYLAKE-NEXT:    crc32w %si, %eax # sched: [3:1.00]
+; SKYLAKE-NEXT:    crc32w (%rdx), %eax # sched: [8:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-SSE-LABEL: crc32_32_16:
 ; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    crc32w %si, %edi # sched: [3:1.00]
-; SKX-SSE-NEXT:    crc32w (%rdx), %edi # sched: [8:1.00]
 ; SKX-SSE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT:    crc32w %si, %eax # sched: [3:1.00]
+; SKX-SSE-NEXT:    crc32w (%rdx), %eax # sched: [8:1.00]
 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: crc32_32_16:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    crc32w %si, %edi # sched: [3:1.00]
-; SKX-NEXT:    crc32w (%rdx), %edi # sched: [8:1.00]
 ; SKX-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; SKX-NEXT:    crc32w %si, %eax # sched: [3:1.00]
+; SKX-NEXT:    crc32w (%rdx), %eax # sched: [8:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-SSE-LABEL: crc32_32_16:
 ; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    crc32w %si, %edi # sched: [3:2.00]
-; BTVER2-SSE-NEXT:    crc32w (%rdx), %edi # sched: [6:2.00]
 ; BTVER2-SSE-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    crc32w %si, %eax # sched: [3:2.00]
+; BTVER2-SSE-NEXT:    crc32w (%rdx), %eax # sched: [6:2.00]
 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
 ;
 ; BTVER2-LABEL: crc32_32_16:
 ; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    crc32w %si, %edi # sched: [3:2.00]
-; BTVER2-NEXT:    crc32w (%rdx), %edi # sched: [6:2.00]
 ; BTVER2-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    crc32w %si, %eax # sched: [3:2.00]
+; BTVER2-NEXT:    crc32w (%rdx), %eax # sched: [6:2.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-SSE-LABEL: crc32_32_16:
 ; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    crc32w %si, %edi # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    crc32w (%rdx), %edi # sched: [10:1.00]
 ; ZNVER1-SSE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    crc32w %si, %eax # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    crc32w (%rdx), %eax # sched: [10:1.00]
 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
 ;
 ; ZNVER1-LABEL: crc32_32_16:
 ; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    crc32w %si, %edi # sched: [3:1.00]
-; ZNVER1-NEXT:    crc32w (%rdx), %edi # sched: [10:1.00]
 ; ZNVER1-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; ZNVER1-NEXT:    crc32w %si, %eax # sched: [3:1.00]
+; ZNVER1-NEXT:    crc32w (%rdx), %eax # sched: [10:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %a1)
   %2 = load i16, i16 *%a2
@@ -259,114 +259,114 @@ declare i32 @llvm.x86.sse42.crc32.32.16(
 define i32 @crc32_32_32(i32 %a0, i32 %a1, i32 *%a2) {
 ; GENERIC-LABEL: crc32_32_32:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    crc32l %esi, %edi # sched: [3:1.00]
-; GENERIC-NEXT:    crc32l (%rdx), %edi # sched: [8:1.00]
 ; GENERIC-NEXT:    movl %edi, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    crc32l %esi, %eax # sched: [3:1.00]
+; GENERIC-NEXT:    crc32l (%rdx), %eax # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: crc32_32_32:
 ; SLM:       # %bb.0:
-; SLM-NEXT:    crc32l %esi, %edi # sched: [3:1.00]
-; SLM-NEXT:    crc32l (%rdx), %edi # sched: [6:1.00]
 ; SLM-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; SLM-NEXT:    crc32l %esi, %eax # sched: [3:1.00]
+; SLM-NEXT:    crc32l (%rdx), %eax # sched: [6:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-SSE-LABEL: crc32_32_32:
 ; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    crc32l %esi, %edi # sched: [3:1.00]
-; SANDY-SSE-NEXT:    crc32l (%rdx), %edi # sched: [8:1.00]
 ; SANDY-SSE-NEXT:    movl %edi, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT:    crc32l %esi, %eax # sched: [3:1.00]
+; SANDY-SSE-NEXT:    crc32l (%rdx), %eax # sched: [8:1.00]
 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: crc32_32_32:
 ; SANDY:       # %bb.0:
-; SANDY-NEXT:    crc32l %esi, %edi # sched: [3:1.00]
-; SANDY-NEXT:    crc32l (%rdx), %edi # sched: [8:1.00]
 ; SANDY-NEXT:    movl %edi, %eax # sched: [1:0.33]
+; SANDY-NEXT:    crc32l %esi, %eax # sched: [3:1.00]
+; SANDY-NEXT:    crc32l (%rdx), %eax # sched: [8:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-SSE-LABEL: crc32_32_32:
 ; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    crc32l %esi, %edi # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    crc32l (%rdx), %edi # sched: [8:1.00]
 ; HASWELL-SSE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    crc32l %esi, %eax # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    crc32l (%rdx), %eax # sched: [8:1.00]
 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
 ;
 ; HASWELL-LABEL: crc32_32_32:
 ; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    crc32l %esi, %edi # sched: [3:1.00]
-; HASWELL-NEXT:    crc32l (%rdx), %edi # sched: [8:1.00]
 ; HASWELL-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; HASWELL-NEXT:    crc32l %esi, %eax # sched: [3:1.00]
+; HASWELL-NEXT:    crc32l (%rdx), %eax # sched: [8:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-SSE-LABEL: crc32_32_32:
 ; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    crc32l %esi, %edi # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    crc32l (%rdx), %edi # sched: [8:1.00]
 ; BROADWELL-SSE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    crc32l %esi, %eax # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    crc32l (%rdx), %eax # sched: [8:1.00]
 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: crc32_32_32:
 ; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    crc32l %esi, %edi # sched: [3:1.00]
-; BROADWELL-NEXT:    crc32l (%rdx), %edi # sched: [8:1.00]
 ; BROADWELL-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; BROADWELL-NEXT:    crc32l %esi, %eax # sched: [3:1.00]
+; BROADWELL-NEXT:    crc32l (%rdx), %eax # sched: [8:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-SSE-LABEL: crc32_32_32:
 ; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    crc32l %esi, %edi # sched: [3:1.00]
-; SKYLAKE-SSE-NEXT:    crc32l (%rdx), %edi # sched: [8:1.00]
 ; SKYLAKE-SSE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    crc32l %esi, %eax # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT:    crc32l (%rdx), %eax # sched: [8:1.00]
 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: crc32_32_32:
 ; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    crc32l %esi, %edi # sched: [3:1.00]
-; SKYLAKE-NEXT:    crc32l (%rdx), %edi # sched: [8:1.00]
 ; SKYLAKE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; SKYLAKE-NEXT:    crc32l %esi, %eax # sched: [3:1.00]
+; SKYLAKE-NEXT:    crc32l (%rdx), %eax # sched: [8:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-SSE-LABEL: crc32_32_32:
 ; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    crc32l %esi, %edi # sched: [3:1.00]
-; SKX-SSE-NEXT:    crc32l (%rdx), %edi # sched: [8:1.00]
 ; SKX-SSE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT:    crc32l %esi, %eax # sched: [3:1.00]
+; SKX-SSE-NEXT:    crc32l (%rdx), %eax # sched: [8:1.00]
 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: crc32_32_32:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    crc32l %esi, %edi # sched: [3:1.00]
-; SKX-NEXT:    crc32l (%rdx), %edi # sched: [8:1.00]
 ; SKX-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; SKX-NEXT:    crc32l %esi, %eax # sched: [3:1.00]
+; SKX-NEXT:    crc32l (%rdx), %eax # sched: [8:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-SSE-LABEL: crc32_32_32:
 ; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    crc32l %esi, %edi # sched: [3:2.00]
-; BTVER2-SSE-NEXT:    crc32l (%rdx), %edi # sched: [6:2.00]
 ; BTVER2-SSE-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    crc32l %esi, %eax # sched: [3:2.00]
+; BTVER2-SSE-NEXT:    crc32l (%rdx), %eax # sched: [6:2.00]
 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
 ;
 ; BTVER2-LABEL: crc32_32_32:
 ; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    crc32l %esi, %edi # sched: [3:2.00]
-; BTVER2-NEXT:    crc32l (%rdx), %edi # sched: [6:2.00]
 ; BTVER2-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    crc32l %esi, %eax # sched: [3:2.00]
+; BTVER2-NEXT:    crc32l (%rdx), %eax # sched: [6:2.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-SSE-LABEL: crc32_32_32:
 ; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    crc32l %esi, %edi # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    crc32l (%rdx), %edi # sched: [10:1.00]
 ; ZNVER1-SSE-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    crc32l %esi, %eax # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    crc32l (%rdx), %eax # sched: [10:1.00]
 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
 ;
 ; ZNVER1-LABEL: crc32_32_32:
 ; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    crc32l %esi, %edi # sched: [3:1.00]
-; ZNVER1-NEXT:    crc32l (%rdx), %edi # sched: [10:1.00]
 ; ZNVER1-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; ZNVER1-NEXT:    crc32l %esi, %eax # sched: [3:1.00]
+; ZNVER1-NEXT:    crc32l (%rdx), %eax # sched: [10:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1)
   %2 = load i32, i32 *%a2
@@ -378,114 +378,114 @@ declare i32 @llvm.x86.sse42.crc32.32.32(
 define i64 @crc32_64_8(i64 %a0, i8 %a1, i8 *%a2) nounwind {
 ; GENERIC-LABEL: crc32_64_8:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
-; GENERIC-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
 ; GENERIC-NEXT:    movq %rdi, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    crc32b %sil, %eax # sched: [3:1.00]
+; GENERIC-NEXT:    crc32b (%rdx), %eax # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: crc32_64_8:
 ; SLM:       # %bb.0:
-; SLM-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
-; SLM-NEXT:    crc32b (%rdx), %edi # sched: [6:1.00]
 ; SLM-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; SLM-NEXT:    crc32b %sil, %eax # sched: [3:1.00]
+; SLM-NEXT:    crc32b (%rdx), %eax # sched: [6:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-SSE-LABEL: crc32_64_8:
 ; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
-; SANDY-SSE-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
 ; SANDY-SSE-NEXT:    movq %rdi, %rax # sched: [1:0.33]
+; SANDY-SSE-NEXT:    crc32b %sil, %eax # sched: [3:1.00]
+; SANDY-SSE-NEXT:    crc32b (%rdx), %eax # sched: [8:1.00]
 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: crc32_64_8:
 ; SANDY:       # %bb.0:
-; SANDY-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
-; SANDY-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
 ; SANDY-NEXT:    movq %rdi, %rax # sched: [1:0.33]
+; SANDY-NEXT:    crc32b %sil, %eax # sched: [3:1.00]
+; SANDY-NEXT:    crc32b (%rdx), %eax # sched: [8:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-SSE-LABEL: crc32_64_8:
 ; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
 ; HASWELL-SSE-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    crc32b %sil, %eax # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    crc32b (%rdx), %eax # sched: [8:1.00]
 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
 ;
 ; HASWELL-LABEL: crc32_64_8:
 ; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
-; HASWELL-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
 ; HASWELL-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; HASWELL-NEXT:    crc32b %sil, %eax # sched: [3:1.00]
+; HASWELL-NEXT:    crc32b (%rdx), %eax # sched: [8:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-SSE-LABEL: crc32_64_8:
 ; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
 ; BROADWELL-SSE-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    crc32b %sil, %eax # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    crc32b (%rdx), %eax # sched: [8:1.00]
 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: crc32_64_8:
 ; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
-; BROADWELL-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
 ; BROADWELL-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; BROADWELL-NEXT:    crc32b %sil, %eax # sched: [3:1.00]
+; BROADWELL-NEXT:    crc32b (%rdx), %eax # sched: [8:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-SSE-LABEL: crc32_64_8:
 ; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
-; SKYLAKE-SSE-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
 ; SKYLAKE-SSE-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    crc32b %sil, %eax # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT:    crc32b (%rdx), %eax # sched: [8:1.00]
 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: crc32_64_8:
 ; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
-; SKYLAKE-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
 ; SKYLAKE-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; SKYLAKE-NEXT:    crc32b %sil, %eax # sched: [3:1.00]
+; SKYLAKE-NEXT:    crc32b (%rdx), %eax # sched: [8:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-SSE-LABEL: crc32_64_8:
 ; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
-; SKX-SSE-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
 ; SKX-SSE-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; SKX-SSE-NEXT:    crc32b %sil, %eax # sched: [3:1.00]
+; SKX-SSE-NEXT:    crc32b (%rdx), %eax # sched: [8:1.00]
 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: crc32_64_8:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
-; SKX-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
 ; SKX-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; SKX-NEXT:    crc32b %sil, %eax # sched: [3:1.00]
+; SKX-NEXT:    crc32b (%rdx), %eax # sched: [8:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-SSE-LABEL: crc32_64_8:
 ; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    crc32b %sil, %edi # sched: [3:2.00]
-; BTVER2-SSE-NEXT:    crc32b (%rdx), %edi # sched: [6:2.00]
 ; BTVER2-SSE-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    crc32b %sil, %eax # sched: [3:2.00]
+; BTVER2-SSE-NEXT:    crc32b (%rdx), %eax # sched: [6:2.00]
 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
 ;
 ; BTVER2-LABEL: crc32_64_8:
 ; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    crc32b %sil, %edi # sched: [3:2.00]
-; BTVER2-NEXT:    crc32b (%rdx), %edi # sched: [6:2.00]
 ; BTVER2-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    crc32b %sil, %eax # sched: [3:2.00]
+; BTVER2-NEXT:    crc32b (%rdx), %eax # sched: [6:2.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-SSE-LABEL: crc32_64_8:
 ; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    crc32b (%rdx), %edi # sched: [10:1.00]
 ; ZNVER1-SSE-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    crc32b %sil, %eax # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    crc32b (%rdx), %eax # sched: [10:1.00]
 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
 ;
 ; ZNVER1-LABEL: crc32_64_8:
 ; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
-; ZNVER1-NEXT:    crc32b (%rdx), %edi # sched: [10:1.00]
 ; ZNVER1-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; ZNVER1-NEXT:    crc32b %sil, %eax # sched: [3:1.00]
+; ZNVER1-NEXT:    crc32b (%rdx), %eax # sched: [10:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a0, i8 %a1)
   %2 = load i8, i8 *%a2
@@ -497,114 +497,114 @@ declare i64 @llvm.x86.sse42.crc32.64.8(i
 define i64 @crc32_64_64(i64 %a0, i64 %a1, i64 *%a2) {
 ; GENERIC-LABEL: crc32_64_64:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    crc32q %rsi, %rdi # sched: [3:1.00]
-; GENERIC-NEXT:    crc32q (%rdx), %rdi # sched: [8:1.00]
 ; GENERIC-NEXT:    movq %rdi, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    crc32q %rsi, %rax # sched: [3:1.00]
+; GENERIC-NEXT:    crc32q (%rdx), %rax # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: crc32_64_64:
 ; SLM:       # %bb.0:
-; SLM-NEXT:    crc32q %rsi, %rdi # sched: [3:1.00]
-; SLM-NEXT:    crc32q (%rdx), %rdi # sched: [6:1.00]
 ; SLM-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; SLM-NEXT:    crc32q %rsi, %rax # sched: [3:1.00]
+; SLM-NEXT:    crc32q (%rdx), %rax # sched: [6:1.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-SSE-LABEL: crc32_64_64:
 ; SANDY-SSE:       # %bb.0:
-; SANDY-SSE-NEXT:    crc32q %rsi, %rdi # sched: [3:1.00]
-; SANDY-SSE-NEXT:    crc32q (%rdx), %rdi # sched: [8:1.00]
 ; SANDY-SSE-NEXT:    movq %rdi, %rax # sched: [1:0.33]
+; SANDY-SSE-NEXT:    crc32q %rsi, %rax # sched: [3:1.00]
+; SANDY-SSE-NEXT:    crc32q (%rdx), %rax # sched: [8:1.00]
 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: crc32_64_64:
 ; SANDY:       # %bb.0:
-; SANDY-NEXT:    crc32q %rsi, %rdi # sched: [3:1.00]
-; SANDY-NEXT:    crc32q (%rdx), %rdi # sched: [8:1.00]
 ; SANDY-NEXT:    movq %rdi, %rax # sched: [1:0.33]
+; SANDY-NEXT:    crc32q %rsi, %rax # sched: [3:1.00]
+; SANDY-NEXT:    crc32q (%rdx), %rax # sched: [8:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-SSE-LABEL: crc32_64_64:
 ; HASWELL-SSE:       # %bb.0:
-; HASWELL-SSE-NEXT:    crc32q %rsi, %rdi # sched: [3:1.00]
-; HASWELL-SSE-NEXT:    crc32q (%rdx), %rdi # sched: [8:1.00]
 ; HASWELL-SSE-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; HASWELL-SSE-NEXT:    crc32q %rsi, %rax # sched: [3:1.00]
+; HASWELL-SSE-NEXT:    crc32q (%rdx), %rax # sched: [8:1.00]
 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
 ;
 ; HASWELL-LABEL: crc32_64_64:
 ; HASWELL:       # %bb.0:
-; HASWELL-NEXT:    crc32q %rsi, %rdi # sched: [3:1.00]
-; HASWELL-NEXT:    crc32q (%rdx), %rdi # sched: [8:1.00]
 ; HASWELL-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; HASWELL-NEXT:    crc32q %rsi, %rax # sched: [3:1.00]
+; HASWELL-NEXT:    crc32q (%rdx), %rax # sched: [8:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-SSE-LABEL: crc32_64_64:
 ; BROADWELL-SSE:       # %bb.0:
-; BROADWELL-SSE-NEXT:    crc32q %rsi, %rdi # sched: [3:1.00]
-; BROADWELL-SSE-NEXT:    crc32q (%rdx), %rdi # sched: [8:1.00]
 ; BROADWELL-SSE-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT:    crc32q %rsi, %rax # sched: [3:1.00]
+; BROADWELL-SSE-NEXT:    crc32q (%rdx), %rax # sched: [8:1.00]
 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: crc32_64_64:
 ; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    crc32q %rsi, %rdi # sched: [3:1.00]
-; BROADWELL-NEXT:    crc32q (%rdx), %rdi # sched: [8:1.00]
 ; BROADWELL-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; BROADWELL-NEXT:    crc32q %rsi, %rax # sched: [3:1.00]
+; BROADWELL-NEXT:    crc32q (%rdx), %rax # sched: [8:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-SSE-LABEL: crc32_64_64:
 ; SKYLAKE-SSE:       # %bb.0:
-; SKYLAKE-SSE-NEXT:    crc32q %rsi, %rdi # sched: [3:1.00]
-; SKYLAKE-SSE-NEXT:    crc32q (%rdx), %rdi # sched: [8:1.00]
 ; SKYLAKE-SSE-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT:    crc32q %rsi, %rax # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT:    crc32q (%rdx), %rax # sched: [8:1.00]
 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: crc32_64_64:
 ; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    crc32q %rsi, %rdi # sched: [3:1.00]
-; SKYLAKE-NEXT:    crc32q (%rdx), %rdi # sched: [8:1.00]
 ; SKYLAKE-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; SKYLAKE-NEXT:    crc32q %rsi, %rax # sched: [3:1.00]
+; SKYLAKE-NEXT:    crc32q (%rdx), %rax # sched: [8:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-SSE-LABEL: crc32_64_64:
 ; SKX-SSE:       # %bb.0:
-; SKX-SSE-NEXT:    crc32q %rsi, %rdi # sched: [3:1.00]
-; SKX-SSE-NEXT:    crc32q (%rdx), %rdi # sched: [8:1.00]
 ; SKX-SSE-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; SKX-SSE-NEXT:    crc32q %rsi, %rax # sched: [3:1.00]
+; SKX-SSE-NEXT:    crc32q (%rdx), %rax # sched: [8:1.00]
 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: crc32_64_64:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    crc32q %rsi, %rdi # sched: [3:1.00]
-; SKX-NEXT:    crc32q (%rdx), %rdi # sched: [8:1.00]
 ; SKX-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; SKX-NEXT:    crc32q %rsi, %rax # sched: [3:1.00]
+; SKX-NEXT:    crc32q (%rdx), %rax # sched: [8:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-SSE-LABEL: crc32_64_64:
 ; BTVER2-SSE:       # %bb.0:
-; BTVER2-SSE-NEXT:    crc32q %rsi, %rdi # sched: [3:2.00]
-; BTVER2-SSE-NEXT:    crc32q (%rdx), %rdi # sched: [6:2.00]
 ; BTVER2-SSE-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    crc32q %rsi, %rax # sched: [3:2.00]
+; BTVER2-SSE-NEXT:    crc32q (%rdx), %rax # sched: [6:2.00]
 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
 ;
 ; BTVER2-LABEL: crc32_64_64:
 ; BTVER2:       # %bb.0:
-; BTVER2-NEXT:    crc32q %rsi, %rdi # sched: [3:2.00]
-; BTVER2-NEXT:    crc32q (%rdx), %rdi # sched: [6:2.00]
 ; BTVER2-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    crc32q %rsi, %rax # sched: [3:2.00]
+; BTVER2-NEXT:    crc32q (%rdx), %rax # sched: [6:2.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-SSE-LABEL: crc32_64_64:
 ; ZNVER1-SSE:       # %bb.0:
-; ZNVER1-SSE-NEXT:    crc32q %rsi, %rdi # sched: [3:1.00]
-; ZNVER1-SSE-NEXT:    crc32q (%rdx), %rdi # sched: [10:1.00]
 ; ZNVER1-SSE-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    crc32q %rsi, %rax # sched: [3:1.00]
+; ZNVER1-SSE-NEXT:    crc32q (%rdx), %rax # sched: [10:1.00]
 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
 ;
 ; ZNVER1-LABEL: crc32_64_64:
 ; ZNVER1:       # %bb.0:
-; ZNVER1-NEXT:    crc32q %rsi, %rdi # sched: [3:1.00]
-; ZNVER1-NEXT:    crc32q (%rdx), %rdi # sched: [10:1.00]
 ; ZNVER1-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; ZNVER1-NEXT:    crc32q %rsi, %rax # sched: [3:1.00]
+; ZNVER1-NEXT:    crc32q (%rdx), %rax # sched: [10:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1)
   %2 = load i64, i64 *%a2
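
Note on the crc32 updates: crc32 reads and writes its destination, so the
whole chain has to end up in the return register anyway. Previously the
chain ran in the incoming argument register and a final copy moved the
result into %eax/%rax; the new hints tell the allocator up front that the
value wants the return register, so the copy comes first and the crc32s
update %eax/%rax directly. Same instruction count, just no copy left
between the last crc32 and the ret. A rough C analogue of crc32_32_32
(not part of the test suite; needs -msse4.2):

    #include <nmmintrin.h>   /* _mm_crc32_u32 */

    /* Expected codegen per the updated checks above:
           movl   %edi, %eax        # copy hinted into the return register
           crc32l %esi, %eax
           crc32l (%rdx), %eax
           retq                                                          */
    unsigned crc32_32_32(unsigned acc, unsigned v, const unsigned *p) {
        acc = _mm_crc32_u32(acc, v);
        acc = _mm_crc32_u32(acc, *p);
        return acc;
    }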

Modified: llvm/trunk/test/CodeGen/X86/sttni.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sttni.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sttni.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sttni.ll Wed Sep 19 11:59:08 2018
@@ -20,8 +20,8 @@ define i1 @pcmpestri_reg_eq_i8(<16 x i8>
 ;
 ; X64-LABEL: pcmpestri_reg_eq_i8:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    movl %esi, %edx
+; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    pcmpestri $24, %xmm1, %xmm0
 ; X64-NEXT:    setae %al
 ; X64-NEXT:    retq
@@ -42,8 +42,8 @@ define i32 @pcmpestri_reg_idx_i8(<16 x i
 ;
 ; X64-LABEL: pcmpestri_reg_idx_i8:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    movl %esi, %edx
+; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    pcmpestri $24, %xmm1, %xmm0
 ; X64-NEXT:    movl %ecx, %eax
 ; X64-NEXT:    retq
@@ -81,8 +81,8 @@ define i32 @pcmpestri_reg_diff_i8(<16 x
 ;
 ; X64-LABEL: pcmpestri_reg_diff_i8:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    movl %esi, %edx
+; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    pcmpestri $24, %xmm1, %xmm0
 ; X64-NEXT:    # kill: def $ecx killed $ecx def $rcx
 ; X64-NEXT:    cmpl $16, %ecx
@@ -133,8 +133,8 @@ define i1 @pcmpestri_mem_eq_i8(i8* %lhs_
 ; X64-LABEL: pcmpestri_mem_eq_i8:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    movq %rdx, %r8
-; X64-NEXT:    movdqu (%rdi), %xmm0
 ; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    movdqu (%rdi), %xmm0
 ; X64-NEXT:    movl %ecx, %edx
 ; X64-NEXT:    pcmpestri $24, (%r8), %xmm0
 ; X64-NEXT:    setae %al
@@ -166,8 +166,8 @@ define i32 @pcmpestri_mem_idx_i8(i8* %lh
 ; X64-LABEL: pcmpestri_mem_idx_i8:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    movq %rdx, %r8
-; X64-NEXT:    movdqu (%rdi), %xmm0
 ; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    movdqu (%rdi), %xmm0
 ; X64-NEXT:    movl %ecx, %edx
 ; X64-NEXT:    pcmpestri $24, (%r8), %xmm0
 ; X64-NEXT:    movl %ecx, %eax
@@ -216,9 +216,9 @@ define i32 @pcmpestri_mem_diff_i8(i8* %l
 ;
 ; X64-LABEL: pcmpestri_mem_diff_i8:
 ; X64:       # %bb.0: # %entry
+; X64-NEXT:    movl %esi, %eax
 ; X64-NEXT:    movdqu (%rdi), %xmm1
 ; X64-NEXT:    movdqu (%rdx), %xmm0
-; X64-NEXT:    movl %esi, %eax
 ; X64-NEXT:    movl %ecx, %edx
 ; X64-NEXT:    pcmpestri $24, %xmm0, %xmm1
 ; X64-NEXT:    # kill: def $ecx killed $ecx def $rcx
@@ -268,8 +268,8 @@ define i1 @pcmpestri_reg_eq_i16(<8 x i16
 ;
 ; X64-LABEL: pcmpestri_reg_eq_i16:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    movl %esi, %edx
+; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    pcmpestri $24, %xmm1, %xmm0
 ; X64-NEXT:    setae %al
 ; X64-NEXT:    retq
@@ -292,8 +292,8 @@ define i32 @pcmpestri_reg_idx_i16(<8 x i
 ;
 ; X64-LABEL: pcmpestri_reg_idx_i16:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    movl %esi, %edx
+; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    pcmpestri $24, %xmm1, %xmm0
 ; X64-NEXT:    movl %ecx, %eax
 ; X64-NEXT:    retq
@@ -334,8 +334,8 @@ define i32 @pcmpestri_reg_diff_i16(<8 x
 ;
 ; X64-LABEL: pcmpestri_reg_diff_i16:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    movl %esi, %edx
+; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    pcmpestri $24, %xmm1, %xmm0
 ; X64-NEXT:    # kill: def $ecx killed $ecx def $rcx
 ; X64-NEXT:    cmpl $16, %ecx
@@ -388,8 +388,8 @@ define i1 @pcmpestri_mem_eq_i16(i16* %lh
 ; X64-LABEL: pcmpestri_mem_eq_i16:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    movq %rdx, %r8
-; X64-NEXT:    movdqu (%rdi), %xmm0
 ; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    movdqu (%rdi), %xmm0
 ; X64-NEXT:    movl %ecx, %edx
 ; X64-NEXT:    pcmpestri $25, (%r8), %xmm0
 ; X64-NEXT:    setae %al
@@ -423,8 +423,8 @@ define i32 @pcmpestri_mem_idx_i16(i16* %
 ; X64-LABEL: pcmpestri_mem_idx_i16:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    movq %rdx, %r8
-; X64-NEXT:    movdqu (%rdi), %xmm0
 ; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    movdqu (%rdi), %xmm0
 ; X64-NEXT:    movl %ecx, %edx
 ; X64-NEXT:    pcmpestri $25, (%r8), %xmm0
 ; X64-NEXT:    movl %ecx, %eax
@@ -476,9 +476,9 @@ define i32 @pcmpestri_mem_diff_i16(i16*
 ;
 ; X64-LABEL: pcmpestri_mem_diff_i16:
 ; X64:       # %bb.0: # %entry
+; X64-NEXT:    movl %esi, %eax
 ; X64-NEXT:    movdqu (%rdi), %xmm1
 ; X64-NEXT:    movdqu (%rdx), %xmm0
-; X64-NEXT:    movl %esi, %eax
 ; X64-NEXT:    movl %ecx, %edx
 ; X64-NEXT:    pcmpestri $25, %xmm0, %xmm1
 ; X64-NEXT:    # kill: def $ecx killed $ecx def $rcx
@@ -989,13 +989,13 @@ define void @pcmpestr_index_flag(<16 x i
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    movq %rcx, %r8
 ; X64-NEXT:    movq %rdx, %r9
-; X64-NEXT:    xorl %r10d, %r10d
-; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    movl %esi, %edx
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    xorl %esi, %esi
 ; X64-NEXT:    pcmpestri $24, %xmm1, %xmm0
-; X64-NEXT:    setb %r10b
+; X64-NEXT:    setb %sil
 ; X64-NEXT:    movl %ecx, (%r9)
-; X64-NEXT:    movl %r10d, (%r8)
+; X64-NEXT:    movl %esi, (%r8)
 ; X64-NEXT:    retq
 entry:
   %flag = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24)
@@ -1026,13 +1026,13 @@ define void @pcmpestr_mask_flag(<16 x i8
 ; X64-LABEL: pcmpestr_mask_flag:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    movq %rdx, %r8
-; X64-NEXT:    xorl %r9d, %r9d
-; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    movl %esi, %edx
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    xorl %esi, %esi
 ; X64-NEXT:    pcmpestrm $24, %xmm1, %xmm0
-; X64-NEXT:    setb %r9b
+; X64-NEXT:    setb %sil
 ; X64-NEXT:    movdqa %xmm0, (%r8)
-; X64-NEXT:    movl %r9d, (%rcx)
+; X64-NEXT:    movl %esi, (%rcx)
 ; X64-NEXT:    retq
 entry:
   %flag = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24)
@@ -1064,9 +1064,9 @@ define void @pcmpestr_mask_index(<16 x i
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    movq %rcx, %r8
 ; X64-NEXT:    movq %rdx, %r9
-; X64-NEXT:    movdqa %xmm0, %xmm2
-; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    movl %esi, %edx
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movdqa %xmm0, %xmm2
 ; X64-NEXT:    pcmpestrm $24, %xmm1, %xmm0
 ; X64-NEXT:    pcmpestri $24, %xmm1, %xmm2
 ; X64-NEXT:    movdqa %xmm0, (%r9)
@@ -1110,9 +1110,9 @@ define void @pcmpestr_mask_index_flag(<1
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    movq %rcx, %r9
 ; X64-NEXT:    movq %rdx, %r10
-; X64-NEXT:    movdqa %xmm0, %xmm2
-; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    movl %esi, %edx
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movdqa %xmm0, %xmm2
 ; X64-NEXT:    pcmpestrm $24, %xmm1, %xmm0
 ; X64-NEXT:    xorl %esi, %esi
 ; X64-NEXT:    pcmpestri $24, %xmm1, %xmm2
@@ -1321,9 +1321,9 @@ define i32 @pcmpestri_nontemporal(<16 x
 ;
 ; X64-LABEL: pcmpestri_nontemporal:
 ; X64:       # %bb.0: # %entry
+; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    movntdqa (%rsi), %xmm1
 ; X64-NEXT:    xorl %esi, %esi
-; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    pcmpestri $24, %xmm1, %xmm0
 ; X64-NEXT:    setb %sil
 ; X64-NEXT:    movl %esi, %eax
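
Note on the sttni updates: pcmpestri/pcmpestrm take implicit fixed-register
operands -- the two string lengths in EAX and EDX, with the index result
defined in ECX -- so the movl pairs above are pure argument shuffling into
those registers, and the hints merely reorder them. The switch from %r10d
to %esi for the setb result reuses an argument register that is dead by
then, and the zeroing xorl gets a byte shorter without the REX prefix.
A minimal C version of the register-index case (function name invented;
imm 24 is _SIDD_CMP_EQUAL_EACH | _SIDD_NEGATIVE_POLARITY):

    #include <nmmintrin.h>

    /* la/lb are forced into the implicit EAX/EDX inputs of pcmpestri;
       the match index comes back in ECX. */
    int first_match(__m128i a, int la, __m128i b, int lb) {
        return _mm_cmpestri(a, la, b, lb,
                            _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH |
                            _SIDD_NEGATIVE_POLARITY);
    }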

Modified: llvm/trunk/test/CodeGen/X86/subcarry.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/subcarry.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/subcarry.ll (original)
+++ llvm/trunk/test/CodeGen/X86/subcarry.ll Wed Sep 19 11:59:08 2018
@@ -4,9 +4,9 @@
 define i128 @sub128(i128 %a, i128 %b) nounwind {
 ; CHECK-LABEL: sub128:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    subq %rdx, %rdi
-; CHECK-NEXT:    sbbq %rcx, %rsi
 ; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    subq %rdx, %rax
+; CHECK-NEXT:    sbbq %rcx, %rsi
 ; CHECK-NEXT:    movq %rsi, %rdx
 ; CHECK-NEXT:    retq
 entry:
@@ -17,6 +17,7 @@ entry:
 define i256 @sub256(i256 %a, i256 %b) nounwind {
 ; CHECK-LABEL: sub256:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    subq %r9, %rsi
 ; CHECK-NEXT:    sbbq {{[0-9]+}}(%rsp), %rdx
 ; CHECK-NEXT:    sbbq {{[0-9]+}}(%rsp), %rcx
@@ -25,7 +26,6 @@ define i256 @sub256(i256 %a, i256 %b) no
 ; CHECK-NEXT:    movq %rsi, (%rdi)
 ; CHECK-NEXT:    movq %rcx, 16(%rdi)
 ; CHECK-NEXT:    movq %r8, 24(%rdi)
-; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    retq
 entry:
   %0 = sub i256 %a, %b
@@ -37,19 +37,19 @@ entry:
 define %S @negate(%S* nocapture readonly %this) {
 ; CHECK-LABEL: negate:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    xorl %r8d, %r8d
-; CHECK-NEXT:    xorl %ecx, %ecx
-; CHECK-NEXT:    subq (%rsi), %rcx
-; CHECK-NEXT:    movl $0, %edx
-; CHECK-NEXT:    sbbq 8(%rsi), %rdx
-; CHECK-NEXT:    movl $0, %eax
-; CHECK-NEXT:    sbbq 16(%rsi), %rax
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    subq (%rsi), %rdx
+; CHECK-NEXT:    movl $0, %edi
+; CHECK-NEXT:    sbbq 8(%rsi), %rdi
+; CHECK-NEXT:    movl $0, %ecx
+; CHECK-NEXT:    sbbq 16(%rsi), %rcx
 ; CHECK-NEXT:    sbbq 24(%rsi), %r8
-; CHECK-NEXT:    movq %rcx, (%rdi)
-; CHECK-NEXT:    movq %rdx, 8(%rdi)
-; CHECK-NEXT:    movq %rax, 16(%rdi)
-; CHECK-NEXT:    movq %r8, 24(%rdi)
-; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    movq %rdx, (%rax)
+; CHECK-NEXT:    movq %rdi, 8(%rax)
+; CHECK-NEXT:    movq %rcx, 16(%rax)
+; CHECK-NEXT:    movq %r8, 24(%rax)
 ; CHECK-NEXT:    retq
 entry:
   %0 = getelementptr inbounds %S, %S* %this, i64 0, i32 0, i64 0
@@ -90,29 +90,29 @@ entry:
 define %S @sub(%S* nocapture readonly %this, %S %arg.b) local_unnamed_addr {
 ; CHECK-LABEL: sub:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    notq %rdx
-; CHECK-NEXT:    xorl %r10d, %r10d
+; CHECK-NEXT:    xorl %edi, %edi
 ; CHECK-NEXT:    addq (%rsi), %rdx
-; CHECK-NEXT:    setb %r10b
+; CHECK-NEXT:    setb %dil
 ; CHECK-NEXT:    addq $1, %rdx
-; CHECK-NEXT:    adcq 8(%rsi), %r10
-; CHECK-NEXT:    setb %al
-; CHECK-NEXT:    movzbl %al, %r11d
+; CHECK-NEXT:    adcq 8(%rsi), %rdi
+; CHECK-NEXT:    setb %r10b
+; CHECK-NEXT:    movzbl %r10b, %r10d
 ; CHECK-NEXT:    notq %rcx
-; CHECK-NEXT:    addq %r10, %rcx
-; CHECK-NEXT:    adcq 16(%rsi), %r11
-; CHECK-NEXT:    setb %al
-; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    addq %rdi, %rcx
+; CHECK-NEXT:    adcq 16(%rsi), %r10
+; CHECK-NEXT:    setb %dil
+; CHECK-NEXT:    movzbl %dil, %edi
 ; CHECK-NEXT:    notq %r8
-; CHECK-NEXT:    addq %r11, %r8
-; CHECK-NEXT:    adcq 24(%rsi), %rax
+; CHECK-NEXT:    addq %r10, %r8
+; CHECK-NEXT:    adcq 24(%rsi), %rdi
 ; CHECK-NEXT:    notq %r9
-; CHECK-NEXT:    addq %rax, %r9
-; CHECK-NEXT:    movq %rdx, (%rdi)
-; CHECK-NEXT:    movq %rcx, 8(%rdi)
-; CHECK-NEXT:    movq %r8, 16(%rdi)
-; CHECK-NEXT:    movq %r9, 24(%rdi)
-; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    addq %rdi, %r9
+; CHECK-NEXT:    movq %rdx, (%rax)
+; CHECK-NEXT:    movq %rcx, 8(%rax)
+; CHECK-NEXT:    movq %r8, 16(%rax)
+; CHECK-NEXT:    movq %r9, 24(%rax)
 ; CHECK-NEXT:    retq
 entry:
   %0 = extractvalue %S %arg.b, 0
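
Note on the subcarry updates: these functions return i256/%S, so the SysV
ABI passes a hidden destination pointer in %rdi and wants that same pointer
back in %rax. Hoisting movq %rdi, %rax to the top lets %rdi serve as a
scratch register inside the borrow chain, and the trailing copy before the
ret disappears (see negate and sub). A hypothetical C shape for negate
(struct layout invented):

    struct S { unsigned long long w[4]; };

    /* 256-bit two's-complement negate, one limb at a time, matching the
       subq/sbbq-against-zero chain in the test.  The result travels
       through the hidden sret pointer, which is also returned in %rax. */
    struct S negate(const struct S *p) {
        struct S r;
        unsigned long long borrow = 0;
        for (int i = 0; i < 4; ++i) {
            unsigned long long v = p->w[i];
            r.w[i] = 0ULL - v - borrow;
            borrow = (v | borrow) != 0; /* borrow out iff v or borrow in */
        }
        return r;
    }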

Modified: llvm/trunk/test/CodeGen/X86/swift-return.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/swift-return.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/swift-return.ll (original)
+++ llvm/trunk/test/CodeGen/X86/swift-return.ll Wed Sep 19 11:59:08 2018
@@ -457,18 +457,18 @@ define swiftcc { i8, i8, i8, i8 } @gen9(
 ; CHECK-LABEL: gen9:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    movl %edi, %edx
-; CHECK-NEXT:    movl %edi, %ecx
-; CHECK-NEXT:    movl %edi, %r8d
+; CHECK-NEXT:    movl %eax, %edx
+; CHECK-NEXT:    movl %eax, %ecx
+; CHECK-NEXT:    movl %eax, %r8d
 ; CHECK-NEXT:    retq
 ;
 ; CHECK-O0-LABEL: gen9:
 ; CHECK-O0:       # %bb.0:
 ; CHECK-O0-NEXT:    movb %dil, %al
-; CHECK-O0-NEXT:    movb %al, -{{[0-9]+}}(%rsp) # 1-byte Spill
-; CHECK-O0-NEXT:    movb -{{[0-9]+}}(%rsp), %dl # 1-byte Reload
-; CHECK-O0-NEXT:    movb -{{[0-9]+}}(%rsp), %cl # 1-byte Reload
-; CHECK-O0-NEXT:    movb -{{[0-9]+}}(%rsp), %r8b # 1-byte Reload
+; CHECK-O0-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; CHECK-O0-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %dl # 1-byte Reload
+; CHECK-O0-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
+; CHECK-O0-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %r8b # 1-byte Reload
 ; CHECK-O0-NEXT:    retq
   %v0 = insertvalue { i8, i8, i8, i8 } undef, i8 %key, 0
   %v1 = insertvalue { i8, i8, i8, i8 } %v0, i8 %key, 1
@@ -479,10 +479,10 @@ define swiftcc { i8, i8, i8, i8 } @gen9(
 define swiftcc { double, double, double, double, i64, i64, i64, i64 } @gen10(double %keyd, i64 %keyi) {
 ; CHECK-LABEL: gen10:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    movaps %xmm0, %xmm1
 ; CHECK-NEXT:    movaps %xmm0, %xmm2
 ; CHECK-NEXT:    movaps %xmm0, %xmm3
-; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    movq %rdi, %rdx
 ; CHECK-NEXT:    movq %rdi, %rcx
 ; CHECK-NEXT:    movq %rdi, %r8
@@ -490,12 +490,12 @@ define swiftcc { double, double, double,
 ;
 ; CHECK-O0-LABEL: gen10:
 ; CHECK-O0:       # %bb.0:
-; CHECK-O0-NEXT:    movsd %xmm0, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK-O0-NEXT:    movsd -{{[0-9]+}}(%rsp), %xmm1 # 8-byte Reload
+; CHECK-O0-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-O0-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
 ; CHECK-O0-NEXT:    # xmm1 = mem[0],zero
-; CHECK-O0-NEXT:    movsd -{{[0-9]+}}(%rsp), %xmm2 # 8-byte Reload
+; CHECK-O0-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 8-byte Reload
 ; CHECK-O0-NEXT:    # xmm2 = mem[0],zero
-; CHECK-O0-NEXT:    movsd -{{[0-9]+}}(%rsp), %xmm3 # 8-byte Reload
+; CHECK-O0-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 8-byte Reload
 ; CHECK-O0-NEXT:    # xmm3 = mem[0],zero
 ; CHECK-O0-NEXT:    movq %rdi, %rax
 ; CHECK-O0-NEXT:    movq %rdi, %rdx
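
Note on the CHECK-O0 updates here (and in the swifterror test below): the
spill-slot checks are loosened to a regex so the same line matches whether
the slot is addressed off %rsp or %rbp, presumably to keep the
auto-generated checks stable with and without frame-pointer elimination.
For example

    ; CHECK-O0-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill

matches both "movb %al, -10(%rsp)" and "movb %al, -10(%rbp)" (the offset
-10 is just an illustration). The gen9/gen10 changes themselves are the
usual hint effect: in gen9 the remaining result registers are copied from
%eax rather than from %edi, and in gen10 the %rax copy simply moves ahead
of the xmm shuffles.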

Modified: llvm/trunk/test/CodeGen/X86/swifterror.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/swifterror.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/swifterror.ll (original)
+++ llvm/trunk/test/CodeGen/X86/swifterror.ll Wed Sep 19 11:59:08 2018
@@ -1,6 +1,6 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=x86_64-apple-darwin | FileCheck --check-prefix=CHECK-APPLE %s
-; RUN: llc -verify-machineinstrs -O0 < %s -mtriple=x86_64-apple-darwin | FileCheck --check-prefix=CHECK-O0 %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=i386-apple-darwin | FileCheck --check-prefix=CHECK-i386 %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-apple-darwin | FileCheck --check-prefix=CHECK-APPLE %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-apple-darwin -O0 | FileCheck --check-prefix=CHECK-O0 %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=i386-apple-darwin | FileCheck --check-prefix=CHECK-i386 %s
 
 declare i8* @malloc(i64)
 declare void @free(i8*)
@@ -37,8 +37,7 @@ define float @caller(i8* %error_ref) {
 ; CHECK-APPLE: testq %r12, %r12
 ; CHECK-APPLE: jne
 ; Access part of the error object and save it to error_ref
-; CHECK-APPLE: movb 8(%r12)
-; CHECK-APPLE: movq %r12, %rdi
+; CHECK-APPLE: movb 8(%rdi)
 ; CHECK-APPLE: callq {{.*}}free
 
 ; CHECK-O0-LABEL: caller:
@@ -250,9 +249,8 @@ define float @caller3(i8* %error_ref) {
 ; CHECK-APPLE: testq %r12, %r12
 ; CHECK-APPLE: jne
 ; Access part of the error object and save it to error_ref
-; CHECK-APPLE: movb 8(%r12),
+; CHECK-APPLE: movb 8(%rdi),
 ; CHECK-APPLE: movb %{{.*}},
-; CHECK-APPLE: movq %r12, %rdi
 ; CHECK-APPLE: callq {{.*}}free
 
 ; CHECK-O0-LABEL: caller3:
@@ -300,8 +298,7 @@ define float @caller_with_multiple_swift
 ; CHECK-APPLE: testq %r12, %r12
 ; CHECK-APPLE: jne
 ; Access part of the error object and save it to error_ref
-; CHECK-APPLE: movb 8(%r12)
-; CHECK-APPLE: movq %r12, %rdi
+; CHECK-APPLE: movb 8(%rdi)
 ; CHECK-APPLE: callq {{.*}}free
 
 ; The second swifterror value:
@@ -310,8 +307,7 @@ define float @caller_with_multiple_swift
 ; CHECK-APPLE: testq %r12, %r12
 ; CHECK-APPLE: jne
 ; Access part of the error object and save it to error_ref
-; CHECK-APPLE: movb 8(%r12)
-; CHECK-APPLE: movq %r12, %rdi
+; CHECK-APPLE: movb 8(%rdi)
 ; CHECK-APPLE: callq {{.*}}free
 
 ; CHECK-O0-LABEL: caller_with_multiple_swifterror_values:
@@ -488,8 +484,8 @@ entry:
 ; CHECK-i386:  retl
 ; CHECK-APPLE-LABEL: empty_swiftcc:
 ; CHECK-APPLE:  movl    %edx, %ecx
-; CHECK-APPLE:  movl    %edi, %eax
-; CHECK-APPLE:  movl    %esi, %edx
+; CHECK-APPLE-DAG:  movl    %edi, %eax
+; CHECK-APPLE-DAG:  movl    %esi, %edx
 ; CHECK-APPLE:  retq
 define swiftcc {i32, i32, i32} @empty_swiftcc({i32, i32, i32} , %swift_error** swifterror %error_ptr_ref) {
 entry:
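
Note on the swifterror updates: the swifterror value is carried in %r12
and then handed to free() in %rdi. The checks now match the field load
against 8(%rdi) instead of 8(%r12), and the separate check for
movq %r12, %rdi is dropped -- presumably the copy is emitted ahead of the
load now, outside the checked window, so %rdi already holds the error
pointer when it is dereferenced. The RUN-line change looks purely cosmetic
(option order only), and empty_swiftcc switches to CHECK-APPLE-DAG because
the two remaining return-value copies no longer have a fixed relative
order (see the xsetbv note below).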

Modified: llvm/trunk/test/CodeGen/X86/system-intrinsics-xsetbv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/system-intrinsics-xsetbv.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/system-intrinsics-xsetbv.ll (original)
+++ llvm/trunk/test/CodeGen/X86/system-intrinsics-xsetbv.ll Wed Sep 19 11:59:08 2018
@@ -11,8 +11,8 @@ define void @test_xsetbv(i32 %in, i32 %h
 
 ; CHECK64-LABEL: test_xsetbv
 ; CHECK64: movl  %edx, %eax
-; CHECK64: movl  %edi, %ecx
-; CHECK64: movl  %esi, %edx
+; CHECK64-DAG: movl  %edi, %ecx
+; CHECK64-DAG: movl  %esi, %edx
 ; CHECK64: xsetbv
 ; CHECK64: ret
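
Note on the xsetbv update: xsetbv consumes an implicit XCR index in %ecx
and the new value in %edx:%eax, so all three movl instructions are
argument shuffling. Once %edx has been copied into %eax, the copies into
%ecx and %edx are independent, and with multiple hints their emission
order is no longer guaranteed -- hence CHECK64-DAG, which lets FileCheck
match a group of lines in any order between the surrounding positional
checks:

    ; CHECK64:     movl  %edx, %eax
    ; CHECK64-DAG: movl  %edi, %ecx
    ; CHECK64-DAG: movl  %esi, %edx
    ; CHECK64:     xsetbv

accepts either ordering of the %ecx/%edx copies.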
 

Modified: llvm/trunk/test/CodeGen/X86/tbm-intrinsics-fast-isel-x86_64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/tbm-intrinsics-fast-isel-x86_64.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/tbm-intrinsics-fast-isel-x86_64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/tbm-intrinsics-fast-isel-x86_64.ll Wed Sep 19 11:59:08 2018
@@ -40,10 +40,10 @@ define i64 @test__blcic_u64(i64 %a0) {
 ; X64-LABEL: test__blcic_u64:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    xorq $-1, %rax
-; X64-NEXT:    addq $1, %rdi
-; X64-NEXT:    andq %rax, %rdi
-; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq %rdi, %rcx
+; X64-NEXT:    xorq $-1, %rcx
+; X64-NEXT:    addq $1, %rax
+; X64-NEXT:    andq %rcx, %rax
 ; X64-NEXT:    retq
   %1 = xor i64 %a0, -1
   %2 = add i64 %a0, 1
@@ -89,10 +89,10 @@ define i64 @test__blsic_u64(i64 %a0) {
 ; X64-LABEL: test__blsic_u64:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    xorq $-1, %rax
-; X64-NEXT:    subq $1, %rdi
-; X64-NEXT:    orq %rax, %rdi
-; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq %rdi, %rcx
+; X64-NEXT:    xorq $-1, %rcx
+; X64-NEXT:    subq $1, %rax
+; X64-NEXT:    orq %rcx, %rax
 ; X64-NEXT:    retq
   %1 = xor i64 %a0, -1
   %2 = sub i64 %a0, 1
@@ -104,10 +104,10 @@ define i64 @test__t1mskc_u64(i64 %a0) {
 ; X64-LABEL: test__t1mskc_u64:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    xorq $-1, %rax
-; X64-NEXT:    addq $1, %rdi
-; X64-NEXT:    orq %rax, %rdi
-; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq %rdi, %rcx
+; X64-NEXT:    xorq $-1, %rcx
+; X64-NEXT:    addq $1, %rax
+; X64-NEXT:    orq %rcx, %rax
 ; X64-NEXT:    retq
   %1 = xor i64 %a0, -1
   %2 = add i64 %a0, 1
@@ -119,10 +119,10 @@ define i64 @test__tzmsk_u64(i64 %a0) {
 ; X64-LABEL: test__tzmsk_u64:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    xorq $-1, %rax
-; X64-NEXT:    subq $1, %rdi
-; X64-NEXT:    andq %rax, %rdi
-; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq %rdi, %rcx
+; X64-NEXT:    xorq $-1, %rcx
+; X64-NEXT:    subq $1, %rax
+; X64-NEXT:    andq %rcx, %rax
 ; X64-NEXT:    retq
   %1 = xor i64 %a0, -1
   %2 = sub i64 %a0, 1
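
Note on the TBM fast-isel updates (this file and the 32-bit one below):
fast-isel lowers these intrinsics to plain scalar ops rather than TBM
instructions, and each pattern needs the argument twice -- once
complemented, once incremented or decremented -- so the two-address forms
force a copy either way. With the hints the result is built in %rax from
the start, with %rcx holding the secondary value, trading the old tail
copy into %rax for a leading one at equal count and leaving the ret right
after the final ALU op. For reference, the blcic pattern in plain C
(helper name invented):

    /* blcic = isolate lowest clear bit: a+1 carries through the trailing
       1s of a and sets its first 0 bit; ~a is 1 exactly where a is 0, so
       the AND keeps only that single bit. */
    unsigned long long blcic64(unsigned long long a) {
        return ~a & (a + 1);
    }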

Modified: llvm/trunk/test/CodeGen/X86/tbm-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/tbm-intrinsics-fast-isel.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/tbm-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/tbm-intrinsics-fast-isel.ll Wed Sep 19 11:59:08 2018
@@ -72,10 +72,10 @@ define i32 @test__blcic_u32(i32 %a0) {
 ; X64-LABEL: test__blcic_u32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    xorl $-1, %eax
-; X64-NEXT:    addl $1, %edi
-; X64-NEXT:    andl %eax, %edi
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %edi, %ecx
+; X64-NEXT:    xorl $-1, %ecx
+; X64-NEXT:    addl $1, %eax
+; X64-NEXT:    andl %ecx, %eax
 ; X64-NEXT:    retq
   %1 = xor i32 %a0, -1
   %2 = add i32 %a0, 1
@@ -154,10 +154,10 @@ define i32 @test__blsic_u32(i32 %a0) {
 ; X64-LABEL: test__blsic_u32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    xorl $-1, %eax
-; X64-NEXT:    subl $1, %edi
-; X64-NEXT:    orl %eax, %edi
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %edi, %ecx
+; X64-NEXT:    xorl $-1, %ecx
+; X64-NEXT:    subl $1, %eax
+; X64-NEXT:    orl %ecx, %eax
 ; X64-NEXT:    retq
   %1 = xor i32 %a0, -1
   %2 = sub i32 %a0, 1
@@ -178,10 +178,10 @@ define i32 @test__t1mskc_u32(i32 %a0) {
 ; X64-LABEL: test__t1mskc_u32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    xorl $-1, %eax
-; X64-NEXT:    addl $1, %edi
-; X64-NEXT:    orl %eax, %edi
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %edi, %ecx
+; X64-NEXT:    xorl $-1, %ecx
+; X64-NEXT:    addl $1, %eax
+; X64-NEXT:    orl %ecx, %eax
 ; X64-NEXT:    retq
   %1 = xor i32 %a0, -1
   %2 = add i32 %a0, 1
@@ -202,10 +202,10 @@ define i32 @test__tzmsk_u32(i32 %a0) {
 ; X64-LABEL: test__tzmsk_u32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    xorl $-1, %eax
-; X64-NEXT:    subl $1, %edi
-; X64-NEXT:    andl %eax, %edi
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %edi, %ecx
+; X64-NEXT:    xorl $-1, %ecx
+; X64-NEXT:    subl $1, %eax
+; X64-NEXT:    andl %ecx, %eax
 ; X64-NEXT:    retq
   %1 = xor i32 %a0, -1
   %2 = sub i32 %a0, 1

Modified: llvm/trunk/test/CodeGen/X86/tbm_patterns.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/tbm_patterns.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/tbm_patterns.ll (original)
+++ llvm/trunk/test/CodeGen/X86/tbm_patterns.ll Wed Sep 19 11:59:08 2018
@@ -52,10 +52,10 @@ define i32 @test_x86_tbm_bextri_u32_z(i3
 define i32 @test_x86_tbm_bextri_u32_z2(i32 %a, i32 %b, i32 %c) nounwind {
 ; CHECK-LABEL: test_x86_tbm_bextri_u32_z2:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %esi, %eax
 ; CHECK-NEXT:    shrl $4, %edi
 ; CHECK-NEXT:    testl $4095, %edi # imm = 0xFFF
-; CHECK-NEXT:    cmovnel %edx, %esi
-; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    cmovnel %edx, %eax
 ; CHECK-NEXT:    retq
   %t0 = lshr i32 %a, 4
   %t1 = and i32 %t0, 4095
@@ -113,10 +113,10 @@ define i64 @test_x86_tbm_bextri_u64_z(i6
 define i64 @test_x86_tbm_bextri_u64_z2(i64 %a, i64 %b, i64 %c) nounwind {
 ; CHECK-LABEL: test_x86_tbm_bextri_u64_z2:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rsi, %rax
 ; CHECK-NEXT:    shrl $4, %edi
 ; CHECK-NEXT:    testl $4095, %edi # imm = 0xFFF
-; CHECK-NEXT:    cmovneq %rdx, %rsi
-; CHECK-NEXT:    movq %rsi, %rax
+; CHECK-NEXT:    cmovneq %rdx, %rax
 ; CHECK-NEXT:    retq
   %t0 = lshr i64 %a, 4
   %t1 = and i64 %t0, 4095
@@ -151,11 +151,11 @@ define i32 @test_x86_tbm_blcfill_u32_z(i
 define i32 @test_x86_tbm_blcfill_u32_z2(i32 %a, i32 %b, i32 %c) nounwind {
 ; CHECK-LABEL: test_x86_tbm_blcfill_u32_z2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT:    leal 1(%rdi), %eax
-; CHECK-NEXT:    testl %edi, %eax
-; CHECK-NEXT:    cmovnel %edx, %esi
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    leal 1(%rdi), %ecx
+; CHECK-NEXT:    testl %edi, %ecx
+; CHECK-NEXT:    cmovnel %edx, %eax
 ; CHECK-NEXT:    retq
   %t0 = add i32 %a, 1
   %t1 = and i32 %t0, %a
@@ -190,10 +190,10 @@ define i64 @test_x86_tbm_blcfill_u64_z(i
 define i64 @test_x86_tbm_blcfill_u64_z2(i64 %a, i64 %b, i64 %c) nounwind {
 ; CHECK-LABEL: test_x86_tbm_blcfill_u64_z2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    leaq 1(%rdi), %rax
-; CHECK-NEXT:    testq %rdi, %rax
-; CHECK-NEXT:    cmovneq %rdx, %rsi
 ; CHECK-NEXT:    movq %rsi, %rax
+; CHECK-NEXT:    leaq 1(%rdi), %rcx
+; CHECK-NEXT:    testq %rdi, %rcx
+; CHECK-NEXT:    cmovneq %rdx, %rax
 ; CHECK-NEXT:    retq
   %t0 = add i64 %a, 1
   %t1 = and i64 %t0, %a
@@ -230,12 +230,12 @@ define i32 @test_x86_tbm_blci_u32_z(i32
 define i32 @test_x86_tbm_blci_u32_z2(i32 %a, i32 %b, i32 %c) nounwind {
 ; CHECK-LABEL: test_x86_tbm_blci_u32_z2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT:    leal 1(%rdi), %eax
-; CHECK-NEXT:    notl %eax
-; CHECK-NEXT:    orl %edi, %eax
-; CHECK-NEXT:    cmovnel %edx, %esi
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    leal 1(%rdi), %ecx
+; CHECK-NEXT:    notl %ecx
+; CHECK-NEXT:    orl %edi, %ecx
+; CHECK-NEXT:    cmovnel %edx, %eax
 ; CHECK-NEXT:    retq
   %t0 = add i32 1, %a
   %t1 = xor i32 %t0, -1
@@ -273,11 +273,11 @@ define i64 @test_x86_tbm_blci_u64_z(i64
 define i64 @test_x86_tbm_blci_u64_z2(i64 %a, i64 %b, i64 %c) nounwind {
 ; CHECK-LABEL: test_x86_tbm_blci_u64_z2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    leaq 1(%rdi), %rax
-; CHECK-NEXT:    notq %rax
-; CHECK-NEXT:    orq %rdi, %rax
-; CHECK-NEXT:    cmovneq %rdx, %rsi
 ; CHECK-NEXT:    movq %rsi, %rax
+; CHECK-NEXT:    leaq 1(%rdi), %rcx
+; CHECK-NEXT:    notq %rcx
+; CHECK-NEXT:    orq %rdi, %rcx
+; CHECK-NEXT:    cmovneq %rdx, %rax
 ; CHECK-NEXT:    retq
   %t0 = add i64 1, %a
   %t1 = xor i64 %t0, -1
@@ -335,12 +335,12 @@ define i32 @test_x86_tbm_blcic_u32_z(i32
 define i32 @test_x86_tbm_blcic_u32_z2(i32 %a, i32 %b, i32 %c) nounwind {
 ; CHECK-LABEL: test_x86_tbm_blcic_u32_z2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    notl %eax
-; CHECK-NEXT:    incl %edi
-; CHECK-NEXT:    testl %eax, %edi
-; CHECK-NEXT:    cmovnel %edx, %esi
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    movl %edi, %ecx
+; CHECK-NEXT:    notl %ecx
+; CHECK-NEXT:    incl %edi
+; CHECK-NEXT:    testl %ecx, %edi
+; CHECK-NEXT:    cmovnel %edx, %eax
 ; CHECK-NEXT:    retq
   %t0 = xor i32 %a, -1
   %t1 = add i32 %a, 1
@@ -378,12 +378,12 @@ define i64 @test_x86_tbm_blcic_u64_z(i64
 define i64 @test_x86_tbm_blcic_u64_z2(i64 %a, i64 %b, i64 %c) nounwind {
 ; CHECK-LABEL: test_x86_tbm_blcic_u64_z2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %rdi, %rax
-; CHECK-NEXT:    notq %rax
-; CHECK-NEXT:    incq %rdi
-; CHECK-NEXT:    testq %rax, %rdi
-; CHECK-NEXT:    cmovneq %rdx, %rsi
 ; CHECK-NEXT:    movq %rsi, %rax
+; CHECK-NEXT:    movq %rdi, %rcx
+; CHECK-NEXT:    notq %rcx
+; CHECK-NEXT:    incq %rdi
+; CHECK-NEXT:    testq %rcx, %rdi
+; CHECK-NEXT:    cmovneq %rdx, %rax
 ; CHECK-NEXT:    retq
   %t0 = xor i64 %a, -1
   %t1 = add i64 %a, 1
@@ -419,11 +419,11 @@ define i32 @test_x86_tbm_blcmsk_u32_z(i3
 define i32 @test_x86_tbm_blcmsk_u32_z2(i32 %a, i32 %b, i32 %c) nounwind {
 ; CHECK-LABEL: test_x86_tbm_blcmsk_u32_z2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT:    leal 1(%rdi), %eax
-; CHECK-NEXT:    xorl %edi, %eax
-; CHECK-NEXT:    cmovnel %edx, %esi
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    leal 1(%rdi), %ecx
+; CHECK-NEXT:    xorl %edi, %ecx
+; CHECK-NEXT:    cmovnel %edx, %eax
 ; CHECK-NEXT:    retq
   %t0 = add i32 %a, 1
   %t1 = xor i32 %t0, %a
@@ -458,10 +458,10 @@ define i64 @test_x86_tbm_blcmsk_u64_z(i6
 define i64 @test_x86_tbm_blcmsk_u64_z2(i64 %a, i64 %b, i64 %c) nounwind {
 ; CHECK-LABEL: test_x86_tbm_blcmsk_u64_z2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    leaq 1(%rdi), %rax
-; CHECK-NEXT:    xorq %rdi, %rax
-; CHECK-NEXT:    cmovneq %rdx, %rsi
 ; CHECK-NEXT:    movq %rsi, %rax
+; CHECK-NEXT:    leaq 1(%rdi), %rcx
+; CHECK-NEXT:    xorq %rdi, %rcx
+; CHECK-NEXT:    cmovneq %rdx, %rax
 ; CHECK-NEXT:    retq
   %t0 = add i64 %a, 1
   %t1 = xor i64 %t0, %a
@@ -496,11 +496,11 @@ define i32 @test_x86_tbm_blcs_u32_z(i32
 define i32 @test_x86_tbm_blcs_u32_z2(i32 %a, i32 %b, i32 %c) nounwind {
 ; CHECK-LABEL: test_x86_tbm_blcs_u32_z2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT:    leal 1(%rdi), %eax
-; CHECK-NEXT:    orl %edi, %eax
-; CHECK-NEXT:    cmovnel %edx, %esi
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    leal 1(%rdi), %ecx
+; CHECK-NEXT:    orl %edi, %ecx
+; CHECK-NEXT:    cmovnel %edx, %eax
 ; CHECK-NEXT:    retq
   %t0 = add i32 %a, 1
   %t1 = or i32 %t0, %a
@@ -535,10 +535,10 @@ define i64 @test_x86_tbm_blcs_u64_z(i64
 define i64 @test_x86_tbm_blcs_u64_z2(i64 %a, i64 %b, i64 %c) nounwind {
 ; CHECK-LABEL: test_x86_tbm_blcs_u64_z2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    leaq 1(%rdi), %rax
-; CHECK-NEXT:    orq %rdi, %rax
-; CHECK-NEXT:    cmovneq %rdx, %rsi
 ; CHECK-NEXT:    movq %rsi, %rax
+; CHECK-NEXT:    leaq 1(%rdi), %rcx
+; CHECK-NEXT:    orq %rdi, %rcx
+; CHECK-NEXT:    cmovneq %rdx, %rax
 ; CHECK-NEXT:    retq
   %t0 = add i64 %a, 1
   %t1 = or i64 %t0, %a
@@ -573,11 +573,11 @@ define i32 @test_x86_tbm_blsfill_u32_z(i
 define i32 @test_x86_tbm_blsfill_u32_z2(i32 %a, i32 %b, i32 %c) nounwind {
 ; CHECK-LABEL: test_x86_tbm_blsfill_u32_z2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT:    leal -1(%rdi), %eax
-; CHECK-NEXT:    orl %edi, %eax
-; CHECK-NEXT:    cmovnel %edx, %esi
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    leal -1(%rdi), %ecx
+; CHECK-NEXT:    orl %edi, %ecx
+; CHECK-NEXT:    cmovnel %edx, %eax
 ; CHECK-NEXT:    retq
   %t0 = add i32 %a, -1
   %t1 = or i32 %t0, %a
@@ -612,10 +612,10 @@ define i64 @test_x86_tbm_blsfill_u64_z(i
 define i64 @test_x86_tbm_blsfill_u64_z2(i64 %a, i64 %b, i64 %c) nounwind {
 ; CHECK-LABEL: test_x86_tbm_blsfill_u64_z2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    leaq -1(%rdi), %rax
-; CHECK-NEXT:    orq %rdi, %rax
-; CHECK-NEXT:    cmovneq %rdx, %rsi
 ; CHECK-NEXT:    movq %rsi, %rax
+; CHECK-NEXT:    leaq -1(%rdi), %rcx
+; CHECK-NEXT:    orq %rdi, %rcx
+; CHECK-NEXT:    cmovneq %rdx, %rax
 ; CHECK-NEXT:    retq
   %t0 = add i64 %a, -1
   %t1 = or i64 %t0, %a
@@ -652,12 +652,12 @@ define i32 @test_x86_tbm_blsic_u32_z(i32
 define i32 @test_x86_tbm_blsic_u32_z2(i32 %a, i32 %b, i32 %c) nounwind {
 ; CHECK-LABEL: test_x86_tbm_blsic_u32_z2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    notl %eax
-; CHECK-NEXT:    decl %edi
-; CHECK-NEXT:    orl %eax, %edi
-; CHECK-NEXT:    cmovnel %edx, %esi
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    movl %edi, %ecx
+; CHECK-NEXT:    notl %ecx
+; CHECK-NEXT:    decl %edi
+; CHECK-NEXT:    orl %ecx, %edi
+; CHECK-NEXT:    cmovnel %edx, %eax
 ; CHECK-NEXT:    retq
   %t0 = xor i32 %a, -1
   %t1 = add i32 %a, -1
@@ -695,12 +695,12 @@ define i64 @test_x86_tbm_blsic_u64_z(i64
 define i64 @test_x86_tbm_blsic_u64_z2(i64 %a, i64 %b, i64 %c) nounwind {
 ; CHECK-LABEL: test_x86_tbm_blsic_u64_z2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %rdi, %rax
-; CHECK-NEXT:    notq %rax
-; CHECK-NEXT:    decq %rdi
-; CHECK-NEXT:    orq %rax, %rdi
-; CHECK-NEXT:    cmovneq %rdx, %rsi
 ; CHECK-NEXT:    movq %rsi, %rax
+; CHECK-NEXT:    movq %rdi, %rcx
+; CHECK-NEXT:    notq %rcx
+; CHECK-NEXT:    decq %rdi
+; CHECK-NEXT:    orq %rcx, %rdi
+; CHECK-NEXT:    cmovneq %rdx, %rax
 ; CHECK-NEXT:    retq
   %t0 = xor i64 %a, -1
   %t1 = add i64 %a, -1
@@ -739,12 +739,12 @@ define i32 @test_x86_tbm_t1mskc_u32_z(i3
 define i32 @test_x86_tbm_t1mskc_u32_z2(i32 %a, i32 %b, i32 %c) nounwind {
 ; CHECK-LABEL: test_x86_tbm_t1mskc_u32_z2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    notl %eax
-; CHECK-NEXT:    incl %edi
-; CHECK-NEXT:    orl %eax, %edi
-; CHECK-NEXT:    cmovnel %edx, %esi
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    movl %edi, %ecx
+; CHECK-NEXT:    notl %ecx
+; CHECK-NEXT:    incl %edi
+; CHECK-NEXT:    orl %ecx, %edi
+; CHECK-NEXT:    cmovnel %edx, %eax
 ; CHECK-NEXT:    retq
   %t0 = xor i32 %a, -1
   %t1 = add i32 %a, 1
@@ -783,12 +783,12 @@ define i64 @test_x86_tbm_t1mskc_u64_z(i6
 define i64 @test_x86_tbm_t1mskc_u64_z2(i64 %a, i64 %b, i64 %c) nounwind {
 ; CHECK-LABEL: test_x86_tbm_t1mskc_u64_z2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %rdi, %rax
-; CHECK-NEXT:    notq %rax
-; CHECK-NEXT:    incq %rdi
-; CHECK-NEXT:    orq %rax, %rdi
-; CHECK-NEXT:    cmovneq %rdx, %rsi
 ; CHECK-NEXT:    movq %rsi, %rax
+; CHECK-NEXT:    movq %rdi, %rcx
+; CHECK-NEXT:    notq %rcx
+; CHECK-NEXT:    incq %rdi
+; CHECK-NEXT:    orq %rcx, %rdi
+; CHECK-NEXT:    cmovneq %rdx, %rax
 ; CHECK-NEXT:    retq
   %t0 = xor i64 %a, -1
   %t1 = add i64 %a, 1
@@ -827,12 +827,12 @@ define i32 @test_x86_tbm_tzmsk_u32_z(i32
 define i32 @test_x86_tbm_tzmsk_u32_z2(i32 %a, i32 %b, i32 %c) nounwind {
 ; CHECK-LABEL: test_x86_tbm_tzmsk_u32_z2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    notl %eax
-; CHECK-NEXT:    decl %edi
-; CHECK-NEXT:    testl %edi, %eax
-; CHECK-NEXT:    cmovnel %edx, %esi
 ; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    movl %edi, %ecx
+; CHECK-NEXT:    notl %ecx
+; CHECK-NEXT:    decl %edi
+; CHECK-NEXT:    testl %edi, %ecx
+; CHECK-NEXT:    cmovnel %edx, %eax
 ; CHECK-NEXT:    retq
   %t0 = xor i32 %a, -1
   %t1 = add i32 %a, -1
@@ -871,12 +871,12 @@ define i64 @test_x86_tbm_tzmsk_u64_z(i64
 define i64 @test_x86_tbm_tzmsk_u64_z2(i64 %a, i64 %b, i64 %c) nounwind {
 ; CHECK-LABEL: test_x86_tbm_tzmsk_u64_z2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %rdi, %rax
-; CHECK-NEXT:    notq %rax
-; CHECK-NEXT:    decq %rdi
-; CHECK-NEXT:    testq %rdi, %rax
-; CHECK-NEXT:    cmovneq %rdx, %rsi
 ; CHECK-NEXT:    movq %rsi, %rax
+; CHECK-NEXT:    movq %rdi, %rcx
+; CHECK-NEXT:    notq %rcx
+; CHECK-NEXT:    decq %rdi
+; CHECK-NEXT:    testq %rdi, %rcx
+; CHECK-NEXT:    cmovneq %rdx, %rax
 ; CHECK-NEXT:    retq
   %t0 = xor i64 %a, -1
   %t1 = add i64 %a, -1
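
Note on the tbm_patterns _z2 updates: these exercise the flag-only forms,
where the TBM computation feeds nothing but a select. The fallback value
(%esi/%rsi) is now copied into the return register first and the cmov
conditionally overwrites it there, instead of cmov-ing into %esi/%rsi and
copying the result out afterwards. The blcfill case as C (helper name
invented):

    /* Matches test_x86_tbm_blcfill_u32_z2: (a + 1) & a is computed only
       for its zero flag; b starts out in %eax, and a single
       "cmovnel %edx, %eax" replaces the old cmov-then-copy pair. */
    unsigned blcfill_z2(unsigned a, unsigned b, unsigned c) {
        return ((a + 1) & a) == 0 ? b : c;
    }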

Modified: llvm/trunk/test/CodeGen/X86/trunc-subvector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/trunc-subvector.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/trunc-subvector.ll (original)
+++ llvm/trunk/test/CodeGen/X86/trunc-subvector.ll Wed Sep 19 11:59:08 2018
@@ -41,9 +41,8 @@ define <2 x i32> @test3(<8 x i32> %v) {
 ; SSE2-LABEL: test3:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
-; SSE2-NEXT:    psrad $31, %xmm0
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE2-NEXT:    movdqa %xmm1, %xmm0
+; SSE2-NEXT:    psrad $31, %xmm1
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; SSE2-NEXT:    retq
 ;
 ; AVX2-LABEL: test3:
@@ -165,9 +164,9 @@ define <4 x i32> @test7(<8 x i32> %v) {
 define <2 x i32> @test8(<8 x i32> %v) {
 ; SSE2-LABEL: test8:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    xorps %xmm0, %xmm0
-; SSE2-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    xorps %xmm1, %xmm1
+; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; SSE2-NEXT:    retq
 ;
 ; AVX2-LABEL: test8:
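
Note on the trunc-subvector updates: test3 is one of the places where the
hint removes an instruction outright -- the copy of %xmm1 into %xmm0 now
comes first, psrad computes the sign mask in %xmm1, and punpckldq writes
%xmm0 directly, so the trailing movdqa is gone (four instructions down to
three). test8 is the same reshuffle at equal count.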

Modified: llvm/trunk/test/CodeGen/X86/twoaddr-lea.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/twoaddr-lea.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/twoaddr-lea.ll (original)
+++ llvm/trunk/test/CodeGen/X86/twoaddr-lea.ll Wed Sep 19 11:59:08 2018
@@ -11,8 +11,8 @@
 
 define i32 @test1(i32 %X) nounwind {
 ; CHECK-LABEL: test1:
-; CHECK-NOT: mov
-; CHECK: leal 1(%rdi)
+; CHECK: movl %edi, %eax
+; CHECK: leal 1(%rax)
         %Z = add i32 %X, 1
         store volatile i32 %Z, i32* @G
         ret i32 %X
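
Note on the twoaddr-lea update: this is the trade-off case. test1 returns
%X while also storing %X + 1, so the input has to survive the LEA; with
the hint the argument is copied into %eax before the LEA and the address
becomes 1(%rax), which invalidates the old CHECK-NOT: mov assertion that
no copy preceded the lea.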

Modified: llvm/trunk/test/CodeGen/X86/umul-with-overflow.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/umul-with-overflow.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/umul-with-overflow.ll (original)
+++ llvm/trunk/test/CodeGen/X86/umul-with-overflow.ll Wed Sep 19 11:59:08 2018
@@ -15,8 +15,8 @@ define zeroext i1 @a(i32 %x)  nounwind {
 ;
 ; X64-LABEL: a:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl $3, %ecx
 ; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl $3, %ecx
 ; X64-NEXT:    mull %ecx
 ; X64-NEXT:    seto %al
 ; X64-NEXT:    retq

Modified: llvm/trunk/test/CodeGen/X86/umulo-128-legalisation-lowering.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/umulo-128-legalisation-lowering.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/umulo-128-legalisation-lowering.ll (original)
+++ llvm/trunk/test/CodeGen/X86/umulo-128-legalisation-lowering.ll Wed Sep 19 11:59:08 2018
@@ -6,13 +6,13 @@ define { i128, i8 } @muloti_test(i128 %l
 ; X64-LABEL: muloti_test:
 ; X64:       # %bb.0: # %start
 ; X64-NEXT:    movq %rdx, %r8
+; X64-NEXT:    movq %rsi, %rax
 ; X64-NEXT:    testq %rcx, %rcx
-; X64-NEXT:    setne %al
+; X64-NEXT:    setne %dl
 ; X64-NEXT:    testq %rsi, %rsi
 ; X64-NEXT:    setne %r9b
-; X64-NEXT:    andb %al, %r9b
-; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    mulq %rdx
+; X64-NEXT:    andb %dl, %r9b
+; X64-NEXT:    mulq %r8
 ; X64-NEXT:    movq %rax, %rsi
 ; X64-NEXT:    seto %r10b
 ; X64-NEXT:    movq %rcx, %rax
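
Note on the muloti_test update: mulq implicitly multiplies by %rax, so the
hinted movq %rsi, %rax now runs up front. That makes %al unavailable for
the first setne, so the flag lands in %dl instead; that in turn clobbers
the low byte of %rdx, which is why the multiply reads the multiplier from
the saved copy as mulq %r8. The instruction count is unchanged -- the
copies just move earlier.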

Modified: llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-innerouter.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-innerouter.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-innerouter.ll (original)
+++ llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-innerouter.ll Wed Sep 19 11:59:08 2018
@@ -10,18 +10,20 @@
 define i8 @out8_constmask(i8 %x, i8 %y) {
 ; CHECK-NOBMI-LABEL: out8_constmask:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    andb $60, %dil
-; CHECK-NOBMI-NEXT:    andb $-61, %sil
-; CHECK-NOBMI-NEXT:    orb %dil, %sil
 ; CHECK-NOBMI-NEXT:    movl %esi, %eax
+; CHECK-NOBMI-NEXT:    andb $60, %dil
+; CHECK-NOBMI-NEXT:    andb $-61, %al
+; CHECK-NOBMI-NEXT:    orb %dil, %al
+; CHECK-NOBMI-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: out8_constmask:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    andb $60, %dil
-; CHECK-BMI-NEXT:    andb $-61, %sil
-; CHECK-BMI-NEXT:    orb %dil, %sil
 ; CHECK-BMI-NEXT:    movl %esi, %eax
+; CHECK-BMI-NEXT:    andb $60, %dil
+; CHECK-BMI-NEXT:    andb $-61, %al
+; CHECK-BMI-NEXT:    orb %dil, %al
+; CHECK-BMI-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-BMI-NEXT:    retq
   %mx = and i8 %x, 60
   %my = and i8 %y, -61
@@ -110,18 +112,20 @@ define i64 @out64_constmask(i64 %x, i64
 define i8 @in8_constmask(i8 %x, i8 %y) {
 ; CHECK-NOBMI-LABEL: in8_constmask:
 ; CHECK-NOBMI:       # %bb.0:
+; CHECK-NOBMI-NEXT:    movl %esi, %eax
 ; CHECK-NOBMI-NEXT:    xorl %esi, %edi
 ; CHECK-NOBMI-NEXT:    andb $60, %dil
-; CHECK-NOBMI-NEXT:    xorb %dil, %sil
-; CHECK-NOBMI-NEXT:    movl %esi, %eax
+; CHECK-NOBMI-NEXT:    xorb %dil, %al
+; CHECK-NOBMI-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in8_constmask:
 ; CHECK-BMI:       # %bb.0:
+; CHECK-BMI-NEXT:    movl %esi, %eax
 ; CHECK-BMI-NEXT:    xorl %esi, %edi
 ; CHECK-BMI-NEXT:    andb $60, %dil
-; CHECK-BMI-NEXT:    xorb %dil, %sil
-; CHECK-BMI-NEXT:    movl %esi, %eax
+; CHECK-BMI-NEXT:    xorb %dil, %al
+; CHECK-BMI-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-BMI-NEXT:    retq
   %n0 = xor i8 %x, %y
   %n1 = and i8 %n0, 60
@@ -132,18 +136,20 @@ define i8 @in8_constmask(i8 %x, i8 %y) {
 define i16 @in16_constmask(i16 %x, i16 %y) {
 ; CHECK-NOBMI-LABEL: in16_constmask:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    andl $4080, %edi # imm = 0xFF0
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl $4080, %eax # imm = 0xFF0
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in16_constmask:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    andl $4080, %edi # imm = 0xFF0
-; CHECK-BMI-NEXT:    xorl %esi, %edi
 ; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    xorl %esi, %eax
+; CHECK-BMI-NEXT:    andl $4080, %eax # imm = 0xFF0
+; CHECK-BMI-NEXT:    xorl %esi, %eax
+; CHECK-BMI-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-BMI-NEXT:    retq
   %n0 = xor i16 %x, %y
   %n1 = and i16 %n0, 4080
@@ -154,18 +160,18 @@ define i16 @in16_constmask(i16 %x, i16 %
 define i32 @in32_constmask(i32 %x, i32 %y) {
 ; CHECK-NOBMI-LABEL: in32_constmask:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    andl $16776960, %edi # imm = 0xFFFF00
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl $16776960, %eax # imm = 0xFFFF00
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in32_constmask:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    andl $16776960, %edi # imm = 0xFFFF00
-; CHECK-BMI-NEXT:    xorl %esi, %edi
 ; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    xorl %esi, %eax
+; CHECK-BMI-NEXT:    andl $16776960, %eax # imm = 0xFFFF00
+; CHECK-BMI-NEXT:    xorl %esi, %eax
 ; CHECK-BMI-NEXT:    retq
   %n0 = xor i32 %x, %y
   %n1 = and i32 %n0, 16776960
@@ -202,18 +208,18 @@ define i64 @in64_constmask(i64 %x, i64 %
 define i32 @in_constmask_commutativity_0_1(i32 %x, i32 %y) {
 ; CHECK-NOBMI-LABEL: in_constmask_commutativity_0_1:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    andl $16776960, %edi # imm = 0xFFFF00
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl $16776960, %eax # imm = 0xFFFF00
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in_constmask_commutativity_0_1:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    andl $16776960, %edi # imm = 0xFFFF00
-; CHECK-BMI-NEXT:    xorl %esi, %edi
 ; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    xorl %esi, %eax
+; CHECK-BMI-NEXT:    andl $16776960, %eax # imm = 0xFFFF00
+; CHECK-BMI-NEXT:    xorl %esi, %eax
 ; CHECK-BMI-NEXT:    retq
   %n0 = xor i32 %x, %y
   %n1 = and i32 %n0, 16776960
@@ -224,18 +230,18 @@ define i32 @in_constmask_commutativity_0
 define i32 @in_constmask_commutativity_1_0(i32 %x, i32 %y) {
 ; CHECK-NOBMI-LABEL: in_constmask_commutativity_1_0:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    xorl %edi, %esi
-; CHECK-NOBMI-NEXT:    andl $16776960, %esi # imm = 0xFFFF00
-; CHECK-NOBMI-NEXT:    xorl %edi, %esi
 ; CHECK-NOBMI-NEXT:    movl %esi, %eax
+; CHECK-NOBMI-NEXT:    xorl %edi, %eax
+; CHECK-NOBMI-NEXT:    andl $16776960, %eax # imm = 0xFFFF00
+; CHECK-NOBMI-NEXT:    xorl %edi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in_constmask_commutativity_1_0:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    xorl %edi, %esi
-; CHECK-BMI-NEXT:    andl $16776960, %esi # imm = 0xFFFF00
-; CHECK-BMI-NEXT:    xorl %edi, %esi
 ; CHECK-BMI-NEXT:    movl %esi, %eax
+; CHECK-BMI-NEXT:    xorl %edi, %eax
+; CHECK-BMI-NEXT:    andl $16776960, %eax # imm = 0xFFFF00
+; CHECK-BMI-NEXT:    xorl %edi, %eax
 ; CHECK-BMI-NEXT:    retq
   %n0 = xor i32 %x, %y
   %n1 = and i32 %n0, 16776960
@@ -246,18 +252,18 @@ define i32 @in_constmask_commutativity_1
 define i32 @in_constmask_commutativity_1_1(i32 %x, i32 %y) {
 ; CHECK-NOBMI-LABEL: in_constmask_commutativity_1_1:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    xorl %edi, %esi
-; CHECK-NOBMI-NEXT:    andl $16776960, %esi # imm = 0xFFFF00
-; CHECK-NOBMI-NEXT:    xorl %edi, %esi
 ; CHECK-NOBMI-NEXT:    movl %esi, %eax
+; CHECK-NOBMI-NEXT:    xorl %edi, %eax
+; CHECK-NOBMI-NEXT:    andl $16776960, %eax # imm = 0xFFFF00
+; CHECK-NOBMI-NEXT:    xorl %edi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in_constmask_commutativity_1_1:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    xorl %edi, %esi
-; CHECK-BMI-NEXT:    andl $16776960, %esi # imm = 0xFFFF00
-; CHECK-BMI-NEXT:    xorl %edi, %esi
 ; CHECK-BMI-NEXT:    movl %esi, %eax
+; CHECK-BMI-NEXT:    xorl %edi, %eax
+; CHECK-BMI-NEXT:    andl $16776960, %eax # imm = 0xFFFF00
+; CHECK-BMI-NEXT:    xorl %edi, %eax
 ; CHECK-BMI-NEXT:    retq
   %n0 = xor i32 %x, %y
   %n1 = and i32 %n0, 16776960
@@ -272,20 +278,20 @@ define i32 @in_constmask_commutativity_1
 define i32 @in_complex_y0_constmask(i32 %x, i32 %y_hi, i32 %y_low) {
 ; CHECK-NOBMI-LABEL: in_complex_y0_constmask:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    andl %edx, %esi
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    andl $16776960, %edi # imm = 0xFFFF00
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    andl %edx, %esi
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl $16776960, %eax # imm = 0xFFFF00
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in_complex_y0_constmask:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    andl %edx, %esi
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    andl $16776960, %edi # imm = 0xFFFF00
-; CHECK-BMI-NEXT:    xorl %esi, %edi
 ; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    andl %edx, %esi
+; CHECK-BMI-NEXT:    xorl %esi, %eax
+; CHECK-BMI-NEXT:    andl $16776960, %eax # imm = 0xFFFF00
+; CHECK-BMI-NEXT:    xorl %esi, %eax
 ; CHECK-BMI-NEXT:    retq
   %y = and i32 %y_hi, %y_low
   %n0 = xor i32 %x, %y
@@ -297,20 +303,20 @@ define i32 @in_complex_y0_constmask(i32
 define i32 @in_complex_y1_constmask(i32 %x, i32 %y_hi, i32 %y_low) {
 ; CHECK-NOBMI-LABEL: in_complex_y1_constmask:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    andl %edx, %esi
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    andl $16776960, %edi # imm = 0xFFFF00
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    andl %edx, %esi
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl $16776960, %eax # imm = 0xFFFF00
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in_complex_y1_constmask:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    andl %edx, %esi
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    andl $16776960, %edi # imm = 0xFFFF00
-; CHECK-BMI-NEXT:    xorl %esi, %edi
 ; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    andl %edx, %esi
+; CHECK-BMI-NEXT:    xorl %esi, %eax
+; CHECK-BMI-NEXT:    andl $16776960, %eax # imm = 0xFFFF00
+; CHECK-BMI-NEXT:    xorl %esi, %eax
 ; CHECK-BMI-NEXT:    retq
   %y = and i32 %y_hi, %y_low
   %n0 = xor i32 %x, %y
@@ -416,18 +422,18 @@ define i32 @in_multiuse_B_constmask(i32
 define i32 @n0_badconstmask(i32 %x, i32 %y) {
 ; CHECK-NOBMI-LABEL: n0_badconstmask:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    andl $16776960, %edi # imm = 0xFFFF00
-; CHECK-NOBMI-NEXT:    andl $-16776960, %esi # imm = 0xFF000100
-; CHECK-NOBMI-NEXT:    orl %edi, %esi
 ; CHECK-NOBMI-NEXT:    movl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl $16776960, %edi # imm = 0xFFFF00
+; CHECK-NOBMI-NEXT:    andl $-16776960, %eax # imm = 0xFF000100
+; CHECK-NOBMI-NEXT:    orl %edi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: n0_badconstmask:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    andl $16776960, %edi # imm = 0xFFFF00
-; CHECK-BMI-NEXT:    andl $-16776960, %esi # imm = 0xFF000100
-; CHECK-BMI-NEXT:    orl %edi, %esi
 ; CHECK-BMI-NEXT:    movl %esi, %eax
+; CHECK-BMI-NEXT:    andl $16776960, %edi # imm = 0xFFFF00
+; CHECK-BMI-NEXT:    andl $-16776960, %eax # imm = 0xFF000100
+; CHECK-BMI-NEXT:    orl %edi, %eax
 ; CHECK-BMI-NEXT:    retq
   %mx = and i32 %x, 16776960
   %my = and i32 %y, -16776960 ; instead of -16776961
@@ -438,18 +444,18 @@ define i32 @n0_badconstmask(i32 %x, i32
 define i32 @n1_thirdvar_constmask(i32 %x, i32 %y, i32 %z) {
 ; CHECK-NOBMI-LABEL: n1_thirdvar_constmask:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    andl $16776960, %edi # imm = 0xFFFF00
-; CHECK-NOBMI-NEXT:    xorl %edx, %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl $16776960, %eax # imm = 0xFFFF00
+; CHECK-NOBMI-NEXT:    xorl %edx, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: n1_thirdvar_constmask:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    andl $16776960, %edi # imm = 0xFFFF00
-; CHECK-BMI-NEXT:    xorl %edx, %edi
 ; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    xorl %esi, %eax
+; CHECK-BMI-NEXT:    andl $16776960, %eax # imm = 0xFFFF00
+; CHECK-BMI-NEXT:    xorl %edx, %eax
 ; CHECK-BMI-NEXT:    retq
   %n0 = xor i32 %x, %y
   %n1 = and i32 %n0, 16776960

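A note on the pattern repeated throughout these test diffs: with the new
copy hints, the register allocator now assigns each result directly to the
ABI return register, so the copy into %eax (or %rax) is emitted first and
the xor/and/xor sequence then operates on %eax in place, leaving the
incoming argument registers untouched for any later uses. The instruction
count is unchanged; only the operand assignment moves. All of the in*/out*
functions in these files exercise the two equivalent masked-merge idioms,
sketched below in plain C (a hypothetical illustration, not code from the
patch):

    #include <assert.h>
    #include <stdint.h>

    /* out-pattern: take bits of x where the mask is set, bits of y elsewhere */
    static uint32_t out_merge(uint32_t x, uint32_t y, uint32_t mask) {
        return (x & mask) | (y & ~mask);
    }

    /* in-pattern: the same selection folded into xor/and/xor, as in the
       IR above: n0 = x ^ y; n1 = n0 & mask; result = n1 ^ y */
    static uint32_t in_merge(uint32_t x, uint32_t y, uint32_t mask) {
        return ((x ^ y) & mask) ^ y;
    }

    int main(void) {
        /* the two forms agree for all inputs; spot-check one triple */
        assert(out_merge(0x12345678u, 0x9abcdef0u, 0x00ffff00u) ==
               in_merge(0x12345678u, 0x9abcdef0u, 0x00ffff00u));
        return 0;
    }
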
Modified: llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-interleavedbits.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-interleavedbits.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-interleavedbits.ll (original)
+++ llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-interleavedbits.ll Wed Sep 19 11:59:08 2018
@@ -10,18 +10,20 @@
 define i8 @out8_constmask(i8 %x, i8 %y) {
 ; CHECK-NOBMI-LABEL: out8_constmask:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    andb $85, %dil
-; CHECK-NOBMI-NEXT:    andb $-86, %sil
-; CHECK-NOBMI-NEXT:    orb %dil, %sil
 ; CHECK-NOBMI-NEXT:    movl %esi, %eax
+; CHECK-NOBMI-NEXT:    andb $85, %dil
+; CHECK-NOBMI-NEXT:    andb $-86, %al
+; CHECK-NOBMI-NEXT:    orb %dil, %al
+; CHECK-NOBMI-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: out8_constmask:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    andb $85, %dil
-; CHECK-BMI-NEXT:    andb $-86, %sil
-; CHECK-BMI-NEXT:    orb %dil, %sil
 ; CHECK-BMI-NEXT:    movl %esi, %eax
+; CHECK-BMI-NEXT:    andb $85, %dil
+; CHECK-BMI-NEXT:    andb $-86, %al
+; CHECK-BMI-NEXT:    orb %dil, %al
+; CHECK-BMI-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-BMI-NEXT:    retq
   %mx = and i8 %x, 85
   %my = and i8 %y, -86
@@ -110,18 +112,20 @@ define i64 @out64_constmask(i64 %x, i64
 define i8 @in8_constmask(i8 %x, i8 %y) {
 ; CHECK-NOBMI-LABEL: in8_constmask:
 ; CHECK-NOBMI:       # %bb.0:
+; CHECK-NOBMI-NEXT:    movl %esi, %eax
 ; CHECK-NOBMI-NEXT:    xorl %esi, %edi
 ; CHECK-NOBMI-NEXT:    andb $85, %dil
-; CHECK-NOBMI-NEXT:    xorb %dil, %sil
-; CHECK-NOBMI-NEXT:    movl %esi, %eax
+; CHECK-NOBMI-NEXT:    xorb %dil, %al
+; CHECK-NOBMI-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in8_constmask:
 ; CHECK-BMI:       # %bb.0:
+; CHECK-BMI-NEXT:    movl %esi, %eax
 ; CHECK-BMI-NEXT:    xorl %esi, %edi
 ; CHECK-BMI-NEXT:    andb $85, %dil
-; CHECK-BMI-NEXT:    xorb %dil, %sil
-; CHECK-BMI-NEXT:    movl %esi, %eax
+; CHECK-BMI-NEXT:    xorb %dil, %al
+; CHECK-BMI-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-BMI-NEXT:    retq
   %n0 = xor i8 %x, %y
   %n1 = and i8 %n0, 85
@@ -132,18 +136,20 @@ define i8 @in8_constmask(i8 %x, i8 %y) {
 define i16 @in16_constmask(i16 %x, i16 %y) {
 ; CHECK-NOBMI-LABEL: in16_constmask:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    andl $21845, %edi # imm = 0x5555
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl $21845, %eax # imm = 0x5555
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in16_constmask:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    andl $21845, %edi # imm = 0x5555
-; CHECK-BMI-NEXT:    xorl %esi, %edi
 ; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    xorl %esi, %eax
+; CHECK-BMI-NEXT:    andl $21845, %eax # imm = 0x5555
+; CHECK-BMI-NEXT:    xorl %esi, %eax
+; CHECK-BMI-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-BMI-NEXT:    retq
   %n0 = xor i16 %x, %y
   %n1 = and i16 %n0, 21845
@@ -154,18 +160,18 @@ define i16 @in16_constmask(i16 %x, i16 %
 define i32 @in32_constmask(i32 %x, i32 %y) {
 ; CHECK-NOBMI-LABEL: in32_constmask:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    andl $1431655765, %edi # imm = 0x55555555
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in32_constmask:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    andl $1431655765, %edi # imm = 0x55555555
-; CHECK-BMI-NEXT:    xorl %esi, %edi
 ; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    xorl %esi, %eax
+; CHECK-BMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; CHECK-BMI-NEXT:    xorl %esi, %eax
 ; CHECK-BMI-NEXT:    retq
   %n0 = xor i32 %x, %y
   %n1 = and i32 %n0, 1431655765
@@ -202,18 +208,18 @@ define i64 @in64_constmask(i64 %x, i64 %
 define i32 @in_constmask_commutativity_0_1(i32 %x, i32 %y) {
 ; CHECK-NOBMI-LABEL: in_constmask_commutativity_0_1:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    andl $1431655765, %edi # imm = 0x55555555
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in_constmask_commutativity_0_1:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    andl $1431655765, %edi # imm = 0x55555555
-; CHECK-BMI-NEXT:    xorl %esi, %edi
 ; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    xorl %esi, %eax
+; CHECK-BMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; CHECK-BMI-NEXT:    xorl %esi, %eax
 ; CHECK-BMI-NEXT:    retq
   %n0 = xor i32 %x, %y
   %n1 = and i32 %n0, 1431655765
@@ -224,18 +230,18 @@ define i32 @in_constmask_commutativity_0
 define i32 @in_constmask_commutativity_1_0(i32 %x, i32 %y) {
 ; CHECK-NOBMI-LABEL: in_constmask_commutativity_1_0:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    xorl %edi, %esi
-; CHECK-NOBMI-NEXT:    andl $1431655765, %esi # imm = 0x55555555
-; CHECK-NOBMI-NEXT:    xorl %edi, %esi
 ; CHECK-NOBMI-NEXT:    movl %esi, %eax
+; CHECK-NOBMI-NEXT:    xorl %edi, %eax
+; CHECK-NOBMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; CHECK-NOBMI-NEXT:    xorl %edi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in_constmask_commutativity_1_0:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    xorl %edi, %esi
-; CHECK-BMI-NEXT:    andl $1431655765, %esi # imm = 0x55555555
-; CHECK-BMI-NEXT:    xorl %edi, %esi
 ; CHECK-BMI-NEXT:    movl %esi, %eax
+; CHECK-BMI-NEXT:    xorl %edi, %eax
+; CHECK-BMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; CHECK-BMI-NEXT:    xorl %edi, %eax
 ; CHECK-BMI-NEXT:    retq
   %n0 = xor i32 %x, %y
   %n1 = and i32 %n0, 1431655765
@@ -246,18 +252,18 @@ define i32 @in_constmask_commutativity_1
 define i32 @in_constmask_commutativity_1_1(i32 %x, i32 %y) {
 ; CHECK-NOBMI-LABEL: in_constmask_commutativity_1_1:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    xorl %edi, %esi
-; CHECK-NOBMI-NEXT:    andl $1431655765, %esi # imm = 0x55555555
-; CHECK-NOBMI-NEXT:    xorl %edi, %esi
 ; CHECK-NOBMI-NEXT:    movl %esi, %eax
+; CHECK-NOBMI-NEXT:    xorl %edi, %eax
+; CHECK-NOBMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; CHECK-NOBMI-NEXT:    xorl %edi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in_constmask_commutativity_1_1:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    xorl %edi, %esi
-; CHECK-BMI-NEXT:    andl $1431655765, %esi # imm = 0x55555555
-; CHECK-BMI-NEXT:    xorl %edi, %esi
 ; CHECK-BMI-NEXT:    movl %esi, %eax
+; CHECK-BMI-NEXT:    xorl %edi, %eax
+; CHECK-BMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; CHECK-BMI-NEXT:    xorl %edi, %eax
 ; CHECK-BMI-NEXT:    retq
   %n0 = xor i32 %x, %y
   %n1 = and i32 %n0, 1431655765
@@ -272,20 +278,20 @@ define i32 @in_constmask_commutativity_1
 define i32 @in_complex_y0_constmask(i32 %x, i32 %y_hi, i32 %y_low) {
 ; CHECK-NOBMI-LABEL: in_complex_y0_constmask:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    andl %edx, %esi
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    andl $1431655765, %edi # imm = 0x55555555
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    andl %edx, %esi
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in_complex_y0_constmask:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    andl %edx, %esi
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    andl $1431655765, %edi # imm = 0x55555555
-; CHECK-BMI-NEXT:    xorl %esi, %edi
 ; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    andl %edx, %esi
+; CHECK-BMI-NEXT:    xorl %esi, %eax
+; CHECK-BMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; CHECK-BMI-NEXT:    xorl %esi, %eax
 ; CHECK-BMI-NEXT:    retq
   %y = and i32 %y_hi, %y_low
   %n0 = xor i32 %x, %y
@@ -297,20 +303,20 @@ define i32 @in_complex_y0_constmask(i32
 define i32 @in_complex_y1_constmask(i32 %x, i32 %y_hi, i32 %y_low) {
 ; CHECK-NOBMI-LABEL: in_complex_y1_constmask:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    andl %edx, %esi
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    andl $1431655765, %edi # imm = 0x55555555
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    andl %edx, %esi
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in_complex_y1_constmask:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    andl %edx, %esi
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    andl $1431655765, %edi # imm = 0x55555555
-; CHECK-BMI-NEXT:    xorl %esi, %edi
 ; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    andl %edx, %esi
+; CHECK-BMI-NEXT:    xorl %esi, %eax
+; CHECK-BMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; CHECK-BMI-NEXT:    xorl %esi, %eax
 ; CHECK-BMI-NEXT:    retq
   %y = and i32 %y_hi, %y_low
   %n0 = xor i32 %x, %y
@@ -416,18 +422,18 @@ define i32 @in_multiuse_B_constmask(i32
 define i32 @n0_badconstmask(i32 %x, i32 %y) {
 ; CHECK-NOBMI-LABEL: n0_badconstmask:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    andl $1431655765, %edi # imm = 0x55555555
-; CHECK-NOBMI-NEXT:    andl $-1431655765, %esi # imm = 0xAAAAAAAB
-; CHECK-NOBMI-NEXT:    orl %edi, %esi
 ; CHECK-NOBMI-NEXT:    movl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl $1431655765, %edi # imm = 0x55555555
+; CHECK-NOBMI-NEXT:    andl $-1431655765, %eax # imm = 0xAAAAAAAB
+; CHECK-NOBMI-NEXT:    orl %edi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: n0_badconstmask:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    andl $1431655765, %edi # imm = 0x55555555
-; CHECK-BMI-NEXT:    andl $-1431655765, %esi # imm = 0xAAAAAAAB
-; CHECK-BMI-NEXT:    orl %edi, %esi
 ; CHECK-BMI-NEXT:    movl %esi, %eax
+; CHECK-BMI-NEXT:    andl $1431655765, %edi # imm = 0x55555555
+; CHECK-BMI-NEXT:    andl $-1431655765, %eax # imm = 0xAAAAAAAB
+; CHECK-BMI-NEXT:    orl %edi, %eax
 ; CHECK-BMI-NEXT:    retq
   %mx = and i32 %x, 1431655765
   %my = and i32 %y, -1431655765 ; instead of -1431655766
@@ -438,18 +444,18 @@ define i32 @n0_badconstmask(i32 %x, i32
 define i32 @n1_thirdvar_constmask(i32 %x, i32 %y, i32 %z) {
 ; CHECK-NOBMI-LABEL: n1_thirdvar_constmask:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    andl $1431655765, %edi # imm = 0x55555555
-; CHECK-NOBMI-NEXT:    xorl %edx, %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; CHECK-NOBMI-NEXT:    xorl %edx, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: n1_thirdvar_constmask:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    andl $1431655765, %edi # imm = 0x55555555
-; CHECK-BMI-NEXT:    xorl %edx, %edi
 ; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    xorl %esi, %eax
+; CHECK-BMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; CHECK-BMI-NEXT:    xorl %edx, %eax
 ; CHECK-BMI-NEXT:    retq
   %n0 = xor i32 %x, %y
   %n1 = and i32 %n0, 1431655765

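The i8 and i16 functions above also gain lines of the form

    # kill: def $al killed $al killed $eax

These are assembler comments, not instructions: the byte or word result is
now computed in the low sub-register of the hinted %eax, and the KILL
pseudo-instruction merely records that only $al (or $ax) is live at the
return. It emits no machine code, so the extra line does not change the
generated binary.
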
Modified: llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll (original)
+++ llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll Wed Sep 19 11:59:08 2018
@@ -10,18 +10,20 @@
 define i8 @out8_constmask(i8 %x, i8 %y) {
 ; CHECK-NOBMI-LABEL: out8_constmask:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    andb $15, %dil
-; CHECK-NOBMI-NEXT:    andb $-16, %sil
-; CHECK-NOBMI-NEXT:    orb %dil, %sil
 ; CHECK-NOBMI-NEXT:    movl %esi, %eax
+; CHECK-NOBMI-NEXT:    andb $15, %dil
+; CHECK-NOBMI-NEXT:    andb $-16, %al
+; CHECK-NOBMI-NEXT:    orb %dil, %al
+; CHECK-NOBMI-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: out8_constmask:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    andb $15, %dil
-; CHECK-BMI-NEXT:    andb $-16, %sil
-; CHECK-BMI-NEXT:    orb %dil, %sil
 ; CHECK-BMI-NEXT:    movl %esi, %eax
+; CHECK-BMI-NEXT:    andb $15, %dil
+; CHECK-BMI-NEXT:    andb $-16, %al
+; CHECK-BMI-NEXT:    orb %dil, %al
+; CHECK-BMI-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-BMI-NEXT:    retq
   %mx = and i8 %x, 15
   %my = and i8 %y, -16
@@ -110,18 +112,20 @@ define i64 @out64_constmask(i64 %x, i64
 define i8 @in8_constmask(i8 %x, i8 %y) {
 ; CHECK-NOBMI-LABEL: in8_constmask:
 ; CHECK-NOBMI:       # %bb.0:
+; CHECK-NOBMI-NEXT:    movl %esi, %eax
 ; CHECK-NOBMI-NEXT:    xorl %esi, %edi
 ; CHECK-NOBMI-NEXT:    andb $15, %dil
-; CHECK-NOBMI-NEXT:    xorb %dil, %sil
-; CHECK-NOBMI-NEXT:    movl %esi, %eax
+; CHECK-NOBMI-NEXT:    xorb %dil, %al
+; CHECK-NOBMI-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in8_constmask:
 ; CHECK-BMI:       # %bb.0:
+; CHECK-BMI-NEXT:    movl %esi, %eax
 ; CHECK-BMI-NEXT:    xorl %esi, %edi
 ; CHECK-BMI-NEXT:    andb $15, %dil
-; CHECK-BMI-NEXT:    xorb %dil, %sil
-; CHECK-BMI-NEXT:    movl %esi, %eax
+; CHECK-BMI-NEXT:    xorb %dil, %al
+; CHECK-BMI-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-BMI-NEXT:    retq
   %n0 = xor i8 %x, %y
   %n1 = and i8 %n0, 15
@@ -132,18 +136,20 @@ define i8 @in8_constmask(i8 %x, i8 %y) {
 define i16 @in16_constmask(i16 %x, i16 %y) {
 ; CHECK-NOBMI-LABEL: in16_constmask:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    andl $3855, %edi # imm = 0xF0F
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl $3855, %eax # imm = 0xF0F
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in16_constmask:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    andl $3855, %edi # imm = 0xF0F
-; CHECK-BMI-NEXT:    xorl %esi, %edi
 ; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    xorl %esi, %eax
+; CHECK-BMI-NEXT:    andl $3855, %eax # imm = 0xF0F
+; CHECK-BMI-NEXT:    xorl %esi, %eax
+; CHECK-BMI-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-BMI-NEXT:    retq
   %n0 = xor i16 %x, %y
   %n1 = and i16 %n0, 3855
@@ -154,18 +160,18 @@ define i16 @in16_constmask(i16 %x, i16 %
 define i32 @in32_constmask(i32 %x, i32 %y) {
 ; CHECK-NOBMI-LABEL: in32_constmask:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in32_constmask:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
-; CHECK-BMI-NEXT:    xorl %esi, %edi
 ; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    xorl %esi, %eax
+; CHECK-BMI-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; CHECK-BMI-NEXT:    xorl %esi, %eax
 ; CHECK-BMI-NEXT:    retq
   %n0 = xor i32 %x, %y
   %n1 = and i32 %n0, 252645135
@@ -202,18 +208,18 @@ define i64 @in64_constmask(i64 %x, i64 %
 define i32 @in_constmask_commutativity_0_1(i32 %x, i32 %y) {
 ; CHECK-NOBMI-LABEL: in_constmask_commutativity_0_1:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in_constmask_commutativity_0_1:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
-; CHECK-BMI-NEXT:    xorl %esi, %edi
 ; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    xorl %esi, %eax
+; CHECK-BMI-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; CHECK-BMI-NEXT:    xorl %esi, %eax
 ; CHECK-BMI-NEXT:    retq
   %n0 = xor i32 %x, %y
   %n1 = and i32 %n0, 252645135
@@ -224,18 +230,18 @@ define i32 @in_constmask_commutativity_0
 define i32 @in_constmask_commutativity_1_0(i32 %x, i32 %y) {
 ; CHECK-NOBMI-LABEL: in_constmask_commutativity_1_0:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    xorl %edi, %esi
-; CHECK-NOBMI-NEXT:    andl $252645135, %esi # imm = 0xF0F0F0F
-; CHECK-NOBMI-NEXT:    xorl %edi, %esi
 ; CHECK-NOBMI-NEXT:    movl %esi, %eax
+; CHECK-NOBMI-NEXT:    xorl %edi, %eax
+; CHECK-NOBMI-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; CHECK-NOBMI-NEXT:    xorl %edi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in_constmask_commutativity_1_0:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    xorl %edi, %esi
-; CHECK-BMI-NEXT:    andl $252645135, %esi # imm = 0xF0F0F0F
-; CHECK-BMI-NEXT:    xorl %edi, %esi
 ; CHECK-BMI-NEXT:    movl %esi, %eax
+; CHECK-BMI-NEXT:    xorl %edi, %eax
+; CHECK-BMI-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; CHECK-BMI-NEXT:    xorl %edi, %eax
 ; CHECK-BMI-NEXT:    retq
   %n0 = xor i32 %x, %y
   %n1 = and i32 %n0, 252645135
@@ -246,18 +252,18 @@ define i32 @in_constmask_commutativity_1
 define i32 @in_constmask_commutativity_1_1(i32 %x, i32 %y) {
 ; CHECK-NOBMI-LABEL: in_constmask_commutativity_1_1:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    xorl %edi, %esi
-; CHECK-NOBMI-NEXT:    andl $252645135, %esi # imm = 0xF0F0F0F
-; CHECK-NOBMI-NEXT:    xorl %edi, %esi
 ; CHECK-NOBMI-NEXT:    movl %esi, %eax
+; CHECK-NOBMI-NEXT:    xorl %edi, %eax
+; CHECK-NOBMI-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; CHECK-NOBMI-NEXT:    xorl %edi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in_constmask_commutativity_1_1:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    xorl %edi, %esi
-; CHECK-BMI-NEXT:    andl $252645135, %esi # imm = 0xF0F0F0F
-; CHECK-BMI-NEXT:    xorl %edi, %esi
 ; CHECK-BMI-NEXT:    movl %esi, %eax
+; CHECK-BMI-NEXT:    xorl %edi, %eax
+; CHECK-BMI-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; CHECK-BMI-NEXT:    xorl %edi, %eax
 ; CHECK-BMI-NEXT:    retq
   %n0 = xor i32 %x, %y
   %n1 = and i32 %n0, 252645135
@@ -272,20 +278,20 @@ define i32 @in_constmask_commutativity_1
 define i32 @in_complex_y0_constmask(i32 %x, i32 %y_hi, i32 %y_low) {
 ; CHECK-NOBMI-LABEL: in_complex_y0_constmask:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    andl %edx, %esi
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    andl %edx, %esi
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in_complex_y0_constmask:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    andl %edx, %esi
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
-; CHECK-BMI-NEXT:    xorl %esi, %edi
 ; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    andl %edx, %esi
+; CHECK-BMI-NEXT:    xorl %esi, %eax
+; CHECK-BMI-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; CHECK-BMI-NEXT:    xorl %esi, %eax
 ; CHECK-BMI-NEXT:    retq
   %y = and i32 %y_hi, %y_low
   %n0 = xor i32 %x, %y
@@ -297,20 +303,20 @@ define i32 @in_complex_y0_constmask(i32
 define i32 @in_complex_y1_constmask(i32 %x, i32 %y_hi, i32 %y_low) {
 ; CHECK-NOBMI-LABEL: in_complex_y1_constmask:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    andl %edx, %esi
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    andl %edx, %esi
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in_complex_y1_constmask:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    andl %edx, %esi
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
-; CHECK-BMI-NEXT:    xorl %esi, %edi
 ; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    andl %edx, %esi
+; CHECK-BMI-NEXT:    xorl %esi, %eax
+; CHECK-BMI-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; CHECK-BMI-NEXT:    xorl %esi, %eax
 ; CHECK-BMI-NEXT:    retq
   %y = and i32 %y_hi, %y_low
   %n0 = xor i32 %x, %y
@@ -416,18 +422,18 @@ define i32 @in_multiuse_B_constmask(i32
 define i32 @n0_badconstmask(i32 %x, i32 %y) {
 ; CHECK-NOBMI-LABEL: n0_badconstmask:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
-; CHECK-NOBMI-NEXT:    andl $-252645135, %esi # imm = 0xF0F0F0F1
-; CHECK-NOBMI-NEXT:    orl %edi, %esi
 ; CHECK-NOBMI-NEXT:    movl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
+; CHECK-NOBMI-NEXT:    andl $-252645135, %eax # imm = 0xF0F0F0F1
+; CHECK-NOBMI-NEXT:    orl %edi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: n0_badconstmask:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
-; CHECK-BMI-NEXT:    andl $-252645135, %esi # imm = 0xF0F0F0F1
-; CHECK-BMI-NEXT:    orl %edi, %esi
 ; CHECK-BMI-NEXT:    movl %esi, %eax
+; CHECK-BMI-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
+; CHECK-BMI-NEXT:    andl $-252645135, %eax # imm = 0xF0F0F0F1
+; CHECK-BMI-NEXT:    orl %edi, %eax
 ; CHECK-BMI-NEXT:    retq
   %mx = and i32 %x, 252645135
   %my = and i32 %y, -252645135 ; instead of -252645136
@@ -438,18 +444,18 @@ define i32 @n0_badconstmask(i32 %x, i32
 define i32 @n1_thirdvar_constmask(i32 %x, i32 %y, i32 %z) {
 ; CHECK-NOBMI-LABEL: n1_thirdvar_constmask:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
-; CHECK-NOBMI-NEXT:    xorl %edx, %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; CHECK-NOBMI-NEXT:    xorl %edx, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: n1_thirdvar_constmask:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
-; CHECK-BMI-NEXT:    xorl %edx, %edi
 ; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    xorl %esi, %eax
+; CHECK-BMI-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; CHECK-BMI-NEXT:    xorl %edx, %eax
 ; CHECK-BMI-NEXT:    retq
   %n0 = xor i32 %x, %y
   %n1 = and i32 %n0, 252645135

Modified: llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-lowhigh.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-lowhigh.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-lowhigh.ll (original)
+++ llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-lowhigh.ll Wed Sep 19 11:59:08 2018
@@ -10,18 +10,20 @@
 define i8 @out8_constmask(i8 %x, i8 %y) {
 ; CHECK-NOBMI-LABEL: out8_constmask:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    andb $15, %dil
-; CHECK-NOBMI-NEXT:    andb $-16, %sil
-; CHECK-NOBMI-NEXT:    orb %dil, %sil
 ; CHECK-NOBMI-NEXT:    movl %esi, %eax
+; CHECK-NOBMI-NEXT:    andb $15, %dil
+; CHECK-NOBMI-NEXT:    andb $-16, %al
+; CHECK-NOBMI-NEXT:    orb %dil, %al
+; CHECK-NOBMI-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: out8_constmask:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    andb $15, %dil
-; CHECK-BMI-NEXT:    andb $-16, %sil
-; CHECK-BMI-NEXT:    orb %dil, %sil
 ; CHECK-BMI-NEXT:    movl %esi, %eax
+; CHECK-BMI-NEXT:    andb $15, %dil
+; CHECK-BMI-NEXT:    andb $-16, %al
+; CHECK-BMI-NEXT:    orb %dil, %al
+; CHECK-BMI-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-BMI-NEXT:    retq
   %mx = and i8 %x, 15
   %my = and i8 %y, -16
@@ -100,18 +102,20 @@ define i64 @out64_constmask(i64 %x, i64
 define i8 @in8_constmask(i8 %x, i8 %y) {
 ; CHECK-NOBMI-LABEL: in8_constmask:
 ; CHECK-NOBMI:       # %bb.0:
+; CHECK-NOBMI-NEXT:    movl %esi, %eax
 ; CHECK-NOBMI-NEXT:    xorl %esi, %edi
 ; CHECK-NOBMI-NEXT:    andb $15, %dil
-; CHECK-NOBMI-NEXT:    xorb %dil, %sil
-; CHECK-NOBMI-NEXT:    movl %esi, %eax
+; CHECK-NOBMI-NEXT:    xorb %dil, %al
+; CHECK-NOBMI-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in8_constmask:
 ; CHECK-BMI:       # %bb.0:
+; CHECK-BMI-NEXT:    movl %esi, %eax
 ; CHECK-BMI-NEXT:    xorl %esi, %edi
 ; CHECK-BMI-NEXT:    andb $15, %dil
-; CHECK-BMI-NEXT:    xorb %dil, %sil
-; CHECK-BMI-NEXT:    movl %esi, %eax
+; CHECK-BMI-NEXT:    xorb %dil, %al
+; CHECK-BMI-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-BMI-NEXT:    retq
   %n0 = xor i8 %x, %y
   %n1 = and i8 %n0, 15

Modified: llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll (original)
+++ llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll Wed Sep 19 11:59:08 2018
@@ -6,20 +6,22 @@
 define i8 @out8(i8 %x, i8 %y, i8 %mask) {
 ; CHECK-NOBMI-LABEL: out8:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    andl %edx, %edi
-; CHECK-NOBMI-NEXT:    notb %dl
-; CHECK-NOBMI-NEXT:    andb %sil, %dl
-; CHECK-NOBMI-NEXT:    orb %dil, %dl
 ; CHECK-NOBMI-NEXT:    movl %edx, %eax
+; CHECK-NOBMI-NEXT:    andl %edx, %edi
+; CHECK-NOBMI-NEXT:    notb %al
+; CHECK-NOBMI-NEXT:    andb %sil, %al
+; CHECK-NOBMI-NEXT:    orb %dil, %al
+; CHECK-NOBMI-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: out8:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    andl %edx, %edi
-; CHECK-BMI-NEXT:    notb %dl
-; CHECK-BMI-NEXT:    andb %sil, %dl
-; CHECK-BMI-NEXT:    orb %dil, %dl
 ; CHECK-BMI-NEXT:    movl %edx, %eax
+; CHECK-BMI-NEXT:    andl %edx, %edi
+; CHECK-BMI-NEXT:    notb %al
+; CHECK-BMI-NEXT:    andb %sil, %al
+; CHECK-BMI-NEXT:    orb %dil, %al
+; CHECK-BMI-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-BMI-NEXT:    retq
   %mx = and i8 %x, %mask
   %notmask = xor i8 %mask, -1
@@ -31,11 +33,12 @@ define i8 @out8(i8 %x, i8 %y, i8 %mask)
 define i16 @out16(i16 %x, i16 %y, i16 %mask) {
 ; CHECK-NOBMI-LABEL: out16:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    andl %edx, %edi
-; CHECK-NOBMI-NEXT:    notl %edx
-; CHECK-NOBMI-NEXT:    andl %esi, %edx
-; CHECK-NOBMI-NEXT:    orl %edi, %edx
 ; CHECK-NOBMI-NEXT:    movl %edx, %eax
+; CHECK-NOBMI-NEXT:    andl %edx, %edi
+; CHECK-NOBMI-NEXT:    notl %eax
+; CHECK-NOBMI-NEXT:    andl %esi, %eax
+; CHECK-NOBMI-NEXT:    orl %edi, %eax
+; CHECK-NOBMI-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: out16:
@@ -55,11 +58,11 @@ define i16 @out16(i16 %x, i16 %y, i16 %m
 define i32 @out32(i32 %x, i32 %y, i32 %mask) {
 ; CHECK-NOBMI-LABEL: out32:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    andl %edx, %edi
-; CHECK-NOBMI-NEXT:    notl %edx
-; CHECK-NOBMI-NEXT:    andl %esi, %edx
-; CHECK-NOBMI-NEXT:    orl %edi, %edx
 ; CHECK-NOBMI-NEXT:    movl %edx, %eax
+; CHECK-NOBMI-NEXT:    andl %edx, %edi
+; CHECK-NOBMI-NEXT:    notl %eax
+; CHECK-NOBMI-NEXT:    andl %esi, %eax
+; CHECK-NOBMI-NEXT:    orl %edi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: out32:
@@ -78,11 +81,11 @@ define i32 @out32(i32 %x, i32 %y, i32 %m
 define i64 @out64(i64 %x, i64 %y, i64 %mask) {
 ; CHECK-NOBMI-LABEL: out64:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    andq %rdx, %rdi
-; CHECK-NOBMI-NEXT:    notq %rdx
-; CHECK-NOBMI-NEXT:    andq %rsi, %rdx
-; CHECK-NOBMI-NEXT:    orq %rdi, %rdx
 ; CHECK-NOBMI-NEXT:    movq %rdx, %rax
+; CHECK-NOBMI-NEXT:    andq %rdx, %rdi
+; CHECK-NOBMI-NEXT:    notq %rax
+; CHECK-NOBMI-NEXT:    andq %rsi, %rax
+; CHECK-NOBMI-NEXT:    orq %rdi, %rax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: out64:
@@ -104,10 +107,11 @@ define i64 @out64(i64 %x, i64 %y, i64 %m
 define i8 @in8(i8 %x, i8 %y, i8 %mask) {
 ; CHECK-NOBMI-LABEL: in8:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    andl %edx, %edi
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl %edx, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in8:
@@ -126,10 +130,11 @@ define i8 @in8(i8 %x, i8 %y, i8 %mask) {
 define i16 @in16(i16 %x, i16 %y, i16 %mask) {
 ; CHECK-NOBMI-LABEL: in16:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    andl %edx, %edi
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl %edx, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in16:
@@ -148,10 +153,10 @@ define i16 @in16(i16 %x, i16 %y, i16 %ma
 define i32 @in32(i32 %x, i32 %y, i32 %mask) {
 ; CHECK-NOBMI-LABEL: in32:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    andl %edx, %edi
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl %edx, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in32:
@@ -169,10 +174,10 @@ define i32 @in32(i32 %x, i32 %y, i32 %ma
 define i64 @in64(i64 %x, i64 %y, i64 %mask) {
 ; CHECK-NOBMI-LABEL: in64:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    xorq %rsi, %rdi
-; CHECK-NOBMI-NEXT:    andq %rdx, %rdi
-; CHECK-NOBMI-NEXT:    xorq %rsi, %rdi
 ; CHECK-NOBMI-NEXT:    movq %rdi, %rax
+; CHECK-NOBMI-NEXT:    xorq %rsi, %rax
+; CHECK-NOBMI-NEXT:    andq %rdx, %rax
+; CHECK-NOBMI-NEXT:    xorq %rsi, %rax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in64:
@@ -192,10 +197,10 @@ define i64 @in64(i64 %x, i64 %y, i64 %ma
 define i32 @in_commutativity_0_0_1(i32 %x, i32 %y, i32 %mask) {
 ; CHECK-NOBMI-LABEL: in_commutativity_0_0_1:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    andl %edx, %edi
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl %edx, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in_commutativity_0_0_1:
@@ -212,10 +217,10 @@ define i32 @in_commutativity_0_0_1(i32 %
 define i32 @in_commutativity_0_1_0(i32 %x, i32 %y, i32 %mask) {
 ; CHECK-NOBMI-LABEL: in_commutativity_0_1_0:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    andl %edx, %edi
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl %edx, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in_commutativity_0_1_0:
@@ -232,10 +237,10 @@ define i32 @in_commutativity_0_1_0(i32 %
 define i32 @in_commutativity_0_1_1(i32 %x, i32 %y, i32 %mask) {
 ; CHECK-NOBMI-LABEL: in_commutativity_0_1_1:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    andl %edx, %edi
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl %edx, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in_commutativity_0_1_1:
@@ -252,10 +257,10 @@ define i32 @in_commutativity_0_1_1(i32 %
 define i32 @in_commutativity_1_0_0(i32 %x, i32 %y, i32 %mask) {
 ; CHECK-NOBMI-LABEL: in_commutativity_1_0_0:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    xorl %edi, %esi
-; CHECK-NOBMI-NEXT:    andl %edx, %esi
-; CHECK-NOBMI-NEXT:    xorl %edi, %esi
 ; CHECK-NOBMI-NEXT:    movl %esi, %eax
+; CHECK-NOBMI-NEXT:    xorl %edi, %eax
+; CHECK-NOBMI-NEXT:    andl %edx, %eax
+; CHECK-NOBMI-NEXT:    xorl %edi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in_commutativity_1_0_0:
@@ -272,10 +277,10 @@ define i32 @in_commutativity_1_0_0(i32 %
 define i32 @in_commutativity_1_0_1(i32 %x, i32 %y, i32 %mask) {
 ; CHECK-NOBMI-LABEL: in_commutativity_1_0_1:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    xorl %edi, %esi
-; CHECK-NOBMI-NEXT:    andl %edx, %esi
-; CHECK-NOBMI-NEXT:    xorl %edi, %esi
 ; CHECK-NOBMI-NEXT:    movl %esi, %eax
+; CHECK-NOBMI-NEXT:    xorl %edi, %eax
+; CHECK-NOBMI-NEXT:    andl %edx, %eax
+; CHECK-NOBMI-NEXT:    xorl %edi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in_commutativity_1_0_1:
@@ -292,10 +297,10 @@ define i32 @in_commutativity_1_0_1(i32 %
 define i32 @in_commutativity_1_1_0(i32 %x, i32 %y, i32 %mask) {
 ; CHECK-NOBMI-LABEL: in_commutativity_1_1_0:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    xorl %edi, %esi
-; CHECK-NOBMI-NEXT:    andl %edx, %esi
-; CHECK-NOBMI-NEXT:    xorl %edi, %esi
 ; CHECK-NOBMI-NEXT:    movl %esi, %eax
+; CHECK-NOBMI-NEXT:    xorl %edi, %eax
+; CHECK-NOBMI-NEXT:    andl %edx, %eax
+; CHECK-NOBMI-NEXT:    xorl %edi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in_commutativity_1_1_0:
@@ -312,10 +317,10 @@ define i32 @in_commutativity_1_1_0(i32 %
 define i32 @in_commutativity_1_1_1(i32 %x, i32 %y, i32 %mask) {
 ; CHECK-NOBMI-LABEL: in_commutativity_1_1_1:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    xorl %edi, %esi
-; CHECK-NOBMI-NEXT:    andl %edx, %esi
-; CHECK-NOBMI-NEXT:    xorl %edi, %esi
 ; CHECK-NOBMI-NEXT:    movl %esi, %eax
+; CHECK-NOBMI-NEXT:    xorl %edi, %eax
+; CHECK-NOBMI-NEXT:    andl %edx, %eax
+; CHECK-NOBMI-NEXT:    xorl %edi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in_commutativity_1_1_1:
@@ -335,11 +340,11 @@ define i32 @in_commutativity_1_1_1(i32 %
 define i32 @in_complex_y0(i32 %x, i32 %y_hi, i32 %y_low, i32 %mask) {
 ; CHECK-NOBMI-LABEL: in_complex_y0:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    andl %edx, %esi
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    andl %ecx, %edi
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    andl %edx, %esi
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl %ecx, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in_complex_y0:
@@ -358,11 +363,11 @@ define i32 @in_complex_y0(i32 %x, i32 %y
 define i32 @in_complex_y1(i32 %x, i32 %y_hi, i32 %y_low, i32 %mask) {
 ; CHECK-NOBMI-LABEL: in_complex_y1:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    andl %edx, %esi
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    andl %ecx, %edi
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    andl %edx, %esi
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl %ecx, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in_complex_y1:
@@ -384,11 +389,11 @@ define i32 @in_complex_y1(i32 %x, i32 %y
 define i32 @in_complex_m0(i32 %x, i32 %y, i32 %m_a, i32 %m_b) {
 ; CHECK-NOBMI-LABEL: in_complex_m0:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    xorl %ecx, %edx
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    andl %edx, %edi
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    xorl %ecx, %edx
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl %edx, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in_complex_m0:
@@ -407,11 +412,11 @@ define i32 @in_complex_m0(i32 %x, i32 %y
 define i32 @in_complex_m1(i32 %x, i32 %y, i32 %m_a, i32 %m_b) {
 ; CHECK-NOBMI-LABEL: in_complex_m1:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    xorl %ecx, %edx
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    andl %edx, %edi
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    xorl %ecx, %edx
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl %edx, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in_complex_m1:
@@ -433,12 +438,12 @@ define i32 @in_complex_m1(i32 %x, i32 %y
 define i32 @in_complex_y0_m0(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b) {
 ; CHECK-NOBMI-LABEL: in_complex_y0_m0:
 ; CHECK-NOBMI:       # %bb.0:
+; CHECK-NOBMI-NEXT:    movl %edi, %eax
 ; CHECK-NOBMI-NEXT:    andl %edx, %esi
 ; CHECK-NOBMI-NEXT:    xorl %r8d, %ecx
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    andl %ecx, %edi
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl %ecx, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in_complex_y0_m0:
@@ -459,12 +464,12 @@ define i32 @in_complex_y0_m0(i32 %x, i32
 define i32 @in_complex_y1_m0(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b) {
 ; CHECK-NOBMI-LABEL: in_complex_y1_m0:
 ; CHECK-NOBMI:       # %bb.0:
+; CHECK-NOBMI-NEXT:    movl %edi, %eax
 ; CHECK-NOBMI-NEXT:    andl %edx, %esi
 ; CHECK-NOBMI-NEXT:    xorl %r8d, %ecx
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    andl %ecx, %edi
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl %ecx, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in_complex_y1_m0:
@@ -485,12 +490,12 @@ define i32 @in_complex_y1_m0(i32 %x, i32
 define i32 @in_complex_y0_m1(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b) {
 ; CHECK-NOBMI-LABEL: in_complex_y0_m1:
 ; CHECK-NOBMI:       # %bb.0:
+; CHECK-NOBMI-NEXT:    movl %edi, %eax
 ; CHECK-NOBMI-NEXT:    andl %edx, %esi
 ; CHECK-NOBMI-NEXT:    xorl %r8d, %ecx
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    andl %ecx, %edi
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl %ecx, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in_complex_y0_m1:
@@ -511,12 +516,12 @@ define i32 @in_complex_y0_m1(i32 %x, i32
 define i32 @in_complex_y1_m1(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b) {
 ; CHECK-NOBMI-LABEL: in_complex_y1_m1:
 ; CHECK-NOBMI:       # %bb.0:
+; CHECK-NOBMI-NEXT:    movl %edi, %eax
 ; CHECK-NOBMI-NEXT:    andl %edx, %esi
 ; CHECK-NOBMI-NEXT:    xorl %r8d, %ecx
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    andl %ecx, %edi
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl %ecx, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in_complex_y1_m1:
@@ -540,18 +545,18 @@ define i32 @in_complex_y1_m1(i32 %x, i32
 define i32 @out_constant_varx_mone(i32 %x, i32 %y, i32 %mask) {
 ; CHECK-NOBMI-LABEL: out_constant_varx_mone:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    andl %edx, %edi
-; CHECK-NOBMI-NEXT:    notl %edx
-; CHECK-NOBMI-NEXT:    orl %edx, %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    andl %edx, %eax
+; CHECK-NOBMI-NEXT:    notl %edx
+; CHECK-NOBMI-NEXT:    orl %edx, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: out_constant_varx_mone:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    andl %edx, %edi
-; CHECK-BMI-NEXT:    notl %edx
-; CHECK-BMI-NEXT:    orl %edx, %edi
 ; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    andl %edx, %eax
+; CHECK-BMI-NEXT:    notl %edx
+; CHECK-BMI-NEXT:    orl %edx, %eax
 ; CHECK-BMI-NEXT:    retq
   %notmask = xor i32 %mask, -1
   %mx = and i32 %mask, %x
@@ -562,10 +567,10 @@ define i32 @out_constant_varx_mone(i32 %
 define i32 @in_constant_varx_mone(i32 %x, i32 %y, i32 %mask) {
 ; CHECK-NOBMI-LABEL: in_constant_varx_mone:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    notl %edi
-; CHECK-NOBMI-NEXT:    andl %edx, %edi
-; CHECK-NOBMI-NEXT:    notl %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    notl %eax
+; CHECK-NOBMI-NEXT:    andl %edx, %eax
+; CHECK-NOBMI-NEXT:    notl %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in_constant_varx_mone:
@@ -603,11 +608,11 @@ define i32 @out_constant_varx_mone_invma
 define i32 @in_constant_varx_mone_invmask(i32 %x, i32 %y, i32 %mask) {
 ; CHECK-NOBMI-LABEL: in_constant_varx_mone_invmask:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    notl %edx
-; CHECK-NOBMI-NEXT:    notl %edi
-; CHECK-NOBMI-NEXT:    andl %edx, %edi
-; CHECK-NOBMI-NEXT:    notl %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    notl %edx
+; CHECK-NOBMI-NEXT:    notl %eax
+; CHECK-NOBMI-NEXT:    andl %edx, %eax
+; CHECK-NOBMI-NEXT:    notl %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in_constant_varx_mone_invmask:
@@ -649,10 +654,10 @@ define i32 @out_constant_varx_42(i32 %x,
 define i32 @in_constant_varx_42(i32 %x, i32 %y, i32 %mask) {
 ; CHECK-NOBMI-LABEL: in_constant_varx_42:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    xorl $42, %edi
-; CHECK-NOBMI-NEXT:    andl %edx, %edi
-; CHECK-NOBMI-NEXT:    xorl $42, %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    xorl $42, %eax
+; CHECK-NOBMI-NEXT:    andl %edx, %eax
+; CHECK-NOBMI-NEXT:    xorl $42, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in_constant_varx_42:
@@ -671,11 +676,11 @@ define i32 @out_constant_varx_42_invmask
 ; CHECK-NOBMI-LABEL: out_constant_varx_42_invmask:
 ; CHECK-NOBMI:       # %bb.0:
 ; CHECK-NOBMI-NEXT:    movl %edx, %eax
-; CHECK-NOBMI-NEXT:    notl %eax
-; CHECK-NOBMI-NEXT:    andl %edi, %eax
-; CHECK-NOBMI-NEXT:    andl $42, %edx
-; CHECK-NOBMI-NEXT:    orl %eax, %edx
-; CHECK-NOBMI-NEXT:    movl %edx, %eax
+; CHECK-NOBMI-NEXT:    movl %edx, %ecx
+; CHECK-NOBMI-NEXT:    notl %ecx
+; CHECK-NOBMI-NEXT:    andl %edi, %ecx
+; CHECK-NOBMI-NEXT:    andl $42, %eax
+; CHECK-NOBMI-NEXT:    orl %ecx, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: out_constant_varx_42_invmask:
@@ -694,11 +699,11 @@ define i32 @out_constant_varx_42_invmask
 define i32 @in_constant_varx_42_invmask(i32 %x, i32 %y, i32 %mask) {
 ; CHECK-NOBMI-LABEL: in_constant_varx_42_invmask:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    notl %edx
-; CHECK-NOBMI-NEXT:    xorl $42, %edi
-; CHECK-NOBMI-NEXT:    andl %edx, %edi
-; CHECK-NOBMI-NEXT:    xorl $42, %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    notl %edx
+; CHECK-NOBMI-NEXT:    xorl $42, %eax
+; CHECK-NOBMI-NEXT:    andl %edx, %eax
+; CHECK-NOBMI-NEXT:    xorl $42, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: in_constant_varx_42_invmask:
@@ -757,18 +762,18 @@ define i32 @in_constant_mone_vary(i32 %x
 define i32 @out_constant_mone_vary_invmask(i32 %x, i32 %y, i32 %mask) {
 ; CHECK-NOBMI-LABEL: out_constant_mone_vary_invmask:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    andl %edx, %esi
-; CHECK-NOBMI-NEXT:    notl %edx
-; CHECK-NOBMI-NEXT:    orl %edx, %esi
 ; CHECK-NOBMI-NEXT:    movl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl %edx, %eax
+; CHECK-NOBMI-NEXT:    notl %edx
+; CHECK-NOBMI-NEXT:    orl %edx, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: out_constant_mone_vary_invmask:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    andl %edx, %esi
-; CHECK-BMI-NEXT:    notl %edx
-; CHECK-BMI-NEXT:    orl %edx, %esi
 ; CHECK-BMI-NEXT:    movl %esi, %eax
+; CHECK-BMI-NEXT:    andl %edx, %eax
+; CHECK-BMI-NEXT:    notl %edx
+; CHECK-BMI-NEXT:    orl %edx, %eax
 ; CHECK-BMI-NEXT:    retq
   %notmask = xor i32 %mask, -1
   %mx = and i32 %notmask, -1
@@ -845,20 +850,20 @@ define i32 @in_constant_42_vary(i32 %x,
 define i32 @out_constant_42_vary_invmask(i32 %x, i32 %y, i32 %mask) {
 ; CHECK-NOBMI-LABEL: out_constant_42_vary_invmask:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    andl %edx, %esi
+; CHECK-NOBMI-NEXT:    movl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl %edx, %eax
 ; CHECK-NOBMI-NEXT:    notl %edx
 ; CHECK-NOBMI-NEXT:    andl $42, %edx
-; CHECK-NOBMI-NEXT:    orl %edx, %esi
-; CHECK-NOBMI-NEXT:    movl %esi, %eax
+; CHECK-NOBMI-NEXT:    orl %edx, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: out_constant_42_vary_invmask:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    andl %edx, %esi
+; CHECK-BMI-NEXT:    movl %esi, %eax
+; CHECK-BMI-NEXT:    andl %edx, %eax
 ; CHECK-BMI-NEXT:    notl %edx
 ; CHECK-BMI-NEXT:    andl $42, %edx
-; CHECK-BMI-NEXT:    orl %edx, %esi
-; CHECK-BMI-NEXT:    movl %esi, %eax
+; CHECK-BMI-NEXT:    orl %edx, %eax
 ; CHECK-BMI-NEXT:    retq
   %notmask = xor i32 %mask, -1
   %mx = and i32 %notmask, 42
@@ -879,11 +884,11 @@ define i32 @in_constant_42_vary_invmask(
 ;
 ; CHECK-BMI-LABEL: in_constant_42_vary_invmask:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    andl %edx, %esi
-; CHECK-BMI-NEXT:    notl %edx
-; CHECK-BMI-NEXT:    andl $42, %edx
-; CHECK-BMI-NEXT:    orl %esi, %edx
 ; CHECK-BMI-NEXT:    movl %edx, %eax
+; CHECK-BMI-NEXT:    andl %edx, %esi
+; CHECK-BMI-NEXT:    notl %eax
+; CHECK-BMI-NEXT:    andl $42, %eax
+; CHECK-BMI-NEXT:    orl %esi, %eax
 ; CHECK-BMI-NEXT:    retq
   %notmask = xor i32 %mask, -1
   %n0 = xor i32 42, %y ; %x
@@ -982,11 +987,11 @@ define i32 @in_multiuse_B(i32 %x, i32 %y
 define i32 @n0_badmask(i32 %x, i32 %y, i32 %mask, i32 %mask2) {
 ; CHECK-NOBMI-LABEL: n0_badmask:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    andl %edx, %edi
-; CHECK-NOBMI-NEXT:    notl %ecx
-; CHECK-NOBMI-NEXT:    andl %esi, %ecx
-; CHECK-NOBMI-NEXT:    orl %edi, %ecx
 ; CHECK-NOBMI-NEXT:    movl %ecx, %eax
+; CHECK-NOBMI-NEXT:    andl %edx, %edi
+; CHECK-NOBMI-NEXT:    notl %eax
+; CHECK-NOBMI-NEXT:    andl %esi, %eax
+; CHECK-NOBMI-NEXT:    orl %edi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: n0_badmask:
@@ -1004,20 +1009,20 @@ define i32 @n0_badmask(i32 %x, i32 %y, i
 define i32 @n0_badxor(i32 %x, i32 %y, i32 %mask) {
 ; CHECK-NOBMI-LABEL: n0_badxor:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    andl %edx, %edi
-; CHECK-NOBMI-NEXT:    xorl $1, %edx
-; CHECK-NOBMI-NEXT:    andl %esi, %edx
-; CHECK-NOBMI-NEXT:    orl %edi, %edx
 ; CHECK-NOBMI-NEXT:    movl %edx, %eax
+; CHECK-NOBMI-NEXT:    andl %edx, %edi
+; CHECK-NOBMI-NEXT:    xorl $1, %eax
+; CHECK-NOBMI-NEXT:    andl %esi, %eax
+; CHECK-NOBMI-NEXT:    orl %edi, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: n0_badxor:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    andl %edx, %edi
-; CHECK-BMI-NEXT:    xorl $1, %edx
-; CHECK-BMI-NEXT:    andl %esi, %edx
-; CHECK-BMI-NEXT:    orl %edi, %edx
 ; CHECK-BMI-NEXT:    movl %edx, %eax
+; CHECK-BMI-NEXT:    andl %edx, %edi
+; CHECK-BMI-NEXT:    xorl $1, %eax
+; CHECK-BMI-NEXT:    andl %esi, %eax
+; CHECK-BMI-NEXT:    orl %edi, %eax
 ; CHECK-BMI-NEXT:    retq
   %mx = and i32 %x, %mask
   %notmask = xor i32 %mask, 1 ; instead of -1
@@ -1028,18 +1033,18 @@ define i32 @n0_badxor(i32 %x, i32 %y, i3
 define i32 @n1_thirdvar(i32 %x, i32 %y, i32 %z, i32 %mask) {
 ; CHECK-NOBMI-LABEL: n1_thirdvar:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    xorl %esi, %edi
-; CHECK-NOBMI-NEXT:    andl %ecx, %edi
-; CHECK-NOBMI-NEXT:    xorl %edx, %edi
 ; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    xorl %esi, %eax
+; CHECK-NOBMI-NEXT:    andl %ecx, %eax
+; CHECK-NOBMI-NEXT:    xorl %edx, %eax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: n1_thirdvar:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    andl %ecx, %edi
-; CHECK-BMI-NEXT:    xorl %edx, %edi
 ; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    xorl %esi, %eax
+; CHECK-BMI-NEXT:    andl %ecx, %eax
+; CHECK-BMI-NEXT:    xorl %edx, %eax
 ; CHECK-BMI-NEXT:    retq
   %n0 = xor i32 %x, %y
   %n1 = and i32 %n0, %mask

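The scalar variable-mask file above shows the same rewrite with a runtime
mask: the mask's two uses (one plain, one complemented with notl) are now
served by the original argument register and the hinted copy in %eax, and
in a few cases the allocator picks a scratch register instead (%ecx in
out_constant_varx_42_invmask). In the vector tests that follow, the
<4 x i32> result on the SSE1-only configuration is returned indirectly
through the pointer arriving in %rdi, which must also be handed back in
%rax; the only effect of the hints there is to hoist the copy

    movq %rdi, %rax

from just before the ret to the top of each function, ahead of the movaps
sequence that computes the merge.
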
Modified: llvm/trunk/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll?rev=342578&r1=342577&r2=342578&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll (original)
+++ llvm/trunk/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll Wed Sep 19 11:59:08 2018
@@ -10,13 +10,13 @@
 define <4 x i32> @out_constant_varx_mone(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
 ; CHECK-SSE1-LABEL: out_constant_varx_mone:
 ; CHECK-SSE1:       # %bb.0:
+; CHECK-SSE1-NEXT:    movq %rdi, %rax
 ; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
 ; CHECK-SSE1-NEXT:    movaps {{.*#+}} xmm1 = [nan,nan,nan,nan]
 ; CHECK-SSE1-NEXT:    xorps %xmm0, %xmm1
 ; CHECK-SSE1-NEXT:    andps (%rsi), %xmm0
 ; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
 ; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
-; CHECK-SSE1-NEXT:    movq %rdi, %rax
 ; CHECK-SSE1-NEXT:    retq
 ;
 ; CHECK-SSE2-LABEL: out_constant_varx_mone:
@@ -49,11 +49,11 @@ define <4 x i32> @out_constant_varx_mone
 define <4 x i32> @in_constant_varx_mone(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
 ; CHECK-SSE1-LABEL: in_constant_varx_mone:
 ; CHECK-SSE1:       # %bb.0:
+; CHECK-SSE1-NEXT:    movq %rdi, %rax
 ; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm0
 ; CHECK-SSE1-NEXT:    andnps (%rcx), %xmm0
 ; CHECK-SSE1-NEXT:    xorps {{.*}}(%rip), %xmm0
 ; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
-; CHECK-SSE1-NEXT:    movq %rdi, %rax
 ; CHECK-SSE1-NEXT:    retq
 ;
 ; CHECK-SSE2-LABEL: in_constant_varx_mone:
@@ -84,12 +84,12 @@ define <4 x i32> @in_constant_varx_mone(
 define <4 x i32> @out_constant_varx_mone_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
 ; CHECK-SSE1-LABEL: out_constant_varx_mone_invmask:
 ; CHECK-SSE1:       # %bb.0:
+; CHECK-SSE1-NEXT:    movq %rdi, %rax
 ; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
 ; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
 ; CHECK-SSE1-NEXT:    andnps (%rsi), %xmm1
 ; CHECK-SSE1-NEXT:    orps %xmm0, %xmm1
 ; CHECK-SSE1-NEXT:    movaps %xmm1, (%rdi)
-; CHECK-SSE1-NEXT:    movq %rdi, %rax
 ; CHECK-SSE1-NEXT:    retq
 ;
 ; CHECK-SSE2-LABEL: out_constant_varx_mone_invmask:
@@ -120,6 +120,7 @@ define <4 x i32> @out_constant_varx_mone
 define <4 x i32> @in_constant_varx_mone_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
 ; CHECK-SSE1-LABEL: in_constant_varx_mone_invmask:
 ; CHECK-SSE1:       # %bb.0:
+; CHECK-SSE1-NEXT:    movq %rdi, %rax
 ; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm0
 ; CHECK-SSE1-NEXT:    movaps {{.*#+}} xmm1 = [nan,nan,nan,nan]
 ; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm2
@@ -127,7 +128,6 @@ define <4 x i32> @in_constant_varx_mone_
 ; CHECK-SSE1-NEXT:    andnps %xmm2, %xmm0
 ; CHECK-SSE1-NEXT:    xorps %xmm1, %xmm0
 ; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
-; CHECK-SSE1-NEXT:    movq %rdi, %rax
 ; CHECK-SSE1-NEXT:    retq
 ;
 ; CHECK-SSE2-LABEL: in_constant_varx_mone_invmask:
@@ -161,13 +161,13 @@ define <4 x i32> @in_constant_varx_mone_
 define <4 x i32> @out_constant_varx_42(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
 ; CHECK-SSE1-LABEL: out_constant_varx_42:
 ; CHECK-SSE1:       # %bb.0:
+; CHECK-SSE1-NEXT:    movq %rdi, %rax
 ; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
 ; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm1
 ; CHECK-SSE1-NEXT:    andps %xmm0, %xmm1
 ; CHECK-SSE1-NEXT:    andnps {{.*}}(%rip), %xmm0
 ; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
 ; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
-; CHECK-SSE1-NEXT:    movq %rdi, %rax
 ; CHECK-SSE1-NEXT:    retq
 ;
 ; CHECK-SSE2-LABEL: out_constant_varx_42:
@@ -198,13 +198,13 @@ define <4 x i32> @out_constant_varx_42(<
 define <4 x i32> @in_constant_varx_42(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
 ; CHECK-SSE1-LABEL: in_constant_varx_42:
 ; CHECK-SSE1:       # %bb.0:
+; CHECK-SSE1-NEXT:    movq %rdi, %rax
 ; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
 ; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm1
 ; CHECK-SSE1-NEXT:    andps %xmm0, %xmm1
 ; CHECK-SSE1-NEXT:    andnps {{.*}}(%rip), %xmm0
 ; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
 ; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
-; CHECK-SSE1-NEXT:    movq %rdi, %rax
 ; CHECK-SSE1-NEXT:    retq
 ;
 ; CHECK-SSE2-LABEL: in_constant_varx_42:
@@ -235,13 +235,13 @@ define <4 x i32> @in_constant_varx_42(<4
 define <4 x i32> @out_constant_varx_42_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
 ; CHECK-SSE1-LABEL: out_constant_varx_42_invmask:
 ; CHECK-SSE1:       # %bb.0:
+; CHECK-SSE1-NEXT:    movq %rdi, %rax
 ; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
 ; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
 ; CHECK-SSE1-NEXT:    andnps (%rsi), %xmm1
 ; CHECK-SSE1-NEXT:    andps {{.*}}(%rip), %xmm0
 ; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
 ; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
-; CHECK-SSE1-NEXT:    movq %rdi, %rax
 ; CHECK-SSE1-NEXT:    retq
 ;
 ; CHECK-SSE2-LABEL: out_constant_varx_42_invmask:
@@ -273,13 +273,13 @@ define <4 x i32> @out_constant_varx_42_i
 define <4 x i32> @in_constant_varx_42_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
 ; CHECK-SSE1-LABEL: in_constant_varx_42_invmask:
 ; CHECK-SSE1:       # %bb.0:
+; CHECK-SSE1-NEXT:    movq %rdi, %rax
 ; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
 ; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
 ; CHECK-SSE1-NEXT:    andnps (%rsi), %xmm1
 ; CHECK-SSE1-NEXT:    andps {{.*}}(%rip), %xmm0
 ; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
 ; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
-; CHECK-SSE1-NEXT:    movq %rdi, %rax
 ; CHECK-SSE1-NEXT:    retq
 ;
 ; CHECK-SSE2-LABEL: in_constant_varx_42_invmask:
@@ -310,12 +310,12 @@ define <4 x i32> @in_constant_varx_42_in
 define <4 x i32> @out_constant_mone_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
 ; CHECK-SSE1-LABEL: out_constant_mone_vary:
 ; CHECK-SSE1:       # %bb.0:
+; CHECK-SSE1-NEXT:    movq %rdi, %rax
 ; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
 ; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
 ; CHECK-SSE1-NEXT:    andnps (%rdx), %xmm1
 ; CHECK-SSE1-NEXT:    orps %xmm0, %xmm1
 ; CHECK-SSE1-NEXT:    movaps %xmm1, (%rdi)
-; CHECK-SSE1-NEXT:    movq %rdi, %rax
 ; CHECK-SSE1-NEXT:    retq
 ;
 ; CHECK-SSE2-LABEL: out_constant_mone_vary:
@@ -345,12 +345,12 @@ define <4 x i32> @out_constant_mone_vary
 define <4 x i32> @in_constant_mone_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
 ; CHECK-SSE1-LABEL: in_constant_mone_vary:
 ; CHECK-SSE1:       # %bb.0:
+; CHECK-SSE1-NEXT:    movq %rdi, %rax
 ; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
 ; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
 ; CHECK-SSE1-NEXT:    andnps (%rdx), %xmm1
 ; CHECK-SSE1-NEXT:    orps %xmm0, %xmm1
 ; CHECK-SSE1-NEXT:    movaps %xmm1, (%rdi)
-; CHECK-SSE1-NEXT:    movq %rdi, %rax
 ; CHECK-SSE1-NEXT:    retq
 ;
 ; CHECK-SSE2-LABEL: in_constant_mone_vary:
@@ -380,13 +380,13 @@ define <4 x i32> @in_constant_mone_vary(
 define <4 x i32> @out_constant_mone_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
 ; CHECK-SSE1-LABEL: out_constant_mone_vary_invmask:
 ; CHECK-SSE1:       # %bb.0:
+; CHECK-SSE1-NEXT:    movq %rdi, %rax
 ; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
 ; CHECK-SSE1-NEXT:    movaps {{.*#+}} xmm1 = [nan,nan,nan,nan]
 ; CHECK-SSE1-NEXT:    xorps %xmm0, %xmm1
 ; CHECK-SSE1-NEXT:    andps (%rdx), %xmm0
 ; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
 ; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
-; CHECK-SSE1-NEXT:    movq %rdi, %rax
 ; CHECK-SSE1-NEXT:    retq
 ;
 ; CHECK-SSE2-LABEL: out_constant_mone_vary_invmask:
@@ -420,13 +420,13 @@ define <4 x i32> @out_constant_mone_vary
 define <4 x i32> @in_constant_mone_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
 ; CHECK-SSE1-LABEL: in_constant_mone_vary_invmask:
 ; CHECK-SSE1:       # %bb.0:
+; CHECK-SSE1-NEXT:    movq %rdi, %rax
 ; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
 ; CHECK-SSE1-NEXT:    movaps {{.*#+}} xmm1 = [nan,nan,nan,nan]
 ; CHECK-SSE1-NEXT:    xorps %xmm0, %xmm1
 ; CHECK-SSE1-NEXT:    andps (%rdx), %xmm0
 ; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
 ; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
-; CHECK-SSE1-NEXT:    movq %rdi, %rax
 ; CHECK-SSE1-NEXT:    retq
 ;
 ; CHECK-SSE2-LABEL: in_constant_mone_vary_invmask:
@@ -459,13 +459,13 @@ define <4 x i32> @in_constant_mone_vary_
 define <4 x i32> @out_constant_42_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
 ; CHECK-SSE1-LABEL: out_constant_42_vary:
 ; CHECK-SSE1:       # %bb.0:
+; CHECK-SSE1-NEXT:    movq %rdi, %rax
 ; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
 ; CHECK-SSE1-NEXT:    movaps {{.*#+}} xmm1 = [5.885454e-44,5.885454e-44,5.885454e-44,5.885454e-44]
 ; CHECK-SSE1-NEXT:    andps %xmm0, %xmm1
 ; CHECK-SSE1-NEXT:    andnps (%rdx), %xmm0
 ; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
 ; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
-; CHECK-SSE1-NEXT:    movq %rdi, %rax
 ; CHECK-SSE1-NEXT:    retq
 ;
 ; CHECK-SSE2-LABEL: out_constant_42_vary:
@@ -496,13 +496,13 @@ define <4 x i32> @out_constant_42_vary(<
 define <4 x i32> @in_constant_42_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
 ; CHECK-SSE1-LABEL: in_constant_42_vary:
 ; CHECK-SSE1:       # %bb.0:
+; CHECK-SSE1-NEXT:    movq %rdi, %rax
 ; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
 ; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
 ; CHECK-SSE1-NEXT:    andnps (%rdx), %xmm1
 ; CHECK-SSE1-NEXT:    andps {{.*}}(%rip), %xmm0
 ; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
 ; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
-; CHECK-SSE1-NEXT:    movq %rdi, %rax
 ; CHECK-SSE1-NEXT:    retq
 ;
 ; CHECK-SSE2-LABEL: in_constant_42_vary:
@@ -533,13 +533,13 @@ define <4 x i32> @in_constant_42_vary(<4
 define <4 x i32> @out_constant_42_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
 ; CHECK-SSE1-LABEL: out_constant_42_vary_invmask:
 ; CHECK-SSE1:       # %bb.0:
+; CHECK-SSE1-NEXT:    movq %rdi, %rax
 ; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
 ; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
 ; CHECK-SSE1-NEXT:    andnps {{.*}}(%rip), %xmm1
 ; CHECK-SSE1-NEXT:    andps (%rdx), %xmm0
 ; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
 ; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
-; CHECK-SSE1-NEXT:    movq %rdi, %rax
 ; CHECK-SSE1-NEXT:    retq
 ;
 ; CHECK-SSE2-LABEL: out_constant_42_vary_invmask:
@@ -571,13 +571,13 @@ define <4 x i32> @out_constant_42_vary_i
 define <4 x i32> @in_constant_42_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
 ; CHECK-SSE1-LABEL: in_constant_42_vary_invmask:
 ; CHECK-SSE1:       # %bb.0:
+; CHECK-SSE1-NEXT:    movq %rdi, %rax
 ; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
 ; CHECK-SSE1-NEXT:    movaps (%rdx), %xmm1
 ; CHECK-SSE1-NEXT:    andps %xmm0, %xmm1
 ; CHECK-SSE1-NEXT:    andnps {{.*}}(%rip), %xmm0
 ; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
 ; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
-; CHECK-SSE1-NEXT:    movq %rdi, %rax
 ; CHECK-SSE1-NEXT:    retq
 ;
 ; CHECK-SSE2-LABEL: in_constant_42_vary_invmask:

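In the vector file, the SSE1 lowering returns <4 x i32> indirectly, as the
checks show: the caller passes a destination pointer in %rdi, the callee
stores the result through it, and that same pointer is returned in %rax. The
new hints move the movq %rdi, %rax copy from just before retq up to function
entry. The common shape of every updated CHECK-SSE1 function (schematic only;
the elided middle differs per test):

  movq   %rdi, %rax        # return value is the destination pointer; copy now at entry
  ...                      # xmm loads and logic ops computing the masked merge
  movaps %xmm0, (%rdi)     # store the result through the destination pointer
  retq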