[llvm] r332745 - [X86] Add GPR<->XMM Schedule Tags

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Fri May 18 10:58:36 PDT 2018


Author: rksimon
Date: Fri May 18 10:58:36 2018
New Revision: 332745

URL: http://llvm.org/viewvc/llvm-project?rev=332745&view=rev
Log:
[X86] Add GPR<->XMM Schedule Tags

BtVer2 - fix NumMicroOp and account for the Lat+6cy GPR->XMM and Lat+1cy XMm->GPR delays (see rL332737)

The high number of MOVD/MOVQ equivalent instructions meant that there were a number of missed patterns in SNB/Znver1:
SNB - add missing GPR<->MMX costs (taken from Agner / Intel AOM)
Znver1 - add missing GPR<->XMM MOVQ costs (taken from Agner)

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/lib/Target/X86/X86InstrMMX.td
    llvm/trunk/lib/Target/X86/X86InstrSSE.td
    llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
    llvm/trunk/lib/Target/X86/X86SchedHaswell.td
    llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
    llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
    llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
    llvm/trunk/lib/Target/X86/X86Schedule.td
    llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
    llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
    llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
    llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
    llvm/trunk/test/CodeGen/X86/3dnow-schedule.ll
    llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
    llvm/trunk/test/CodeGen/X86/mmx-schedule.ll
    llvm/trunk/test/CodeGen/X86/sse2-schedule.ll
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-mmx.s
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-sse2.s
    llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-mmx.s
    llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-sse2.s

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=332745&r1=332744&r2=332745&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Fri May 18 10:58:36 2018
@@ -3617,7 +3617,7 @@ def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSr
                       "vmovd\t{$src, $dst|$dst, $src}",
                       [(set VR128X:$dst,
                         (v4i32 (scalar_to_vector GR32:$src)))]>,
-                        EVEX, Sched<[WriteMove]>;
+                        EVEX, Sched<[WriteVecMoveFromGpr]>;
 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
                       "vmovd\t{$src, $dst|$dst, $src}",
                       [(set VR128X:$dst,
@@ -3627,7 +3627,7 @@ def VMOV64toPQIZrr : AVX512BI<0x6E, MRMS
                       "vmovq\t{$src, $dst|$dst, $src}",
                         [(set VR128X:$dst,
                           (v2i64 (scalar_to_vector GR64:$src)))]>,
-                      EVEX, VEX_W, Sched<[WriteMove]>;
+                      EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
 def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
                       (ins i64mem:$src),
@@ -3637,7 +3637,7 @@ let isCodeGenOnly = 1 in {
 def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
                        "vmovq\t{$src, $dst|$dst, $src}",
                        [(set FR64X:$dst, (bitconvert GR64:$src))]>,
-                       EVEX, VEX_W, Sched<[WriteMove]>;
+                       EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
 def VMOV64toSDZrm : AVX512XSI<0x7E, MRMSrcMem, (outs FR64X:$dst), (ins i64mem:$src),
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [(set FR64X:$dst, (bitconvert (loadi64 addr:$src)))]>,
@@ -3645,7 +3645,7 @@ def VMOV64toSDZrm : AVX512XSI<0x7E, MRMS
 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
                          "vmovq\t{$src, $dst|$dst, $src}",
                          [(set GR64:$dst, (bitconvert FR64X:$src))]>,
-                         EVEX, VEX_W, Sched<[WriteMove]>;
+                         EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
 def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src),
                          "vmovq\t{$src, $dst|$dst, $src}",
                          [(store (i64 (bitconvert FR64X:$src)), addr:$dst)]>,
@@ -3660,7 +3660,7 @@ let ExeDomain = SSEPackedInt, isCodeGenO
 def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
                       "vmovd\t{$src, $dst|$dst, $src}",
                       [(set FR32X:$dst, (bitconvert GR32:$src))]>,
-                      EVEX, Sched<[WriteMove]>;
+                      EVEX, Sched<[WriteVecMoveFromGpr]>;
 
 def VMOVDI2SSZrm  : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
                       "vmovd\t{$src, $dst|$dst, $src}",
@@ -3675,7 +3675,7 @@ def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMD
                        "vmovd\t{$src, $dst|$dst, $src}",
                        [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
                                         (iPTR 0)))]>,
-                       EVEX, Sched<[WriteMove]>;
+                       EVEX, Sched<[WriteVecMoveToGpr]>;
 def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
                        (ins i32mem:$dst, VR128X:$src),
                        "vmovd\t{$src, $dst|$dst, $src}",
@@ -3691,7 +3691,7 @@ def VMOVPQIto64Zrr : I<0x7E, MRMDestReg,
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
                                                    (iPTR 0)))]>,
-                      PD, EVEX, VEX_W, Sched<[WriteMove]>,
+                      PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
                       Requires<[HasAVX512, In64BitMode]>;
 
 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
@@ -3722,7 +3722,7 @@ def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDe
                       (ins FR32X:$src),
                       "vmovd\t{$src, $dst|$dst, $src}",
                       [(set GR32:$dst, (bitconvert FR32X:$src))]>,
-                      EVEX, Sched<[WriteMove]>;
+                      EVEX, Sched<[WriteVecMoveToGpr]>;
 def VMOVSS2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
                       (ins i32mem:$dst, FR32X:$src),
                       "vmovd\t{$src, $dst|$dst, $src}",
@@ -9089,7 +9089,7 @@ multiclass cvt_by_vec_width<bits<8> opc,
 def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
                   !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
                   [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
-                  EVEX, Sched<[WriteMove]>;
+                  EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?
 }
 
 multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,

Modified: llvm/trunk/lib/Target/X86/X86InstrMMX.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrMMX.td?rev=332745&r1=332744&r2=332745&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrMMX.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrMMX.td Fri May 18 10:58:36 2018
@@ -165,7 +165,7 @@ def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg,
                         "movd\t{$src, $dst|$dst, $src}",
                         [(set VR64:$dst,
                          (x86mmx (scalar_to_vector GR32:$src)))]>,
-                        Sched<[WriteMove]>;
+                        Sched<[WriteVecMoveFromGpr]>;
 def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(set VR64:$dst,
@@ -193,13 +193,13 @@ def MMX_MOVD64grr : MMXI<0x7E, MRMDestRe
                          "movd\t{$src, $dst|$dst, $src}",
                          [(set GR32:$dst,
                           (MMX_X86movd2w (x86mmx VR64:$src)))]>,
-                         Sched<[WriteMove]>, FoldGenData<"MMX_MOVD64rr">;
+                         Sched<[WriteVecMoveToGpr]>, FoldGenData<"MMX_MOVD64rr">;
 
 let isBitcast = 1 in
 def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
                              "movq\t{$src, $dst|$dst, $src}",
                              [(set VR64:$dst, (bitconvert GR64:$src))]>,
-                             Sched<[WriteMove]>;
+                             Sched<[WriteVecMoveFromGpr]>;
 
 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
 def MMX_MOVD64to64rm : MMXRI<0x6E, MRMSrcMem, (outs VR64:$dst),
@@ -209,20 +209,21 @@ def MMX_MOVD64to64rm : MMXRI<0x6E, MRMSr
 // These are 64 bit moves, but since the OS X assembler doesn't
 // recognize a register-register movq, we write them as
 // movd.
-let SchedRW = [WriteMove], isBitcast = 1 in {
+let isBitcast = 1 in {
 def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg,
                                (outs GR64:$dst), (ins VR64:$src),
                                "movq\t{$src, $dst|$dst, $src}",
-                             [(set GR64:$dst, (bitconvert VR64:$src))]>;
-let hasSideEffects = 0 in
+                               [(set GR64:$dst, (bitconvert VR64:$src))]>,
+                               Sched<[WriteVecMoveToGpr]>;
+let SchedRW = [WriteVecMove], hasSideEffects = 0 in {
 def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
                         "movq\t{$src, $dst|$dst, $src}", []>;
-let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
+let isCodeGenOnly = 1, ForceDisassemble = 1 in
 def MMX_MOVQ64rr_REV : MMXI<0x7F, MRMDestReg, (outs VR64:$dst), (ins VR64:$src),
                             "movq\t{$src, $dst|$dst, $src}", []>,
                             FoldGenData<"MMX_MOVQ64rr">;
-}
-} // SchedRW
+} // SchedRW, hasSideEffects
+} // isBitcast
 
 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
 def MMX_MOVD64from64rm : MMXRI<0x7E, MRMDestMem,

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=332745&r1=332744&r2=332745&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Fri May 18 10:58:36 2018
@@ -3965,7 +3965,7 @@ def VMOVDI2PDIrr : VS2I<0x6E, MRMSrcReg,
                         "movd\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (v4i32 (scalar_to_vector GR32:$src)))]>,
-                          VEX, Sched<[WriteMove]>;
+                          VEX, Sched<[WriteVecMoveFromGpr]>;
 def VMOVDI2PDIrm : VS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
@@ -3975,7 +3975,7 @@ def VMOV64toPQIrr : VRS2I<0x6E, MRMSrcRe
                           "movq\t{$src, $dst|$dst, $src}",
                           [(set VR128:$dst,
                             (v2i64 (scalar_to_vector GR64:$src)))]>,
-                          VEX, Sched<[WriteMove]>;
+                          VEX, Sched<[WriteVecMoveFromGpr]>;
 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
 def VMOV64toPQIrm : VRS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                           "movq\t{$src, $dst|$dst, $src}", []>,
@@ -3984,13 +3984,13 @@ let isCodeGenOnly = 1 in
 def VMOV64toSDrr : VRS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
                          "movq\t{$src, $dst|$dst, $src}",
                          [(set FR64:$dst, (bitconvert GR64:$src))]>,
-                         VEX, Sched<[WriteMove]>;
+                         VEX, Sched<[WriteVecMoveFromGpr]>;
 
 def MOVDI2PDIrr : S2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (v4i32 (scalar_to_vector GR32:$src)))]>,
-                      Sched<[WriteMove]>;
+                      Sched<[WriteVecMoveFromGpr]>;
 def MOVDI2PDIrm : S2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
@@ -4000,7 +4000,7 @@ def MOV64toPQIrr : RS2I<0x6E, MRMSrcReg,
                         "movq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (v2i64 (scalar_to_vector GR64:$src)))]>,
-                        Sched<[WriteMove]>;
+                        Sched<[WriteVecMoveFromGpr]>;
 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
 def MOV64toPQIrm : RS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                         "movq\t{$src, $dst|$dst, $src}", []>,
@@ -4009,7 +4009,7 @@ let isCodeGenOnly = 1 in
 def MOV64toSDrr : RS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
                        "movq\t{$src, $dst|$dst, $src}",
                        [(set FR64:$dst, (bitconvert GR64:$src))]>,
-                       Sched<[WriteMove]>;
+                       Sched<[WriteVecMoveFromGpr]>;
 } // ExeDomain = SSEPackedInt
 
 //===---------------------------------------------------------------------===//
@@ -4019,7 +4019,7 @@ let ExeDomain = SSEPackedInt, isCodeGenO
   def VMOVDI2SSrr  : VS2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(set FR32:$dst, (bitconvert GR32:$src))]>,
-                        VEX, Sched<[WriteMove]>;
+                        VEX, Sched<[WriteVecMoveFromGpr]>;
 
   def VMOVDI2SSrm  : VS2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
                         "movd\t{$src, $dst|$dst, $src}",
@@ -4028,7 +4028,7 @@ let ExeDomain = SSEPackedInt, isCodeGenO
   def MOVDI2SSrr  : S2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(set FR32:$dst, (bitconvert GR32:$src))]>,
-                        Sched<[WriteMove]>;
+                        Sched<[WriteVecMoveFromGpr]>;
 
   def MOVDI2SSrm  : S2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
                         "movd\t{$src, $dst|$dst, $src}",
@@ -4044,7 +4044,7 @@ def VMOVPDI2DIrr  : VS2I<0x7E, MRMDestRe
                          "movd\t{$src, $dst|$dst, $src}",
                          [(set GR32:$dst, (extractelt (v4i32 VR128:$src),
                                           (iPTR 0)))]>, VEX,
-                         Sched<[WriteMove]>;
+                         Sched<[WriteVecMoveToGpr]>;
 def VMOVPDI2DImr  : VS2I<0x7E, MRMDestMem, (outs),
                          (ins i32mem:$dst, VR128:$src),
                          "movd\t{$src, $dst|$dst, $src}",
@@ -4055,7 +4055,7 @@ def MOVPDI2DIrr  : S2I<0x7E, MRMDestReg,
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set GR32:$dst, (extractelt (v4i32 VR128:$src),
                                         (iPTR 0)))]>,
-                   Sched<[WriteMove]>;
+                   Sched<[WriteVecMoveToGpr]>;
 def MOVPDI2DImr  : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(store (i32 (extractelt (v4i32 VR128:$src),
@@ -4067,7 +4067,7 @@ def MOVPDI2DImr  : S2I<0x7E, MRMDestMem,
 // Move Packed Doubleword Int first element to Doubleword Int
 //
 let ExeDomain = SSEPackedInt in {
-let SchedRW = [WriteMove] in {
+let SchedRW = [WriteVecMoveToGpr] in {
 def VMOVPQIto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
                           "movq\t{$src, $dst|$dst, $src}",
                           [(set GR64:$dst, (extractelt (v2i64 VR128:$src),
@@ -4103,7 +4103,7 @@ let ExeDomain = SSEPackedInt, isCodeGenO
   def VMOVSDto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
                            "movq\t{$src, $dst|$dst, $src}",
                            [(set GR64:$dst, (bitconvert FR64:$src))]>,
-                           VEX, Sched<[WriteMove]>;
+                           VEX, Sched<[WriteVecMoveToGpr]>;
   def VMOVSDto64mr : VRS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
                            "movq\t{$src, $dst|$dst, $src}",
                            [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>,
@@ -4116,7 +4116,7 @@ let ExeDomain = SSEPackedInt, isCodeGenO
   def MOVSDto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
                          "movq\t{$src, $dst|$dst, $src}",
                          [(set GR64:$dst, (bitconvert FR64:$src))]>,
-                         Sched<[WriteMove]>;
+                         Sched<[WriteVecMoveToGpr]>;
   def MOVSDto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
                          "movq\t{$src, $dst|$dst, $src}",
                          [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>,
@@ -4130,7 +4130,7 @@ let ExeDomain = SSEPackedInt, isCodeGenO
   def VMOVSS2DIrr  : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(set GR32:$dst, (bitconvert FR32:$src))]>,
-                        VEX, Sched<[WriteMove]>;
+                        VEX, Sched<[WriteVecMoveToGpr]>;
   def VMOVSS2DImr  : VS2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>,
@@ -4138,7 +4138,7 @@ let ExeDomain = SSEPackedInt, isCodeGenO
   def MOVSS2DIrr  : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(set GR32:$dst, (bitconvert FR32:$src))]>,
-                        Sched<[WriteMove]>;
+                        Sched<[WriteVecMoveToGpr]>;
   def MOVSS2DImr  : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>,

Modified: llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedBroadwell.td?rev=332745&r1=332744&r2=332745&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedBroadwell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedBroadwell.td Fri May 18 10:58:36 2018
@@ -280,6 +280,9 @@ defm : X86WriteRes<WriteVecMaskedStoreY,
 defm : X86WriteRes<WriteVecMove,         [BWPort015], 1, [1], 1>;
 defm : X86WriteRes<WriteVecMoveX,        [BWPort015], 1, [1], 1>;
 defm : X86WriteRes<WriteVecMoveY,        [BWPort015], 1, [1], 1>;
+defm : X86WriteRes<WriteVecMoveToGpr,    [BWPort0], 1, [1], 1>;
+defm : X86WriteRes<WriteVecMoveFromGpr,  [BWPort5], 1, [1], 1>;
+
 defm : X86WriteRes<WriteEMMS,            [BWPort01,BWPort15,BWPort015,BWPort0156], 31, [8,1,21,1], 31>;
 
 defm : BWWriteResPair<WriteVecALU,   [BWPort15], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
@@ -508,11 +511,7 @@ def BWWriteResGroup1 : SchedWriteRes<[BW
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[BWWriteResGroup1], (instregex "MMX_MOVD64from64rr",
-                                           "MMX_MOVD64grr",
-                                           "(V?)MOVPDI2DIrr",
-                                           "(V?)MOVPQIto64rr",
-                                           "VPSLLVQ(Y?)rr",
+def: InstRW<[BWWriteResGroup1], (instregex "VPSLLVQ(Y?)rr",
                                            "VPSRLVQ(Y?)rr")>;
 
 def BWWriteResGroup2 : SchedWriteRes<[BWPort1]> {
@@ -528,11 +527,7 @@ def BWWriteResGroup3 : SchedWriteRes<[BW
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[BWWriteResGroup3], (instregex "MMX_MOVD64rr",
-                                           "MMX_MOVD64to64rr",
-                                           "MMX_MOVQ2DQrr",
-                                           "(V?)MOV64toPQIrr",
-                                           "(V?)MOVDI2PDIrr")>;
+def: InstRW<[BWWriteResGroup3], (instregex "MMX_MOVQ2DQrr")>;
 
 def BWWriteResGroup4 : SchedWriteRes<[BWPort6]> {
   let Latency = 1;
@@ -578,8 +573,7 @@ def BWWriteResGroup8 : SchedWriteRes<[BW
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[BWWriteResGroup8], (instregex "MMX_MOVQ64rr",
-                                           "VPBLENDD(Y?)rri")>;
+def: InstRW<[BWWriteResGroup8], (instregex "VPBLENDD(Y?)rri")>;
 
 def BWWriteResGroup9 : SchedWriteRes<[BWPort0156]> {
   let Latency = 1;

Modified: llvm/trunk/lib/Target/X86/X86SchedHaswell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedHaswell.td?rev=332745&r1=332744&r2=332745&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedHaswell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedHaswell.td Fri May 18 10:58:36 2018
@@ -300,6 +300,8 @@ defm : X86WriteRes<WriteVecMaskedStoreY,
 defm : X86WriteRes<WriteVecMove,         [HWPort015], 1, [1], 1>;
 defm : X86WriteRes<WriteVecMoveX,        [HWPort015], 1, [1], 1>;
 defm : X86WriteRes<WriteVecMoveY,        [HWPort015], 1, [1], 1>;
+defm : X86WriteRes<WriteVecMoveToGpr,    [HWPort0], 1, [1], 1>;
+defm : X86WriteRes<WriteVecMoveFromGpr,  [HWPort5], 1, [1], 1>;
 
 defm : HWWriteResPair<WriteVecLogic, [HWPort015], 1, [1], 1, 5>;
 defm : HWWriteResPair<WriteVecLogicX,[HWPort015], 1, [1], 1, 6>;
@@ -794,11 +796,7 @@ def HWWriteResGroup2 : SchedWriteRes<[HW
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[HWWriteResGroup2], (instregex "MMX_MOVD64from64rr",
-                                           "MMX_MOVD64grr",
-                                           "(V?)MOVPDI2DIrr",
-                                           "(V?)MOVPQIto64rr",
-                                           "VPSLLVQ(Y?)rr",
+def: InstRW<[HWWriteResGroup2], (instregex "VPSLLVQ(Y?)rr",
                                            "VPSRLVQ(Y?)rr")>;
 
 def HWWriteResGroup3 : SchedWriteRes<[HWPort1]> {
@@ -814,11 +812,7 @@ def HWWriteResGroup4 : SchedWriteRes<[HW
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[HWWriteResGroup4], (instregex "MMX_MOVD64rr",
-                                           "MMX_MOVD64to64rr",
-                                           "MMX_MOVQ2DQrr",
-                                           "(V?)MOV64toPQIrr",
-                                           "(V?)MOVDI2PDIrr")>;
+def: InstRW<[HWWriteResGroup4], (instregex "MMX_MOVQ2DQrr")>;
 
 def HWWriteResGroup5 : SchedWriteRes<[HWPort6]> {
   let Latency = 1;
@@ -864,8 +858,7 @@ def HWWriteResGroup9 : SchedWriteRes<[HW
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[HWWriteResGroup9], (instregex "MMX_MOVQ64rr",
-                                           "VPBLENDD(Y?)rri")>;
+def: InstRW<[HWWriteResGroup9], (instregex "VPBLENDD(Y?)rri")>;
 
 def HWWriteResGroup10 : SchedWriteRes<[HWPort0156]> {
   let Latency = 1;

Modified: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td?rev=332745&r1=332744&r2=332745&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td Fri May 18 10:58:36 2018
@@ -290,6 +290,8 @@ defm : X86WriteRes<WriteVecMaskedStoreY,
 defm : X86WriteRes<WriteVecMove,         [SBPort05], 1, [1], 1>;
 defm : X86WriteRes<WriteVecMoveX,        [SBPort05], 1, [1], 1>;
 defm : X86WriteRes<WriteVecMoveY,        [SBPort05], 1, [1], 1>;
+defm : X86WriteRes<WriteVecMoveToGpr,    [SBPort0], 2, [1], 1>;
+defm : X86WriteRes<WriteVecMoveFromGpr,  [SBPort5], 1, [1], 1>;
 
 defm : SBWriteResPair<WriteVecLogic, [SBPort015], 1, [1], 1, 5>;
 defm : SBWriteResPair<WriteVecLogicX,[SBPort015], 1, [1], 1, 6>;
@@ -497,8 +499,6 @@ def: InstRW<[SBWriteResGroup2], (instrs
                                         LD_Frr, ST_Frr, ST_FPrr)>;
 def: InstRW<[SBWriteResGroup2], (instrs LOOP, LOOPE, LOOPNE)>; // FIXME: This seems wrong compared to other Intel CPUs.
 def: InstRW<[SBWriteResGroup2], (instrs RETQ)>;
-def: InstRW<[SBWriteResGroup2], (instregex "(V?)MOV64toPQIrr",
-                                           "(V?)MOVDI2PDIrr")>;
 
 def SBWriteResGroup4 : SchedWriteRes<[SBPort05]> {
   let Latency = 1;
@@ -534,14 +534,6 @@ def SBWriteResGroup6 : SchedWriteRes<[SB
 def: InstRW<[SBWriteResGroup6], (instregex "MMX_MOVQ2DQrr",
                                            "MOVDQ(A|U)rr")>; // NOTE: Different port requirements to VEX equivalents
 
-def SBWriteResGroup7 : SchedWriteRes<[SBPort0]> {
-  let Latency = 2;
-  let NumMicroOps = 1;
-  let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup7], (instregex "(V?)MOVPDI2DIrr",
-                                           "(V?)MOVPQIto64rr")>;
-
 def SBWriteResGroup9 : SchedWriteRes<[SBPort05]> {
   let Latency = 2;
   let NumMicroOps = 2;

Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td?rev=332745&r1=332744&r2=332745&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td Fri May 18 10:58:36 2018
@@ -269,9 +269,11 @@ defm : X86WriteRes<WriteVecStoreNT,
 defm : X86WriteRes<WriteVecStoreNTY,     [SKLPort237,SKLPort4], 1, [1,1], 2>;
 defm : X86WriteRes<WriteVecMaskedStore,  [SKLPort237,SKLPort0], 2, [1,1], 2>;
 defm : X86WriteRes<WriteVecMaskedStoreY, [SKLPort237,SKLPort0], 2, [1,1], 2>;
-defm : X86WriteRes<WriteVecMove,         [SKLPort015], 1, [1], 1>;
+defm : X86WriteRes<WriteVecMove,         [SKLPort05],  1, [1], 1>;
 defm : X86WriteRes<WriteVecMoveX,        [SKLPort015], 1, [1], 1>;
 defm : X86WriteRes<WriteVecMoveY,        [SKLPort015], 1, [1], 1>;
+defm : X86WriteRes<WriteVecMoveToGpr,    [SKLPort0], 2, [1], 1>;
+defm : X86WriteRes<WriteVecMoveFromGpr,  [SKLPort5], 1, [1], 1>;
 
 defm : SKLWriteResPair<WriteVecALU,   [SKLPort05], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
 defm : SKLWriteResPair<WriteVecALUX,  [SKLPort01], 1, [1], 1, 6>; // Vector integer ALU op, no logicals (XMM).
@@ -526,11 +528,7 @@ def SKLWriteResGroup3 : SchedWriteRes<[S
   let ResourceCycles = [1];
 }
 def: InstRW<[SKLWriteResGroup3], (instregex "COM(P?)_FST0r",
-                                            "MMX_MOVD64rr",
-                                            "MMX_MOVD64to64rr",
-                                            "UCOM_F(P?)r",
-                                            "(V?)MOV64toPQIrr",
-                                            "(V?)MOVDI2PDIrr")>;
+                                            "UCOM_F(P?)r")>;
 
 def SKLWriteResGroup4 : SchedWriteRes<[SKLPort6]> {
   let Latency = 1;
@@ -545,7 +543,6 @@ def SKLWriteResGroup6 : SchedWriteRes<[S
   let ResourceCycles = [1];
 }
 def: InstRW<[SKLWriteResGroup6], (instrs FINCSTP, FNOP)>;
-def: InstRW<[SKLWriteResGroup6], (instregex "MMX_MOVQ64rr")>;
 
 def SKLWriteResGroup7 : SchedWriteRes<[SKLPort06]> {
   let Latency = 1;
@@ -605,16 +602,6 @@ def: InstRW<[SKLWriteResGroup11], (instr
                                              "ST_FP(32|64|80)m",
                                              "VMPTRSTm")>;
 
-def SKLWriteResGroup12 : SchedWriteRes<[SKLPort0]> {
-  let Latency = 2;
-  let NumMicroOps = 1;
-  let ResourceCycles = [1];
-}
-def: InstRW<[SKLWriteResGroup12], (instregex "MMX_MOVD64from64rr",
-                                             "MMX_MOVD64grr",
-                                             "(V?)MOVPDI2DIrr",
-                                             "(V?)MOVPQIto64rr")>;
-
 def SKLWriteResGroup13 : SchedWriteRes<[SKLPort5]> {
   let Latency = 2;
   let NumMicroOps = 2;

Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td?rev=332745&r1=332744&r2=332745&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td Fri May 18 10:58:36 2018
@@ -269,9 +269,11 @@ defm : X86WriteRes<WriteVecStoreNT,
 defm : X86WriteRes<WriteVecStoreNTY,     [SKXPort237,SKXPort4], 1, [1,1], 2>;
 defm : X86WriteRes<WriteVecMaskedStore,  [SKXPort237,SKXPort0], 2, [1,1], 2>;
 defm : X86WriteRes<WriteVecMaskedStoreY, [SKXPort237,SKXPort0], 2, [1,1], 2>;
-defm : X86WriteRes<WriteVecMove,         [SKXPort015], 1, [1], 1>;
+defm : X86WriteRes<WriteVecMove,         [SKXPort05],  1, [1], 1>;
 defm : X86WriteRes<WriteVecMoveX,        [SKXPort015], 1, [1], 1>;
 defm : X86WriteRes<WriteVecMoveY,        [SKXPort015], 1, [1], 1>;
+defm : X86WriteRes<WriteVecMoveToGpr,    [SKXPort0], 2, [1], 1>;
+defm : X86WriteRes<WriteVecMoveFromGpr,  [SKXPort5], 1, [1], 1>;
 
 defm : SKXWriteResPair<WriteVecALU,   [SKXPort05], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
 defm : SKXWriteResPair<WriteVecALUX,  [SKXPort01], 1, [1], 1, 6>; // Vector integer ALU op, no logicals (XMM).
@@ -538,13 +540,7 @@ def SKXWriteResGroup3 : SchedWriteRes<[S
 }
 def: InstRW<[SKXWriteResGroup3], (instregex "COM(P?)_FST0r",
                                             "KMOV(B|D|Q|W)kr",
-                                            "MMX_MOVD64rr",
-                                            "MMX_MOVD64to64rr",
-                                            "MOV64toPQIrr",
-                                            "MOVDI2PDIrr",
-                                            "UCOM_F(P?)r",
-                                            "VMOV64toPQI(Z?)rr",
-                                            "VMOVDI2PDI(Z?)rr")>;
+                                            "UCOM_F(P?)r")>;
 
 def SKXWriteResGroup4 : SchedWriteRes<[SKXPort6]> {
   let Latency = 1;
@@ -559,7 +555,6 @@ def SKXWriteResGroup6 : SchedWriteRes<[S
   let ResourceCycles = [1];
 }
 def: InstRW<[SKXWriteResGroup6], (instrs FINCSTP, FNOP)>;
-def: InstRW<[SKXWriteResGroup6], (instregex "MMX_MOVQ64rr")>;
 
 def SKXWriteResGroup7 : SchedWriteRes<[SKXPort06]> {
   let Latency = 1;
@@ -630,20 +625,6 @@ def: InstRW<[SKXWriteResGroup11], (instr
                                              "ST_FP(32|64|80)m",
                                              "VMPTRSTm")>;
 
-def SKXWriteResGroup12 : SchedWriteRes<[SKXPort0]> {
-  let Latency = 2;
-  let NumMicroOps = 1;
-  let ResourceCycles = [1];
-}
-def: InstRW<[SKXWriteResGroup12], (instregex "MMX_MOVD64from64rr",
-                                             "MMX_MOVD64grr",
-                                             "MOVPDI2DIrr",
-                                             "MOVPQIto64rr",
-                                             "VMOVPDI2DIZrr",
-                                             "VMOVPDI2DIrr",
-                                             "VMOVPQIto64Zrr",
-                                             "VMOVPQIto64rr")>;
-
 def SKXWriteResGroup13 : SchedWriteRes<[SKXPort5]> {
   let Latency = 2;
   let NumMicroOps = 2;

Modified: llvm/trunk/lib/Target/X86/X86Schedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Schedule.td?rev=332745&r1=332744&r2=332745&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Schedule.td (original)
+++ llvm/trunk/lib/Target/X86/X86Schedule.td Fri May 18 10:58:36 2018
@@ -250,6 +250,8 @@ def  WriteVecMaskedStoreY : SchedWrite;
 def  WriteVecMove         : SchedWrite;
 def  WriteVecMoveX        : SchedWrite;
 def  WriteVecMoveY        : SchedWrite;
+def  WriteVecMoveToGpr    : SchedWrite;
+def  WriteVecMoveFromGpr  : SchedWrite;
 
 defm WriteVecALU    : X86SchedWritePair; // Vector integer ALU op, no logicals.
 defm WriteVecALUX   : X86SchedWritePair; // Vector integer ALU op, no logicals (XMM).

Modified: llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleAtom.td?rev=332745&r1=332744&r2=332745&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleAtom.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleAtom.td Fri May 18 10:58:36 2018
@@ -323,9 +323,11 @@ def  : WriteRes<WriteVecStoreNTY,     [A
 def  : WriteRes<WriteVecMaskedStore,  [AtomPort0]>;
 def  : WriteRes<WriteVecMaskedStoreY, [AtomPort0]>;
 
-def  : WriteRes<WriteVecMove,         [AtomPort01]>;
+def  : WriteRes<WriteVecMove,          [AtomPort0]>;
 def  : WriteRes<WriteVecMoveX,        [AtomPort01]>;
 def  : WriteRes<WriteVecMoveY,        [AtomPort01]>;
+defm : X86WriteRes<WriteVecMoveToGpr,   [AtomPort0], 3, [3], 1>;
+defm : X86WriteRes<WriteVecMoveFromGpr, [AtomPort0], 1, [1], 1>;
 
 defm : AtomWriteResPair<WriteVecALU,       [AtomPort01],  [AtomPort0], 1, 1>;
 defm : AtomWriteResPair<WriteVecALUX,      [AtomPort01],  [AtomPort0], 1, 1>;
@@ -435,26 +437,12 @@ def AtomWrite0_1 : SchedWriteRes<[AtomPo
 }
 def : InstRW<[AtomWrite0_1], (instrs FXAM, LD_Frr,
                                      BSWAP32r, BSWAP64r,
-                                     MOVSX64rr32,
-                                     MMX_MOVD64rr,
-                                     MMX_MOVD64to64rr,
-                                     MOVDI2PDIrr,
-                                     MOVDI2SSrr,
-                                     MOV64toPQIrr,
-                                     MOV64toSDrr)>;
+                                     MOVSX64rr32)>;
 def : SchedAlias<WriteALURMW, AtomWrite0_1>;
 def : SchedAlias<WriteADCRMW, AtomWrite0_1>;
 def : InstRW<[AtomWrite0_1], (instregex "(RCL|RCR|ROL|ROR|SAR|SHL|SHR)(8|16|32|64)m",
                                         "MOV(S|Z)X(32|64)rr(8|8_NOREX|16)")>;
 
-def AtomWrite0_3 : SchedWriteRes<[AtomPort0]> {
-  let Latency = 3;
-  let ResourceCycles = [3];
-}
-def : InstRW<[AtomWrite0_3], (instrs MMX_MOVD64from64rr, MMX_MOVD64grr,
-                                     MOVPDI2DIrr, MOVPQIto64rr,
-                                     MOVSDto64rr, MOVSS2DIrr)>;
-
 def AtomWrite0_5 : SchedWriteRes<[AtomPort0]> {
   let Latency = 5;
   let ResourceCycles = [5];

Modified: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td?rev=332745&r1=332744&r2=332745&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td Fri May 18 10:58:36 2018
@@ -416,6 +416,8 @@ defm : X86WriteRes<WriteVecMaskedStoreY,
 defm : X86WriteRes<WriteVecMove,          [JFPU01, JVALU], 1, [1, 1], 1>;
 defm : X86WriteRes<WriteVecMoveX,         [JFPU01, JVALU], 1, [1, 1], 1>;
 defm : X86WriteRes<WriteVecMoveY,         [JFPU01, JVALU], 1, [2, 2], 2>;
+defm : X86WriteRes<WriteVecMoveToGpr,     [JFPU0, JFPA, JALU0], 4, [1, 1, 1], 1>;
+defm : X86WriteRes<WriteVecMoveFromGpr,   [JFPU01, JFPX], 8, [1, 1], 2>;
 
 defm : JWriteResFpuPair<WriteVecALU,      [JFPU01, JVALU], 1>;
 defm : JWriteResFpuPair<WriteVecALUX,     [JFPU01, JVALU], 1>;

Modified: llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleSLM.td?rev=332745&r1=332744&r2=332745&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleSLM.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleSLM.td Fri May 18 10:58:36 2018
@@ -252,6 +252,8 @@ def  : WriteRes<WriteVecMaskedStoreY, [S
 def  : WriteRes<WriteVecMove,         [SLM_FPC_RSV01]>;
 def  : WriteRes<WriteVecMoveX,        [SLM_FPC_RSV01]>;
 def  : WriteRes<WriteVecMoveY,        [SLM_FPC_RSV01]>;
+def  : WriteRes<WriteVecMoveToGpr,    [SLM_IEC_RSV01]>;
+def  : WriteRes<WriteVecMoveFromGpr,  [SLM_IEC_RSV01]>;
 
 defm : SLMWriteResPair<WriteVecShift,    [SLM_FPC_RSV0],  1>;
 defm : SLMWriteResPair<WriteVecShiftX,   [SLM_FPC_RSV0],  1>;

Modified: llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td?rev=332745&r1=332744&r2=332745&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td Fri May 18 10:58:36 2018
@@ -301,7 +301,9 @@ defm : X86WriteRes<WriteVecMaskedStore,
 defm : X86WriteRes<WriteVecMaskedStoreY, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
 defm : X86WriteRes<WriteVecMove,         [ZnFPU], 1, [1], 1>;
 defm : X86WriteRes<WriteVecMoveX,        [ZnFPU], 1, [1], 1>;
-defm : X86WriteRes<WriteVecMoveY,        [ZnFPU], 1, [1], 1>;
+defm : X86WriteRes<WriteVecMoveY,        [ZnFPU], 2, [1], 2>;
+defm : X86WriteRes<WriteVecMoveToGpr,    [ZnFPU2], 2, [1], 1>;
+defm : X86WriteRes<WriteVecMoveFromGpr,  [ZnFPU2], 3, [1], 1>;
 defm : X86WriteRes<WriteEMMS,            [ZnFPU], 2, [1], 1>;
 
 defm : ZnWriteResFpuPair<WriteVecShift,   [ZnFPU],   1>;
@@ -922,50 +924,6 @@ def : InstRW<[WriteMicrocoded], (instrs
 def : InstRW<[WriteMicrocoded], (instrs FNINIT)>;
 
 //=== Integer MMX and XMM Instructions ===//
-//-- Move instructions --//
-
-// Moves from GPR to FPR incurs a penalty
-def ZnWriteFPU2 : SchedWriteRes<[ZnFPU2]> {
-  let Latency = 3;
-}
-
-// Move to ALU doesn't incur penalty
-def ZnWriteToALU2 : SchedWriteRes<[ZnFPU2]> {
-  let Latency = 2;
-}
-
-def ZnWriteFPU : SchedWriteRes<[ZnFPU]>;
-def ZnWriteFPUY : SchedWriteRes<[ZnFPU]> {
-  let NumMicroOps = 2;
-  let Latency=2;
-}
-
-// MOVD.
-// r32/64 <- (x)mm.
-def : InstRW<[ZnWriteToALU2], (instrs MMX_MOVD64grr,
-                                      MMX_MOVD64from64rr,
-                                      MOVPDI2DIrr,
-                                      VMOVPDI2DIrr)>;
-
-// (x)mm <- r32/64.
-def : InstRW<[ZnWriteFPU2], (instrs MMX_MOVD64rr,
-                                    MMX_MOVD64to64rr,
-                                    MOVDI2PDIrr,
-                                    VMOVDI2PDIrr)>;
-
-// MOVQ.
-// r64 <- (x)mm.
-def : InstRW<[ZnWriteToALU2], (instrs VMOVPQIto64rr)>;
-
-// (x)mm <- r64.
-def : InstRW<[ZnWriteFPU2], (instrs VMOV64toPQIrr)>;
-
-// (x)mm <- (x)mm.
-def : InstRW<[ZnWriteFPU], (instregex "MMX_MOVQ64rr")>;
-
-// (V)MOVDQA/U.
-// y <- y.
-def : InstRW<[ZnWriteFPUY], (instregex "VMOVDQ(A|U)Yrr")>;
 
 // PACKSSWB/DW.
 // mm <- mm.

Modified: llvm/trunk/test/CodeGen/X86/3dnow-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/3dnow-schedule.ll?rev=332745&r1=332744&r2=332745&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/3dnow-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/3dnow-schedule.ll Fri May 18 10:58:36 2018
@@ -16,7 +16,7 @@ define i64 @test_pavgusb(x86_mmx %a0, x8
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pavgusb %mm1, %mm0 # sched: [3:1.00]
 ; CHECK-NEXT:    pavgusb (%rdi), %mm0 # sched: [8:1.00]
-; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -31,7 +31,7 @@ define i64 @test_pf2id(x86_mmx* %a0) opt
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pf2id (%rdi), %mm0 # sched: [9:1.00]
 ; CHECK-NEXT:    pf2id %mm0, %mm0 # sched: [3:1.00]
-; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = load x86_mmx, x86_mmx *%a0, align 8
   %2 = call x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx %1)
@@ -46,7 +46,7 @@ define i64 @test_pf2iw(x86_mmx* %a0) opt
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pf2iw (%rdi), %mm0 # sched: [9:1.00]
 ; CHECK-NEXT:    pf2iw %mm0, %mm0 # sched: [3:1.00]
-; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = load x86_mmx, x86_mmx *%a0, align 8
   %2 = call x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx %1)
@@ -61,7 +61,7 @@ define i64 @test_pfacc(x86_mmx %a0, x86_
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfacc %mm1, %mm0 # sched: [3:1.00]
 ; CHECK-NEXT:    pfacc (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -76,7 +76,7 @@ define i64 @test_pfadd(x86_mmx %a0, x86_
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfadd %mm1, %mm0 # sched: [3:1.00]
 ; CHECK-NEXT:    pfadd (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -91,7 +91,7 @@ define i64 @test_pfcmpeq(x86_mmx %a0, x8
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfcmpeq %mm1, %mm0 # sched: [3:1.00]
 ; CHECK-NEXT:    pfcmpeq (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -106,7 +106,7 @@ define i64 @test_pfcmpge(x86_mmx %a0, x8
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfcmpge %mm1, %mm0 # sched: [3:1.00]
 ; CHECK-NEXT:    pfcmpge (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -121,7 +121,7 @@ define i64 @test_pfcmpgt(x86_mmx %a0, x8
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfcmpgt %mm1, %mm0 # sched: [3:1.00]
 ; CHECK-NEXT:    pfcmpgt (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -136,7 +136,7 @@ define i64 @test_pfmax(x86_mmx %a0, x86_
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfmax %mm1, %mm0 # sched: [3:1.00]
 ; CHECK-NEXT:    pfmax (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -151,7 +151,7 @@ define i64 @test_pfmin(x86_mmx %a0, x86_
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfmin %mm1, %mm0 # sched: [3:1.00]
 ; CHECK-NEXT:    pfmin (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -166,7 +166,7 @@ define i64 @test_pfmul(x86_mmx %a0, x86_
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfmul %mm1, %mm0 # sched: [3:1.00]
 ; CHECK-NEXT:    pfmul (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -181,7 +181,7 @@ define i64 @test_pfnacc(x86_mmx %a0, x86
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfnacc %mm1, %mm0 # sched: [3:1.00]
 ; CHECK-NEXT:    pfnacc (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -196,7 +196,7 @@ define i64 @test_pfpnacc(x86_mmx %a0, x8
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfpnacc %mm1, %mm0 # sched: [3:1.00]
 ; CHECK-NEXT:    pfpnacc (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -211,7 +211,7 @@ define i64 @test_pfrcp(x86_mmx* %a0) opt
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfrcp (%rdi), %mm0 # sched: [9:1.00]
 ; CHECK-NEXT:    pfrcp %mm0, %mm0 # sched: [3:1.00]
-; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = load x86_mmx, x86_mmx *%a0, align 8
   %2 = call x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx %1)
@@ -226,7 +226,7 @@ define i64 @test_pfrcpit1(x86_mmx %a0, x
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfrcpit1 %mm1, %mm0 # sched: [3:1.00]
 ; CHECK-NEXT:    pfrcpit1 (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -241,7 +241,7 @@ define i64 @test_pfrcpit2(x86_mmx %a0, x
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfrcpit2 %mm1, %mm0 # sched: [3:1.00]
 ; CHECK-NEXT:    pfrcpit2 (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -256,7 +256,7 @@ define i64 @test_pfrsqit1(x86_mmx %a0, x
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfrsqit1 %mm1, %mm0 # sched: [3:1.00]
 ; CHECK-NEXT:    pfrsqit1 (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -271,7 +271,7 @@ define i64 @test_pfrsqrt(x86_mmx* %a0) o
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfrsqrt (%rdi), %mm0 # sched: [9:1.00]
 ; CHECK-NEXT:    pfrsqrt %mm0, %mm0 # sched: [3:1.00]
-; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = load x86_mmx, x86_mmx *%a0, align 8
   %2 = call x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx %1)
@@ -286,7 +286,7 @@ define i64 @test_pfsub(x86_mmx %a0, x86_
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfsub %mm1, %mm0 # sched: [3:1.00]
 ; CHECK-NEXT:    pfsub (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -301,7 +301,7 @@ define i64 @test_pfsubr(x86_mmx %a0, x86
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfsubr %mm1, %mm0 # sched: [3:1.00]
 ; CHECK-NEXT:    pfsubr (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -316,7 +316,7 @@ define i64 @test_pi2fd(x86_mmx* %a0) opt
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pi2fd (%rdi), %mm0 # sched: [9:1.00]
 ; CHECK-NEXT:    pi2fd %mm0, %mm0 # sched: [3:1.00]
-; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = load x86_mmx, x86_mmx *%a0, align 8
   %2 = call x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx %1)
@@ -331,7 +331,7 @@ define i64 @test_pi2fw(x86_mmx* %a0) opt
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pi2fw (%rdi), %mm0 # sched: [9:1.00]
 ; CHECK-NEXT:    pi2fw %mm0, %mm0 # sched: [3:1.00]
-; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = load x86_mmx, x86_mmx *%a0, align 8
   %2 = call x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx %1)
@@ -346,7 +346,7 @@ define i64 @test_pmulhrw(x86_mmx %a0, x8
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pmulhrw %mm1, %mm0 # sched: [5:1.00]
 ; CHECK-NEXT:    pmulhrw (%rdi), %mm0 # sched: [10:1.00]
-; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -383,7 +383,7 @@ define i64 @test_pswapd(x86_mmx* %a0) op
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pswapd (%rdi), %mm0 # mm0 = mem[1,0] sched: [6:1.00]
 ; CHECK-NEXT:    pswapd %mm0, %mm0 # mm0 = mm0[1,0] sched: [1:1.00]
-; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = load x86_mmx, x86_mmx *%a0, align 8
   %2 = call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %1)

Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=332745&r1=332744&r2=332745&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Fri May 18 10:58:36 2018
@@ -1981,12 +1981,12 @@ entry:
 define double @long_to_double(i64 %x) {
 ; GENERIC-LABEL: long_to_double:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vmovq %rdi, %xmm0 # sched: [1:0.33]
+; GENERIC-NEXT:    vmovq %rdi, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: long_to_double:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vmovq %rdi, %xmm0 # sched: [1:0.25]
+; SKX-NEXT:    vmovq %rdi, %xmm0 # sched: [1:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
    %res = bitcast i64 %x to double
    ret double %res
@@ -1995,12 +1995,12 @@ define double @long_to_double(i64 %x) {
 define i64 @double_to_long(double %x) {
 ; GENERIC-LABEL: double_to_long:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vmovq %xmm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    vmovq %xmm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: double_to_long:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vmovq %xmm0, %rax # sched: [1:0.25]
+; SKX-NEXT:    vmovq %xmm0, %rax # sched: [2:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
    %res = bitcast double %x to i64
    ret i64 %res
@@ -2009,12 +2009,12 @@ define i64 @double_to_long(double %x) {
 define float @int_to_float(i32 %x) {
 ; GENERIC-LABEL: int_to_float:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vmovd %edi, %xmm0 # sched: [1:0.33]
+; GENERIC-NEXT:    vmovd %edi, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: int_to_float:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vmovd %edi, %xmm0 # sched: [1:0.25]
+; SKX-NEXT:    vmovd %edi, %xmm0 # sched: [1:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
    %res = bitcast i32 %x to float
    ret float %res
@@ -2023,12 +2023,12 @@ define float @int_to_float(i32 %x) {
 define i32 @float_to_int(float %x) {
 ; GENERIC-LABEL: float_to_int:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vmovd %xmm0, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    vmovd %xmm0, %eax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: float_to_int:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vmovd %xmm0, %eax # sched: [1:0.25]
+; SKX-NEXT:    vmovd %xmm0, %eax # sched: [2:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
    %res = bitcast float %x to i32
    ret i32 %res
@@ -5877,12 +5877,12 @@ entry:
 define i32 @mov_test1(float %x) {
 ; GENERIC-LABEL: mov_test1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vmovd %xmm0, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    vmovd %xmm0, %eax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: mov_test1:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vmovd %xmm0, %eax # sched: [1:0.25]
+; SKX-NEXT:    vmovd %xmm0, %eax # sched: [2:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
    %res = bitcast float %x to i32
    ret i32 %res

Modified: llvm/trunk/test/CodeGen/X86/mmx-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mmx-schedule.ll?rev=332745&r1=332744&r2=332745&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mmx-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mmx-schedule.ll Fri May 18 10:58:36 2018
@@ -17,7 +17,7 @@ define i64 @test_cvtpd2pi(<2 x double> %
 ; GENERIC-NEXT:    cvtpd2pi (%rdi), %mm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    cvtpd2pi %xmm0, %mm1 # sched: [4:1.00]
 ; GENERIC-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_cvtpd2pi:
@@ -41,7 +41,7 @@ define i64 @test_cvtpd2pi(<2 x double> %
 ; SANDY-NEXT:    cvtpd2pi (%rdi), %mm0 # sched: [10:1.00]
 ; SANDY-NEXT:    cvtpd2pi %xmm0, %mm1 # sched: [4:1.00]
 ; SANDY-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cvtpd2pi:
@@ -81,7 +81,7 @@ define i64 @test_cvtpd2pi(<2 x double> %
 ; BTVER2-NEXT:    cvtpd2pi (%rdi), %mm1 # sched: [8:1.00]
 ; BTVER2-NEXT:    cvtpd2pi %xmm0, %mm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
-; BTVER2-NEXT:    movq %mm1, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm1, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_cvtpd2pi:
@@ -262,7 +262,7 @@ define i64 @test_cvtps2pi(<4 x float> %a
 ; GENERIC-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [9:1.00]
 ; GENERIC-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
-; GENERIC-NEXT:    movq %mm1, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm1, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_cvtps2pi:
@@ -286,7 +286,7 @@ define i64 @test_cvtps2pi(<4 x float> %a
 ; SANDY-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [9:1.00]
 ; SANDY-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
-; SANDY-NEXT:    movq %mm1, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm1, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cvtps2pi:
@@ -326,7 +326,7 @@ define i64 @test_cvtps2pi(<4 x float> %a
 ; BTVER2-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [8:1.00]
 ; BTVER2-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
-; BTVER2-NEXT:    movq %mm1, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm1, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_cvtps2pi:
@@ -351,7 +351,7 @@ define i64 @test_cvttpd2pi(<2 x double>
 ; GENERIC-NEXT:    cvttpd2pi (%rdi), %mm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    cvttpd2pi %xmm0, %mm1 # sched: [4:1.00]
 ; GENERIC-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_cvttpd2pi:
@@ -375,7 +375,7 @@ define i64 @test_cvttpd2pi(<2 x double>
 ; SANDY-NEXT:    cvttpd2pi (%rdi), %mm0 # sched: [10:1.00]
 ; SANDY-NEXT:    cvttpd2pi %xmm0, %mm1 # sched: [4:1.00]
 ; SANDY-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cvttpd2pi:
@@ -415,7 +415,7 @@ define i64 @test_cvttpd2pi(<2 x double>
 ; BTVER2-NEXT:    cvttpd2pi (%rdi), %mm1 # sched: [8:1.00]
 ; BTVER2-NEXT:    cvttpd2pi %xmm0, %mm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
-; BTVER2-NEXT:    movq %mm1, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm1, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_cvttpd2pi:
@@ -440,7 +440,7 @@ define i64 @test_cvttps2pi(<4 x float> %
 ; GENERIC-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [9:1.00]
 ; GENERIC-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
-; GENERIC-NEXT:    movq %mm1, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm1, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_cvttps2pi:
@@ -464,7 +464,7 @@ define i64 @test_cvttps2pi(<4 x float> %
 ; SANDY-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [9:1.00]
 ; SANDY-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
-; SANDY-NEXT:    movq %mm1, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm1, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cvttps2pi:
@@ -504,7 +504,7 @@ define i64 @test_cvttps2pi(<4 x float> %
 ; BTVER2-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [8:1.00]
 ; BTVER2-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
-; BTVER2-NEXT:    movq %mm1, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm1, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_cvttps2pi:
@@ -636,12 +636,12 @@ declare void @llvm.x86.mmx.maskmovq(x86_
 define i32 @test_movd(x86_mmx %a0, i32 %a1, i32 *%a2) {
 ; GENERIC-LABEL: test_movd:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    movd %edi, %mm1 # sched: [1:0.33]
+; GENERIC-NEXT:    movd %edi, %mm1 # sched: [1:1.00]
 ; GENERIC-NEXT:    movd (%rsi), %mm2 # sched: [5:0.50]
 ; GENERIC-NEXT:    paddd %mm1, %mm2 # sched: [3:1.00]
 ; GENERIC-NEXT:    paddd %mm2, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT:    movd %mm2, %ecx # sched: [1:0.33]
-; GENERIC-NEXT:    movd %mm0, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    movd %mm2, %ecx # sched: [2:1.00]
+; GENERIC-NEXT:    movd %mm0, %eax # sched: [2:1.00]
 ; GENERIC-NEXT:    movl %ecx, (%rsi) # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -669,12 +669,12 @@ define i32 @test_movd(x86_mmx %a0, i32 %
 ;
 ; SANDY-LABEL: test_movd:
 ; SANDY:       # %bb.0:
-; SANDY-NEXT:    movd %edi, %mm1 # sched: [1:0.33]
+; SANDY-NEXT:    movd %edi, %mm1 # sched: [1:1.00]
 ; SANDY-NEXT:    movd (%rsi), %mm2 # sched: [5:0.50]
 ; SANDY-NEXT:    paddd %mm1, %mm2 # sched: [3:1.00]
 ; SANDY-NEXT:    paddd %mm2, %mm0 # sched: [3:1.00]
-; SANDY-NEXT:    movd %mm2, %ecx # sched: [1:0.33]
-; SANDY-NEXT:    movd %mm0, %eax # sched: [1:0.33]
+; SANDY-NEXT:    movd %mm2, %ecx # sched: [2:1.00]
+; SANDY-NEXT:    movd %mm0, %eax # sched: [2:1.00]
 ; SANDY-NEXT:    movl %ecx, (%rsi) # sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -724,12 +724,12 @@ define i32 @test_movd(x86_mmx %a0, i32 %
 ;
 ; BTVER2-LABEL: test_movd:
 ; BTVER2:       # %bb.0:
+; BTVER2-NEXT:    movd %edi, %mm1 # sched: [8:0.50]
 ; BTVER2-NEXT:    movd (%rsi), %mm2 # sched: [5:1.00]
-; BTVER2-NEXT:    movd %edi, %mm1 # sched: [1:0.50]
 ; BTVER2-NEXT:    paddd %mm1, %mm2 # sched: [1:0.50]
 ; BTVER2-NEXT:    paddd %mm2, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT:    movd %mm2, %ecx # sched: [1:0.50]
-; BTVER2-NEXT:    movd %mm0, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    movd %mm2, %ecx # sched: [4:1.00]
+; BTVER2-NEXT:    movd %mm0, %eax # sched: [4:1.00]
 ; BTVER2-NEXT:    movl %ecx, (%rsi) # sched: [1:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
@@ -763,7 +763,7 @@ define i64 @test_movdq2q(<2 x i64> %a0)
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    movdq2q %xmm0, %mm0 # sched: [2:1.00]
 ; GENERIC-NEXT:    paddd %mm0, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_movdq2q:
@@ -784,7 +784,7 @@ define i64 @test_movdq2q(<2 x i64> %a0)
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    movdq2q %xmm0, %mm0 # sched: [2:1.00]
 ; SANDY-NEXT:    paddd %mm0, %mm0 # sched: [3:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movdq2q:
@@ -819,7 +819,7 @@ define i64 @test_movdq2q(<2 x i64> %a0)
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    movdq2q %xmm0, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_movdq2q:
@@ -1030,7 +1030,7 @@ define i64 @test_pabsb(x86_mmx *%a0) opt
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pabsb (%rdi), %mm0 # sched: [6:0.50]
 ; GENERIC-NEXT:    pabsb %mm0, %mm0 # sched: [1:0.50]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pabsb:
@@ -1051,7 +1051,7 @@ define i64 @test_pabsb(x86_mmx *%a0) opt
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pabsb (%rdi), %mm0 # sched: [6:0.50]
 ; SANDY-NEXT:    pabsb %mm0, %mm0 # sched: [1:0.50]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pabsb:
@@ -1086,7 +1086,7 @@ define i64 @test_pabsb(x86_mmx *%a0) opt
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pabsb (%rdi), %mm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    pabsb %mm0, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pabsb:
@@ -1108,7 +1108,7 @@ define i64 @test_pabsd(x86_mmx *%a0) opt
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pabsd (%rdi), %mm0 # sched: [6:0.50]
 ; GENERIC-NEXT:    pabsd %mm0, %mm0 # sched: [1:0.50]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pabsd:
@@ -1129,7 +1129,7 @@ define i64 @test_pabsd(x86_mmx *%a0) opt
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pabsd (%rdi), %mm0 # sched: [6:0.50]
 ; SANDY-NEXT:    pabsd %mm0, %mm0 # sched: [1:0.50]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pabsd:
@@ -1164,7 +1164,7 @@ define i64 @test_pabsd(x86_mmx *%a0) opt
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pabsd (%rdi), %mm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    pabsd %mm0, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pabsd:
@@ -1186,7 +1186,7 @@ define i64 @test_pabsw(x86_mmx *%a0) opt
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pabsw (%rdi), %mm0 # sched: [6:0.50]
 ; GENERIC-NEXT:    pabsw %mm0, %mm0 # sched: [1:0.50]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pabsw:
@@ -1207,7 +1207,7 @@ define i64 @test_pabsw(x86_mmx *%a0) opt
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pabsw (%rdi), %mm0 # sched: [6:0.50]
 ; SANDY-NEXT:    pabsw %mm0, %mm0 # sched: [1:0.50]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pabsw:
@@ -1242,7 +1242,7 @@ define i64 @test_pabsw(x86_mmx *%a0) opt
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pabsw (%rdi), %mm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    pabsw %mm0, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pabsw:
@@ -1264,7 +1264,7 @@ define i64 @test_packssdw(x86_mmx %a0, x
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    packssdw %mm1, %mm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    packssdw (%rdi), %mm0 # sched: [6:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_packssdw:
@@ -1285,7 +1285,7 @@ define i64 @test_packssdw(x86_mmx %a0, x
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    packssdw %mm1, %mm0 # sched: [1:1.00]
 ; SANDY-NEXT:    packssdw (%rdi), %mm0 # sched: [6:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_packssdw:
@@ -1320,7 +1320,7 @@ define i64 @test_packssdw(x86_mmx %a0, x
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    packssdw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    packssdw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_packssdw:
@@ -1342,7 +1342,7 @@ define i64 @test_packsswb(x86_mmx %a0, x
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    packsswb %mm1, %mm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    packsswb (%rdi), %mm0 # sched: [6:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_packsswb:
@@ -1363,7 +1363,7 @@ define i64 @test_packsswb(x86_mmx %a0, x
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    packsswb %mm1, %mm0 # sched: [1:1.00]
 ; SANDY-NEXT:    packsswb (%rdi), %mm0 # sched: [6:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_packsswb:
@@ -1398,7 +1398,7 @@ define i64 @test_packsswb(x86_mmx %a0, x
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    packsswb %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    packsswb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_packsswb:
@@ -1420,7 +1420,7 @@ define i64 @test_packuswb(x86_mmx %a0, x
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    packuswb %mm1, %mm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    packuswb (%rdi), %mm0 # sched: [6:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_packuswb:
@@ -1441,7 +1441,7 @@ define i64 @test_packuswb(x86_mmx %a0, x
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    packuswb %mm1, %mm0 # sched: [1:1.00]
 ; SANDY-NEXT:    packuswb (%rdi), %mm0 # sched: [6:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_packuswb:
@@ -1476,7 +1476,7 @@ define i64 @test_packuswb(x86_mmx %a0, x
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    packuswb %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    packuswb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_packuswb:
@@ -1498,7 +1498,7 @@ define i64 @test_paddb(x86_mmx %a0, x86_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    paddb %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    paddb (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_paddb:
@@ -1519,7 +1519,7 @@ define i64 @test_paddb(x86_mmx %a0, x86_
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    paddb %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    paddb (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_paddb:
@@ -1554,7 +1554,7 @@ define i64 @test_paddb(x86_mmx %a0, x86_
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    paddb %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    paddb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_paddb:
@@ -1576,7 +1576,7 @@ define i64 @test_paddd(x86_mmx %a0, x86_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    paddd %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    paddd (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_paddd:
@@ -1597,7 +1597,7 @@ define i64 @test_paddd(x86_mmx %a0, x86_
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    paddd %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    paddd (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_paddd:
@@ -1632,7 +1632,7 @@ define i64 @test_paddd(x86_mmx %a0, x86_
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    paddd %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    paddd (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_paddd:
@@ -1654,7 +1654,7 @@ define i64 @test_paddq(x86_mmx %a0, x86_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    paddq %mm1, %mm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    paddq (%rdi), %mm0 # sched: [7:0.50]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_paddq:
@@ -1675,7 +1675,7 @@ define i64 @test_paddq(x86_mmx %a0, x86_
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    paddq %mm1, %mm0 # sched: [1:0.50]
 ; SANDY-NEXT:    paddq (%rdi), %mm0 # sched: [7:0.50]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_paddq:
@@ -1710,7 +1710,7 @@ define i64 @test_paddq(x86_mmx %a0, x86_
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    paddq %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    paddq (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_paddq:
@@ -1732,7 +1732,7 @@ define i64 @test_paddsb(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    paddsb %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    paddsb (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_paddsb:
@@ -1753,7 +1753,7 @@ define i64 @test_paddsb(x86_mmx %a0, x86
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    paddsb %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    paddsb (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_paddsb:
@@ -1788,7 +1788,7 @@ define i64 @test_paddsb(x86_mmx %a0, x86
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    paddsb %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    paddsb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_paddsb:
@@ -1810,7 +1810,7 @@ define i64 @test_paddsw(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    paddsw %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    paddsw (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_paddsw:
@@ -1831,7 +1831,7 @@ define i64 @test_paddsw(x86_mmx %a0, x86
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    paddsw %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    paddsw (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_paddsw:
@@ -1866,7 +1866,7 @@ define i64 @test_paddsw(x86_mmx %a0, x86
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    paddsw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    paddsw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_paddsw:
@@ -1888,7 +1888,7 @@ define i64 @test_paddusb(x86_mmx %a0, x8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    paddusb %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    paddusb (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_paddusb:
@@ -1909,7 +1909,7 @@ define i64 @test_paddusb(x86_mmx %a0, x8
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    paddusb %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    paddusb (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_paddusb:
@@ -1944,7 +1944,7 @@ define i64 @test_paddusb(x86_mmx %a0, x8
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    paddusb %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    paddusb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_paddusb:
@@ -1966,7 +1966,7 @@ define i64 @test_paddusw(x86_mmx %a0, x8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    paddusw %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    paddusw (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_paddusw:
@@ -1987,7 +1987,7 @@ define i64 @test_paddusw(x86_mmx %a0, x8
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    paddusw %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    paddusw (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_paddusw:
@@ -2022,7 +2022,7 @@ define i64 @test_paddusw(x86_mmx %a0, x8
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    paddusw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    paddusw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_paddusw:
@@ -2044,7 +2044,7 @@ define i64 @test_paddw(x86_mmx %a0, x86_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    paddw %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    paddw (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_paddw:
@@ -2065,7 +2065,7 @@ define i64 @test_paddw(x86_mmx %a0, x86_
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    paddw %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    paddw (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_paddw:
@@ -2100,7 +2100,7 @@ define i64 @test_paddw(x86_mmx %a0, x86_
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    paddw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    paddw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_paddw:
@@ -2122,7 +2122,7 @@ define i64 @test_palignr(x86_mmx %a0, x8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    palignr $1, %mm1, %mm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    palignr $1, (%rdi), %mm0 # sched: [6:0.50]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_palignr:
@@ -2143,7 +2143,7 @@ define i64 @test_palignr(x86_mmx %a0, x8
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    palignr $1, %mm1, %mm0 # sched: [1:0.50]
 ; SANDY-NEXT:    palignr $1, (%rdi), %mm0 # sched: [6:0.50]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_palignr:
@@ -2178,7 +2178,7 @@ define i64 @test_palignr(x86_mmx %a0, x8
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    palignr $1, %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    palignr $1, (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_palignr:
@@ -2200,7 +2200,7 @@ define i64 @test_pand(x86_mmx %a0, x86_m
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pand %mm1, %mm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    pand (%rdi), %mm0 # sched: [6:0.50]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pand:
@@ -2221,7 +2221,7 @@ define i64 @test_pand(x86_mmx %a0, x86_m
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pand %mm1, %mm0 # sched: [1:0.33]
 ; SANDY-NEXT:    pand (%rdi), %mm0 # sched: [6:0.50]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pand:
@@ -2256,7 +2256,7 @@ define i64 @test_pand(x86_mmx %a0, x86_m
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pand %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    pand (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pand:
@@ -2278,7 +2278,7 @@ define i64 @test_pandn(x86_mmx %a0, x86_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pandn %mm1, %mm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    pandn (%rdi), %mm0 # sched: [6:0.50]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pandn:
@@ -2299,7 +2299,7 @@ define i64 @test_pandn(x86_mmx %a0, x86_
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pandn %mm1, %mm0 # sched: [1:0.33]
 ; SANDY-NEXT:    pandn (%rdi), %mm0 # sched: [6:0.50]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pandn:
@@ -2334,7 +2334,7 @@ define i64 @test_pandn(x86_mmx %a0, x86_
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pandn %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    pandn (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pandn:
@@ -2356,7 +2356,7 @@ define i64 @test_pavgb(x86_mmx %a0, x86_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pavgb %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    pavgb (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pavgb:
@@ -2377,7 +2377,7 @@ define i64 @test_pavgb(x86_mmx %a0, x86_
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pavgb %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    pavgb (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pavgb:
@@ -2412,7 +2412,7 @@ define i64 @test_pavgb(x86_mmx %a0, x86_
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pavgb %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    pavgb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pavgb:
@@ -2434,7 +2434,7 @@ define i64 @test_pavgw(x86_mmx %a0, x86_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pavgw %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    pavgw (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pavgw:
@@ -2455,7 +2455,7 @@ define i64 @test_pavgw(x86_mmx %a0, x86_
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pavgw %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    pavgw (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pavgw:
@@ -2490,7 +2490,7 @@ define i64 @test_pavgw(x86_mmx %a0, x86_
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pavgw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    pavgw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pavgw:
@@ -2512,7 +2512,7 @@ define i64 @test_pcmpeqb(x86_mmx %a0, x8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pcmpeqb %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pcmpeqb:
@@ -2533,7 +2533,7 @@ define i64 @test_pcmpeqb(x86_mmx %a0, x8
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pcmpeqb %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pcmpeqb:
@@ -2568,7 +2568,7 @@ define i64 @test_pcmpeqb(x86_mmx %a0, x8
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pcmpeqb %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pcmpeqb:
@@ -2590,7 +2590,7 @@ define i64 @test_pcmpeqd(x86_mmx %a0, x8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pcmpeqd %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pcmpeqd:
@@ -2611,7 +2611,7 @@ define i64 @test_pcmpeqd(x86_mmx %a0, x8
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pcmpeqd %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pcmpeqd:
@@ -2646,7 +2646,7 @@ define i64 @test_pcmpeqd(x86_mmx %a0, x8
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pcmpeqd %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pcmpeqd:
@@ -2668,7 +2668,7 @@ define i64 @test_pcmpeqw(x86_mmx %a0, x8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pcmpeqw %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pcmpeqw:
@@ -2689,7 +2689,7 @@ define i64 @test_pcmpeqw(x86_mmx %a0, x8
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pcmpeqw %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pcmpeqw:
@@ -2724,7 +2724,7 @@ define i64 @test_pcmpeqw(x86_mmx %a0, x8
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pcmpeqw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pcmpeqw:
@@ -2746,7 +2746,7 @@ define i64 @test_pcmpgtb(x86_mmx %a0, x8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pcmpgtb %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pcmpgtb:
@@ -2767,7 +2767,7 @@ define i64 @test_pcmpgtb(x86_mmx %a0, x8
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pcmpgtb %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pcmpgtb:
@@ -2802,7 +2802,7 @@ define i64 @test_pcmpgtb(x86_mmx %a0, x8
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pcmpgtb %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pcmpgtb:
@@ -2824,7 +2824,7 @@ define i64 @test_pcmpgtd(x86_mmx %a0, x8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pcmpgtd %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pcmpgtd:
@@ -2845,7 +2845,7 @@ define i64 @test_pcmpgtd(x86_mmx %a0, x8
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pcmpgtd %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pcmpgtd:
@@ -2880,7 +2880,7 @@ define i64 @test_pcmpgtd(x86_mmx %a0, x8
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pcmpgtd %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pcmpgtd:
@@ -2902,7 +2902,7 @@ define i64 @test_pcmpgtw(x86_mmx %a0, x8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pcmpgtw %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pcmpgtw:
@@ -2923,7 +2923,7 @@ define i64 @test_pcmpgtw(x86_mmx %a0, x8
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pcmpgtw %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pcmpgtw:
@@ -2958,7 +2958,7 @@ define i64 @test_pcmpgtw(x86_mmx %a0, x8
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pcmpgtw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pcmpgtw:
@@ -3035,7 +3035,7 @@ define i64 @test_phaddd(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    phaddd %mm1, %mm0 # sched: [3:1.50]
 ; GENERIC-NEXT:    phaddd (%rdi), %mm0 # sched: [8:1.50]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_phaddd:
@@ -3056,7 +3056,7 @@ define i64 @test_phaddd(x86_mmx %a0, x86
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    phaddd %mm1, %mm0 # sched: [3:1.50]
 ; SANDY-NEXT:    phaddd (%rdi), %mm0 # sched: [8:1.50]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_phaddd:
@@ -3091,7 +3091,7 @@ define i64 @test_phaddd(x86_mmx %a0, x86
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    phaddd %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    phaddd (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_phaddd:
@@ -3113,7 +3113,7 @@ define i64 @test_phaddsw(x86_mmx %a0, x8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    phaddsw %mm1, %mm0 # sched: [3:1.50]
 ; GENERIC-NEXT:    phaddsw (%rdi), %mm0 # sched: [8:1.50]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_phaddsw:
@@ -3134,7 +3134,7 @@ define i64 @test_phaddsw(x86_mmx %a0, x8
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    phaddsw %mm1, %mm0 # sched: [3:1.50]
 ; SANDY-NEXT:    phaddsw (%rdi), %mm0 # sched: [8:1.50]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_phaddsw:
@@ -3169,7 +3169,7 @@ define i64 @test_phaddsw(x86_mmx %a0, x8
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    phaddsw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    phaddsw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_phaddsw:
@@ -3191,7 +3191,7 @@ define i64 @test_phaddw(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    phaddw %mm1, %mm0 # sched: [3:1.50]
 ; GENERIC-NEXT:    phaddw (%rdi), %mm0 # sched: [8:1.50]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_phaddw:
@@ -3212,7 +3212,7 @@ define i64 @test_phaddw(x86_mmx %a0, x86
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    phaddw %mm1, %mm0 # sched: [3:1.50]
 ; SANDY-NEXT:    phaddw (%rdi), %mm0 # sched: [8:1.50]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_phaddw:
@@ -3247,7 +3247,7 @@ define i64 @test_phaddw(x86_mmx %a0, x86
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    phaddw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    phaddw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_phaddw:
@@ -3269,7 +3269,7 @@ define i64 @test_phsubd(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    phsubd %mm1, %mm0 # sched: [3:1.50]
 ; GENERIC-NEXT:    phsubd (%rdi), %mm0 # sched: [8:1.50]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_phsubd:
@@ -3290,7 +3290,7 @@ define i64 @test_phsubd(x86_mmx %a0, x86
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    phsubd %mm1, %mm0 # sched: [3:1.50]
 ; SANDY-NEXT:    phsubd (%rdi), %mm0 # sched: [8:1.50]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_phsubd:
@@ -3325,7 +3325,7 @@ define i64 @test_phsubd(x86_mmx %a0, x86
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    phsubd %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    phsubd (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_phsubd:
@@ -3347,7 +3347,7 @@ define i64 @test_phsubsw(x86_mmx %a0, x8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    phsubsw %mm1, %mm0 # sched: [3:1.50]
 ; GENERIC-NEXT:    phsubsw (%rdi), %mm0 # sched: [8:1.50]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_phsubsw:
@@ -3368,7 +3368,7 @@ define i64 @test_phsubsw(x86_mmx %a0, x8
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    phsubsw %mm1, %mm0 # sched: [3:1.50]
 ; SANDY-NEXT:    phsubsw (%rdi), %mm0 # sched: [8:1.50]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_phsubsw:
@@ -3403,7 +3403,7 @@ define i64 @test_phsubsw(x86_mmx %a0, x8
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    phsubsw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    phsubsw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_phsubsw:
@@ -3425,7 +3425,7 @@ define i64 @test_phsubw(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    phsubw %mm1, %mm0 # sched: [3:1.50]
 ; GENERIC-NEXT:    phsubw (%rdi), %mm0 # sched: [8:1.50]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_phsubw:
@@ -3446,7 +3446,7 @@ define i64 @test_phsubw(x86_mmx %a0, x86
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    phsubw %mm1, %mm0 # sched: [3:1.50]
 ; SANDY-NEXT:    phsubw (%rdi), %mm0 # sched: [8:1.50]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_phsubw:
@@ -3481,7 +3481,7 @@ define i64 @test_phsubw(x86_mmx %a0, x86
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    phsubw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    phsubw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_phsubw:
@@ -3504,7 +3504,7 @@ define i64 @test_pinsrw(x86_mmx %a0, i32
 ; GENERIC-NEXT:    pinsrw $0, %edi, %mm0 # sched: [2:1.00]
 ; GENERIC-NEXT:    movswl (%rsi), %eax # sched: [5:0.50]
 ; GENERIC-NEXT:    pinsrw $1, %eax, %mm0 # sched: [2:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pinsrw:
@@ -3528,7 +3528,7 @@ define i64 @test_pinsrw(x86_mmx %a0, i32
 ; SANDY-NEXT:    pinsrw $0, %edi, %mm0 # sched: [2:1.00]
 ; SANDY-NEXT:    movswl (%rsi), %eax # sched: [5:0.50]
 ; SANDY-NEXT:    pinsrw $1, %eax, %mm0 # sched: [2:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pinsrw:
@@ -3568,7 +3568,7 @@ define i64 @test_pinsrw(x86_mmx %a0, i32
 ; BTVER2-NEXT:    pinsrw $0, %edi, %mm0 # sched: [7:0.50]
 ; BTVER2-NEXT:    movswl (%rsi), %eax # sched: [4:1.00]
 ; BTVER2-NEXT:    pinsrw $1, %eax, %mm0 # sched: [7:0.50]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pinsrw:
@@ -3592,7 +3592,7 @@ define i64 @test_pmaddwd(x86_mmx %a0, x8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pmaddwd %mm1, %mm0 # sched: [5:1.00]
 ; GENERIC-NEXT:    pmaddwd (%rdi), %mm0 # sched: [10:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pmaddwd:
@@ -3613,7 +3613,7 @@ define i64 @test_pmaddwd(x86_mmx %a0, x8
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pmaddwd %mm1, %mm0 # sched: [5:1.00]
 ; SANDY-NEXT:    pmaddwd (%rdi), %mm0 # sched: [10:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmaddwd:
@@ -3648,7 +3648,7 @@ define i64 @test_pmaddwd(x86_mmx %a0, x8
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pmaddwd %mm1, %mm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    pmaddwd (%rdi), %mm0 # sched: [7:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pmaddwd:
@@ -3670,7 +3670,7 @@ define i64 @test_pmaddubsw(x86_mmx %a0,
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pmaddubsw %mm1, %mm0 # sched: [5:1.00]
 ; GENERIC-NEXT:    pmaddubsw (%rdi), %mm0 # sched: [10:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pmaddubsw:
@@ -3691,7 +3691,7 @@ define i64 @test_pmaddubsw(x86_mmx %a0,
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pmaddubsw %mm1, %mm0 # sched: [5:1.00]
 ; SANDY-NEXT:    pmaddubsw (%rdi), %mm0 # sched: [10:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmaddubsw:
@@ -3726,7 +3726,7 @@ define i64 @test_pmaddubsw(x86_mmx %a0,
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pmaddubsw %mm1, %mm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    pmaddubsw (%rdi), %mm0 # sched: [7:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pmaddubsw:
@@ -3748,7 +3748,7 @@ define i64 @test_pmaxsw(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pmaxsw %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    pmaxsw (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pmaxsw:
@@ -3769,7 +3769,7 @@ define i64 @test_pmaxsw(x86_mmx %a0, x86
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pmaxsw %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    pmaxsw (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmaxsw:
@@ -3804,7 +3804,7 @@ define i64 @test_pmaxsw(x86_mmx %a0, x86
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pmaxsw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    pmaxsw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pmaxsw:
@@ -3826,7 +3826,7 @@ define i64 @test_pmaxub(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pmaxub %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    pmaxub (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pmaxub:
@@ -3847,7 +3847,7 @@ define i64 @test_pmaxub(x86_mmx %a0, x86
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pmaxub %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    pmaxub (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmaxub:
@@ -3882,7 +3882,7 @@ define i64 @test_pmaxub(x86_mmx %a0, x86
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pmaxub %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    pmaxub (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pmaxub:
@@ -3904,7 +3904,7 @@ define i64 @test_pminsw(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pminsw %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    pminsw (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pminsw:
@@ -3925,7 +3925,7 @@ define i64 @test_pminsw(x86_mmx %a0, x86
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pminsw %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    pminsw (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pminsw:
@@ -3960,7 +3960,7 @@ define i64 @test_pminsw(x86_mmx %a0, x86
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pminsw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    pminsw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pminsw:
@@ -3982,7 +3982,7 @@ define i64 @test_pminub(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pminub %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    pminub (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pminub:
@@ -4003,7 +4003,7 @@ define i64 @test_pminub(x86_mmx %a0, x86
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pminub %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    pminub (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pminub:
@@ -4038,7 +4038,7 @@ define i64 @test_pminub(x86_mmx %a0, x86
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pminub %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    pminub (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pminub:
@@ -4115,7 +4115,7 @@ define i64 @test_pmulhrsw(x86_mmx %a0, x
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pmulhrsw %mm1, %mm0 # sched: [5:1.00]
 ; GENERIC-NEXT:    pmulhrsw (%rdi), %mm0 # sched: [10:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pmulhrsw:
@@ -4136,7 +4136,7 @@ define i64 @test_pmulhrsw(x86_mmx %a0, x
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pmulhrsw %mm1, %mm0 # sched: [5:1.00]
 ; SANDY-NEXT:    pmulhrsw (%rdi), %mm0 # sched: [10:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmulhrsw:
@@ -4171,7 +4171,7 @@ define i64 @test_pmulhrsw(x86_mmx %a0, x
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pmulhrsw %mm1, %mm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    pmulhrsw (%rdi), %mm0 # sched: [7:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pmulhrsw:
@@ -4193,7 +4193,7 @@ define i64 @test_pmulhw(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pmulhw %mm1, %mm0 # sched: [5:1.00]
 ; GENERIC-NEXT:    pmulhw (%rdi), %mm0 # sched: [10:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pmulhw:
@@ -4214,7 +4214,7 @@ define i64 @test_pmulhw(x86_mmx %a0, x86
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pmulhw %mm1, %mm0 # sched: [5:1.00]
 ; SANDY-NEXT:    pmulhw (%rdi), %mm0 # sched: [10:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmulhw:
@@ -4249,7 +4249,7 @@ define i64 @test_pmulhw(x86_mmx %a0, x86
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pmulhw %mm1, %mm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    pmulhw (%rdi), %mm0 # sched: [7:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pmulhw:
@@ -4271,7 +4271,7 @@ define i64 @test_pmulhuw(x86_mmx %a0, x8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pmulhuw %mm1, %mm0 # sched: [5:1.00]
 ; GENERIC-NEXT:    pmulhuw (%rdi), %mm0 # sched: [10:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pmulhuw:
@@ -4292,7 +4292,7 @@ define i64 @test_pmulhuw(x86_mmx %a0, x8
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pmulhuw %mm1, %mm0 # sched: [5:1.00]
 ; SANDY-NEXT:    pmulhuw (%rdi), %mm0 # sched: [10:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmulhuw:
@@ -4327,7 +4327,7 @@ define i64 @test_pmulhuw(x86_mmx %a0, x8
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pmulhuw %mm1, %mm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    pmulhuw (%rdi), %mm0 # sched: [7:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pmulhuw:
@@ -4349,7 +4349,7 @@ define i64 @test_pmullw(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pmullw %mm1, %mm0 # sched: [5:1.00]
 ; GENERIC-NEXT:    pmullw (%rdi), %mm0 # sched: [10:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pmullw:
@@ -4370,7 +4370,7 @@ define i64 @test_pmullw(x86_mmx %a0, x86
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pmullw %mm1, %mm0 # sched: [5:1.00]
 ; SANDY-NEXT:    pmullw (%rdi), %mm0 # sched: [10:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmullw:
@@ -4405,7 +4405,7 @@ define i64 @test_pmullw(x86_mmx %a0, x86
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pmullw %mm1, %mm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    pmullw (%rdi), %mm0 # sched: [7:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pmullw:
@@ -4427,7 +4427,7 @@ define i64 @test_pmuludq(x86_mmx %a0, x8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pmuludq %mm1, %mm0 # sched: [5:1.00]
 ; GENERIC-NEXT:    pmuludq (%rdi), %mm0 # sched: [10:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pmuludq:
@@ -4448,7 +4448,7 @@ define i64 @test_pmuludq(x86_mmx %a0, x8
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pmuludq %mm1, %mm0 # sched: [5:1.00]
 ; SANDY-NEXT:    pmuludq (%rdi), %mm0 # sched: [10:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmuludq:
@@ -4483,7 +4483,7 @@ define i64 @test_pmuludq(x86_mmx %a0, x8
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pmuludq %mm1, %mm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    pmuludq (%rdi), %mm0 # sched: [7:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pmuludq:
@@ -4505,7 +4505,7 @@ define i64 @test_por(x86_mmx %a0, x86_mm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    por (%rdi), %mm0 # sched: [6:0.50]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_por:
@@ -4526,7 +4526,7 @@ define i64 @test_por(x86_mmx %a0, x86_mm
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
 ; SANDY-NEXT:    por (%rdi), %mm0 # sched: [6:0.50]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_por:
@@ -4561,7 +4561,7 @@ define i64 @test_por(x86_mmx %a0, x86_mm
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    por %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    por (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_por:
@@ -4583,7 +4583,7 @@ define i64 @test_psadbw(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psadbw %mm1, %mm0 # sched: [5:1.00]
 ; GENERIC-NEXT:    psadbw (%rdi), %mm0 # sched: [10:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psadbw:
@@ -4604,7 +4604,7 @@ define i64 @test_psadbw(x86_mmx %a0, x86
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psadbw %mm1, %mm0 # sched: [5:1.00]
 ; SANDY-NEXT:    psadbw (%rdi), %mm0 # sched: [10:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psadbw:
@@ -4639,7 +4639,7 @@ define i64 @test_psadbw(x86_mmx %a0, x86
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    psadbw %mm1, %mm0 # sched: [2:0.50]
 ; BTVER2-NEXT:    psadbw (%rdi), %mm0 # sched: [7:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psadbw:
@@ -4661,7 +4661,7 @@ define i64 @test_pshufb(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pshufb %mm1, %mm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    pshufb (%rdi), %mm0 # sched: [6:0.50]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pshufb:
@@ -4682,7 +4682,7 @@ define i64 @test_pshufb(x86_mmx %a0, x86
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pshufb %mm1, %mm0 # sched: [1:0.50]
 ; SANDY-NEXT:    pshufb (%rdi), %mm0 # sched: [6:0.50]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pshufb:
@@ -4717,7 +4717,7 @@ define i64 @test_pshufb(x86_mmx %a0, x86
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pshufb %mm1, %mm0 # sched: [2:2.00]
 ; BTVER2-NEXT:    pshufb (%rdi), %mm0 # sched: [7:2.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pshufb:
@@ -4739,7 +4739,7 @@ define i64 @test_pshufw(x86_mmx *%a0) op
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00]
 ; GENERIC-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pshufw:
@@ -4760,7 +4760,7 @@ define i64 @test_pshufw(x86_mmx *%a0) op
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00]
 ; SANDY-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pshufw:
@@ -4795,7 +4795,7 @@ define i64 @test_pshufw(x86_mmx *%a0) op
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00]
 ; BTVER2-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:0.50]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pshufw:
@@ -4817,7 +4817,7 @@ define i64 @test_psignb(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psignb %mm1, %mm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    psignb (%rdi), %mm0 # sched: [6:0.50]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psignb:
@@ -4838,7 +4838,7 @@ define i64 @test_psignb(x86_mmx %a0, x86
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psignb %mm1, %mm0 # sched: [1:0.50]
 ; SANDY-NEXT:    psignb (%rdi), %mm0 # sched: [6:0.50]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psignb:
@@ -4873,7 +4873,7 @@ define i64 @test_psignb(x86_mmx %a0, x86
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    psignb %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    psignb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psignb:
@@ -4895,7 +4895,7 @@ define i64 @test_psignd(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psignd %mm1, %mm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    psignd (%rdi), %mm0 # sched: [6:0.50]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psignd:
@@ -4916,7 +4916,7 @@ define i64 @test_psignd(x86_mmx %a0, x86
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psignd %mm1, %mm0 # sched: [1:0.50]
 ; SANDY-NEXT:    psignd (%rdi), %mm0 # sched: [6:0.50]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psignd:
@@ -4951,7 +4951,7 @@ define i64 @test_psignd(x86_mmx %a0, x86
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    psignd %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    psignd (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psignd:
@@ -4973,7 +4973,7 @@ define i64 @test_psignw(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psignw %mm1, %mm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    psignw (%rdi), %mm0 # sched: [6:0.50]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psignw:
@@ -4994,7 +4994,7 @@ define i64 @test_psignw(x86_mmx %a0, x86
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psignw %mm1, %mm0 # sched: [1:0.50]
 ; SANDY-NEXT:    psignw (%rdi), %mm0 # sched: [6:0.50]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psignw:
@@ -5029,7 +5029,7 @@ define i64 @test_psignw(x86_mmx %a0, x86
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    psignw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    psignw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psignw:
@@ -5052,7 +5052,7 @@ define i64 @test_pslld(x86_mmx %a0, x86_
 ; GENERIC-NEXT:    pslld %mm1, %mm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    pslld (%rdi), %mm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    pslld $7, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pslld:
@@ -5076,7 +5076,7 @@ define i64 @test_pslld(x86_mmx %a0, x86_
 ; SANDY-NEXT:    pslld %mm1, %mm0 # sched: [1:1.00]
 ; SANDY-NEXT:    pslld (%rdi), %mm0 # sched: [6:1.00]
 ; SANDY-NEXT:    pslld $7, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pslld:
@@ -5116,7 +5116,7 @@ define i64 @test_pslld(x86_mmx %a0, x86_
 ; BTVER2-NEXT:    pslld %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    pslld (%rdi), %mm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    pslld $7, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pslld:
@@ -5142,7 +5142,7 @@ define i64 @test_psllq(x86_mmx %a0, x86_
 ; GENERIC-NEXT:    psllq %mm1, %mm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    psllq (%rdi), %mm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    psllq $7, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psllq:
@@ -5166,7 +5166,7 @@ define i64 @test_psllq(x86_mmx %a0, x86_
 ; SANDY-NEXT:    psllq %mm1, %mm0 # sched: [1:1.00]
 ; SANDY-NEXT:    psllq (%rdi), %mm0 # sched: [6:1.00]
 ; SANDY-NEXT:    psllq $7, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psllq:
@@ -5206,7 +5206,7 @@ define i64 @test_psllq(x86_mmx %a0, x86_
 ; BTVER2-NEXT:    psllq %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    psllq (%rdi), %mm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    psllq $7, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psllq:
@@ -5232,7 +5232,7 @@ define i64 @test_psllw(x86_mmx %a0, x86_
 ; GENERIC-NEXT:    psllw %mm1, %mm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    psllw (%rdi), %mm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    psllw $7, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psllw:
@@ -5256,7 +5256,7 @@ define i64 @test_psllw(x86_mmx %a0, x86_
 ; SANDY-NEXT:    psllw %mm1, %mm0 # sched: [1:1.00]
 ; SANDY-NEXT:    psllw (%rdi), %mm0 # sched: [6:1.00]
 ; SANDY-NEXT:    psllw $7, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psllw:
@@ -5296,7 +5296,7 @@ define i64 @test_psllw(x86_mmx %a0, x86_
 ; BTVER2-NEXT:    psllw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    psllw (%rdi), %mm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    psllw $7, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psllw:
@@ -5322,7 +5322,7 @@ define i64 @test_psrad(x86_mmx %a0, x86_
 ; GENERIC-NEXT:    psrad %mm1, %mm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    psrad (%rdi), %mm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    psrad $7, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psrad:
@@ -5346,7 +5346,7 @@ define i64 @test_psrad(x86_mmx %a0, x86_
 ; SANDY-NEXT:    psrad %mm1, %mm0 # sched: [1:1.00]
 ; SANDY-NEXT:    psrad (%rdi), %mm0 # sched: [6:1.00]
 ; SANDY-NEXT:    psrad $7, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psrad:
@@ -5386,7 +5386,7 @@ define i64 @test_psrad(x86_mmx %a0, x86_
 ; BTVER2-NEXT:    psrad %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    psrad (%rdi), %mm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    psrad $7, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psrad:
@@ -5412,7 +5412,7 @@ define i64 @test_psraw(x86_mmx %a0, x86_
 ; GENERIC-NEXT:    psraw %mm1, %mm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    psraw (%rdi), %mm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    psraw $7, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psraw:
@@ -5436,7 +5436,7 @@ define i64 @test_psraw(x86_mmx %a0, x86_
 ; SANDY-NEXT:    psraw %mm1, %mm0 # sched: [1:1.00]
 ; SANDY-NEXT:    psraw (%rdi), %mm0 # sched: [6:1.00]
 ; SANDY-NEXT:    psraw $7, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psraw:
@@ -5476,7 +5476,7 @@ define i64 @test_psraw(x86_mmx %a0, x86_
 ; BTVER2-NEXT:    psraw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    psraw (%rdi), %mm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    psraw $7, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psraw:
@@ -5502,7 +5502,7 @@ define i64 @test_psrld(x86_mmx %a0, x86_
 ; GENERIC-NEXT:    psrld %mm1, %mm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    psrld (%rdi), %mm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    psrld $7, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psrld:
@@ -5526,7 +5526,7 @@ define i64 @test_psrld(x86_mmx %a0, x86_
 ; SANDY-NEXT:    psrld %mm1, %mm0 # sched: [1:1.00]
 ; SANDY-NEXT:    psrld (%rdi), %mm0 # sched: [6:1.00]
 ; SANDY-NEXT:    psrld $7, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psrld:
@@ -5566,7 +5566,7 @@ define i64 @test_psrld(x86_mmx %a0, x86_
 ; BTVER2-NEXT:    psrld %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    psrld (%rdi), %mm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    psrld $7, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psrld:
@@ -5592,7 +5592,7 @@ define i64 @test_psrlq(x86_mmx %a0, x86_
 ; GENERIC-NEXT:    psrlq %mm1, %mm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    psrlq (%rdi), %mm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    psrlq $7, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psrlq:
@@ -5616,7 +5616,7 @@ define i64 @test_psrlq(x86_mmx %a0, x86_
 ; SANDY-NEXT:    psrlq %mm1, %mm0 # sched: [1:1.00]
 ; SANDY-NEXT:    psrlq (%rdi), %mm0 # sched: [6:1.00]
 ; SANDY-NEXT:    psrlq $7, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psrlq:
@@ -5656,7 +5656,7 @@ define i64 @test_psrlq(x86_mmx %a0, x86_
 ; BTVER2-NEXT:    psrlq %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    psrlq (%rdi), %mm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    psrlq $7, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psrlq:
@@ -5682,7 +5682,7 @@ define i64 @test_psrlw(x86_mmx %a0, x86_
 ; GENERIC-NEXT:    psrlw %mm1, %mm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    psrlw (%rdi), %mm0 # sched: [6:1.00]
 ; GENERIC-NEXT:    psrlw $7, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psrlw:
@@ -5706,7 +5706,7 @@ define i64 @test_psrlw(x86_mmx %a0, x86_
 ; SANDY-NEXT:    psrlw %mm1, %mm0 # sched: [1:1.00]
 ; SANDY-NEXT:    psrlw (%rdi), %mm0 # sched: [6:1.00]
 ; SANDY-NEXT:    psrlw $7, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psrlw:
@@ -5746,7 +5746,7 @@ define i64 @test_psrlw(x86_mmx %a0, x86_
 ; BTVER2-NEXT:    psrlw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    psrlw (%rdi), %mm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    psrlw $7, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psrlw:
@@ -5771,7 +5771,7 @@ define i64 @test_psubb(x86_mmx %a0, x86_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psubb %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    psubb (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psubb:
@@ -5792,7 +5792,7 @@ define i64 @test_psubb(x86_mmx %a0, x86_
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psubb %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    psubb (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psubb:
@@ -5827,7 +5827,7 @@ define i64 @test_psubb(x86_mmx %a0, x86_
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    psubb %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    psubb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psubb:
@@ -5849,7 +5849,7 @@ define i64 @test_psubd(x86_mmx %a0, x86_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psubd %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    psubd (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psubd:
@@ -5870,7 +5870,7 @@ define i64 @test_psubd(x86_mmx %a0, x86_
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psubd %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    psubd (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psubd:
@@ -5905,7 +5905,7 @@ define i64 @test_psubd(x86_mmx %a0, x86_
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    psubd %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    psubd (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psubd:
@@ -5927,7 +5927,7 @@ define i64 @test_psubq(x86_mmx %a0, x86_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psubq %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    psubq (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psubq:
@@ -5948,7 +5948,7 @@ define i64 @test_psubq(x86_mmx %a0, x86_
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psubq %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    psubq (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psubq:
@@ -5983,7 +5983,7 @@ define i64 @test_psubq(x86_mmx %a0, x86_
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    psubq %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    psubq (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psubq:
@@ -6005,7 +6005,7 @@ define i64 @test_psubsb(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psubsb %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    psubsb (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psubsb:
@@ -6026,7 +6026,7 @@ define i64 @test_psubsb(x86_mmx %a0, x86
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psubsb %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    psubsb (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psubsb:
@@ -6061,7 +6061,7 @@ define i64 @test_psubsb(x86_mmx %a0, x86
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    psubsb %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    psubsb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psubsb:
@@ -6083,7 +6083,7 @@ define i64 @test_psubsw(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psubsw %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    psubsw (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psubsw:
@@ -6104,7 +6104,7 @@ define i64 @test_psubsw(x86_mmx %a0, x86
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psubsw %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    psubsw (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psubsw:
@@ -6139,7 +6139,7 @@ define i64 @test_psubsw(x86_mmx %a0, x86
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    psubsw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    psubsw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psubsw:
@@ -6161,7 +6161,7 @@ define i64 @test_psubusb(x86_mmx %a0, x8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psubusb %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    psubusb (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psubusb:
@@ -6182,7 +6182,7 @@ define i64 @test_psubusb(x86_mmx %a0, x8
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psubusb %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    psubusb (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psubusb:
@@ -6217,7 +6217,7 @@ define i64 @test_psubusb(x86_mmx %a0, x8
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    psubusb %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    psubusb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psubusb:
@@ -6239,7 +6239,7 @@ define i64 @test_psubusw(x86_mmx %a0, x8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psubusw %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    psubusw (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psubusw:
@@ -6260,7 +6260,7 @@ define i64 @test_psubusw(x86_mmx %a0, x8
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psubusw %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    psubusw (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psubusw:
@@ -6295,7 +6295,7 @@ define i64 @test_psubusw(x86_mmx %a0, x8
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    psubusw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    psubusw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psubusw:
@@ -6317,7 +6317,7 @@ define i64 @test_psubw(x86_mmx %a0, x86_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psubw %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    psubw (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psubw:
@@ -6338,7 +6338,7 @@ define i64 @test_psubw(x86_mmx %a0, x86_
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psubw %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    psubw (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psubw:
@@ -6373,7 +6373,7 @@ define i64 @test_psubw(x86_mmx %a0, x86_
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    psubw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    psubw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psubw:
@@ -6395,7 +6395,7 @@ define i64 @test_punpckhbw(x86_mmx %a0,
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00]
 ; GENERIC-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_punpckhbw:
@@ -6416,7 +6416,7 @@ define i64 @test_punpckhbw(x86_mmx %a0,
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00]
 ; SANDY-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_punpckhbw:
@@ -6451,7 +6451,7 @@ define i64 @test_punpckhbw(x86_mmx %a0,
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:0.50]
 ; BTVER2-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_punpckhbw:
@@ -6473,7 +6473,7 @@ define i64 @test_punpckhdq(x86_mmx %a0,
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00]
 ; GENERIC-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_punpckhdq:
@@ -6494,7 +6494,7 @@ define i64 @test_punpckhdq(x86_mmx %a0,
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00]
 ; SANDY-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_punpckhdq:
@@ -6529,7 +6529,7 @@ define i64 @test_punpckhdq(x86_mmx %a0,
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:0.50]
 ; BTVER2-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_punpckhdq:
@@ -6551,7 +6551,7 @@ define i64 @test_punpckhwd(x86_mmx %a0,
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
 ; GENERIC-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_punpckhwd:
@@ -6572,7 +6572,7 @@ define i64 @test_punpckhwd(x86_mmx %a0,
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
 ; SANDY-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_punpckhwd:
@@ -6607,7 +6607,7 @@ define i64 @test_punpckhwd(x86_mmx %a0,
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.50]
 ; BTVER2-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_punpckhwd:
@@ -6629,7 +6629,7 @@ define i64 @test_punpcklbw(x86_mmx %a0,
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
 ; GENERIC-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_punpcklbw:
@@ -6650,7 +6650,7 @@ define i64 @test_punpcklbw(x86_mmx %a0,
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
 ; SANDY-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_punpcklbw:
@@ -6685,7 +6685,7 @@ define i64 @test_punpcklbw(x86_mmx %a0,
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.50]
 ; BTVER2-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_punpcklbw:
@@ -6707,7 +6707,7 @@ define i64 @test_punpckldq(x86_mmx %a0,
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
 ; GENERIC-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_punpckldq:
@@ -6728,7 +6728,7 @@ define i64 @test_punpckldq(x86_mmx %a0,
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
 ; SANDY-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_punpckldq:
@@ -6763,7 +6763,7 @@ define i64 @test_punpckldq(x86_mmx %a0,
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:0.50]
 ; BTVER2-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_punpckldq:
@@ -6785,7 +6785,7 @@ define i64 @test_punpcklwd(x86_mmx %a0,
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
 ; GENERIC-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_punpcklwd:
@@ -6806,7 +6806,7 @@ define i64 @test_punpcklwd(x86_mmx %a0,
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
 ; SANDY-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_punpcklwd:
@@ -6841,7 +6841,7 @@ define i64 @test_punpcklwd(x86_mmx %a0,
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:0.50]
 ; BTVER2-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_punpcklwd:
@@ -6863,7 +6863,7 @@ define i64 @test_pxor(x86_mmx %a0, x86_m
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pxor %mm1, %mm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    pxor (%rdi), %mm0 # sched: [6:0.50]
-; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pxor:
@@ -6884,7 +6884,7 @@ define i64 @test_pxor(x86_mmx %a0, x86_m
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pxor %mm1, %mm0 # sched: [1:0.33]
 ; SANDY-NEXT:    pxor (%rdi), %mm0 # sched: [6:0.50]
-; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pxor:
@@ -6919,7 +6919,7 @@ define i64 @test_pxor(x86_mmx %a0, x86_m
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pxor %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    pxor (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pxor:

Modified: llvm/trunk/test/CodeGen/X86/sse2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-schedule.ll?rev=332745&r1=332744&r2=332745&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-schedule.ll Fri May 18 10:58:36 2018
@@ -4608,21 +4608,21 @@ define i32 @test_movd(<4 x i32> %a0, i32
 ; BTVER2-SSE-LABEL: test_movd:
 ; BTVER2-SSE:       # %bb.0:
 ; BTVER2-SSE-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
-; BTVER2-SSE-NEXT:    movd %edi, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movd %edi, %xmm1 # sched: [8:0.50]
+; BTVER2-SSE-NEXT:    paddd %xmm0, %xmm2 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movd %xmm2, %eax # sched: [4:1.00]
 ; BTVER2-SSE-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
 ; BTVER2-SSE-NEXT:    movd %xmm1, (%rsi) # sched: [2:1.00]
-; BTVER2-SSE-NEXT:    paddd %xmm0, %xmm2 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    movd %xmm2, %eax # sched: [1:0.50]
 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
 ;
 ; BTVER2-LABEL: test_movd:
 ; BTVER2:       # %bb.0:
+; BTVER2-NEXT:    vmovd %edi, %xmm1 # sched: [8:0.50]
 ; BTVER2-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
-; BTVER2-NEXT:    vmovd %edi, %xmm1 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-NEXT:    vmovd %xmm1, (%rsi) # sched: [2:1.00]
 ; BTVER2-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vmovd %xmm0, %eax # sched: [1:0.50]
+; BTVER2-NEXT:    vmovd %xmm0, %eax # sched: [4:1.00]
+; BTVER2-NEXT:    vmovd %xmm1, (%rsi) # sched: [2:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-SSE-LABEL: test_movd:
@@ -4789,31 +4789,31 @@ define i64 @test_movd_64(<2 x i64> %a0,
 ; BTVER2-SSE-LABEL: test_movd_64:
 ; BTVER2-SSE:       # %bb.0:
 ; BTVER2-SSE-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero sched: [5:1.00]
-; BTVER2-SSE-NEXT:    movq %rdi, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movq %rdi, %xmm1 # sched: [8:0.50]
+; BTVER2-SSE-NEXT:    paddq %xmm0, %xmm2 # sched: [1:0.50]
+; BTVER2-SSE-NEXT:    movq %xmm2, %rax # sched: [4:1.00]
 ; BTVER2-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
 ; BTVER2-SSE-NEXT:    movq %xmm1, (%rsi) # sched: [2:1.00]
-; BTVER2-SSE-NEXT:    paddq %xmm0, %xmm2 # sched: [1:0.50]
-; BTVER2-SSE-NEXT:    movq %xmm2, %rax # sched: [1:0.50]
 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
 ;
 ; BTVER2-LABEL: test_movd_64:
 ; BTVER2:       # %bb.0:
+; BTVER2-NEXT:    vmovq %rdi, %xmm1 # sched: [8:0.50]
 ; BTVER2-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:1.00]
-; BTVER2-NEXT:    vmovq %rdi, %xmm1 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-NEXT:    vmovq %xmm1, (%rsi) # sched: [2:1.00]
 ; BTVER2-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT:    vmovq %xmm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    vmovq %xmm0, %rax # sched: [4:1.00]
+; BTVER2-NEXT:    vmovq %xmm1, (%rsi) # sched: [2:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-SSE-LABEL: test_movd_64:
 ; ZNVER1-SSE:       # %bb.0:
 ; ZNVER1-SSE-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero sched: [8:0.50]
-; ZNVER1-SSE-NEXT:    movq %rdi, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    movq %rdi, %xmm1 # sched: [3:1.00]
 ; ZNVER1-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.25]
 ; ZNVER1-SSE-NEXT:    movq %xmm1, (%rsi) # sched: [1:0.50]
 ; ZNVER1-SSE-NEXT:    paddq %xmm0, %xmm2 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT:    movq %xmm2, %rax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT:    movq %xmm2, %rax # sched: [2:1.00]
 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
 ;
 ; ZNVER1-LABEL: test_movd_64:

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s?rev=332745&r1=332744&r2=332745&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s Fri May 18 10:58:36 2018
@@ -1252,9 +1252,9 @@ vzeroupper
 # CHECK-NEXT:  2      1     1.00                        vmovaps	%ymm0, %ymm2
 # CHECK-NEXT:  1      1     1.00           *            vmovaps	%ymm0, (%rax)
 # CHECK-NEXT:  1      5     1.00    *                   vmovaps	(%rax), %ymm2
-# CHECK-NEXT:  1      1     0.50                        vmovd	%eax, %xmm2
+# CHECK-NEXT:  2      8     0.50                        vmovd	%eax, %xmm2
 # CHECK-NEXT:  1      5     1.00    *                   vmovd	(%rax), %xmm2
-# CHECK-NEXT:  1      1     0.50                        vmovd	%xmm0, %ecx
+# CHECK-NEXT:  1      4     1.00                        vmovd	%xmm0, %ecx
 # CHECK-NEXT:  1      2     1.00           *            vmovd	%xmm0, (%rax)
 # CHECK-NEXT:  1      1     0.50                        vmovddup	%xmm0, %xmm2
 # CHECK-NEXT:  1      6     1.00    *                   vmovddup	(%rax), %xmm2
@@ -1295,9 +1295,9 @@ vzeroupper
 # CHECK-NEXT:  1      3     1.00           *            vmovntps	%xmm0, (%rax)
 # CHECK-NEXT:  1      3     2.00           *            vmovntps	%ymm0, (%rax)
 # CHECK-NEXT:  1      1     0.50                        vmovq	%xmm0, %xmm2
-# CHECK-NEXT:  1      1     0.50                        vmovq	%rax, %xmm2
+# CHECK-NEXT:  2      8     0.50                        vmovq	%rax, %xmm2
 # CHECK-NEXT:  1      5     1.00    *                   vmovq	(%rax), %xmm2
-# CHECK-NEXT:  1      1     0.50                        vmovq	%xmm0, %rcx
+# CHECK-NEXT:  1      4     1.00                        vmovq	%xmm0, %rcx
 # CHECK-NEXT:  1      2     1.00           *            vmovq	%xmm0, (%rax)
 # CHECK-NEXT:  1      1     0.50                        vmovsd	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  1      2     1.00           *            vmovsd	%xmm0, (%rax)
@@ -1720,7 +1720,7 @@ vzeroupper
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]
-# CHECK-NEXT: 48.00  2.00    -     350.50 910.50 399.00 421.00 382.00  -     43.00  132.00 119.50 119.50 38.00
+# CHECK-NEXT: 48.00   -      -     353.50 911.50 402.00 422.00 382.00  -     43.00  132.00 119.50 119.50 38.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]   Instructions:
@@ -1958,9 +1958,9 @@ vzeroupper
 # CHECK-NEXT:  -      -      -     1.00   1.00   1.00   1.00    -      -      -      -      -      -      -     vmovaps	%ymm0, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -     1.00    -      -     1.00   1.00    -      -      -     vmovaps	%ymm0, (%rax)
 # CHECK-NEXT:  -      -      -     0.50   0.50   0.50   0.50   1.00    -      -      -      -      -      -     vmovaps	(%rax), %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     vmovd	%eax, %xmm2
+# CHECK-NEXT:  -      -      -     0.50   0.50   0.50   0.50    -      -      -      -      -      -      -     vmovd	%eax, %xmm2
 # CHECK-NEXT:  -      -      -      -      -     0.50   0.50   1.00    -      -      -     0.50   0.50    -     vmovd	(%rax), %xmm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     vmovd	%xmm0, %ecx
+# CHECK-NEXT: 1.00    -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     vmovd	%xmm0, %ecx
 # CHECK-NEXT:  -      -      -      -      -      -     1.00    -      -     1.00   1.00    -      -      -     vmovd	%xmm0, (%rax)
 # CHECK-NEXT:  -      -      -     0.50   0.50   0.50   0.50    -      -      -      -      -      -      -     vmovddup	%xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     0.50   0.50   0.50   0.50   1.00    -      -      -      -      -      -     vmovddup	(%rax), %xmm2
@@ -2001,9 +2001,9 @@ vzeroupper
 # CHECK-NEXT:  -      -      -      -      -      -     1.00    -      -     1.00   1.00    -      -      -     vmovntps	%xmm0, (%rax)
 # CHECK-NEXT:  -      -      -      -      -      -     2.00    -      -     2.00   2.00    -      -      -     vmovntps	%ymm0, (%rax)
 # CHECK-NEXT:  -      -      -      -      -     0.50   0.50    -      -      -      -     0.50   0.50    -     vmovq	%xmm0, %xmm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     vmovq	%rax, %xmm2
+# CHECK-NEXT:  -      -      -     0.50   0.50   0.50   0.50    -      -      -      -      -      -      -     vmovq	%rax, %xmm2
 # CHECK-NEXT:  -      -      -      -      -     0.50   0.50   1.00    -      -      -     0.50   0.50    -     vmovq	(%rax), %xmm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     vmovq	%xmm0, %rcx
+# CHECK-NEXT: 1.00    -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     vmovq	%xmm0, %rcx
 # CHECK-NEXT:  -      -      -      -      -      -     1.00    -      -     1.00   1.00    -      -      -     vmovq	%xmm0, (%rax)
 # CHECK-NEXT:  -      -      -     0.50   0.50   0.50   0.50    -      -      -      -      -      -      -     vmovsd	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -     1.00    -      -     1.00   1.00    -      -      -     vmovsd	%xmm0, (%rax)

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-mmx.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-mmx.s?rev=332745&r1=332744&r2=332745&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-mmx.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-mmx.s Fri May 18 10:58:36 2018
@@ -165,13 +165,13 @@ pxor        (%rax), %mm2
 
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
 # CHECK-NEXT:  1      2     0.50    *      *      *     emms
-# CHECK-NEXT:  1      1     0.50                        movd	%eax, %mm2
+# CHECK-NEXT:  2      8     0.50                        movd	%eax, %mm2
 # CHECK-NEXT:  1      5     1.00    *                   movd	(%rax), %mm2
-# CHECK-NEXT:  1      1     0.50                        movd	%mm0, %ecx
+# CHECK-NEXT:  1      4     1.00                        movd	%mm0, %ecx
 # CHECK-NEXT:  1      2     1.00           *      *     movd	%mm0, (%rax)
-# CHECK-NEXT:  1      1     0.50                        movq	%rax, %mm2
+# CHECK-NEXT:  2      8     0.50                        movq	%rax, %mm2
 # CHECK-NEXT:  1      5     1.00    *                   movq	(%rax), %mm2
-# CHECK-NEXT:  1      1     0.50                        movq	%mm0, %rcx
+# CHECK-NEXT:  1      4     1.00                        movq	%mm0, %rcx
 # CHECK-NEXT:  1      2     1.00           *            movq	%mm0, (%rax)
 # CHECK-NEXT:  1      1     0.50                        packsswb	%mm0, %mm2
 # CHECK-NEXT:  1      6     1.00    *                   packsswb	(%rax), %mm2
@@ -288,18 +288,18 @@ pxor        (%rax), %mm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]
-# CHECK-NEXT: 2.00   2.00    -     0.50   0.50   52.50  48.50  46.00   -     2.00   2.00   46.00  46.00  6.00
+# CHECK-NEXT: 2.00    -      -     3.50   1.50   55.50  49.50  46.00   -     2.00   2.00   46.00  46.00  6.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]   Instructions:
 # CHECK-NEXT:  -      -      -     0.50   0.50   0.50   0.50    -      -      -      -      -      -      -     emms
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     movd	%eax, %mm2
+# CHECK-NEXT:  -      -      -     0.50   0.50   0.50   0.50    -      -      -      -      -      -      -     movd	%eax, %mm2
 # CHECK-NEXT:  -      -      -      -      -     0.50   0.50   1.00    -      -      -     0.50   0.50    -     movd	(%rax), %mm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     movd	%mm0, %ecx
+# CHECK-NEXT: 1.00    -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     movd	%mm0, %ecx
 # CHECK-NEXT:  -      -      -      -      -      -     1.00    -      -     1.00   1.00    -      -      -     movd	%mm0, (%rax)
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     movq	%rax, %mm2
+# CHECK-NEXT:  -      -      -     0.50   0.50   0.50   0.50    -      -      -      -      -      -      -     movq	%rax, %mm2
 # CHECK-NEXT:  -      -      -      -      -     0.50   0.50   1.00    -      -      -     0.50   0.50    -     movq	(%rax), %mm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     movq	%mm0, %rcx
+# CHECK-NEXT: 1.00    -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     movq	%mm0, %rcx
 # CHECK-NEXT:  -      -      -      -      -      -     1.00    -      -     1.00   1.00    -      -      -     movq	%mm0, (%rax)
 # CHECK-NEXT:  -      -      -      -      -     0.50   0.50    -      -      -      -     0.50   0.50    -     packsswb	%mm0, %mm2
 # CHECK-NEXT:  -      -      -      -      -     0.50   0.50   1.00    -      -      -     0.50   0.50    -     packsswb	(%rax), %mm2

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-sse2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-sse2.s?rev=332745&r1=332744&r2=332745&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-sse2.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-sse2.s Fri May 18 10:58:36 2018
@@ -472,9 +472,9 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  1      1     0.50                        movapd	%xmm0, %xmm2
 # CHECK-NEXT:  1      1     1.00           *            movapd	%xmm0, (%rax)
 # CHECK-NEXT:  1      5     1.00    *                   movapd	(%rax), %xmm2
-# CHECK-NEXT:  1      1     0.50                        movd	%eax, %xmm2
+# CHECK-NEXT:  2      8     0.50                        movd	%eax, %xmm2
 # CHECK-NEXT:  1      5     1.00    *                   movd	(%rax), %xmm2
-# CHECK-NEXT:  1      1     0.50                        movd	%xmm0, %ecx
+# CHECK-NEXT:  1      4     1.00                        movd	%xmm0, %ecx
 # CHECK-NEXT:  1      2     1.00           *            movd	%xmm0, (%rax)
 # CHECK-NEXT:  1      1     0.50                        movdqa	%xmm0, %xmm2
 # CHECK-NEXT:  1      1     1.00           *            movdqa	%xmm0, (%rax)
@@ -493,9 +493,9 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  1      2     1.00           *            movntdq	%xmm0, (%rax)
 # CHECK-NEXT:  1      3     1.00           *            movntpd	%xmm0, (%rax)
 # CHECK-NEXT:  1      1     0.50                        movq	%xmm0, %xmm2
-# CHECK-NEXT:  1      1     0.50                        movq	%rax, %xmm2
+# CHECK-NEXT:  2      8     0.50                        movq	%rax, %xmm2
 # CHECK-NEXT:  1      5     1.00    *                   movq	(%rax), %xmm2
-# CHECK-NEXT:  1      1     0.50                        movq	%xmm0, %rcx
+# CHECK-NEXT:  1      4     1.00                        movq	%xmm0, %rcx
 # CHECK-NEXT:  1      2     1.00           *            movq	%xmm0, (%rax)
 # CHECK-NEXT:  1      1     0.50                        movq2dq	%mm0, %xmm2
 # CHECK-NEXT:  1      1     0.50                        movsd	%xmm0, %xmm2
@@ -685,7 +685,7 @@ xorpd       (%rax), %xmm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]
-# CHECK-NEXT: 17.00  2.00    -     46.00  203.00 116.50 139.50 117.00  -     15.00  54.00  66.50  66.50  12.00
+# CHECK-NEXT: 17.00   -      -     49.00  204.00 119.50 140.50 117.00  -     15.00  54.00  66.50  66.50  12.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]   Instructions:
@@ -759,9 +759,9 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  -      -      -     0.50   0.50   0.50   0.50    -      -      -      -      -      -      -     movapd	%xmm0, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -     1.00    -      -     1.00   1.00    -      -      -     movapd	%xmm0, (%rax)
 # CHECK-NEXT:  -      -      -     0.50   0.50   0.50   0.50   1.00    -      -      -      -      -      -     movapd	(%rax), %xmm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     movd	%eax, %xmm2
+# CHECK-NEXT:  -      -      -     0.50   0.50   0.50   0.50    -      -      -      -      -      -      -     movd	%eax, %xmm2
 # CHECK-NEXT:  -      -      -      -      -     0.50   0.50   1.00    -      -      -     0.50   0.50    -     movd	(%rax), %xmm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     movd	%xmm0, %ecx
+# CHECK-NEXT: 1.00    -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     movd	%xmm0, %ecx
 # CHECK-NEXT:  -      -      -      -      -      -     1.00    -      -     1.00   1.00    -      -      -     movd	%xmm0, (%rax)
 # CHECK-NEXT:  -      -      -      -      -     0.50   0.50    -      -      -      -     0.50   0.50    -     movdqa	%xmm0, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -     1.00    -      -     1.00   1.00    -      -      -     movdqa	%xmm0, (%rax)
@@ -780,9 +780,9 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -     1.00    -      -     1.00   1.00    -      -      -     movntdq	%xmm0, (%rax)
 # CHECK-NEXT:  -      -      -      -      -      -     1.00    -      -     1.00   1.00    -      -      -     movntpd	%xmm0, (%rax)
 # CHECK-NEXT:  -      -      -      -      -     0.50   0.50    -      -      -      -     0.50   0.50    -     movq	%xmm0, %xmm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     movq	%rax, %xmm2
+# CHECK-NEXT:  -      -      -     0.50   0.50   0.50   0.50    -      -      -      -      -      -      -     movq	%rax, %xmm2
 # CHECK-NEXT:  -      -      -      -      -     0.50   0.50   1.00    -      -      -     0.50   0.50    -     movq	(%rax), %xmm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     movq	%xmm0, %rcx
+# CHECK-NEXT: 1.00    -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     movq	%xmm0, %rcx
 # CHECK-NEXT:  -      -      -      -      -      -     1.00    -      -     1.00   1.00    -      -      -     movq	%xmm0, (%rax)
 # CHECK-NEXT:  -      -      -      -      -     0.50   0.50    -      -      -      -     0.50   0.50    -     movq2dq	%mm0, %xmm2
 # CHECK-NEXT:  -      -      -     0.50   0.50   0.50   0.50    -      -      -      -      -      -      -     movsd	%xmm0, %xmm2

Modified: llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-mmx.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-mmx.s?rev=332745&r1=332744&r2=332745&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-mmx.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-mmx.s Fri May 18 10:58:36 2018
@@ -165,13 +165,13 @@ pxor        (%rax), %mm2
 
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
 # CHECK-NEXT:  31     31    10.33   *      *      *     emms
-# CHECK-NEXT:  1      1     0.33                        movd	%eax, %mm2
+# CHECK-NEXT:  1      1     1.00                        movd	%eax, %mm2
 # CHECK-NEXT:  1      5     0.50    *                   movd	(%rax), %mm2
-# CHECK-NEXT:  1      1     0.33                        movd	%mm0, %ecx
+# CHECK-NEXT:  1      2     1.00                        movd	%mm0, %ecx
 # CHECK-NEXT:  1      1     1.00           *      *     movd	%mm0, (%rax)
-# CHECK-NEXT:  1      1     0.33                        movq	%rax, %mm2
+# CHECK-NEXT:  1      1     1.00                        movq	%rax, %mm2
 # CHECK-NEXT:  1      5     0.50    *                   movq	(%rax), %mm2
-# CHECK-NEXT:  1      1     0.33                        movq	%mm0, %rcx
+# CHECK-NEXT:  1      2     1.00                        movq	%mm0, %rcx
 # CHECK-NEXT:  1      1     1.00           *            movq	%mm0, (%rax)
 # CHECK-NEXT:  1      1     1.00                        packsswb	%mm0, %mm2
 # CHECK-NEXT:  2      6     1.00    *                   packsswb	(%rax), %mm2
@@ -282,18 +282,18 @@ pxor        (%rax), %mm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]
-# CHECK-NEXT:  -      -     20.33  54.33  2.00   56.33  24.00  24.00
+# CHECK-NEXT:  -      -     21.00  53.00  2.00   57.00  24.00  24.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]  Instructions:
 # CHECK-NEXT:  -      -     10.33  10.33   -     10.33   -      -     emms
-# CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -     movd	%eax, %mm2
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     movd	%eax, %mm2
 # CHECK-NEXT:  -      -      -      -      -      -     0.50   0.50   movd	(%rax), %mm2
-# CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -     movd	%mm0, %ecx
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     movd	%mm0, %ecx
 # CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   movd	%mm0, (%rax)
-# CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -     movq	%rax, %mm2
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     movq	%rax, %mm2
 # CHECK-NEXT:  -      -      -      -      -      -     0.50   0.50   movq	(%rax), %mm2
-# CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -     movq	%mm0, %rcx
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     movq	%mm0, %rcx
 # CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   movq	%mm0, (%rax)
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     packsswb	%mm0, %mm2
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   packsswb	(%rax), %mm2

Modified: llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-sse2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-sse2.s?rev=332745&r1=332744&r2=332745&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-sse2.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-sse2.s Fri May 18 10:58:36 2018
@@ -493,9 +493,9 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  1      1     0.50           *            movntdq	%xmm0, (%rax)
 # CHECK-NEXT:  1      1     0.50           *            movntpd	%xmm0, (%rax)
 # CHECK-NEXT:  1      1     0.25                        movq	%xmm0, %xmm2
-# CHECK-NEXT:  1      1     0.25                        movq	%rax, %xmm2
+# CHECK-NEXT:  1      3     1.00                        movq	%rax, %xmm2
 # CHECK-NEXT:  1      8     0.50    *                   movq	(%rax), %xmm2
-# CHECK-NEXT:  1      1     0.25                        movq	%xmm0, %rcx
+# CHECK-NEXT:  1      2     1.00                        movq	%xmm0, %rcx
 # CHECK-NEXT:  1      1     0.50           *            movq	%xmm0, (%rax)
 # CHECK-NEXT:  1      1     0.25                        movq2dq	%mm0, %xmm2
 # CHECK-NEXT:  1      1     0.50                        movsd	%xmm0, %xmm2
@@ -683,7 +683,7 @@ xorpd       (%rax), %xmm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
-# CHECK-NEXT: 65.50  65.50  0.50   0.50   0.50   0.50    -     72.42  39.92  69.25  153.42  -
+# CHECK-NEXT: 65.50  65.50   -      -      -      -      -     72.42  39.92  71.25  153.42  -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   Instructions:
@@ -778,9 +778,9 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     movntdq	%xmm0, (%rax)
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     movntpd	%xmm0, (%rax)
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     movq	%xmm0, %xmm2
-# CHECK-NEXT:  -      -     0.25   0.25   0.25   0.25    -      -      -      -      -      -     movq	%rax, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00    -      -     movq	%rax, %xmm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     movq	(%rax), %xmm2
-# CHECK-NEXT:  -      -     0.25   0.25   0.25   0.25    -      -      -      -      -      -     movq	%xmm0, %rcx
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00    -      -     movq	%xmm0, %rcx
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     movq	%xmm0, (%rax)
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     movq2dq	%mm0, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -     movsd	%xmm0, %xmm2




More information about the llvm-commits mailing list