[llvm] r307529 - This patch completely replaces the scheduling information for the SandyBridge architecture target by modifying the file X86SchedSandyBridge.td located under the X86 Target.

Gadi Haber via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 10 02:53:16 PDT 2017


Author: gadi.haber
Date: Mon Jul 10 02:53:16 2017
New Revision: 307529

URL: http://llvm.org/viewvc/llvm-project?rev=307529&view=rev
Log:
This patch completely replaces the scheduling information for the SandyBridge architecture target by modifying the file X86SchedSandyBridge.td located under the X86 Target.
The SandyBridge architects have provided us with more accurate information about each instruction's latency, number of uOps and ports used. I used it to replace the existing estimated SNB instruction scheduling and to add missing scheduling information.

Please note that the patch extensively affects the X86 MC instr scheduling for SNB.

Also note that this patch will be followed by additional patches for the remaining target architectures HSW, IVB, BDW, SKL and SKX.

The updated and extended information about each instruction includes the following details:
• static latency of the instruction
• number of uOps of which the instruction consists
• all ports used by the instruction's uOps

For example, the following code specifies that the instructions ADC64mr, ADC8mr, SBB64mr and SBB8mr have a static latency of 9 cycles. Each of these instructions is decoded into 6 micro-operations, which use port 4, ports 2 or 3, port 0, and ports 0, 1 or 5:

// Example scheduling group: one write-resource record shared by the four
// read-modify-write instructions bound to it below.
// Resources named: SBPort4, SBPort23 (port 2 or 3), SBPort0 and
// SBPort015 (port 0, 1 or 5).
def SBWriteResGroup94 : SchedWriteRes<[SBPort4,SBPort23,SBPort0,SBPort015]> {
// Static latency of the instruction, in cycles.
let Latency = 9;
// Number of micro-operations the instruction is decoded into.
let NumMicroOps = 6;
// Presumably one entry per resource listed above, in the same order
// (TODO confirm); the entries sum to 6, matching NumMicroOps.
let ResourceCycles = [1,2,2,1];

}
// Bind each instruction (matched by opcode-name regex) to the group above.
def: InstRW<[SBWriteResGroup94], (instregex "ADC64mr")>;
def: InstRW<[SBWriteResGroup94], (instregex "ADC8mr")>;
def: InstRW<[SBWriteResGroup94], (instregex "SBB64mr")>;
def: InstRW<[SBWriteResGroup94], (instregex "SBB8mr")>;

Note that apart from the header, most of the X86SchedSandyBridge.td file was generated by a script.

Reviewers: zvi, chandlerc, RKSimon, m_zuckerman, craig.topper, igorb

Differential Revision:  https://reviews.llvm.org/D35019#inline-304691



Modified:
    llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
    llvm/trunk/test/CodeGen/X86/avx-schedule.ll
    llvm/trunk/test/CodeGen/X86/extractelement-legalization-store-ordering.ll
    llvm/trunk/test/CodeGen/X86/fp128-i128.ll
    llvm/trunk/test/CodeGen/X86/gather-addresses.ll
    llvm/trunk/test/CodeGen/X86/recip-fastmath.ll
    llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll
    llvm/trunk/test/CodeGen/X86/sse-schedule.ll
    llvm/trunk/test/CodeGen/X86/sse2-schedule.ll
    llvm/trunk/test/CodeGen/X86/sse3-schedule.ll
    llvm/trunk/test/CodeGen/X86/sse41-schedule.ll
    llvm/trunk/test/CodeGen/X86/sse42-schedule.ll
    llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll

Modified: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td?rev=307529&r1=307528&r2=307529&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td Mon Jul 10 02:53:16 2017
@@ -24,8 +24,8 @@ def SandyBridgeModel : SchedMachineModel
   // Based on the LSD (loop-stream detector) queue size.
   let LoopMicroOpBufferSize = 28;
 
-  // FIXME: SSE4 and AVX are unimplemented. This flag is set to allow
-  // the scheduler to assign a default model to unrecognized opcodes.
+  // This flag is set to allow the scheduler to assign
+  // a default model to unrecognized opcodes.
   let CompleteModel = 0;
 }
 
@@ -48,6 +48,7 @@ def SBPort23 : ProcResource<2>;
 def SBPort4 : ProcResource<1>;
 
 // Many micro-ops are capable of issuing on multiple ports.
+def SBPort01  : ProcResGroup<[SBPort0, SBPort1]>;
 def SBPort05  : ProcResGroup<[SBPort0, SBPort5]>;
 def SBPort15  : ProcResGroup<[SBPort1, SBPort5]>;
 def SBPort015 : ProcResGroup<[SBPort0, SBPort1, SBPort5]>;
@@ -115,10 +116,10 @@ def : WriteRes<WriteIDivLd, [SBPort23, S
 // Scalar and vector floating point.
 defm : SBWriteResPair<WriteFAdd,   SBPort1, 3>;
 defm : SBWriteResPair<WriteFMul,   SBPort0, 5>;
-defm : SBWriteResPair<WriteFDiv,   SBPort0, 12>; // 10-14 cycles.
+defm : SBWriteResPair<WriteFDiv,   SBPort0, 24>;
 defm : SBWriteResPair<WriteFRcp,   SBPort0, 5>;
 defm : SBWriteResPair<WriteFRsqrt, SBPort0, 5>;
-defm : SBWriteResPair<WriteFSqrt,  SBPort0, 15>;
+defm : SBWriteResPair<WriteFSqrt,  SBPort0, 14>;
 defm : SBWriteResPair<WriteCvtF2I, SBPort1, 3>;
 defm : SBWriteResPair<WriteCvtI2F, SBPort1, 4>;
 defm : SBWriteResPair<WriteCvtF2F, SBPort1, 3>;
@@ -134,11 +135,11 @@ def : WriteRes<WriteFVarBlendLd, [SBPort
 }
 
 // Vector integer operations.
-defm : SBWriteResPair<WriteVecShift, SBPort05,  1>;
-defm : SBWriteResPair<WriteVecLogic, SBPort015, 1>;
-defm : SBWriteResPair<WriteVecALU,   SBPort15,  1>;
+defm : SBWriteResPair<WriteVecShift, SBPort5,  1>;
+defm : SBWriteResPair<WriteVecLogic, SBPort5, 1>;
+defm : SBWriteResPair<WriteVecALU,   SBPort1,  3>;
 defm : SBWriteResPair<WriteVecIMul,  SBPort0,   5>;
-defm : SBWriteResPair<WriteShuffle,  SBPort15,  1>;
+defm : SBWriteResPair<WriteShuffle,  SBPort5,  1>;
 defm : SBWriteResPair<WriteBlend,  SBPort15,  1>;
 def : WriteRes<WriteVarBlend, [SBPort1, SBPort5]> {
   let Latency = 2;
@@ -148,13 +149,15 @@ def : WriteRes<WriteVarBlendLd, [SBPort1
   let Latency = 6;
   let ResourceCycles = [1, 1, 1];
 }
-def : WriteRes<WriteMPSAD, [SBPort0, SBPort1, SBPort5]> {
-  let Latency = 6;
-  let ResourceCycles = [1, 1, 1];
+def : WriteRes<WriteMPSAD, [SBPort0,SBPort15]> {
+  let Latency = 5;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,2];
 }
-def : WriteRes<WriteMPSADLd, [SBPort0, SBPort1, SBPort5, SBPort23]> {
-  let Latency = 6;
-  let ResourceCycles = [1, 1, 1, 1];
+def : WriteRes<WriteMPSADLd, [SBPort0,SBPort23,SBPort15]> {
+  let Latency = 11;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,2];
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -204,13 +207,15 @@ def : WriteRes<WritePCmpEStrMLd, [SBPort
 }
 
 // Packed Compare Implicit Length Strings, Return Index
-def : WriteRes<WritePCmpIStrI, [SBPort015]> {
-  let Latency = 3;
+def : WriteRes<WritePCmpIStrI, [SBPort0]> {
+  let Latency = 11;
+  let NumMicroOps = 3;
   let ResourceCycles = [3];
 }
-def : WriteRes<WritePCmpIStrILd, [SBPort015, SBPort23]> {
-  let Latency = 3;
-  let ResourceCycles = [3, 1];
+def : WriteRes<WritePCmpIStrILd, [SBPort0,SBPort23]> {
+  let Latency = 17;
+  let NumMicroOps = 4;
+  let ResourceCycles = [3,1];
 }
 
 // Packed Compare Explicit Length Strings, Return Index
@@ -224,22 +229,26 @@ def : WriteRes<WritePCmpEStrILd, [SBPort
 }
 
 // AES Instructions.
-def : WriteRes<WriteAESDecEnc, [SBPort015]> {
-  let Latency = 8;
-  let ResourceCycles = [2];
+def : WriteRes<WriteAESDecEnc, [SBPort5,SBPort015]> {
+  let Latency = 7;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
 }
-def : WriteRes<WriteAESDecEncLd, [SBPort015, SBPort23]> {
-  let Latency = 8;
-  let ResourceCycles = [2, 1];
+def : WriteRes<WriteAESDecEncLd, [SBPort5,SBPort23,SBPort015]> {
+  let Latency = 13;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
 }
 
-def : WriteRes<WriteAESIMC, [SBPort015]> {
-  let Latency = 8;
+def : WriteRes<WriteAESIMC, [SBPort5]> {
+  let Latency = 12;
+  let NumMicroOps = 2;
   let ResourceCycles = [2];
 }
-def : WriteRes<WriteAESIMCLd, [SBPort015, SBPort23]> {
-  let Latency = 8;
-  let ResourceCycles = [2, 1];
+def : WriteRes<WriteAESIMCLd, [SBPort5,SBPort23]> {
+  let Latency = 18;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
 }
 
 def : WriteRes<WriteAESKeyGen, [SBPort015]> {
@@ -272,4 +281,2407 @@ def : WriteRes<WriteNop, []>;
 defm : SBWriteResPair<WriteFShuffle256, SBPort0,  1>;
 defm : SBWriteResPair<WriteShuffle256, SBPort0,  1>;
 defm : SBWriteResPair<WriteVarVecShift, SBPort0,  1>;
+
+// Remaining SNB instrs.
+
+def SBWriteResGroup0 : SchedWriteRes<[SBPort0]> {
+  let Latency = 1;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup0], (instregex "CVTSS2SDrr")>;
+def: InstRW<[SBWriteResGroup0], (instregex "PSLLDri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "PSLLQri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "PSLLWri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "PSRADri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "PSRAWri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "PSRLDri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "PSRLQri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "PSRLWri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VCVTSS2SDrr")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VPMOVMSKBrr")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VPSLLDri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VPSLLQri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VPSLLWri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VPSRADri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VPSRAWri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VPSRLDri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VPSRLQri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VPSRLWri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VTESTPDYrr")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VTESTPDrr")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VTESTPSYrr")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VTESTPSrr")>;
+
+def SBWriteResGroup1 : SchedWriteRes<[SBPort1]> {
+  let Latency = 1;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup1], (instregex "COMP_FST0r")>;
+def: InstRW<[SBWriteResGroup1], (instregex "COM_FST0r")>;
+def: InstRW<[SBWriteResGroup1], (instregex "UCOM_FPr")>;
+def: InstRW<[SBWriteResGroup1], (instregex "UCOM_Fr")>;
+
+def SBWriteResGroup2 : SchedWriteRes<[SBPort5]> {
+  let Latency = 1;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup2], (instregex "ANDNPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "ANDNPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "ANDPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "ANDPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "FDECSTP")>;
+def: InstRW<[SBWriteResGroup2], (instregex "FFREE")>;
+def: InstRW<[SBWriteResGroup2], (instregex "FINCSTP")>;
+def: InstRW<[SBWriteResGroup2], (instregex "FNOP")>;
+def: InstRW<[SBWriteResGroup2], (instregex "INSERTPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JMP64r")>;
+def: InstRW<[SBWriteResGroup2], (instregex "LD_Frr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOV64toPQIrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVAPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVAPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVDDUPrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVDI2PDIrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVHLPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVLHPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVSDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVSHDUPrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVSLDUPrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVSSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVUPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVUPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "ORPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "ORPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "RETQ")>;
+def: InstRW<[SBWriteResGroup2], (instregex "SHUFPDrri")>;
+def: InstRW<[SBWriteResGroup2], (instregex "SHUFPSrri")>;
+def: InstRW<[SBWriteResGroup2], (instregex "ST_FPrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "ST_Frr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "UNPCKHPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "UNPCKHPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "UNPCKLPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "UNPCKLPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VANDNPDYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VANDNPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VANDNPSYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VANDNPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VANDPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VANDPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VANDPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VEXTRACTF128rr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VINSERTF128rr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VINSERTPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOV64toPQIrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOV64toPQIrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPDYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPSYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVDDUPYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVDDUPrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVHLPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVHLPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVSDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVSHDUPYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVSHDUPrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVSLDUPYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVSLDUPrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVSSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPDYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPSYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VORPDYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VORPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VORPSYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VORPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPDri")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPDrm")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPSri")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPSrm")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPDYrri")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPDrri")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPSYrri")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPSrri")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKHPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKHPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPDYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPSYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VXORPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VXORPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "XORPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "XORPSrr")>;
+
+def SBWriteResGroup3 : SchedWriteRes<[SBPort01]> {
+  let Latency = 1;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup3], (instregex "LEA64_32r")>;
+
+def SBWriteResGroup4 : SchedWriteRes<[SBPort0]> {
+  let Latency = 1;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup4], (instregex "BLENDPDrri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "BLENDPSrri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "BT32ri8")>;
+def: InstRW<[SBWriteResGroup4], (instregex "BT32rr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "BTC32ri8")>;
+def: InstRW<[SBWriteResGroup4], (instregex "BTC32rr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "BTR32ri8")>;
+def: InstRW<[SBWriteResGroup4], (instregex "BTR32rr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "BTS32ri8")>;
+def: InstRW<[SBWriteResGroup4], (instregex "BTS32rr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "CDQ")>;
+def: InstRW<[SBWriteResGroup4], (instregex "CQO")>;
+def: InstRW<[SBWriteResGroup4], (instregex "LAHF")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SAHF")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SAR32ri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SAR8ri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETAEr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETBr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETEr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETGEr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETGr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETLEr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETLr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETNEr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETNOr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETNPr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETNSr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETOr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETPr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETSr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SHL32ri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SHL64r1")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SHL8r1")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SHL8ri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SHR32ri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SHR8ri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPDYrri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPDrri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPSYrri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPSrri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "VMOVDQAYrr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "VMOVDQArr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "VMOVDQUYrr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "VMOVDQUrr")>;
+
+def SBWriteResGroup5 : SchedWriteRes<[SBPort15]> {
+  let Latency = 1;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup5], (instregex "KORTESTBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABSBrr64")>;
+def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABSDrr64")>;
+def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABSWrr64")>;
+def: InstRW<[SBWriteResGroup5], (instregex "MMX_PADDQirr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "MMX_PALIGNR64irr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "MMX_PSHUFBrr64")>;
+def: InstRW<[SBWriteResGroup5], (instregex "MMX_PSIGNBrr64")>;
+def: InstRW<[SBWriteResGroup5], (instregex "MMX_PSIGNDrr64")>;
+def: InstRW<[SBWriteResGroup5], (instregex "MMX_PSIGNWrr64")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PABSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PABSDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PABSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PACKSSDWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PACKSSWBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PACKUSDWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PACKUSWBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PADDBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PADDDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PADDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PADDSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PADDSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PADDUSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PADDUSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PADDWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PALIGNRrri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PAVGBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PAVGWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PBLENDWrri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PCMPGTBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PCMPGTDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PCMPGTWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMAXSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMAXSDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMAXSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMAXUBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMAXUDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMAXUWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMINSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMINSDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMINSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMINUBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMINUDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMINUWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXBDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXBQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXBWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXWDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXWQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXBDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXBQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXBWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXWDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXWQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSHUFBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSHUFDri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSHUFHWri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSHUFLWri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSIGNBrr128")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSIGNDrr128")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSIGNWrr128")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSLLDQri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSRLDQri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSUBBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSUBDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSUBQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSUBSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSUBSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSUBUSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSUBUSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSUBWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHBWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHQDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHWDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKLBWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKLDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKLQDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKLWDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VMASKMOVPSYrm")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPABSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPABSDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPABSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPACKSSDWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPACKSSWBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPACKUSDWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPACKUSWBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPADDBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPADDDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPADDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPADDUSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPADDUSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPALIGNRrri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPAVGBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPAVGWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPBLENDWrri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPCMPEQBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPCMPEQDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPCMPEQWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPCMPGTBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPCMPGTDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPCMPGTWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMAXSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMAXSDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMAXSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMAXUBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMAXUDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMAXUWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMINSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMINSDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMINSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMINUBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMINUDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMINUWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXBDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXBQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXBWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXWDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXWQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXBDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXBQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXBWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXWDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXWQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSHUFBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSHUFDri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSHUFLWri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSIGNBrr128")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSIGNDrr128")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSIGNWrr128")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSLLDQri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSRLDQri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSUBBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSUBDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSUBQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSUBSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSUBSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSUBUSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSUBUSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSUBWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKHBWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKHDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKHWDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKLDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKLQDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKLWDrr")>;
+
+def SBWriteResGroup6 : SchedWriteRes<[SBPort015]> {
+  let Latency = 1;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup6], (instregex "ADD32ri8")>;
+def: InstRW<[SBWriteResGroup6], (instregex "ADD32rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "ADD8ri")>;
+def: InstRW<[SBWriteResGroup6], (instregex "ADD8rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "AND32ri")>;
+def: InstRW<[SBWriteResGroup6], (instregex "AND64ri8")>;
+def: InstRW<[SBWriteResGroup6], (instregex "AND64rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "AND8ri")>;
+def: InstRW<[SBWriteResGroup6], (instregex "AND8rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "CBW")>;
+def: InstRW<[SBWriteResGroup6], (instregex "CMC")>;
+def: InstRW<[SBWriteResGroup6], (instregex "CMP16ri8")>;
+def: InstRW<[SBWriteResGroup6], (instregex "CMP32i32")>;
+def: InstRW<[SBWriteResGroup6], (instregex "CMP64rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "CMP8ri")>;
+def: InstRW<[SBWriteResGroup6], (instregex "CMP8rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "CWDE")>;
+def: InstRW<[SBWriteResGroup6], (instregex "DEC64r")>;
+def: InstRW<[SBWriteResGroup6], (instregex "DEC8r")>;
+def: InstRW<[SBWriteResGroup6], (instregex "INC64r")>;
+def: InstRW<[SBWriteResGroup6], (instregex "INC8r")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MMX_MOVD64from64rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MMX_MOVQ2DQrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MOV32rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MOV8ri")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MOV8rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MOVDQArr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MOVDQUrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MOVPQI2QIrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MOVSX32rr16")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MOVSX32rr8")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MOVZX32rr16")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MOVZX32rr8")>;
+def: InstRW<[SBWriteResGroup6], (instregex "NEG64r")>;
+def: InstRW<[SBWriteResGroup6], (instregex "NEG8r")>;
+def: InstRW<[SBWriteResGroup6], (instregex "NOT64r")>;
+def: InstRW<[SBWriteResGroup6], (instregex "NOT8r")>;
+def: InstRW<[SBWriteResGroup6], (instregex "OR64ri8")>;
+def: InstRW<[SBWriteResGroup6], (instregex "OR64rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "OR8ri")>;
+def: InstRW<[SBWriteResGroup6], (instregex "OR8rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "PANDNrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "PANDrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "PORrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "PXORrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "STC")>;
+def: InstRW<[SBWriteResGroup6], (instregex "SUB64ri8")>;
+def: InstRW<[SBWriteResGroup6], (instregex "SUB64rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "SUB8ri")>;
+def: InstRW<[SBWriteResGroup6], (instregex "SUB8rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "TEST64rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "TEST8ri")>;
+def: InstRW<[SBWriteResGroup6], (instregex "TEST8rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "VMOVPQI2QIrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "VMOVZPQILo2PQIrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "VPANDNrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "VPANDrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "VPORrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "VPXORrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "XOR32rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "XOR64ri8")>;
+def: InstRW<[SBWriteResGroup6], (instregex "XOR8ri")>;
+def: InstRW<[SBWriteResGroup6], (instregex "XOR8rr")>;
+
+def SBWriteResGroup7 : SchedWriteRes<[SBPort0]> {
+  let Latency = 2;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup7], (instregex "MOVMSKPDrr")>;
+def: InstRW<[SBWriteResGroup7], (instregex "MOVMSKPSrr")>;
+def: InstRW<[SBWriteResGroup7], (instregex "MOVPDI2DIrr")>;
+def: InstRW<[SBWriteResGroup7], (instregex "MOVPQIto64rr")>;
+def: InstRW<[SBWriteResGroup7], (instregex "PMOVMSKBrr")>;
+def: InstRW<[SBWriteResGroup7], (instregex "VMOVMSKPDYrr")>;
+def: InstRW<[SBWriteResGroup7], (instregex "VMOVMSKPDrr")>;
+def: InstRW<[SBWriteResGroup7], (instregex "VMOVMSKPSrr")>;
+def: InstRW<[SBWriteResGroup7], (instregex "VMOVPDI2DIrr")>;
+def: InstRW<[SBWriteResGroup7], (instregex "VMOVPQIto64rr")>;
+
+def SBWriteResGroup9 : SchedWriteRes<[SBPort0]> { // SandyBridge write group: two uOps, both on port 0.
+  let Latency = 2; // static latency, in cycles
+  let NumMicroOps = 2; // uOps each matched instruction decodes into
+  let ResourceCycles = [2]; // cycles consumed on SBPort0 (one per uOp)
+} // Bound below to BLENDVP* rr, ROL/ROR by immediate, SETA/SETBE register and VBLENDVP* rr patterns.
+def: InstRW<[SBWriteResGroup9], (instregex "BLENDVPDrr0")>;
+def: InstRW<[SBWriteResGroup9], (instregex "BLENDVPSrr0")>;
+def: InstRW<[SBWriteResGroup9], (instregex "ROL32ri")>;
+def: InstRW<[SBWriteResGroup9], (instregex "ROL8ri")>;
+def: InstRW<[SBWriteResGroup9], (instregex "ROR32ri")>;
+def: InstRW<[SBWriteResGroup9], (instregex "ROR8ri")>;
+def: InstRW<[SBWriteResGroup9], (instregex "SETAr")>;
+def: InstRW<[SBWriteResGroup9], (instregex "SETBEr")>;
+def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPDYrr")>;
+def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPDrr")>;
+def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPSYrr")>;
+def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPSrr")>;
+
+def SBWriteResGroup10 : SchedWriteRes<[SBPort15]> { // SandyBridge write group: two uOps on the port 1-or-5 group.
+  let Latency = 2; // static latency, in cycles
+  let NumMicroOps = 2; // uOps each matched instruction decodes into
+  let ResourceCycles = [2]; // cycles consumed on SBPort15 (one per uOp)
+} // Bound below to the VPBLENDVB rr pattern only.
+def: InstRW<[SBWriteResGroup10], (instregex "VPBLENDVBrr")>;
+
+def SBWriteResGroup11 : SchedWriteRes<[SBPort015]> { // SandyBridge write group: two uOps on the port 0-or-1-or-5 group.
+  let Latency = 2; // static latency, in cycles
+  let NumMicroOps = 2; // uOps each matched instruction decodes into
+  let ResourceCycles = [2]; // cycles consumed on SBPort015 (one per uOp)
+} // Bound below to the SCAS{B,L,Q,W} patterns.
+def: InstRW<[SBWriteResGroup11], (instregex "SCASB")>;
+def: InstRW<[SBWriteResGroup11], (instregex "SCASL")>;
+def: InstRW<[SBWriteResGroup11], (instregex "SCASQ")>;
+def: InstRW<[SBWriteResGroup11], (instregex "SCASW")>;
+
+def SBWriteResGroup12 : SchedWriteRes<[SBPort0,SBPort1]> { // SandyBridge write group: one uOp on port 0 plus one on port 1.
+  let Latency = 2; // static latency, in cycles
+  let NumMicroOps = 2; // uOps each matched instruction decodes into
+  let ResourceCycles = [1,1]; // cycles on SBPort0, SBPort1 (same order as the resource list)
+} // Bound below to the COMIS*/UCOMIS* rr patterns and their V-prefixed variants.
+def: InstRW<[SBWriteResGroup12], (instregex "COMISDrr")>;
+def: InstRW<[SBWriteResGroup12], (instregex "COMISSrr")>;
+def: InstRW<[SBWriteResGroup12], (instregex "UCOMISDrr")>;
+def: InstRW<[SBWriteResGroup12], (instregex "UCOMISSrr")>;
+def: InstRW<[SBWriteResGroup12], (instregex "VCOMISDrr")>;
+def: InstRW<[SBWriteResGroup12], (instregex "VCOMISSrr")>;
+def: InstRW<[SBWriteResGroup12], (instregex "VUCOMISDrr")>;
+def: InstRW<[SBWriteResGroup12], (instregex "VUCOMISSrr")>;
+
+def SBWriteResGroup13 : SchedWriteRes<[SBPort0,SBPort5]> { // SandyBridge write group: one uOp on port 0 plus one on port 5.
+  let Latency = 2; // static latency, in cycles
+  let NumMicroOps = 2; // uOps each matched instruction decodes into
+  let ResourceCycles = [1,1]; // cycles on SBPort0, SBPort5 (same order as the resource list)
+} // Bound below to the CVTPS2PD and PTEST rr patterns and their V-prefixed variants.
+def: InstRW<[SBWriteResGroup13], (instregex "CVTPS2PDrr")>;
+def: InstRW<[SBWriteResGroup13], (instregex "PTESTrr")>;
+def: InstRW<[SBWriteResGroup13], (instregex "VCVTPS2PDYrr")>;
+def: InstRW<[SBWriteResGroup13], (instregex "VCVTPS2PDrr")>;
+def: InstRW<[SBWriteResGroup13], (instregex "VPTESTYrr")>;
+def: InstRW<[SBWriteResGroup13], (instregex "VPTESTrr")>;
+
+def SBWriteResGroup14 : SchedWriteRes<[SBPort0,SBPort15]> { // SandyBridge write group: one uOp on port 0 plus one on port 1-or-5.
+  let Latency = 2; // static latency, in cycles
+  let NumMicroOps = 2; // uOps each matched instruction decodes into
+  let ResourceCycles = [1,1]; // cycles on SBPort0, SBPort15 (same order as the resource list)
+} // Bound below to the packed-shift rr patterns (PSLL*/PSRA*/PSRL* and VPSRA*/VPSRL*).
+def: InstRW<[SBWriteResGroup14], (instregex "PSLLDrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "PSLLQrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "PSLLWrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "PSRADrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "PSRAWrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "PSRLDrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "PSRLQrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "PSRLWrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "VPSRADrr")>; // NOTE(review): no VPSLL{D,Q,W}rr pattern in this group; may be covered elsewhere - confirm
+def: InstRW<[SBWriteResGroup14], (instregex "VPSRAWrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "VPSRLDrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "VPSRLQrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "VPSRLWrr")>;
+
+def SBWriteResGroup15 : SchedWriteRes<[SBPort0,SBPort015]> { // SandyBridge write group: one uOp on port 0 plus one on port 0-or-1-or-5.
+  let Latency = 2; // static latency, in cycles
+  let NumMicroOps = 2; // uOps each matched instruction decodes into
+  let ResourceCycles = [1,1]; // cycles on SBPort0, SBPort015 (same order as the resource list)
+} // Bound below to the FNSTSW16r pattern only.
+def: InstRW<[SBWriteResGroup15], (instregex "FNSTSW16r")>;
+
+def SBWriteResGroup16 : SchedWriteRes<[SBPort1,SBPort0]> { // SandyBridge write group: one uOp on port 1 plus one on port 0.
+  let Latency = 2; // static latency, in cycles
+  let NumMicroOps = 2; // uOps each matched instruction decodes into
+  let ResourceCycles = [1,1]; // cycles on SBPort1, SBPort0 (same order as the resource list)
+} // Bound below to the BSWAP32r pattern only.
+def: InstRW<[SBWriteResGroup16], (instregex "BSWAP32r")>;
+
+def SBWriteResGroup17 : SchedWriteRes<[SBPort5,SBPort15]> { // SandyBridge write group: one uOp on port 5 plus one on port 1-or-5.
+  let Latency = 2; // static latency, in cycles
+  let NumMicroOps = 2; // uOps each matched instruction decodes into
+  let ResourceCycles = [1,1]; // cycles on SBPort5, SBPort15 (same order as the resource list)
+} // Bound below to the PINSR{B,D,Q,W} register patterns and their V-prefixed variants.
+def: InstRW<[SBWriteResGroup17], (instregex "PINSRBrr")>;
+def: InstRW<[SBWriteResGroup17], (instregex "PINSRDrr")>;
+def: InstRW<[SBWriteResGroup17], (instregex "PINSRQrr")>;
+def: InstRW<[SBWriteResGroup17], (instregex "PINSRWrri")>;
+def: InstRW<[SBWriteResGroup17], (instregex "VPINSRBrr")>;
+def: InstRW<[SBWriteResGroup17], (instregex "VPINSRDrr")>;
+def: InstRW<[SBWriteResGroup17], (instregex "VPINSRQrr")>;
+def: InstRW<[SBWriteResGroup17], (instregex "VPINSRWrri")>;
+
+def SBWriteResGroup18 : SchedWriteRes<[SBPort5,SBPort015]> { // SandyBridge write group: one uOp on port 5 plus one on port 0-or-1-or-5.
+  let Latency = 2; // static latency, in cycles
+  let NumMicroOps = 2; // uOps each matched instruction decodes into
+  let ResourceCycles = [1,1]; // cycles on SBPort5, SBPort015 (same order as the resource list)
+} // Bound below to the MMX_MOVDQ2Qrr pattern only.
+def: InstRW<[SBWriteResGroup18], (instregex "MMX_MOVDQ2Qrr")>;
+
+def SBWriteResGroup19 : SchedWriteRes<[SBPort0,SBPort015]> { // SandyBridge write group: one uOp on port 0 plus one on port 0-or-1-or-5.
+  let Latency = 2; // static latency, in cycles
+  let NumMicroOps = 2; // uOps each matched instruction decodes into
+  let ResourceCycles = [1,1]; // cycles on SBPort0, SBPort015 (same order as the resource list)
+} // NOTE(review): resources, latency and uOp count are identical to SBWriteResGroup15; presumably an artifact of the generator script.
+def: InstRW<[SBWriteResGroup19], (instregex "ADC64ri8")>; // ADC/SBB register and immediate forms, CMOVcc rr, SHLD/SHRD by immediate
+def: InstRW<[SBWriteResGroup19], (instregex "ADC64rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "ADC8ri")>;
+def: InstRW<[SBWriteResGroup19], (instregex "ADC8rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVAE32rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVB32rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVE32rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVG32rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVGE32rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVL32rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVLE32rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVNE32rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVNO32rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVNP32rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVNS32rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVO32rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVP32rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVS32rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "SBB32rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "SBB64ri8")>;
+def: InstRW<[SBWriteResGroup19], (instregex "SBB8ri")>;
+def: InstRW<[SBWriteResGroup19], (instregex "SBB8rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "SHLD32rri8")>;
+def: InstRW<[SBWriteResGroup19], (instregex "SHRD32rri8")>;
+
+def SBWriteResGroup20 : SchedWriteRes<[SBPort0]> { // SandyBridge write group: a single uOp on port 0.
+  let Latency = 3; // static latency, in cycles
+  let NumMicroOps = 1; // uOps each matched instruction decodes into
+  let ResourceCycles = [1]; // cycles consumed on SBPort0
+} // Bound below mostly to packed integer multiply / multiply-add / PSADBW rr patterns (MMX, SSE and V-prefixed).
+def: InstRW<[SBWriteResGroup20], (instregex "MMX_PMADDUBSWrr64")>;
+def: InstRW<[SBWriteResGroup20], (instregex "MMX_PMULHRSWrr64")>;
+def: InstRW<[SBWriteResGroup20], (instregex "MMX_PMULUDQirr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "PMADDUBSWrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "PMADDWDrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "PMULDQrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "PMULHRSWrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "PMULHUWrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "PMULHWrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "PMULLDrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "PMULLWrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "PMULUDQrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "PSADBWrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "VMOVMSKPSYrr")>; // NOTE(review): the other MOVMSK patterns use the 2-cycle SBWriteResGroup7 - confirm 3 cycles is intended here
+def: InstRW<[SBWriteResGroup20], (instregex "VPMADDUBSWrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "VPMADDWDrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "VPMULDQrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "VPMULHRSWrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "VPMULHWrr")>; // NOTE(review): no VPMULHUWrr / VPMULUDQrr pattern in this group, unlike the non-V list - confirm
+def: InstRW<[SBWriteResGroup20], (instregex "VPMULLDrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "VPMULLWrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "VPSADBWrr")>;
+
+def SBWriteResGroup21 : SchedWriteRes<[SBPort1]> { // SandyBridge write group: a single uOp on port 1.
+  let Latency = 3; // static latency, in cycles
+  let NumMicroOps = 1; // uOps each matched instruction decodes into
+  let ResourceCycles = [1]; // cycles consumed on SBPort1
+} // Bound below to FP add/sub/compare/min/max/round/convert register patterns, x87 ADD/SUB forms, and BSF/BSR/CRC32/MUL8r/POPCNT.
+def: InstRW<[SBWriteResGroup21], (instregex "ADDPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ADDPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ADDSDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ADDSSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ADDSUBPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ADDSUBPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ADD_FPrST0")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ADD_FST0r")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ADD_FrST0")>;
+def: InstRW<[SBWriteResGroup21], (instregex "BSF32rr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "BSR32rr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "CMPPDrri")>;
+def: InstRW<[SBWriteResGroup21], (instregex "CMPPSrri")>;
+def: InstRW<[SBWriteResGroup21], (instregex "CMPSDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "CMPSSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "CRC32r32r32")>;
+def: InstRW<[SBWriteResGroup21], (instregex "CRC32r32r8")>;
+def: InstRW<[SBWriteResGroup21], (instregex "CVTDQ2PSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "CVTPS2DQrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "CVTTPS2DQrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MAXPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MAXPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MAXSDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MAXSSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MINPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MINPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MINSDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MINSSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTPI2PSirr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTPS2PIirr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTTPS2PIirr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MUL8r")>;
+def: InstRW<[SBWriteResGroup21], (instregex "POPCNT32rr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ROUNDPDr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ROUNDPSr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ROUNDSDr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ROUNDSSr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUBPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUBPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUBR_FPrST0")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUBR_FST0r")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUBR_FrST0")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUBSDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUBSSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUB_FPrST0")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUB_FST0r")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUB_FrST0")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDPDYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDPSYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDSDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDSSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPDYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPSYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VBROADCASTF128")>; // NOTE(review): only pattern here without an operand-form suffix - confirm which forms it is meant to match
+def: InstRW<[SBWriteResGroup21], (instregex "VCMPPDYrri")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCMPPDrri")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCMPPSYrri")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCMPPSrri")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCMPSDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCMPSSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCVTDQ2PSYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCVTDQ2PSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCVTPS2DQYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCVTPS2DQrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCVTTPS2DQrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMAXPDYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMAXPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMAXPSYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMAXPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMAXSDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMAXSSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMINPDrr")>; // NOTE(review): no VMINP{D,S}Yrr pattern in this group although VMAXP{D,S}Yrr are listed - confirm
+def: InstRW<[SBWriteResGroup21], (instregex "VMINPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMINSDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMINSSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VROUNDPDr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VROUNDPSr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VROUNDSDr")>; // NOTE(review): no VROUNDSSr pattern in this group although ROUNDSSr is listed - confirm
+def: InstRW<[SBWriteResGroup21], (instregex "VSUBPDYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VSUBPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VSUBPSYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VSUBPSrr")>; // NOTE(review): no VSUBS{D,S}rr pattern in this group although SUBS{D,S}rr are listed - confirm
+
+def SBWriteResGroup22 : SchedWriteRes<[SBPort0,SBPort5]> { // SandyBridge write group: one uOp on port 0 plus one on port 5.
+  let Latency = 3; // static latency, in cycles
+  let NumMicroOps = 2; // uOps each matched instruction decodes into
+  let ResourceCycles = [1,1]; // cycles on SBPort0, SBPort5 (same order as the resource list)
+} // Bound below to the EXTRACTPSrr and VEXTRACTPSrr patterns.
+def: InstRW<[SBWriteResGroup22], (instregex "EXTRACTPSrr")>;
+def: InstRW<[SBWriteResGroup22], (instregex "VEXTRACTPSrr")>;
+
+def SBWriteResGroup23 : SchedWriteRes<[SBPort0,SBPort15]> { // SandyBridge write group: one uOp on port 0 plus one on port 1-or-5.
+  let Latency = 3; // static latency, in cycles
+  let NumMicroOps = 2; // uOps each matched instruction decodes into
+  let ResourceCycles = [1,1]; // cycles on SBPort0, SBPort15 (same order as the resource list)
+} // Bound below to the PEXTR*/VPEXTR* register patterns and SHL-by-CL (SHL64rCL, SHL8rCL).
+def: InstRW<[SBWriteResGroup23], (instregex "PEXTRBrr")>;
+def: InstRW<[SBWriteResGroup23], (instregex "PEXTRDrr")>;
+def: InstRW<[SBWriteResGroup23], (instregex "PEXTRQrr")>;
+def: InstRW<[SBWriteResGroup23], (instregex "PEXTRWri")>;
+def: InstRW<[SBWriteResGroup23], (instregex "VPEXTRBrr")>;
+def: InstRW<[SBWriteResGroup23], (instregex "VPEXTRDrr")>;
+def: InstRW<[SBWriteResGroup23], (instregex "VPEXTRQrr")>;
+def: InstRW<[SBWriteResGroup23], (instregex "VPEXTRWri")>;
+def: InstRW<[SBWriteResGroup23], (instregex "SHL64rCL")>;
+def: InstRW<[SBWriteResGroup23], (instregex "SHL8rCL")>;
+
+def SBWriteResGroup24 : SchedWriteRes<[SBPort15]> { // SandyBridge write group: three uOps on the port 1-or-5 group.
+  let Latency = 3; // static latency, in cycles
+  let NumMicroOps = 3; // uOps each matched instruction decodes into
+  let ResourceCycles = [3]; // cycles consumed on SBPort15 (one per uOp)
+} // Bound below to the PHADD*/PHSUB* rr patterns (MMX, SSE and V-prefixed).
+def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHADDSWrr64")>;
+def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHADDWrr64")>;
+def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHADDrr64")>;
+def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHSUBDrr64")>;
+def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHSUBSWrr64")>;
+def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHSUBWrr64")>;
+def: InstRW<[SBWriteResGroup24], (instregex "PHADDDrr")>;
+def: InstRW<[SBWriteResGroup24], (instregex "PHADDSWrr128")>;
+def: InstRW<[SBWriteResGroup24], (instregex "PHADDWrr")>;
+def: InstRW<[SBWriteResGroup24], (instregex "PHSUBDrr")>;
+def: InstRW<[SBWriteResGroup24], (instregex "PHSUBSWrr128")>;
+def: InstRW<[SBWriteResGroup24], (instregex "PHSUBWrr")>;
+def: InstRW<[SBWriteResGroup24], (instregex "VPHADDDrr")>;
+def: InstRW<[SBWriteResGroup24], (instregex "VPHADDSWrr128")>;
+def: InstRW<[SBWriteResGroup24], (instregex "VPHADDWrr")>;
+def: InstRW<[SBWriteResGroup24], (instregex "VPHSUBDrr")>;
+def: InstRW<[SBWriteResGroup24], (instregex "VPHSUBSWrr128")>;
+def: InstRW<[SBWriteResGroup24], (instregex "VPHSUBWrr")>;
+
+def SBWriteResGroup25 : SchedWriteRes<[SBPort015]> { // SandyBridge write group: three uOps on the port 0-or-1-or-5 group.
+  let Latency = 3; // static latency, in cycles
+  let NumMicroOps = 3; // uOps each matched instruction decodes into
+  let ResourceCycles = [3]; // cycles consumed on SBPort015 (one per uOp)
+} // Bound below to the LEAVE64, XADD32rr and XADD8rr patterns.
+def: InstRW<[SBWriteResGroup25], (instregex "LEAVE64")>;
+def: InstRW<[SBWriteResGroup25], (instregex "XADD32rr")>;
+def: InstRW<[SBWriteResGroup25], (instregex "XADD8rr")>;
+
+def SBWriteResGroup26 : SchedWriteRes<[SBPort0,SBPort015]> { // SandyBridge write group: two uOps on port 0 plus one on port 0-or-1-or-5.
+  let Latency = 3; // static latency, in cycles
+  let NumMicroOps = 3; // uOps each matched instruction decodes into
+  let ResourceCycles = [2,1]; // cycles on SBPort0, SBPort015 (same order as the resource list)
+} // Bound below to the CMOVA32rr and CMOVBE32rr patterns.
+def: InstRW<[SBWriteResGroup26], (instregex "CMOVA32rr")>;
+def: InstRW<[SBWriteResGroup26], (instregex "CMOVBE32rr")>;
+
+def SBWriteResGroup27 : SchedWriteRes<[SBPort0,SBPort1]> { // SandyBridge write group: one uOp on port 0 plus one on port 1.
+  let Latency = 4; // static latency, in cycles
+  let NumMicroOps = 2; // uOps each matched instruction decodes into
+  let ResourceCycles = [1,1]; // cycles on SBPort0, SBPort1 (same order as the resource list)
+} // Bound below to the MUL64r pattern only.
+def: InstRW<[SBWriteResGroup27], (instregex "MUL64r")>;
+
+def SBWriteResGroup28 : SchedWriteRes<[SBPort1,SBPort5]> { // SandyBridge write group: one uOp on port 1 plus one on port 5.
+  let Latency = 4; // static latency, in cycles
+  let NumMicroOps = 2; // uOps each matched instruction decodes into
+  let ResourceCycles = [1,1]; // cycles on SBPort1, SBPort5 (same order as the resource list)
+} // Bound below to CVT* register-form conversion patterns (SSE, MMX and V-prefixed).
+def: InstRW<[SBWriteResGroup28], (instregex "CVTDQ2PDrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "CVTPD2DQrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "CVTPD2PSrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "CVTSD2SSrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "CVTSI2SD64rr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "CVTSI2SDrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "CVTTPD2DQrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "MMX_CVTPD2PIirr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "MMX_CVTPI2PDirr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "MMX_CVTTPD2PIirr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "VCVTDQ2PDYrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "VCVTDQ2PDrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "VCVTPD2DQYrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "VCVTPD2DQrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "VCVTPD2PSYrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "VCVTPD2PSrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "VCVTSI2SD64rr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "VCVTSI2SDrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "VCVTTPD2DQYrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "VCVTTPD2DQrr")>;
+
+def SBWriteResGroup29 : SchedWriteRes<[SBPort1,SBPort015]> { // SandyBridge write group: one uOp on port 1 plus one on port 0-or-1-or-5.
+  let Latency = 4; // static latency, in cycles
+  let NumMicroOps = 2; // uOps each matched instruction decodes into
+  let ResourceCycles = [1,1]; // cycles on SBPort1, SBPort015 (same order as the resource list)
+} // Bound below to the MOV64sr and PAUSE patterns.
+def: InstRW<[SBWriteResGroup29], (instregex "MOV64sr")>;
+def: InstRW<[SBWriteResGroup29], (instregex "PAUSE")>;
+
+def SBWriteResGroup30 : SchedWriteRes<[SBPort0]> { // SandyBridge write group: a single uOp on port 0.
+  let Latency = 5; // static latency, in cycles
+  let NumMicroOps = 1; // uOps each matched instruction decodes into
+  let ResourceCycles = [1]; // cycles consumed on SBPort0
+} // Bound below to FP multiply, x87 MUL, PCMPGTQ, PHMINPOSUW, RCP* and RSQRT* register patterns.
+def: InstRW<[SBWriteResGroup30], (instregex "MULPDrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "MULPSrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "MULSDrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "MULSSrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "MUL_FPrST0")>;
+def: InstRW<[SBWriteResGroup30], (instregex "MUL_FST0r")>;
+def: InstRW<[SBWriteResGroup30], (instregex "MUL_FrST0")>;
+def: InstRW<[SBWriteResGroup30], (instregex "PCMPGTQrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "PHMINPOSUWrr128")>;
+def: InstRW<[SBWriteResGroup30], (instregex "RCPPSr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "RCPSSr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "RSQRTPSr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "RSQRTSSr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "VMULPDYrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "VMULPDrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "VMULPSYrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "VMULPSrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "VMULSDrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "VMULSSrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "VPCMPGTQrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "VPHMINPOSUWrr128")>;
+def: InstRW<[SBWriteResGroup30], (instregex "VRSQRTPSr")>; // NOTE(review): no VRCPPSr / VRCPSSr pattern in this group although RCPPSr / RCPSSr are listed - confirm
+def: InstRW<[SBWriteResGroup30], (instregex "VRSQRTSSr")>;
+
+def SBWriteResGroup31 : SchedWriteRes<[SBPort23]> { // SandyBridge write group: a single uOp on the port 2-or-3 group.
+  let Latency = 5; // static latency, in cycles
+  let NumMicroOps = 1; // uOps each matched instruction decodes into
+  let ResourceCycles = [1]; // cycles consumed on SBPort23
+} // Bound below to the MOV/MOVSX/MOVZX rm patterns and PREFETCH.
+def: InstRW<[SBWriteResGroup31], (instregex "MOV32rm")>;
+def: InstRW<[SBWriteResGroup31], (instregex "MOV8rm")>;
+def: InstRW<[SBWriteResGroup31], (instregex "MOVSX32rm16")>;
+def: InstRW<[SBWriteResGroup31], (instregex "MOVSX32rm8")>;
+def: InstRW<[SBWriteResGroup31], (instregex "MOVZX32rm16")>;
+def: InstRW<[SBWriteResGroup31], (instregex "MOVZX32rm8")>;
+def: InstRW<[SBWriteResGroup31], (instregex "PREFETCH")>;
+
+def SBWriteResGroup32 : SchedWriteRes<[SBPort0,SBPort1]> { // SandyBridge write group: one uOp on port 0 plus one on port 1.
+  let Latency = 5; // static latency, in cycles
+  let NumMicroOps = 2; // uOps each matched instruction decodes into
+  let ResourceCycles = [1,1]; // cycles on SBPort0, SBPort1 (same order as the resource list)
+} // Bound below to the CVT[T]SD2SI / CVT[T]SS2SI rr patterns and their V-prefixed variants.
+def: InstRW<[SBWriteResGroup32], (instregex "CVTSD2SI64rr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "CVTSD2SIrr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "CVTSS2SI64rr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "CVTSS2SIrr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "CVTTSD2SI64rr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "CVTTSD2SIrr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "CVTTSS2SI64rr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "CVTTSS2SIrr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "VCVTSD2SI64rr")>; // NOTE(review): no VCVTSD2SIrr pattern in this group although CVTSD2SIrr is listed - confirm
+def: InstRW<[SBWriteResGroup32], (instregex "VCVTSS2SI64rr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "VCVTSS2SIrr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "VCVTTSD2SI64rr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "VCVTTSD2SIrr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "VCVTTSS2SI64rr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "VCVTTSS2SIrr")>;
+
+def SBWriteResGroup33 : SchedWriteRes<[SBPort4,SBPort23]> { // SandyBridge write group: one uOp on port 4 plus one on port 2-or-3.
+  let Latency = 5; // static latency, in cycles
+  let NumMicroOps = 2; // uOps each matched instruction decodes into
+  let ResourceCycles = [1,1]; // cycles on SBPort4, SBPort23 (same order as the resource list)
+} // Bound below to mr (memory-destination) move patterns, non-temporal stores, PUSH64 and VEXTRACTF128mr.
+def: InstRW<[SBWriteResGroup33], (instregex "MOV64mr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOV8mr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVAPDmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVAPSmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVDQAmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVDQUmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVHPDmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVHPSmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVLPDmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVLPSmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVNTDQmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVNTI_64mr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVNTImr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVNTPDmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVNTPSmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVPDI2DImr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVPQI2QImr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVPQIto64mr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVSSmr")>; // NOTE(review): no MOVSDmr pattern in this group although VMOVSDmr is listed - confirm
+def: InstRW<[SBWriteResGroup33], (instregex "MOVUPDmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVUPSmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "PUSH64i8")>;
+def: InstRW<[SBWriteResGroup33], (instregex "PUSH64r")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VEXTRACTF128mr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVAPDYmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVAPDmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVAPSYmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVAPSmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVDQAYmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVDQAmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVDQUYmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVDQUmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVHPDmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVHPSmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVLPDmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVLPSmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTDQYmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTDQmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTPDYmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTPDmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTPSYmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTPSmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVPDI2DImr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVPQI2QImr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVPQIto64mr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVSDmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVSSmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVUPDYmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVUPDmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVUPSYmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVUPSmr")>;
+
+def SBWriteResGroup34 : SchedWriteRes<[SBPort0,SBPort15]> { // SandyBridge write group: one uOp on port 0 plus two on port 1-or-5.
+  let Latency = 5; // static latency, in cycles
+  let NumMicroOps = 3; // uOps each matched instruction decodes into
+  let ResourceCycles = [1,2]; // cycles on SBPort0, SBPort15 (same order as the resource list)
+} // Bound below to the MPSADBWrri and VMPSADBWrri patterns.
+def: InstRW<[SBWriteResGroup34], (instregex "MPSADBWrri")>;
+def: InstRW<[SBWriteResGroup34], (instregex "VMPSADBWrri")>;
+
+def SBWriteResGroup35 : SchedWriteRes<[SBPort1,SBPort5]> { // SandyBridge write group: one uOp on port 1 plus two on port 5.
+  let Latency = 5; // static latency, in cycles
+  let NumMicroOps = 3; // uOps each matched instruction decodes into
+  let ResourceCycles = [1,2]; // cycles on SBPort1, SBPort5 (same order as the resource list)
+} // Bound below to CLI, CVTSI2SS rr and the HADDP*/HSUBP* rr patterns (plus V-prefixed variants).
+def: InstRW<[SBWriteResGroup35], (instregex "CLI")>;
+def: InstRW<[SBWriteResGroup35], (instregex "CVTSI2SS64rr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "CVTSI2SSrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "HADDPDrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "HADDPSrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "HSUBPDrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "HSUBPSrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "VCVTSI2SS64rr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "VCVTSI2SSrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "VHADDPDrr")>; // NOTE(review): no VHADDPDYrr pattern in this group although the other three Y forms are listed - confirm
+def: InstRW<[SBWriteResGroup35], (instregex "VHADDPSYrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "VHADDPSrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "VHSUBPDYrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "VHSUBPDrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "VHSUBPSYrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "VHSUBPSrr")>;
+
+def SBWriteResGroup36 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> { // SandyBridge write group: one uOp each on port 4, port 5 and port 2-or-3.
+  let Latency = 5; // static latency, in cycles
+  let NumMicroOps = 3; // uOps each matched instruction decodes into
+  let ResourceCycles = [1,1,1]; // cycles on SBPort4, SBPort5, SBPort23 (same order as the resource list)
+} // Bound below to the CALL64r, EXTRACTPSmr and VEXTRACTPSmr patterns.
+def: InstRW<[SBWriteResGroup36], (instregex "CALL64r")>;
+def: InstRW<[SBWriteResGroup36], (instregex "EXTRACTPSmr")>;
+def: InstRW<[SBWriteResGroup36], (instregex "VEXTRACTPSmr")>;
+
+def SBWriteResGroup37 : SchedWriteRes<[SBPort4,SBPort01,SBPort23]> { // SandyBridge write group: one uOp each on port 4, port 0-or-1 and port 2-or-3.
+  let Latency = 5; // static latency, in cycles
+  let NumMicroOps = 3; // uOps each matched instruction decodes into
+  let ResourceCycles = [1,1,1]; // cycles on SBPort4, SBPort01, SBPort23 (same order as the resource list)
+} // Bound below to VMASKMOVP* patterns.
+def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPDYrm")>; // NOTE(review): "rm" suffix while the sibling patterns are "mr", and no VMASKMOVPSYmr is listed - possibly meant "VMASKMOVPDYmr"; confirm
+def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPDmr")>;
+def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPSmr")>;
+
+def SBWriteResGroup38 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> { // SandyBridge write group: one uOp each on port 4, port 2-or-3 and port 0.
+  let Latency = 5; // static latency, in cycles
+  let NumMicroOps = 3; // uOps each matched instruction decodes into
+  let ResourceCycles = [1,1,1]; // cycles on SBPort4, SBPort23, SBPort0 (same order as the resource list)
+} // Bound below to the SETcc memory-form patterns other than SETAm/SETBEm (those use SBWriteResGroup43).
+def: InstRW<[SBWriteResGroup38], (instregex "SETAEm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETBm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETEm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETGEm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETGm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETLEm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETLm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETNEm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETNOm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETNPm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETNSm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETOm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETPm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETSm")>;
+
+def SBWriteResGroup39 : SchedWriteRes<[SBPort4,SBPort23,SBPort15]> { // SandyBridge write group: one uOp each on port 4, port 2-or-3 and port 1-or-5.
+  let Latency = 5; // static latency, in cycles
+  let NumMicroOps = 3; // uOps each matched instruction decodes into
+  let ResourceCycles = [1,1,1]; // cycles on SBPort4, SBPort23, SBPort15 (same order as the resource list)
+} // Bound below to PEXTRBmr and the VPEXTR{B,D,W}mr patterns.
+def: InstRW<[SBWriteResGroup39], (instregex "PEXTRBmr")>;
+def: InstRW<[SBWriteResGroup39], (instregex "VPEXTRBmr")>;
+def: InstRW<[SBWriteResGroup39], (instregex "VPEXTRDmr")>; // NOTE(review): PEXTRDmr is bound to the 4-uOp SBWriteResGroup45 instead - confirm the split is intended
+def: InstRW<[SBWriteResGroup39], (instregex "VPEXTRWmr")>; // NOTE(review): no PEXTRWmr pattern in this group - confirm
+
+def SBWriteResGroup40 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> { // SandyBridge write group: one uOp each on port 4, port 2-or-3 and port 0-or-1-or-5.
+  let Latency = 5; // static latency, in cycles
+  let NumMicroOps = 3; // uOps each matched instruction decodes into
+  let ResourceCycles = [1,1,1]; // cycles on SBPort4, SBPort23, SBPort015 (same order as the resource list)
+} // Bound below to the MOV8mi and STOS{B,L,Q,W} patterns.
+def: InstRW<[SBWriteResGroup40], (instregex "MOV8mi")>;
+def: InstRW<[SBWriteResGroup40], (instregex "STOSB")>;
+def: InstRW<[SBWriteResGroup40], (instregex "STOSL")>;
+def: InstRW<[SBWriteResGroup40], (instregex "STOSQ")>;
+def: InstRW<[SBWriteResGroup40], (instregex "STOSW")>;
+
+def SBWriteResGroup41 : SchedWriteRes<[SBPort5,SBPort015]> { // SandyBridge write group: one uOp on port 5 plus three on port 0-or-1-or-5.
+  let Latency = 5; // static latency, in cycles
+  let NumMicroOps = 4; // uOps each matched instruction decodes into
+  let ResourceCycles = [1,3]; // cycles on SBPort5, SBPort015 (same order as the resource list)
+} // Bound below to the FNINIT pattern only.
+def: InstRW<[SBWriteResGroup41], (instregex "FNINIT")>;
+
+def SBWriteResGroup42 : SchedWriteRes<[SBPort0,SBPort015]> { // SandyBridge write group: one uOp on port 0 plus three on port 0-or-1-or-5.
+  let Latency = 5; // static latency, in cycles
+  let NumMicroOps = 4; // uOps each matched instruction decodes into
+  let ResourceCycles = [1,3]; // cycles on SBPort0, SBPort015 (same order as the resource list)
+} // Bound below to the CMPXCHG32rr and CMPXCHG8rr patterns.
+def: InstRW<[SBWriteResGroup42], (instregex "CMPXCHG32rr")>;
+def: InstRW<[SBWriteResGroup42], (instregex "CMPXCHG8rr")>;
+
+def SBWriteResGroup43 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> { // SandyBridge write group: one uOp on port 4, one on port 2-or-3, two on port 0.
+  let Latency = 5; // static latency, in cycles
+  let NumMicroOps = 4; // uOps each matched instruction decodes into
+  let ResourceCycles = [1,1,2]; // cycles on SBPort4, SBPort23, SBPort0 (same order as the resource list)
+} // Bound below to the SETAm and SETBEm patterns (one more port-0 uOp than SBWriteResGroup38).
+def: InstRW<[SBWriteResGroup43], (instregex "SETAm")>;
+def: InstRW<[SBWriteResGroup43], (instregex "SETBEm")>;
+
+def SBWriteResGroup44 : SchedWriteRes<[SBPort0,SBPort4,SBPort5,SBPort23]> { // SandyBridge write group: one uOp each on port 0, port 4, port 5 and port 2-or-3.
+  let Latency = 5; // static latency, in cycles
+  let NumMicroOps = 4; // uOps each matched instruction decodes into
+  let ResourceCycles = [1,1,1,1]; // cycles on SBPort0, SBPort4, SBPort5, SBPort23 (same order as the resource list)
+} // Bound below to the LDMXCSR / STMXCSR patterns and their V-prefixed variants.
+def: InstRW<[SBWriteResGroup44], (instregex "LDMXCSR")>;
+def: InstRW<[SBWriteResGroup44], (instregex "STMXCSR")>;
+def: InstRW<[SBWriteResGroup44], (instregex "VLDMXCSR")>;
+def: InstRW<[SBWriteResGroup44], (instregex "VSTMXCSR")>;
+
+def SBWriteResGroup45 : SchedWriteRes<[SBPort0,SBPort4,SBPort23,SBPort15]> { // SandyBridge write group: one uOp each on port 0, port 4, port 2-or-3 and port 1-or-5.
+  let Latency = 5; // static latency, in cycles
+  let NumMicroOps = 4; // uOps each matched instruction decodes into
+  let ResourceCycles = [1,1,1,1]; // cycles on SBPort0, SBPort4, SBPort23, SBPort15 (same order as the resource list)
+} // Bound below to the PEXTRDmr, PEXTRQmr, VPEXTRQmr, PUSHF16 and PUSHF64 patterns.
+def: InstRW<[SBWriteResGroup45], (instregex "PEXTRDmr")>;
+def: InstRW<[SBWriteResGroup45], (instregex "PEXTRQmr")>;
+def: InstRW<[SBWriteResGroup45], (instregex "VPEXTRQmr")>;
+def: InstRW<[SBWriteResGroup45], (instregex "PUSHF16")>;
+def: InstRW<[SBWriteResGroup45], (instregex "PUSHF64")>;
+
+def SBWriteResGroup46 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> { // SandyBridge write group: one uOp each on port 4, port 5, port 0-or-1 and port 2-or-3.
+  let Latency = 5; // static latency, in cycles
+  let NumMicroOps = 4; // uOps each matched instruction decodes into
+  let ResourceCycles = [1,1,1,1]; // cycles on SBPort4, SBPort5, SBPort01, SBPort23 (same order as the resource list)
+} // Bound below to the CLFLUSH pattern only.
+def: InstRW<[SBWriteResGroup46], (instregex "CLFLUSH")>;
+
+def SBWriteResGroup47 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> { // SandyBridge write group: port 4 x1, port 5 x2, port 0-or-1 x1, port 2-or-3 x1.
+  let Latency = 5; // static latency, in cycles
+  let NumMicroOps = 5; // uOps each matched instruction decodes into
+  let ResourceCycles = [1,2,1,1]; // cycles on SBPort4, SBPort5, SBPort01, SBPort23 (same order as the resource list)
+} // Bound below to the FXRSTOR pattern only.
+def: InstRW<[SBWriteResGroup47], (instregex "FXRSTOR")>;
+
+def SBWriteResGroup48 : SchedWriteRes<[SBPort23]> { // SandyBridge write group: a single uOp on the port 2-or-3 group.
+  let Latency = 6; // static latency, in cycles
+  let NumMicroOps = 1; // uOps each matched instruction decodes into
+  let ResourceCycles = [1]; // cycles consumed on SBPort23
+} // Bound below to rm (memory-source) vector move/dup/broadcast patterns and POP64r.
+def: InstRW<[SBWriteResGroup48], (instregex "LDDQUrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MMX_MOVD64from64rm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOV64toPQIrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVAPDrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVAPSrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVDDUPrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVDI2PDIrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVDQArm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVDQUrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVNTDQArm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVSHDUPrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVSLDUPrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVSSrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVUPDrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVUPSrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "POP64r")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VBROADCASTSSrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VLDDQUYrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VLDDQUrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOV64toPQIrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVAPDrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVAPSrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVDDUPrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVDI2PDIrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVDQArm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVDQUrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVNTDQArm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVQI2PQIrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVSDrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVSHDUPrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVSLDUPrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVSSrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVUPDrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVUPSrm")>;
+
+def SBWriteResGroup49 : SchedWriteRes<[SBPort5,SBPort23]> { // SandyBridge write group: one uOp on port 5 plus one on port 2-or-3.
+  let Latency = 6; // static latency, in cycles
+  let NumMicroOps = 2; // uOps each matched instruction decodes into
+  let ResourceCycles = [1,1]; // cycles on SBPort5, SBPort23 (same order as the resource list)
+} // Bound below to the JMP64m and MOV64sm patterns.
+def: InstRW<[SBWriteResGroup49], (instregex "JMP64m")>;
+def: InstRW<[SBWriteResGroup49], (instregex "MOV64sm")>;
+
+// SBWriteResGroup50: latency 6, 2 uops -- 1 cycle on SBPort23 and 1 cycle on
+// SBPort0.
+def SBWriteResGroup50 : SchedWriteRes<[SBPort23,SBPort0]> {
+  let NumMicroOps = 2;
+  let Latency = 6;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup50], (instregex
+    "BT64mi8")>;
+
+// SBWriteResGroup51: latency 6, 2 uops -- 1 cycle on SBPort23 and 1 cycle on
+// SBPort15.
+def SBWriteResGroup51 : SchedWriteRes<[SBPort23,SBPort15]> {
+  let NumMicroOps = 2;
+  let Latency = 6;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup51], (instregex
+    "MMX_PABSBrm64",
+    "MMX_PABSDrm64",
+    "MMX_PABSWrm64",
+    "MMX_PALIGNR64irm",
+    "MMX_PSHUFBrm64",
+    "MMX_PSIGNBrm64",
+    "MMX_PSIGNDrm64",
+    "MMX_PSIGNWrm64")>;
+
+// SBWriteResGroup52: latency 6, 2 uops -- 1 cycle on SBPort23 and 1 cycle on
+// SBPort015.
+def SBWriteResGroup52 : SchedWriteRes<[SBPort23,SBPort015]> {
+  let NumMicroOps = 2;
+  let Latency = 6;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup52], (instregex
+    "ADD64rm", "ADD8rm",
+    "AND64rm", "AND8rm",
+    "CMP64mi8", "CMP64mr", "CMP64rm",
+    "CMP8mi", "CMP8mr", "CMP8rm",
+    "LODSL", "LODSQ",
+    "OR64rm", "OR8rm",
+    "SUB64rm", "SUB8rm",
+    "XOR64rm", "XOR8rm")>;
+
+// SBWriteResGroup53: latency 6, 3 uops -- 1 cycle on SBPort4 and 2 cycles on
+// SBPort23.
+def SBWriteResGroup53 : SchedWriteRes<[SBPort4,SBPort23]> {
+  let NumMicroOps = 3;
+  let Latency = 6;
+  let ResourceCycles = [1,2];
+}
+def: InstRW<[SBWriteResGroup53], (instregex
+    "POP64rmm",
+    "PUSH64rmm",
+    "ST_F32m",
+    "ST_F64m",
+    "ST_FP32m",
+    "ST_FP64m",
+    "ST_FP80m")>;
+
+// SBWriteResGroup54: latency 7, 1 uop -- 1 cycle on SBPort23.
+// All entries are the Y-suffixed memory forms. The broadcast entry is
+// VBROADCASTSSYrm: the non-Y VBROADCASTSSrm is already assigned to
+// SBWriteResGroup48, so listing it here as well would give one instruction
+// two conflicting scheduling classes.
+def SBWriteResGroup54 : SchedWriteRes<[SBPort23]> {
+  let Latency = 7;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup54], (instregex "VBROADCASTSDYrm")>;
+def: InstRW<[SBWriteResGroup54], (instregex "VBROADCASTSSYrm")>;
+def: InstRW<[SBWriteResGroup54], (instregex "VMOVAPDYrm")>;
+def: InstRW<[SBWriteResGroup54], (instregex "VMOVAPSYrm")>;
+def: InstRW<[SBWriteResGroup54], (instregex "VMOVDDUPYrm")>;
+def: InstRW<[SBWriteResGroup54], (instregex "VMOVDQAYrm")>;
+def: InstRW<[SBWriteResGroup54], (instregex "VMOVDQUYrm")>;
+def: InstRW<[SBWriteResGroup54], (instregex "VMOVSHDUPYrm")>;
+def: InstRW<[SBWriteResGroup54], (instregex "VMOVSLDUPYrm")>;
+def: InstRW<[SBWriteResGroup54], (instregex "VMOVUPDYrm")>;
+def: InstRW<[SBWriteResGroup54], (instregex "VMOVUPSYrm")>;
+
+// SBWriteResGroup55: latency 7, 2 uops -- 1 cycle on SBPort0 and 1 cycle on
+// SBPort23.
+def SBWriteResGroup55 : SchedWriteRes<[SBPort0,SBPort23]> {
+  let NumMicroOps = 2;
+  let Latency = 7;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup55], (instregex
+    "CVTPS2PDrm",
+    "CVTSS2SDrm",
+    "VCVTPS2PDYrm",
+    "VCVTPS2PDrm",
+    "VCVTSS2SDrm",
+    "VTESTPDrm",
+    "VTESTPSrm")>;
+
+// SBWriteResGroup56: latency 7, 2 uops -- 1 cycle on SBPort5 and 1 cycle on
+// SBPort23.
+// Every entry takes a memory operand. The VPERMILPD/VPERMILPS entries are
+// the "mi" and "rm" memory forms; the register-immediate "ri" forms have no
+// memory operand and do not belong in a group that uses SBPort23.
+def SBWriteResGroup56 : SchedWriteRes<[SBPort5,SBPort23]> {
+  let Latency = 7;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup56], (instregex "ANDNPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "ANDNPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "ANDPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "ANDPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "INSERTPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "MOVHPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "MOVHPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "MOVLPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "MOVLPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "ORPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "ORPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "SHUFPDrmi")>;
+def: InstRW<[SBWriteResGroup56], (instregex "SHUFPSrmi")>;
+def: InstRW<[SBWriteResGroup56], (instregex "UNPCKHPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "UNPCKHPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "UNPCKLPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "UNPCKLPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VANDNPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VANDNPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VANDPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VANDPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VBROADCASTF128")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VINSERTPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VMOVHPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VMOVHPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VMOVLPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VMOVLPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VORPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VORPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPDmi")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPSmi")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VSHUFPDrmi")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VSHUFPSrmi")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VUNPCKHPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VUNPCKHPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VUNPCKLPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VUNPCKLPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VXORPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VXORPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "XORPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "XORPSrm")>;
+
+// SBWriteResGroup57: latency 7, 2 uops -- 1 cycle on SBPort5 and 1 cycle on
+// SBPort015.
+// Register-register AES round instructions, legacy and VEX encodings.
+// VAESENCLASTrr completes the VEX set; it takes the slot that previously
+// held KANDQrr, an AVX-512 mask instruction unrelated to this group.
+def SBWriteResGroup57 : SchedWriteRes<[SBPort5,SBPort015]> {
+  let Latency = 7;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup57], (instregex "AESDECLASTrr")>;
+def: InstRW<[SBWriteResGroup57], (instregex "AESDECrr")>;
+def: InstRW<[SBWriteResGroup57], (instregex "AESENCLASTrr")>;
+def: InstRW<[SBWriteResGroup57], (instregex "AESENCrr")>;
+def: InstRW<[SBWriteResGroup57], (instregex "VAESDECLASTrr")>;
+def: InstRW<[SBWriteResGroup57], (instregex "VAESDECrr")>;
+def: InstRW<[SBWriteResGroup57], (instregex "VAESENCLASTrr")>;
+def: InstRW<[SBWriteResGroup57], (instregex "VAESENCrr")>;
+// SBWriteResGroup58: latency 7, 2 uops -- 1 cycle on SBPort23 and 1 cycle on
+// SBPort0.
+def SBWriteResGroup58 : SchedWriteRes<[SBPort23,SBPort0]> {
+  let NumMicroOps = 2;
+  let Latency = 7;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup58], (instregex
+    "BLENDPDrmi",
+    "BLENDPSrmi",
+    "VBLENDPDrmi",
+    "VBLENDPSrmi",
+    "VINSERTF128rm")>;
+
+// SBWriteResGroup59: latency 7, 2 uops -- 1 cycle on SBPort23 and 1 cycle on
+// SBPort15.
+def SBWriteResGroup59 : SchedWriteRes<[SBPort23,SBPort15]> {
+  let NumMicroOps = 2;
+  let Latency = 7;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup59], (instregex
+    "MMX_PADDQirm",
+    // Legacy-encoded forms.
+    "PABSBrm", "PABSDrm", "PABSWrm",
+    "PACKSSDWrm", "PACKSSWBrm", "PACKUSDWrm", "PACKUSWBrm",
+    "PADDBrm", "PADDDrm", "PADDQrm",
+    "PADDSBrm", "PADDSWrm", "PADDUSBrm", "PADDUSWrm", "PADDWrm",
+    "PALIGNRrmi",
+    "PAVGBrm", "PAVGWrm",
+    "PBLENDWrmi",
+    "PCMPEQBrm", "PCMPEQDrm", "PCMPEQQrm", "PCMPEQWrm",
+    "PCMPGTBrm", "PCMPGTDrm", "PCMPGTWrm",
+    "PINSRBrm", "PINSRDrm", "PINSRQrm", "PINSRWrmi",
+    "PMAXSBrm", "PMAXSDrm", "PMAXSWrm",
+    "PMAXUBrm", "PMAXUDrm", "PMAXUWrm",
+    "PMINSBrm", "PMINSDrm", "PMINSWrm",
+    "PMINUBrm", "PMINUDrm", "PMINUWrm",
+    "PMOVSXBDrm", "PMOVSXBQrm", "PMOVSXBWrm",
+    "PMOVSXDQrm", "PMOVSXWDrm", "PMOVSXWQrm",
+    "PMOVZXBDrm", "PMOVZXBQrm", "PMOVZXBWrm",
+    "PMOVZXDQrm", "PMOVZXWDrm", "PMOVZXWQrm",
+    "PSHUFBrm", "PSHUFDmi", "PSHUFHWmi", "PSHUFLWmi",
+    "PSIGNBrm128", "PSIGNDrm128", "PSIGNWrm128",
+    "PSUBBrm", "PSUBDrm", "PSUBQrm",
+    "PSUBSBrm", "PSUBSWrm", "PSUBUSBrm", "PSUBUSWrm", "PSUBWrm",
+    "PUNPCKHBWrm", "PUNPCKHDQrm", "PUNPCKHQDQrm", "PUNPCKHWDrm",
+    "PUNPCKLBWrm", "PUNPCKLDQrm", "PUNPCKLQDQrm", "PUNPCKLWDrm",
+    // V-prefixed forms.
+    "VPABSBrm", "VPABSDrm", "VPABSWrm",
+    "VPACKSSDWrm", "VPACKSSWBrm", "VPACKUSDWrm", "VPACKUSWBrm",
+    "VPADDBrm", "VPADDDrm", "VPADDQrm",
+    "VPADDSBrm", "VPADDSWrm", "VPADDUSBrm", "VPADDUSWrm", "VPADDWrm",
+    "VPALIGNRrmi",
+    "VPAVGBrm", "VPAVGWrm",
+    "VPBLENDWrmi",
+    "VPCMPEQBrm", "VPCMPEQDrm", "VPCMPEQQrm", "VPCMPEQWrm",
+    "VPCMPGTBrm", "VPCMPGTDrm", "VPCMPGTWrm",
+    "VPINSRBrm", "VPINSRDrm", "VPINSRQrm", "VPINSRWrmi",
+    "VPMAXSBrm", "VPMAXSDrm", "VPMAXSWrm",
+    "VPMAXUBrm", "VPMAXUDrm", "VPMAXUWrm",
+    "VPMINSBrm", "VPMINSDrm", "VPMINSWrm",
+    "VPMINUBrm", "VPMINUDrm", "VPMINUWrm",
+    "VPMOVSXBDrm", "VPMOVSXBQrm", "VPMOVSXBWrm",
+    "VPMOVSXDQrm", "VPMOVSXWDrm", "VPMOVSXWQrm",
+    "VPMOVZXBDrm", "VPMOVZXBQrm", "VPMOVZXBWrm",
+    "VPMOVZXDQrm", "VPMOVZXWDrm", "VPMOVZXWQrm",
+    "VPSHUFBrm", "VPSHUFDmi", "VPSHUFHWmi", "VPSHUFLWmi",
+    "VPSIGNBrm128", "VPSIGNDrm128", "VPSIGNWrm128",
+    "VPSUBBrm", "VPSUBDrm", "VPSUBQrm",
+    "VPSUBSBrm", "VPSUBSWrm", "VPSUBUSBrm", "VPSUBUSWrm", "VPSUBWrm",
+    "VPUNPCKHBWrm", "VPUNPCKHDQrm", "VPUNPCKHQDQrm", "VPUNPCKHWDrm",
+    "VPUNPCKLBWrm", "VPUNPCKLDQrm", "VPUNPCKLQDQrm", "VPUNPCKLWDrm")>;
+
+// SBWriteResGroup60: latency 7, 2 uops -- 1 cycle on SBPort23 and 1 cycle on
+// SBPort015.
+def SBWriteResGroup60 : SchedWriteRes<[SBPort23,SBPort015]> {
+  let NumMicroOps = 2;
+  let Latency = 7;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup60], (instregex
+    "PANDNrm", "PANDrm", "PORrm", "PXORrm",
+    "VPANDNrm", "VPANDrm", "VPORrm", "VPXORrm")>;
+
+// SBWriteResGroup61: latency 7, 3 uops -- 2 cycles on SBPort0 and 1 cycle on
+// SBPort05.
+// The resource list previously named SBPort0 twice, which cannot describe two
+// distinct resources; the second entry is now the SBPort05 group.
+// NOTE(review): SBPort05 is assumed to be defined earlier in this file, and
+// both the port choice and the Y-suffixed VRCPPS form should be confirmed
+// against the source data.
+def SBWriteResGroup61 : SchedWriteRes<[SBPort0,SBPort05]> {
+  let Latency = 7;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[SBWriteResGroup61], (instregex "VRCPPSYr")>;
+def: InstRW<[SBWriteResGroup61], (instregex "VRSQRTPSYr")>;
+
+// SBWriteResGroup62: latency 7, 3 uops -- 2 cycles on SBPort5 and 1 cycle on
+// SBPort23.
+def SBWriteResGroup62 : SchedWriteRes<[SBPort5,SBPort23]> {
+  let NumMicroOps = 3;
+  let Latency = 7;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[SBWriteResGroup62], (instregex
+    "VERRm",
+    "VERWm")>;
+
+// SBWriteResGroup63: latency 7, 3 uops -- 1 cycle on SBPort23 and 2 cycles on
+// SBPort015.
+def SBWriteResGroup63 : SchedWriteRes<[SBPort23,SBPort015]> {
+  let NumMicroOps = 3;
+  let Latency = 7;
+  let ResourceCycles = [1,2];
+}
+def: InstRW<[SBWriteResGroup63], (instregex
+    "LODSB",
+    "LODSW")>;
+
+// SBWriteResGroup64: latency 7, 3 uops -- 1 cycle each on SBPort5, SBPort01
+// and SBPort23.
+def SBWriteResGroup64 : SchedWriteRes<[SBPort5,SBPort01,SBPort23]> {
+  let NumMicroOps = 3;
+  let Latency = 7;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup64], (instregex
+    "FARJMP64")>;
+
+// SBWriteResGroup65: latency 7, 3 uops -- 1 cycle each on SBPort23, SBPort0
+// and SBPort015.
+def SBWriteResGroup65 : SchedWriteRes<[SBPort23,SBPort0,SBPort015]> {
+  let NumMicroOps = 3;
+  let Latency = 7;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup65], (instregex
+    "ADC64rm", "ADC8rm",
+    "CMOVAE64rm", "CMOVB64rm", "CMOVE64rm",
+    "CMOVG64rm", "CMOVGE64rm",
+    "CMOVL64rm", "CMOVLE64rm",
+    "CMOVNE64rm", "CMOVNO64rm", "CMOVNP64rm", "CMOVNS64rm",
+    "CMOVO64rm", "CMOVP64rm", "CMOVS64rm",
+    "SBB64rm", "SBB8rm")>;
+
+// SBWriteResGroup66: latency 7, 4 uops -- 1 cycle on SBPort0, 1 cycle on
+// SBPort4 and 2 cycles on SBPort23.
+def SBWriteResGroup66 : SchedWriteRes<[SBPort0,SBPort4,SBPort23]> {
+  let NumMicroOps = 4;
+  let Latency = 7;
+  let ResourceCycles = [1,1,2];
+}
+def: InstRW<[SBWriteResGroup66], (instregex
+    "FNSTSWm")>;
+
+// SBWriteResGroup67: latency 7, 4 uops -- 1 cycle on SBPort1, 2 cycles on
+// SBPort5 and 1 cycle on SBPort015.
+def SBWriteResGroup67 : SchedWriteRes<[SBPort1,SBPort5,SBPort015]> {
+  let NumMicroOps = 4;
+  let Latency = 7;
+  let ResourceCycles = [1,2,1];
+}
+def: InstRW<[SBWriteResGroup67], (instregex
+    "SLDT32r",
+    "STR32r")>;
+
+// SBWriteResGroup68: latency 7, 4 uops -- 1 cycle on SBPort4, 1 cycle on
+// SBPort5 and 2 cycles on SBPort23.
+def SBWriteResGroup68 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> {
+  let NumMicroOps = 4;
+  let Latency = 7;
+  let ResourceCycles = [1,1,2];
+}
+def: InstRW<[SBWriteResGroup68], (instregex
+    "CALL64m",
+    "FNSTCW16m")>;
+
+// SBWriteResGroup69: latency 7, 4 uops -- 1 cycle on SBPort4, 2 cycles on
+// SBPort23 and 1 cycle on SBPort0.
+def SBWriteResGroup69 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> {
+  let NumMicroOps = 4;
+  let Latency = 7;
+  let ResourceCycles = [1,2,1];
+}
+def: InstRW<[SBWriteResGroup69], (instregex
+    "BTC64mi8", "BTR64mi8", "BTS64mi8",
+    "SAR64mi", "SAR8mi",
+    "SHL64m1", "SHL64mi", "SHL8m1", "SHL8mi",
+    "SHR64mi", "SHR8mi")>;
+
+// SBWriteResGroup70: latency 7, 4 uops -- 1 cycle on SBPort4, 2 cycles on
+// SBPort23 and 1 cycle on SBPort015.
+// NOTE(review): the TEST* entries do not write their memory operand, yet they
+// share this group (which spends a cycle on SBPort4) with memory-destination
+// arithmetic; confirm against the source data.
+def SBWriteResGroup70 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
+  let NumMicroOps = 4;
+  let Latency = 7;
+  let ResourceCycles = [1,2,1];
+}
+def: InstRW<[SBWriteResGroup70], (instregex
+    "ADD64mi8", "ADD64mr", "ADD8mi", "ADD8mr",
+    "AND64mi8", "AND64mr", "AND8mi", "AND8mr",
+    "DEC64m", "DEC8m",
+    "INC64m", "INC8m",
+    "NEG64m", "NEG8m",
+    "NOT64m", "NOT8m",
+    "OR64mi8", "OR64mr", "OR8mi", "OR8mr",
+    "SUB64mi8", "SUB64mr", "SUB8mi", "SUB8mr",
+    "TEST64rm", "TEST8mi", "TEST8rm",
+    "XOR64mi8", "XOR64mr", "XOR8mi", "XOR8mr")>;
+
+// SBWriteResGroup71: latency 8, 2 uops -- 1 cycle on SBPort0 and 1 cycle on
+// SBPort23.
+def SBWriteResGroup71 : SchedWriteRes<[SBPort0,SBPort23]> {
+  let NumMicroOps = 2;
+  let Latency = 8;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup71], (instregex
+    "MMX_PMADDUBSWrm64",
+    "MMX_PMULHRSWrm64",
+    "VTESTPDYrm",
+    "VTESTPSYrm")>;
+
+// SBWriteResGroup72: latency 8, 2 uops -- 1 cycle on SBPort1 and 1 cycle on
+// SBPort23.
+def SBWriteResGroup72 : SchedWriteRes<[SBPort1,SBPort23]> {
+  let NumMicroOps = 2;
+  let Latency = 8;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup72], (instregex
+    "BSF64rm", "BSR64rm",
+    "CRC32r32m16", "CRC32r32m8",
+    "FCOM32m", "FCOM64m", "FCOMP32m", "FCOMP64m",
+    "MUL8m")>;
+
+// SBWriteResGroup73: latency 8, 2 uops -- 1 cycle on SBPort5 and 1 cycle on
+// SBPort23.
+// Apart from VPERM2F128rm, every entry is a Y-suffixed memory form. The
+// non-Y names (VANDPDrm, VANDPSrm, VPERMILPDmi, VPERMILPSmi, VUNPCKHPDrm,
+// VUNPCKHPSrm, VXORPDrm, VXORPSrm) are already assigned to SBWriteResGroup56
+// and must not be repeated here with a different latency. The VPERMILP
+// entries are the memory forms ("Ymi"/"Yrm"), not the register-immediate
+// "Yri" forms.
+def SBWriteResGroup73 : SchedWriteRes<[SBPort5,SBPort23]> {
+  let Latency = 8;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup73], (instregex "VANDNPDYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VANDNPSYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VANDPDYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VANDPSYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VORPDYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VORPSYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VPERM2F128rm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPDYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPDYmi")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPSYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPSYmi")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VSHUFPDYrmi")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VSHUFPSYrmi")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKHPDYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKHPSYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKLPDYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKLPSYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VXORPDYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VXORPSYrm")>;
+
+// SBWriteResGroup74: latency 8, 2 uops -- 1 cycle on SBPort23 and 1 cycle on
+// SBPort0.
+def SBWriteResGroup74 : SchedWriteRes<[SBPort23,SBPort0]> {
+  let NumMicroOps = 2;
+  let Latency = 8;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup74], (instregex
+    "VBLENDPDYrmi",
+    "VBLENDPSYrmi")>;
+
+// SBWriteResGroup75: latency 8, 3 uops -- 1 cycle on SBPort23 and 2 cycles on
+// SBPort0.
+def SBWriteResGroup75 : SchedWriteRes<[SBPort23,SBPort0]> {
+  let NumMicroOps = 3;
+  let Latency = 8;
+  let ResourceCycles = [1,2];
+}
+def: InstRW<[SBWriteResGroup75], (instregex
+    "BLENDVPDrm0",
+    "BLENDVPSrm0",
+    "VBLENDVPDrm",
+    "VBLENDVPSrm",
+    "VMASKMOVPDrm",
+    "VMASKMOVPSrm")>;
+
+// SBWriteResGroup76: latency 8, 3 uops -- 1 cycle on SBPort23 and 2 cycles on
+// SBPort15.
+// Both entries are the memory-operand forms of the byte blend; the legacy
+// one is PBLENDVBrm0 (the register form PBLENDVBrr0 has no memory operand
+// and does not belong in a group that uses SBPort23).
+def SBWriteResGroup76 : SchedWriteRes<[SBPort23,SBPort15]> {
+  let Latency = 8;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,2];
+}
+def: InstRW<[SBWriteResGroup76], (instregex "PBLENDVBrm0")>;
+def: InstRW<[SBWriteResGroup76], (instregex "VPBLENDVBrm")>;
+
+// SBWriteResGroup77: latency 8, 3 uops -- 1 cycle each on SBPort0, SBPort1
+// and SBPort23.
+def SBWriteResGroup77 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
+  let NumMicroOps = 3;
+  let Latency = 8;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup77], (instregex
+    "COMISDrm", "COMISSrm",
+    "UCOMISDrm", "UCOMISSrm",
+    "VCOMISDrm", "VCOMISSrm",
+    "VUCOMISDrm", "VUCOMISSrm")>;
+
+// SBWriteResGroup78: latency 8, 3 uops -- 1 cycle each on SBPort0, SBPort5
+// and SBPort23.
+def SBWriteResGroup78 : SchedWriteRes<[SBPort0,SBPort5,SBPort23]> {
+  let NumMicroOps = 3;
+  let Latency = 8;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup78], (instregex
+    "PTESTrm",
+    "VPTESTrm")>;
+
+// SBWriteResGroup79: latency 8, 3 uops -- 1 cycle each on SBPort0, SBPort23
+// and SBPort15.
+// Vector shifts with a memory operand, legacy and V-prefixed. The VPSLL*
+// entries are the "rm" forms, mirroring PSLL*rm and the other VPSR*rm
+// entries; the register-immediate "ri" forms have no memory operand.
+def SBWriteResGroup79 : SchedWriteRes<[SBPort0,SBPort23,SBPort15]> {
+  let Latency = 8;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup79], (instregex "PSLLDrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "PSLLQrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "PSLLWrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "PSRADrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "PSRAWrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "PSRLDrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "PSRLQrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "PSRLWrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "VPSLLDrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "VPSLLQrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "VPSLLWrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "VPSRADrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "VPSRAWrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "VPSRLDrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "VPSRLQrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "VPSRLWrm")>;
+
+// SBWriteResGroup80: latency 8, 4 uops -- 1 cycle on SBPort23 and 3 cycles on
+// SBPort15.
+def SBWriteResGroup80 : SchedWriteRes<[SBPort23,SBPort15]> {
+  let NumMicroOps = 4;
+  let Latency = 8;
+  let ResourceCycles = [1,3];
+}
+def: InstRW<[SBWriteResGroup80], (instregex
+    "MMX_PHADDSWrm64",
+    "MMX_PHADDWrm64",
+    "MMX_PHADDrm64",
+    "MMX_PHSUBDrm64",
+    "MMX_PHSUBSWrm64",
+    "MMX_PHSUBWrm64")>;
+
+// SBWriteResGroup81: latency 8, 4 uops -- 1 cycle on SBPort23 and 3 cycles on
+// SBPort015.
+def SBWriteResGroup81 : SchedWriteRes<[SBPort23,SBPort015]> {
+  let NumMicroOps = 4;
+  let Latency = 8;
+  let ResourceCycles = [1,3];
+}
+def: InstRW<[SBWriteResGroup81], (instregex
+    "CMPXCHG64rm",
+    "CMPXCHG8rm")>;
+
+// SBWriteResGroup82: latency 8, 4 uops -- 1 cycle on SBPort23, 2 cycles on
+// SBPort0 and 1 cycle on SBPort015.
+def SBWriteResGroup82 : SchedWriteRes<[SBPort23,SBPort0,SBPort015]> {
+  let NumMicroOps = 4;
+  let Latency = 8;
+  let ResourceCycles = [1,2,1];
+}
+def: InstRW<[SBWriteResGroup82], (instregex
+    "CMOVA64rm",
+    "CMOVBE64rm")>;
+
+// SBWriteResGroup83: latency 8, 5 uops -- 2 cycles on SBPort23 and 3 cycles
+// on SBPort015.
+def SBWriteResGroup83 : SchedWriteRes<[SBPort23,SBPort015]> {
+  let NumMicroOps = 5;
+  let Latency = 8;
+  let ResourceCycles = [2,3];
+}
+def: InstRW<[SBWriteResGroup83], (instregex
+    "CMPSB",
+    "CMPSL",
+    "CMPSQ",
+    "CMPSW")>;
+
+// SBWriteResGroup84: latency 8, 5 uops -- 1 cycle on SBPort4, 2 cycles on
+// SBPort5 and 2 cycles on SBPort23.
+def SBWriteResGroup84 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> {
+  let NumMicroOps = 5;
+  let Latency = 8;
+  let ResourceCycles = [1,2,2];
+}
+def: InstRW<[SBWriteResGroup84], (instregex
+    "FLDCW16m")>;
+
+// SBWriteResGroup85: latency 8, 5 uops -- 1 cycle on SBPort4, 2 cycles on
+// SBPort23 and 2 cycles on SBPort0.
+def SBWriteResGroup85 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> {
+  let NumMicroOps = 5;
+  let Latency = 8;
+  let ResourceCycles = [1,2,2];
+}
+def: InstRW<[SBWriteResGroup85], (instregex
+    "ROL64mi",
+    "ROL8mi",
+    "ROR64mi",
+    "ROR8mi")>;
+
+// SBWriteResGroup86: latency 8, 5 uops -- 1 cycle on SBPort4, 2 cycles on
+// SBPort23 and 2 cycles on SBPort015.
+// "MOVSL" is anchored with '$' because instregex patterns match as name
+// prefixes: an unanchored "MOVSL" would also select MOVSLDUPrm/MOVSLDUPrr,
+// and MOVSLDUPrm is separately assigned to SBWriteResGroup48.
+// NOTE(review): the prefix-match behaviour is assumed for this TableGen
+// revision -- confirm.
+def SBWriteResGroup86 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
+  let Latency = 8;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,2,2];
+}
+def: InstRW<[SBWriteResGroup86], (instregex "MOVSB")>;
+def: InstRW<[SBWriteResGroup86], (instregex "MOVSL$")>;
+def: InstRW<[SBWriteResGroup86], (instregex "MOVSQ")>;
+def: InstRW<[SBWriteResGroup86], (instregex "MOVSW")>;
+def: InstRW<[SBWriteResGroup86], (instregex "XADD64rm")>;
+def: InstRW<[SBWriteResGroup86], (instregex "XADD8rm")>;
+
+// SBWriteResGroup87: latency 8, 5 uops -- 1 cycle each on SBPort4, SBPort5
+// and SBPort01, plus 2 cycles on SBPort23.
+def SBWriteResGroup87 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> {
+  let NumMicroOps = 5;
+  let Latency = 8;
+  let ResourceCycles = [1,1,1,2];
+}
+def: InstRW<[SBWriteResGroup87], (instregex
+    "FARCALL64")>;
+
+// SBWriteResGroup88: latency 8, 5 uops -- 1 cycle on SBPort4, 2 cycles on
+// SBPort23, 1 cycle on SBPort0 and 1 cycle on SBPort015.
+def SBWriteResGroup88 : SchedWriteRes<[SBPort4,SBPort23,SBPort0,SBPort015]> {
+  let NumMicroOps = 5;
+  let Latency = 8;
+  let ResourceCycles = [1,2,1,1];
+}
+def: InstRW<[SBWriteResGroup88], (instregex
+    "SHLD64mri8",
+    "SHRD64mri8")>;
+
+def SBWriteResGroup89 : SchedWriteRes<[SBPort0,SBPort23]> { // 9-cycle latency, 2 uops; cycles per resource: SBPort0=1, SBPort23=1.
+  let Latency = 9;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup89], (instregex "MMX_PMULUDQirm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "PMADDUBSWrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "PMADDWDrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "PMULDQrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "PMULHRSWrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "PMULHUWrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "PMULHWrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "PMULLDrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "PMULLWrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "PMULUDQrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "PSADBWrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "VPMADDUBSWrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "VPMADDWDrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "VPMULDQrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "VPMULHRSWrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "VPMULHUWrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "VPMULHWrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "VPMULLDrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "VPMULLWrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "VPMULUDQrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "VPSADBWrm")>;
+
+def SBWriteResGroup90 : SchedWriteRes<[SBPort1,SBPort23]> { // 9-cycle latency, 2 uops; cycles per resource: SBPort1=1, SBPort23=1.
+  let Latency = 9;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup90], (instregex "ADDPDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "ADDPSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "ADDSDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "ADDSSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "ADDSUBPDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "ADDSUBPSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "CMPPDrmi")>;
+def: InstRW<[SBWriteResGroup90], (instregex "CMPPSrmi")>;
+def: InstRW<[SBWriteResGroup90], (instregex "CMPSSrm")>; // NOTE(review): no "CMPSDrm" entry in this group although "VCMPSDrm" is listed below - confirm whether that is intended.
+def: InstRW<[SBWriteResGroup90], (instregex "CVTDQ2PSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "CVTPS2DQrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "CVTSI2SD64rm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "CVTSI2SDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "CVTTPS2DQrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "MAXPDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "MAXPSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "MAXSDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "MAXSSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "MINPDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "MINPSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "MINSDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "MINSSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTPI2PSirm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTPS2PIirm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTTPS2PIirm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "POPCNT64rm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "ROUNDPDm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "ROUNDPSm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "ROUNDSDm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "ROUNDSSm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "SUBPDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "SUBPSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "SUBSDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "SUBSSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VADDPDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VADDPSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VADDSDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VADDSSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VADDSUBPDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VADDSUBPSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VCMPPDrmi")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VCMPPSrmi")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VCMPSDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VCMPSSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VCVTDQ2PSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VCVTPS2DQrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VCVTSI2SD64rm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VCVTSI2SDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VCVTTPS2DQrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VMAXPDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VMAXPSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VMAXSDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VMAXSSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VMINPDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VMINPSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VMINSDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VMINSSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VROUNDPDm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VROUNDPSm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VROUNDSDm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VROUNDSSm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VSUBPDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VSUBPSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VSUBSDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VSUBSSrm")>;
+
+def SBWriteResGroup91 : SchedWriteRes<[SBPort23,SBPort0]> { // 9-cycle latency, 3 uops; cycles per resource: SBPort23=1, SBPort0=2.
+  let Latency = 9;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,2];
+}
+def: InstRW<[SBWriteResGroup91], (instregex "VBLENDVPDYrm")>;
+def: InstRW<[SBWriteResGroup91], (instregex "VBLENDVPSYrm")>;
+def: InstRW<[SBWriteResGroup91], (instregex "VMASKMOVPDrm")>;
+def: InstRW<[SBWriteResGroup91], (instregex "VMASKMOVPSrm")>;
+
+def SBWriteResGroup92 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> { // 9-cycle latency, 3 uops; cycles per resource: SBPort0=1, SBPort1=1, SBPort5=1.
+  let Latency = 9;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup92], (instregex "DPPDrri")>;
+def: InstRW<[SBWriteResGroup92], (instregex "VDPPDrri")>;
+
+def SBWriteResGroup93 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { // 9-cycle latency, 3 uops; cycles per resource: SBPort0=1, SBPort1=1, SBPort23=1.
+  let Latency = 9;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup93], (instregex "CVTSD2SI64rm")>;
+def: InstRW<[SBWriteResGroup93], (instregex "CVTSD2SIrm")>;
+def: InstRW<[SBWriteResGroup93], (instregex "CVTSS2SI64rm")>;
+def: InstRW<[SBWriteResGroup93], (instregex "CVTSS2SIrm")>;
+def: InstRW<[SBWriteResGroup93], (instregex "CVTTSD2SI64rm")>;
+def: InstRW<[SBWriteResGroup93], (instregex "CVTTSD2SIrm")>;
+def: InstRW<[SBWriteResGroup93], (instregex "CVTTSS2SI64rm")>;
+def: InstRW<[SBWriteResGroup93], (instregex "CVTTSS2SIrm")>;
+def: InstRW<[SBWriteResGroup93], (instregex "MUL64m")>;
+
+def SBWriteResGroup94 : SchedWriteRes<[SBPort0,SBPort5,SBPort23]> { // 9-cycle latency, 3 uops; cycles per resource: SBPort0=1, SBPort5=1, SBPort23=1.
+  let Latency = 9;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup94], (instregex "VPTESTYrm")>;
+
+def SBWriteResGroup95 : SchedWriteRes<[SBPort5,SBPort01,SBPort23]> { // 9-cycle latency, 3 uops; cycles per resource: SBPort5=1, SBPort01=1, SBPort23=1.
+  let Latency = 9;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup95], (instregex "LD_F32m")>;
+def: InstRW<[SBWriteResGroup95], (instregex "LD_F64m")>;
+def: InstRW<[SBWriteResGroup95], (instregex "LD_F80m")>;
+
+def SBWriteResGroup96 : SchedWriteRes<[SBPort23,SBPort15]> { // 9-cycle latency, 4 uops; cycles per resource: SBPort23=1, SBPort15=3.
+  let Latency = 9;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,3];
+}
+def: InstRW<[SBWriteResGroup96], (instregex "PHADDDrm")>;
+def: InstRW<[SBWriteResGroup96], (instregex "PHADDSWrm128")>;
+def: InstRW<[SBWriteResGroup96], (instregex "PHADDWrm")>;
+def: InstRW<[SBWriteResGroup96], (instregex "PHSUBDrm")>;
+def: InstRW<[SBWriteResGroup96], (instregex "PHSUBSWrm128")>;
+def: InstRW<[SBWriteResGroup96], (instregex "PHSUBWrm")>;
+def: InstRW<[SBWriteResGroup96], (instregex "VPHADDDrm")>;
+def: InstRW<[SBWriteResGroup96], (instregex "VPHADDSWrm128")>;
+def: InstRW<[SBWriteResGroup96], (instregex "VPHADDWrm")>;
+def: InstRW<[SBWriteResGroup96], (instregex "VPHSUBDrm")>;
+def: InstRW<[SBWriteResGroup96], (instregex "VPHSUBSWrm128")>;
+def: InstRW<[SBWriteResGroup96], (instregex "VPHSUBWrm")>;
+
+def SBWriteResGroup97 : SchedWriteRes<[SBPort1,SBPort4,SBPort23]> { // 9-cycle latency, 4 uops; cycles per resource: SBPort1=1, SBPort4=1, SBPort23=2.
+  let Latency = 9;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,2];
+}
+def: InstRW<[SBWriteResGroup97], (instregex "IST_F16m")>;
+def: InstRW<[SBWriteResGroup97], (instregex "IST_F32m")>;
+def: InstRW<[SBWriteResGroup97], (instregex "IST_FP16m")>;
+def: InstRW<[SBWriteResGroup97], (instregex "IST_FP32m")>;
+def: InstRW<[SBWriteResGroup97], (instregex "IST_FP64m")>;
+def: InstRW<[SBWriteResGroup97], (instregex "SHL64mCL")>;
+def: InstRW<[SBWriteResGroup97], (instregex "SHL8mCL")>;
+
+def SBWriteResGroup98 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> { // 9-cycle latency, 6 uops; cycles per resource: SBPort4=1, SBPort23=2, SBPort015=3.
+  let Latency = 9;
+  let NumMicroOps = 6;
+  let ResourceCycles = [1,2,3];
+}
+def: InstRW<[SBWriteResGroup98], (instregex "ADC64mi8")>;
+def: InstRW<[SBWriteResGroup98], (instregex "ADC8mi")>;
+def: InstRW<[SBWriteResGroup98], (instregex "SBB64mi8")>;
+def: InstRW<[SBWriteResGroup98], (instregex "SBB8mi")>;
+
+def SBWriteResGroup99 : SchedWriteRes<[SBPort4,SBPort23,SBPort0,SBPort015]> { // 9-cycle latency, 6 uops; cycles per resource: SBPort4=1, SBPort23=2, SBPort0=2, SBPort015=1.
+  let Latency = 9;
+  let NumMicroOps = 6;
+  let ResourceCycles = [1,2,2,1];
+}
+def: InstRW<[SBWriteResGroup99], (instregex "ADC64mr")>;
+def: InstRW<[SBWriteResGroup99], (instregex "ADC8mr")>;
+def: InstRW<[SBWriteResGroup99], (instregex "SBB64mr")>;
+def: InstRW<[SBWriteResGroup99], (instregex "SBB8mr")>;
+
+def SBWriteResGroup100 : SchedWriteRes<[SBPort4,SBPort5,SBPort23,SBPort0,SBPort015]> { // 9-cycle latency, 6 uops; cycles per resource: SBPort4=1, SBPort5=1, SBPort23=2, SBPort0=1, SBPort015=1.
+  let Latency = 9;
+  let NumMicroOps = 6;
+  let ResourceCycles = [1,1,2,1,1];
+}
+def: InstRW<[SBWriteResGroup100], (instregex "BT64mr")>;
+def: InstRW<[SBWriteResGroup100], (instregex "BTC64mr")>;
+def: InstRW<[SBWriteResGroup100], (instregex "BTR64mr")>;
+def: InstRW<[SBWriteResGroup100], (instregex "BTS64mr")>;
+
+def SBWriteResGroup101 : SchedWriteRes<[SBPort1,SBPort23]> { // 10-cycle latency, 2 uops; cycles per resource: SBPort1=1, SBPort23=1. The AVX entries are the 256-bit (Y) load forms; the 128-bit load forms are mapped by SBWriteResGroup90.
+  let Latency = 10;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup101], (instregex "ADD_F32m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "ADD_F64m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "ILD_F16m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "ILD_F32m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "ILD_F64m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "SUBR_F32m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "SUBR_F64m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "SUB_F32m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "SUB_F64m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VADDPDYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VADDPSYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VADDSUBPDYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VADDSUBPSYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VCMPPDYrmi")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VCMPPSYrmi")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VCVTDQ2PSYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VCVTPS2DQYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VCVTTPS2DQYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VMAXPDYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VMAXPSYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VMINPDYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VMINPSYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VROUNDYPDm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VROUNDYPSm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VSUBPDYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VSUBPSYrm")>;
+
+def SBWriteResGroup102 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { // 10-cycle latency, 3 uops; cycles per resource: SBPort0=1, SBPort1=1, SBPort23=1. Every pattern below is a load (rm) form, matching the SBPort23 usage.
+  let Latency = 10;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup102], (instregex "VCVTSD2SI64rm")>;
+def: InstRW<[SBWriteResGroup102], (instregex "VCVTSD2SIrm")>;
+def: InstRW<[SBWriteResGroup102], (instregex "VCVTSS2SI64rm")>;
+def: InstRW<[SBWriteResGroup102], (instregex "VCVTSS2SIrm")>;
+def: InstRW<[SBWriteResGroup102], (instregex "VCVTTSD2SI64rm")>;
+def: InstRW<[SBWriteResGroup102], (instregex "VCVTTSD2SIrm")>;
+def: InstRW<[SBWriteResGroup102], (instregex "VCVTTSS2SI64rm")>;
+def: InstRW<[SBWriteResGroup102], (instregex "VCVTTSS2SIrm")>;
+
+def SBWriteResGroup103 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> { // 10-cycle latency, 3 uops; cycles per resource: SBPort1=1, SBPort5=1, SBPort23=1.
+  let Latency = 10;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup103], (instregex "CVTDQ2PDrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "CVTPD2DQrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "CVTPD2PSrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "CVTSD2SSrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "CVTSI2SS64rm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "CVTSI2SSrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "CVTTPD2DQrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "MMX_CVTPD2PIirm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "MMX_CVTPI2PDirm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "MMX_CVTTPD2PIirm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "VCVTDQ2PDYrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "VCVTDQ2PDrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "VCVTPD2DQrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "VCVTPD2PSrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "VCVTSD2SSrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "VCVTSI2SS64rm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "VCVTSI2SSrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "VCVTTPD2DQrm")>;
+
+def SBWriteResGroup104 : SchedWriteRes<[SBPort0,SBPort23]> { // 11-cycle latency, 2 uops; cycles per resource: SBPort0=1, SBPort23=1.
+  let Latency = 11;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup104], (instregex "MULPDrm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "MULPSrm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "MULSDrm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "MULSSrm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "PCMPGTQrm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "PHMINPOSUWrm128")>;
+def: InstRW<[SBWriteResGroup104], (instregex "RCPPSm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "RCPSSm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "RSQRTPSm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "RSQRTSSm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "VMULPDrm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "VMULPSrm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "VMULSDrm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "VMULSSrm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "VPCMPGTQrm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "VPHMINPOSUWrm128")>;
+def: InstRW<[SBWriteResGroup104], (instregex "VRCPPSm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "VRCPSSm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "VRSQRTPSm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "VRSQRTSSm")>;
+
+def SBWriteResGroup105 : SchedWriteRes<[SBPort0]> { // 11-cycle latency, 3 uops; cycles per resource: SBPort0=3.
+  let Latency = 11;
+  let NumMicroOps = 3;
+  let ResourceCycles = [3];
+}
+def: InstRW<[SBWriteResGroup105], (instregex "PCMPISTRIrr")>;
+def: InstRW<[SBWriteResGroup105], (instregex "PCMPISTRM128rr")>;
+def: InstRW<[SBWriteResGroup105], (instregex "VPCMPISTRIrr")>;
+def: InstRW<[SBWriteResGroup105], (instregex "VPCMPISTRM128rr")>;
+
+def SBWriteResGroup106 : SchedWriteRes<[SBPort1,SBPort23]> { // 11-cycle latency, 3 uops; cycles per resource: SBPort1=2, SBPort23=1.
+  let Latency = 11;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[SBWriteResGroup106], (instregex "FICOM16m")>;
+def: InstRW<[SBWriteResGroup106], (instregex "FICOM32m")>;
+def: InstRW<[SBWriteResGroup106], (instregex "FICOMP16m")>;
+def: InstRW<[SBWriteResGroup106], (instregex "FICOMP32m")>;
+
+def SBWriteResGroup107 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> { // 11-cycle latency, 3 uops; cycles per resource: SBPort1=1, SBPort5=1, SBPort23=1.
+  let Latency = 11;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup107], (instregex "VCVTPD2DQYrm")>;
+def: InstRW<[SBWriteResGroup107], (instregex "VCVTPD2PSYrm")>;
+def: InstRW<[SBWriteResGroup107], (instregex "VCVTTPD2DQYrm")>;
+
+def SBWriteResGroup108 : SchedWriteRes<[SBPort0,SBPort23,SBPort15]> { // 11-cycle latency, 4 uops; cycles per resource: SBPort0=1, SBPort23=1, SBPort15=2.
+  let Latency = 11;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,2];
+}
+def: InstRW<[SBWriteResGroup108], (instregex "MPSADBWrmi")>;
+def: InstRW<[SBWriteResGroup108], (instregex "VMPSADBWrmi")>;
+
+def SBWriteResGroup109 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> { // 11-cycle latency, 4 uops; cycles per resource: SBPort1=1, SBPort5=2, SBPort23=1.
+  let Latency = 11;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,2,1];
+}
+def: InstRW<[SBWriteResGroup109], (instregex "HADDPDrm")>;
+def: InstRW<[SBWriteResGroup109], (instregex "HADDPSrm")>;
+def: InstRW<[SBWriteResGroup109], (instregex "HSUBPDrm")>;
+def: InstRW<[SBWriteResGroup109], (instregex "HSUBPSrm")>;
+def: InstRW<[SBWriteResGroup109], (instregex "VHADDPDrm")>;
+def: InstRW<[SBWriteResGroup109], (instregex "VHADDPSrm")>;
+def: InstRW<[SBWriteResGroup109], (instregex "VHSUBPDrm")>;
+def: InstRW<[SBWriteResGroup109], (instregex "VHSUBPSrm")>;
+
+def SBWriteResGroup110 : SchedWriteRes<[SBPort5]> { // 12-cycle latency, 2 uops; cycles per resource: SBPort5=2.
+  let Latency = 12;
+  let NumMicroOps = 2;
+  let ResourceCycles = [2];
+}
+def: InstRW<[SBWriteResGroup110], (instregex "AESIMCrr")>;
+def: InstRW<[SBWriteResGroup110], (instregex "VAESIMCrr")>;
+
+def SBWriteResGroup111 : SchedWriteRes<[SBPort0,SBPort23]> { // 12-cycle latency, 2 uops; cycles per resource: SBPort0=1, SBPort23=1.
+  let Latency = 12;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup111], (instregex "MUL_F32m")>;
+def: InstRW<[SBWriteResGroup111], (instregex "MUL_F64m")>;
+def: InstRW<[SBWriteResGroup111], (instregex "VMULPDYrm")>;
+def: InstRW<[SBWriteResGroup111], (instregex "VMULPSYrm")>;
+
+def SBWriteResGroup112 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> { // 12-cycle latency, 4 uops; cycles per resource: SBPort0=1, SBPort1=2, SBPort5=1.
+  let Latency = 12;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,2,1];
+}
+def: InstRW<[SBWriteResGroup112], (instregex "DPPSrri")>;
+def: InstRW<[SBWriteResGroup112], (instregex "VDPPSYrri")>;
+def: InstRW<[SBWriteResGroup112], (instregex "VDPPSrri")>;
+
+def SBWriteResGroup113 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> { // 12-cycle latency, 4 uops; cycles per resource: SBPort1=1, SBPort5=2, SBPort23=1. Covers the 256-bit (Y) horizontal add/sub load forms; the 128-bit forms are mapped by SBWriteResGroup109.
+  let Latency = 12;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,2,1];
+}
+def: InstRW<[SBWriteResGroup113], (instregex "VHADDPDYrm")>;
+def: InstRW<[SBWriteResGroup113], (instregex "VHADDPSYrm")>;
+def: InstRW<[SBWriteResGroup113], (instregex "VHSUBPDYrm")>;
+def: InstRW<[SBWriteResGroup113], (instregex "VHSUBPSYrm")>;
+
+def SBWriteResGroup114 : SchedWriteRes<[SBPort1,SBPort23]> { // 13-cycle latency, 3 uops; cycles per resource: SBPort1=2, SBPort23=1.
+  let Latency = 13;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[SBWriteResGroup114], (instregex "ADD_FI16m")>;
+def: InstRW<[SBWriteResGroup114], (instregex "ADD_FI32m")>;
+def: InstRW<[SBWriteResGroup114], (instregex "SUBR_FI16m")>;
+def: InstRW<[SBWriteResGroup114], (instregex "SUBR_FI32m")>;
+def: InstRW<[SBWriteResGroup114], (instregex "SUB_FI16m")>;
+def: InstRW<[SBWriteResGroup114], (instregex "SUB_FI32m")>;
+
+def SBWriteResGroup115 : SchedWriteRes<[SBPort5,SBPort23,SBPort015]> { // 13-cycle latency, 3 uops; cycles per resource: SBPort5=1, SBPort23=1, SBPort015=1.
+  let Latency = 13;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup115], (instregex "AESDECLASTrm")>;
+def: InstRW<[SBWriteResGroup115], (instregex "AESDECrm")>;
+def: InstRW<[SBWriteResGroup115], (instregex "AESENCLASTrm")>;
+def: InstRW<[SBWriteResGroup115], (instregex "AESENCrm")>;
+def: InstRW<[SBWriteResGroup115], (instregex "VAESDECLASTrm")>;
+def: InstRW<[SBWriteResGroup115], (instregex "VAESDECrm")>;
+def: InstRW<[SBWriteResGroup115], (instregex "VAESENCLASTrm")>;
+def: InstRW<[SBWriteResGroup115], (instregex "VAESENCrm")>;
+
+def SBWriteResGroup116 : SchedWriteRes<[SBPort0]> { // 14-cycle latency, 1 uop; cycles per resource: SBPort0=1.
+  let Latency = 14;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup116], (instregex "DIVPSrr")>;
+def: InstRW<[SBWriteResGroup116], (instregex "DIVSSrr")>;
+def: InstRW<[SBWriteResGroup116], (instregex "SQRTPSr")>;
+def: InstRW<[SBWriteResGroup116], (instregex "SQRTSSr")>;
+def: InstRW<[SBWriteResGroup116], (instregex "VDIVPSrr")>;
+def: InstRW<[SBWriteResGroup116], (instregex "VDIVSSrr")>;
+def: InstRW<[SBWriteResGroup116], (instregex "VSQRTPSr")>;
+
+def SBWriteResGroup117 : SchedWriteRes<[SBPort0,SBPort23]> { // 14-cycle latency, 2 uops; cycles per resource: SBPort0=1, SBPort23=1.
+  let Latency = 14; // NOTE(review): the non-VEX load form SQRTSSm is assigned latency 20 (SBWriteResGroup123) - confirm that 14 is intended for VSQRTSSm.
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup117], (instregex "VSQRTSSm")>;
+
+def SBWriteResGroup118 : SchedWriteRes<[SBPort0,SBPort23,SBPort0]> { // 14-cycle latency, 4 uops; cycles per resource: SBPort0=2, SBPort23=1, SBPort0=1. NOTE(review): SBPort0 is listed twice - confirm whether the third resource was meant to be a different port or port group.
+  let Latency = 14;
+  let NumMicroOps = 4;
+  let ResourceCycles = [2,1,1];
+}
+def: InstRW<[SBWriteResGroup118], (instregex "VRCPPSYm")>;
+def: InstRW<[SBWriteResGroup118], (instregex "VRSQRTPSYm")>;
+
+def SBWriteResGroup119 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { // 15-cycle latency, 3 uops; cycles per resource: SBPort0=1, SBPort1=1, SBPort23=1.
+  let Latency = 15;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup119], (instregex "MUL_FI16m")>;
+def: InstRW<[SBWriteResGroup119], (instregex "MUL_FI32m")>;
+
+def SBWriteResGroup120 : SchedWriteRes<[SBPort0,SBPort1,SBPort5,SBPort23]> { // 15-cycle latency, 4 uops; cycles per resource: SBPort0=1, SBPort1=1, SBPort5=1, SBPort23=1.
+  let Latency = 15;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[SBWriteResGroup120], (instregex "DPPDrmi")>;
+def: InstRW<[SBWriteResGroup120], (instregex "VDPPDrmi")>;
+
+def SBWriteResGroup121 : SchedWriteRes<[SBPort0,SBPort23]> { // 17-cycle latency, 4 uops; cycles per resource: SBPort0=3, SBPort23=1.
+  let Latency = 17;
+  let NumMicroOps = 4;
+  let ResourceCycles = [3,1];
+}
+def: InstRW<[SBWriteResGroup121], (instregex "PCMPISTRIrm")>;
+def: InstRW<[SBWriteResGroup121], (instregex "PCMPISTRM128rm")>;
+def: InstRW<[SBWriteResGroup121], (instregex "VPCMPISTRIrm")>;
+def: InstRW<[SBWriteResGroup121], (instregex "VPCMPISTRM128rm")>;
+
+def SBWriteResGroup122 : SchedWriteRes<[SBPort5,SBPort23]> { // 18-cycle latency, 3 uops; cycles per resource: SBPort5=2, SBPort23=1.
+  let Latency = 18;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[SBWriteResGroup122], (instregex "AESIMCrm")>;
+def: InstRW<[SBWriteResGroup122], (instregex "VAESIMCrm")>;
+
+def SBWriteResGroup123 : SchedWriteRes<[SBPort0,SBPort23]> { // 20-cycle latency, 2 uops; cycles per resource: SBPort0=1, SBPort23=1.
+  let Latency = 20;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup123], (instregex "DIVPSrm")>;
+def: InstRW<[SBWriteResGroup123], (instregex "DIVSSrm")>;
+def: InstRW<[SBWriteResGroup123], (instregex "SQRTPSm")>;
+def: InstRW<[SBWriteResGroup123], (instregex "SQRTSSm")>;
+def: InstRW<[SBWriteResGroup123], (instregex "VDIVPSrm")>;
+def: InstRW<[SBWriteResGroup123], (instregex "VDIVSSrm")>;
+def: InstRW<[SBWriteResGroup123], (instregex "VSQRTPSm")>;
+
+def SBWriteResGroup124 : SchedWriteRes<[SBPort0]> { // 21-cycle latency, 1 uop; cycles per resource: SBPort0=1.
+  let Latency = 21;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup124], (instregex "VSQRTSDr")>;
+
+def SBWriteResGroup125 : SchedWriteRes<[SBPort0,SBPort23]> { // 21-cycle latency, 2 uops; cycles per resource: SBPort0=1, SBPort23=1.
+  let Latency = 21; // NOTE(review): same latency as the register form VSQRTSDr (SBWriteResGroup124), whereas SQRTSDm is 28 against SQRTSDr's 22 - confirm.
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup125], (instregex "VSQRTSDm")>;
+
+def SBWriteResGroup126 : SchedWriteRes<[SBPort0]> { // 22-cycle latency, 1 uop; cycles per resource: SBPort0=1.
+  let Latency = 22;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup126], (instregex "DIVPDrr")>;
+def: InstRW<[SBWriteResGroup126], (instregex "DIVSDrr")>;
+def: InstRW<[SBWriteResGroup126], (instregex "SQRTPDr")>;
+def: InstRW<[SBWriteResGroup126], (instregex "SQRTSDr")>;
+def: InstRW<[SBWriteResGroup126], (instregex "VDIVPDrr")>;
+def: InstRW<[SBWriteResGroup126], (instregex "VDIVSDrr")>;
+def: InstRW<[SBWriteResGroup126], (instregex "VSQRTPDr")>;
+
+def SBWriteResGroup127 : SchedWriteRes<[SBPort0]> { // 24-cycle latency, 1 uop; cycles per resource: SBPort0=1.
+  let Latency = 24;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup127], (instregex "DIVR_FPrST0")>;
+def: InstRW<[SBWriteResGroup127], (instregex "DIVR_FST0r")>;
+def: InstRW<[SBWriteResGroup127], (instregex "DIVR_FrST0")>;
+def: InstRW<[SBWriteResGroup127], (instregex "DIV_FPrST0")>;
+def: InstRW<[SBWriteResGroup127], (instregex "DIV_FST0r")>;
+def: InstRW<[SBWriteResGroup127], (instregex "DIV_FrST0")>;
+
+def SBWriteResGroup128 : SchedWriteRes<[SBPort0,SBPort23]> { // 28-cycle latency, 2 uops; cycles per resource: SBPort0=1, SBPort23=1.
+  let Latency = 28;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup128], (instregex "DIVPDrm")>;
+def: InstRW<[SBWriteResGroup128], (instregex "DIVSDrm")>;
+def: InstRW<[SBWriteResGroup128], (instregex "SQRTPDm")>;
+def: InstRW<[SBWriteResGroup128], (instregex "SQRTSDm")>;
+def: InstRW<[SBWriteResGroup128], (instregex "VDIVPDrm")>;
+def: InstRW<[SBWriteResGroup128], (instregex "VDIVSDrm")>;
+def: InstRW<[SBWriteResGroup128], (instregex "VSQRTPDm")>;
+
+def SBWriteResGroup129 : SchedWriteRes<[SBPort0,SBPort0]> { // 29-cycle latency, 3 uops; cycles per resource: SBPort0=2, SBPort0=1. NOTE(review): SBPort0 is listed twice - confirm whether the second resource was meant to be a different port or port group.
+  let Latency = 29;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[SBWriteResGroup129], (instregex "VDIVPSYrr")>;
+def: InstRW<[SBWriteResGroup129], (instregex "VSQRTPSYr")>;
+
+def SBWriteResGroup130 : SchedWriteRes<[SBPort0,SBPort23]> { // 31-cycle latency, 2 uops; cycles per resource: SBPort0=1, SBPort23=1.
+  let Latency = 31;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup130], (instregex "DIVR_F32m")>;
+def: InstRW<[SBWriteResGroup130], (instregex "DIVR_F64m")>;
+def: InstRW<[SBWriteResGroup130], (instregex "DIV_F32m")>;
+def: InstRW<[SBWriteResGroup130], (instregex "DIV_F64m")>;
+
+def SBWriteResGroup131 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { // 34-cycle latency, 3 uops; cycles per resource: SBPort0=1, SBPort1=1, SBPort23=1.
+  let Latency = 34;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup131], (instregex "DIVR_FI16m")>;
+def: InstRW<[SBWriteResGroup131], (instregex "DIVR_FI32m")>;
+def: InstRW<[SBWriteResGroup131], (instregex "DIV_FI16m")>;
+def: InstRW<[SBWriteResGroup131], (instregex "DIV_FI32m")>;
+
+def SBWriteResGroup132 : SchedWriteRes<[SBPort0,SBPort23,SBPort0]> { // 36-cycle latency, 4 uops; cycles per resource: SBPort0=2, SBPort23=1, SBPort0=1. NOTE(review): SBPort0 is listed twice - confirm whether the third resource was meant to be a different port or port group.
+  let Latency = 36;
+  let NumMicroOps = 4;
+  let ResourceCycles = [2,1,1];
+}
+def: InstRW<[SBWriteResGroup132], (instregex "VDIVPSYrm")>;
+def: InstRW<[SBWriteResGroup132], (instregex "VSQRTPSYm")>;
+
+def SBWriteResGroup133 : SchedWriteRes<[SBPort0,SBPort0]> { // 45-cycle latency, 3 uops; cycles per resource: SBPort0=2, SBPort0=1. NOTE(review): SBPort0 is listed twice - confirm whether the second resource was meant to be a different port or port group.
+  let Latency = 45;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[SBWriteResGroup133], (instregex "VDIVPDYrr")>;
+def: InstRW<[SBWriteResGroup133], (instregex "VSQRTPDYr")>;
+
+def SBWriteResGroup134 : SchedWriteRes<[SBPort0,SBPort23,SBPort0]> { // 52-cycle latency, 4 uops; cycles per resource: SBPort0=2, SBPort23=1, SBPort0=1. NOTE(review): SBPort0 is listed twice - confirm whether the third resource was meant to be a different port or port group.
+  let Latency = 52;
+  let NumMicroOps = 4;
+  let ResourceCycles = [2,1,1];
+}
+def: InstRW<[SBWriteResGroup134], (instregex "VDIVPDYrm")>;
+def: InstRW<[SBWriteResGroup134], (instregex "VSQRTPDYm")>;
+
+def SBWriteResGroup135 : SchedWriteRes<[SBPort0]> { // 14-cycle latency, 1 uop; cycles per resource: SBPort0=1.
+  let Latency = 14; // Matches SQRTSSr (SBWriteResGroup116) and VSQRTSSm (SBWriteResGroup117), both 14. NOTE(review): any test expectation generated from the previous value must be regenerated.
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup135], (instregex "VSQRTSSr")>;
+
 } // SchedModel

Modified: llvm/trunk/test/CodeGen/X86/avx-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-schedule.ll?rev=307529&r1=307528&r2=307529&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-schedule.ll Mon Jul 10 02:53:16 2017
@@ -10,8 +10,8 @@ define <4 x double> @test_addpd(<4 x dou
 ; SANDY-LABEL: test_addpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    vaddpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_addpd:
 ; HASWELL:       # BB#0:
@@ -40,8 +40,8 @@ define <8 x float> @test_addps(<8 x floa
 ; SANDY-LABEL: test_addps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    vaddps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vaddps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_addps:
 ; HASWELL:       # BB#0:
@@ -70,8 +70,8 @@ define <4 x double> @test_addsubpd(<4 x
 ; SANDY-LABEL: test_addsubpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_addsubpd:
 ; HASWELL:       # BB#0:
@@ -101,8 +101,8 @@ define <8 x float> @test_addsubps(<8 x f
 ; SANDY-LABEL: test_addsubps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    vaddsubps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_addsubps:
 ; HASWELL:       # BB#0:
@@ -131,10 +131,10 @@ declare <8 x float> @llvm.x86.avx.addsub
 define <4 x double> @test_andnotpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
 ; SANDY-LABEL: test_andnotpd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SANDY-NEXT:    vandnpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
+; SANDY-NEXT:    vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SANDY-NEXT:    vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; SANDY-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_andnotpd:
 ; HASWELL:       # BB#0:
@@ -172,10 +172,10 @@ define <4 x double> @test_andnotpd(<4 x
 define <8 x float> @test_andnotps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
 ; SANDY-LABEL: test_andnotps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SANDY-NEXT:    vandnps (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
+; SANDY-NEXT:    vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SANDY-NEXT:    vandnps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; SANDY-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_andnotps:
 ; HASWELL:       # BB#0:
@@ -213,10 +213,10 @@ define <8 x float> @test_andnotps(<8 x f
 define <4 x double> @test_andpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
 ; SANDY-LABEL: test_andpd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SANDY-NEXT:    vandpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
+; SANDY-NEXT:    vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SANDY-NEXT:    vandpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_andpd:
 ; HASWELL:       # BB#0:
@@ -252,10 +252,10 @@ define <4 x double> @test_andpd(<4 x dou
 define <8 x float> @test_andps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
 ; SANDY-LABEL: test_andps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SANDY-NEXT:    vandps (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
+; SANDY-NEXT:    vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SANDY-NEXT:    vandps (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_andps:
 ; HASWELL:       # BB#0:
@@ -291,10 +291,10 @@ define <8 x float> @test_andps(<8 x floa
 define <4 x double> @test_blendpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
 ; SANDY-LABEL: test_blendpd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50]
+; SANDY-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:1.00]
 ; SANDY-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_blendpd:
 ; HASWELL:       # BB#0:
@@ -326,9 +326,9 @@ define <4 x double> @test_blendpd(<4 x d
 define <8 x float> @test_blendps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
 ; SANDY-LABEL: test_blendps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50]
-; SANDY-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:1.00]
+; SANDY-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [8:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_blendps:
 ; HASWELL:       # BB#0:
@@ -356,9 +356,9 @@ define <8 x float> @test_blendps(<8 x fl
 define <4 x double> @test_blendvpd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) {
 ; SANDY-LABEL: test_blendvpd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
-; SANDY-NEXT:    vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
+; SANDY-NEXT:    vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_blendvpd:
 ; HASWELL:       # BB#0:
@@ -387,9 +387,9 @@ declare <4 x double> @llvm.x86.avx.blend
 define <8 x float> @test_blendvps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) {
 ; SANDY-LABEL: test_blendvps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
-; SANDY-NEXT:    vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
+; SANDY-NEXT:    vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_blendvps:
 ; HASWELL:       # BB#0:
@@ -418,8 +418,8 @@ declare <8 x float> @llvm.x86.avx.blendv
 define <8 x float> @test_broadcastf128(<4 x float> *%a0) {
 ; SANDY-LABEL: test_broadcastf128:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [5:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [3:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_broadcastf128:
 ; HASWELL:       # BB#0:
@@ -443,8 +443,8 @@ define <8 x float> @test_broadcastf128(<
 define <4 x double> @test_broadcastsd_ymm(double *%a0) {
 ; SANDY-LABEL: test_broadcastsd_ymm:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vbroadcastsd (%rdi), %ymm0 # sched: [5:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_broadcastsd_ymm:
 ; HASWELL:       # BB#0:
@@ -469,8 +469,8 @@ define <4 x double> @test_broadcastsd_ym
 define <4 x float> @test_broadcastss(float *%a0) {
 ; SANDY-LABEL: test_broadcastss:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vbroadcastss (%rdi), %xmm0 # sched: [4:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vbroadcastss (%rdi), %xmm0 # sched: [6:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_broadcastss:
 ; HASWELL:       # BB#0:
@@ -496,7 +496,7 @@ define <8 x float> @test_broadcastss_ymm
 ; SANDY-LABEL: test_broadcastss_ymm:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vbroadcastss (%rdi), %ymm0 # sched: [5:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_broadcastss_ymm:
 ; HASWELL:       # BB#0:
@@ -522,9 +522,9 @@ define <4 x double> @test_cmppd(<4 x dou
 ; SANDY-LABEL: test_cmppd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
-; SANDY-NEXT:    vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; SANDY-NEXT:    vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; SANDY-NEXT:    vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cmppd:
 ; HASWELL:       # BB#0:
@@ -560,9 +560,9 @@ define <8 x float> @test_cmpps(<8 x floa
 ; SANDY-LABEL: test_cmpps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
-; SANDY-NEXT:    vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; SANDY-NEXT:    vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; SANDY-NEXT:    vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cmpps:
 ; HASWELL:       # BB#0:
@@ -598,9 +598,9 @@ define <4 x double> @test_cvtdq2pd(<4 x
 ; SANDY-LABEL: test_cvtdq2pd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00]
-; SANDY-NEXT:    vcvtdq2pd (%rdi), %ymm1 # sched: [8:1.00]
+; SANDY-NEXT:    vcvtdq2pd (%rdi), %ymm1 # sched: [10:1.00]
 ; SANDY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cvtdq2pd:
 ; HASWELL:       # BB#0:
@@ -632,12 +632,12 @@ define <4 x double> @test_cvtdq2pd(<4 x
 define <8 x float> @test_cvtdq2ps(<8 x i32> %a0, <8 x i32> *%a1) {
 ; SANDY-LABEL: test_cvtdq2ps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vcvtdq2ps %ymm0, %ymm0 # sched: [4:1.00]
-; SANDY-NEXT:    vmovaps (%rdi), %xmm1 # sched: [4:0.50]
-; SANDY-NEXT:    vinsertf128 $1, 16(%rdi), %ymm1, %ymm1 # sched: [5:1.00]
-; SANDY-NEXT:    vcvtdq2ps %ymm1, %ymm1 # sched: [4:1.00]
+; SANDY-NEXT:    vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
+; SANDY-NEXT:    vmovaps (%rdi), %xmm1 # sched: [6:0.50]
+; SANDY-NEXT:    vinsertf128 $1, 16(%rdi), %ymm1, %ymm1 # sched: [7:1.00]
+; SANDY-NEXT:    vcvtdq2ps %ymm1, %ymm1 # sched: [3:1.00]
 ; SANDY-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cvtdq2ps:
 ; HASWELL:       # BB#0:
@@ -669,10 +669,10 @@ define <8 x float> @test_cvtdq2ps(<8 x i
 define <8 x i32> @test_cvtpd2dq(<4 x double> %a0, <4 x double> *%a1) {
 ; SANDY-LABEL: test_cvtpd2dq:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vcvttpd2dq %ymm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vcvttpd2dqy (%rdi), %xmm1 # sched: [7:1.00]
+; SANDY-NEXT:    vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00]
+; SANDY-NEXT:    vcvttpd2dqy (%rdi), %xmm1 # sched: [11:1.00]
 ; SANDY-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cvtpd2dq:
 ; HASWELL:       # BB#0:
@@ -704,10 +704,10 @@ define <8 x i32> @test_cvtpd2dq(<4 x dou
 define <8 x float> @test_cvtpd2ps(<4 x double> %a0, <4 x double> *%a1) {
 ; SANDY-LABEL: test_cvtpd2ps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vcvtpd2ps %ymm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vcvtpd2psy (%rdi), %xmm1 # sched: [7:1.00]
+; SANDY-NEXT:    vcvtpd2ps %ymm0, %xmm0 # sched: [4:1.00]
+; SANDY-NEXT:    vcvtpd2psy (%rdi), %xmm1 # sched: [11:1.00]
 ; SANDY-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cvtpd2ps:
 ; HASWELL:       # BB#0:
@@ -741,8 +741,8 @@ define <8 x i32> @test_cvtps2dq(<8 x flo
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vcvttps2dq (%rdi), %ymm1 # sched: [7:1.00]
-; SANDY-NEXT:    vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cvtps2dq:
 ; HASWELL:       # BB#0:
@@ -774,9 +774,9 @@ define <8 x i32> @test_cvtps2dq(<8 x flo
 define <4 x double> @test_divpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
 ; SANDY-LABEL: test_divpd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vdivpd %ymm1, %ymm0, %ymm0 # sched: [12:1.00]
-; SANDY-NEXT:    vdivpd (%rdi), %ymm0, %ymm0 # sched: [16:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vdivpd %ymm1, %ymm0, %ymm0 # sched: [45:3.00]
+; SANDY-NEXT:    vdivpd (%rdi), %ymm0, %ymm0 # sched: [52:3.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_divpd:
 ; HASWELL:       # BB#0:
@@ -804,9 +804,9 @@ define <4 x double> @test_divpd(<4 x dou
 define <8 x float> @test_divps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
 ; SANDY-LABEL: test_divps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vdivps %ymm1, %ymm0, %ymm0 # sched: [12:1.00]
-; SANDY-NEXT:    vdivps (%rdi), %ymm0, %ymm0 # sched: [16:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vdivps %ymm1, %ymm0, %ymm0 # sched: [29:3.00]
+; SANDY-NEXT:    vdivps (%rdi), %ymm0, %ymm0 # sched: [36:3.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_divps:
 ; HASWELL:       # BB#0:
@@ -834,9 +834,9 @@ define <8 x float> @test_divps(<8 x floa
 define <8 x float> @test_dpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
 ; SANDY-LABEL: test_dpps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
+; SANDY-NEXT:    vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [12:2.00]
 ; SANDY-NEXT:    vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_dpps:
 ; HASWELL:       # BB#0:
@@ -866,9 +866,9 @@ define <4 x float> @test_extractf128(<8
 ; SANDY-LABEL: test_extractf128:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vextractf128 $1, %ymm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT:    vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00]
+; SANDY-NEXT:    vextractf128 $1, %ymm1, (%rdi) # sched: [5:1.00]
 ; SANDY-NEXT:    vzeroupper # sched: [?:0.000000e+00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_extractf128:
 ; HASWELL:       # BB#0:
@@ -900,7 +900,7 @@ define <4 x double> @test_haddpd(<4 x do
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vhaddpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_haddpd:
 ; HASWELL:       # BB#0:
@@ -929,9 +929,9 @@ declare <4 x double> @llvm.x86.avx.hadd.
 define <8 x float> @test_haddps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
 ; SANDY-LABEL: test_haddps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    vhaddps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
+; SANDY-NEXT:    vhaddps (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_haddps:
 ; HASWELL:       # BB#0:
@@ -960,9 +960,9 @@ declare <8 x float> @llvm.x86.avx.hadd.p
 define <4 x double> @test_hsubpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
 ; SANDY-LABEL: test_hsubpd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    vhsubpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
+; SANDY-NEXT:    vhsubpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_hsubpd:
 ; HASWELL:       # BB#0:
@@ -991,9 +991,9 @@ declare <4 x double> @llvm.x86.avx.hsub.
 define <8 x float> @test_hsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
 ; SANDY-LABEL: test_hsubps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    vhsubps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
+; SANDY-NEXT:    vhsubps (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_hsubps:
 ; HASWELL:       # BB#0:
@@ -1023,9 +1023,9 @@ define <8 x float> @test_insertf128(<8 x
 ; SANDY-LABEL: test_insertf128:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00]
-; SANDY-NEXT:    vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; SANDY-NEXT:    vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
 ; SANDY-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_insertf128:
 ; HASWELL:       # BB#0:
@@ -1059,8 +1059,8 @@ define <8 x float> @test_insertf128(<8 x
 define <32 x i8> @test_lddqu(i8* %a0) {
 ; SANDY-LABEL: test_lddqu:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vlddqu (%rdi), %ymm0 # sched: [4:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vlddqu (%rdi), %ymm0 # sched: [6:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_lddqu:
 ; HASWELL:       # BB#0:
@@ -1084,10 +1084,10 @@ declare <32 x i8> @llvm.x86.avx.ldu.dq.2
 define <2 x double> @test_maskmovpd(i8* %a0, <2 x i64> %a1, <2 x double> %a2) {
 ; SANDY-LABEL: test_maskmovpd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [?:0.000000e+00]
-; SANDY-NEXT:    vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [?:0.000000e+00]
+; SANDY-NEXT:    vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:2.00]
+; SANDY-NEXT:    vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
 ; SANDY-NEXT:    vmovapd %xmm2, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_maskmovpd:
 ; HASWELL:       # BB#0:
@@ -1119,10 +1119,10 @@ declare void @llvm.x86.avx.maskstore.pd(
 define <4 x double> @test_maskmovpd_ymm(i8* %a0, <4 x i64> %a1, <4 x double> %a2) {
 ; SANDY-LABEL: test_maskmovpd_ymm:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [?:0.000000e+00]
+; SANDY-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [5:1.00]
 ; SANDY-NEXT:    vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [?:0.000000e+00]
 ; SANDY-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_maskmovpd_ymm:
 ; HASWELL:       # BB#0:
@@ -1154,10 +1154,10 @@ declare void @llvm.x86.avx.maskstore.pd.
 define <4 x float> @test_maskmovps(i8* %a0, <4 x i32> %a1, <4 x float> %a2) {
 ; SANDY-LABEL: test_maskmovps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [?:0.000000e+00]
-; SANDY-NEXT:    vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [?:0.000000e+00]
+; SANDY-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:2.00]
+; SANDY-NEXT:    vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
 ; SANDY-NEXT:    vmovaps %xmm2, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_maskmovps:
 ; HASWELL:       # BB#0:
@@ -1189,10 +1189,10 @@ declare void @llvm.x86.avx.maskstore.ps(
 define <8 x float> @test_maskmovps_ymm(i8* %a0, <8 x i32> %a1, <8 x float> %a2) {
 ; SANDY-LABEL: test_maskmovps_ymm:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [?:0.000000e+00]
+; SANDY-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [1:0.50]
 ; SANDY-NEXT:    vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [?:0.000000e+00]
 ; SANDY-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_maskmovps_ymm:
 ; HASWELL:       # BB#0:
@@ -1225,8 +1225,8 @@ define <4 x double> @test_maxpd(<4 x dou
 ; SANDY-LABEL: test_maxpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmaxpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmaxpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_maxpd:
 ; HASWELL:       # BB#0:
@@ -1256,8 +1256,8 @@ define <8 x float> @test_maxps(<8 x floa
 ; SANDY-LABEL: test_maxps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmaxps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmaxps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_maxps:
 ; HASWELL:       # BB#0:
@@ -1288,7 +1288,7 @@ define <4 x double> @test_minpd(<4 x dou
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vminpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_minpd:
 ; HASWELL:       # BB#0:
@@ -1319,7 +1319,7 @@ define <8 x float> @test_minps(<8 x floa
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vminps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_minps:
 ; HASWELL:       # BB#0:
@@ -1348,10 +1348,10 @@ declare <8 x float> @llvm.x86.avx.min.ps
 define <4 x double> @test_movapd(<4 x double> *%a0, <4 x double> *%a1) {
 ; SANDY-LABEL: test_movapd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmovapd (%rdi), %ymm0 # sched: [4:0.50]
+; SANDY-NEXT:    vmovapd (%rdi), %ymm0 # sched: [7:0.50]
 ; SANDY-NEXT:    vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmovapd %ymm0, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmovapd %ymm0, (%rsi) # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movapd:
 ; HASWELL:       # BB#0:
@@ -1382,10 +1382,10 @@ define <4 x double> @test_movapd(<4 x do
 define <8 x float> @test_movaps(<8 x float> *%a0, <8 x float> *%a1) {
 ; SANDY-LABEL: test_movaps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmovaps (%rdi), %ymm0 # sched: [4:0.50]
+; SANDY-NEXT:    vmovaps (%rdi), %ymm0 # sched: [7:0.50]
 ; SANDY-NEXT:    vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmovaps %ymm0, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmovaps %ymm0, (%rsi) # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movaps:
 ; HASWELL:       # BB#0:
@@ -1417,9 +1417,9 @@ define <4 x double> @test_movddup(<4 x d
 ; SANDY-LABEL: test_movddup:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00]
-; SANDY-NEXT:    vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [4:0.50]
+; SANDY-NEXT:    vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50]
 ; SANDY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movddup:
 ; HASWELL:       # BB#0:
@@ -1451,9 +1451,9 @@ define <4 x double> @test_movddup(<4 x d
 define i32 @test_movmskpd(<4 x double> %a0) {
 ; SANDY-LABEL: test_movmskpd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmovmskpd %ymm0, %eax # sched: [1:0.33]
+; SANDY-NEXT:    vmovmskpd %ymm0, %eax # sched: [2:1.00]
 ; SANDY-NEXT:    vzeroupper # sched: [?:0.000000e+00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movmskpd:
 ; HASWELL:       # BB#0:
@@ -1479,9 +1479,9 @@ declare i32 @llvm.x86.avx.movmsk.pd.256(
 define i32 @test_movmskps(<8 x float> %a0) {
 ; SANDY-LABEL: test_movmskps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmovmskps %ymm0, %eax # sched: [1:0.33]
+; SANDY-NEXT:    vmovmskps %ymm0, %eax # sched: [3:1.00]
 ; SANDY-NEXT:    vzeroupper # sched: [?:0.000000e+00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movmskps:
 ; HASWELL:       # BB#0:
@@ -1508,8 +1508,8 @@ define <4 x double> @test_movntpd(<4 x d
 ; SANDY-LABEL: test_movntpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmovntpd %ymm0, (%rdi) # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmovntpd %ymm0, (%rdi) # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movntpd:
 ; HASWELL:       # BB#0:
@@ -1537,8 +1537,8 @@ define <8 x float> @test_movntps(<8 x fl
 ; SANDY-LABEL: test_movntps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmovntps %ymm0, (%rdi) # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmovntps %ymm0, (%rdi) # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movntps:
 ; HASWELL:       # BB#0:
@@ -1566,9 +1566,9 @@ define <8 x float> @test_movshdup(<8 x f
 ; SANDY-LABEL: test_movshdup:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00]
-; SANDY-NEXT:    vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [4:0.50]
+; SANDY-NEXT:    vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50]
 ; SANDY-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movshdup:
 ; HASWELL:       # BB#0:
@@ -1601,9 +1601,9 @@ define <8 x float> @test_movsldup(<8 x f
 ; SANDY-LABEL: test_movsldup:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00]
-; SANDY-NEXT:    vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [4:0.50]
+; SANDY-NEXT:    vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50]
 ; SANDY-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movsldup:
 ; HASWELL:       # BB#0:
@@ -1635,12 +1635,12 @@ define <8 x float> @test_movsldup(<8 x f
 define <4 x double> @test_movupd(<4 x double> *%a0, <4 x double> *%a1) {
 ; SANDY-LABEL: test_movupd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmovups (%rdi), %xmm0 # sched: [4:0.50]
-; SANDY-NEXT:    vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; SANDY-NEXT:    vmovups (%rdi), %xmm0 # sched: [6:0.50]
+; SANDY-NEXT:    vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:1.00]
 ; SANDY-NEXT:    vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    vextractf128 $1, %ymm0, 16(%rsi) # sched: [1:1.00]
-; SANDY-NEXT:    vmovupd %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vextractf128 $1, %ymm0, 16(%rsi) # sched: [5:1.00]
+; SANDY-NEXT:    vmovupd %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movupd:
 ; HASWELL:       # BB#0:
@@ -1671,12 +1671,12 @@ define <4 x double> @test_movupd(<4 x do
 define <8 x float> @test_movups(<8 x float> *%a0, <8 x float> *%a1) {
 ; SANDY-LABEL: test_movups:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmovups (%rdi), %xmm0 # sched: [4:0.50]
-; SANDY-NEXT:    vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; SANDY-NEXT:    vmovups (%rdi), %xmm0 # sched: [6:0.50]
+; SANDY-NEXT:    vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:1.00]
 ; SANDY-NEXT:    vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    vextractf128 $1, %ymm0, 16(%rsi) # sched: [1:1.00]
-; SANDY-NEXT:    vmovups %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vextractf128 $1, %ymm0, 16(%rsi) # sched: [5:1.00]
+; SANDY-NEXT:    vmovups %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movups:
 ; HASWELL:       # BB#0:
@@ -1708,8 +1708,8 @@ define <4 x double> @test_mulpd(<4 x dou
 ; SANDY-LABEL: test_mulpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT:    vmulpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmulpd (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_mulpd:
 ; HASWELL:       # BB#0:
@@ -1738,8 +1738,8 @@ define <8 x float> @test_mulps(<8 x floa
 ; SANDY-LABEL: test_mulps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT:    vmulps (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmulps (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_mulps:
 ; HASWELL:       # BB#0:
@@ -1767,10 +1767,10 @@ define <8 x float> @test_mulps(<8 x floa
 define <4 x double> @orpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
 ; SANDY-LABEL: orpd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SANDY-NEXT:    vorpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
+; SANDY-NEXT:    vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SANDY-NEXT:    vorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; SANDY-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: orpd:
 ; HASWELL:       # BB#0:
@@ -1806,10 +1806,10 @@ define <4 x double> @orpd(<4 x double> %
 define <8 x float> @test_orps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
 ; SANDY-LABEL: test_orps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SANDY-NEXT:    vorps (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
+; SANDY-NEXT:    vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SANDY-NEXT:    vorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; SANDY-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_orps:
 ; HASWELL:       # BB#0:
@@ -1846,9 +1846,9 @@ define <2 x double> @test_permilpd(<2 x
 ; SANDY-LABEL: test_permilpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00]
-; SANDY-NEXT:    vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [5:1.00]
+; SANDY-NEXT:    vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00]
 ; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_permilpd:
 ; HASWELL:       # BB#0:
@@ -1880,10 +1880,10 @@ define <2 x double> @test_permilpd(<2 x
 define <4 x double> @test_permilpd_ymm(<4 x double> %a0, <4 x double> *%a1) {
 ; SANDY-LABEL: test_permilpd_ymm:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00]
+; SANDY-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [8:1.00]
 ; SANDY-NEXT:    vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [5:1.00]
 ; SANDY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_permilpd_ymm:
 ; HASWELL:       # BB#0:
@@ -1916,9 +1916,9 @@ define <4 x float> @test_permilps(<4 x f
 ; SANDY-LABEL: test_permilps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00]
-; SANDY-NEXT:    vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [5:1.00]
+; SANDY-NEXT:    vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00]
 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_permilps:
 ; HASWELL:       # BB#0:
@@ -1950,10 +1950,10 @@ define <4 x float> @test_permilps(<4 x f
 define <8 x float> @test_permilps_ymm(<8 x float> %a0, <8 x float> *%a1) {
 ; SANDY-LABEL: test_permilps_ymm:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
+; SANDY-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [8:1.00]
 ; SANDY-NEXT:    vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [5:1.00]
 ; SANDY-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_permilps_ymm:
 ; HASWELL:       # BB#0:
@@ -1986,8 +1986,8 @@ define <2 x double> @test_permilvarpd(<2
 ; SANDY-LABEL: test_permilvarpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT:    vpermilpd (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpermilpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_permilvarpd:
 ; HASWELL:       # BB#0:
@@ -2018,7 +2018,7 @@ define <4 x double> @test_permilvarpd_ym
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
 ; SANDY-NEXT:    vpermilpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_permilvarpd_ymm:
 ; HASWELL:       # BB#0:
@@ -2048,8 +2048,8 @@ define <4 x float> @test_permilvarps(<4
 ; SANDY-LABEL: test_permilvarps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT:    vpermilps (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpermilps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_permilvarps:
 ; HASWELL:       # BB#0:
@@ -2080,7 +2080,7 @@ define <8 x float> @test_permilvarps_ymm
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
 ; SANDY-NEXT:    vpermilps (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_permilvarps_ymm:
 ; HASWELL:       # BB#0:
@@ -2112,7 +2112,7 @@ define <8 x float> @test_rcpps(<8 x floa
 ; SANDY-NEXT:    vrcpps %ymm0, %ymm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vrcpps (%rdi), %ymm1 # sched: [9:1.00]
 ; SANDY-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_rcpps:
 ; HASWELL:       # BB#0:
@@ -2148,7 +2148,7 @@ define <4 x double> @test_roundpd(<4 x d
 ; SANDY-NEXT:    vroundpd $7, %ymm0, %ymm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vroundpd $7, (%rdi), %ymm1 # sched: [7:1.00]
 ; SANDY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_roundpd:
 ; HASWELL:       # BB#0:
@@ -2184,7 +2184,7 @@ define <8 x float> @test_roundps(<8 x fl
 ; SANDY-NEXT:    vroundps $7, %ymm0, %ymm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vroundps $7, (%rdi), %ymm1 # sched: [7:1.00]
 ; SANDY-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_roundps:
 ; HASWELL:       # BB#0:
@@ -2217,10 +2217,10 @@ declare <8 x float> @llvm.x86.avx.round.
 define <8 x float> @test_rsqrtps(<8 x float> %a0, <8 x float> *%a1) {
 ; SANDY-LABEL: test_rsqrtps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vrsqrtps %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT:    vrsqrtps (%rdi), %ymm1 # sched: [9:1.00]
+; SANDY-NEXT:    vrsqrtps (%rdi), %ymm1 # sched: [14:3.00]
+; SANDY-NEXT:    vrsqrtps %ymm0, %ymm0 # sched: [7:3.00]
 ; SANDY-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_rsqrtps:
 ; HASWELL:       # BB#0:
@@ -2254,9 +2254,9 @@ define <4 x double> @test_shufpd(<4 x do
 ; SANDY-LABEL: test_shufpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00]
-; SANDY-NEXT:    vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [5:1.00]
+; SANDY-NEXT:    vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00]
 ; SANDY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_shufpd:
 ; HASWELL:       # BB#0:
@@ -2289,8 +2289,8 @@ define <8 x float> @test_shufps(<8 x flo
 ; SANDY-LABEL: test_shufps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00]
-; SANDY-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [5:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [8:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_shufps:
 ; HASWELL:       # BB#0:
@@ -2318,10 +2318,10 @@ define <8 x float> @test_shufps(<8 x flo
 define <4 x double> @test_sqrtpd(<4 x double> %a0, <4 x double> *%a1) {
 ; SANDY-LABEL: test_sqrtpd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vsqrtpd %ymm0, %ymm0 # sched: [15:1.00]
-; SANDY-NEXT:    vsqrtpd (%rdi), %ymm1 # sched: [19:1.00]
+; SANDY-NEXT:    vsqrtpd (%rdi), %ymm1 # sched: [52:3.00]
+; SANDY-NEXT:    vsqrtpd %ymm0, %ymm0 # sched: [45:3.00]
 ; SANDY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_sqrtpd:
 ; HASWELL:       # BB#0:
@@ -2354,10 +2354,10 @@ declare <4 x double> @llvm.x86.avx.sqrt.
 define <8 x float> @test_sqrtps(<8 x float> %a0, <8 x float> *%a1) {
 ; SANDY-LABEL: test_sqrtps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vsqrtps %ymm0, %ymm0 # sched: [15:1.00]
-; SANDY-NEXT:    vsqrtps (%rdi), %ymm1 # sched: [19:1.00]
+; SANDY-NEXT:    vsqrtps (%rdi), %ymm1 # sched: [36:3.00]
+; SANDY-NEXT:    vsqrtps %ymm0, %ymm0 # sched: [29:3.00]
 ; SANDY-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_sqrtps:
 ; HASWELL:       # BB#0:
@@ -2391,8 +2391,8 @@ define <4 x double> @test_subpd(<4 x dou
 ; SANDY-LABEL: test_subpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    vsubpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_subpd:
 ; HASWELL:       # BB#0:
@@ -2421,8 +2421,8 @@ define <8 x float> @test_subps(<8 x floa
 ; SANDY-LABEL: test_subps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    vsubps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_subps:
 ; HASWELL:       # BB#0:
@@ -2451,11 +2451,11 @@ define i32 @test_testpd(<2 x double> %a0
 ; SANDY-LABEL: test_testpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    xorl %eax, %eax # sched: [1:0.33]
-; SANDY-NEXT:    vtestpd %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT:    setb %al # sched: [1:0.33]
-; SANDY-NEXT:    vtestpd (%rdi), %xmm0 # sched: [5:0.50]
+; SANDY-NEXT:    vtestpd %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-NEXT:    setb %al # sched: [1:1.00]
+; SANDY-NEXT:    vtestpd (%rdi), %xmm0 # sched: [7:1.00]
 ; SANDY-NEXT:    adcl $0, %eax # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_testpd:
 ; HASWELL:       # BB#0:
@@ -2495,12 +2495,12 @@ define i32 @test_testpd_ymm(<4 x double>
 ; SANDY-LABEL: test_testpd_ymm:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    xorl %eax, %eax # sched: [1:0.33]
-; SANDY-NEXT:    vtestpd %ymm1, %ymm0 # sched: [1:0.33]
-; SANDY-NEXT:    setb %al # sched: [1:0.33]
-; SANDY-NEXT:    vtestpd (%rdi), %ymm0 # sched: [5:0.50]
+; SANDY-NEXT:    vtestpd %ymm1, %ymm0 # sched: [1:1.00]
+; SANDY-NEXT:    setb %al # sched: [1:1.00]
+; SANDY-NEXT:    vtestpd (%rdi), %ymm0 # sched: [8:1.00]
 ; SANDY-NEXT:    adcl $0, %eax # sched: [1:0.33]
 ; SANDY-NEXT:    vzeroupper # sched: [?:0.000000e+00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_testpd_ymm:
 ; HASWELL:       # BB#0:
@@ -2542,11 +2542,11 @@ define i32 @test_testps(<4 x float> %a0,
 ; SANDY-LABEL: test_testps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    xorl %eax, %eax # sched: [1:0.33]
-; SANDY-NEXT:    vtestps %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT:    setb %al # sched: [1:0.33]
-; SANDY-NEXT:    vtestps (%rdi), %xmm0 # sched: [5:0.50]
+; SANDY-NEXT:    vtestps %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-NEXT:    setb %al # sched: [1:1.00]
+; SANDY-NEXT:    vtestps (%rdi), %xmm0 # sched: [7:1.00]
 ; SANDY-NEXT:    adcl $0, %eax # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_testps:
 ; HASWELL:       # BB#0:
@@ -2586,12 +2586,12 @@ define i32 @test_testps_ymm(<8 x float>
 ; SANDY-LABEL: test_testps_ymm:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    xorl %eax, %eax # sched: [1:0.33]
-; SANDY-NEXT:    vtestps %ymm1, %ymm0 # sched: [1:0.33]
-; SANDY-NEXT:    setb %al # sched: [1:0.33]
-; SANDY-NEXT:    vtestps (%rdi), %ymm0 # sched: [5:0.50]
+; SANDY-NEXT:    vtestps %ymm1, %ymm0 # sched: [1:1.00]
+; SANDY-NEXT:    setb %al # sched: [1:1.00]
+; SANDY-NEXT:    vtestps (%rdi), %ymm0 # sched: [8:1.00]
 ; SANDY-NEXT:    adcl $0, %eax # sched: [1:0.33]
 ; SANDY-NEXT:    vzeroupper # sched: [?:0.000000e+00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_testps_ymm:
 ; HASWELL:       # BB#0:
@@ -2635,7 +2635,7 @@ define <4 x double> @test_unpckhpd(<4 x
 ; SANDY-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
 ; SANDY-NEXT:    vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [5:1.00]
 ; SANDY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_unpckhpd:
 ; HASWELL:       # BB#0:
@@ -2669,7 +2669,7 @@ define <8 x float> @test_unpckhps(<8 x f
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
 ; SANDY-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [5:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_unpckhps:
 ; HASWELL:       # BB#0:
@@ -2698,9 +2698,9 @@ define <4 x double> @test_unpcklpd(<4 x
 ; SANDY-LABEL: test_unpcklpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
-; SANDY-NEXT:    vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [5:1.00]
+; SANDY-NEXT:    vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00]
 ; SANDY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_unpcklpd:
 ; HASWELL:       # BB#0:
@@ -2733,8 +2733,8 @@ define <8 x float> @test_unpcklps(<8 x f
 ; SANDY-LABEL: test_unpcklps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
-; SANDY-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [5:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_unpcklps:
 ; HASWELL:       # BB#0:
@@ -2762,10 +2762,10 @@ define <8 x float> @test_unpcklps(<8 x f
 define <4 x double> @test_xorpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
 ; SANDY-LABEL: test_xorpd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SANDY-NEXT:    vxorpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
+; SANDY-NEXT:    vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SANDY-NEXT:    vxorpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_xorpd:
 ; HASWELL:       # BB#0:
@@ -2801,10 +2801,10 @@ define <4 x double> @test_xorpd(<4 x dou
 define <8 x float> @test_xorps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
 ; SANDY-LABEL: test_xorps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SANDY-NEXT:    vxorps (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
+; SANDY-NEXT:    vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SANDY-NEXT:    vxorps (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_xorps:
 ; HASWELL:       # BB#0:
@@ -2841,7 +2841,7 @@ define void @test_zeroall() {
 ; SANDY-LABEL: test_zeroall:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vzeroall # sched: [?:0.000000e+00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_zeroall:
 ; HASWELL:       # BB#0:
@@ -2866,7 +2866,7 @@ define void @test_zeroupper() {
 ; SANDY-LABEL: test_zeroupper:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vzeroupper # sched: [?:0.000000e+00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_zeroupper:
 ; HASWELL:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/extractelement-legalization-store-ordering.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/extractelement-legalization-store-ordering.ll?rev=307529&r1=307528&r2=307529&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/extractelement-legalization-store-ordering.ll (original)
+++ llvm/trunk/test/CodeGen/X86/extractelement-legalization-store-ordering.ll Mon Jul 10 02:53:16 2017
@@ -15,18 +15,18 @@ define void @test_extractelement_legaliz
 ; CHECK-NEXT:    pushl %esi
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    paddd (%ecx), %xmm0
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; CHECK-NEXT:    paddd (%edx), %xmm0
-; CHECK-NEXT:    movdqa %xmm0, (%edx)
-; CHECK-NEXT:    movl (%edx), %esi
-; CHECK-NEXT:    movl 4(%edx), %edi
-; CHECK-NEXT:    shll $4, %ecx
-; CHECK-NEXT:    movl 8(%edx), %ebx
-; CHECK-NEXT:    movl 12(%edx), %edx
-; CHECK-NEXT:    movl %esi, 12(%eax,%ecx)
-; CHECK-NEXT:    movl %edi, (%eax,%ecx)
-; CHECK-NEXT:    movl %ebx, 8(%eax,%ecx)
-; CHECK-NEXT:    movl %edx, 4(%eax,%ecx)
+; CHECK-NEXT:    movdqa %xmm0, (%ecx)
+; CHECK-NEXT:    movl (%ecx), %esi
+; CHECK-NEXT:    movl 4(%ecx), %edi
+; CHECK-NEXT:    shll $4, %edx
+; CHECK-NEXT:    movl 8(%ecx), %ebx
+; CHECK-NEXT:    movl 12(%ecx), %ecx
+; CHECK-NEXT:    movl %esi, 12(%eax,%edx)
+; CHECK-NEXT:    movl %edi, (%eax,%edx)
+; CHECK-NEXT:    movl %ebx, 8(%eax,%edx)
+; CHECK-NEXT:    movl %ecx, 4(%eax,%edx)
 ; CHECK-NEXT:    popl %esi
 ; CHECK-NEXT:    popl %edi
 ; CHECK-NEXT:    popl %ebx

Modified: llvm/trunk/test/CodeGen/X86/fp128-i128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fp128-i128.ll?rev=307529&r1=307528&r2=307529&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fp128-i128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fp128-i128.ll Mon Jul 10 02:53:16 2017
@@ -50,8 +50,8 @@ define void @TestUnionLD1(fp128 %s, i64
 ; CHECK-NEXT:    andq %rdi, %rcx
 ; CHECK-NEXT:    movabsq $-281474976710656, %rdx # imm = 0xFFFF000000000000
 ; CHECK-NEXT:    andq -{{[0-9]+}}(%rsp), %rdx
-; CHECK-NEXT:    orq %rcx, %rdx
 ; CHECK-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    orq %rcx, %rdx
 ; CHECK-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
 ; CHECK-NEXT:    jmp foo # TAILCALL

Modified: llvm/trunk/test/CodeGen/X86/gather-addresses.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/gather-addresses.ll?rev=307529&r1=307528&r2=307529&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/gather-addresses.ll (original)
+++ llvm/trunk/test/CodeGen/X86/gather-addresses.ll Mon Jul 10 02:53:16 2017
@@ -16,10 +16,10 @@
 ; LIN: sarq    $32, %r[[REG2]]
 ; LIN: movslq	%e[[REG4]], %r[[REG3:.+]]
 ; LIN: sarq    $32, %r[[REG4]]
-; LIN: movsd	(%rdi,%r[[REG1]],8), %xmm0
-; LIN: movhpd	(%rdi,%r[[REG2]],8), %xmm0
-; LIN: movsd	(%rdi,%r[[REG3]],8), %xmm1
-; LIN: movhpd	(%rdi,%r[[REG4]],8), %xmm1
+; LIN: movsd    (%rdi,%r[[REG3]],8), %xmm1
+; LIN: movhpd   (%rdi,%r[[REG4]],8), %xmm1 
+; LIN: movq     %rdi, %xmm1 
+; LIN: movq     %r[[REG3]], %xmm0
 
 ; WIN: movdqa	(%rdx), %xmm0
 ; WIN: pand 	(%r8), %xmm0
@@ -29,10 +29,10 @@
 ; WIN: sarq    $32, %r[[REG2]]
 ; WIN: movslq	%e[[REG4]], %r[[REG3:.+]]
 ; WIN: sarq    $32, %r[[REG4]]
-; WIN: movsd	(%rcx,%r[[REG1]],8), %xmm0
-; WIN: movhpd	(%rcx,%r[[REG2]],8), %xmm0
-; WIN: movsd	(%rcx,%r[[REG3]],8), %xmm1
-; WIN: movhpd	(%rcx,%r[[REG4]],8), %xmm1
+; WIN: movsd    (%rcx,%r[[REG3]],8), %xmm1
+; WIN: movhpd   (%rcx,%r[[REG4]],8), %xmm1
+; WIN: movdqa   (%r[[REG2]]), %xmm0
+; WIN: movq     %r[[REG2]], %xmm1
 
 define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {
   %a = load <4 x i32>, <4 x i32>* %i

Modified: llvm/trunk/test/CodeGen/X86/recip-fastmath.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/recip-fastmath.ll?rev=307529&r1=307528&r2=307529&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/recip-fastmath.ll (original)
+++ llvm/trunk/test/CodeGen/X86/recip-fastmath.ll Mon Jul 10 02:53:16 2017
@@ -45,9 +45,9 @@ define float @f32_no_estimate(float %x)
 ;
 ; SANDY-LABEL: f32_no_estimate:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
-; SANDY-NEXT:    vdivss %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
+; SANDY-NEXT:    vdivss %xmm0, %xmm1, %xmm0 # sched: [14:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: f32_no_estimate:
 ; HASWELL:       # BB#0:
@@ -113,11 +113,11 @@ define float @f32_one_step(float %x) #1
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
 ; SANDY-NEXT:    vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
+; SANDY-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
 ; SANDY-NEXT:    vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: f32_one_step:
 ; HASWELL:       # BB#0:
@@ -207,7 +207,7 @@ define float @f32_two_step(float %x) #2
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
 ; SANDY-NEXT:    vmulss %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
-; SANDY-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [4:0.50]
+; SANDY-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [6:0.50]
 ; SANDY-NEXT:    vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
 ; SANDY-NEXT:    vmulss %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
 ; SANDY-NEXT:    vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
@@ -215,7 +215,7 @@ define float @f32_two_step(float %x) #2
 ; SANDY-NEXT:    vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: f32_two_step:
 ; HASWELL:       # BB#0:
@@ -284,9 +284,9 @@ define <4 x float> @v4f32_no_estimate(<4
 ;
 ; SANDY-LABEL: v4f32_no_estimate:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
-; SANDY-NEXT:    vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
+; SANDY-NEXT:    vdivps %xmm0, %xmm1, %xmm0 # sched: [14:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: v4f32_no_estimate:
 ; HASWELL:       # BB#0:
@@ -350,13 +350,13 @@ define <4 x float> @v4f32_one_step(<4 x
 ;
 ; SANDY-LABEL: v4f32_one_step:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; SANDY-NEXT:    vrcpps %xmm0, %xmm1 # sched: [7:3.00]
 ; SANDY-NEXT:    vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT:    vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
+; SANDY-NEXT:    vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
 ; SANDY-NEXT:    vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: v4f32_one_step:
 ; HASWELL:       # BB#0:
@@ -453,9 +453,9 @@ define <4 x float> @v4f32_two_step(<4 x
 ;
 ; SANDY-LABEL: v4f32_two_step:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; SANDY-NEXT:    vrcpps %xmm0, %xmm1 # sched: [7:3.00]
 ; SANDY-NEXT:    vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
-; SANDY-NEXT:    vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
+; SANDY-NEXT:    vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
 ; SANDY-NEXT:    vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
 ; SANDY-NEXT:    vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
 ; SANDY-NEXT:    vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
@@ -463,7 +463,7 @@ define <4 x float> @v4f32_two_step(<4 x
 ; SANDY-NEXT:    vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: v4f32_two_step:
 ; HASWELL:       # BB#0:
@@ -546,9 +546,9 @@ define <8 x float> @v8f32_no_estimate(<8
 ;
 ; SANDY-LABEL: v8f32_no_estimate:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
-; SANDY-NEXT:    vdivps %ymm0, %ymm1, %ymm0 # sched: [12:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
+; SANDY-NEXT:    vdivps %ymm0, %ymm1, %ymm0 # sched: [29:3.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: v8f32_no_estimate:
 ; HASWELL:       # BB#0:
@@ -621,11 +621,11 @@ define <8 x float> @v8f32_one_step(<8 x
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vrcpps %ymm0, %ymm1 # sched: [5:1.00]
 ; SANDY-NEXT:    vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT:    vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
+; SANDY-NEXT:    vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
 ; SANDY-NEXT:    vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: v8f32_one_step:
 ; HASWELL:       # BB#0:
@@ -737,7 +737,7 @@ define <8 x float> @v8f32_two_step(<8 x
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vrcpps %ymm0, %ymm1 # sched: [5:1.00]
 ; SANDY-NEXT:    vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00]
-; SANDY-NEXT:    vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
+; SANDY-NEXT:    vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
 ; SANDY-NEXT:    vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
 ; SANDY-NEXT:    vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00]
 ; SANDY-NEXT:    vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
@@ -745,7 +745,7 @@ define <8 x float> @v8f32_two_step(<8 x
 ; SANDY-NEXT:    vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: v8f32_two_step:
 ; HASWELL:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll?rev=307529&r1=307528&r2=307529&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll Mon Jul 10 02:53:16 2017
@@ -39,8 +39,8 @@ define float @f32_no_step_2(float %x) #3
 ; SANDY-LABEL: f32_no_step_2:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: f32_no_step_2:
 ; HASWELL:       # BB#0:
@@ -110,12 +110,12 @@ define float @f32_one_step_2(float %x) #
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
 ; SANDY-NEXT:    vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
+; SANDY-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
 ; SANDY-NEXT:    vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: f32_one_step_2:
 ; HASWELL:       # BB#0:
@@ -198,13 +198,13 @@ define float @f32_one_step_2_divs(float
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
 ; SANDY-NEXT:    vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
+; SANDY-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
 ; SANDY-NEXT:    vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:1.00]
+; SANDY-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:1.00]
 ; SANDY-NEXT:    vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: f32_one_step_2_divs:
 ; HASWELL:       # BB#0:
@@ -305,7 +305,7 @@ define float @f32_two_step_2(float %x) #
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
 ; SANDY-NEXT:    vmulss %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
-; SANDY-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [4:0.50]
+; SANDY-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [6:0.50]
 ; SANDY-NEXT:    vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
 ; SANDY-NEXT:    vmulss %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
 ; SANDY-NEXT:    vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
@@ -313,8 +313,8 @@ define float @f32_two_step_2(float %x) #
 ; SANDY-NEXT:    vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: f32_two_step_2:
 ; HASWELL:       # BB#0:
@@ -403,14 +403,14 @@ define <4 x float> @v4f32_one_step2(<4 x
 ;
 ; SANDY-LABEL: v4f32_one_step2:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; SANDY-NEXT:    vrcpps %xmm0, %xmm1 # sched: [7:3.00]
 ; SANDY-NEXT:    vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT:    vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
+; SANDY-NEXT:    vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
 ; SANDY-NEXT:    vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: v4f32_one_step2:
 ; HASWELL:       # BB#0:
@@ -501,15 +501,15 @@ define <4 x float> @v4f32_one_step_2_div
 ;
 ; SANDY-LABEL: v4f32_one_step_2_divs:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; SANDY-NEXT:    vrcpps %xmm0, %xmm1 # sched: [7:3.00]
 ; SANDY-NEXT:    vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT:    vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
+; SANDY-NEXT:    vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
 ; SANDY-NEXT:    vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:1.00]
+; SANDY-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:1.00]
 ; SANDY-NEXT:    vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: v4f32_one_step_2_divs:
 ; HASWELL:       # BB#0:
@@ -619,9 +619,9 @@ define <4 x float> @v4f32_two_step2(<4 x
 ;
 ; SANDY-LABEL: v4f32_two_step2:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; SANDY-NEXT:    vrcpps %xmm0, %xmm1 # sched: [7:3.00]
 ; SANDY-NEXT:    vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
-; SANDY-NEXT:    vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
+; SANDY-NEXT:    vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
 ; SANDY-NEXT:    vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
 ; SANDY-NEXT:    vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
 ; SANDY-NEXT:    vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
@@ -629,8 +629,8 @@ define <4 x float> @v4f32_two_step2(<4 x
 ; SANDY-NEXT:    vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: v4f32_two_step2:
 ; HASWELL:       # BB#0:
@@ -741,12 +741,12 @@ define <8 x float> @v8f32_one_step2(<8 x
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vrcpps %ymm0, %ymm1 # sched: [5:1.00]
 ; SANDY-NEXT:    vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT:    vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
+; SANDY-NEXT:    vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
 ; SANDY-NEXT:    vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: v8f32_one_step2:
 ; HASWELL:       # BB#0:
@@ -848,13 +848,13 @@ define <8 x float> @v8f32_one_step_2_div
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vrcpps %ymm0, %ymm1 # sched: [5:1.00]
 ; SANDY-NEXT:    vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT:    vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
+; SANDY-NEXT:    vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
 ; SANDY-NEXT:    vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00]
+; SANDY-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [12:1.00]
 ; SANDY-NEXT:    vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: v8f32_one_step_2_divs:
 ; HASWELL:       # BB#0:
@@ -980,7 +980,7 @@ define <8 x float> @v8f32_two_step2(<8 x
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vrcpps %ymm0, %ymm1 # sched: [5:1.00]
 ; SANDY-NEXT:    vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00]
-; SANDY-NEXT:    vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
+; SANDY-NEXT:    vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
 ; SANDY-NEXT:    vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
 ; SANDY-NEXT:    vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00]
 ; SANDY-NEXT:    vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
@@ -988,8 +988,8 @@ define <8 x float> @v8f32_two_step2(<8 x
 ; SANDY-NEXT:    vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: v8f32_two_step2:
 ; HASWELL:       # BB#0:
@@ -1070,7 +1070,7 @@ define <8 x float> @v8f32_no_step(<8 x f
 ; SANDY-LABEL: v8f32_no_step:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vrcpps %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: v8f32_no_step:
 ; HASWELL:       # BB#0:
@@ -1125,8 +1125,8 @@ define <8 x float> @v8f32_no_step2(<8 x
 ; SANDY-LABEL: v8f32_no_step2:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vrcpps %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: v8f32_no_step2:
 ; HASWELL:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/sse-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-schedule.ll?rev=307529&r1=307528&r2=307529&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse-schedule.ll Mon Jul 10 02:53:16 2017
@@ -31,8 +31,8 @@ define <4 x float> @test_addps(<4 x floa
 ; SANDY-LABEL: test_addps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vaddps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vaddps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_addps:
 ; HASWELL:       # BB#0:
@@ -73,8 +73,8 @@ define float @test_addss(float %a0, floa
 ; SANDY-LABEL: test_addss:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vaddss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vaddss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_addss:
 ; HASWELL:       # BB#0:
@@ -122,9 +122,9 @@ define <4 x float> @test_andps(<4 x floa
 ;
 ; SANDY-LABEL: test_andps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT:    vandps (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SANDY-NEXT:    vandps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_andps:
 ; HASWELL:       # BB#0:
@@ -176,9 +176,9 @@ define <4 x float> @test_andnotps(<4 x f
 ;
 ; SANDY-LABEL: test_andnotps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SANDY-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_andnotps:
 ; HASWELL:       # BB#0:
@@ -228,9 +228,9 @@ define <4 x float> @test_cmpps(<4 x floa
 ; SANDY-LABEL: test_cmpps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
-; SANDY-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT:    vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cmpps:
 ; HASWELL:       # BB#0:
@@ -277,7 +277,7 @@ define float @test_cmpss(float %a0, floa
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cmpss:
 ; HASWELL:       # BB#0:
@@ -347,16 +347,16 @@ define i32 @test_comiss(<4 x float> %a0,
 ; SANDY-LABEL: test_comiss:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vcomiss %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    setnp %al # sched: [1:0.33]
-; SANDY-NEXT:    sete %cl # sched: [1:0.33]
+; SANDY-NEXT:    setnp %al # sched: [1:1.00]
+; SANDY-NEXT:    sete %cl # sched: [1:1.00]
 ; SANDY-NEXT:    andb %al, %cl # sched: [1:0.33]
 ; SANDY-NEXT:    vcomiss (%rdi), %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    setnp %al # sched: [1:0.33]
-; SANDY-NEXT:    sete %dl # sched: [1:0.33]
+; SANDY-NEXT:    setnp %al # sched: [1:1.00]
+; SANDY-NEXT:    sete %dl # sched: [1:1.00]
 ; SANDY-NEXT:    andb %al, %dl # sched: [1:0.33]
 ; SANDY-NEXT:    orb %cl, %dl # sched: [1:0.33]
 ; SANDY-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_comiss:
 ; HASWELL:       # BB#0:
@@ -417,10 +417,10 @@ define float @test_cvtsi2ss(i32 %a0, i32
 ;
 ; SANDY-LABEL: test_cvtsi2ss:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00]
-; SANDY-NEXT:    vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
+; SANDY-NEXT:    vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:2.00]
+; SANDY-NEXT:    vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [10:1.00]
 ; SANDY-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cvtsi2ss:
 ; HASWELL:       # BB#0:
@@ -466,10 +466,10 @@ define float @test_cvtsi2ssq(i64 %a0, i6
 ;
 ; SANDY-LABEL: test_cvtsi2ssq:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
-; SANDY-NEXT:    vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
+; SANDY-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00]
+; SANDY-NEXT:    vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [10:1.00]
 ; SANDY-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cvtsi2ssq:
 ; HASWELL:       # BB#0:
@@ -515,10 +515,10 @@ define i32 @test_cvtss2si(float %a0, flo
 ;
 ; SANDY-LABEL: test_cvtss2si:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vcvtss2si %xmm0, %ecx # sched: [3:1.00]
-; SANDY-NEXT:    vcvtss2si (%rdi), %eax # sched: [7:1.00]
+; SANDY-NEXT:    vcvtss2si %xmm0, %ecx # sched: [5:1.00]
+; SANDY-NEXT:    vcvtss2si (%rdi), %eax # sched: [10:1.00]
 ; SANDY-NEXT:    addl %ecx, %eax # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cvtss2si:
 ; HASWELL:       # BB#0:
@@ -567,10 +567,10 @@ define i64 @test_cvtss2siq(float %a0, fl
 ;
 ; SANDY-LABEL: test_cvtss2siq:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vcvtss2si %xmm0, %rcx # sched: [3:1.00]
-; SANDY-NEXT:    vcvtss2si (%rdi), %rax # sched: [7:1.00]
+; SANDY-NEXT:    vcvtss2si %xmm0, %rcx # sched: [5:1.00]
+; SANDY-NEXT:    vcvtss2si (%rdi), %rax # sched: [10:1.00]
 ; SANDY-NEXT:    addq %rcx, %rax # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cvtss2siq:
 ; HASWELL:       # BB#0:
@@ -619,10 +619,10 @@ define i32 @test_cvttss2si(float %a0, fl
 ;
 ; SANDY-LABEL: test_cvttss2si:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vcvttss2si %xmm0, %ecx # sched: [3:1.00]
-; SANDY-NEXT:    vcvttss2si (%rdi), %eax # sched: [7:1.00]
+; SANDY-NEXT:    vcvttss2si %xmm0, %ecx # sched: [5:1.00]
+; SANDY-NEXT:    vcvttss2si (%rdi), %eax # sched: [10:1.00]
 ; SANDY-NEXT:    addl %ecx, %eax # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cvttss2si:
 ; HASWELL:       # BB#0:
@@ -668,10 +668,10 @@ define i64 @test_cvttss2siq(float %a0, f
 ;
 ; SANDY-LABEL: test_cvttss2siq:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vcvttss2si %xmm0, %rcx # sched: [3:1.00]
-; SANDY-NEXT:    vcvttss2si (%rdi), %rax # sched: [7:1.00]
+; SANDY-NEXT:    vcvttss2si %xmm0, %rcx # sched: [5:1.00]
+; SANDY-NEXT:    vcvttss2si (%rdi), %rax # sched: [10:1.00]
 ; SANDY-NEXT:    addq %rcx, %rax # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cvttss2siq:
 ; HASWELL:       # BB#0:
@@ -714,9 +714,9 @@ define <4 x float> @test_divps(<4 x floa
 ;
 ; SANDY-LABEL: test_divps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vdivps %xmm1, %xmm0, %xmm0 # sched: [12:1.00]
-; SANDY-NEXT:    vdivps (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vdivps %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
+; SANDY-NEXT:    vdivps (%rdi), %xmm0, %xmm0 # sched: [20:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_divps:
 ; HASWELL:       # BB#0:
@@ -756,9 +756,9 @@ define float @test_divss(float %a0, floa
 ;
 ; SANDY-LABEL: test_divss:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vdivss %xmm1, %xmm0, %xmm0 # sched: [12:1.00]
-; SANDY-NEXT:    vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vdivss %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
+; SANDY-NEXT:    vdivss (%rdi), %xmm0, %xmm0 # sched: [20:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_divss:
 ; HASWELL:       # BB#0:
@@ -799,8 +799,8 @@ define void @test_ldmxcsr(i32 %a0) {
 ; SANDY-LABEL: test_ldmxcsr:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; SANDY-NEXT:    vldmxcsr -{{[0-9]+}}(%rsp) # sched: [4:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_ldmxcsr:
 ; HASWELL:       # BB#0:
@@ -843,8 +843,8 @@ define <4 x float> @test_maxps(<4 x floa
 ; SANDY-LABEL: test_maxps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmaxps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmaxps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_maxps:
 ; HASWELL:       # BB#0:
@@ -886,8 +886,8 @@ define <4 x float> @test_maxss(<4 x floa
 ; SANDY-LABEL: test_maxss:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmaxss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_maxss:
 ; HASWELL:       # BB#0:
@@ -929,8 +929,8 @@ define <4 x float> @test_minps(<4 x floa
 ; SANDY-LABEL: test_minps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vminps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vminps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_minps:
 ; HASWELL:       # BB#0:
@@ -972,8 +972,8 @@ define <4 x float> @test_minss(<4 x floa
 ; SANDY-LABEL: test_minss:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vminss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vminss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_minss:
 ; HASWELL:       # BB#0:
@@ -1017,10 +1017,10 @@ define void @test_movaps(<4 x float> *%a
 ;
 ; SANDY-LABEL: test_movaps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmovaps (%rdi), %xmm0 # sched: [4:0.50]
+; SANDY-NEXT:    vmovaps (%rdi), %xmm0 # sched: [6:0.50]
 ; SANDY-NEXT:    vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmovaps %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmovaps %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movaps:
 ; HASWELL:       # BB#0:
@@ -1068,7 +1068,7 @@ define <4 x float> @test_movhlps(<4 x fl
 ; SANDY-LABEL: test_movhlps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movhlps:
 ; HASWELL:       # BB#0:
@@ -1111,10 +1111,10 @@ define void @test_movhps(<4 x float> %a0
 ;
 ; SANDY-LABEL: test_movhps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:1.00]
+; SANDY-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movhps:
 ; HASWELL:       # BB#0:
@@ -1164,7 +1164,7 @@ define <4 x float> @test_movlhps(<4 x fl
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
 ; SANDY-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movlhps:
 ; HASWELL:       # BB#0:
@@ -1206,10 +1206,10 @@ define void @test_movlps(<4 x float> %a0
 ;
 ; SANDY-LABEL: test_movlps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [5:1.00]
+; SANDY-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmovlps %xmm0, (%rdi) # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmovlps %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movlps:
 ; HASWELL:       # BB#0:
@@ -1254,8 +1254,8 @@ define i32 @test_movmskps(<4 x float> %a
 ;
 ; SANDY-LABEL: test_movmskps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmovmskps %xmm0, %eax # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmovmskps %xmm0, %eax # sched: [2:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movmskps:
 ; HASWELL:       # BB#0:
@@ -1295,8 +1295,8 @@ define void @test_movntps(<4 x float> %a
 ;
 ; SANDY-LABEL: test_movntps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmovntps %xmm0, (%rdi) # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmovntps %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movntps:
 ; HASWELL:       # BB#0:
@@ -1335,10 +1335,10 @@ define void @test_movss_mem(float* %a0,
 ;
 ; SANDY-LABEL: test_movss_mem:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [4:0.50]
+; SANDY-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
 ; SANDY-NEXT:    vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmovss %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmovss %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movss_mem:
 ; HASWELL:       # BB#0:
@@ -1383,8 +1383,8 @@ define <4 x float> @test_movss_reg(<4 x
 ;
 ; SANDY-LABEL: test_movss_reg:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movss_reg:
 ; HASWELL:       # BB#0:
@@ -1423,10 +1423,10 @@ define void @test_movups(<4 x float> *%a
 ;
 ; SANDY-LABEL: test_movups:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmovups (%rdi), %xmm0 # sched: [4:0.50]
+; SANDY-NEXT:    vmovups (%rdi), %xmm0 # sched: [6:0.50]
 ; SANDY-NEXT:    vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmovups %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmovups %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movups:
 ; HASWELL:       # BB#0:
@@ -1469,8 +1469,8 @@ define <4 x float> @test_mulps(<4 x floa
 ; SANDY-LABEL: test_mulps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT:    vmulps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmulps (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_mulps:
 ; HASWELL:       # BB#0:
@@ -1511,8 +1511,8 @@ define float @test_mulss(float %a0, floa
 ; SANDY-LABEL: test_mulss:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT:    vmulss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmulss (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_mulss:
 ; HASWELL:       # BB#0:
@@ -1560,9 +1560,9 @@ define <4 x float> @test_orps(<4 x float
 ;
 ; SANDY-LABEL: test_orps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT:    vorps (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SANDY-NEXT:    vorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_orps:
 ; HASWELL:       # BB#0:
@@ -1609,8 +1609,8 @@ define void @test_prefetchnta(i8* %a0) {
 ;
 ; SANDY-LABEL: test_prefetchnta:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    prefetchnta (%rdi) # sched: [4:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    prefetchnta (%rdi) # sched: [5:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_prefetchnta:
 ; HASWELL:       # BB#0:
@@ -1652,10 +1652,10 @@ define <4 x float> @test_rcpps(<4 x floa
 ;
 ; SANDY-LABEL: test_rcpps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vrcpps %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT:    vrcpps (%rdi), %xmm1 # sched: [9:1.00]
+; SANDY-NEXT:    vrcpps %xmm0, %xmm0 # sched: [7:3.00]
+; SANDY-NEXT:    vrcpps (%rdi), %xmm1 # sched: [11:1.00]
 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_rcpps:
 ; HASWELL:       # BB#0:
@@ -1708,10 +1708,10 @@ define <4 x float> @test_rcpss(float %a0
 ; SANDY-LABEL: test_rcpss:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vrcpss %xmm0, %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
+; SANDY-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
 ; SANDY-NEXT:    vrcpss %xmm1, %xmm1, %xmm1 # sched: [9:1.00]
 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_rcpss:
 ; HASWELL:       # BB#0:
@@ -1765,9 +1765,9 @@ define <4 x float> @test_rsqrtps(<4 x fl
 ; SANDY-LABEL: test_rsqrtps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT:    vrsqrtps (%rdi), %xmm1 # sched: [9:1.00]
+; SANDY-NEXT:    vrsqrtps (%rdi), %xmm1 # sched: [11:1.00]
 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_rsqrtps:
 ; HASWELL:       # BB#0:
@@ -1819,11 +1819,11 @@ define <4 x float> @test_rsqrtss(float %
 ;
 ; SANDY-LABEL: test_rsqrtss:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
-; SANDY-NEXT:    vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [9:1.00]
+; SANDY-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
+; SANDY-NEXT:    vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_rsqrtss:
 ; HASWELL:       # BB#0:
@@ -1875,7 +1875,7 @@ define void @test_sfence() {
 ; SANDY-LABEL: test_sfence:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    sfence # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_sfence:
 ; HASWELL:       # BB#0:
@@ -1917,8 +1917,8 @@ define <4 x float> @test_shufps(<4 x flo
 ; SANDY-LABEL: test_shufps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
-; SANDY-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [5:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [7:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_shufps:
 ; HASWELL:       # BB#0:
@@ -1962,10 +1962,10 @@ define <4 x float> @test_sqrtps(<4 x flo
 ;
 ; SANDY-LABEL: test_sqrtps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vsqrtps %xmm0, %xmm0 # sched: [15:1.00]
-; SANDY-NEXT:    vsqrtps (%rdi), %xmm1 # sched: [19:1.00]
+; SANDY-NEXT:    vsqrtps %xmm0, %xmm0 # sched: [14:1.00]
+; SANDY-NEXT:    vsqrtps (%rdi), %xmm1 # sched: [20:1.00]
 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_sqrtps:
 ; HASWELL:       # BB#0:
@@ -2017,11 +2017,11 @@ define <4 x float> @test_sqrtss(<4 x flo
 ;
 ; SANDY-LABEL: test_sqrtss:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # sched: [19:1.00]
-; SANDY-NEXT:    vmovaps (%rdi), %xmm1 # sched: [4:0.50]
-; SANDY-NEXT:    vsqrtss %xmm1, %xmm1, %xmm1 # sched: [19:1.00]
+; SANDY-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # sched: [114:1.00]
+; SANDY-NEXT:    vmovaps (%rdi), %xmm1 # sched: [6:0.50]
+; SANDY-NEXT:    vsqrtss %xmm1, %xmm1, %xmm1 # sched: [114:1.00]
 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_sqrtss:
 ; HASWELL:       # BB#0:
@@ -2067,9 +2067,9 @@ define i32 @test_stmxcsr() {
 ;
 ; SANDY-LABEL: test_stmxcsr:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; SANDY-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [4:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vstmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
+; SANDY-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_stmxcsr:
 ; HASWELL:       # BB#0:
@@ -2112,8 +2112,8 @@ define <4 x float> @test_subps(<4 x floa
 ; SANDY-LABEL: test_subps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_subps:
 ; HASWELL:       # BB#0:
@@ -2154,8 +2154,8 @@ define float @test_subss(float %a0, floa
 ; SANDY-LABEL: test_subss:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vsubss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vsubss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_subss:
 ; HASWELL:       # BB#0:
@@ -2220,16 +2220,16 @@ define i32 @test_ucomiss(<4 x float> %a0
 ; SANDY-LABEL: test_ucomiss:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vucomiss %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    setnp %al # sched: [1:0.33]
-; SANDY-NEXT:    sete %cl # sched: [1:0.33]
+; SANDY-NEXT:    setnp %al # sched: [1:1.00]
+; SANDY-NEXT:    sete %cl # sched: [1:1.00]
 ; SANDY-NEXT:    andb %al, %cl # sched: [1:0.33]
 ; SANDY-NEXT:    vucomiss (%rdi), %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    setnp %al # sched: [1:0.33]
-; SANDY-NEXT:    sete %dl # sched: [1:0.33]
+; SANDY-NEXT:    setnp %al # sched: [1:1.00]
+; SANDY-NEXT:    sete %dl # sched: [1:1.00]
 ; SANDY-NEXT:    andb %al, %dl # sched: [1:0.33]
 ; SANDY-NEXT:    orb %cl, %dl # sched: [1:0.33]
 ; SANDY-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_ucomiss:
 ; HASWELL:       # BB#0:
@@ -2292,8 +2292,8 @@ define <4 x float> @test_unpckhps(<4 x f
 ; SANDY-LABEL: test_unpckhps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SANDY-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [5:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_unpckhps:
 ; HASWELL:       # BB#0:
@@ -2338,8 +2338,8 @@ define <4 x float> @test_unpcklps(<4 x f
 ; SANDY-LABEL: test_unpcklps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; SANDY-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [5:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_unpcklps:
 ; HASWELL:       # BB#0:
@@ -2387,9 +2387,9 @@ define <4 x float> @test_xorps(<4 x floa
 ;
 ; SANDY-LABEL: test_xorps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT:    vxorps (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SANDY-NEXT:    vxorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_xorps:
 ; HASWELL:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/sse2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-schedule.ll?rev=307529&r1=307528&r2=307529&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-schedule.ll Mon Jul 10 02:53:16 2017
@@ -31,8 +31,8 @@ define <2 x double> @test_addpd(<2 x dou
 ; SANDY-LABEL: test_addpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vaddpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_addpd:
 ; HASWELL:       # BB#0:
@@ -73,8 +73,8 @@ define double @test_addsd(double %a0, do
 ; SANDY-LABEL: test_addsd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vaddsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_addsd:
 ; HASWELL:       # BB#0:
@@ -117,10 +117,10 @@ define <2 x double> @test_andpd(<2 x dou
 ;
 ; SANDY-LABEL: test_andpd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT:    vandpd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT:    vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SANDY-NEXT:    vandpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; SANDY-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_andpd:
 ; HASWELL:       # BB#0:
@@ -170,10 +170,10 @@ define <2 x double> @test_andnotpd(<2 x
 ;
 ; SANDY-LABEL: test_andnotpd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT:    vandnpd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT:    vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SANDY-NEXT:    vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; SANDY-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_andnotpd:
 ; HASWELL:       # BB#0:
@@ -226,9 +226,9 @@ define <2 x double> @test_cmppd(<2 x dou
 ; SANDY-LABEL: test_cmppd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
-; SANDY-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT:    vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cmppd:
 ; HASWELL:       # BB#0:
@@ -275,7 +275,7 @@ define double @test_cmpsd(double %a0, do
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cmpsd:
 ; HASWELL:       # BB#0:
@@ -345,16 +345,16 @@ define i32 @test_comisd(<2 x double> %a0
 ; SANDY-LABEL: test_comisd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vcomisd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    setnp %al # sched: [1:0.33]
-; SANDY-NEXT:    sete %cl # sched: [1:0.33]
+; SANDY-NEXT:    setnp %al # sched: [1:1.00]
+; SANDY-NEXT:    sete %cl # sched: [1:1.00]
 ; SANDY-NEXT:    andb %al, %cl # sched: [1:0.33]
 ; SANDY-NEXT:    vcomisd (%rdi), %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    setnp %al # sched: [1:0.33]
-; SANDY-NEXT:    sete %dl # sched: [1:0.33]
+; SANDY-NEXT:    setnp %al # sched: [1:1.00]
+; SANDY-NEXT:    sete %dl # sched: [1:1.00]
 ; SANDY-NEXT:    andb %al, %dl # sched: [1:0.33]
 ; SANDY-NEXT:    orb %cl, %dl # sched: [1:0.33]
 ; SANDY-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_comisd:
 ; HASWELL:       # BB#0:
@@ -416,9 +416,9 @@ define <2 x double> @test_cvtdq2pd(<4 x
 ; SANDY-LABEL: test_cvtdq2pd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
-; SANDY-NEXT:    vcvtdq2pd (%rdi), %xmm1 # sched: [8:1.00]
+; SANDY-NEXT:    vcvtdq2pd (%rdi), %xmm1 # sched: [10:1.00]
 ; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cvtdq2pd:
 ; HASWELL:       # BB#0:
@@ -467,10 +467,10 @@ define <4 x float> @test_cvtdq2ps(<4 x i
 ;
 ; SANDY-LABEL: test_cvtdq2ps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [4:1.00]
-; SANDY-NEXT:    vcvtdq2ps (%rdi), %xmm1 # sched: [8:1.00]
+; SANDY-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT:    vcvtdq2ps (%rdi), %xmm1 # sched: [9:1.00]
 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cvtdq2ps:
 ; HASWELL:       # BB#0:
@@ -517,10 +517,10 @@ define <4 x i32> @test_cvtpd2dq(<2 x dou
 ;
 ; SANDY-LABEL: test_cvtpd2dq:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vcvtpd2dq %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vcvtpd2dqx (%rdi), %xmm1 # sched: [7:1.00]
+; SANDY-NEXT:    vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00]
+; SANDY-NEXT:    vcvtpd2dqx (%rdi), %xmm1 # sched: [10:1.00]
 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cvtpd2dq:
 ; HASWELL:       # BB#0:
@@ -568,10 +568,10 @@ define <4 x float> @test_cvtpd2ps(<2 x d
 ;
 ; SANDY-LABEL: test_cvtpd2ps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vcvtpd2ps %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vcvtpd2psx (%rdi), %xmm1 # sched: [7:1.00]
+; SANDY-NEXT:    vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
+; SANDY-NEXT:    vcvtpd2psx (%rdi), %xmm1 # sched: [10:1.00]
 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cvtpd2ps:
 ; HASWELL:       # BB#0:
@@ -620,9 +620,9 @@ define <4 x i32> @test_cvtps2dq(<4 x flo
 ; SANDY-LABEL: test_cvtps2dq:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vcvtps2dq (%rdi), %xmm1 # sched: [7:1.00]
+; SANDY-NEXT:    vcvtps2dq (%rdi), %xmm1 # sched: [9:1.00]
 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cvtps2dq:
 ; HASWELL:       # BB#0:
@@ -670,10 +670,10 @@ define <2 x double> @test_cvtps2pd(<4 x
 ;
 ; SANDY-LABEL: test_cvtps2pd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vcvtps2pd %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT:    vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00]
 ; SANDY-NEXT:    vcvtps2pd (%rdi), %xmm1 # sched: [7:1.00]
 ; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cvtps2pd:
 ; HASWELL:       # BB#0:
@@ -724,7 +724,7 @@ define i32 @test_cvtsd2si(double %a0, do
 ; SANDY-NEXT:    vcvtsd2si %xmm0, %ecx # sched: [3:1.00]
 ; SANDY-NEXT:    vcvtsd2si (%rdi), %eax # sched: [7:1.00]
 ; SANDY-NEXT:    addl %ecx, %eax # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cvtsd2si:
 ; HASWELL:       # BB#0:
@@ -773,10 +773,10 @@ define i64 @test_cvtsd2siq(double %a0, d
 ;
 ; SANDY-LABEL: test_cvtsd2siq:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vcvtsd2si %xmm0, %rcx # sched: [3:1.00]
-; SANDY-NEXT:    vcvtsd2si (%rdi), %rax # sched: [7:1.00]
+; SANDY-NEXT:    vcvtsd2si %xmm0, %rcx # sched: [5:1.00]
+; SANDY-NEXT:    vcvtsd2si (%rdi), %rax # sched: [10:1.00]
 ; SANDY-NEXT:    addq %rcx, %rax # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cvtsd2siq:
 ; HASWELL:       # BB#0:
@@ -830,10 +830,10 @@ define float @test_cvtsd2ss(double %a0,
 ; SANDY-LABEL: test_cvtsd2ss:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [4:0.50]
+; SANDY-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50]
 ; SANDY-NEXT:    vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [3:1.00]
 ; SANDY-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cvtsd2ss:
 ; HASWELL:       # BB#0:
@@ -882,9 +882,9 @@ define double @test_cvtsi2sd(i32 %a0, i3
 ; SANDY-LABEL: test_cvtsi2sd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00]
-; SANDY-NEXT:    vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
+; SANDY-NEXT:    vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
 ; SANDY-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cvtsi2sd:
 ; HASWELL:       # BB#0:
@@ -931,9 +931,9 @@ define double @test_cvtsi2sdq(i64 %a0, i
 ; SANDY-LABEL: test_cvtsi2sdq:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
-; SANDY-NEXT:    vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
+; SANDY-NEXT:    vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
 ; SANDY-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cvtsi2sdq:
 ; HASWELL:       # BB#0:
@@ -985,11 +985,11 @@ define double @test_cvtss2sd(float %a0,
 ;
 ; SANDY-LABEL: test_cvtss2sd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
-; SANDY-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [3:1.00]
+; SANDY-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
+; SANDY-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
+; SANDY-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
 ; SANDY-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cvtss2sd:
 ; HASWELL:       # BB#0:
@@ -1038,10 +1038,10 @@ define <4 x i32> @test_cvttpd2dq(<2 x do
 ;
 ; SANDY-LABEL: test_cvttpd2dq:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vcvttpd2dq %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vcvttpd2dqx (%rdi), %xmm1 # sched: [7:1.00]
+; SANDY-NEXT:    vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00]
+; SANDY-NEXT:    vcvttpd2dqx (%rdi), %xmm1 # sched: [10:1.00]
 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cvttpd2dq:
 ; HASWELL:       # BB#0:
@@ -1091,9 +1091,9 @@ define <4 x i32> @test_cvttps2dq(<4 x fl
 ; SANDY-LABEL: test_cvttps2dq:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vcvttps2dq (%rdi), %xmm1 # sched: [7:1.00]
+; SANDY-NEXT:    vcvttps2dq (%rdi), %xmm1 # sched: [9:1.00]
 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cvttps2dq:
 ; HASWELL:       # BB#0:
@@ -1139,10 +1139,10 @@ define i32 @test_cvttsd2si(double %a0, d
 ;
 ; SANDY-LABEL: test_cvttsd2si:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vcvttsd2si %xmm0, %ecx # sched: [3:1.00]
+; SANDY-NEXT:    vcvttsd2si %xmm0, %ecx # sched: [5:1.00]
 ; SANDY-NEXT:    vcvttsd2si (%rdi), %eax # sched: [7:1.00]
 ; SANDY-NEXT:    addl %ecx, %eax # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cvttsd2si:
 ; HASWELL:       # BB#0:
@@ -1188,10 +1188,10 @@ define i64 @test_cvttsd2siq(double %a0,
 ;
 ; SANDY-LABEL: test_cvttsd2siq:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vcvttsd2si %xmm0, %rcx # sched: [3:1.00]
-; SANDY-NEXT:    vcvttsd2si (%rdi), %rax # sched: [7:1.00]
+; SANDY-NEXT:    vcvttsd2si %xmm0, %rcx # sched: [5:1.00]
+; SANDY-NEXT:    vcvttsd2si (%rdi), %rax # sched: [10:1.00]
 ; SANDY-NEXT:    addq %rcx, %rax # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cvttsd2siq:
 ; HASWELL:       # BB#0:
@@ -1234,9 +1234,9 @@ define <2 x double> @test_divpd(<2 x dou
 ;
 ; SANDY-LABEL: test_divpd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vdivpd %xmm1, %xmm0, %xmm0 # sched: [12:1.00]
-; SANDY-NEXT:    vdivpd (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vdivpd %xmm1, %xmm0, %xmm0 # sched: [22:1.00]
+; SANDY-NEXT:    vdivpd (%rdi), %xmm0, %xmm0 # sched: [28:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_divpd:
 ; HASWELL:       # BB#0:
@@ -1276,9 +1276,9 @@ define double @test_divsd(double %a0, do
 ;
 ; SANDY-LABEL: test_divsd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 # sched: [12:1.00]
-; SANDY-NEXT:    vdivsd (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 # sched: [22:1.00]
+; SANDY-NEXT:    vdivsd (%rdi), %xmm0, %xmm0 # sched: [28:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_divsd:
 ; HASWELL:       # BB#0:
@@ -1322,7 +1322,7 @@ define void @test_lfence() {
 ; SANDY-LABEL: test_lfence:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    lfence # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_lfence:
 ; HASWELL:       # BB#0:
@@ -1363,7 +1363,7 @@ define void @test_mfence() {
 ; SANDY-LABEL: test_mfence:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    mfence # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_mfence:
 ; HASWELL:       # BB#0:
@@ -1402,7 +1402,7 @@ define void @test_maskmovdqu(<16 x i8> %
 ; SANDY-LABEL: test_maskmovdqu:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_maskmovdqu:
 ; HASWELL:       # BB#0:
@@ -1440,8 +1440,8 @@ define <2 x double> @test_maxpd(<2 x dou
 ; SANDY-LABEL: test_maxpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmaxpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmaxpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_maxpd:
 ; HASWELL:       # BB#0:
@@ -1483,8 +1483,8 @@ define <2 x double> @test_maxsd(<2 x dou
 ; SANDY-LABEL: test_maxsd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_maxsd:
 ; HASWELL:       # BB#0:
@@ -1526,8 +1526,8 @@ define <2 x double> @test_minpd(<2 x dou
 ; SANDY-LABEL: test_minpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vminpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vminpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_minpd:
 ; HASWELL:       # BB#0:
@@ -1569,8 +1569,8 @@ define <2 x double> @test_minsd(<2 x dou
 ; SANDY-LABEL: test_minsd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vminsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vminsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_minsd:
 ; HASWELL:       # BB#0:
@@ -1614,10 +1614,10 @@ define void @test_movapd(<2 x double> *%
 ;
 ; SANDY-LABEL: test_movapd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmovapd (%rdi), %xmm0 # sched: [4:0.50]
+; SANDY-NEXT:    vmovapd (%rdi), %xmm0 # sched: [6:0.50]
 ; SANDY-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmovapd %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmovapd %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movapd:
 ; HASWELL:       # BB#0:
@@ -1662,10 +1662,10 @@ define void @test_movdqa(<2 x i64> *%a0,
 ;
 ; SANDY-LABEL: test_movdqa:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmovdqa (%rdi), %xmm0 # sched: [4:0.50]
+; SANDY-NEXT:    vmovdqa (%rdi), %xmm0 # sched: [6:0.50]
 ; SANDY-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmovdqa %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movdqa:
 ; HASWELL:       # BB#0:
@@ -1710,10 +1710,10 @@ define void @test_movdqu(<2 x i64> *%a0,
 ;
 ; SANDY-LABEL: test_movdqu:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmovdqu (%rdi), %xmm0 # sched: [4:0.50]
+; SANDY-NEXT:    vmovdqu (%rdi), %xmm0 # sched: [6:0.50]
 ; SANDY-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmovdqu %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movdqu:
 ; HASWELL:       # BB#0:
@@ -1768,12 +1768,12 @@ define i32 @test_movd(<4 x i32> %a0, i32
 ; SANDY-LABEL: test_movd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmovd %edi, %xmm1 # sched: [1:0.33]
-; SANDY-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
+; SANDY-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
 ; SANDY-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vmovd %xmm0, %eax # sched: [1:0.33]
-; SANDY-NEXT:    vmovd %xmm1, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmovd %xmm0, %eax # sched: [2:1.00]
+; SANDY-NEXT:    vmovd %xmm1, (%rsi) # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movd:
 ; HASWELL:       # BB#0:
@@ -1838,13 +1838,13 @@ define i64 @test_movd_64(<2 x i64> %a0,
 ;
 ; SANDY-LABEL: test_movd_64:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmovq %rdi, %xmm1 # sched: [1:0.33]
-; SANDY-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero sched: [4:0.50]
+; SANDY-NEXT:    vmovq %rdi, %xmm1 # sched: [1:1.00]
+; SANDY-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero sched: [6:0.50]
 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
 ; SANDY-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vmovq %xmm0, %rax # sched: [1:0.33]
-; SANDY-NEXT:    vmovq %xmm1, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmovq %xmm0, %rax # sched: [2:1.00]
+; SANDY-NEXT:    vmovq %xmm1, (%rsi) # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movd_64:
 ; HASWELL:       # BB#0:
@@ -1900,10 +1900,10 @@ define void @test_movhpd(<2 x double> %a
 ;
 ; SANDY-LABEL: test_movhpd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:1.00]
+; SANDY-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
 ; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmovhpd %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movhpd:
 ; HASWELL:       # BB#0:
@@ -1951,10 +1951,10 @@ define void @test_movlpd(<2 x double> %a
 ;
 ; SANDY-LABEL: test_movlpd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [5:1.00]
+; SANDY-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
 ; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmovlpd %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movlpd:
 ; HASWELL:       # BB#0:
@@ -1998,8 +1998,8 @@ define i32 @test_movmskpd(<2 x double> %
 ;
 ; SANDY-LABEL: test_movmskpd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmovmskpd %xmm0, %eax # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmovmskpd %xmm0, %eax # sched: [2:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movmskpd:
 ; HASWELL:       # BB#0:
@@ -2039,8 +2039,8 @@ define void @test_movntdqa(<2 x i64> %a0
 ; SANDY-LABEL: test_movntdqa:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmovntdq %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movntdqa:
 ; HASWELL:       # BB#0:
@@ -2080,8 +2080,8 @@ define void @test_movntpd(<2 x double> %
 ; SANDY-LABEL: test_movntpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmovntpd %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movntpd:
 ; HASWELL:       # BB#0:
@@ -2123,10 +2123,10 @@ define <2 x i64> @test_movq_mem(<2 x i64
 ;
 ; SANDY-LABEL: test_movq_mem:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero sched: [4:0.50]
+; SANDY-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50]
 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vmovq %xmm0, (%rdi) # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmovq %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movq_mem:
 ; HASWELL:       # BB#0:
@@ -2174,7 +2174,7 @@ define <2 x i64> @test_movq_reg(<2 x i64
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
 ; SANDY-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movq_reg:
 ; HASWELL:       # BB#0:
@@ -2216,10 +2216,10 @@ define void @test_movsd_mem(double* %a0,
 ;
 ; SANDY-LABEL: test_movsd_mem:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [4:0.50]
+; SANDY-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
 ; SANDY-NEXT:    vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmovsd %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmovsd %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movsd_mem:
 ; HASWELL:       # BB#0:
@@ -2266,7 +2266,7 @@ define <2 x double> @test_movsd_reg(<2 x
 ; SANDY-LABEL: test_movsd_reg:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movsd_reg:
 ; HASWELL:       # BB#0:
@@ -2305,10 +2305,10 @@ define void @test_movupd(<2 x double> *%
 ;
 ; SANDY-LABEL: test_movupd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmovupd (%rdi), %xmm0 # sched: [4:0.50]
+; SANDY-NEXT:    vmovupd (%rdi), %xmm0 # sched: [6:0.50]
 ; SANDY-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vmovupd %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmovupd %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movupd:
 ; HASWELL:       # BB#0:
@@ -2351,8 +2351,8 @@ define <2 x double> @test_mulpd(<2 x dou
 ; SANDY-LABEL: test_mulpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT:    vmulpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmulpd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_mulpd:
 ; HASWELL:       # BB#0:
@@ -2393,8 +2393,8 @@ define double @test_mulsd(double %a0, do
 ; SANDY-LABEL: test_mulsd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT:    vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmulsd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_mulsd:
 ; HASWELL:       # BB#0:
@@ -2437,10 +2437,10 @@ define <2 x double> @test_orpd(<2 x doub
 ;
 ; SANDY-LABEL: test_orpd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT:    vorpd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT:    vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SANDY-NEXT:    vorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; SANDY-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_orpd:
 ; HASWELL:       # BB#0:
@@ -2496,8 +2496,8 @@ define <8 x i16> @test_packssdw(<4 x i32
 ; SANDY-LABEL: test_packssdw:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_packssdw:
 ; HASWELL:       # BB#0:
@@ -2548,8 +2548,8 @@ define <16 x i8> @test_packsswb(<8 x i16
 ; SANDY-LABEL: test_packsswb:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpacksswb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_packsswb:
 ; HASWELL:       # BB#0:
@@ -2600,8 +2600,8 @@ define <16 x i8> @test_packuswb(<8 x i16
 ; SANDY-LABEL: test_packuswb:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpackuswb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_packuswb:
 ; HASWELL:       # BB#0:
@@ -2648,8 +2648,8 @@ define <16 x i8> @test_paddb(<16 x i8> %
 ; SANDY-LABEL: test_paddb:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpaddb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_paddb:
 ; HASWELL:       # BB#0:
@@ -2694,8 +2694,8 @@ define <4 x i32> @test_paddd(<4 x i32> %
 ; SANDY-LABEL: test_paddd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_paddd:
 ; HASWELL:       # BB#0:
@@ -2736,8 +2736,8 @@ define <2 x i64> @test_paddq(<2 x i64> %
 ; SANDY-LABEL: test_paddq:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpaddq (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_paddq:
 ; HASWELL:       # BB#0:
@@ -2781,9 +2781,9 @@ define <16 x i8> @test_paddsb(<16 x i8>
 ;
 ; SANDY-LABEL: test_paddsb:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_paddsb:
 ; HASWELL:       # BB#0:
@@ -2828,9 +2828,9 @@ define <8 x i16> @test_paddsw(<8 x i16>
 ;
 ; SANDY-LABEL: test_paddsw:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_paddsw:
 ; HASWELL:       # BB#0:
@@ -2876,8 +2876,8 @@ define <16 x i8> @test_paddusb(<16 x i8>
 ; SANDY-LABEL: test_paddusb:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_paddusb:
 ; HASWELL:       # BB#0:
@@ -2923,8 +2923,8 @@ define <8 x i16> @test_paddusw(<8 x i16>
 ; SANDY-LABEL: test_paddusw:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_paddusw:
 ; HASWELL:       # BB#0:
@@ -2969,9 +2969,9 @@ define <8 x i16> @test_paddw(<8 x i16> %
 ;
 ; SANDY-LABEL: test_paddw:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_paddw:
 ; HASWELL:       # BB#0:
@@ -3015,9 +3015,9 @@ define <2 x i64> @test_pand(<2 x i64> %a
 ; SANDY-LABEL: test_pand:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT:    vpand (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT:    vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pand:
 ; HASWELL:       # BB#0:
@@ -3070,9 +3070,9 @@ define <2 x i64> @test_pandn(<2 x i64> %
 ; SANDY-LABEL: test_pandn:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT:    vpandn (%rdi), %xmm0, %xmm1 # sched: [5:0.50]
+; SANDY-NEXT:    vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50]
 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pandn:
 ; HASWELL:       # BB#0:
@@ -3122,8 +3122,8 @@ define <16 x i8> @test_pavgb(<16 x i8> %
 ; SANDY-LABEL: test_pavgb:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpavgb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pavgb:
 ; HASWELL:       # BB#0:
@@ -3169,8 +3169,8 @@ define <8 x i16> @test_pavgw(<8 x i16> %
 ; SANDY-LABEL: test_pavgw:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpavgw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pavgw:
 ; HASWELL:       # BB#0:
@@ -3217,9 +3217,9 @@ define <16 x i8> @test_pcmpeqb(<16 x i8>
 ; SANDY-LABEL: test_pcmpeqb:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-NEXT:    vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT:    vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pcmpeqb:
 ; HASWELL:       # BB#0:
@@ -3269,9 +3269,9 @@ define <4 x i32> @test_pcmpeqd(<4 x i32>
 ; SANDY-LABEL: test_pcmpeqd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pcmpeqd:
 ; HASWELL:       # BB#0:
@@ -3321,9 +3321,9 @@ define <8 x i16> @test_pcmpeqw(<8 x i16>
 ; SANDY-LABEL: test_pcmpeqw:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-NEXT:    vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT:    vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pcmpeqw:
 ; HASWELL:       # BB#0:
@@ -3374,9 +3374,9 @@ define <16 x i8> @test_pcmpgtb(<16 x i8>
 ; SANDY-LABEL: test_pcmpgtb:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-NEXT:    vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT:    vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pcmpgtb:
 ; HASWELL:       # BB#0:
@@ -3427,9 +3427,9 @@ define <4 x i32> @test_pcmpgtd(<4 x i32>
 ; SANDY-LABEL: test_pcmpgtd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pcmpgtd:
 ; HASWELL:       # BB#0:
@@ -3480,9 +3480,9 @@ define <8 x i16> @test_pcmpgtw(<8 x i16>
 ; SANDY-LABEL: test_pcmpgtw:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-NEXT:    vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT:    vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pcmpgtw:
 ; HASWELL:       # BB#0:
@@ -3526,9 +3526,9 @@ define i16 @test_pextrw(<8 x i16> %a0) {
 ;
 ; SANDY-LABEL: test_pextrw:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpextrw $6, %xmm0, %eax # sched: [1:0.50]
+; SANDY-NEXT:    vpextrw $6, %xmm0, %eax # sched: [3:1.00]
 ; SANDY-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pextrw:
 ; HASWELL:       # BB#0:
@@ -3570,9 +3570,9 @@ define <8 x i16> @test_pinsrw(<8 x i16>
 ;
 ; SANDY-LABEL: test_pinsrw:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:1.00]
+; SANDY-NEXT:    vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pinsrw:
 ; HASWELL:       # BB#0:
@@ -3620,9 +3620,9 @@ define <4 x i32> @test_pmaddwd(<8 x i16>
 ;
 ; SANDY-LABEL: test_pmaddwd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmaddwd:
 ; HASWELL:       # BB#0:
@@ -3669,8 +3669,8 @@ define <8 x i16> @test_pmaxsw(<8 x i16>
 ; SANDY-LABEL: test_pmaxsw:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmaxsw:
 ; HASWELL:       # BB#0:
@@ -3716,8 +3716,8 @@ define <16 x i8> @test_pmaxub(<16 x i8>
 ; SANDY-LABEL: test_pmaxub:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpmaxub (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmaxub:
 ; HASWELL:       # BB#0:
@@ -3763,8 +3763,8 @@ define <8 x i16> @test_pminsw(<8 x i16>
 ; SANDY-LABEL: test_pminsw:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpminsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pminsw:
 ; HASWELL:       # BB#0:
@@ -3810,8 +3810,8 @@ define <16 x i8> @test_pminub(<16 x i8>
 ; SANDY-LABEL: test_pminub:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpminub (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pminub:
 ; HASWELL:       # BB#0:
@@ -3851,8 +3851,8 @@ define i32 @test_pmovmskb(<16 x i8> %a0)
 ;
 ; SANDY-LABEL: test_pmovmskb:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpmovmskb %xmm0, %eax # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpmovmskb %xmm0, %eax # sched: [1:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmovmskb:
 ; HASWELL:       # BB#0:
@@ -3891,7 +3891,7 @@ define <8 x i16> @test_pmulhuw(<8 x i16>
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmulhuw:
 ; HASWELL:       # BB#0:
@@ -3932,9 +3932,9 @@ define <8 x i16> @test_pmulhw(<8 x i16>
 ;
 ; SANDY-LABEL: test_pmulhw:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vpmulhw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmulhw:
 ; HASWELL:       # BB#0:
@@ -3975,9 +3975,9 @@ define <8 x i16> @test_pmullw(<8 x i16>
 ;
 ; SANDY-LABEL: test_pmullw:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vpmullw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmullw:
 ; HASWELL:       # BB#0:
@@ -4027,7 +4027,7 @@ define <2 x i64> @test_pmuludq(<4 x i32>
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vpmuludq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmuludq:
 ; HASWELL:       # BB#0:
@@ -4073,9 +4073,9 @@ define <2 x i64> @test_por(<2 x i64> %a0
 ; SANDY-LABEL: test_por:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT:    vpor (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT:    vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_por:
 ; HASWELL:       # BB#0:
@@ -4126,9 +4126,9 @@ define <2 x i64> @test_psadbw(<16 x i8>
 ;
 ; SANDY-LABEL: test_psadbw:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psadbw:
 ; HASWELL:       # BB#0:
@@ -4176,9 +4176,9 @@ define <4 x i32> @test_pshufd(<4 x i32>
 ; SANDY-LABEL: test_pshufd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:0.50]
-; SANDY-NEXT:    vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [5:0.50]
+; SANDY-NEXT:    vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:0.50]
 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pshufd:
 ; HASWELL:       # BB#0:
@@ -4226,10 +4226,10 @@ define <8 x i16> @test_pshufhw(<8 x i16>
 ;
 ; SANDY-LABEL: test_pshufhw:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50]
-; SANDY-NEXT:    vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [5:0.50]
-; SANDY-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
+; SANDY-NEXT:    vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50]
+; SANDY-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pshufhw:
 ; HASWELL:       # BB#0:
@@ -4278,9 +4278,9 @@ define <8 x i16> @test_pshuflw(<8 x i16>
 ; SANDY-LABEL: test_pshuflw:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50]
-; SANDY-NEXT:    vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [5:0.50]
-; SANDY-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50]
+; SANDY-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pshuflw:
 ; HASWELL:       # BB#0:
@@ -4326,10 +4326,10 @@ define <4 x i32> @test_pslld(<4 x i32> %
 ;
 ; SANDY-LABEL: test_pslld:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpslld %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpslld (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    vpslld $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpslld %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SANDY-NEXT:    vpslld (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT:    vpslld $2, %xmm0, %xmm0 # sched: [1:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pslld:
 ; HASWELL:       # BB#0:
@@ -4378,7 +4378,7 @@ define <4 x i32> @test_pslldq(<4 x i32>
 ; SANDY-LABEL: test_pslldq:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pslldq:
 ; HASWELL:       # BB#0:
@@ -4417,10 +4417,10 @@ define <2 x i64> @test_psllq(<2 x i64> %
 ;
 ; SANDY-LABEL: test_psllq:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpsllq (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SANDY-NEXT:    vpsllq (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT:    vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psllq:
 ; HASWELL:       # BB#0:
@@ -4468,10 +4468,10 @@ define <8 x i16> @test_psllw(<8 x i16> %
 ;
 ; SANDY-LABEL: test_psllw:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpsllw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SANDY-NEXT:    vpsllw (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT:    vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psllw:
 ; HASWELL:       # BB#0:
@@ -4519,10 +4519,10 @@ define <4 x i32> @test_psrad(<4 x i32> %
 ;
 ; SANDY-LABEL: test_psrad:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpsrad (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; SANDY-NEXT:    vpsrad (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; SANDY-NEXT:    vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psrad:
 ; HASWELL:       # BB#0:
@@ -4570,10 +4570,10 @@ define <8 x i16> @test_psraw(<8 x i16> %
 ;
 ; SANDY-LABEL: test_psraw:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpsraw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; SANDY-NEXT:    vpsraw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; SANDY-NEXT:    vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psraw:
 ; HASWELL:       # BB#0:
@@ -4621,10 +4621,10 @@ define <4 x i32> @test_psrld(<4 x i32> %
 ;
 ; SANDY-LABEL: test_psrld:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpsrld (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; SANDY-NEXT:    vpsrld (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; SANDY-NEXT:    vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psrld:
 ; HASWELL:       # BB#0:
@@ -4673,7 +4673,7 @@ define <4 x i32> @test_psrldq(<4 x i32>
 ; SANDY-LABEL: test_psrldq:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psrldq:
 ; HASWELL:       # BB#0:
@@ -4712,10 +4712,10 @@ define <2 x i64> @test_psrlq(<2 x i64> %
 ;
 ; SANDY-LABEL: test_psrlq:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpsrlq (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; SANDY-NEXT:    vpsrlq (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; SANDY-NEXT:    vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psrlq:
 ; HASWELL:       # BB#0:
@@ -4763,10 +4763,10 @@ define <8 x i16> @test_psrlw(<8 x i16> %
 ;
 ; SANDY-LABEL: test_psrlw:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpsrlw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; SANDY-NEXT:    vpsrlw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; SANDY-NEXT:    vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psrlw:
 ; HASWELL:       # BB#0:
@@ -4816,8 +4816,8 @@ define <16 x i8> @test_psubb(<16 x i8> %
 ; SANDY-LABEL: test_psubb:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpsubb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psubb:
 ; HASWELL:       # BB#0:
@@ -4862,8 +4862,8 @@ define <4 x i32> @test_psubd(<4 x i32> %
 ; SANDY-LABEL: test_psubd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpsubd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psubd:
 ; HASWELL:       # BB#0:
@@ -4904,8 +4904,8 @@ define <2 x i64> @test_psubq(<2 x i64> %
 ; SANDY-LABEL: test_psubq:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpsubq (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psubq:
 ; HASWELL:       # BB#0:
@@ -4950,8 +4950,8 @@ define <16 x i8> @test_psubsb(<16 x i8>
 ; SANDY-LABEL: test_psubsb:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psubsb:
 ; HASWELL:       # BB#0:
@@ -4997,8 +4997,8 @@ define <8 x i16> @test_psubsw(<8 x i16>
 ; SANDY-LABEL: test_psubsw:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psubsw:
 ; HASWELL:       # BB#0:
@@ -5044,8 +5044,8 @@ define <16 x i8> @test_psubusb(<16 x i8>
 ; SANDY-LABEL: test_psubusb:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psubusb:
 ; HASWELL:       # BB#0:
@@ -5091,8 +5091,8 @@ define <8 x i16> @test_psubusw(<8 x i16>
 ; SANDY-LABEL: test_psubusw:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psubusw:
 ; HASWELL:       # BB#0:
@@ -5138,8 +5138,8 @@ define <8 x i16> @test_psubw(<8 x i16> %
 ; SANDY-LABEL: test_psubw:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpsubw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psubw:
 ; HASWELL:       # BB#0:
@@ -5184,8 +5184,8 @@ define <16 x i8> @test_punpckhbw(<16 x i
 ; SANDY-LABEL: test_punpckhbw:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
-; SANDY-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_punpckhbw:
 ; HASWELL:       # BB#0:
@@ -5231,9 +5231,9 @@ define <4 x i32> @test_punpckhdq(<4 x i3
 ; SANDY-LABEL: test_punpckhdq:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
-; SANDY-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [5:0.50]
+; SANDY-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50]
 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_punpckhdq:
 ; HASWELL:       # BB#0:
@@ -5279,10 +5279,10 @@ define <2 x i64> @test_punpckhqdq(<2 x i
 ;
 ; SANDY-LABEL: test_punpckhqdq:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
-; SANDY-NEXT:    vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [5:0.50]
+; SANDY-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; SANDY-NEXT:    vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50]
 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_punpckhqdq:
 ; HASWELL:       # BB#0:
@@ -5330,8 +5330,8 @@ define <8 x i16> @test_punpckhwd(<8 x i1
 ; SANDY-LABEL: test_punpckhwd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
-; SANDY-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_punpckhwd:
 ; HASWELL:       # BB#0:
@@ -5375,9 +5375,9 @@ define <16 x i8> @test_punpcklbw(<16 x i
 ;
 ; SANDY-LABEL: test_punpcklbw:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
-; SANDY-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
+; SANDY-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_punpcklbw:
 ; HASWELL:       # BB#0:
@@ -5423,9 +5423,9 @@ define <4 x i32> @test_punpckldq(<4 x i3
 ; SANDY-LABEL: test_punpckldq:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
-; SANDY-NEXT:    vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [5:0.50]
+; SANDY-NEXT:    vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50]
 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_punpckldq:
 ; HASWELL:       # BB#0:
@@ -5472,9 +5472,9 @@ define <2 x i64> @test_punpcklqdq(<2 x i
 ; SANDY-LABEL: test_punpcklqdq:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
-; SANDY-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:0.50]
+; SANDY-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50]
 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_punpcklqdq:
 ; HASWELL:       # BB#0:
@@ -5522,8 +5522,8 @@ define <8 x i16> @test_punpcklwd(<8 x i1
 ; SANDY-LABEL: test_punpcklwd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
-; SANDY-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_punpcklwd:
 ; HASWELL:       # BB#0:
@@ -5567,9 +5567,9 @@ define <2 x i64> @test_pxor(<2 x i64> %a
 ; SANDY-LABEL: test_pxor:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT:    vpxor (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT:    vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pxor:
 ; HASWELL:       # BB#0:
@@ -5616,9 +5616,9 @@ define <2 x double> @test_shufpd(<2 x do
 ; SANDY-LABEL: test_shufpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
-; SANDY-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [5:1.00]
+; SANDY-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
 ; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_shufpd:
 ; HASWELL:       # BB#0:
@@ -5665,10 +5665,10 @@ define <2 x double> @test_sqrtpd(<2 x do
 ;
 ; SANDY-LABEL: test_sqrtpd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vsqrtpd %xmm0, %xmm0 # sched: [15:1.00]
-; SANDY-NEXT:    vsqrtpd (%rdi), %xmm1 # sched: [19:1.00]
+; SANDY-NEXT:    vsqrtpd %xmm0, %xmm0 # sched: [22:1.00]
+; SANDY-NEXT:    vsqrtpd (%rdi), %xmm1 # sched: [28:1.00]
 ; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_sqrtpd:
 ; HASWELL:       # BB#0:
@@ -5720,11 +5720,11 @@ define <2 x double> @test_sqrtsd(<2 x do
 ;
 ; SANDY-LABEL: test_sqrtsd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [19:1.00]
-; SANDY-NEXT:    vmovapd (%rdi), %xmm1 # sched: [4:0.50]
-; SANDY-NEXT:    vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [19:1.00]
+; SANDY-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:1.00]
+; SANDY-NEXT:    vmovapd (%rdi), %xmm1 # sched: [6:0.50]
+; SANDY-NEXT:    vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [21:1.00]
 ; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_sqrtsd:
 ; HASWELL:       # BB#0:
@@ -5771,8 +5771,8 @@ define <2 x double> @test_subpd(<2 x dou
 ; SANDY-LABEL: test_subpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vsubpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_subpd:
 ; HASWELL:       # BB#0:
@@ -5813,8 +5813,8 @@ define double @test_subsd(double %a0, do
 ; SANDY-LABEL: test_subsd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vsubsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_subsd:
 ; HASWELL:       # BB#0:
@@ -5879,16 +5879,16 @@ define i32 @test_ucomisd(<2 x double> %a
 ; SANDY-LABEL: test_ucomisd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vucomisd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    setnp %al # sched: [1:0.33]
-; SANDY-NEXT:    sete %cl # sched: [1:0.33]
+; SANDY-NEXT:    setnp %al # sched: [1:1.00]
+; SANDY-NEXT:    sete %cl # sched: [1:1.00]
 ; SANDY-NEXT:    andb %al, %cl # sched: [1:0.33]
 ; SANDY-NEXT:    vucomisd (%rdi), %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    setnp %al # sched: [1:0.33]
-; SANDY-NEXT:    sete %dl # sched: [1:0.33]
+; SANDY-NEXT:    setnp %al # sched: [1:1.00]
+; SANDY-NEXT:    sete %dl # sched: [1:1.00]
 ; SANDY-NEXT:    andb %al, %dl # sched: [1:0.33]
 ; SANDY-NEXT:    orb %cl, %dl # sched: [1:0.33]
 ; SANDY-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_ucomisd:
 ; HASWELL:       # BB#0:
@@ -5950,9 +5950,9 @@ define <2 x double> @test_unpckhpd(<2 x
 ; SANDY-LABEL: test_unpckhpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; SANDY-NEXT:    vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [5:1.00]
+; SANDY-NEXT:    vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
 ; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_unpckhpd:
 ; HASWELL:       # BB#0:
@@ -6005,9 +6005,9 @@ define <2 x double> @test_unpcklpd(<2 x
 ; SANDY-LABEL: test_unpcklpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; SANDY-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [5:1.00]
+; SANDY-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [7:1.00]
 ; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_unpcklpd:
 ; HASWELL:       # BB#0:
@@ -6053,10 +6053,10 @@ define <2 x double> @test_xorpd(<2 x dou
 ;
 ; SANDY-LABEL: test_xorpd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT:    vxorpd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
+; SANDY-NEXT:    vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
+; SANDY-NEXT:    vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; SANDY-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_xorpd:
 ; HASWELL:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/sse3-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse3-schedule.ll?rev=307529&r1=307528&r2=307529&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse3-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse3-schedule.ll Mon Jul 10 02:53:16 2017
@@ -31,8 +31,8 @@ define <2 x double> @test_addsubpd(<2 x
 ; SANDY-LABEL: test_addsubpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_addsubpd:
 ; HASWELL:       # BB#0:
@@ -74,8 +74,8 @@ define <4 x float> @test_addsubps(<4 x f
 ; SANDY-LABEL: test_addsubps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_addsubps:
 ; HASWELL:       # BB#0:
@@ -116,9 +116,9 @@ define <2 x double> @test_haddpd(<2 x do
 ;
 ; SANDY-LABEL: test_haddpd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
+; SANDY-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_haddpd:
 ; HASWELL:       # BB#0:
@@ -159,9 +159,9 @@ define <4 x float> @test_haddps(<4 x flo
 ;
 ; SANDY-LABEL: test_haddps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
+; SANDY-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_haddps:
 ; HASWELL:       # BB#0:
@@ -202,9 +202,9 @@ define <2 x double> @test_hsubpd(<2 x do
 ;
 ; SANDY-LABEL: test_hsubpd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
+; SANDY-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_hsubpd:
 ; HASWELL:       # BB#0:
@@ -245,9 +245,9 @@ define <4 x float> @test_hsubps(<4 x flo
 ;
 ; SANDY-LABEL: test_hsubps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
+; SANDY-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_hsubps:
 ; HASWELL:       # BB#0:
@@ -287,8 +287,8 @@ define <16 x i8> @test_lddqu(i8* %a0) {
 ;
 ; SANDY-LABEL: test_lddqu:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vlddqu (%rdi), %xmm0 # sched: [4:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vlddqu (%rdi), %xmm0 # sched: [6:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_lddqu:
 ; HASWELL:       # BB#0:
@@ -330,9 +330,9 @@ define <2 x double> @test_movddup(<2 x d
 ; SANDY-LABEL: test_movddup:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
-; SANDY-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [4:0.50]
+; SANDY-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:0.50]
 ; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movddup:
 ; HASWELL:       # BB#0:
@@ -380,9 +380,9 @@ define <4 x float> @test_movshdup(<4 x f
 ; SANDY-LABEL: test_movshdup:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
-; SANDY-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [4:0.50]
+; SANDY-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movshdup:
 ; HASWELL:       # BB#0:
@@ -430,9 +430,9 @@ define <4 x float> @test_movsldup(<4 x f
 ; SANDY-LABEL: test_movsldup:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
-; SANDY-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [4:0.50]
+; SANDY-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movsldup:
 ; HASWELL:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/sse41-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-schedule.ll?rev=307529&r1=307528&r2=307529&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41-schedule.ll Mon Jul 10 02:53:16 2017
@@ -25,10 +25,10 @@ define <2 x double> @test_blendpd(<2 x d
 ;
 ; SANDY-LABEL: test_blendpd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
+; SANDY-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:1.00]
 ; SANDY-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_blendpd:
 ; HASWELL:       # BB#0:
@@ -65,9 +65,9 @@ define <4 x float> @test_blendps(<4 x fl
 ;
 ; SANDY-LABEL: test_blendps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
-; SANDY-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:1.00]
+; SANDY-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [7:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_blendps:
 ; HASWELL:       # BB#0:
@@ -107,9 +107,9 @@ define <2 x double> @test_blendvpd(<2 x
 ;
 ; SANDY-LABEL: test_blendvpd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT:    vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
+; SANDY-NEXT:    vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_blendvpd:
 ; HASWELL:       # BB#0:
@@ -150,9 +150,9 @@ define <4 x float> @test_blendvps(<4 x f
 ;
 ; SANDY-LABEL: test_blendvps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT:    vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
+; SANDY-NEXT:    vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_blendvps:
 ; HASWELL:       # BB#0:
@@ -187,9 +187,9 @@ define <2 x double> @test_dppd(<2 x doub
 ;
 ; SANDY-LABEL: test_dppd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT:    vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_dppd:
 ; HASWELL:       # BB#0:
@@ -224,9 +224,9 @@ define <4 x float> @test_dpps(<4 x float
 ;
 ; SANDY-LABEL: test_dpps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [12:2.00]
 ; SANDY-NEXT:    vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_dpps:
 ; HASWELL:       # BB#0:
@@ -262,8 +262,8 @@ define <4 x float> @test_insertps(<4 x f
 ; SANDY-LABEL: test_insertps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
-; SANDY-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [5:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_insertps:
 ; HASWELL:       # BB#0:
@@ -296,8 +296,8 @@ define <2 x i64> @test_movntdqa(i8* %a0)
 ;
 ; SANDY-LABEL: test_movntdqa:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmovntdqa (%rdi), %xmm0 # sched: [4:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmovntdqa (%rdi), %xmm0 # sched: [6:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movntdqa:
 ; HASWELL:       # BB#0:
@@ -328,9 +328,9 @@ define <8 x i16> @test_mpsadbw(<16 x i8>
 ;
 ; SANDY-LABEL: test_mpsadbw:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [6:1.00]
-; SANDY-NEXT:    vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT:    vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_mpsadbw:
 ; HASWELL:       # BB#0:
@@ -367,8 +367,8 @@ define <8 x i16> @test_packusdw(<4 x i32
 ; SANDY-LABEL: test_packusdw:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpackusdw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_packusdw:
 ; HASWELL:       # BB#0:
@@ -411,8 +411,8 @@ define <16 x i8> @test_pblendvb(<16 x i8
 ; SANDY-LABEL: test_pblendvb:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT:    vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pblendvb:
 ; HASWELL:       # BB#0:
@@ -448,8 +448,8 @@ define <8 x i16> @test_pblendw(<8 x i16>
 ; SANDY-LABEL: test_pblendw:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
-; SANDY-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pblendw:
 ; HASWELL:       # BB#0:
@@ -483,9 +483,9 @@ define <2 x i64> @test_pcmpeqq(<2 x i64>
 ;
 ; SANDY-LABEL: test_pcmpeqq:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT:    vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pcmpeqq:
 ; HASWELL:       # BB#0:
@@ -521,9 +521,9 @@ define i32 @test_pextrb(<16 x i8> %a0, i
 ;
 ; SANDY-LABEL: test_pextrb:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpextrb $3, %xmm0, %eax # sched: [1:0.50]
+; SANDY-NEXT:    vpextrb $3, %xmm0, %eax # sched: [3:1.00]
 ; SANDY-NEXT:    vpextrb $1, %xmm0, (%rdi) # sched: [5:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pextrb:
 ; HASWELL:       # BB#0:
@@ -558,9 +558,9 @@ define i32 @test_pextrd(<4 x i32> %a0, i
 ;
 ; SANDY-LABEL: test_pextrd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpextrd $3, %xmm0, %eax # sched: [1:0.50]
+; SANDY-NEXT:    vpextrd $3, %xmm0, %eax # sched: [3:1.00]
 ; SANDY-NEXT:    vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pextrd:
 ; HASWELL:       # BB#0:
@@ -594,9 +594,9 @@ define i64 @test_pextrq(<2 x i64> %a0, <
 ;
 ; SANDY-LABEL: test_pextrq:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpextrq $1, %xmm0, %rax # sched: [1:0.50]
+; SANDY-NEXT:    vpextrq $1, %xmm0, %rax # sched: [3:1.00]
 ; SANDY-NEXT:    vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pextrq:
 ; HASWELL:       # BB#0:
@@ -630,9 +630,9 @@ define i32 @test_pextrw(<8 x i16> %a0, i
 ;
 ; SANDY-LABEL: test_pextrw:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpextrw $3, %xmm0, %eax # sched: [1:0.50]
+; SANDY-NEXT:    vpextrw $3, %xmm0, %eax # sched: [3:1.00]
 ; SANDY-NEXT:    vpextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pextrw:
 ; HASWELL:       # BB#0:
@@ -667,9 +667,9 @@ define <8 x i16> @test_phminposuw(<8 x i
 ;
 ; SANDY-LABEL: test_phminposuw:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vphminposuw (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-NEXT:    vphminposuw (%rdi), %xmm0 # sched: [11:1.00]
 ; SANDY-NEXT:    vphminposuw %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_phminposuw:
 ; HASWELL:       # BB#0:
@@ -704,9 +704,9 @@ define <16 x i8> @test_pinsrb(<16 x i8>
 ;
 ; SANDY-LABEL: test_pinsrb:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:1.00]
+; SANDY-NEXT:    vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pinsrb:
 ; HASWELL:       # BB#0:
@@ -740,9 +740,9 @@ define <4 x i32> @test_pinsrd(<4 x i32>
 ;
 ; SANDY-LABEL: test_pinsrd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:1.00]
+; SANDY-NEXT:    vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pinsrd:
 ; HASWELL:       # BB#0:
@@ -778,10 +778,10 @@ define <2 x i64> @test_pinsrq(<2 x i64>
 ;
 ; SANDY-LABEL: test_pinsrq:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [5:0.50]
+; SANDY-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:1.00]
+; SANDY-NEXT:    vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [7:0.50]
 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pinsrq:
 ; HASWELL:       # BB#0:
@@ -819,8 +819,8 @@ define <16 x i8> @test_pmaxsb(<16 x i8>
 ; SANDY-LABEL: test_pmaxsb:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmaxsb:
 ; HASWELL:       # BB#0:
@@ -856,8 +856,8 @@ define <4 x i32> @test_pmaxsd(<4 x i32>
 ; SANDY-LABEL: test_pmaxsd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmaxsd:
 ; HASWELL:       # BB#0:
@@ -893,8 +893,8 @@ define <4 x i32> @test_pmaxud(<4 x i32>
 ; SANDY-LABEL: test_pmaxud:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpmaxud (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmaxud:
 ; HASWELL:       # BB#0:
@@ -930,8 +930,8 @@ define <8 x i16> @test_pmaxuw(<8 x i16>
 ; SANDY-LABEL: test_pmaxuw:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmaxuw:
 ; HASWELL:       # BB#0:
@@ -967,8 +967,8 @@ define <16 x i8> @test_pminsb(<16 x i8>
 ; SANDY-LABEL: test_pminsb:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpminsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pminsb:
 ; HASWELL:       # BB#0:
@@ -1004,8 +1004,8 @@ define <4 x i32> @test_pminsd(<4 x i32>
 ; SANDY-LABEL: test_pminsd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpminsd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pminsd:
 ; HASWELL:       # BB#0:
@@ -1041,8 +1041,8 @@ define <4 x i32> @test_pminud(<4 x i32>
 ; SANDY-LABEL: test_pminud:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpminud (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pminud:
 ; HASWELL:       # BB#0:
@@ -1078,8 +1078,8 @@ define <8 x i16> @test_pminuw(<8 x i16>
 ; SANDY-LABEL: test_pminuw:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpminuw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pminuw:
 ; HASWELL:       # BB#0:
@@ -1118,9 +1118,9 @@ define <8 x i16> @test_pmovsxbw(<16 x i8
 ; SANDY-LABEL: test_pmovsxbw:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpmovsxbw %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpmovsxbw (%rdi), %xmm1 # sched: [5:0.50]
-; SANDY-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpmovsxbw (%rdi), %xmm1 # sched: [7:0.50]
+; SANDY-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmovsxbw:
 ; HASWELL:       # BB#0:
@@ -1162,9 +1162,9 @@ define <4 x i32> @test_pmovsxbd(<16 x i8
 ; SANDY-LABEL: test_pmovsxbd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpmovsxbd %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpmovsxbd (%rdi), %xmm1 # sched: [5:0.50]
+; SANDY-NEXT:    vpmovsxbd (%rdi), %xmm1 # sched: [7:0.50]
 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmovsxbd:
 ; HASWELL:       # BB#0:
@@ -1206,9 +1206,9 @@ define <2 x i64> @test_pmovsxbq(<16 x i8
 ; SANDY-LABEL: test_pmovsxbq:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpmovsxbq %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpmovsxbq (%rdi), %xmm1 # sched: [5:0.50]
+; SANDY-NEXT:    vpmovsxbq (%rdi), %xmm1 # sched: [7:0.50]
 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmovsxbq:
 ; HASWELL:       # BB#0:
@@ -1250,9 +1250,9 @@ define <2 x i64> @test_pmovsxdq(<4 x i32
 ; SANDY-LABEL: test_pmovsxdq:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpmovsxdq %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpmovsxdq (%rdi), %xmm1 # sched: [5:0.50]
+; SANDY-NEXT:    vpmovsxdq (%rdi), %xmm1 # sched: [7:0.50]
 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmovsxdq:
 ; HASWELL:       # BB#0:
@@ -1294,9 +1294,9 @@ define <4 x i32> @test_pmovsxwd(<8 x i16
 ; SANDY-LABEL: test_pmovsxwd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpmovsxwd %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpmovsxwd (%rdi), %xmm1 # sched: [5:0.50]
+; SANDY-NEXT:    vpmovsxwd (%rdi), %xmm1 # sched: [7:0.50]
 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmovsxwd:
 ; HASWELL:       # BB#0:
@@ -1338,9 +1338,9 @@ define <2 x i64> @test_pmovsxwq(<8 x i16
 ; SANDY-LABEL: test_pmovsxwq:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpmovsxwq %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpmovsxwq (%rdi), %xmm1 # sched: [5:0.50]
+; SANDY-NEXT:    vpmovsxwq (%rdi), %xmm1 # sched: [7:0.50]
 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmovsxwq:
 ; HASWELL:       # BB#0:
@@ -1382,9 +1382,9 @@ define <8 x i16> @test_pmovzxbw(<16 x i8
 ; SANDY-LABEL: test_pmovzxbw:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
-; SANDY-NEXT:    vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:0.50]
-; SANDY-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50]
+; SANDY-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmovzxbw:
 ; HASWELL:       # BB#0:
@@ -1426,9 +1426,9 @@ define <4 x i32> @test_pmovzxbd(<16 x i8
 ; SANDY-LABEL: test_pmovzxbd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
-; SANDY-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [5:0.50]
+; SANDY-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50]
 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmovzxbd:
 ; HASWELL:       # BB#0:
@@ -1470,9 +1470,9 @@ define <2 x i64> @test_pmovzxbq(<16 x i8
 ; SANDY-LABEL: test_pmovzxbq:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
-; SANDY-NEXT:    vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [5:0.50]
+; SANDY-NEXT:    vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50]
 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmovzxbq:
 ; HASWELL:       # BB#0:
@@ -1514,9 +1514,9 @@ define <2 x i64> @test_pmovzxdq(<4 x i32
 ; SANDY-LABEL: test_pmovzxdq:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
-; SANDY-NEXT:    vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [5:0.50]
+; SANDY-NEXT:    vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [7:0.50]
 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmovzxdq:
 ; HASWELL:       # BB#0:
@@ -1558,9 +1558,9 @@ define <4 x i32> @test_pmovzxwd(<8 x i16
 ; SANDY-LABEL: test_pmovzxwd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
-; SANDY-NEXT:    vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [5:0.50]
+; SANDY-NEXT:    vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50]
 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmovzxwd:
 ; HASWELL:       # BB#0:
@@ -1602,9 +1602,9 @@ define <2 x i64> @test_pmovzxwq(<8 x i16
 ; SANDY-LABEL: test_pmovzxwq:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
-; SANDY-NEXT:    vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [5:0.50]
+; SANDY-NEXT:    vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50]
 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmovzxwq:
 ; HASWELL:       # BB#0:
@@ -1642,9 +1642,9 @@ define <2 x i64> @test_pmuldq(<4 x i32>
 ;
 ; SANDY-LABEL: test_pmuldq:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vpmuldq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmuldq:
 ; HASWELL:       # BB#0:
@@ -1680,9 +1680,9 @@ define <4 x i32> @test_pmulld(<4 x i32>
 ;
 ; SANDY-LABEL: test_pmulld:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpmulld %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT:    vpmulld %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vpmulld (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmulld:
 ; HASWELL:       # BB#0:
@@ -1724,13 +1724,13 @@ define i32 @test_ptest(<2 x i64> %a0, <2
 ;
 ; SANDY-LABEL: test_ptest:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vptest %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT:    setb %al # sched: [1:0.33]
-; SANDY-NEXT:    vptest (%rdi), %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    setb %cl # sched: [1:0.33]
+; SANDY-NEXT:    vptest %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-NEXT:    setb %al # sched: [1:1.00]
+; SANDY-NEXT:    vptest (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-NEXT:    setb %cl # sched: [1:1.00]
 ; SANDY-NEXT:    andb %al, %cl # sched: [1:0.33]
 ; SANDY-NEXT:    movzbl %cl, %eax # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_ptest:
 ; HASWELL:       # BB#0:
@@ -1778,9 +1778,9 @@ define <2 x double> @test_roundpd(<2 x d
 ; SANDY-LABEL: test_roundpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vroundpd $7, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vroundpd $7, (%rdi), %xmm1 # sched: [7:1.00]
+; SANDY-NEXT:    vroundpd $7, (%rdi), %xmm1 # sched: [9:1.00]
 ; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_roundpd:
 ; HASWELL:       # BB#0:
@@ -1822,9 +1822,9 @@ define <4 x float> @test_roundps(<4 x fl
 ; SANDY-LABEL: test_roundps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vroundps $7, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vroundps $7, (%rdi), %xmm1 # sched: [7:1.00]
+; SANDY-NEXT:    vroundps $7, (%rdi), %xmm1 # sched: [9:1.00]
 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_roundps:
 ; HASWELL:       # BB#0:
@@ -1867,9 +1867,9 @@ define <2 x double> @test_roundsd(<2 x d
 ; SANDY-LABEL: test_roundsd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
-; SANDY-NEXT:    vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; SANDY-NEXT:    vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; SANDY-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_roundsd:
 ; HASWELL:       # BB#0:
@@ -1912,9 +1912,9 @@ define <4 x float> @test_roundss(<4 x fl
 ; SANDY-LABEL: test_roundss:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
-; SANDY-NEXT:    vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
+; SANDY-NEXT:    vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; SANDY-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_roundss:
 ; HASWELL:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/sse42-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse42-schedule.ll?rev=307529&r1=307528&r2=307529&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse42-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse42-schedule.ll Mon Jul 10 02:53:16 2017
@@ -26,9 +26,9 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1,
 ; SANDY-LABEL: crc32_32_8:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
-; SANDY-NEXT:    crc32b (%rdx), %edi # sched: [7:1.00]
+; SANDY-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
 ; SANDY-NEXT:    movl %edi, %eax # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: crc32_32_8:
 ; HASWELL:       # BB#0:
@@ -68,9 +68,9 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1
 ; SANDY-LABEL: crc32_32_16:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    crc32w %si, %edi # sched: [3:1.00]
-; SANDY-NEXT:    crc32w (%rdx), %edi # sched: [7:1.00]
+; SANDY-NEXT:    crc32w (%rdx), %edi # sched: [8:1.00]
 ; SANDY-NEXT:    movl %edi, %eax # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: crc32_32_16:
 ; HASWELL:       # BB#0:
@@ -112,7 +112,7 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1
 ; SANDY-NEXT:    crc32l %esi, %edi # sched: [3:1.00]
 ; SANDY-NEXT:    crc32l (%rdx), %edi # sched: [7:1.00]
 ; SANDY-NEXT:    movl %edi, %eax # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: crc32_32_32:
 ; HASWELL:       # BB#0:
@@ -152,9 +152,9 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1,
 ; SANDY-LABEL: crc32_64_8:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    crc32b %sil, %edi # sched: [3:1.00]
-; SANDY-NEXT:    crc32b (%rdx), %edi # sched: [7:1.00]
+; SANDY-NEXT:    crc32b (%rdx), %edi # sched: [8:1.00]
 ; SANDY-NEXT:    movq %rdi, %rax # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: crc32_64_8:
 ; HASWELL:       # BB#0:
@@ -196,7 +196,7 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1
 ; SANDY-NEXT:    crc32q %rsi, %rdi # sched: [3:1.00]
 ; SANDY-NEXT:    crc32q (%rdx), %rdi # sched: [7:1.00]
 ; SANDY-NEXT:    movq %rdi, %rax # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: crc32_64_64:
 ; HASWELL:       # BB#0:
@@ -256,7 +256,7 @@ define i32 @test_pcmpestri(<16 x i8> %a0
 ; SANDY-NEXT:    vpcmpestri $7, (%rdi), %xmm0 # sched: [4:2.33]
 ; SANDY-NEXT:    # kill: %ECX<def> %ECX<kill> %RCX<def>
 ; SANDY-NEXT:    leal (%rcx,%rsi), %eax # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pcmpestri:
 ; HASWELL:       # BB#0:
@@ -320,7 +320,7 @@ define <16 x i8> @test_pcmpestrm(<16 x i
 ; SANDY-NEXT:    movl $7, %eax # sched: [1:0.33]
 ; SANDY-NEXT:    movl $7, %edx # sched: [1:0.33]
 ; SANDY-NEXT:    vpcmpestrm $7, (%rdi), %xmm0 # sched: [11:2.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pcmpestrm:
 ; HASWELL:       # BB#0:
@@ -369,12 +369,12 @@ define i32 @test_pcmpistri(<16 x i8> %a0
 ;
 ; SANDY-LABEL: test_pcmpistri:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpcmpistri $7, %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT:    vpcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00]
 ; SANDY-NEXT:    movl %ecx, %eax # sched: [1:0.33]
-; SANDY-NEXT:    vpcmpistri $7, (%rdi), %xmm0 # sched: [3:1.00]
+; SANDY-NEXT:    vpcmpistri $7, (%rdi), %xmm0 # sched: [17:3.00]
 ; SANDY-NEXT:    # kill: %ECX<def> %ECX<kill> %RCX<def>
 ; SANDY-NEXT:    leal (%rcx,%rax), %eax # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pcmpistri:
 ; HASWELL:       # BB#0:
@@ -416,9 +416,9 @@ define <16 x i8> @test_pcmpistrm(<16 x i
 ;
 ; SANDY-LABEL: test_pcmpistrm:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:1.00]
-; SANDY-NEXT:    vpcmpistrm $7, (%rdi), %xmm0 # sched: [11:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
+; SANDY-NEXT:    vpcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pcmpistrm:
 ; HASWELL:       # BB#0:
@@ -453,9 +453,9 @@ define <2 x i64> @test_pcmpgtq(<2 x i64>
 ;
 ; SANDY-LABEL: test_pcmpgtq:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT:    vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pcmpgtq:
 ; HASWELL:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll?rev=307529&r1=307528&r2=307529&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll Mon Jul 10 02:53:16 2017
@@ -35,9 +35,9 @@ define <16 x i8> @test_pabsb(<16 x i8> %
 ; SANDY-LABEL: test_pabsb:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpabsb %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpabsb (%rdi), %xmm1 # sched: [5:0.50]
+; SANDY-NEXT:    vpabsb (%rdi), %xmm1 # sched: [7:0.50]
 ; SANDY-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pabsb:
 ; HASWELL:       # BB#0:
@@ -86,9 +86,9 @@ define <4 x i32> @test_pabsd(<4 x i32> %
 ; SANDY-LABEL: test_pabsd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpabsd %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpabsd (%rdi), %xmm1 # sched: [5:0.50]
+; SANDY-NEXT:    vpabsd (%rdi), %xmm1 # sched: [7:0.50]
 ; SANDY-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pabsd:
 ; HASWELL:       # BB#0:
@@ -136,7 +136,7 @@ define <8 x i16> @test_pabsw(<8 x i16> %
 ; SANDY-LABEL: test_pabsw:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpabsw %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pabsw:
 ; HASWELL:       # BB#0:
@@ -182,8 +182,8 @@ define <8 x i16> @test_palignr(<8 x i16>
 ; SANDY-LABEL: test_palignr:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50]
-; SANDY-NEXT:    vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_palignr:
 ; HASWELL:       # BB#0:
@@ -223,9 +223,9 @@ define <4 x i32> @test_phaddd(<4 x i32>
 ;
 ; SANDY-LABEL: test_phaddd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vphaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vphaddd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
+; SANDY-NEXT:    vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_phaddd:
 ; HASWELL:       # BB#0:
@@ -274,9 +274,9 @@ define <8 x i16> @test_phaddsw(<8 x i16>
 ;
 ; SANDY-LABEL: test_phaddsw:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vphaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vphaddsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
+; SANDY-NEXT:    vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_phaddsw:
 ; HASWELL:       # BB#0:
@@ -317,9 +317,9 @@ define <8 x i16> @test_phaddw(<8 x i16>
 ;
 ; SANDY-LABEL: test_phaddw:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vphaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vphaddw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
+; SANDY-NEXT:    vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_phaddw:
 ; HASWELL:       # BB#0:
@@ -360,9 +360,9 @@ define <4 x i32> @test_phsubd(<4 x i32>
 ;
 ; SANDY-LABEL: test_phsubd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vphsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vphsubd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
+; SANDY-NEXT:    vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_phsubd:
 ; HASWELL:       # BB#0:
@@ -411,9 +411,9 @@ define <8 x i16> @test_phsubsw(<8 x i16>
 ;
 ; SANDY-LABEL: test_phsubsw:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vphsubsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
+; SANDY-NEXT:    vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_phsubsw:
 ; HASWELL:       # BB#0:
@@ -454,9 +454,9 @@ define <8 x i16> @test_phsubw(<8 x i16>
 ;
 ; SANDY-LABEL: test_phsubw:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vphsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vphsubw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
+; SANDY-NEXT:    vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_phsubw:
 ; HASWELL:       # BB#0:
@@ -497,9 +497,9 @@ define <8 x i16> @test_pmaddubsw(<16 x i
 ;
 ; SANDY-LABEL: test_pmaddubsw:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmaddubsw:
 ; HASWELL:       # BB#0:
@@ -538,8 +538,8 @@ define <8 x i16> @test_pmulhrsw(<8 x i16
 ;
 ; SANDY-LABEL: test_pmulhrsw:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmulhrsw:
 ; HASWELL:       # BB#0:
@@ -579,8 +579,8 @@ define <16 x i8> @test_pshufb(<16 x i8>
 ; SANDY-LABEL: test_pshufb:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpshufb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pshufb:
 ; HASWELL:       # BB#0:
@@ -630,8 +630,8 @@ define <16 x i8> @test_psignb(<16 x i8>
 ; SANDY-LABEL: test_psignb:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpsignb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psignb:
 ; HASWELL:       # BB#0:
@@ -681,8 +681,8 @@ define <4 x i32> @test_psignd(<4 x i32>
 ; SANDY-LABEL: test_psignd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpsignd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psignd:
 ; HASWELL:       # BB#0:
@@ -732,8 +732,8 @@ define <8 x i16> @test_psignw(<8 x i16>
 ; SANDY-LABEL: test_psignw:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT:    vpsignw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; SANDY-NEXT:    retq # sched: [5:1.00]
+; SANDY-NEXT:    vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psignw:
 ; HASWELL:       # BB#0:




More information about the llvm-commits mailing list