[llvm] r310792 - [X86][SandyBridge] Additional updates to the SNB instructions scheduling information

Gadi Haber via llvm-commits llvm-commits at lists.llvm.org
Sun Aug 13 06:59:24 PDT 2017


Author: gadi.haber
Date: Sun Aug 13 06:59:24 2017
New Revision: 310792

URL: http://llvm.org/viewvc/llvm-project?rev=310792&view=rev
Log:
[X86][SandyBridge] Additional updates to the SNB instructions scheduling information

This is a continuation patch for commit r307529 which completely replaces the scheduling information for the SandyBridge architecture target by modifying the file X86SchedSandyBridge.td located under the X86 Target (see also https://reviews.llvm.org/D35019).

In this patch we added the scheduling information of additional SNB instructions that were missing from the patch commit r307529, fixed the scheduling of several resource groups that include only port0 instead of port05 (i.e., port0 OR port5) and fixed several incorrect instructions' scheduling in the r307529 commit.

The patch also includes the X87 instructions which were missing in previous patch commit r307529 as reported in bugzilla bug 34080.

Reviewers: zvi, RKSimon, chandlerc, igorb, m_zuckerman, craig.topper, aymanmus, dim

Differential Revision: https://reviews.llvm.org/D36388



Modified:
    llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
    llvm/trunk/test/CodeGen/X86/avx-schedule.ll
    llvm/trunk/test/CodeGen/X86/bmi-schedule.ll
    llvm/trunk/test/CodeGen/X86/popcnt-schedule.ll
    llvm/trunk/test/CodeGen/X86/pr34080.ll
    llvm/trunk/test/CodeGen/X86/recip-fastmath.ll
    llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll
    llvm/trunk/test/CodeGen/X86/sse-schedule.ll
    llvm/trunk/test/CodeGen/X86/sse2-schedule.ll
    llvm/trunk/test/CodeGen/X86/sse41-schedule.ll
    llvm/trunk/test/CodeGen/X86/sse42-schedule.ll
    llvm/trunk/test/CodeGen/X86/x86-cmov-converter.ll

Modified: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td?rev=310792&r1=310791&r2=310792&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td Sun Aug 13 06:59:24 2017
@@ -289,28 +289,28 @@ def SBWriteResGroup0 : SchedWriteRes<[SB
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[SBWriteResGroup0], (instregex "CVTSS2SDrr")>;
-def: InstRW<[SBWriteResGroup0], (instregex "PSLLDri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "PSLLQri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "PSLLWri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "PSRADri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "PSRAWri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "PSRLDri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "PSRLQri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "PSRLWri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VCVTSS2SDrr")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VPMOVMSKBrr")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VPSLLDri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VPSLLQri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VPSLLWri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VPSRADri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VPSRAWri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VPSRLDri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VPSRLQri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VPSRLWri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VTESTPDYrr")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VTESTPDrr")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VTESTPSYrr")>;
+def: InstRW<[SBWriteResGroup0], (instregex "CVTSS2SDrr")>;
+def: InstRW<[SBWriteResGroup0], (instregex "PSLLDri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "PSLLQri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "PSLLWri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "PSRADri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "PSRAWri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "PSRLDri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "PSRLQri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "PSRLWri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VCVTSS2SDrr")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VPMOVMSKBrr")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VPSLLDri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VPSLLQri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VPSLLWri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VPSRADri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VPSRAWri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VPSRLDri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VPSRLQri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VPSRLWri")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VTESTPDYrr")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VTESTPDrr")>;
+def: InstRW<[SBWriteResGroup0], (instregex "VTESTPSYrr")>;
 def: InstRW<[SBWriteResGroup0], (instregex "VTESTPSrr")>;
 
 def SBWriteResGroup1 : SchedWriteRes<[SBPort1]> {
@@ -328,95 +328,139 @@ def SBWriteResGroup2 : SchedWriteRes<[SB
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[SBWriteResGroup2], (instregex "ANDNPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "ANDNPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "ANDPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "ANDPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "FDECSTP")>;
-def: InstRW<[SBWriteResGroup2], (instregex "FFREE")>;
-def: InstRW<[SBWriteResGroup2], (instregex "FINCSTP")>;
-def: InstRW<[SBWriteResGroup2], (instregex "FNOP")>;
-def: InstRW<[SBWriteResGroup2], (instregex "INSERTPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "JMP64r")>;
-def: InstRW<[SBWriteResGroup2], (instregex "LD_Frr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOV64toPQIrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVAPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVAPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVDDUPrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVDI2PDIrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVHLPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVLHPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVSDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVSHDUPrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVSLDUPrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVSSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVUPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVUPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "ORPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "ORPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "RETQ")>;
-def: InstRW<[SBWriteResGroup2], (instregex "SHUFPDrri")>;
-def: InstRW<[SBWriteResGroup2], (instregex "SHUFPSrri")>;
-def: InstRW<[SBWriteResGroup2], (instregex "ST_FPrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "ST_Frr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "UNPCKHPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "UNPCKHPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "UNPCKLPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "UNPCKLPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VANDNPDYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VANDNPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VANDNPSYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VANDNPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VANDPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VANDPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VANDPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VEXTRACTF128rr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VINSERTF128rr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VINSERTPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOV64toPQIrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOV64toPQIrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPDYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPSYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVDDUPYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVDDUPrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVHLPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVHLPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVSDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVSHDUPYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVSHDUPrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVSLDUPYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVSLDUPrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVSSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPDYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPSYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VORPDYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VORPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VORPSYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VORPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPDri")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPDrm")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPSri")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPSrm")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPDYrri")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPDrri")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPSYrri")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPSrri")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKHPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKHPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPDYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPSYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VXORPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VXORPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "XORPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "ANDNPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "ANDNPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "ANDPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "ANDPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "FDECSTP")>;
+def: InstRW<[SBWriteResGroup2], (instregex "FFREE")>;
+def: InstRW<[SBWriteResGroup2], (instregex "FINCSTP")>;
+def: InstRW<[SBWriteResGroup2], (instregex "FNOP")>;
+def: InstRW<[SBWriteResGroup2], (instregex "INSERTPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JAE_1")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JAE_4")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JA_1")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JA_4")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JBE_1")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JBE_4")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JB_1")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JB_4")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JE_1")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JE_4")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JGE_1")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JGE_4")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JG_1")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JG_4")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JLE_1")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JLE_4")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JL_1")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JL_4")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JMP64r")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JMP_1")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JMP_4")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JNE_1")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JNE_4")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JNO_1")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JNO_4")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JNP_1")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JNP_4")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JNS_1")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JNS_4")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JO_1")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JO_4")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JP_1")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JP_4")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JS_1")>;
+def: InstRW<[SBWriteResGroup2], (instregex "JS_4")>;
+def: InstRW<[SBWriteResGroup2], (instregex "LD_Frr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "LOOP")>;
+def: InstRW<[SBWriteResGroup2], (instregex "LOOPE")>;
+def: InstRW<[SBWriteResGroup2], (instregex "LOOPNE")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOV64toPQIrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVAPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVAPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVDDUPrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVDI2PDIrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVHLPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVLHPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVSDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVSHDUPrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVSLDUPrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVSSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVUPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "MOVUPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "ORPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "ORPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "RETQ")>;
+def: InstRW<[SBWriteResGroup2], (instregex "SHUFPDrri")>;
+def: InstRW<[SBWriteResGroup2], (instregex "SHUFPSrri")>;
+def: InstRW<[SBWriteResGroup2], (instregex "ST_FPrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "ST_Frr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "UNPCKHPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "UNPCKHPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "UNPCKLPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "UNPCKLPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VANDNPDYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VANDNPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VANDNPSYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VANDNPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VANDPDYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VANDPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VANDPSYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VANDPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VEXTRACTF128rr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VINSERTF128rr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VINSERTPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOV64toPQIrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPDYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPSYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVDDUPYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVDDUPrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVDI2PDIrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVHLPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVHLPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVSDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVSHDUPYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVSHDUPrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVSLDUPYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVSLDUPrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVSSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPDYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPSYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VORPDYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VORPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VORPSYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VORPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VPERM2F128rr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPDYri")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPDYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPDri")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPSYri")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPSYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPSri")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPDYrri")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPDrri")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPSYrri")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPSrri")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKHPDYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKHPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKHPSYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKHPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPDYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPSYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VXORPDYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VXORPDrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VXORPSYrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "VXORPSrr")>;
+def: InstRW<[SBWriteResGroup2], (instregex "XORPDrr")>;
 def: InstRW<[SBWriteResGroup2], (instregex "XORPSrr")>;
 
 def SBWriteResGroup3 : SchedWriteRes<[SBPort01]> {
@@ -424,56 +468,56 @@ def SBWriteResGroup3 : SchedWriteRes<[SB
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[SBWriteResGroup3], (instregex "LEA64_32r")>;
+def: InstRW<[SBWriteResGroup3], (instregex "LEA(16|32|64)r")>;
 
-def SBWriteResGroup4 : SchedWriteRes<[SBPort0]> {
+def SBWriteResGroup4 : SchedWriteRes<[SBPort05]> {
   let Latency = 1;
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[SBWriteResGroup4], (instregex "BLENDPDrri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "BLENDPSrri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "BT32ri8")>;
-def: InstRW<[SBWriteResGroup4], (instregex "BT32rr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "BTC32ri8")>;
-def: InstRW<[SBWriteResGroup4], (instregex "BTC32rr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "BTR32ri8")>;
-def: InstRW<[SBWriteResGroup4], (instregex "BTR32rr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "BTS32ri8")>;
-def: InstRW<[SBWriteResGroup4], (instregex "BTS32rr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "CDQ")>;
-def: InstRW<[SBWriteResGroup4], (instregex "CQO")>;
-def: InstRW<[SBWriteResGroup4], (instregex "LAHF")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SAHF")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SAR32ri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SAR8ri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETAEr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETBr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETEr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETGEr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETGr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETLEr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETLr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETNEr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETNOr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETNPr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETNSr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETOr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETPr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETSr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SHL32ri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SHL64r1")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SHL8r1")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SHL8ri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SHR32ri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SHR8ri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPDYrri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPDrri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPSYrri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPSrri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "VMOVDQAYrr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "VMOVDQArr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "VMOVDQUYrr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "BLENDPDrri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "BLENDPSrri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "BT(16|32|64)ri8")>;
+def: InstRW<[SBWriteResGroup4], (instregex "BT(16|32|64)rr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "BTC(16|32|64)ri8")>;
+def: InstRW<[SBWriteResGroup4], (instregex "BTC(16|32|64)rr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "BTR(16|32|64)ri8")>;
+def: InstRW<[SBWriteResGroup4], (instregex "BTR(16|32|64)rr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "BTS(16|32|64)ri8")>;
+def: InstRW<[SBWriteResGroup4], (instregex "BTS(16|32|64)rr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "CDQ")>;
+def: InstRW<[SBWriteResGroup4], (instregex "CQO")>;
+def: InstRW<[SBWriteResGroup4], (instregex "LAHF")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SAHF")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SAR(16|32|64)ri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SAR8ri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETAEr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETBr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETEr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETGEr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETGr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETLEr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETLr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETNEr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETNOr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETNPr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETNSr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETOr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETPr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SETSr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SHL(16|32|64)ri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SHL(16|32|64)r1")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SHL8r1")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SHL8ri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SHR(16|32|64)ri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "SHR8ri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPDYrri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPDrri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPSYrri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPSrri")>;
+def: InstRW<[SBWriteResGroup4], (instregex "VMOVDQAYrr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "VMOVDQArr")>;
+def: InstRW<[SBWriteResGroup4], (instregex "VMOVDQUYrr")>;
 def: InstRW<[SBWriteResGroup4], (instregex "VMOVDQUrr")>;
 
 def SBWriteResGroup5 : SchedWriteRes<[SBPort15]> {
@@ -481,159 +525,164 @@ def SBWriteResGroup5 : SchedWriteRes<[SB
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[SBWriteResGroup5], (instregex "KORTESTBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABSBrr64")>;
-def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABSDrr64")>;
-def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABSWrr64")>;
-def: InstRW<[SBWriteResGroup5], (instregex "MMX_PADDQirr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "MMX_PALIGNR64irr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "MMX_PSHUFBrr64")>;
-def: InstRW<[SBWriteResGroup5], (instregex "MMX_PSIGNBrr64")>;
-def: InstRW<[SBWriteResGroup5], (instregex "MMX_PSIGNDrr64")>;
-def: InstRW<[SBWriteResGroup5], (instregex "MMX_PSIGNWrr64")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PABSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PABSDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PABSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PACKSSDWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PACKSSWBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PACKUSDWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PACKUSWBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PADDBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PADDDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PADDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PADDSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PADDSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PADDUSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PADDUSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PADDWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PALIGNRrri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PAVGBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PAVGWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PBLENDWrri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PCMPGTBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PCMPGTDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PCMPGTWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMAXSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMAXSDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMAXSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMAXUBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMAXUDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMAXUWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMINSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMINSDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMINSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMINUBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMINUDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMINUWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXBDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXBQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXBWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXWDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXWQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXBDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXBQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXBWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXWDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXWQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSHUFBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSHUFDri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSHUFHWri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSHUFLWri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSIGNBrr128")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSIGNDrr128")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSIGNWrr128")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSLLDQri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSRLDQri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSUBBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSUBDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSUBQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSUBSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSUBSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSUBUSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSUBUSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSUBWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHBWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHQDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHWDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKLBWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKLDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKLQDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKLWDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VMASKMOVPSYrm")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPABSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPABSDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPABSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPACKSSDWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPACKSSWBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPACKUSDWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPACKUSWBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPADDBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPADDDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPADDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPADDUSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPADDUSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPALIGNRrri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPAVGBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPAVGWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPBLENDWrri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPCMPEQBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPCMPEQDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPCMPEQWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPCMPGTBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPCMPGTDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPCMPGTWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMAXSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMAXSDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMAXSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMAXUBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMAXUDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMAXUWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMINSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMINSDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMINSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMINUBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMINUDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMINUWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXBDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXBQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXBWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXWDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXWQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXBDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXBQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXBWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXWDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXWQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSHUFBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSHUFDri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSHUFLWri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSIGNBrr128")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSIGNDrr128")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSIGNWrr128")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSLLDQri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSRLDQri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSUBBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSUBDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSUBQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSUBSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSUBSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSUBUSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSUBUSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSUBWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKHBWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKHDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKHWDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKLDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKLQDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABSBrr64")>;
+def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABSDrr64")>;
+def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABSWrr64")>;
+def: InstRW<[SBWriteResGroup5], (instregex "MMX_PADDQirr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "MMX_PALIGNR64irr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "MMX_PSHUFBrr64")>;
+def: InstRW<[SBWriteResGroup5], (instregex "MMX_PSIGNBrr64")>;
+def: InstRW<[SBWriteResGroup5], (instregex "MMX_PSIGNDrr64")>;
+def: InstRW<[SBWriteResGroup5], (instregex "MMX_PSIGNWrr64")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PABSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PABSDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PABSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PACKSSDWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PACKSSWBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PACKUSDWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PACKUSWBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PADDBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PADDDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PADDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PADDSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PADDSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PADDUSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PADDUSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PADDWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PALIGNRrri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PAVGBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PAVGWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PBLENDWrri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PCMPGTBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PCMPGTDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PCMPGTWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMAXSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMAXSDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMAXSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMAXUBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMAXUDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMAXUWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMINSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMINSDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMINSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMINUBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMINUDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMINUWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXBDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXBQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXBWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXWDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXWQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXBDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXBQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXBWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXWDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXWQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSHUFBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSHUFDri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSHUFHWri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSHUFLWri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSIGNBrr128")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSIGNDrr128")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSIGNWrr128")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSLLDQri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSRLDQri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSUBBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSUBDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSUBQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSUBSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSUBSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSUBUSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSUBUSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PSUBWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHBWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHQDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHWDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKLBWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKLDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKLQDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKLWDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPABSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPABSDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPABSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPACKSSDWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPACKSSWBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPACKUSDWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPACKUSWBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPADDBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPADDDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPADDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPADDSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPADDSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPADDUSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPADDUSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPADDWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPALIGNRrri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPAVGBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPAVGWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPBLENDWrri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPCMPEQBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPCMPEQDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPCMPEQQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPCMPEQWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPCMPGTBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPCMPGTDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPCMPGTWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMAXSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMAXSDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMAXSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMAXUBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMAXUDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMAXUWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMINSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMINSDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMINSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMINUBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMINUDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMINUWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXBDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXBQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXBWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXWDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXWQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXBDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXBQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXBWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXWDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXWQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSHUFBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSHUFDri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSHUFHWri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSHUFLWri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSIGNBrr128")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSIGNDrr128")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSIGNWrr128")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSLLDQri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSRLDQri")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSUBBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSUBDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSUBQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSUBSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSUBSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSUBUSBrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSUBUSWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPSUBWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKHBWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKHDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKHQDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKHWDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKLBWrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKLDQrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKLQDQrr")>;
 def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKLWDrr")>;
 
 def SBWriteResGroup6 : SchedWriteRes<[SBPort015]> {
@@ -641,68 +690,74 @@ def SBWriteResGroup6 : SchedWriteRes<[SB
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[SBWriteResGroup6], (instregex "ADD32ri8")>;
-def: InstRW<[SBWriteResGroup6], (instregex "ADD32rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "ADD8ri")>;
-def: InstRW<[SBWriteResGroup6], (instregex "ADD8rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "AND32ri")>;
-def: InstRW<[SBWriteResGroup6], (instregex "AND64ri8")>;
-def: InstRW<[SBWriteResGroup6], (instregex "AND64rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "AND8ri")>;
-def: InstRW<[SBWriteResGroup6], (instregex "AND8rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "CBW")>;
-def: InstRW<[SBWriteResGroup6], (instregex "CMC")>;
-def: InstRW<[SBWriteResGroup6], (instregex "CMP16ri8")>;
-def: InstRW<[SBWriteResGroup6], (instregex "CMP32i32")>;
-def: InstRW<[SBWriteResGroup6], (instregex "CMP64rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "CMP8ri")>;
-def: InstRW<[SBWriteResGroup6], (instregex "CMP8rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "CWDE")>;
-def: InstRW<[SBWriteResGroup6], (instregex "DEC64r")>;
-def: InstRW<[SBWriteResGroup6], (instregex "DEC8r")>;
-def: InstRW<[SBWriteResGroup6], (instregex "INC64r")>;
-def: InstRW<[SBWriteResGroup6], (instregex "INC8r")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MMX_MOVD64from64rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MMX_MOVQ2DQrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MOV32rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MOV8ri")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MOV8rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MOVDQArr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MOVDQUrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MOVPQI2QIrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MOVSX32rr16")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MOVSX32rr8")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MOVZX32rr16")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MOVZX32rr8")>;
-def: InstRW<[SBWriteResGroup6], (instregex "NEG64r")>;
-def: InstRW<[SBWriteResGroup6], (instregex "NEG8r")>;
-def: InstRW<[SBWriteResGroup6], (instregex "NOT64r")>;
-def: InstRW<[SBWriteResGroup6], (instregex "NOT8r")>;
-def: InstRW<[SBWriteResGroup6], (instregex "OR64ri8")>;
-def: InstRW<[SBWriteResGroup6], (instregex "OR64rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "OR8ri")>;
-def: InstRW<[SBWriteResGroup6], (instregex "OR8rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "PANDNrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "PANDrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "PORrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "PXORrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "STC")>;
-def: InstRW<[SBWriteResGroup6], (instregex "SUB64ri8")>;
-def: InstRW<[SBWriteResGroup6], (instregex "SUB64rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "SUB8ri")>;
-def: InstRW<[SBWriteResGroup6], (instregex "SUB8rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "TEST64rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "TEST8ri")>;
-def: InstRW<[SBWriteResGroup6], (instregex "TEST8rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "VMOVPQI2QIrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "VMOVZPQILo2PQIrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "VPANDNrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "VPANDrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "VPORrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "VPXORrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "XOR32rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "XOR64ri8")>;
-def: InstRW<[SBWriteResGroup6], (instregex "XOR8ri")>;
+def: InstRW<[SBWriteResGroup6], (instregex "ADD(16|32|64)ri8")>;
+def: InstRW<[SBWriteResGroup6], (instregex "ADD(16|32|64)rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "ADD8i8")>;
+def: InstRW<[SBWriteResGroup6], (instregex "ADD8ri")>;
+def: InstRW<[SBWriteResGroup6], (instregex "ADD8rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "AND(16|32|64)ri8")>;
+def: InstRW<[SBWriteResGroup6], (instregex "AND(16|32|64)rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "AND8i8")>;
+def: InstRW<[SBWriteResGroup6], (instregex "AND8ri")>;
+def: InstRW<[SBWriteResGroup6], (instregex "AND8rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "CBW")>;
+def: InstRW<[SBWriteResGroup6], (instregex "CMC")>;
+def: InstRW<[SBWriteResGroup6], (instregex "CMP(16|32|64)ri8")>;
+def: InstRW<[SBWriteResGroup6], (instregex "CMP(16|32|64)rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "CMP8i8")>;
+def: InstRW<[SBWriteResGroup6], (instregex "CMP8ri")>;
+def: InstRW<[SBWriteResGroup6], (instregex "CMP8rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "CWDE")>;
+def: InstRW<[SBWriteResGroup6], (instregex "DEC(16|32|64)r")>;
+def: InstRW<[SBWriteResGroup6], (instregex "DEC8r")>;
+def: InstRW<[SBWriteResGroup6], (instregex "INC(16|32|64)r")>;
+def: InstRW<[SBWriteResGroup6], (instregex "INC8r")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MMX_MOVD64from64rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MMX_MOVQ2DQrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MOV(16|32|64)rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MOV8ri")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MOV8rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MOVDQArr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MOVDQUrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MOVPQI2QIrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MOVSX(16|32|64)rr16")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MOVSX(16|32|64)rr32")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MOVSX(16|32|64)rr8")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MOVZX(16|32|64)rr16")>;
+def: InstRW<[SBWriteResGroup6], (instregex "MOVZX(16|32|64)rr8")>;
+def: InstRW<[SBWriteResGroup6], (instregex "NEG(16|32|64)r")>;
+def: InstRW<[SBWriteResGroup6], (instregex "NEG8r")>;
+def: InstRW<[SBWriteResGroup6], (instregex "NOT(16|32|64)r")>;
+def: InstRW<[SBWriteResGroup6], (instregex "NOT8r")>;
+def: InstRW<[SBWriteResGroup6], (instregex "OR(16|32|64)ri8")>;
+def: InstRW<[SBWriteResGroup6], (instregex "OR(16|32|64)rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "OR8i8")>;
+def: InstRW<[SBWriteResGroup6], (instregex "OR8ri")>;
+def: InstRW<[SBWriteResGroup6], (instregex "OR8rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "PANDNrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "PANDrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "PORrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "PXORrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "STC")>;
+def: InstRW<[SBWriteResGroup6], (instregex "SUB(16|32|64)ri8")>;
+def: InstRW<[SBWriteResGroup6], (instregex "SUB(16|32|64)rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "SUB8i8")>;
+def: InstRW<[SBWriteResGroup6], (instregex "SUB8ri")>;
+def: InstRW<[SBWriteResGroup6], (instregex "SUB8rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "TEST(16|32|64)rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "TEST8i8")>;
+def: InstRW<[SBWriteResGroup6], (instregex "TEST8ri")>;
+def: InstRW<[SBWriteResGroup6], (instregex "TEST8rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "VMOVPQI2QIrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "VMOVZPQILo2PQIrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "VPANDNrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "VPANDrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "VPORrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "VPXORrr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "XOR(16|32|64)ri8")>;
+def: InstRW<[SBWriteResGroup6], (instregex "XOR(16|32|64)rr")>;
+def: InstRW<[SBWriteResGroup6], (instregex "XOR8i8")>;
+def: InstRW<[SBWriteResGroup6], (instregex "XOR8ri")>;
 def: InstRW<[SBWriteResGroup6], (instregex "XOR8rr")>;
 
 def SBWriteResGroup7 : SchedWriteRes<[SBPort0]> {
@@ -710,33 +765,34 @@ def SBWriteResGroup7 : SchedWriteRes<[SB
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[SBWriteResGroup7], (instregex "MOVMSKPDrr")>;
-def: InstRW<[SBWriteResGroup7], (instregex "MOVMSKPSrr")>;
-def: InstRW<[SBWriteResGroup7], (instregex "MOVPDI2DIrr")>;
-def: InstRW<[SBWriteResGroup7], (instregex "MOVPQIto64rr")>;
-def: InstRW<[SBWriteResGroup7], (instregex "PMOVMSKBrr")>;
-def: InstRW<[SBWriteResGroup7], (instregex "VMOVMSKPDYrr")>;
-def: InstRW<[SBWriteResGroup7], (instregex "VMOVMSKPDrr")>;
-def: InstRW<[SBWriteResGroup7], (instregex "VMOVMSKPSrr")>;
-def: InstRW<[SBWriteResGroup7], (instregex "VMOVPDI2DIrr")>;
+def: InstRW<[SBWriteResGroup7], (instregex "MOVMSKPDrr")>;
+def: InstRW<[SBWriteResGroup7], (instregex "MOVMSKPSrr")>;
+def: InstRW<[SBWriteResGroup7], (instregex "MOVPDI2DIrr")>;
+def: InstRW<[SBWriteResGroup7], (instregex "MOVPQIto64rr")>;
+def: InstRW<[SBWriteResGroup7], (instregex "PMOVMSKBrr")>;
+def: InstRW<[SBWriteResGroup7], (instregex "VMOVMSKPDYrr")>;
+def: InstRW<[SBWriteResGroup7], (instregex "VMOVMSKPDrr")>;
+def: InstRW<[SBWriteResGroup7], (instregex "VMOVMSKPSYrr")>;
+def: InstRW<[SBWriteResGroup7], (instregex "VMOVMSKPSrr")>;
+def: InstRW<[SBWriteResGroup7], (instregex "VMOVPDI2DIrr")>;
 def: InstRW<[SBWriteResGroup7], (instregex "VMOVPQIto64rr")>;
 
-def SBWriteResGroup9 : SchedWriteRes<[SBPort0]> {
+def SBWriteResGroup9 : SchedWriteRes<[SBPort05]> {
   let Latency = 2;
   let NumMicroOps = 2;
   let ResourceCycles = [2];
 }
-def: InstRW<[SBWriteResGroup9], (instregex "BLENDVPDrr0")>;
-def: InstRW<[SBWriteResGroup9], (instregex "BLENDVPSrr0")>;
-def: InstRW<[SBWriteResGroup9], (instregex "ROL32ri")>;
-def: InstRW<[SBWriteResGroup9], (instregex "ROL8ri")>;
-def: InstRW<[SBWriteResGroup9], (instregex "ROR32ri")>;
-def: InstRW<[SBWriteResGroup9], (instregex "ROR8ri")>;
-def: InstRW<[SBWriteResGroup9], (instregex "SETAr")>;
-def: InstRW<[SBWriteResGroup9], (instregex "SETBEr")>;
-def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPDYrr")>;
-def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPDrr")>;
-def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPSYrr")>;
+def: InstRW<[SBWriteResGroup9], (instregex "BLENDVPDrr0")>;
+def: InstRW<[SBWriteResGroup9], (instregex "BLENDVPSrr0")>;
+def: InstRW<[SBWriteResGroup9], (instregex "ROL(16|32|64)ri")>;
+def: InstRW<[SBWriteResGroup9], (instregex "ROL8ri")>;
+def: InstRW<[SBWriteResGroup9], (instregex "ROR(16|32|64)ri")>;
+def: InstRW<[SBWriteResGroup9], (instregex "ROR8ri")>;
+def: InstRW<[SBWriteResGroup9], (instregex "SETAr")>;
+def: InstRW<[SBWriteResGroup9], (instregex "SETBEr")>;
+def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPDYrr")>;
+def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPDrr")>;
+def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPSYrr")>;
 def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPSrr")>;
 
 def SBWriteResGroup10 : SchedWriteRes<[SBPort15]> {
@@ -787,18 +843,21 @@ def SBWriteResGroup14 : SchedWriteRes<[S
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[SBWriteResGroup14], (instregex "PSLLDrr")>;
-def: InstRW<[SBWriteResGroup14], (instregex "PSLLQrr")>;
-def: InstRW<[SBWriteResGroup14], (instregex "PSLLWrr")>;
-def: InstRW<[SBWriteResGroup14], (instregex "PSRADrr")>;
-def: InstRW<[SBWriteResGroup14], (instregex "PSRAWrr")>;
-def: InstRW<[SBWriteResGroup14], (instregex "PSRLDrr")>;
-def: InstRW<[SBWriteResGroup14], (instregex "PSRLQrr")>;
-def: InstRW<[SBWriteResGroup14], (instregex "PSRLWrr")>;
-def: InstRW<[SBWriteResGroup14], (instregex "VPSRADrr")>;
-def: InstRW<[SBWriteResGroup14], (instregex "VPSRAWrr")>;
-def: InstRW<[SBWriteResGroup14], (instregex "VPSRLDrr")>;
-def: InstRW<[SBWriteResGroup14], (instregex "VPSRLQrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "PSLLDrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "PSLLQrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "PSLLWrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "PSRADrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "PSRAWrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "PSRLDrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "PSRLQrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "PSRLWrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "VPSLLDrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "VPSLLQrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "VPSLLWrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "VPSRADrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "VPSRAWrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "VPSRLDrr")>;
+def: InstRW<[SBWriteResGroup14], (instregex "VPSRLQrr")>;
 def: InstRW<[SBWriteResGroup14], (instregex "VPSRLWrr")>;
 
 def SBWriteResGroup15 : SchedWriteRes<[SBPort0,SBPort015]> {
@@ -808,12 +867,12 @@ def SBWriteResGroup15 : SchedWriteRes<[S
 }
 def: InstRW<[SBWriteResGroup15], (instregex "FNSTSW16r")>;
 
-def SBWriteResGroup16 : SchedWriteRes<[SBPort1,SBPort0]> {
+def SBWriteResGroup16 : SchedWriteRes<[SBPort1,SBPort05]> {
   let Latency = 2;
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[SBWriteResGroup16], (instregex "BSWAP32r")>;
+def: InstRW<[SBWriteResGroup16], (instregex "BSWAP(16|32|64)r")>;
 
 def SBWriteResGroup17 : SchedWriteRes<[SBPort5,SBPort15]> {
   let Latency = 2;
@@ -834,64 +893,66 @@ def SBWriteResGroup18 : SchedWriteRes<[S
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
+def: InstRW<[SBWriteResGroup18], (instregex "JRCXZ")>;
 def: InstRW<[SBWriteResGroup18], (instregex "MMX_MOVDQ2Qrr")>;
 
-def SBWriteResGroup19 : SchedWriteRes<[SBPort0,SBPort015]> {
+def SBWriteResGroup19 : SchedWriteRes<[SBPort05,SBPort015]> {
   let Latency = 2;
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[SBWriteResGroup19], (instregex "ADC64ri8")>;
-def: InstRW<[SBWriteResGroup19], (instregex "ADC64rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "ADC8ri")>;
-def: InstRW<[SBWriteResGroup19], (instregex "ADC8rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVAE32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVB32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVE32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVG32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVGE32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVL32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVLE32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVNE32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVNO32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVNP32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVNS32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVO32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVP32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVS32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "SBB32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "SBB64ri8")>;
-def: InstRW<[SBWriteResGroup19], (instregex "SBB8ri")>;
-def: InstRW<[SBWriteResGroup19], (instregex "SBB8rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "SHLD32rri8")>;
-def: InstRW<[SBWriteResGroup19], (instregex "SHRD32rri8")>;
+def: InstRW<[SBWriteResGroup19], (instregex "ADC(16|32|64)ri8")>;
+def: InstRW<[SBWriteResGroup19], (instregex "ADC(16|32|64)rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "ADC8ri")>;
+def: InstRW<[SBWriteResGroup19], (instregex "ADC8rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVAE(16|32|64)rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVB(16|32|64)rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVE(16|32|64)rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVG(16|32|64)rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVGE(16|32|64)rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVL(16|32|64)rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVLE(16|32|64)rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVNE(16|32|64)rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVNO(16|32|64)rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVNP(16|32|64)rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVNS(16|32|64)rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVO(16|32|64)rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVP(16|32|64)rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "CMOVS(16|32|64)rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "SBB(16|32|64)ri8")>;
+def: InstRW<[SBWriteResGroup19], (instregex "SBB(16|32|64)rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "SBB8ri")>;
+def: InstRW<[SBWriteResGroup19], (instregex "SBB8rr")>;
+def: InstRW<[SBWriteResGroup19], (instregex "SHLD(16|32|64)rri8")>;
+def: InstRW<[SBWriteResGroup19], (instregex "SHRD(16|32|64)rri8")>;
 
 def SBWriteResGroup20 : SchedWriteRes<[SBPort0]> {
   let Latency = 3;
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[SBWriteResGroup20], (instregex "MMX_PMADDUBSWrr64")>;
-def: InstRW<[SBWriteResGroup20], (instregex "MMX_PMULHRSWrr64")>;
-def: InstRW<[SBWriteResGroup20], (instregex "MMX_PMULUDQirr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "PMADDUBSWrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "PMADDWDrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "PMULDQrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "PMULHRSWrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "PMULHUWrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "PMULHWrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "PMULLDrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "PMULLWrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "PMULUDQrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "PSADBWrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "VMOVMSKPSYrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "VPMADDUBSWrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "VPMADDWDrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "VPMULDQrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "VPMULHRSWrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "VPMULHWrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "VPMULLDrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "VPMULLWrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "MMX_PMADDUBSWrr64")>;
+def: InstRW<[SBWriteResGroup20], (instregex "MMX_PMULHRSWrr64")>;
+def: InstRW<[SBWriteResGroup20], (instregex "MMX_PMULUDQirr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "PMADDUBSWrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "PMADDWDrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "PMULDQrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "PMULHRSWrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "PMULHUWrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "PMULHWrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "PMULLDrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "PMULLWrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "PMULUDQrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "PSADBWrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "VPMADDUBSWrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "VPMADDWDrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "VPMULDQrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "VPMULHRSWrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "VPMULHUWrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "VPMULHWrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "VPMULLDrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "VPMULLWrr")>;
+def: InstRW<[SBWriteResGroup20], (instregex "VPMULUDQrr")>;
 def: InstRW<[SBWriteResGroup20], (instregex "VPSADBWrr")>;
 
 def SBWriteResGroup21 : SchedWriteRes<[SBPort1]> {
@@ -899,92 +960,99 @@ def SBWriteResGroup21 : SchedWriteRes<[S
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[SBWriteResGroup21], (instregex "ADDPDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ADDPSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ADDSDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ADDSSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ADDSUBPDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ADDSUBPSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ADD_FPrST0")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ADD_FST0r")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ADD_FrST0")>;
-def: InstRW<[SBWriteResGroup21], (instregex "BSF32rr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "BSR32rr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "CMPPDrri")>;
-def: InstRW<[SBWriteResGroup21], (instregex "CMPPSrri")>;
-def: InstRW<[SBWriteResGroup21], (instregex "CMPSDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "CMPSSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "CRC32r32r32")>;
-def: InstRW<[SBWriteResGroup21], (instregex "CRC32r32r8")>;
-def: InstRW<[SBWriteResGroup21], (instregex "CVTDQ2PSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "CVTPS2DQrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "CVTTPS2DQrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MAXPDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MAXPSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MAXSDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MAXSSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MINPDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MINPSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MINSDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MINSSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTPI2PSirr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTPS2PIirr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTTPS2PIirr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MUL8r")>;
-def: InstRW<[SBWriteResGroup21], (instregex "POPCNT32rr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ROUNDPDr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ROUNDPSr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ROUNDSDr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ROUNDSSr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "SUBPDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "SUBPSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "SUBR_FPrST0")>;
-def: InstRW<[SBWriteResGroup21], (instregex "SUBR_FST0r")>;
-def: InstRW<[SBWriteResGroup21], (instregex "SUBR_FrST0")>;
-def: InstRW<[SBWriteResGroup21], (instregex "SUBSDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "SUBSSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "SUB_FPrST0")>;
-def: InstRW<[SBWriteResGroup21], (instregex "SUB_FST0r")>;
-def: InstRW<[SBWriteResGroup21], (instregex "SUB_FrST0")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VADDPDYrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VADDPDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VADDPSYrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VADDPSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VADDSDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VADDSSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPDYrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPSYrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VBROADCASTF128")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VCMPPDYrri")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VCMPPDrri")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VCMPPSYrri")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VCMPPSrri")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VCMPSDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VCMPSSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VCVTDQ2PSYrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VCVTDQ2PSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VCVTPS2DQYrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VCVTPS2DQrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VCVTTPS2DQrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VMAXPDYrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VMAXPDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VMAXPSYrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VMAXPSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VMAXSDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VMAXSSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VMINPDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VMINPSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VMINSDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VMINSSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VROUNDPDr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VROUNDPSr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VROUNDSDr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VSUBPDYrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VSUBPDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VSUBPSYrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VSUBPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ADDPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ADDPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ADDSDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ADDSSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ADDSUBPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ADDSUBPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ADD_FPrST0")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ADD_FST0r")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ADD_FrST0")>;
+def: InstRW<[SBWriteResGroup21], (instregex "BSF(16|32|64)rr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "BSR(16|32|64)rr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "CMPPDrri")>;
+def: InstRW<[SBWriteResGroup21], (instregex "CMPPSrri")>;
+def: InstRW<[SBWriteResGroup21], (instregex "CMPSSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "CRC32r(16|32|64)r8")>;
+def: InstRW<[SBWriteResGroup21], (instregex "CRC32r(16|32|64)r64")>;
+def: InstRW<[SBWriteResGroup21], (instregex "CVTDQ2PSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "CVTPS2DQrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "CVTTPS2DQrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MAXPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MAXPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MAXSDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MAXSSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MINPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MINPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MINSDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MINSSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTPI2PSirr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTPS2PIirr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTTPS2PIirr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MUL8r")>;
+def: InstRW<[SBWriteResGroup21], (instregex "POPCNT(16|32|64)rr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "PUSHFS64")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ROUNDPDr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ROUNDPSr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ROUNDSDr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ROUNDSSr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUBPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUBPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUBR_FPrST0")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUBR_FST0r")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUBR_FrST0")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUBSDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUBSSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUB_FPrST0")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUB_FST0r")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUB_FrST0")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDPDYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDPSYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDSDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDSSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPDYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPSYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCMPPDYrri")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCMPPDrri")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCMPPSYrri")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCMPPSrri")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCMPSDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCMPSSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCVTDQ2PSYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCVTDQ2PSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCVTPS2DQYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCVTPS2DQrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCVTTPS2DQYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCVTTPS2DQrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMAXPDYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMAXPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMAXPSYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMAXPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMAXSDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMAXSSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMINPDYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMINPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMINPSYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMINPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMINSDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMINSSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VROUNDPDr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VROUNDPSr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VROUNDSDr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VROUNDSSr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VROUNDYPDr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VROUNDYPSr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VSUBPDYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VSUBPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VSUBPSYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VSUBPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VSUBSDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VSUBSSrr")>;
 
 def SBWriteResGroup22 : SchedWriteRes<[SBPort0,SBPort5]> {
   let Latency = 3;
@@ -1007,8 +1075,22 @@ def: InstRW<[SBWriteResGroup23], (instre
 def: InstRW<[SBWriteResGroup23], (instregex "VPEXTRDrr")>;
 def: InstRW<[SBWriteResGroup23], (instregex "VPEXTRQrr")>;
 def: InstRW<[SBWriteResGroup23], (instregex "VPEXTRWri")>;
-def: InstRW<[SBWriteResGroup23], (instregex "SHL64rCL")>;
-def: InstRW<[SBWriteResGroup23], (instregex "SHL8rCL")>;
+
+def SBWriteResGroup23_2 : SchedWriteRes<[SBPort05]> {
+  let Latency = 3;
+  let NumMicroOps = 3;
+  let ResourceCycles = [3];
+}
+def: InstRW<[SBWriteResGroup23_2], (instregex "ROL(16|32|64)rCL")>;
+def: InstRW<[SBWriteResGroup23_2], (instregex "ROL8rCL")>;
+def: InstRW<[SBWriteResGroup23_2], (instregex "ROR(16|32|64)rCL")>;
+def: InstRW<[SBWriteResGroup23_2], (instregex "ROR8rCL")>;
+def: InstRW<[SBWriteResGroup23_2], (instregex "SAR(16|32|64)rCL")>;
+def: InstRW<[SBWriteResGroup23_2], (instregex "SAR8rCL")>;
+def: InstRW<[SBWriteResGroup23_2], (instregex "SHL(16|32|64)rCL")>;
+def: InstRW<[SBWriteResGroup23_2], (instregex "SHL8rCL")>;
+def: InstRW<[SBWriteResGroup23_2], (instregex "SHR(16|32|64)rCL")>;
+def: InstRW<[SBWriteResGroup23_2], (instregex "SHR8rCL")>;
 
 def SBWriteResGroup24 : SchedWriteRes<[SBPort15]> {
   let Latency = 3;
@@ -1039,24 +1121,52 @@ def SBWriteResGroup25 : SchedWriteRes<[S
   let NumMicroOps = 3;
   let ResourceCycles = [3];
 }
-def: InstRW<[SBWriteResGroup25], (instregex "LEAVE64")>;
-def: InstRW<[SBWriteResGroup25], (instregex "XADD32rr")>;
+def: InstRW<[SBWriteResGroup25], (instregex "ADC8i8")>;
+def: InstRW<[SBWriteResGroup25], (instregex "LEAVE64")>;
+def: InstRW<[SBWriteResGroup25], (instregex "OUT32rr")>;
+def: InstRW<[SBWriteResGroup25], (instregex "OUT8rr")>;
+def: InstRW<[SBWriteResGroup25], (instregex "SBB8i8")>;
+def: InstRW<[SBWriteResGroup25], (instregex "XADD(16|32|64)rr")>;
 def: InstRW<[SBWriteResGroup25], (instregex "XADD8rr")>;
 
-def SBWriteResGroup26 : SchedWriteRes<[SBPort0,SBPort015]> {
+def SBWriteResGroup25_2 : SchedWriteRes<[SBPort5,SBPort05]> {
+  let Latency = 3;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[SBWriteResGroup25_2], (instregex "CMOVBE_F")>;
+def: InstRW<[SBWriteResGroup25_2], (instregex "CMOVB_F")>;
+def: InstRW<[SBWriteResGroup25_2], (instregex "CMOVE_F")>;
+def: InstRW<[SBWriteResGroup25_2], (instregex "CMOVNBE_F")>;
+def: InstRW<[SBWriteResGroup25_2], (instregex "CMOVNB_F")>;
+def: InstRW<[SBWriteResGroup25_2], (instregex "CMOVNE_F")>;
+def: InstRW<[SBWriteResGroup25_2], (instregex "CMOVNP_F")>;
+def: InstRW<[SBWriteResGroup25_2], (instregex "CMOVP_F")>;
+
+def SBWriteResGroup26 : SchedWriteRes<[SBPort05,SBPort015]> {
   let Latency = 3;
   let NumMicroOps = 3;
   let ResourceCycles = [2,1];
 }
-def: InstRW<[SBWriteResGroup26], (instregex "CMOVA32rr")>;
-def: InstRW<[SBWriteResGroup26], (instregex "CMOVBE32rr")>;
+def: InstRW<[SBWriteResGroup26], (instregex "CMOVA(16|32|64)rr")>;
+def: InstRW<[SBWriteResGroup26], (instregex "CMOVBE(16|32|64)rr")>;
+
+def SBWriteResGroup26_2 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> {
+  let Latency = 3;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup26_2], (instregex "COM_FIPr")>;
+def: InstRW<[SBWriteResGroup26_2], (instregex "COM_FIr")>;
+def: InstRW<[SBWriteResGroup26_2], (instregex "UCOM_FIPr")>;
+def: InstRW<[SBWriteResGroup26_2], (instregex "UCOM_FIr")>;
 
 def SBWriteResGroup27 : SchedWriteRes<[SBPort0,SBPort1]> {
   let Latency = 4;
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[SBWriteResGroup27], (instregex "MUL64r")>;
+def: InstRW<[SBWriteResGroup27], (instregex "MUL(16|32|64)r")>;
 
 def SBWriteResGroup28 : SchedWriteRes<[SBPort1,SBPort5]> {
   let Latency = 4;
@@ -1079,6 +1189,7 @@ def: InstRW<[SBWriteResGroup28], (instre
 def: InstRW<[SBWriteResGroup28], (instregex "VCVTPD2DQrr")>;
 def: InstRW<[SBWriteResGroup28], (instregex "VCVTPD2PSYrr")>;
 def: InstRW<[SBWriteResGroup28], (instregex "VCVTPD2PSrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "VCVTSD2SSrr")>;
 def: InstRW<[SBWriteResGroup28], (instregex "VCVTSI2SD64rr")>;
 def: InstRW<[SBWriteResGroup28], (instregex "VCVTSI2SDrr")>;
 def: InstRW<[SBWriteResGroup28], (instregex "VCVTTPD2DQYrr")>;
@@ -1089,8 +1200,24 @@ def SBWriteResGroup29 : SchedWriteRes<[S
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[SBWriteResGroup29], (instregex "MOV64sr")>;
-def: InstRW<[SBWriteResGroup29], (instregex "PAUSE")>;
+def: InstRW<[SBWriteResGroup29], (instregex "MOV64sr")>;
+
+def SBWriteResGroup29_2 : SchedWriteRes<[SBPort5,SBPort015]> {
+  let Latency = 4;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,3];
+}
+def: InstRW<[SBWriteResGroup29_2], (instregex "OUT32ir")>;
+def: InstRW<[SBWriteResGroup29_2], (instregex "OUT8ir")>;
+def: InstRW<[SBWriteResGroup29_2], (instregex "PAUSE")>;
+
+def SBWriteResGroup29_3 : SchedWriteRes<[SBPort05,SBPort015]> {
+  let Latency = 4;
+  let NumMicroOps = 4;
+  let ResourceCycles = [3,1];
+}
+def: InstRW<[SBWriteResGroup29_3], (instregex "SHLD(16|32|64)rrCL")>;
+def: InstRW<[SBWriteResGroup29_3], (instregex "SHRD(16|32|64)rrCL")>;
 
 def SBWriteResGroup30 : SchedWriteRes<[SBPort0]> {
   let Latency = 5;
@@ -1118,6 +1245,8 @@ def: InstRW<[SBWriteResGroup30], (instre
 def: InstRW<[SBWriteResGroup30], (instregex "VMULSSrr")>;
 def: InstRW<[SBWriteResGroup30], (instregex "VPCMPGTQrr")>;
 def: InstRW<[SBWriteResGroup30], (instregex "VPHMINPOSUWrr128")>;
+def: InstRW<[SBWriteResGroup30], (instregex "VRCPPSr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "VRCPSSr")>;
 def: InstRW<[SBWriteResGroup30], (instregex "VRSQRTPSr")>;
 def: InstRW<[SBWriteResGroup30], (instregex "VRSQRTSSr")>;
 
@@ -1126,33 +1255,35 @@ def SBWriteResGroup31 : SchedWriteRes<[S
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[SBWriteResGroup31], (instregex "MOV32rm")>;
-def: InstRW<[SBWriteResGroup31], (instregex "MOV8rm")>;
-def: InstRW<[SBWriteResGroup31], (instregex "MOVSX32rm16")>;
-def: InstRW<[SBWriteResGroup31], (instregex "MOVSX32rm8")>;
-def: InstRW<[SBWriteResGroup31], (instregex "MOVZX32rm16")>;
-def: InstRW<[SBWriteResGroup31], (instregex "MOVZX32rm8")>;
-def: InstRW<[SBWriteResGroup31], (instregex "PREFETCH")>;
+def: InstRW<[SBWriteResGroup31], (instregex "MOV(16|32|64)rm")>;
+def: InstRW<[SBWriteResGroup31], (instregex "MOV8rm")>;
+def: InstRW<[SBWriteResGroup31], (instregex "MOVSX(16|32|64)rm16")>;
+def: InstRW<[SBWriteResGroup31], (instregex "MOVSX(16|32|64)rm32")>;
+def: InstRW<[SBWriteResGroup31], (instregex "MOVSX(16|32|64)rm8")>;
+def: InstRW<[SBWriteResGroup31], (instregex "MOVZX(16|32|64)rm16")>;
+def: InstRW<[SBWriteResGroup31], (instregex "MOVZX(16|32|64)rm8")>;
+def: InstRW<[SBWriteResGroup31], (instregex "PREFETCH")>;
 
 def SBWriteResGroup32 : SchedWriteRes<[SBPort0,SBPort1]> {
   let Latency = 5;
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[SBWriteResGroup32], (instregex "CVTSD2SI64rr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "CVTSD2SIrr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "CVTSS2SI64rr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "CVTSS2SIrr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "CVTTSD2SI64rr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "CVTTSD2SIrr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "CVTTSS2SI64rr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "CVTTSS2SIrr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "VCVTSD2SI64rr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "VCVTSS2SI64rr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "VCVTSS2SIrr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "VCVTTSD2SI64rr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "VCVTTSD2SIrr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "VCVTTSS2SI64rr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "CVTSD2SI64rr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "CVTSD2SIrr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "CVTSS2SI64rr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "CVTSS2SIrr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "CVTTSD2SI64rr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "CVTTSD2SIrr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "CVTTSS2SI64rr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "CVTTSS2SIrr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "VCVTSD2SI64rr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "VCVTSD2SIrr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "VCVTSS2SI64rr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "VCVTSS2SIrr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "VCVTTSD2SI64rr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "VCVTTSD2SIrr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "VCVTTSS2SI64rr")>;
 def: InstRW<[SBWriteResGroup32], (instregex "VCVTTSS2SIrr")>;
 
 def SBWriteResGroup33 : SchedWriteRes<[SBPort4,SBPort23]> {
@@ -1160,7 +1291,7 @@ def SBWriteResGroup33 : SchedWriteRes<[S
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[SBWriteResGroup33], (instregex "MOV64mr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOV(16|32|64)mr")>;
 def: InstRW<[SBWriteResGroup33], (instregex "MOV8mr")>;
 def: InstRW<[SBWriteResGroup33], (instregex "MOVAPDmr")>;
 def: InstRW<[SBWriteResGroup33], (instregex "MOVAPSmr")>;
@@ -1171,18 +1302,18 @@ def: InstRW<[SBWriteResGroup33], (instre
 def: InstRW<[SBWriteResGroup33], (instregex "MOVLPDmr")>;
 def: InstRW<[SBWriteResGroup33], (instregex "MOVLPSmr")>;
 def: InstRW<[SBWriteResGroup33], (instregex "MOVNTDQmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVNTI_64mr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVNTImr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVNTPDmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVNTPSmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVPDI2DImr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVPQI2QImr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVPQIto64mr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVSSmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVUPDmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVUPSmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "PUSH64i8")>;
-def: InstRW<[SBWriteResGroup33], (instregex "PUSH64r")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVNTI_64mr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVNTImr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVNTPDmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVNTPSmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVPDI2DImr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVPQI2QImr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVPQIto64mr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVSSmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVUPDmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVUPSmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "PUSH64i8")>;
+def: InstRW<[SBWriteResGroup33], (instregex "PUSH(16|32|64)r")>;
 def: InstRW<[SBWriteResGroup33], (instregex "VEXTRACTF128mr")>;
 def: InstRW<[SBWriteResGroup33], (instregex "VMOVAPDYmr")>;
 def: InstRW<[SBWriteResGroup33], (instregex "VMOVAPDmr")>;
@@ -1225,29 +1356,41 @@ def SBWriteResGroup35 : SchedWriteRes<[S
   let NumMicroOps = 3;
   let ResourceCycles = [1,2];
 }
-def: InstRW<[SBWriteResGroup35], (instregex "CLI")>;
-def: InstRW<[SBWriteResGroup35], (instregex "CVTSI2SS64rr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "CVTSI2SSrr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "HADDPDrr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "HADDPSrr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "HSUBPDrr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "HSUBPSrr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "VCVTSI2SS64rr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "VCVTSI2SSrr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "VHADDPDrr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "VHADDPSYrr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "VHADDPSrr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "VHSUBPDYrr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "VHSUBPDrr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "VHSUBPSYrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "CLI")>;
+def: InstRW<[SBWriteResGroup35], (instregex "CVTSI2SS64rr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "CVTSI2SSrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "HADDPDrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "HADDPSrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "HSUBPDrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "HSUBPSrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "VCVTSI2SS64rr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "VCVTSI2SSrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "VHADDPDYrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "VHADDPDrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "VHADDPSYrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "VHADDPSrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "VHSUBPDYrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "VHSUBPDrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "VHSUBPSYrr")>;
 def: InstRW<[SBWriteResGroup35], (instregex "VHSUBPSrr")>;
 
+def SBWriteResGroup35_2 : SchedWriteRes<[SBPort1,SBPort4,SBPort23]> {
+  let Latency = 5;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup35_2], (instregex "ISTT_FP16m")>;
+def: InstRW<[SBWriteResGroup35_2], (instregex "ISTT_FP32m")>;
+def: InstRW<[SBWriteResGroup35_2], (instregex "ISTT_FP64m")>;
+def: InstRW<[SBWriteResGroup35_2], (instregex "PUSHGS64")>;
+
 def SBWriteResGroup36 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> {
   let Latency = 5;
   let NumMicroOps = 3;
   let ResourceCycles = [1,1,1];
 }
-def: InstRW<[SBWriteResGroup36], (instregex "CALL64r")>;
+def: InstRW<[SBWriteResGroup36], (instregex "CALL64pcrel32")>;
+def: InstRW<[SBWriteResGroup36], (instregex "CALL(16|32|64)r")>;
 def: InstRW<[SBWriteResGroup36], (instregex "EXTRACTPSmr")>;
 def: InstRW<[SBWriteResGroup36], (instregex "VEXTRACTPSmr")>;
 
@@ -1256,11 +1399,12 @@ def SBWriteResGroup37 : SchedWriteRes<[S
   let NumMicroOps = 3;
   let ResourceCycles = [1,1,1];
 }
-def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPDYrm")>;
-def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPDmr")>;
-def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPSmr")>;
+def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPDYmr")>;
+def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPDmr")>;
+def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPSYmr")>;
+def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPSmr")>;
 
-def SBWriteResGroup38 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> {
+def SBWriteResGroup38 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
   let Latency = 5;
   let NumMicroOps = 3;
   let ResourceCycles = [1,1,1];
@@ -1308,15 +1452,15 @@ def SBWriteResGroup41 : SchedWriteRes<[S
 }
 def: InstRW<[SBWriteResGroup41], (instregex "FNINIT")>;
 
-def SBWriteResGroup42 : SchedWriteRes<[SBPort0,SBPort015]> {
+def SBWriteResGroup42 : SchedWriteRes<[SBPort05,SBPort015]> {
   let Latency = 5;
   let NumMicroOps = 4;
   let ResourceCycles = [1,3];
 }
-def: InstRW<[SBWriteResGroup42], (instregex "CMPXCHG32rr")>;
+def: InstRW<[SBWriteResGroup42], (instregex "CMPXCHG(16|32|64)rr")>;
 def: InstRW<[SBWriteResGroup42], (instregex "CMPXCHG8rr")>;
 
-def SBWriteResGroup43 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> {
+def SBWriteResGroup43 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
   let Latency = 5;
   let NumMicroOps = 4;
   let ResourceCycles = [1,1,2];
@@ -1379,7 +1523,7 @@ def: InstRW<[SBWriteResGroup48], (instre
 def: InstRW<[SBWriteResGroup48], (instregex "MOVSSrm")>;
 def: InstRW<[SBWriteResGroup48], (instregex "MOVUPDrm")>;
 def: InstRW<[SBWriteResGroup48], (instregex "MOVUPSrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "POP64r")>;
+def: InstRW<[SBWriteResGroup48], (instregex "POP(16|32|64)r")>;
 def: InstRW<[SBWriteResGroup48], (instregex "VBROADCASTSSrm")>;
 def: InstRW<[SBWriteResGroup48], (instregex "VLDDQUYrm")>;
 def: InstRW<[SBWriteResGroup48], (instregex "VLDDQUrm")>;
@@ -1404,15 +1548,15 @@ def SBWriteResGroup49 : SchedWriteRes<[S
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[SBWriteResGroup49], (instregex "JMP64m")>;
+def: InstRW<[SBWriteResGroup49], (instregex "JMP(16|32|64)m")>;
 def: InstRW<[SBWriteResGroup49], (instregex "MOV64sm")>;
 
-def SBWriteResGroup50 : SchedWriteRes<[SBPort23,SBPort0]> {
+def SBWriteResGroup50 : SchedWriteRes<[SBPort23,SBPort05]> {
   let Latency = 6;
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[SBWriteResGroup50], (instregex "BT64mi8")>;
+def: InstRW<[SBWriteResGroup50], (instregex "BT(16|32|64)mi8")>;
 
 def SBWriteResGroup51 : SchedWriteRes<[SBPort23,SBPort15]> {
   let Latency = 6;
@@ -1433,23 +1577,23 @@ def SBWriteResGroup52 : SchedWriteRes<[S
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[SBWriteResGroup52], (instregex "ADD64rm")>;
-def: InstRW<[SBWriteResGroup52], (instregex "ADD8rm")>;
-def: InstRW<[SBWriteResGroup52], (instregex "AND64rm")>;
-def: InstRW<[SBWriteResGroup52], (instregex "AND8rm")>;
-def: InstRW<[SBWriteResGroup52], (instregex "CMP64mi8")>;
-def: InstRW<[SBWriteResGroup52], (instregex "CMP64mr")>;
-def: InstRW<[SBWriteResGroup52], (instregex "CMP64rm")>;
-def: InstRW<[SBWriteResGroup52], (instregex "CMP8mi")>;
-def: InstRW<[SBWriteResGroup52], (instregex "CMP8mr")>;
-def: InstRW<[SBWriteResGroup52], (instregex "CMP8rm")>;
-def: InstRW<[SBWriteResGroup52], (instregex "LODSL")>;
-def: InstRW<[SBWriteResGroup52], (instregex "LODSQ")>;
-def: InstRW<[SBWriteResGroup52], (instregex "OR64rm")>;
-def: InstRW<[SBWriteResGroup52], (instregex "OR8rm")>;
-def: InstRW<[SBWriteResGroup52], (instregex "SUB64rm")>;
-def: InstRW<[SBWriteResGroup52], (instregex "SUB8rm")>;
-def: InstRW<[SBWriteResGroup52], (instregex "XOR64rm")>;
+def: InstRW<[SBWriteResGroup52], (instregex "ADD(16|32|64)rm")>;
+def: InstRW<[SBWriteResGroup52], (instregex "ADD8rm")>;
+def: InstRW<[SBWriteResGroup52], (instregex "AND(16|32|64)rm")>;
+def: InstRW<[SBWriteResGroup52], (instregex "AND8rm")>;
+def: InstRW<[SBWriteResGroup52], (instregex "CMP(16|32|64)mi8")>;
+def: InstRW<[SBWriteResGroup52], (instregex "CMP(16|32|64)mr")>;
+def: InstRW<[SBWriteResGroup52], (instregex "CMP(16|32|64)rm")>;
+def: InstRW<[SBWriteResGroup52], (instregex "CMP8mi")>;
+def: InstRW<[SBWriteResGroup52], (instregex "CMP8mr")>;
+def: InstRW<[SBWriteResGroup52], (instregex "CMP8rm")>;
+def: InstRW<[SBWriteResGroup52], (instregex "LODSL")>;
+def: InstRW<[SBWriteResGroup52], (instregex "LODSQ")>;
+def: InstRW<[SBWriteResGroup52], (instregex "OR(16|32|64)rm")>;
+def: InstRW<[SBWriteResGroup52], (instregex "OR8rm")>;
+def: InstRW<[SBWriteResGroup52], (instregex "SUB(16|32|64)rm")>;
+def: InstRW<[SBWriteResGroup52], (instregex "SUB8rm")>;
+def: InstRW<[SBWriteResGroup52], (instregex "XOR(16|32|64)rm")>;
 def: InstRW<[SBWriteResGroup52], (instregex "XOR8rm")>;
 
 def SBWriteResGroup53 : SchedWriteRes<[SBPort4,SBPort23]> {
@@ -1457,8 +1601,6 @@ def SBWriteResGroup53 : SchedWriteRes<[S
   let NumMicroOps = 3;
   let ResourceCycles = [1,2];
 }
-def: InstRW<[SBWriteResGroup53], (instregex "POP64rmm")>;
-def: InstRW<[SBWriteResGroup53], (instregex "PUSH64rmm")>;
 def: InstRW<[SBWriteResGroup53], (instregex "ST_F32m")>;
 def: InstRW<[SBWriteResGroup53], (instregex "ST_F64m")>;
 def: InstRW<[SBWriteResGroup53], (instregex "ST_FP32m")>;
@@ -1471,7 +1613,7 @@ def SBWriteResGroup54 : SchedWriteRes<[S
   let ResourceCycles = [1];
 }
 def: InstRW<[SBWriteResGroup54], (instregex "VBROADCASTSDYrm")>;
-def: InstRW<[SBWriteResGroup54], (instregex "VBROADCASTSSrm")>;
+def: InstRW<[SBWriteResGroup54], (instregex "VBROADCASTSSYrm")>;
 def: InstRW<[SBWriteResGroup54], (instregex "VMOVAPDYrm")>;
 def: InstRW<[SBWriteResGroup54], (instregex "VMOVAPSYrm")>;
 def: InstRW<[SBWriteResGroup54], (instregex "VMOVDDUPYrm")>;
@@ -1529,10 +1671,10 @@ def: InstRW<[SBWriteResGroup56], (instre
 def: InstRW<[SBWriteResGroup56], (instregex "VMOVLPSrm")>;
 def: InstRW<[SBWriteResGroup56], (instregex "VORPDrm")>;
 def: InstRW<[SBWriteResGroup56], (instregex "VORPSrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPDmi")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPDri")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPSmi")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPSri")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPDmi")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPSmi")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPSrm")>;
 def: InstRW<[SBWriteResGroup56], (instregex "VSHUFPDrmi")>;
 def: InstRW<[SBWriteResGroup56], (instregex "VSHUFPSrmi")>;
 def: InstRW<[SBWriteResGroup56], (instregex "VUNPCKHPDrm")>;
@@ -1549,16 +1691,16 @@ def SBWriteResGroup57 : SchedWriteRes<[S
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[SBWriteResGroup57], (instregex "AESDECLASTrr")>;
-def: InstRW<[SBWriteResGroup57], (instregex "AESDECrr")>;
-def: InstRW<[SBWriteResGroup57], (instregex "AESENCLASTrr")>;
-def: InstRW<[SBWriteResGroup57], (instregex "AESENCrr")>;
-def: InstRW<[SBWriteResGroup57], (instregex "KANDQrr")>;
-def: InstRW<[SBWriteResGroup57], (instregex "VAESDECLASTrr")>;
-def: InstRW<[SBWriteResGroup57], (instregex "VAESDECrr")>;
+def: InstRW<[SBWriteResGroup57], (instregex "AESDECLASTrr")>;
+def: InstRW<[SBWriteResGroup57], (instregex "AESDECrr")>;
+def: InstRW<[SBWriteResGroup57], (instregex "AESENCLASTrr")>;
+def: InstRW<[SBWriteResGroup57], (instregex "AESENCrr")>;
+def: InstRW<[SBWriteResGroup57], (instregex "VAESDECLASTrr")>;
+def: InstRW<[SBWriteResGroup57], (instregex "VAESDECrr")>;
+def: InstRW<[SBWriteResGroup57], (instregex "VAESENCLASTrr")>;
 def: InstRW<[SBWriteResGroup57], (instregex "VAESENCrr")>;
 
-def SBWriteResGroup58 : SchedWriteRes<[SBPort23,SBPort0]> {
+def SBWriteResGroup58 : SchedWriteRes<[SBPort23,SBPort05]> {
   let Latency = 7;
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
@@ -1744,12 +1886,12 @@ def: InstRW<[SBWriteResGroup60], (instre
 def: InstRW<[SBWriteResGroup60], (instregex "VPORrm")>;
 def: InstRW<[SBWriteResGroup60], (instregex "VPXORrm")>;
 
-def SBWriteResGroup61 : SchedWriteRes<[SBPort0,SBPort0]> {
+def SBWriteResGroup61 : SchedWriteRes<[SBPort0,SBPort05]> {
   let Latency = 7;
   let NumMicroOps = 3;
   let ResourceCycles = [2,1];
 }
-def: InstRW<[SBWriteResGroup61], (instregex "VRCPPSr")>;
+def: InstRW<[SBWriteResGroup61], (instregex "VRCPPSYr")>;
 def: InstRW<[SBWriteResGroup61], (instregex "VRSQRTPSYr")>;
 
 def SBWriteResGroup62 : SchedWriteRes<[SBPort5,SBPort23]> {
@@ -1775,28 +1917,28 @@ def SBWriteResGroup64 : SchedWriteRes<[S
 }
 def: InstRW<[SBWriteResGroup64], (instregex "FARJMP64")>;
 
-def SBWriteResGroup65 : SchedWriteRes<[SBPort23,SBPort0,SBPort015]> {
+def SBWriteResGroup65 : SchedWriteRes<[SBPort23,SBPort05,SBPort015]> {
   let Latency = 7;
   let NumMicroOps = 3;
   let ResourceCycles = [1,1,1];
 }
-def: InstRW<[SBWriteResGroup65], (instregex "ADC64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "ADC8rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVAE64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVB64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVE64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVG64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVGE64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVL64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVLE64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVNE64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVNO64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVNP64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVNS64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVO64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVP64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVS64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "SBB64rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "ADC(16|32|64)rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "ADC8rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVAE(16|32|64)rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVB(16|32|64)rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVE(16|32|64)rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVG(16|32|64)rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVGE(16|32|64)rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVL(16|32|64)rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVLE(16|32|64)rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVNE(16|32|64)rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVNO(16|32|64)rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVNP(16|32|64)rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVNS(16|32|64)rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVO(16|32|64)rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVP(16|32|64)rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVS(16|32|64)rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "SBB(16|32|64)rm")>;
 def: InstRW<[SBWriteResGroup65], (instregex "SBB8rm")>;
 
 def SBWriteResGroup66 : SchedWriteRes<[SBPort0,SBPort4,SBPort23]> {
@@ -1811,69 +1953,69 @@ def SBWriteResGroup67 : SchedWriteRes<[S
   let NumMicroOps = 4;
   let ResourceCycles = [1,2,1];
 }
-def: InstRW<[SBWriteResGroup67], (instregex "SLDT32r")>;
-def: InstRW<[SBWriteResGroup67], (instregex "STR32r")>;
+def: InstRW<[SBWriteResGroup67], (instregex "SLDT(16|32|64)r")>;
+def: InstRW<[SBWriteResGroup67], (instregex "STR(16|32|64)r")>;
 
 def SBWriteResGroup68 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> {
   let Latency = 7;
   let NumMicroOps = 4;
   let ResourceCycles = [1,1,2];
 }
-def: InstRW<[SBWriteResGroup68], (instregex "CALL64m")>;
+def: InstRW<[SBWriteResGroup68], (instregex "CALL(16|32|64)m")>;
 def: InstRW<[SBWriteResGroup68], (instregex "FNSTCW16m")>;
 
-def SBWriteResGroup69 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> {
+def SBWriteResGroup69 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
   let Latency = 7;
   let NumMicroOps = 4;
   let ResourceCycles = [1,2,1];
 }
-def: InstRW<[SBWriteResGroup69], (instregex "BTC64mi8")>;
-def: InstRW<[SBWriteResGroup69], (instregex "BTR64mi8")>;
-def: InstRW<[SBWriteResGroup69], (instregex "BTS64mi8")>;
-def: InstRW<[SBWriteResGroup69], (instregex "SAR64mi")>;
-def: InstRW<[SBWriteResGroup69], (instregex "SAR8mi")>;
-def: InstRW<[SBWriteResGroup69], (instregex "SHL64m1")>;
-def: InstRW<[SBWriteResGroup69], (instregex "SHL64mi")>;
-def: InstRW<[SBWriteResGroup69], (instregex "SHL8m1")>;
-def: InstRW<[SBWriteResGroup69], (instregex "SHL8mi")>;
-def: InstRW<[SBWriteResGroup69], (instregex "SHR64mi")>;
-def: InstRW<[SBWriteResGroup69], (instregex "SHR8mi")>;
+def: InstRW<[SBWriteResGroup69], (instregex "BTC(16|32|64)mi8")>;
+def: InstRW<[SBWriteResGroup69], (instregex "BTR(16|32|64)mi8")>;
+def: InstRW<[SBWriteResGroup69], (instregex "BTS(16|32|64)mi8")>;
+def: InstRW<[SBWriteResGroup69], (instregex "SAR(16|32|64)mi")>;
+def: InstRW<[SBWriteResGroup69], (instregex "SAR8mi")>;
+def: InstRW<[SBWriteResGroup69], (instregex "SHL(16|32|64)m1")>;
+def: InstRW<[SBWriteResGroup69], (instregex "SHL(16|32|64)mi")>;
+def: InstRW<[SBWriteResGroup69], (instregex "SHL8m1")>;
+def: InstRW<[SBWriteResGroup69], (instregex "SHL8mi")>;
+def: InstRW<[SBWriteResGroup69], (instregex "SHR(16|32|64)mi")>;
+def: InstRW<[SBWriteResGroup69], (instregex "SHR8mi")>;
 
 def SBWriteResGroup70 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
   let Latency = 7;
   let NumMicroOps = 4;
   let ResourceCycles = [1,2,1];
 }
-def: InstRW<[SBWriteResGroup70], (instregex "ADD64mi8")>;
-def: InstRW<[SBWriteResGroup70], (instregex "ADD64mr")>;
-def: InstRW<[SBWriteResGroup70], (instregex "ADD8mi")>;
-def: InstRW<[SBWriteResGroup70], (instregex "ADD8mr")>;
-def: InstRW<[SBWriteResGroup70], (instregex "AND64mi8")>;
-def: InstRW<[SBWriteResGroup70], (instregex "AND64mr")>;
-def: InstRW<[SBWriteResGroup70], (instregex "AND8mi")>;
-def: InstRW<[SBWriteResGroup70], (instregex "AND8mr")>;
-def: InstRW<[SBWriteResGroup70], (instregex "DEC64m")>;
-def: InstRW<[SBWriteResGroup70], (instregex "DEC8m")>;
-def: InstRW<[SBWriteResGroup70], (instregex "INC64m")>;
-def: InstRW<[SBWriteResGroup70], (instregex "INC8m")>;
-def: InstRW<[SBWriteResGroup70], (instregex "NEG64m")>;
-def: InstRW<[SBWriteResGroup70], (instregex "NEG8m")>;
-def: InstRW<[SBWriteResGroup70], (instregex "NOT64m")>;
-def: InstRW<[SBWriteResGroup70], (instregex "NOT8m")>;
-def: InstRW<[SBWriteResGroup70], (instregex "OR64mi8")>;
-def: InstRW<[SBWriteResGroup70], (instregex "OR64mr")>;
-def: InstRW<[SBWriteResGroup70], (instregex "OR8mi")>;
-def: InstRW<[SBWriteResGroup70], (instregex "OR8mr")>;
-def: InstRW<[SBWriteResGroup70], (instregex "SUB64mi8")>;
-def: InstRW<[SBWriteResGroup70], (instregex "SUB64mr")>;
-def: InstRW<[SBWriteResGroup70], (instregex "SUB8mi")>;
-def: InstRW<[SBWriteResGroup70], (instregex "SUB8mr")>;
-def: InstRW<[SBWriteResGroup70], (instregex "TEST64rm")>;
-def: InstRW<[SBWriteResGroup70], (instregex "TEST8mi")>;
-def: InstRW<[SBWriteResGroup70], (instregex "TEST8rm")>;
-def: InstRW<[SBWriteResGroup70], (instregex "XOR64mi8")>;
-def: InstRW<[SBWriteResGroup70], (instregex "XOR64mr")>;
-def: InstRW<[SBWriteResGroup70], (instregex "XOR8mi")>;
+def: InstRW<[SBWriteResGroup70], (instregex "ADD(16|32|64)mi8")>;
+def: InstRW<[SBWriteResGroup70], (instregex "ADD(16|32|64)mr")>;
+def: InstRW<[SBWriteResGroup70], (instregex "ADD8mi")>;
+def: InstRW<[SBWriteResGroup70], (instregex "ADD8mr")>;
+def: InstRW<[SBWriteResGroup70], (instregex "AND(16|32|64)mi8")>;
+def: InstRW<[SBWriteResGroup70], (instregex "AND(16|32|64)mr")>;
+def: InstRW<[SBWriteResGroup70], (instregex "AND8mi")>;
+def: InstRW<[SBWriteResGroup70], (instregex "AND8mr")>;
+def: InstRW<[SBWriteResGroup70], (instregex "DEC(16|32|64)m")>;
+def: InstRW<[SBWriteResGroup70], (instregex "DEC8m")>;
+def: InstRW<[SBWriteResGroup70], (instregex "INC(16|32|64)m")>;
+def: InstRW<[SBWriteResGroup70], (instregex "INC8m")>;
+def: InstRW<[SBWriteResGroup70], (instregex "NEG(16|32|64)m")>;
+def: InstRW<[SBWriteResGroup70], (instregex "NEG8m")>;
+def: InstRW<[SBWriteResGroup70], (instregex "NOT(16|32|64)m")>;
+def: InstRW<[SBWriteResGroup70], (instregex "NOT8m")>;
+def: InstRW<[SBWriteResGroup70], (instregex "OR(16|32|64)mi8")>;
+def: InstRW<[SBWriteResGroup70], (instregex "OR(16|32|64)mr")>;
+def: InstRW<[SBWriteResGroup70], (instregex "OR8mi")>;
+def: InstRW<[SBWriteResGroup70], (instregex "OR8mr")>;
+def: InstRW<[SBWriteResGroup70], (instregex "SUB(16|32|64)mi8")>;
+def: InstRW<[SBWriteResGroup70], (instregex "SUB(16|32|64)mr")>;
+def: InstRW<[SBWriteResGroup70], (instregex "SUB8mi")>;
+def: InstRW<[SBWriteResGroup70], (instregex "SUB8mr")>;
+def: InstRW<[SBWriteResGroup70], (instregex "TEST(16|32|64)rm")>;
+def: InstRW<[SBWriteResGroup70], (instregex "TEST8mi")>;
+def: InstRW<[SBWriteResGroup70], (instregex "TEST8rm")>;
+def: InstRW<[SBWriteResGroup70], (instregex "XOR(16|32|64)mi8")>;
+def: InstRW<[SBWriteResGroup70], (instregex "XOR(16|32|64)mr")>;
+def: InstRW<[SBWriteResGroup70], (instregex "XOR8mi")>;
 def: InstRW<[SBWriteResGroup70], (instregex "XOR8mr")>;
 
 def SBWriteResGroup71 : SchedWriteRes<[SBPort0,SBPort23]> {
@@ -1891,14 +2033,14 @@ def SBWriteResGroup72 : SchedWriteRes<[S
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[SBWriteResGroup72], (instregex "BSF64rm")>;
-def: InstRW<[SBWriteResGroup72], (instregex "BSR64rm")>;
-def: InstRW<[SBWriteResGroup72], (instregex "CRC32r32m16")>;
-def: InstRW<[SBWriteResGroup72], (instregex "CRC32r32m8")>;
-def: InstRW<[SBWriteResGroup72], (instregex "FCOM32m")>;
-def: InstRW<[SBWriteResGroup72], (instregex "FCOM64m")>;
-def: InstRW<[SBWriteResGroup72], (instregex "FCOMP32m")>;
-def: InstRW<[SBWriteResGroup72], (instregex "FCOMP64m")>;
+def: InstRW<[SBWriteResGroup72], (instregex "BSF(16|32|64)rm")>;
+def: InstRW<[SBWriteResGroup72], (instregex "BSR(16|32|64)rm")>;
+def: InstRW<[SBWriteResGroup72], (instregex "CRC32r(16|32|64)m64")>;
+def: InstRW<[SBWriteResGroup72], (instregex "CRC32r(16|32|64)m8")>;
+def: InstRW<[SBWriteResGroup72], (instregex "FCOM32m")>;
+def: InstRW<[SBWriteResGroup72], (instregex "FCOM64m")>;
+def: InstRW<[SBWriteResGroup72], (instregex "FCOMP32m")>;
+def: InstRW<[SBWriteResGroup72], (instregex "FCOMP64m")>;
 def: InstRW<[SBWriteResGroup72], (instregex "MUL8m")>;
 
 def SBWriteResGroup73 : SchedWriteRes<[SBPort5,SBPort23]> {
@@ -1906,27 +2048,27 @@ def SBWriteResGroup73 : SchedWriteRes<[S
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[SBWriteResGroup73], (instregex "VANDNPDYrm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VANDNPSYrm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VANDPDrm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VANDPSrm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VORPDYrm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VORPSYrm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VPERM2F128rm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPDYri")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPDmi")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPSYri")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPSmi")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VSHUFPDYrmi")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VSHUFPSYrmi")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKHPDrm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKHPSrm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKLPDYrm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKLPSYrm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VXORPDrm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VXORPSrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VANDNPDYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VANDNPSYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VANDPDYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VANDPSYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VORPDYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VORPSYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VPERM2F128rm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPDYmi")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPDYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPSYmi")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPSYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VSHUFPDYrmi")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VSHUFPSYrmi")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKHPDYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKHPSYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKLPDYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKLPSYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VXORPDYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VXORPSYrm")>;
 
-def SBWriteResGroup74 : SchedWriteRes<[SBPort23,SBPort0]> {
+def SBWriteResGroup74 : SchedWriteRes<[SBPort23,SBPort05]> {
   let Latency = 8;
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
@@ -1934,7 +2076,7 @@ def SBWriteResGroup74 : SchedWriteRes<[S
 def: InstRW<[SBWriteResGroup74], (instregex "VBLENDPDYrmi")>;
 def: InstRW<[SBWriteResGroup74], (instregex "VBLENDPSYrmi")>;
 
-def SBWriteResGroup75 : SchedWriteRes<[SBPort23,SBPort0]> {
+def SBWriteResGroup75 : SchedWriteRes<[SBPort23,SBPort05]> {
   let Latency = 8;
   let NumMicroOps = 3;
   let ResourceCycles = [1,2];
@@ -1981,21 +2123,21 @@ def SBWriteResGroup79 : SchedWriteRes<[S
   let NumMicroOps = 3;
   let ResourceCycles = [1,1,1];
 }
-def: InstRW<[SBWriteResGroup79], (instregex "PSLLDrm")>;
-def: InstRW<[SBWriteResGroup79], (instregex "PSLLQrm")>;
-def: InstRW<[SBWriteResGroup79], (instregex "PSLLWrm")>;
-def: InstRW<[SBWriteResGroup79], (instregex "PSRADrm")>;
-def: InstRW<[SBWriteResGroup79], (instregex "PSRAWrm")>;
-def: InstRW<[SBWriteResGroup79], (instregex "PSRLDrm")>;
-def: InstRW<[SBWriteResGroup79], (instregex "PSRLQrm")>;
-def: InstRW<[SBWriteResGroup79], (instregex "PSRLWrm")>;
-def: InstRW<[SBWriteResGroup79], (instregex "VPSLLDri")>;
-def: InstRW<[SBWriteResGroup79], (instregex "VPSLLQri")>;
-def: InstRW<[SBWriteResGroup79], (instregex "VPSLLWri")>;
-def: InstRW<[SBWriteResGroup79], (instregex "VPSRADrm")>;
-def: InstRW<[SBWriteResGroup79], (instregex "VPSRAWrm")>;
-def: InstRW<[SBWriteResGroup79], (instregex "VPSRLDrm")>;
-def: InstRW<[SBWriteResGroup79], (instregex "VPSRLQrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "PSLLDrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "PSLLQrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "PSLLWrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "PSRADrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "PSRAWrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "PSRLDrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "PSRLQrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "PSRLWrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "VPSLLDrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "VPSLLQrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "VPSLLWrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "VPSRADrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "VPSRAWrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "VPSRLDrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "VPSRLQrm")>;
 def: InstRW<[SBWriteResGroup79], (instregex "VPSRLWrm")>;
 
 def SBWriteResGroup80 : SchedWriteRes<[SBPort23,SBPort15]> {
@@ -2015,16 +2157,16 @@ def SBWriteResGroup81 : SchedWriteRes<[S
   let NumMicroOps = 4;
   let ResourceCycles = [1,3];
 }
-def: InstRW<[SBWriteResGroup81], (instregex "CMPXCHG64rm")>;
+def: InstRW<[SBWriteResGroup81], (instregex "CMPXCHG(16|32|64)rm")>;
 def: InstRW<[SBWriteResGroup81], (instregex "CMPXCHG8rm")>;
 
-def SBWriteResGroup82 : SchedWriteRes<[SBPort23,SBPort0,SBPort015]> {
+def SBWriteResGroup82 : SchedWriteRes<[SBPort23,SBPort05,SBPort015]> {
   let Latency = 8;
   let NumMicroOps = 4;
   let ResourceCycles = [1,2,1];
 }
-def: InstRW<[SBWriteResGroup82], (instregex "CMOVA64rm")>;
-def: InstRW<[SBWriteResGroup82], (instregex "CMOVBE64rm")>;
+def: InstRW<[SBWriteResGroup82], (instregex "CMOVA(16|32|64)rm")>;
+def: InstRW<[SBWriteResGroup82], (instregex "CMOVBE(16|32|64)rm")>;
 
 def SBWriteResGroup83 : SchedWriteRes<[SBPort23,SBPort015]> {
   let Latency = 8;
@@ -2043,14 +2185,14 @@ def SBWriteResGroup84 : SchedWriteRes<[S
 }
 def: InstRW<[SBWriteResGroup84], (instregex "FLDCW16m")>;
 
-def SBWriteResGroup85 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> {
+def SBWriteResGroup85 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
   let Latency = 8;
   let NumMicroOps = 5;
   let ResourceCycles = [1,2,2];
 }
-def: InstRW<[SBWriteResGroup85], (instregex "ROL64mi")>;
-def: InstRW<[SBWriteResGroup85], (instregex "ROL8mi")>;
-def: InstRW<[SBWriteResGroup85], (instregex "ROR64mi")>;
+def: InstRW<[SBWriteResGroup85], (instregex "ROL(16|32|64)mi")>;
+def: InstRW<[SBWriteResGroup85], (instregex "ROL8mi")>;
+def: InstRW<[SBWriteResGroup85], (instregex "ROR(16|32|64)mi")>;
 def: InstRW<[SBWriteResGroup85], (instregex "ROR8mi")>;
 
 def SBWriteResGroup86 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
@@ -2062,7 +2204,7 @@ def: InstRW<[SBWriteResGroup86], (instre
 def: InstRW<[SBWriteResGroup86], (instregex "MOVSL")>;
 def: InstRW<[SBWriteResGroup86], (instregex "MOVSQ")>;
 def: InstRW<[SBWriteResGroup86], (instregex "MOVSW")>;
-def: InstRW<[SBWriteResGroup86], (instregex "XADD64rm")>;
+def: InstRW<[SBWriteResGroup86], (instregex "XADD(16|32|64)rm")>;
 def: InstRW<[SBWriteResGroup86], (instregex "XADD8rm")>;
 
 def SBWriteResGroup87 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> {
@@ -2072,13 +2214,13 @@ def SBWriteResGroup87 : SchedWriteRes<[S
 }
 def: InstRW<[SBWriteResGroup87], (instregex "FARCALL64")>;
 
-def SBWriteResGroup88 : SchedWriteRes<[SBPort4,SBPort23,SBPort0,SBPort015]> {
+def SBWriteResGroup88 : SchedWriteRes<[SBPort4,SBPort23,SBPort05,SBPort015]> {
   let Latency = 8;
   let NumMicroOps = 5;
   let ResourceCycles = [1,2,1,1];
 }
-def: InstRW<[SBWriteResGroup88], (instregex "SHLD64mri8")>;
-def: InstRW<[SBWriteResGroup88], (instregex "SHRD64mri8")>;
+def: InstRW<[SBWriteResGroup88], (instregex "SHLD(16|32|64)mri8")>;
+def: InstRW<[SBWriteResGroup88], (instregex "SHRD(16|32|64)mri8")>;
 
 def SBWriteResGroup89 : SchedWriteRes<[SBPort0,SBPort23]> {
   let Latency = 9;
@@ -2137,7 +2279,7 @@ def: InstRW<[SBWriteResGroup90], (instre
 def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTPI2PSirm")>;
 def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTPS2PIirm")>;
 def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTTPS2PIirm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "POPCNT64rm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "POPCNT(16|32|64)rm")>;
 def: InstRW<[SBWriteResGroup90], (instregex "ROUNDPDm")>;
 def: InstRW<[SBWriteResGroup90], (instregex "ROUNDPSm")>;
 def: InstRW<[SBWriteResGroup90], (instregex "ROUNDSDm")>;
@@ -2178,15 +2320,15 @@ def: InstRW<[SBWriteResGroup90], (instre
 def: InstRW<[SBWriteResGroup90], (instregex "VSUBSDrm")>;
 def: InstRW<[SBWriteResGroup90], (instregex "VSUBSSrm")>;
 
-def SBWriteResGroup91 : SchedWriteRes<[SBPort23,SBPort0]> {
+def SBWriteResGroup91 : SchedWriteRes<[SBPort23,SBPort05]> {
   let Latency = 9;
   let NumMicroOps = 3;
   let ResourceCycles = [1,2];
 }
-def: InstRW<[SBWriteResGroup91], (instregex "VBLENDVPDYrm")>;
-def: InstRW<[SBWriteResGroup91], (instregex "VBLENDVPSYrm")>;
-def: InstRW<[SBWriteResGroup91], (instregex "VMASKMOVPDrm")>;
-def: InstRW<[SBWriteResGroup91], (instregex "VMASKMOVPSrm")>;
+def: InstRW<[SBWriteResGroup91], (instregex "VBLENDVPDYrm")>;
+def: InstRW<[SBWriteResGroup91], (instregex "VBLENDVPSYrm")>;
+def: InstRW<[SBWriteResGroup91], (instregex "VMASKMOVPDYrm")>;
+def: InstRW<[SBWriteResGroup91], (instregex "VMASKMOVPSYrm")>;
 
 def SBWriteResGroup92 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> {
   let Latency = 9;
@@ -2209,7 +2351,7 @@ def: InstRW<[SBWriteResGroup93], (instre
 def: InstRW<[SBWriteResGroup93], (instregex "CVTTSD2SIrm")>;
 def: InstRW<[SBWriteResGroup93], (instregex "CVTTSS2SI64rm")>;
 def: InstRW<[SBWriteResGroup93], (instregex "CVTTSS2SIrm")>;
-def: InstRW<[SBWriteResGroup93], (instregex "MUL64m")>;
+def: InstRW<[SBWriteResGroup93], (instregex "MUL(16|32|64)m")>;
 
 def SBWriteResGroup94 : SchedWriteRes<[SBPort0,SBPort5,SBPort23]> {
   let Latency = 9;
@@ -2255,69 +2397,83 @@ def: InstRW<[SBWriteResGroup97], (instre
 def: InstRW<[SBWriteResGroup97], (instregex "IST_FP16m")>;
 def: InstRW<[SBWriteResGroup97], (instregex "IST_FP32m")>;
 def: InstRW<[SBWriteResGroup97], (instregex "IST_FP64m")>;
-def: InstRW<[SBWriteResGroup97], (instregex "SHL64mCL")>;
-def: InstRW<[SBWriteResGroup97], (instregex "SHL8mCL")>;
+
+def SBWriteResGroup97_2 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
+  let Latency = 9;
+  let NumMicroOps = 6;
+  let ResourceCycles = [1,2,3];
+}
+def: InstRW<[SBWriteResGroup97_2], (instregex "ROL(16|32|64)mCL")>;
+def: InstRW<[SBWriteResGroup97_2], (instregex "ROL8mCL")>;
+def: InstRW<[SBWriteResGroup97_2], (instregex "ROR(16|32|64)mCL")>;
+def: InstRW<[SBWriteResGroup97_2], (instregex "ROR8mCL")>;
+def: InstRW<[SBWriteResGroup97_2], (instregex "SAR(16|32|64)mCL")>;
+def: InstRW<[SBWriteResGroup97_2], (instregex "SAR8mCL")>;
+def: InstRW<[SBWriteResGroup97_2], (instregex "SHL(16|32|64)mCL")>;
+def: InstRW<[SBWriteResGroup97_2], (instregex "SHL8mCL")>;
+def: InstRW<[SBWriteResGroup97_2], (instregex "SHR(16|32|64)mCL")>;
+def: InstRW<[SBWriteResGroup97_2], (instregex "SHR8mCL")>;
 
 def SBWriteResGroup98 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
   let Latency = 9;
   let NumMicroOps = 6;
   let ResourceCycles = [1,2,3];
 }
-def: InstRW<[SBWriteResGroup98], (instregex "ADC64mi8")>;
-def: InstRW<[SBWriteResGroup98], (instregex "ADC8mi")>;
-def: InstRW<[SBWriteResGroup98], (instregex "SBB64mi8")>;
-def: InstRW<[SBWriteResGroup98], (instregex "SBB8mi")>;
+def: InstRW<[SBWriteResGroup98], (instregex "ADC(16|32|64)mi8")>;
+def: InstRW<[SBWriteResGroup98], (instregex "ADC8mi")>;
+def: InstRW<[SBWriteResGroup98], (instregex "SBB(16|32|64)mi8")>;
+def: InstRW<[SBWriteResGroup98], (instregex "SBB8mi")>;
 
-def SBWriteResGroup99 : SchedWriteRes<[SBPort4,SBPort23,SBPort0,SBPort015]> {
+def SBWriteResGroup99 : SchedWriteRes<[SBPort4,SBPort23,SBPort05,SBPort015]> {
   let Latency = 9;
   let NumMicroOps = 6;
   let ResourceCycles = [1,2,2,1];
 }
-def: InstRW<[SBWriteResGroup99], (instregex "ADC64mr")>;
-def: InstRW<[SBWriteResGroup99], (instregex "ADC8mr")>;
-def: InstRW<[SBWriteResGroup99], (instregex "SBB64mr")>;
+def: InstRW<[SBWriteResGroup99], (instregex "ADC(16|32|64)mr")>;
+def: InstRW<[SBWriteResGroup99], (instregex "ADC8mr")>;
+def: InstRW<[SBWriteResGroup99], (instregex "SBB(16|32|64)mr")>;
 def: InstRW<[SBWriteResGroup99], (instregex "SBB8mr")>;
 
-def SBWriteResGroup100 : SchedWriteRes<[SBPort4,SBPort5,SBPort23,SBPort0,SBPort015]> {
+def SBWriteResGroup100 : SchedWriteRes<[SBPort4,SBPort5,SBPort23,SBPort05,SBPort015]> {
   let Latency = 9;
   let NumMicroOps = 6;
   let ResourceCycles = [1,1,2,1,1];
 }
-def: InstRW<[SBWriteResGroup100], (instregex "BT64mr")>;
-def: InstRW<[SBWriteResGroup100], (instregex "BTC64mr")>;
-def: InstRW<[SBWriteResGroup100], (instregex "BTR64mr")>;
-def: InstRW<[SBWriteResGroup100], (instregex "BTS64mr")>;
+def: InstRW<[SBWriteResGroup100], (instregex "BT(16|32|64)mr")>;
+def: InstRW<[SBWriteResGroup100], (instregex "BTC(16|32|64)mr")>;
+def: InstRW<[SBWriteResGroup100], (instregex "BTR(16|32|64)mr")>;
+def: InstRW<[SBWriteResGroup100], (instregex "BTS(16|32|64)mr")>;
 
 def SBWriteResGroup101 : SchedWriteRes<[SBPort1,SBPort23]> {
   let Latency = 10;
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[SBWriteResGroup101], (instregex "ADD_F32m")>;
-def: InstRW<[SBWriteResGroup101], (instregex "ADD_F64m")>;
-def: InstRW<[SBWriteResGroup101], (instregex "ILD_F16m")>;
-def: InstRW<[SBWriteResGroup101], (instregex "ILD_F32m")>;
-def: InstRW<[SBWriteResGroup101], (instregex "ILD_F64m")>;
-def: InstRW<[SBWriteResGroup101], (instregex "SUBR_F32m")>;
-def: InstRW<[SBWriteResGroup101], (instregex "SUBR_F64m")>;
-def: InstRW<[SBWriteResGroup101], (instregex "SUB_F32m")>;
-def: InstRW<[SBWriteResGroup101], (instregex "SUB_F64m")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VADDPDYrm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VADDPSYrm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VADDSUBPDYrm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VADDSUBPSYrm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VCMPPDYrmi")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VCMPPSYrmi")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VCVTDQ2PSYrm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VCVTPS2DQYrm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VCVTTPS2DQrm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VMAXPDYrm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VMAXPSYrm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VMINPDrm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VMINPSrm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VROUNDPDm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VROUNDPSm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VSUBPDYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "ADD_F32m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "ADD_F64m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "ILD_F16m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "ILD_F32m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "ILD_F64m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "SUBR_F32m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "SUBR_F64m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "SUB_F32m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "SUB_F64m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VADDPDYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VADDPSYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VADDSUBPDYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VADDSUBPSYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VCMPPDYrmi")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VCMPPSYrmi")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VCVTDQ2PSYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VCVTPS2DQYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VCVTTPS2DQYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VMAXPDYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VMAXPSYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VMINPDYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VMINPSYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VROUNDYPDm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VROUNDYPSm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VSUBPDYrm")>;
 def: InstRW<[SBWriteResGroup101], (instregex "VSUBPSYrm")>;
 
 def SBWriteResGroup102 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
@@ -2325,13 +2481,13 @@ def SBWriteResGroup102 : SchedWriteRes<[
   let NumMicroOps = 3;
   let ResourceCycles = [1,1,1];
 }
-def: InstRW<[SBWriteResGroup102], (instregex "VCVTSD2SI64rm")>;
-def: InstRW<[SBWriteResGroup102], (instregex "VCVTSD2SI64rr")>;
-def: InstRW<[SBWriteResGroup102], (instregex "VCVTSS2SI64rm")>;
-def: InstRW<[SBWriteResGroup102], (instregex "VCVTSS2SIrm")>;
-def: InstRW<[SBWriteResGroup102], (instregex "VCVTTSD2SI64rm")>;
-def: InstRW<[SBWriteResGroup102], (instregex "VCVTTSD2SI64rr")>;
-def: InstRW<[SBWriteResGroup102], (instregex "VCVTTSS2SI64rm")>;
+def: InstRW<[SBWriteResGroup102], (instregex "VCVTSD2SI64rm")>;
+def: InstRW<[SBWriteResGroup102], (instregex "VCVTSD2SIrm")>;
+def: InstRW<[SBWriteResGroup102], (instregex "VCVTSS2SI64rm")>;
+def: InstRW<[SBWriteResGroup102], (instregex "VCVTSS2SIrm")>;
+def: InstRW<[SBWriteResGroup102], (instregex "VCVTTSD2SI64rm")>;
+def: InstRW<[SBWriteResGroup102], (instregex "VCVTTSD2SIrm")>;
+def: InstRW<[SBWriteResGroup102], (instregex "VCVTTSS2SI64rm")>;
 def: InstRW<[SBWriteResGroup102], (instregex "VCVTTSS2SIrm")>;
 
 def SBWriteResGroup103 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> {
@@ -2358,6 +2514,14 @@ def: InstRW<[SBWriteResGroup103], (instr
 def: InstRW<[SBWriteResGroup103], (instregex "VCVTSI2SSrm")>;
 def: InstRW<[SBWriteResGroup103], (instregex "VCVTTPD2DQrm")>;
 
+def SBWriteResGroup103_2 : SchedWriteRes<[SBPort4,SBPort23,SBPort05,SBPort015]> {
+  let Latency = 10;
+  let NumMicroOps = 7;
+  let ResourceCycles = [1,2,3,1];
+}
+def: InstRW<[SBWriteResGroup103_2], (instregex "SHLD(16|32|64)mrCL")>;
+def: InstRW<[SBWriteResGroup103_2], (instregex "SHRD(16|32|64)mrCL")>;
+
 def SBWriteResGroup104 : SchedWriteRes<[SBPort0,SBPort23]> {
   let Latency = 11;
   let NumMicroOps = 2;
@@ -2467,7 +2631,7 @@ def SBWriteResGroup113 : SchedWriteRes<[
   let NumMicroOps = 4;
   let ResourceCycles = [1,2,1];
 }
-def: InstRW<[SBWriteResGroup113], (instregex "VHADDPDrm")>;
+def: InstRW<[SBWriteResGroup113], (instregex "VHADDPDYrm")>;
 def: InstRW<[SBWriteResGroup113], (instregex "VHADDPSYrm")>;
 def: InstRW<[SBWriteResGroup113], (instregex "VHSUBPDYrm")>;
 def: InstRW<[SBWriteResGroup113], (instregex "VHSUBPSYrm")>;
@@ -2518,12 +2682,12 @@ def SBWriteResGroup117 : SchedWriteRes<[
 }
 def: InstRW<[SBWriteResGroup117], (instregex "VSQRTSSm")>;
 
-def SBWriteResGroup118 : SchedWriteRes<[SBPort0,SBPort23,SBPort0]> {
+def SBWriteResGroup118 : SchedWriteRes<[SBPort0,SBPort23,SBPort05]> {
   let Latency = 14;
   let NumMicroOps = 4;
   let ResourceCycles = [2,1,1];
 }
-def: InstRW<[SBWriteResGroup118], (instregex "VRCPPSm")>;
+def: InstRW<[SBWriteResGroup118], (instregex "VRCPPSYm")>;
 def: InstRW<[SBWriteResGroup118], (instregex "VRSQRTPSYm")>;
 
 def SBWriteResGroup119 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
@@ -2625,7 +2789,7 @@ def: InstRW<[SBWriteResGroup128], (instr
 def: InstRW<[SBWriteResGroup128], (instregex "VDIVSDrm")>;
 def: InstRW<[SBWriteResGroup128], (instregex "VSQRTPDm")>;
 
-def SBWriteResGroup129 : SchedWriteRes<[SBPort0,SBPort0]> {
+def SBWriteResGroup129 : SchedWriteRes<[SBPort0,SBPort05]> {
   let Latency = 29;
   let NumMicroOps = 3;
   let ResourceCycles = [2,1];
@@ -2653,7 +2817,7 @@ def: InstRW<[SBWriteResGroup131], (instr
 def: InstRW<[SBWriteResGroup131], (instregex "DIV_FI16m")>;
 def: InstRW<[SBWriteResGroup131], (instregex "DIV_FI32m")>;
 
-def SBWriteResGroup132 : SchedWriteRes<[SBPort0,SBPort23,SBPort0]> {
+def SBWriteResGroup132 : SchedWriteRes<[SBPort0,SBPort23,SBPort05]> {
   let Latency = 36;
   let NumMicroOps = 4;
   let ResourceCycles = [2,1,1];
@@ -2661,7 +2825,7 @@ def SBWriteResGroup132 : SchedWriteRes<[
 def: InstRW<[SBWriteResGroup132], (instregex "VDIVPSYrm")>;
 def: InstRW<[SBWriteResGroup132], (instregex "VSQRTPSYm")>;
 
-def SBWriteResGroup133 : SchedWriteRes<[SBPort0,SBPort0]> {
+def SBWriteResGroup133 : SchedWriteRes<[SBPort0,SBPort05]> {
   let Latency = 45;
   let NumMicroOps = 3;
   let ResourceCycles = [2,1];
@@ -2669,7 +2833,7 @@ def SBWriteResGroup133 : SchedWriteRes<[
 def: InstRW<[SBWriteResGroup133], (instregex "VDIVPDYrr")>;
 def: InstRW<[SBWriteResGroup133], (instregex "VSQRTPDYr")>;
 
-def SBWriteResGroup134 : SchedWriteRes<[SBPort0,SBPort23,SBPort0]> {
+def SBWriteResGroup134 : SchedWriteRes<[SBPort0,SBPort23,SBPort05]> {
   let Latency = 52;
   let NumMicroOps = 4;
   let ResourceCycles = [2,1,1];

Modified: llvm/trunk/test/CodeGen/X86/avx-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-schedule.ll?rev=310792&r1=310791&r2=310792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-schedule.ll Sun Aug 13 06:59:24 2017
@@ -253,14 +253,14 @@ define <4 x double> @test_andpd(<4 x dou
 ; GENERIC-LABEL: test_andpd:
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vandpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vandpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_andpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SANDY-NEXT:    vandpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; SANDY-NEXT:    vandpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; SANDY-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -299,14 +299,14 @@ define <8 x float> @test_andps(<8 x floa
 ; GENERIC-LABEL: test_andps:
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vandps (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vandps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_andps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SANDY-NEXT:    vandps (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; SANDY-NEXT:    vandps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; SANDY-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -344,16 +344,16 @@ define <8 x float> @test_andps(<8 x floa
 define <4 x double> @test_blendpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
 ; GENERIC-LABEL: test_blendpd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:1.00]
+; GENERIC-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50]
 ; GENERIC-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:1.00]
+; GENERIC-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_blendpd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:1.00]
+; SANDY-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50]
 ; SANDY-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:1.00]
+; SANDY-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_blendpd:
@@ -386,14 +386,14 @@ define <4 x double> @test_blendpd(<4 x d
 define <8 x float> @test_blendps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
 ; GENERIC-LABEL: test_blendps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:1.00]
-; GENERIC-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [8:1.00]
+; GENERIC-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50]
+; GENERIC-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [8:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_blendps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:1.00]
-; SANDY-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [8:1.00]
+; SANDY-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50]
+; SANDY-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [8:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_blendps:
@@ -422,14 +422,14 @@ define <8 x float> @test_blendps(<8 x fl
 define <4 x double> @test_blendvpd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) {
 ; GENERIC-LABEL: test_blendvpd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
-; GENERIC-NEXT:    vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
+; GENERIC-NEXT:    vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
+; GENERIC-NEXT:    vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_blendvpd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
-; SANDY-NEXT:    vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
+; SANDY-NEXT:    vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
+; SANDY-NEXT:    vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_blendvpd:
@@ -459,14 +459,14 @@ declare <4 x double> @llvm.x86.avx.blend
 define <8 x float> @test_blendvps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) {
 ; GENERIC-LABEL: test_blendvps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
-; GENERIC-NEXT:    vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
+; GENERIC-NEXT:    vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
+; GENERIC-NEXT:    vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_blendvps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
-; SANDY-NEXT:    vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
+; SANDY-NEXT:    vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
+; SANDY-NEXT:    vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_blendvps:
@@ -496,12 +496,12 @@ declare <8 x float> @llvm.x86.avx.blendv
 define <8 x float> @test_broadcastf128(<4 x float> *%a0) {
 ; GENERIC-LABEL: test_broadcastf128:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [3:1.00]
+; GENERIC-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_broadcastf128:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [3:1.00]
+; SANDY-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_broadcastf128:
@@ -588,12 +588,12 @@ define <4 x float> @test_broadcastss(flo
 define <8 x float> @test_broadcastss_ymm(float *%a0) {
 ; GENERIC-LABEL: test_broadcastss_ymm:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    vbroadcastss (%rdi), %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vbroadcastss (%rdi), %ymm0 # sched: [7:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_broadcastss_ymm:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vbroadcastss (%rdi), %ymm0 # sched: [5:1.00]
+; SANDY-NEXT:    vbroadcastss (%rdi), %ymm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_broadcastss_ymm:
@@ -760,7 +760,7 @@ define <8 x float> @test_cvtdq2ps(<8 x i
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vmovaps (%rdi), %xmm1 # sched: [6:0.50]
-; SANDY-NEXT:    vinsertf128 $1, 16(%rdi), %ymm1, %ymm1 # sched: [7:1.00]
+; SANDY-NEXT:    vinsertf128 $1, 16(%rdi), %ymm1, %ymm1 # sched: [7:0.50]
 ; SANDY-NEXT:    vcvtdq2ps %ymm1, %ymm1 # sched: [3:1.00]
 ; SANDY-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
@@ -880,14 +880,14 @@ define <8 x i32> @test_cvtps2dq(<8 x flo
 ; GENERIC-LABEL: test_cvtps2dq:
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vcvttps2dq (%rdi), %ymm1 # sched: [7:1.00]
+; GENERIC-NEXT:    vcvttps2dq (%rdi), %ymm1 # sched: [10:1.00]
 ; GENERIC-NEXT:    vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_cvtps2dq:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    vcvttps2dq (%rdi), %ymm1 # sched: [7:1.00]
+; SANDY-NEXT:    vcvttps2dq (%rdi), %ymm1 # sched: [10:1.00]
 ; SANDY-NEXT:    vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -921,14 +921,14 @@ define <8 x i32> @test_cvtps2dq(<8 x flo
 define <4 x double> @test_divpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
 ; GENERIC-LABEL: test_divpd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    vdivpd %ymm1, %ymm0, %ymm0 # sched: [45:3.00]
-; GENERIC-NEXT:    vdivpd (%rdi), %ymm0, %ymm0 # sched: [52:3.00]
+; GENERIC-NEXT:    vdivpd %ymm1, %ymm0, %ymm0 # sched: [45:2.00]
+; GENERIC-NEXT:    vdivpd (%rdi), %ymm0, %ymm0 # sched: [52:2.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_divpd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vdivpd %ymm1, %ymm0, %ymm0 # sched: [45:3.00]
-; SANDY-NEXT:    vdivpd (%rdi), %ymm0, %ymm0 # sched: [52:3.00]
+; SANDY-NEXT:    vdivpd %ymm1, %ymm0, %ymm0 # sched: [45:2.00]
+; SANDY-NEXT:    vdivpd (%rdi), %ymm0, %ymm0 # sched: [52:2.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_divpd:
@@ -957,14 +957,14 @@ define <4 x double> @test_divpd(<4 x dou
 define <8 x float> @test_divps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
 ; GENERIC-LABEL: test_divps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    vdivps %ymm1, %ymm0, %ymm0 # sched: [29:3.00]
-; GENERIC-NEXT:    vdivps (%rdi), %ymm0, %ymm0 # sched: [36:3.00]
+; GENERIC-NEXT:    vdivps %ymm1, %ymm0, %ymm0 # sched: [29:2.00]
+; GENERIC-NEXT:    vdivps (%rdi), %ymm0, %ymm0 # sched: [36:2.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_divps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vdivps %ymm1, %ymm0, %ymm0 # sched: [29:3.00]
-; SANDY-NEXT:    vdivps (%rdi), %ymm0, %ymm0 # sched: [36:3.00]
+; SANDY-NEXT:    vdivps %ymm1, %ymm0, %ymm0 # sched: [29:2.00]
+; SANDY-NEXT:    vdivps (%rdi), %ymm0, %ymm0 # sched: [36:2.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_divps:
@@ -1070,14 +1070,14 @@ define <4 x float> @test_extractf128(<8
 define <4 x double> @test_haddpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
 ; GENERIC-LABEL: test_haddpd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vhaddpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
+; GENERIC-NEXT:    vhaddpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_haddpd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    vhaddpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; SANDY-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
+; SANDY-NEXT:    vhaddpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_haddpd:
@@ -1219,14 +1219,14 @@ define <8 x float> @test_insertf128(<8 x
 ; GENERIC-LABEL: test_insertf128:
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00]
-; GENERIC-NEXT:    vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
 ; GENERIC-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_insertf128:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00]
-; SANDY-NEXT:    vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; SANDY-NEXT:    vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
 ; SANDY-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1292,14 +1292,14 @@ declare <32 x i8> @llvm.x86.avx.ldu.dq.2
 define <2 x double> @test_maskmovpd(i8* %a0, <2 x i64> %a1, <2 x double> %a2) {
 ; GENERIC-LABEL: test_maskmovpd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:2.00]
+; GENERIC-NEXT:    vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
 ; GENERIC-NEXT:    vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
 ; GENERIC-NEXT:    vmovapd %xmm2, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_maskmovpd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:2.00]
+; SANDY-NEXT:    vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
 ; SANDY-NEXT:    vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
 ; SANDY-NEXT:    vmovapd %xmm2, %xmm0 # sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
@@ -1334,15 +1334,15 @@ declare void @llvm.x86.avx.maskstore.pd(
 define <4 x double> @test_maskmovpd_ymm(i8* %a0, <4 x i64> %a1, <4 x double> %a2) {
 ; GENERIC-LABEL: test_maskmovpd_ymm:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [5:1.00]
-; GENERIC-NEXT:    vmaskmovpd %ymm1, %ymm0, (%rdi)
+; GENERIC-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [9:1.00]
+; GENERIC-NEXT:    vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_maskmovpd_ymm:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [5:1.00]
-; SANDY-NEXT:    vmaskmovpd %ymm1, %ymm0, (%rdi)
+; SANDY-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [9:1.00]
+; SANDY-NEXT:    vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
 ; SANDY-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1376,14 +1376,14 @@ declare void @llvm.x86.avx.maskstore.pd.
 define <4 x float> @test_maskmovps(i8* %a0, <4 x i32> %a1, <4 x float> %a2) {
 ; GENERIC-LABEL: test_maskmovps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:2.00]
+; GENERIC-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
 ; GENERIC-NEXT:    vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm2, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_maskmovps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:2.00]
+; SANDY-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
 ; SANDY-NEXT:    vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
 ; SANDY-NEXT:    vmovaps %xmm2, %xmm0 # sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
@@ -1418,15 +1418,15 @@ declare void @llvm.x86.avx.maskstore.ps(
 define <8 x float> @test_maskmovps_ymm(i8* %a0, <8 x i32> %a1, <8 x float> %a2) {
 ; GENERIC-LABEL: test_maskmovps_ymm:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [1:0.50]
-; GENERIC-NEXT:    vmaskmovps %ymm1, %ymm0, (%rdi)
+; GENERIC-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [9:1.00]
+; GENERIC-NEXT:    vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_maskmovps_ymm:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [1:0.50]
-; SANDY-NEXT:    vmaskmovps %ymm1, %ymm0, (%rdi)
+; SANDY-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [9:1.00]
+; SANDY-NEXT:    vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
 ; SANDY-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1535,13 +1535,13 @@ define <4 x double> @test_minpd(<4 x dou
 ; GENERIC-LABEL: test_minpd:
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vminpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vminpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_minpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    vminpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; SANDY-NEXT:    vminpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_minpd:
@@ -1572,13 +1572,13 @@ define <8 x float> @test_minps(<8 x floa
 ; GENERIC-LABEL: test_minps:
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vminps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT:    vminps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_minps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    vminps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; SANDY-NEXT:    vminps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_minps:
@@ -1766,13 +1766,13 @@ declare i32 @llvm.x86.avx.movmsk.pd.256(
 define i32 @test_movmskps(<8 x float> %a0) {
 ; GENERIC-LABEL: test_movmskps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    vmovmskps %ymm0, %eax # sched: [3:1.00]
+; GENERIC-NEXT:    vmovmskps %ymm0, %eax # sched: [2:1.00]
 ; GENERIC-NEXT:    vzeroupper
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_movmskps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmovmskps %ymm0, %eax # sched: [3:1.00]
+; SANDY-NEXT:    vmovmskps %ymm0, %eax # sched: [2:1.00]
 ; SANDY-NEXT:    vzeroupper
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1962,7 +1962,7 @@ define <4 x double> @test_movupd(<4 x do
 ; SANDY-LABEL: test_movupd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmovups (%rdi), %xmm0 # sched: [6:0.50]
-; SANDY-NEXT:    vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; SANDY-NEXT:    vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:0.50]
 ; SANDY-NEXT:    vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vextractf128 $1, %ymm0, 16(%rsi) # sched: [5:1.00]
 ; SANDY-NEXT:    vmovupd %xmm0, (%rsi) # sched: [5:1.00]
@@ -2005,7 +2005,7 @@ define <8 x float> @test_movups(<8 x flo
 ; SANDY-LABEL: test_movups:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmovups (%rdi), %xmm0 # sched: [6:0.50]
-; SANDY-NEXT:    vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:1.00]
+; SANDY-NEXT:    vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:0.50]
 ; SANDY-NEXT:    vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vextractf128 $1, %ymm0, 16(%rsi) # sched: [5:1.00]
 ; SANDY-NEXT:    vmovups %xmm0, (%rsi) # sched: [5:1.00]
@@ -2246,15 +2246,15 @@ define <2 x double> @test_permilpd(<2 x
 define <4 x double> @test_permilpd_ymm(<4 x double> %a0, <4 x double> *%a1) {
 ; GENERIC-LABEL: test_permilpd_ymm:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [8:1.00]
-; GENERIC-NEXT:    vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [5:1.00]
+; GENERIC-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00]
+; GENERIC-NEXT:    vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:1.00]
 ; GENERIC-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_permilpd_ymm:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [8:1.00]
-; SANDY-NEXT:    vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [5:1.00]
+; SANDY-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00]
+; SANDY-NEXT:    vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:1.00]
 ; SANDY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2330,15 +2330,15 @@ define <4 x float> @test_permilps(<4 x f
 define <8 x float> @test_permilps_ymm(<8 x float> %a0, <8 x float> *%a1) {
 ; GENERIC-LABEL: test_permilps_ymm:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [8:1.00]
-; GENERIC-NEXT:    vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [5:1.00]
+; GENERIC-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
+; GENERIC-NEXT:    vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:1.00]
 ; GENERIC-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_permilps_ymm:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [8:1.00]
-; SANDY-NEXT:    vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [5:1.00]
+; SANDY-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
+; SANDY-NEXT:    vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:1.00]
 ; SANDY-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2373,13 +2373,13 @@ define <2 x double> @test_permilvarpd(<2
 ; GENERIC-LABEL: test_permilvarpd:
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermilpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_permilvarpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT:    vpermilpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
+; SANDY-NEXT:    vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_permilvarpd:
@@ -2410,13 +2410,13 @@ define <4 x double> @test_permilvarpd_ym
 ; GENERIC-LABEL: test_permilvarpd_ymm:
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermilpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_permilvarpd_ymm:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SANDY-NEXT:    vpermilpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; SANDY-NEXT:    vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_permilvarpd_ymm:
@@ -2447,13 +2447,13 @@ define <4 x float> @test_permilvarps(<4
 ; GENERIC-LABEL: test_permilvarps:
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermilps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_permilvarps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT:    vpermilps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
+; SANDY-NEXT:    vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_permilvarps:
@@ -2484,13 +2484,13 @@ define <8 x float> @test_permilvarps_ymm
 ; GENERIC-LABEL: test_permilvarps_ymm:
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpermilps (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_permilvarps_ymm:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SANDY-NEXT:    vpermilps (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; SANDY-NEXT:    vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_permilvarps_ymm:
@@ -2520,15 +2520,15 @@ declare <8 x float> @llvm.x86.avx.vpermi
 define <8 x float> @test_rcpps(<8 x float> %a0, <8 x float> *%a1) {
 ; GENERIC-LABEL: test_rcpps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    vrcpps %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vrcpps (%rdi), %ymm1 # sched: [9:1.00]
+; GENERIC-NEXT:    vrcpps (%rdi), %ymm1 # sched: [14:2.00]
+; GENERIC-NEXT:    vrcpps %ymm0, %ymm0 # sched: [7:2.00]
 ; GENERIC-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_rcpps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vrcpps %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT:    vrcpps (%rdi), %ymm1 # sched: [9:1.00]
+; SANDY-NEXT:    vrcpps (%rdi), %ymm1 # sched: [14:2.00]
+; SANDY-NEXT:    vrcpps %ymm0, %ymm0 # sched: [7:2.00]
 ; SANDY-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2564,14 +2564,14 @@ define <4 x double> @test_roundpd(<4 x d
 ; GENERIC-LABEL: test_roundpd:
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    vroundpd $7, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vroundpd $7, (%rdi), %ymm1 # sched: [7:1.00]
+; GENERIC-NEXT:    vroundpd $7, (%rdi), %ymm1 # sched: [10:1.00]
 ; GENERIC-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_roundpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vroundpd $7, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    vroundpd $7, (%rdi), %ymm1 # sched: [7:1.00]
+; SANDY-NEXT:    vroundpd $7, (%rdi), %ymm1 # sched: [10:1.00]
 ; SANDY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2607,14 +2607,14 @@ define <8 x float> @test_roundps(<8 x fl
 ; GENERIC-LABEL: test_roundps:
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    vroundps $7, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vroundps $7, (%rdi), %ymm1 # sched: [7:1.00]
+; GENERIC-NEXT:    vroundps $7, (%rdi), %ymm1 # sched: [10:1.00]
 ; GENERIC-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_roundps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vroundps $7, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT:    vroundps $7, (%rdi), %ymm1 # sched: [7:1.00]
+; SANDY-NEXT:    vroundps $7, (%rdi), %ymm1 # sched: [10:1.00]
 ; SANDY-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2649,15 +2649,15 @@ declare <8 x float> @llvm.x86.avx.round.
 define <8 x float> @test_rsqrtps(<8 x float> %a0, <8 x float> *%a1) {
 ; GENERIC-LABEL: test_rsqrtps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    vrsqrtps (%rdi), %ymm1 # sched: [14:3.00]
-; GENERIC-NEXT:    vrsqrtps %ymm0, %ymm0 # sched: [7:3.00]
+; GENERIC-NEXT:    vrsqrtps (%rdi), %ymm1 # sched: [14:2.00]
+; GENERIC-NEXT:    vrsqrtps %ymm0, %ymm0 # sched: [7:2.00]
 ; GENERIC-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_rsqrtps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vrsqrtps (%rdi), %ymm1 # sched: [14:3.00]
-; SANDY-NEXT:    vrsqrtps %ymm0, %ymm0 # sched: [7:3.00]
+; SANDY-NEXT:    vrsqrtps (%rdi), %ymm1 # sched: [14:2.00]
+; SANDY-NEXT:    vrsqrtps %ymm0, %ymm0 # sched: [7:2.00]
 ; SANDY-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2770,15 +2770,15 @@ define <8 x float> @test_shufps(<8 x flo
 define <4 x double> @test_sqrtpd(<4 x double> %a0, <4 x double> *%a1) {
 ; GENERIC-LABEL: test_sqrtpd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    vsqrtpd (%rdi), %ymm1 # sched: [52:3.00]
-; GENERIC-NEXT:    vsqrtpd %ymm0, %ymm0 # sched: [45:3.00]
+; GENERIC-NEXT:    vsqrtpd (%rdi), %ymm1 # sched: [52:2.00]
+; GENERIC-NEXT:    vsqrtpd %ymm0, %ymm0 # sched: [45:2.00]
 ; GENERIC-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_sqrtpd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vsqrtpd (%rdi), %ymm1 # sched: [52:3.00]
-; SANDY-NEXT:    vsqrtpd %ymm0, %ymm0 # sched: [45:3.00]
+; SANDY-NEXT:    vsqrtpd (%rdi), %ymm1 # sched: [52:2.00]
+; SANDY-NEXT:    vsqrtpd %ymm0, %ymm0 # sched: [45:2.00]
 ; SANDY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2813,15 +2813,15 @@ declare <4 x double> @llvm.x86.avx.sqrt.
 define <8 x float> @test_sqrtps(<8 x float> %a0, <8 x float> *%a1) {
 ; GENERIC-LABEL: test_sqrtps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    vsqrtps (%rdi), %ymm1 # sched: [36:3.00]
-; GENERIC-NEXT:    vsqrtps %ymm0, %ymm0 # sched: [29:3.00]
+; GENERIC-NEXT:    vsqrtps (%rdi), %ymm1 # sched: [36:2.00]
+; GENERIC-NEXT:    vsqrtps %ymm0, %ymm0 # sched: [29:2.00]
 ; GENERIC-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_sqrtps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vsqrtps (%rdi), %ymm1 # sched: [36:3.00]
-; SANDY-NEXT:    vsqrtps %ymm0, %ymm0 # sched: [29:3.00]
+; SANDY-NEXT:    vsqrtps (%rdi), %ymm1 # sched: [36:2.00]
+; SANDY-NEXT:    vsqrtps %ymm0, %ymm0 # sched: [29:2.00]
 ; SANDY-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2930,18 +2930,18 @@ define i32 @test_testpd(<2 x double> %a0
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    xorl %eax, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    vtestpd %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    setb %al # sched: [1:1.00]
+; GENERIC-NEXT:    setb %al # sched: [1:0.50]
 ; GENERIC-NEXT:    vtestpd (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT:    adcl $0, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    adcl $0, %eax # sched: [2:0.67]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_testpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    xorl %eax, %eax # sched: [1:0.33]
 ; SANDY-NEXT:    vtestpd %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT:    setb %al # sched: [1:1.00]
+; SANDY-NEXT:    setb %al # sched: [1:0.50]
 ; SANDY-NEXT:    vtestpd (%rdi), %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    adcl $0, %eax # sched: [1:0.33]
+; SANDY-NEXT:    adcl $0, %eax # sched: [2:0.67]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_testpd:
@@ -2983,9 +2983,9 @@ define i32 @test_testpd_ymm(<4 x double>
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    xorl %eax, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    vtestpd %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    setb %al # sched: [1:1.00]
+; GENERIC-NEXT:    setb %al # sched: [1:0.50]
 ; GENERIC-NEXT:    vtestpd (%rdi), %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT:    adcl $0, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    adcl $0, %eax # sched: [2:0.67]
 ; GENERIC-NEXT:    vzeroupper
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2993,9 +2993,9 @@ define i32 @test_testpd_ymm(<4 x double>
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    xorl %eax, %eax # sched: [1:0.33]
 ; SANDY-NEXT:    vtestpd %ymm1, %ymm0 # sched: [1:1.00]
-; SANDY-NEXT:    setb %al # sched: [1:1.00]
+; SANDY-NEXT:    setb %al # sched: [1:0.50]
 ; SANDY-NEXT:    vtestpd (%rdi), %ymm0 # sched: [8:1.00]
-; SANDY-NEXT:    adcl $0, %eax # sched: [1:0.33]
+; SANDY-NEXT:    adcl $0, %eax # sched: [2:0.67]
 ; SANDY-NEXT:    vzeroupper
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3040,18 +3040,18 @@ define i32 @test_testps(<4 x float> %a0,
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    xorl %eax, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    vtestps %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    setb %al # sched: [1:1.00]
+; GENERIC-NEXT:    setb %al # sched: [1:0.50]
 ; GENERIC-NEXT:    vtestps (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT:    adcl $0, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    adcl $0, %eax # sched: [2:0.67]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_testps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    xorl %eax, %eax # sched: [1:0.33]
 ; SANDY-NEXT:    vtestps %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT:    setb %al # sched: [1:1.00]
+; SANDY-NEXT:    setb %al # sched: [1:0.50]
 ; SANDY-NEXT:    vtestps (%rdi), %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    adcl $0, %eax # sched: [1:0.33]
+; SANDY-NEXT:    adcl $0, %eax # sched: [2:0.67]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_testps:
@@ -3093,9 +3093,9 @@ define i32 @test_testps_ymm(<8 x float>
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    xorl %eax, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    vtestps %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    setb %al # sched: [1:1.00]
+; GENERIC-NEXT:    setb %al # sched: [1:0.50]
 ; GENERIC-NEXT:    vtestps (%rdi), %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT:    adcl $0, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    adcl $0, %eax # sched: [2:0.67]
 ; GENERIC-NEXT:    vzeroupper
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3103,9 +3103,9 @@ define i32 @test_testps_ymm(<8 x float>
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    xorl %eax, %eax # sched: [1:0.33]
 ; SANDY-NEXT:    vtestps %ymm1, %ymm0 # sched: [1:1.00]
-; SANDY-NEXT:    setb %al # sched: [1:1.00]
+; SANDY-NEXT:    setb %al # sched: [1:0.50]
 ; SANDY-NEXT:    vtestps (%rdi), %ymm0 # sched: [8:1.00]
-; SANDY-NEXT:    adcl $0, %eax # sched: [1:0.33]
+; SANDY-NEXT:    adcl $0, %eax # sched: [2:0.67]
 ; SANDY-NEXT:    vzeroupper
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3149,14 +3149,14 @@ define <4 x double> @test_unpckhpd(<4 x
 ; GENERIC-LABEL: test_unpckhpd:
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [5:1.00]
+; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:1.00]
 ; GENERIC-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_unpckhpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
-; SANDY-NEXT:    vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [5:1.00]
+; SANDY-NEXT:    vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:1.00]
 ; SANDY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3191,13 +3191,13 @@ define <8 x float> @test_unpckhps(<8 x f
 ; GENERIC-LABEL: test_unpckhps:
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
-; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [5:1.00]
+; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_unpckhps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
-; SANDY-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [5:1.00]
+; SANDY-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_unpckhps:
@@ -3305,14 +3305,14 @@ define <4 x double> @test_xorpd(<4 x dou
 ; GENERIC-LABEL: test_xorpd:
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vxorpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_xorpd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SANDY-NEXT:    vxorpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; SANDY-NEXT:    vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; SANDY-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3351,14 +3351,14 @@ define <8 x float> @test_xorps(<8 x floa
 ; GENERIC-LABEL: test_xorps:
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vxorps (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; GENERIC-NEXT:    vxorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; GENERIC-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SANDY-LABEL: test_xorps:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SANDY-NEXT:    vxorps (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
+; SANDY-NEXT:    vxorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
 ; SANDY-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;

Modified: llvm/trunk/test/CodeGen/X86/bmi-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bmi-schedule.ll?rev=310792&r1=310791&r2=310792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bmi-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bmi-schedule.ll Sun Aug 13 06:59:24 2017
@@ -11,7 +11,7 @@ define i16 @test_andn_i16(i16 zeroext %a
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    andnl %esi, %edi, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    notl %edi # sched: [1:0.33]
-; GENERIC-NEXT:    andw (%rdx), %di # sched: [5:0.50]
+; GENERIC-NEXT:    andw (%rdx), %di # sched: [6:0.50]
 ; GENERIC-NEXT:    addl %edi, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; GENERIC-NEXT:    retq # sched: [1:1.00]

Modified: llvm/trunk/test/CodeGen/X86/popcnt-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/popcnt-schedule.ll?rev=310792&r1=310791&r2=310792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/popcnt-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/popcnt-schedule.ll Sun Aug 13 06:59:24 2017
@@ -13,7 +13,7 @@
 define i16 @test_ctpop_i16(i16 zeroext %a0, i16 *%a1) {
 ; GENERIC-LABEL: test_ctpop_i16:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    popcntw (%rsi), %cx # sched: [7:1.00]
+; GENERIC-NEXT:    popcntw (%rsi), %cx # sched: [9:1.00]
 ; GENERIC-NEXT:    popcntw %di, %ax # sched: [3:1.00]
 ; GENERIC-NEXT:    orl %ecx, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
@@ -29,7 +29,7 @@ define i16 @test_ctpop_i16(i16 zeroext %
 ;
 ; SANDY-LABEL: test_ctpop_i16:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    popcntw (%rsi), %cx # sched: [7:1.00]
+; SANDY-NEXT:    popcntw (%rsi), %cx # sched: [9:1.00]
 ; SANDY-NEXT:    popcntw %di, %ax # sched: [3:1.00]
 ; SANDY-NEXT:    orl %ecx, %eax # sched: [1:0.33]
 ; SANDY-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
@@ -69,7 +69,7 @@ declare i16 @llvm.ctpop.i16(i16)
 define i32 @test_ctpop_i32(i32 %a0, i32 *%a1) {
 ; GENERIC-LABEL: test_ctpop_i32:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    popcntl (%rsi), %ecx # sched: [7:1.00]
+; GENERIC-NEXT:    popcntl (%rsi), %ecx # sched: [9:1.00]
 ; GENERIC-NEXT:    popcntl %edi, %eax # sched: [3:1.00]
 ; GENERIC-NEXT:    orl %ecx, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -83,7 +83,7 @@ define i32 @test_ctpop_i32(i32 %a0, i32
 ;
 ; SANDY-LABEL: test_ctpop_i32:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    popcntl (%rsi), %ecx # sched: [7:1.00]
+; SANDY-NEXT:    popcntl (%rsi), %ecx # sched: [9:1.00]
 ; SANDY-NEXT:    popcntl %edi, %eax # sched: [3:1.00]
 ; SANDY-NEXT:    orl %ecx, %eax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]

Modified: llvm/trunk/test/CodeGen/X86/pr34080.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr34080.ll?rev=310792&r1=310791&r2=310792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr34080.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr34080.ll Sun Aug 13 06:59:24 2017
@@ -60,10 +60,10 @@ define void @_Z1fe(x86_fp80 %z) local_un
 ; SSE2-BROKEN-NEXT:  Lcfi2:
 ; SSE2-BROKEN-NEXT:    .cfi_def_cfa_register %rbp
 ; SSE2-BROKEN-NEXT:    fnstcw -4(%rbp)
+; SSE2-BROKEN-NEXT:    fldt 16(%rbp)
 ; SSE2-BROKEN-NEXT:    movzwl -4(%rbp), %eax
 ; SSE2-BROKEN-NEXT:    movw $3199, -4(%rbp) ## imm = 0xC7F
 ; SSE2-BROKEN-NEXT:    fldcw -4(%rbp)
-; SSE2-BROKEN-NEXT:    fldt 16(%rbp)
 ; SSE2-BROKEN-NEXT:    movw %ax, -4(%rbp)
 ; SSE2-BROKEN-NEXT:    fistl -8(%rbp)
 ; SSE2-BROKEN-NEXT:    fldcw -4(%rbp)
@@ -71,12 +71,12 @@ define void @_Z1fe(x86_fp80 %z) local_un
 ; SSE2-BROKEN-NEXT:    movsd %xmm0, -64(%rbp)
 ; SSE2-BROKEN-NEXT:    movsd %xmm0, -32(%rbp)
 ; SSE2-BROKEN-NEXT:    fsubl -32(%rbp)
-; SSE2-BROKEN-NEXT:    fnstcw -2(%rbp)
 ; SSE2-BROKEN-NEXT:    flds {{.*}}(%rip)
+; SSE2-BROKEN-NEXT:    fnstcw -2(%rbp)
+; SSE2-BROKEN-NEXT:    fmul %st(0), %st(1)
 ; SSE2-BROKEN-NEXT:    movzwl -2(%rbp), %eax
 ; SSE2-BROKEN-NEXT:    movw $3199, -2(%rbp) ## imm = 0xC7F
 ; SSE2-BROKEN-NEXT:    fldcw -2(%rbp)
-; SSE2-BROKEN-NEXT:    fmul %st(0), %st(1)
 ; SSE2-BROKEN-NEXT:    movw %ax, -2(%rbp)
 ; SSE2-BROKEN-NEXT:    fxch %st(1)
 ; SSE2-BROKEN-NEXT:    fistl -12(%rbp)

Modified: llvm/trunk/test/CodeGen/X86/recip-fastmath.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/recip-fastmath.ll?rev=310792&r1=310791&r2=310792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/recip-fastmath.ll (original)
+++ llvm/trunk/test/CodeGen/X86/recip-fastmath.ll Sun Aug 13 06:59:24 2017
@@ -350,7 +350,7 @@ define <4 x float> @v4f32_one_step(<4 x
 ;
 ; SANDY-LABEL: v4f32_one_step:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vrcpps %xmm0, %xmm1 # sched: [7:3.00]
+; SANDY-NEXT:    vrcpps %xmm0, %xmm1 # sched: [5:1.00]
 ; SANDY-NEXT:    vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
 ; SANDY-NEXT:    vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
@@ -453,7 +453,7 @@ define <4 x float> @v4f32_two_step(<4 x
 ;
 ; SANDY-LABEL: v4f32_two_step:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vrcpps %xmm0, %xmm1 # sched: [7:3.00]
+; SANDY-NEXT:    vrcpps %xmm0, %xmm1 # sched: [5:1.00]
 ; SANDY-NEXT:    vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
 ; SANDY-NEXT:    vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
 ; SANDY-NEXT:    vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
@@ -547,7 +547,7 @@ define <8 x float> @v8f32_no_estimate(<8
 ; SANDY-LABEL: v8f32_no_estimate:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
-; SANDY-NEXT:    vdivps %ymm0, %ymm1, %ymm0 # sched: [29:3.00]
+; SANDY-NEXT:    vdivps %ymm0, %ymm1, %ymm0 # sched: [29:2.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: v8f32_no_estimate:
@@ -619,7 +619,7 @@ define <8 x float> @v8f32_one_step(<8 x
 ;
 ; SANDY-LABEL: v8f32_one_step:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vrcpps %ymm0, %ymm1 # sched: [5:1.00]
+; SANDY-NEXT:    vrcpps %ymm0, %ymm1 # sched: [7:2.00]
 ; SANDY-NEXT:    vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
 ; SANDY-NEXT:    vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
@@ -735,7 +735,7 @@ define <8 x float> @v8f32_two_step(<8 x
 ;
 ; SANDY-LABEL: v8f32_two_step:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vrcpps %ymm0, %ymm1 # sched: [5:1.00]
+; SANDY-NEXT:    vrcpps %ymm0, %ymm1 # sched: [7:2.00]
 ; SANDY-NEXT:    vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00]
 ; SANDY-NEXT:    vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
 ; SANDY-NEXT:    vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]

Modified: llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll?rev=310792&r1=310791&r2=310792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll Sun Aug 13 06:59:24 2017
@@ -403,7 +403,7 @@ define <4 x float> @v4f32_one_step2(<4 x
 ;
 ; SANDY-LABEL: v4f32_one_step2:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vrcpps %xmm0, %xmm1 # sched: [7:3.00]
+; SANDY-NEXT:    vrcpps %xmm0, %xmm1 # sched: [5:1.00]
 ; SANDY-NEXT:    vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
 ; SANDY-NEXT:    vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
@@ -501,7 +501,7 @@ define <4 x float> @v4f32_one_step_2_div
 ;
 ; SANDY-LABEL: v4f32_one_step_2_divs:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vrcpps %xmm0, %xmm1 # sched: [7:3.00]
+; SANDY-NEXT:    vrcpps %xmm0, %xmm1 # sched: [5:1.00]
 ; SANDY-NEXT:    vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
 ; SANDY-NEXT:    vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
@@ -619,7 +619,7 @@ define <4 x float> @v4f32_two_step2(<4 x
 ;
 ; SANDY-LABEL: v4f32_two_step2:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vrcpps %xmm0, %xmm1 # sched: [7:3.00]
+; SANDY-NEXT:    vrcpps %xmm0, %xmm1 # sched: [5:1.00]
 ; SANDY-NEXT:    vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
 ; SANDY-NEXT:    vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
 ; SANDY-NEXT:    vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
@@ -739,7 +739,7 @@ define <8 x float> @v8f32_one_step2(<8 x
 ;
 ; SANDY-LABEL: v8f32_one_step2:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vrcpps %ymm0, %ymm1 # sched: [5:1.00]
+; SANDY-NEXT:    vrcpps %ymm0, %ymm1 # sched: [7:2.00]
 ; SANDY-NEXT:    vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
 ; SANDY-NEXT:    vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
@@ -846,7 +846,7 @@ define <8 x float> @v8f32_one_step_2_div
 ;
 ; SANDY-LABEL: v8f32_one_step_2_divs:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vrcpps %ymm0, %ymm1 # sched: [5:1.00]
+; SANDY-NEXT:    vrcpps %ymm0, %ymm1 # sched: [7:2.00]
 ; SANDY-NEXT:    vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
 ; SANDY-NEXT:    vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
@@ -978,7 +978,7 @@ define <8 x float> @v8f32_two_step2(<8 x
 ;
 ; SANDY-LABEL: v8f32_two_step2:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vrcpps %ymm0, %ymm1 # sched: [5:1.00]
+; SANDY-NEXT:    vrcpps %ymm0, %ymm1 # sched: [7:2.00]
 ; SANDY-NEXT:    vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00]
 ; SANDY-NEXT:    vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
 ; SANDY-NEXT:    vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
@@ -1069,7 +1069,7 @@ define <8 x float> @v8f32_no_step(<8 x f
 ;
 ; SANDY-LABEL: v8f32_no_step:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vrcpps %ymm0, %ymm0 # sched: [5:1.00]
+; SANDY-NEXT:    vrcpps %ymm0, %ymm0 # sched: [7:2.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: v8f32_no_step:
@@ -1124,7 +1124,7 @@ define <8 x float> @v8f32_no_step2(<8 x
 ;
 ; SANDY-LABEL: v8f32_no_step2:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vrcpps %ymm0, %ymm0 # sched: [5:1.00]
+; SANDY-NEXT:    vrcpps %ymm0, %ymm0 # sched: [7:2.00]
 ; SANDY-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;

Modified: llvm/trunk/test/CodeGen/X86/sse-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-schedule.ll?rev=310792&r1=310791&r2=310792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse-schedule.ll Sun Aug 13 06:59:24 2017
@@ -334,12 +334,12 @@ define i32 @test_comiss(<4 x float> %a0,
 ; GENERIC-LABEL: test_comiss:
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    comiss %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    setnp %al # sched: [1:1.00]
-; GENERIC-NEXT:    sete %cl # sched: [1:1.00]
+; GENERIC-NEXT:    setnp %al # sched: [1:0.50]
+; GENERIC-NEXT:    sete %cl # sched: [1:0.50]
 ; GENERIC-NEXT:    andb %al, %cl # sched: [1:0.33]
 ; GENERIC-NEXT:    comiss (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT:    setnp %al # sched: [1:1.00]
-; GENERIC-NEXT:    sete %dl # sched: [1:1.00]
+; GENERIC-NEXT:    setnp %al # sched: [1:0.50]
+; GENERIC-NEXT:    sete %dl # sched: [1:0.50]
 ; GENERIC-NEXT:    andb %al, %dl # sched: [1:0.33]
 ; GENERIC-NEXT:    orb %cl, %dl # sched: [1:0.33]
 ; GENERIC-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
@@ -376,12 +376,12 @@ define i32 @test_comiss(<4 x float> %a0,
 ; SANDY-LABEL: test_comiss:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vcomiss %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    setnp %al # sched: [1:1.00]
-; SANDY-NEXT:    sete %cl # sched: [1:1.00]
+; SANDY-NEXT:    setnp %al # sched: [1:0.50]
+; SANDY-NEXT:    sete %cl # sched: [1:0.50]
 ; SANDY-NEXT:    andb %al, %cl # sched: [1:0.33]
 ; SANDY-NEXT:    vcomiss (%rdi), %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    setnp %al # sched: [1:1.00]
-; SANDY-NEXT:    sete %dl # sched: [1:1.00]
+; SANDY-NEXT:    setnp %al # sched: [1:0.50]
+; SANDY-NEXT:    sete %dl # sched: [1:0.50]
 ; SANDY-NEXT:    andb %al, %dl # sched: [1:0.33]
 ; SANDY-NEXT:    orb %cl, %dl # sched: [1:0.33]
 ; SANDY-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
@@ -877,7 +877,7 @@ define float @test_divss(float %a0, floa
 define void @test_ldmxcsr(i32 %a0) {
 ; GENERIC-LABEL: test_ldmxcsr:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
+; GENERIC-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [5:1.00]
 ; GENERIC-NEXT:    ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -895,7 +895,7 @@ define void @test_ldmxcsr(i32 %a0) {
 ;
 ; SANDY-LABEL: test_ldmxcsr:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
+; SANDY-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [5:1.00]
 ; SANDY-NEXT:    vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1559,7 +1559,7 @@ define <4 x float> @test_movss_reg(<4 x
 ;
 ; SANDY-LABEL: test_movss_reg:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
+; SANDY-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movss_reg:
@@ -1859,7 +1859,7 @@ define <4 x float> @test_rcpps(<4 x floa
 ;
 ; SANDY-LABEL: test_rcpps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vrcpps %xmm0, %xmm0 # sched: [7:3.00]
+; SANDY-NEXT:    vrcpps %xmm0, %xmm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vrcpps (%rdi), %xmm1 # sched: [11:1.00]
 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
@@ -1921,9 +1921,9 @@ define <4 x float> @test_rcpss(float %a0
 ;
 ; SANDY-LABEL: test_rcpss:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vrcpss %xmm0, %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT:    vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
 ; SANDY-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
-; SANDY-NEXT:    vrcpss %xmm1, %xmm1, %xmm1 # sched: [9:1.00]
+; SANDY-NEXT:    vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2459,12 +2459,12 @@ define i32 @test_ucomiss(<4 x float> %a0
 ; GENERIC-LABEL: test_ucomiss:
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    ucomiss %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    setnp %al # sched: [1:1.00]
-; GENERIC-NEXT:    sete %cl # sched: [1:1.00]
+; GENERIC-NEXT:    setnp %al # sched: [1:0.50]
+; GENERIC-NEXT:    sete %cl # sched: [1:0.50]
 ; GENERIC-NEXT:    andb %al, %cl # sched: [1:0.33]
 ; GENERIC-NEXT:    ucomiss (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT:    setnp %al # sched: [1:1.00]
-; GENERIC-NEXT:    sete %dl # sched: [1:1.00]
+; GENERIC-NEXT:    setnp %al # sched: [1:0.50]
+; GENERIC-NEXT:    sete %dl # sched: [1:0.50]
 ; GENERIC-NEXT:    andb %al, %dl # sched: [1:0.33]
 ; GENERIC-NEXT:    orb %cl, %dl # sched: [1:0.33]
 ; GENERIC-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
@@ -2501,12 +2501,12 @@ define i32 @test_ucomiss(<4 x float> %a0
 ; SANDY-LABEL: test_ucomiss:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vucomiss %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    setnp %al # sched: [1:1.00]
-; SANDY-NEXT:    sete %cl # sched: [1:1.00]
+; SANDY-NEXT:    setnp %al # sched: [1:0.50]
+; SANDY-NEXT:    sete %cl # sched: [1:0.50]
 ; SANDY-NEXT:    andb %al, %cl # sched: [1:0.33]
 ; SANDY-NEXT:    vucomiss (%rdi), %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    setnp %al # sched: [1:1.00]
-; SANDY-NEXT:    sete %dl # sched: [1:1.00]
+; SANDY-NEXT:    setnp %al # sched: [1:0.50]
+; SANDY-NEXT:    sete %dl # sched: [1:0.50]
 ; SANDY-NEXT:    andb %al, %dl # sched: [1:0.33]
 ; SANDY-NEXT:    orb %cl, %dl # sched: [1:0.33]
 ; SANDY-NEXT:    movzbl %dl, %eax # sched: [1:0.33]

Modified: llvm/trunk/test/CodeGen/X86/sse2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-schedule.ll?rev=310792&r1=310791&r2=310792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-schedule.ll Sun Aug 13 06:59:24 2017
@@ -342,12 +342,12 @@ define i32 @test_comisd(<2 x double> %a0
 ; GENERIC-LABEL: test_comisd:
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    comisd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    setnp %al # sched: [1:1.00]
-; GENERIC-NEXT:    sete %cl # sched: [1:1.00]
+; GENERIC-NEXT:    setnp %al # sched: [1:0.50]
+; GENERIC-NEXT:    sete %cl # sched: [1:0.50]
 ; GENERIC-NEXT:    andb %al, %cl # sched: [1:0.33]
 ; GENERIC-NEXT:    comisd (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT:    setnp %al # sched: [1:1.00]
-; GENERIC-NEXT:    sete %dl # sched: [1:1.00]
+; GENERIC-NEXT:    setnp %al # sched: [1:0.50]
+; GENERIC-NEXT:    sete %dl # sched: [1:0.50]
 ; GENERIC-NEXT:    andb %al, %dl # sched: [1:0.33]
 ; GENERIC-NEXT:    orb %cl, %dl # sched: [1:0.33]
 ; GENERIC-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
@@ -384,12 +384,12 @@ define i32 @test_comisd(<2 x double> %a0
 ; SANDY-LABEL: test_comisd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vcomisd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    setnp %al # sched: [1:1.00]
-; SANDY-NEXT:    sete %cl # sched: [1:1.00]
+; SANDY-NEXT:    setnp %al # sched: [1:0.50]
+; SANDY-NEXT:    sete %cl # sched: [1:0.50]
 ; SANDY-NEXT:    andb %al, %cl # sched: [1:0.33]
 ; SANDY-NEXT:    vcomisd (%rdi), %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    setnp %al # sched: [1:1.00]
-; SANDY-NEXT:    sete %dl # sched: [1:1.00]
+; SANDY-NEXT:    setnp %al # sched: [1:0.50]
+; SANDY-NEXT:    sete %dl # sched: [1:0.50]
 ; SANDY-NEXT:    andb %al, %dl # sched: [1:0.33]
 ; SANDY-NEXT:    orb %cl, %dl # sched: [1:0.33]
 ; SANDY-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
@@ -816,8 +816,8 @@ define i32 @test_cvtsd2si(double %a0, do
 ;
 ; SANDY-LABEL: test_cvtsd2si:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vcvtsd2si %xmm0, %ecx # sched: [3:1.00]
-; SANDY-NEXT:    vcvtsd2si (%rdi), %eax # sched: [7:1.00]
+; SANDY-NEXT:    vcvtsd2si %xmm0, %ecx # sched: [5:1.00]
+; SANDY-NEXT:    vcvtsd2si (%rdi), %eax # sched: [10:1.00]
 ; SANDY-NEXT:    addl %ecx, %eax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -938,9 +938,9 @@ define float @test_cvtsd2ss(double %a0,
 ;
 ; SANDY-LABEL: test_cvtsd2ss:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
 ; SANDY-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50]
-; SANDY-NEXT:    vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [3:1.00]
+; SANDY-NEXT:    vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
 ; SANDY-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1293,7 +1293,7 @@ define i32 @test_cvttsd2si(double %a0, d
 ; SANDY-LABEL: test_cvttsd2si:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vcvttsd2si %xmm0, %ecx # sched: [5:1.00]
-; SANDY-NEXT:    vcvttsd2si (%rdi), %eax # sched: [7:1.00]
+; SANDY-NEXT:    vcvttsd2si (%rdi), %eax # sched: [10:1.00]
 ; SANDY-NEXT:    addl %ecx, %eax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2006,7 +2006,7 @@ define i32 @test_movd(<4 x i32> %a0, i32
 ;
 ; SANDY-LABEL: test_movd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vmovd %edi, %xmm1 # sched: [1:0.33]
+; SANDY-NEXT:    vmovd %edi, %xmm1 # sched: [1:1.00]
 ; SANDY-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
 ; SANDY-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
@@ -3146,7 +3146,7 @@ define <16 x i8> @test_paddsb(<16 x i8>
 ;
 ; SANDY-LABEL: test_paddsb:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3199,7 +3199,7 @@ define <8 x i16> @test_paddsw(<8 x i16>
 ;
 ; SANDY-LABEL: test_paddsw:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3358,7 +3358,7 @@ define <8 x i16> @test_paddw(<8 x i16> %
 ;
 ; SANDY-LABEL: test_paddw:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4399,7 +4399,7 @@ define <8 x i16> @test_pmulhuw(<8 x i16>
 ;
 ; SANDY-LABEL: test_pmulhuw:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4553,7 +4553,7 @@ define <2 x i64> @test_pmuludq(<4 x i32>
 ;
 ; SANDY-LABEL: test_pmuludq:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; SANDY-NEXT:    vpmuludq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4780,9 +4780,9 @@ define <8 x i16> @test_pshufhw(<8 x i16>
 ;
 ; SANDY-LABEL: test_pshufhw:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
+; SANDY-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50]
 ; SANDY-NEXT:    vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50]
-; SANDY-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pshufhw:
@@ -4840,7 +4840,7 @@ define <8 x i16> @test_pshuflw(<8 x i16>
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50]
 ; SANDY-NEXT:    vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50]
-; SANDY-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pshuflw:
@@ -4894,8 +4894,8 @@ define <4 x i32> @test_pslld(<4 x i32> %
 ;
 ; SANDY-LABEL: test_pslld:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpslld %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT:    vpslld (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT:    vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; SANDY-NEXT:    vpslld (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; SANDY-NEXT:    vpslld $2, %xmm0, %xmm0 # sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4997,8 +4997,8 @@ define <2 x i64> @test_psllq(<2 x i64> %
 ;
 ; SANDY-LABEL: test_psllq:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT:    vpsllq (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; SANDY-NEXT:    vpsllq (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; SANDY-NEXT:    vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5055,8 +5055,8 @@ define <8 x i16> @test_psllw(<8 x i16> %
 ;
 ; SANDY-LABEL: test_psllw:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT:    vpsllw (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; SANDY-NEXT:    vpsllw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; SANDY-NEXT:    vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5974,7 +5974,7 @@ define <2 x i64> @test_punpckhqdq(<2 x i
 ;
 ; SANDY-LABEL: test_punpckhqdq:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; SANDY-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
 ; SANDY-NEXT:    vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50]
 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
@@ -6083,7 +6083,7 @@ define <16 x i8> @test_punpcklbw(<16 x i
 ;
 ; SANDY-LABEL: test_punpcklbw:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
+; SANDY-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
 ; SANDY-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6612,12 +6612,12 @@ define i32 @test_ucomisd(<2 x double> %a
 ; GENERIC-LABEL: test_ucomisd:
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    ucomisd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    setnp %al # sched: [1:1.00]
-; GENERIC-NEXT:    sete %cl # sched: [1:1.00]
+; GENERIC-NEXT:    setnp %al # sched: [1:0.50]
+; GENERIC-NEXT:    sete %cl # sched: [1:0.50]
 ; GENERIC-NEXT:    andb %al, %cl # sched: [1:0.33]
 ; GENERIC-NEXT:    ucomisd (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT:    setnp %al # sched: [1:1.00]
-; GENERIC-NEXT:    sete %dl # sched: [1:1.00]
+; GENERIC-NEXT:    setnp %al # sched: [1:0.50]
+; GENERIC-NEXT:    sete %dl # sched: [1:0.50]
 ; GENERIC-NEXT:    andb %al, %dl # sched: [1:0.33]
 ; GENERIC-NEXT:    orb %cl, %dl # sched: [1:0.33]
 ; GENERIC-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
@@ -6654,12 +6654,12 @@ define i32 @test_ucomisd(<2 x double> %a
 ; SANDY-LABEL: test_ucomisd:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vucomisd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    setnp %al # sched: [1:1.00]
-; SANDY-NEXT:    sete %cl # sched: [1:1.00]
+; SANDY-NEXT:    setnp %al # sched: [1:0.50]
+; SANDY-NEXT:    sete %cl # sched: [1:0.50]
 ; SANDY-NEXT:    andb %al, %cl # sched: [1:0.33]
 ; SANDY-NEXT:    vucomisd (%rdi), %xmm0 # sched: [7:1.00]
-; SANDY-NEXT:    setnp %al # sched: [1:1.00]
-; SANDY-NEXT:    sete %dl # sched: [1:1.00]
+; SANDY-NEXT:    setnp %al # sched: [1:0.50]
+; SANDY-NEXT:    sete %dl # sched: [1:0.50]
 ; SANDY-NEXT:    andb %al, %dl # sched: [1:0.33]
 ; SANDY-NEXT:    orb %cl, %dl # sched: [1:0.33]
 ; SANDY-NEXT:    movzbl %dl, %eax # sched: [1:0.33]

Modified: llvm/trunk/test/CodeGen/X86/sse41-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-schedule.ll?rev=310792&r1=310791&r2=310792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41-schedule.ll Sun Aug 13 06:59:24 2017
@@ -11,9 +11,9 @@
 define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
 ; GENERIC-LABEL: test_blendpd:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:1.00]
+; GENERIC-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
 ; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:1.00]
+; GENERIC-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_blendpd:
@@ -25,9 +25,9 @@ define <2 x double> @test_blendpd(<2 x d
 ;
 ; SANDY-LABEL: test_blendpd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:1.00]
+; SANDY-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
 ; SANDY-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:1.00]
+; SANDY-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_blendpd:
@@ -60,8 +60,8 @@ define <2 x double> @test_blendpd(<2 x d
 define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
 ; GENERIC-LABEL: test_blendps:
 ; GENERIC:       # BB#0:
-; GENERIC-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:1.00]
-; GENERIC-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [7:1.00]
+; GENERIC-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
+; GENERIC-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [7:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_blendps:
@@ -72,8 +72,8 @@ define <4 x float> @test_blendps(<4 x fl
 ;
 ; SANDY-LABEL: test_blendps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:1.00]
-; SANDY-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [7:1.00]
+; SANDY-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
+; SANDY-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_blendps:
@@ -104,8 +104,8 @@ define <2 x double> @test_blendvpd(<2 x
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    movapd %xmm0, %xmm3 # sched: [1:1.00]
 ; GENERIC-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
-; GENERIC-NEXT:    blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:2.00]
+; GENERIC-NEXT:    blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
+; GENERIC-NEXT:    blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
 ; GENERIC-NEXT:    movapd %xmm3, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -120,8 +120,8 @@ define <2 x double> @test_blendvpd(<2 x
 ;
 ; SANDY-LABEL: test_blendvpd:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
-; SANDY-NEXT:    vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
+; SANDY-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; SANDY-NEXT:    vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_blendvpd:
@@ -153,8 +153,8 @@ define <4 x float> @test_blendvps(<4 x f
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    movaps %xmm0, %xmm3 # sched: [1:1.00]
 ; GENERIC-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    blendvps %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
-; GENERIC-NEXT:    blendvps %xmm0, (%rdi), %xmm3 # sched: [8:2.00]
+; GENERIC-NEXT:    blendvps %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
+; GENERIC-NEXT:    blendvps %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
 ; GENERIC-NEXT:    movaps %xmm3, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -169,8 +169,8 @@ define <4 x float> @test_blendvps(<4 x f
 ;
 ; SANDY-LABEL: test_blendvps:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
-; SANDY-NEXT:    vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
+; SANDY-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; SANDY-NEXT:    vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_blendvps:
@@ -555,7 +555,7 @@ define <2 x i64> @test_pcmpeqq(<2 x i64>
 ;
 ; SANDY-LABEL: test_pcmpeqq:
 ; SANDY:       # BB#0:
-; SANDY-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1294,7 +1294,7 @@ define <8 x i16> @test_pmovsxbw(<16 x i8
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpmovsxbw %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpmovsxbw (%rdi), %xmm1 # sched: [7:0.50]
-; SANDY-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmovsxbw:
@@ -1600,7 +1600,7 @@ define <8 x i16> @test_pmovzxbw(<16 x i8
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
 ; SANDY-NEXT:    vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50]
-; SANDY-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmovzxbw:
@@ -1976,9 +1976,9 @@ define i32 @test_ptest(<2 x i64> %a0, <2
 ; GENERIC-LABEL: test_ptest:
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    ptest %xmm1, %xmm0 # sched: [2:1.00]
-; GENERIC-NEXT:    setb %al # sched: [1:1.00]
+; GENERIC-NEXT:    setb %al # sched: [1:0.50]
 ; GENERIC-NEXT:    ptest (%rdi), %xmm0 # sched: [8:1.00]
-; GENERIC-NEXT:    setb %cl # sched: [1:1.00]
+; GENERIC-NEXT:    setb %cl # sched: [1:0.50]
 ; GENERIC-NEXT:    andb %al, %cl # sched: [1:0.33]
 ; GENERIC-NEXT:    movzbl %cl, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -1996,9 +1996,9 @@ define i32 @test_ptest(<2 x i64> %a0, <2
 ; SANDY-LABEL: test_ptest:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    vptest %xmm1, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT:    setb %al # sched: [1:1.00]
+; SANDY-NEXT:    setb %al # sched: [1:0.50]
 ; SANDY-NEXT:    vptest (%rdi), %xmm0 # sched: [8:1.00]
-; SANDY-NEXT:    setb %cl # sched: [1:1.00]
+; SANDY-NEXT:    setb %cl # sched: [1:0.50]
 ; SANDY-NEXT:    andb %al, %cl # sched: [1:0.33]
 ; SANDY-NEXT:    movzbl %cl, %eax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]

Modified: llvm/trunk/test/CodeGen/X86/sse42-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse42-schedule.ll?rev=310792&r1=310791&r2=310792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse42-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse42-schedule.ll Sun Aug 13 06:59:24 2017
@@ -61,7 +61,7 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1
 ; GENERIC-LABEL: crc32_32_16:
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    crc32w %si, %edi # sched: [3:1.00]
-; GENERIC-NEXT:    crc32w (%rdx), %edi # sched: [8:1.00]
+; GENERIC-NEXT:    crc32w (%rdx), %edi # sched: [7:1.00]
 ; GENERIC-NEXT:    movl %edi, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -75,7 +75,7 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1
 ; SANDY-LABEL: crc32_32_16:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    crc32w %si, %edi # sched: [3:1.00]
-; SANDY-NEXT:    crc32w (%rdx), %edi # sched: [8:1.00]
+; SANDY-NEXT:    crc32w (%rdx), %edi # sched: [7:1.00]
 ; SANDY-NEXT:    movl %edi, %eax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -208,7 +208,7 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1
 ; GENERIC-LABEL: crc32_64_64:
 ; GENERIC:       # BB#0:
 ; GENERIC-NEXT:    crc32q %rsi, %rdi # sched: [3:1.00]
-; GENERIC-NEXT:    crc32q (%rdx), %rdi # sched: [7:1.00]
+; GENERIC-NEXT:    crc32q (%rdx), %rdi # sched: [8:1.00]
 ; GENERIC-NEXT:    movq %rdi, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -222,7 +222,7 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1
 ; SANDY-LABEL: crc32_64_64:
 ; SANDY:       # BB#0:
 ; SANDY-NEXT:    crc32q %rsi, %rdi # sched: [3:1.00]
-; SANDY-NEXT:    crc32q (%rdx), %rdi # sched: [7:1.00]
+; SANDY-NEXT:    crc32q (%rdx), %rdi # sched: [8:1.00]
 ; SANDY-NEXT:    movq %rdi, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;

Modified: llvm/trunk/test/CodeGen/X86/x86-cmov-converter.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/x86-cmov-converter.ll?rev=310792&r1=310791&r2=310792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/x86-cmov-converter.ll (original)
+++ llvm/trunk/test/CodeGen/X86/x86-cmov-converter.ll Sun Aug 13 06:59:24 2017
@@ -3,7 +3,7 @@
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; This test checks that x86-cmov-converter optimization transform CMOV
 ;; instruction into branches when it is profitable.
-;; There are 6 cases below:
+;; There are 5 cases below:
 ;;   1. CmovInCriticalPath:
 ;;        CMOV depends on the condition and it is in the hot path.
 ;;        Thus, it worths transforming.
@@ -26,11 +26,6 @@
 ;;        Usually, binary search CMOV is not predicted.
 ;;        Thus, it does not worth transforming.
 ;;
-;;   6. SmallGainPerLoop:
-;;        The gain percentage from converting CMOV into branch is acceptable,
-;;        however, the absolute gain is smaller than a threshold.
-;;        Thus, it does not worth transforming.
-;;
 ;; Test was created using the following command line:
 ;; > clang -S -O2 -m64 -fno-vectorize -fno-unroll-loops -emit-llvm foo.c -o -
 ;; Where foo.c is:
@@ -270,35 +265,6 @@ while.end:
   ret i32 %.lcssa
 }
 
-; CHECK-LABEL: SmallGainPerLoop
-; CHECK-NOT: jg
-; CHECK: cmovg
-
-define void @SmallGainPerLoop(i32 %n, i32 %a, i32 %b, i32* nocapture %c, i32* nocapture readnone %d) #0 {
-entry:
-  %cmp14 = icmp sgt i32 %n, 0
-  br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup
-
-for.body.preheader:                               ; preds = %entry
-  %wide.trip.count = zext i32 %n to i64
-  br label %for.body
-
-for.cond.cleanup:                                 ; preds = %for.body, %entry
-  ret void
-
-for.body:                                         ; preds = %for.body.preheader, %for.body
-  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
-  %mul = mul nsw i32 %0, %a
-  %cmp3 = icmp sgt i32 %mul, %b
-  %. = select i1 %cmp3, i32 10, i32 %0
-  store i32 %., i32* %arrayidx, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
-  br i1 %exitcond, label %for.cond.cleanup, label %for.body
-}
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; The following test checks that x86-cmov-converter optimization transforms
 ;; CMOV instructions into branch correctly.




More information about the llvm-commits mailing list