[llvm] r331999 - [X86] Split WriteVecALU/WriteVecLogic/WriteShuffle/WriteVarShuffle/WritePSADBW/WritePHAdd scheduler classes

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Thu May 10 10:06:09 PDT 2018


Author: rksimon
Date: Thu May 10 10:06:09 2018
New Revision: 331999

URL: http://llvm.org/viewvc/llvm-project?rev=331999&view=rev
Log:
[X86] Split WriteVecALU/WriteVecLogic/WriteShuffle/WriteVarShuffle/WritePSADBW/WritePHAdd scheduler classes

Split off dedicated XMM (128-bit) scheduler classes from the default classes, leaving the default classes to model the MMX instructions.

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/lib/Target/X86/X86InstrXOP.td
    llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
    llvm/trunk/lib/Target/X86/X86SchedHaswell.td
    llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
    llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
    llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
    llvm/trunk/lib/Target/X86/X86Schedule.td
    llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
    llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
    llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
    llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
    llvm/trunk/test/CodeGen/X86/3dnow-schedule.ll
    llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
    llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll
    llvm/trunk/test/CodeGen/X86/xop-schedule.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=331999&r1=331998&r2=331999&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Thu May 10 10:06:09 2018
@@ -1333,7 +1333,7 @@ multiclass avx512_int_broadcast_rm_vl<bi
                 avx512_int_broadcast_rm_lowering<_.info256, _.info256>,
                                  EVEX_V256;
     defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
-                                    WriteShuffleLd, _.info128, _.info128>,
+                                    WriteShuffleXLd, _.info128, _.info128>,
                                  EVEX_V128;
   }
 }
@@ -1353,7 +1353,8 @@ multiclass avx512_subvec_broadcast_rm<bi
                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                            (_Dst.VT (X86SubVBroadcast
                              (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
-                           AVX5128IBase, EVEX, Sched<[WriteShuffleLd]>;
+                           Sched<[SchedWriteShuffle.YMM.Folded]>,
+                           AVX5128IBase, EVEX;
 }
 
 // This should be used for the AVX512DQ broadcast instructions. It disables
@@ -1367,7 +1368,8 @@ multiclass avx512_subvec_broadcast_rm_dq
                            (null_frag),
                            (_Dst.VT (X86SubVBroadcast
                              (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
-                            AVX5128IBase, EVEX, Sched<[WriteShuffleLd]>;
+                           Sched<[SchedWriteShuffle.YMM.Folded]>,
+                           AVX5128IBase, EVEX;
 }
 
 let Predicates = [HasAVX512] in {
@@ -1646,7 +1648,7 @@ multiclass avx512_common_broadcast_i32x2
 
   let Predicates = [HasDQI, HasVLX] in
     defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle,
-                                          WriteShuffleLd, _Dst.info128,
+                                          WriteShuffleXLd, _Dst.info128,
                                           _Src.info128, _Src.info128, null_frag>,
                                           EVEX_V128;
 }

Modified: llvm/trunk/lib/Target/X86/X86InstrXOP.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrXOP.td?rev=331999&r1=331998&r2=331999&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrXOP.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrXOP.td Thu May 10 10:06:09 2018
@@ -14,11 +14,11 @@
 multiclass xop2op<bits<8> opc, string OpcodeStr, Intrinsic Int, PatFrag memop> {
   def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
            !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-           [(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[WritePHAdd]>;
+           [(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[SchedWritePHAdd.XMM]>;
   def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
            !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
            [(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP,
-           Sched<[WritePHAdd.Folded, ReadAfterLd]>;
+           Sched<[SchedWritePHAdd.XMM.Folded, ReadAfterLd]>;
 }
 
 let ExeDomain = SSEPackedInt in {

Modified: llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedBroadwell.td?rev=331999&r1=331998&r2=331999&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedBroadwell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedBroadwell.td Thu May 10 10:06:09 2018
@@ -265,8 +265,10 @@ defm : X86WriteRes<WriteVecMove,
 defm : X86WriteRes<WriteEMMS,            [BWPort01,BWPort15,BWPort015,BWPort0156], 31, [8,1,21,1], 31>;
 
 defm : BWWriteResPair<WriteVecALU,   [BWPort15], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
+defm : BWWriteResPair<WriteVecALUX,  [BWPort15], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
 defm : BWWriteResPair<WriteVecALUY,  [BWPort15], 1, [1], 1, 6>; // Vector integer ALU op, no logicals (YMM/ZMM).
 defm : BWWriteResPair<WriteVecLogic, [BWPort015], 1, [1], 1, 5>; // Vector integer and/or/xor.
+defm : BWWriteResPair<WriteVecLogicX,[BWPort015], 1, [1], 1, 5>; // Vector integer and/or/xor.
 defm : BWWriteResPair<WriteVecLogicY,[BWPort015], 1, [1], 1, 6>; // Vector integer and/or/xor (YMM/ZMM).
 defm : BWWriteResPair<WriteVecTest,  [BWPort0,BWPort5], 2, [1,1], 2, 5>; // Vector integer TEST instructions.
 defm : BWWriteResPair<WriteVecTestY, [BWPort0,BWPort5], 4, [1,1], 2, 6>; // Vector integer TEST instructions (YMM/ZMM).
@@ -276,8 +278,10 @@ defm : BWWriteResPair<WriteVecIMulY, [BW
 defm : BWWriteResPair<WritePMULLD,   [BWPort0], 10, [2], 2, 5>; // Vector PMULLD.
 defm : BWWriteResPair<WritePMULLDY,  [BWPort0], 10, [2], 2, 6>; // Vector PMULLD (YMM/ZMM).
 defm : BWWriteResPair<WriteShuffle,  [BWPort5], 1, [1], 1, 5>; // Vector shuffles.
+defm : BWWriteResPair<WriteShuffleX, [BWPort5], 1, [1], 1, 5>; // Vector shuffles.
 defm : BWWriteResPair<WriteShuffleY, [BWPort5], 1, [1], 1, 6>; // Vector shuffles (YMM/ZMM).
 defm : BWWriteResPair<WriteVarShuffle, [BWPort5], 1, [1], 1, 5>; // Vector variable shuffles.
+defm : BWWriteResPair<WriteVarShuffleX,[BWPort5], 1, [1], 1, 5>; // Vector variable shuffles.
 defm : BWWriteResPair<WriteVarShuffleY,[BWPort5], 1, [1], 1, 6>; // Vector variable shuffles (YMM/ZMM).
 defm : BWWriteResPair<WriteBlend,  [BWPort5], 1, [1], 1, 5>; // Vector blends.
 defm : BWWriteResPair<WriteBlendY, [BWPort5], 1, [1], 1, 6>; // Vector blends (YMM/ZMM).
@@ -286,6 +290,7 @@ defm : BWWriteResPair<WriteVarBlendY, [B
 defm : BWWriteResPair<WriteMPSAD,  [BWPort0, BWPort5], 7, [1, 2], 3, 5>; // Vector MPSAD.
 defm : BWWriteResPair<WriteMPSADY, [BWPort0, BWPort5], 7, [1, 2], 3, 6>; // Vector MPSAD.
 defm : BWWriteResPair<WritePSADBW,   [BWPort0], 5, [1], 1, 5>; // Vector PSADBW.
+defm : BWWriteResPair<WritePSADBWX,  [BWPort0], 5, [1], 1, 5>; // Vector PSADBW.
 defm : BWWriteResPair<WritePSADBWY,  [BWPort0], 5, [1], 1, 6>; // Vector PSADBW (YMM/ZMM).
 defm : BWWriteResPair<WritePHMINPOS, [BWPort0], 5>; // Vector PHMINPOS.
 
@@ -448,6 +453,7 @@ def : WriteRes<WriteNop, []>;
 defm : BWWriteResPair<WriteFHAdd,   [BWPort1,BWPort5], 5, [1,2], 3, 5>;
 defm : BWWriteResPair<WriteFHAddY,  [BWPort1,BWPort5], 5, [1,2], 3, 6>;
 defm : BWWriteResPair<WritePHAdd,  [BWPort5,BWPort15], 3, [2,1], 3, 5>;
+defm : BWWriteResPair<WritePHAddX, [BWPort5,BWPort15], 3, [2,1], 3, 5>;
 defm : BWWriteResPair<WritePHAddY, [BWPort5,BWPort15], 3, [2,1], 3, 6>;
 
 // Remaining instrs.

Modified: llvm/trunk/lib/Target/X86/X86SchedHaswell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedHaswell.td?rev=331999&r1=331998&r2=331999&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedHaswell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedHaswell.td Thu May 10 10:06:09 2018
@@ -255,11 +255,13 @@ defm : X86WriteRes<WriteVecMaskedStore,
 defm : X86WriteRes<WriteVecMaskedStoreY, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
 defm : X86WriteRes<WriteVecMove,         [HWPort015], 1, [1], 1>;
 
-defm : HWWriteResPair<WriteVecLogic, [HWPort015], 1, [1], 1, 6>;
+defm : HWWriteResPair<WriteVecLogic, [HWPort015], 1, [1], 1, 5>;
+defm : HWWriteResPair<WriteVecLogicX,[HWPort015], 1, [1], 1, 6>;
 defm : HWWriteResPair<WriteVecLogicY,[HWPort015], 1, [1], 1, 7>;
 defm : HWWriteResPair<WriteVecTest,  [HWPort0,HWPort5], 2, [1,1], 2, 6>;
 defm : HWWriteResPair<WriteVecTestY, [HWPort0,HWPort5], 4, [1,1], 2, 7>;
-defm : HWWriteResPair<WriteVecALU,   [HWPort15],  1, [1], 1, 6>;
+defm : HWWriteResPair<WriteVecALU,   [HWPort15],  1, [1], 1, 5>;
+defm : HWWriteResPair<WriteVecALUX,  [HWPort15],  1, [1], 1, 6>;
 defm : HWWriteResPair<WriteVecALUY,  [HWPort15],  1, [1], 1, 7>;
 defm : HWWriteResPair<WriteVecIMul,  [HWPort0],  5, [1], 1, 5>;
 defm : HWWriteResPair<WriteVecIMulX, [HWPort0],  5, [1], 1, 6>;
@@ -267,8 +269,10 @@ defm : HWWriteResPair<WriteVecIMulY, [HW
 defm : HWWriteResPair<WritePMULLD,   [HWPort0], 10, [2], 2, 6>;
 defm : HWWriteResPair<WritePMULLDY,  [HWPort0], 10, [2], 2, 7>;
 defm : HWWriteResPair<WriteShuffle,  [HWPort5],  1, [1], 1, 5>;
+defm : HWWriteResPair<WriteShuffleX, [HWPort5],  1, [1], 1, 6>;
 defm : HWWriteResPair<WriteShuffleY, [HWPort5],  1, [1], 1, 7>;
-defm : HWWriteResPair<WriteVarShuffle, [HWPort5], 1, [1], 1, 6>;
+defm : HWWriteResPair<WriteVarShuffle, [HWPort5], 1, [1], 1, 5>;
+defm : HWWriteResPair<WriteVarShuffleX,[HWPort5], 1, [1], 1, 6>;
 defm : HWWriteResPair<WriteVarShuffleY,[HWPort5], 1, [1], 1, 7>;
 defm : HWWriteResPair<WriteBlend,  [HWPort5], 1, [1], 1, 6>;
 defm : HWWriteResPair<WriteBlendY, [HWPort5], 1, [1], 1, 7>;
@@ -278,7 +282,8 @@ defm : HWWriteResPair<WriteVarBlend,  [H
 defm : HWWriteResPair<WriteVarBlendY, [HWPort5], 2, [2], 2, 7>;
 defm : HWWriteResPair<WriteMPSAD,  [HWPort0, HWPort5], 7, [1, 2], 3, 6>;
 defm : HWWriteResPair<WriteMPSADY, [HWPort0, HWPort5], 7, [1, 2], 3, 7>;
-defm : HWWriteResPair<WritePSADBW,  [HWPort0], 5, [1], 1, 6>;
+defm : HWWriteResPair<WritePSADBW,  [HWPort0], 5, [1], 1, 5>;
+defm : HWWriteResPair<WritePSADBWX, [HWPort0], 5, [1], 1, 6>;
 defm : HWWriteResPair<WritePSADBWY, [HWPort0], 5, [1], 1, 7>;
 defm : HWWriteResPair<WritePHMINPOS, [HWPort0],  5, [1], 1, 6>;
 
@@ -684,7 +689,8 @@ def : InstRW<[HWWriteFXTRACT], (instrs F
 
 defm : HWWriteResPair<WriteFHAdd,  [HWPort1, HWPort5], 5, [1,2], 3, 6>;
 defm : HWWriteResPair<WriteFHAddY, [HWPort1, HWPort5], 5, [1,2], 3, 7>;
-defm : HWWriteResPair<WritePHAdd,  [HWPort5, HWPort15], 3, [2,1], 3, 6>;
+defm : HWWriteResPair<WritePHAdd,  [HWPort5, HWPort15], 3, [2,1], 3, 5>;
+defm : HWWriteResPair<WritePHAddX, [HWPort5, HWPort15], 3, [2,1], 3, 6>;
 defm : HWWriteResPair<WritePHAddY, [HWPort5, HWPort15], 3, [2,1], 3, 7>;
 
 //=== Floating Point XMM and YMM Instructions ===//
@@ -913,26 +919,16 @@ def HWWriteResGroup12_2 : SchedWriteRes<
 def: InstRW<[HWWriteResGroup12_2], (instrs IMUL16m, MUL16m)>;
 
 def HWWriteResGroup13 : SchedWriteRes<[HWPort5,HWPort23]> {
-  let Latency = 7;
+  let Latency = 6;
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[HWWriteResGroup13], (instregex "(V?)PACKSSDWrm",
-                                            "(V?)PACKSSWBrm",
-                                            "(V?)PACKUSDWrm",
-                                            "(V?)PACKUSWBrm",
-                                            "(V?)PALIGNRrmi",
-                                            "(V?)PSHUFDmi",
-                                            "(V?)PSHUFHWmi",
-                                            "(V?)PSHUFLWmi",
-                                            "(V?)PUNPCKHBWrm",
-                                            "(V?)PUNPCKHDQrm",
-                                            "(V?)PUNPCKHQDQrm",
-                                            "(V?)PUNPCKHWDrm",
-                                            "(V?)PUNPCKLBWrm",
-                                            "(V?)PUNPCKLDQrm",
-                                            "(V?)PUNPCKLQDQrm",
-                                            "(V?)PUNPCKLWDrm")>;
+def: InstRW<[HWWriteResGroup13], (instregex "(V?)PMOV(SX|ZX)BDrm",
+                                            "(V?)PMOV(SX|ZX)BQrm",
+                                            "(V?)PMOV(SX|ZX)BWrm",
+                                            "(V?)PMOV(SX|ZX)DQrm",
+                                            "(V?)PMOV(SX|ZX)WDrm",
+                                            "(V?)PMOV(SX|ZX)WQrm")>;
 
 def HWWriteResGroup13_1 : SchedWriteRes<[HWPort5,HWPort23]> {
   let Latency = 8;
@@ -943,13 +939,6 @@ def: InstRW<[HWWriteResGroup13_1], (inst
                                               "VPMOVSXBQYrm",
                                               "VPMOVSXWQYrm")>;
 
-def HWWriteResGroup13_2 : SchedWriteRes<[HWPort5,HWPort23]> {
-  let Latency = 6;
-  let NumMicroOps = 2;
-  let ResourceCycles = [1,1];
-}
-def: InstRW<[HWWriteResGroup13_2], (instregex "MMX_PSHUFBrm")>;
-
 def HWWriteResGroup14 : SchedWriteRes<[HWPort6,HWPort23]> {
   let Latency = 6;
   let NumMicroOps = 2;
@@ -974,14 +963,7 @@ def: InstRW<[HWWriteResGroup16], (instre
                                             "BLSI(32|64)rm",
                                             "BLSMSK(32|64)rm",
                                             "BLSR(32|64)rm",
-                                            "MOVBE(16|32|64)rm",
-                                            "MMX_PABS(B|D|W)rm",
-                                            "MMX_P(ADD|SUB)(B|D|W|Q)irm",
-                                            "MMX_P(ADD|SUB)(U?)S(B|W)irm",
-                                            "MMX_PAVG(B|W)irm",
-                                            "MMX_PCMP(EQ|GT)(B|D|W)irm",
-                                            "MMX_P(MAX|MIN)(SW|UB)irm",
-                                            "MMX_PSIGN(B|D|W)rm")>;
+                                            "MOVBE(16|32|64)rm")>;
 
 def HWWriteResGroup17 : SchedWriteRes<[HWPort23,HWPort015]> {
   let Latency = 7;
@@ -992,16 +974,6 @@ def: InstRW<[HWWriteResGroup17], (instre
                                             "VINSERTI128rm",
                                             "VPBLENDDrmi")>;
 
-def HWWriteResGroup17_1 : SchedWriteRes<[HWPort23,HWPort015]> {
-  let Latency = 6;
-  let NumMicroOps = 2;
-  let ResourceCycles = [1,1];
-}
-def: InstRW<[HWWriteResGroup17_1], (instregex "MMX_PANDNirm",
-                                              "MMX_PANDirm",
-                                              "MMX_PORirm",
-                                              "MMX_PXORirm")>;
-
 def HWWriteResGroup17_2 : SchedWriteRes<[HWPort23,HWPort015]> {
   let Latency = 8;
   let NumMicroOps = 2;
@@ -1356,13 +1328,6 @@ def HWWriteResGroup62 : SchedWriteRes<[H
 def: InstRW<[HWWriteResGroup62], (instregex "IST(T?)_FP(16|32|64)m",
                                             "IST_F(16|32)m")>;
 
-def HWWriteResGroup64 : SchedWriteRes<[HWPort5,HWPort23,HWPort15]> {
-  let Latency = 8;
-  let NumMicroOps = 4;
-  let ResourceCycles = [2,1,1];
-}
-def: InstRW<[HWWriteResGroup64], (instregex "MMX_PH(ADD|SUB)(D|SW|W)rm")>;
-
 def HWWriteResGroup65 : SchedWriteRes<[HWPort23,HWPort06,HWPort0156]> {
   let Latency = 8;
   let NumMicroOps = 4;
@@ -1594,13 +1559,6 @@ def HWWriteResGroup91_3 : SchedWriteRes<
 def: InstRW<[HWWriteResGroup91_3], (instregex "MUL_F(32|64)m",
                                               "VPCMPGTQYrm")>;
 
-def HWWriteResGroup91_5 : SchedWriteRes<[HWPort0,HWPort23]> {
-  let Latency = 10;
-  let NumMicroOps = 2;
-  let ResourceCycles = [1,1];
-}
-def: InstRW<[HWWriteResGroup91_5], (instregex "MMX_PSADBWirm")>;
-
 def HWWriteResGroup93 : SchedWriteRes<[HWPort1,HWPort5]> {
   let Latency = 5;
   let NumMicroOps = 3;

Modified: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td?rev=331999&r1=331998&r2=331999&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td Thu May 10 10:06:09 2018
@@ -235,11 +235,13 @@ defm : X86WriteRes<WriteVecMaskedStore,
 defm : X86WriteRes<WriteVecMaskedStoreY, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
 defm : X86WriteRes<WriteVecMove,         [SBPort05], 1, [1], 1>;
 
-defm : SBWriteResPair<WriteVecLogic, [SBPort015], 1, [1], 1, 6>;
+defm : SBWriteResPair<WriteVecLogic, [SBPort015], 1, [1], 1, 5>;
+defm : SBWriteResPair<WriteVecLogicX,[SBPort015], 1, [1], 1, 6>;
 defm : SBWriteResPair<WriteVecLogicY,[SBPort015], 1, [1], 1, 7>;
 defm : SBWriteResPair<WriteVecTest,  [SBPort0,SBPort5], 2, [1,1], 2, 6>;
 defm : SBWriteResPair<WriteVecTestY, [SBPort0,SBPort5], 2, [1,1], 2, 7>;
-defm : SBWriteResPair<WriteVecALU,   [SBPort15], 1, [1], 1, 6>;
+defm : SBWriteResPair<WriteVecALU,   [SBPort1],  3, [1], 1, 5>;
+defm : SBWriteResPair<WriteVecALUX,  [SBPort15], 1, [1], 1, 6>;
 defm : SBWriteResPair<WriteVecALUY,  [SBPort15], 1, [1], 1, 7>;
 defm : SBWriteResPair<WriteVecIMul,  [SBPort0], 5, [1], 1, 5>;
 defm : SBWriteResPair<WriteVecIMulX, [SBPort0], 5, [1], 1, 6>;
@@ -247,8 +249,10 @@ defm : SBWriteResPair<WriteVecIMulY, [SB
 defm : SBWriteResPair<WritePMULLD,   [SBPort0], 5, [1], 1, 6>;
 defm : SBWriteResPair<WritePMULLDY,  [SBPort0], 5, [1], 1, 7>; // TODO this is probably wrong for 256/512-bit for the "generic" model
 defm : SBWriteResPair<WriteShuffle,  [SBPort5], 1, [1], 1, 5>;
+defm : SBWriteResPair<WriteShuffleX, [SBPort15], 1, [1], 1, 6>;
 defm : SBWriteResPair<WriteShuffleY, [SBPort5], 1, [1], 1, 7>;
-defm : SBWriteResPair<WriteVarShuffle,  [SBPort15], 1, [1], 1, 6>;
+defm : SBWriteResPair<WriteVarShuffle,  [SBPort15], 1, [1], 1, 5>;
+defm : SBWriteResPair<WriteVarShuffleX, [SBPort15], 1, [1], 1, 6>;
 defm : SBWriteResPair<WriteVarShuffleY, [SBPort15], 1, [1], 1, 7>;
 defm : SBWriteResPair<WriteBlend,   [SBPort15], 1, [1], 1, 6>;
 defm : SBWriteResPair<WriteBlendY,  [SBPort15], 1, [1], 1, 7>;
@@ -256,7 +260,8 @@ defm : SBWriteResPair<WriteVarBlend, [SB
 defm : SBWriteResPair<WriteVarBlendY,[SBPort15], 2, [2], 2, 7>;
 defm : SBWriteResPair<WriteMPSAD,  [SBPort0, SBPort15], 7, [1,2], 3, 6>;
 defm : SBWriteResPair<WriteMPSADY, [SBPort0, SBPort15], 7, [1,2], 3, 7>;
-defm : SBWriteResPair<WritePSADBW,  [SBPort0], 5, [1], 1, 6>;
+defm : SBWriteResPair<WritePSADBW,  [SBPort0], 5, [1], 1, 5>;
+defm : SBWriteResPair<WritePSADBWX, [SBPort0], 5, [1], 1, 6>;
 defm : SBWriteResPair<WritePSADBWY, [SBPort0], 5, [1], 1, 7>;
 defm : SBWriteResPair<WritePHMINPOS,  [SBPort0], 5, [1], 1, 6>;
 
@@ -295,7 +300,8 @@ def : WriteRes<WriteVecExtractSt, [SBPor
 
 defm : SBWriteResPair<WriteFHAdd,  [SBPort1,SBPort5], 5, [1,2], 3, 6>;
 defm : SBWriteResPair<WriteFHAddY, [SBPort1,SBPort5], 5, [1,2], 3, 7>;
-defm : SBWriteResPair<WritePHAdd,  [SBPort15], 3, [3], 3, 6>;
+defm : SBWriteResPair<WritePHAdd,  [SBPort15], 3, [3], 3, 5>;
+defm : SBWriteResPair<WritePHAddX, [SBPort15], 3, [3], 3, 6>;
 defm : SBWriteResPair<WritePHAddY, [SBPort15], 3, [3], 3, 7>;
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -471,37 +477,10 @@ def SBWriteResGroup5 : SchedWriteRes<[SB
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[SBWriteResGroup5], (instregex "MMX_PALIGNRrri",
-                                           "(V?)PACKSSDWrr",
-                                           "(V?)PACKSSWBrr",
-                                           "(V?)PACKUSDWrr",
-                                           "(V?)PACKUSWBrr",
-                                           "(V?)PALIGNRrri",
-                                           "(V?)PMOVSXBDrr",
-                                           "(V?)PMOVSXBQrr",
-                                           "(V?)PMOVSXBWrr",
-                                           "(V?)PMOVSXDQrr",
-                                           "(V?)PMOVSXWDrr",
-                                           "(V?)PMOVSXWQrr",
-                                           "(V?)PMOVZXBDrr",
-                                           "(V?)PMOVZXBQrr",
-                                           "(V?)PMOVZXBWrr",
-                                           "(V?)PMOVZXDQrr",
-                                           "(V?)PMOVZXWDrr",
-                                           "(V?)PMOVZXWQrr",
-                                           "(V?)PSHUFDri",
-                                           "(V?)PSHUFHWri",
-                                           "(V?)PSHUFLWri",
-                                           "(V?)PSLLDQri",
-                                           "(V?)PSRLDQri",
-                                           "(V?)PUNPCKHBWrr",
-                                           "(V?)PUNPCKHDQrr",
-                                           "(V?)PUNPCKHQDQrr",
-                                           "(V?)PUNPCKHWDrr",
-                                           "(V?)PUNPCKLBWrr",
-                                           "(V?)PUNPCKLDQrr",
-                                           "(V?)PUNPCKLQDQrr",
-                                           "(V?)PUNPCKLWDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABS(B|D|W)rr",
+                                           "MMX_PADDQirr",
+                                           "MMX_PALIGNRrri",
+                                           "MMX_PSIGN(B|D|W)rr")>;
 
 def SBWriteResGroup6 : SchedWriteRes<[SBPort015]> {
   let Latency = 1;
@@ -608,12 +587,6 @@ def SBWriteResGroup21 : SchedWriteRes<[S
   let ResourceCycles = [1];
 }
 def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTPI2PSirr",
-                                            "MMX_PADD(B|D|W)irr",
-                                            "MMX_P(ADD|SUB)(U?)S(B|W)irr",
-                                            "MMX_PAVG(B|W)irr",
-                                            "MMX_PCMP(EQ|GT)(B|D|W)irr",
-                                            "MMX_P(MAX|MIN)(SW|UB)irr",
-                                            "MMX_PSUB(B|D|Q|W)irr",
                                             "PUSHFS64",
                                             "(V?)CVTDQ2PS(Y?)rr")>;
 
@@ -884,7 +857,6 @@ def SBWriteResGroup51 : SchedWriteRes<[S
 }
 def: InstRW<[SBWriteResGroup51], (instregex "MMX_PABS(B|D|W)rm",
                                             "MMX_PALIGNRrmi",
-                                            "MMX_PSHUFBrm",
                                             "MMX_PSIGN(B|D|W)rm")>;
 
 def SBWriteResGroup52 : SchedWriteRes<[SBPort23,SBPort015]> {
@@ -893,11 +865,7 @@ def SBWriteResGroup52 : SchedWriteRes<[S
   let ResourceCycles = [1,1];
 }
 def: InstRW<[SBWriteResGroup52], (instregex "LODSL",
-                                            "LODSQ",
-                                            "MMX_PANDirm",
-                                            "MMX_PANDNirm",
-                                            "MMX_PORirm",
-                                            "MMX_PXORirm")>;
+                                            "LODSQ")>;
 
 def SBWriteResGroup53 : SchedWriteRes<[SBPort4,SBPort23]> {
   let Latency = 6;
@@ -944,46 +912,7 @@ def SBWriteResGroup59 : SchedWriteRes<[S
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[SBWriteResGroup59], (instregex "(V?)PACKSSDWrm",
-                                            "(V?)PACKSSWBrm",
-                                            "(V?)PACKUSDWrm",
-                                            "(V?)PACKUSWBrm",
-                                            "(V?)PALIGNRrmi",
-                                            "(V?)PMOVSXBDrm",
-                                            "(V?)PMOVSXBQrm",
-                                            "(V?)PMOVSXBWrm",
-                                            "(V?)PMOVSXDQrm",
-                                            "(V?)PMOVSXWDrm",
-                                            "(V?)PMOVSXWQrm",
-                                            "(V?)PMOVZXBDrm",
-                                            "(V?)PMOVZXBQrm",
-                                            "(V?)PMOVZXBWrm",
-                                            "(V?)PMOVZXDQrm",
-                                            "(V?)PMOVZXWDrm",
-                                            "(V?)PMOVZXWQrm",
-                                            "(V?)PSHUFDmi",
-                                            "(V?)PSHUFHWmi",
-                                            "(V?)PSHUFLWmi",
-                                            "(V?)PUNPCKHBWrm",
-                                            "(V?)PUNPCKHDQrm",
-                                            "(V?)PUNPCKHQDQrm",
-                                            "(V?)PUNPCKHWDrm",
-                                            "(V?)PUNPCKLBWrm",
-                                            "(V?)PUNPCKLDQrm",
-                                            "(V?)PUNPCKLQDQrm",
-                                            "(V?)PUNPCKLWDrm")>;
-
-def SBWriteResGroup59a : SchedWriteRes<[SBPort23,SBPort1]> {
-  let Latency = 8;
-  let NumMicroOps = 2;
-  let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup59a], (instregex "MMX_PADD(B|D|W)irm",
-                                             "MMX_P(ADD|SUB)(U?)S(B|W)irm",
-                                             "MMX_PAVG(B|W)irm",
-                                             "MMX_PCMP(EQ|GT)(B|D|W)irm",
-                                             "MMX_P(MAX|MIN)(SW|UB)irm",
-                                             "MMX_PSUB(B|D|Q|W)irm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "MMX_PADDQirm")>;
 
 def SBWriteResGroup62 : SchedWriteRes<[SBPort5,SBPort23]> {
   let Latency = 7;
@@ -1060,13 +989,6 @@ def SBWriteResGroup77 : SchedWriteRes<[S
 }
 def: InstRW<[SBWriteResGroup77], (instregex "(V?)(U?)COMI(SD|SS)rm")>;
 
-def SBWriteResGroup80 : SchedWriteRes<[SBPort23,SBPort15]> {
-  let Latency = 8;
-  let NumMicroOps = 4;
-  let ResourceCycles = [1,3];
-}
-def: InstRW<[SBWriteResGroup80], (instregex "MMX_PH(ADD|SUB)(D|SW|W)rm")>;
-
 def SBWriteResGroup81 : SchedWriteRes<[SBPort23,SBPort015]> {
   let Latency = 8;
   let NumMicroOps = 4;
@@ -1134,13 +1056,6 @@ def SBWriteResGroup88 : SchedWriteRes<[S
 def: InstRW<[SBWriteResGroup88], (instregex "SHLD(16|32|64)mri8",
                                             "SHRD(16|32|64)mri8")>;
 
-def SBWriteResGroup89_2 : SchedWriteRes<[SBPort0,SBPort23]> {
-  let Latency = 10;
-  let NumMicroOps = 2;
-  let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup89_2], (instregex "MMX_PSADBWirm")>;
-
 def SBWriteResGroup90 : SchedWriteRes<[SBPort1,SBPort23]> {
   let Latency = 9;
   let NumMicroOps = 2;

Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td?rev=331999&r1=331998&r2=331999&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td Thu May 10 10:06:09 2018
@@ -256,9 +256,11 @@ defm : X86WriteRes<WriteVecMaskedStore,
 defm : X86WriteRes<WriteVecMaskedStoreY, [SKLPort237,SKLPort0], 2, [1,1], 2>;
 defm : X86WriteRes<WriteVecMove,         [SKLPort015], 1, [1], 1>;
 
-defm : SKLWriteResPair<WriteVecALU,   [SKLPort01], 1, [1], 1, 6>; // Vector integer ALU op, no logicals.
+defm : SKLWriteResPair<WriteVecALU,   [SKLPort05], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
+defm : SKLWriteResPair<WriteVecALUX,  [SKLPort01], 1, [1], 1, 6>; // Vector integer ALU op, no logicals (XMM).
 defm : SKLWriteResPair<WriteVecALUY,  [SKLPort01], 1, [1], 1, 7>; // Vector integer ALU op, no logicals (YMM/ZMM).
-defm : SKLWriteResPair<WriteVecLogic, [SKLPort015], 1, [1], 1, 6>; // Vector integer and/or/xor.
+defm : SKLWriteResPair<WriteVecLogic, [SKLPort05],  1, [1], 1, 5>; // Vector integer and/or/xor.
+defm : SKLWriteResPair<WriteVecLogicX,[SKLPort015], 1, [1], 1, 6>; // Vector integer and/or/xor (XMM).
 defm : SKLWriteResPair<WriteVecLogicY,[SKLPort015], 1, [1], 1, 7>; // Vector integer and/or/xor (YMM/ZMM).
 defm : SKLWriteResPair<WriteVecTest,  [SKLPort0,SKLPort5], 3, [1,1], 2, 6>; // Vector integer TEST instructions.
 defm : SKLWriteResPair<WriteVecTestY, [SKLPort0,SKLPort5], 3, [1,1], 2, 7>; // Vector integer TEST instructions (YMM/ZMM).
@@ -268,17 +270,20 @@ defm : SKLWriteResPair<WriteVecIMulY, [S
 defm : SKLWriteResPair<WritePMULLD,   [SKLPort01], 10, [2], 2, 6>; // Vector PMULLD.
 defm : SKLWriteResPair<WritePMULLDY,  [SKLPort01], 10, [2], 2, 7>; // Vector PMULLD (YMM/ZMM).
 defm : SKLWriteResPair<WriteShuffle,  [SKLPort5], 1, [1], 1, 5>; // Vector shuffles.
+defm : SKLWriteResPair<WriteShuffleX, [SKLPort5], 1, [1], 1, 6>; // Vector shuffles (XMM).
 defm : SKLWriteResPair<WriteShuffleY, [SKLPort5], 1, [1], 1, 7>; // Vector shuffles (YMM/ZMM).
-defm : SKLWriteResPair<WriteVarShuffle,  [SKLPort5], 1, [1], 1, 6>; // Vector shuffles.
+defm : SKLWriteResPair<WriteVarShuffle,  [SKLPort5], 1, [1], 1, 5>; // Vector shuffles.
+defm : SKLWriteResPair<WriteVarShuffleX, [SKLPort5], 1, [1], 1, 6>; // Vector shuffles (XMM).
 defm : SKLWriteResPair<WriteVarShuffleY, [SKLPort5], 1, [1], 1, 7>; // Vector shuffles (YMM/ZMM).
 defm : SKLWriteResPair<WriteBlend,  [SKLPort5], 1, [1], 1, 6>; // Vector blends.
 defm : SKLWriteResPair<WriteBlendY, [SKLPort5], 1, [1], 1, 7>; // Vector blends (YMM/ZMM).
 defm : SKLWriteResPair<WriteVarBlend,  [SKLPort015], 2, [2], 2, 6>; // Vector variable blends.
 defm : SKLWriteResPair<WriteVarBlendY, [SKLPort015], 2, [2], 2, 6>; // Vector variable blends (YMM/ZMM).
 defm : SKLWriteResPair<WriteMPSAD,  [SKLPort5], 4, [2], 2, 6>; // Vector MPSAD.
-defm : SKLWriteResPair<WriteMPSADY, [SKLPort5], 4, [2], 2, 7>; // Vector MPSAD.
-defm : SKLWriteResPair<WritePSADBW,  [SKLPort5], 3, [1], 1, 6>; // Vector PSADBW.
-defm : SKLWriteResPair<WritePSADBWY, [SKLPort5], 3, [1], 1, 7>; // Vector PSADBW.
+defm : SKLWriteResPair<WriteMPSADY, [SKLPort5], 4, [2], 2, 7>; // Vector MPSAD (YMM/ZMM).
+defm : SKLWriteResPair<WritePSADBW,  [SKLPort5], 3, [1], 1, 5>; // Vector PSADBW.
+defm : SKLWriteResPair<WritePSADBWX, [SKLPort5], 3, [1], 1, 6>; // Vector PSADBW (XMM).
+defm : SKLWriteResPair<WritePSADBWY, [SKLPort5], 3, [1], 1, 7>; // Vector PSADBW (YMM/ZMM).
 defm : SKLWriteResPair<WritePHMINPOS, [SKLPort01], 4, [1], 1, 6>; // Vector PHMINPOS.
 
 // Vector integer shifts.
@@ -450,7 +455,8 @@ def : WriteRes<WriteNop, []>;
 
 defm : SKLWriteResPair<WriteFHAdd,  [SKLPort5,SKLPort01], 6, [2,1], 3, 6>;
 defm : SKLWriteResPair<WriteFHAddY, [SKLPort5,SKLPort01], 6, [2,1], 3, 7>;
-defm : SKLWriteResPair<WritePHAdd,  [SKLPort5,SKLPort015], 3, [2,1], 3, 6>;
+defm : SKLWriteResPair<WritePHAdd,  [SKLPort5,SKLPort05],  3, [2,1], 3, 5>;
+defm : SKLWriteResPair<WritePHAddX, [SKLPort5,SKLPort015], 3, [2,1], 3, 6>;
 defm : SKLWriteResPair<WritePHAddY, [SKLPort5,SKLPort015], 3, [2,1], 3, 7>;
 
 // Remaining instrs.
@@ -497,15 +503,7 @@ def SKLWriteResGroup6 : SchedWriteRes<[S
   let ResourceCycles = [1];
 }
 def: InstRW<[SKLWriteResGroup6], (instrs FINCSTP, FNOP)>;
-def: InstRW<[SKLWriteResGroup6], (instregex "MMX_MOVQ64rr",
-                                            "MMX_PABS(B|D|W)rr",
-                                            "MMX_PADD(B|D|Q|W)irr",
-                                            "MMX_PANDNirr",
-                                            "MMX_PANDirr",
-                                            "MMX_PORirr",
-                                            "MMX_PSIGN(B|D|W)rr",
-                                            "MMX_PSUB(B|D|Q|W)irr",
-                                            "MMX_PXORirr")>;
+def: InstRW<[SKLWriteResGroup6], (instregex "MMX_MOVQ64rr")>;
 
 def SKLWriteResGroup7 : SchedWriteRes<[SKLPort06]> {
   let Latency = 1;
@@ -768,13 +766,6 @@ def SKLWriteResGroup36 : SchedWriteRes<[
 def: InstRW<[SKLWriteResGroup36], (instregex "(V?)PHADDSW(Y?)rr",
                                              "(V?)PHSUBSW(Y?)rr")>;
 
-def SKLWriteResGroup37 : SchedWriteRes<[SKLPort5,SKLPort05]> {
-  let Latency = 3;
-  let NumMicroOps = 3;
-  let ResourceCycles = [2,1];
-}
-def: InstRW<[SKLWriteResGroup37], (instregex "MMX_PH(ADD|SUB)(D|W)rr")>;
-
 def SKLWriteResGroup39 : SchedWriteRes<[SKLPort5,SKLPort0156]> {
   let Latency = 3;
   let NumMicroOps = 3;
@@ -1037,20 +1028,6 @@ def SKLWriteResGroup72 : SchedWriteRes<[
 def: InstRW<[SKLWriteResGroup72], (instregex "FARJMP64",
                                              "JMP(16|32|64)m")>;
 
-def SKLWriteResGroup73 : SchedWriteRes<[SKLPort23,SKLPort05]> {
-  let Latency = 6;
-  let NumMicroOps = 2;
-  let ResourceCycles = [1,1];
-}
-def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PABS(B|D|W)rm",
-                                             "MMX_PADD(B|D|Q|W)irm",
-                                             "MMX_PANDNirm",
-                                             "MMX_PANDirm",
-                                             "MMX_PORirm",
-                                             "MMX_PSIGN(B|D|W)rm",
-                                             "MMX_PSUB(B|D|Q|W)irm",
-                                             "MMX_PXORirm")>;
-
 def SKLWriteResGroup74 : SchedWriteRes<[SKLPort23,SKLPort06]> {
   let Latency = 6;
   let NumMicroOps = 2;
@@ -1165,35 +1142,16 @@ def SKLWriteResGroup86 : SchedWriteRes<[
 def: InstRW<[SKLWriteResGroup86], (instregex "VCVTDQ2PDYrr")>;
 
 def SKLWriteResGroup88 : SchedWriteRes<[SKLPort5,SKLPort23]> {
-  let Latency = 7;
-  let NumMicroOps = 2;
-  let ResourceCycles = [1,1];
-}
-def: InstRW<[SKLWriteResGroup88], (instregex "(V?)PACKSSDWrm",
-                                             "(V?)PACKSSWBrm",
-                                             "(V?)PACKUSDWrm",
-                                             "(V?)PACKUSWBrm",
-                                             "(V?)PALIGNRrmi",
-                                             "VPBROADCASTBrm",
-                                             "VPBROADCASTWrm",
-                                             "(V?)PSHUFDmi",
-                                             "(V?)PSHUFHWmi",
-                                             "(V?)PSHUFLWmi",
-                                             "(V?)PUNPCKHBWrm",
-                                             "(V?)PUNPCKHDQrm",
-                                             "(V?)PUNPCKHQDQrm",
-                                             "(V?)PUNPCKHWDrm",
-                                             "(V?)PUNPCKLBWrm",
-                                             "(V?)PUNPCKLDQrm",
-                                             "(V?)PUNPCKLQDQrm",
-                                             "(V?)PUNPCKLWDrm")>;
-
-def SKLWriteResGroup88a : SchedWriteRes<[SKLPort5,SKLPort23]> {
   let Latency = 6;
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[SKLWriteResGroup88a], (instregex "MMX_PSHUFBrm")>;
+def: InstRW<[SKLWriteResGroup88], (instregex "(V?)PMOV(SX|ZX)BDrm",
+                                             "(V?)PMOV(SX|ZX)BQrm",
+                                             "(V?)PMOV(SX|ZX)BWrm",
+                                             "(V?)PMOV(SX|ZX)DQrm",
+                                             "(V?)PMOV(SX|ZX)WDrm",
+                                             "(V?)PMOV(SX|ZX)WQrm")>;
 
 def SKLWriteResGroup89 : SchedWriteRes<[SKLPort5,SKLPort01]> {
   let Latency = 7;
@@ -1326,7 +1284,6 @@ def: InstRW<[SKLWriteResGroup108], (inst
                                               "FCOM64m",
                                               "FCOMP32m",
                                               "FCOMP64m",
-                                              "MMX_PSADBWirm", // TODO - SKLWriteResGroup120??
                                               "VPBROADCASTBYrm",
                                               "VPBROADCASTWYrm",
                                               "VPMOVSXBDYrm",
@@ -1349,13 +1306,6 @@ def SKLWriteResGroup112 : SchedWriteRes<
 }
 def: InstRW<[SKLWriteResGroup112], (instregex "MMX_PH(ADD|SUB)SWrm")>;
 
-def SKLWriteResGroup113 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort05]> {
-  let Latency = 8;
-  let NumMicroOps = 4;
-  let ResourceCycles = [2,1,1];
-}
-def: InstRW<[SKLWriteResGroup113], (instregex "MMX_PH(ADD|SUB)(D|W)rm")>;
-
 def SKLWriteResGroup114 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237,SKLPort01]> {
   let Latency = 8;
   let NumMicroOps = 4;

Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td?rev=331999&r1=331998&r2=331999&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td Thu May 10 10:06:09 2018
@@ -256,9 +256,11 @@ defm : X86WriteRes<WriteVecMaskedStore,
 defm : X86WriteRes<WriteVecMaskedStoreY, [SKXPort237,SKXPort0], 2, [1,1], 2>;
 defm : X86WriteRes<WriteVecMove,         [SKXPort015], 1, [1], 1>;
 
-defm : SKXWriteResPair<WriteVecALU,   [SKXPort01], 1, [1], 1, 6>; // Vector integer ALU op, no logicals.
+defm : SKXWriteResPair<WriteVecALU,   [SKXPort05], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
+defm : SKXWriteResPair<WriteVecALUX,  [SKXPort01], 1, [1], 1, 6>; // Vector integer ALU op, no logicals (XMM).
 defm : SKXWriteResPair<WriteVecALUY,  [SKXPort01], 1, [1], 1, 7>; // Vector integer ALU op, no logicals (YMM/ZMM).
-defm : SKXWriteResPair<WriteVecLogic, [SKXPort015], 1, [1], 1, 6>; // Vector integer and/or/xor.
+defm : SKXWriteResPair<WriteVecLogic, [SKXPort05],  1, [1], 1, 5>; // Vector integer and/or/xor.
+defm : SKXWriteResPair<WriteVecLogicX,[SKXPort015], 1, [1], 1, 6>; // Vector integer and/or/xor (XMM).
 defm : SKXWriteResPair<WriteVecLogicY,[SKXPort015], 1, [1], 1, 7>; // Vector integer and/or/xor (YMM/ZMM).
 defm : SKXWriteResPair<WriteVecTest,  [SKXPort0,SKXPort5], 3, [1,1], 2, 6>; // Vector integer TEST instructions.
 defm : SKXWriteResPair<WriteVecTestY, [SKXPort0,SKXPort5], 3, [1,1], 2, 7>; // Vector integer TEST instructions (YMM/ZMM).
@@ -268,8 +270,10 @@ defm : SKXWriteResPair<WriteVecIMulY, [S
 defm : SKXWriteResPair<WritePMULLD,   [SKXPort015], 10, [2], 2, 6>; // Vector PMULLD.
 defm : SKXWriteResPair<WritePMULLDY,  [SKXPort015], 10, [2], 2, 7>; // Vector PMULLD (YMM/ZMM).
 defm : SKXWriteResPair<WriteShuffle,  [SKXPort5], 1, [1], 1, 5>; // Vector shuffles.
+defm : SKXWriteResPair<WriteShuffleX, [SKXPort5], 1, [1], 1, 6>; // Vector shuffles (XMM).
 defm : SKXWriteResPair<WriteShuffleY, [SKXPort5], 1, [1], 1, 7>; // Vector shuffles (YMM/ZMM).
-defm : SKXWriteResPair<WriteVarShuffle,  [SKXPort5], 1, [1], 1, 6>; // Vector variable shuffles.
+defm : SKXWriteResPair<WriteVarShuffle,  [SKXPort5], 1, [1], 1, 5>; // Vector variable shuffles.
+defm : SKXWriteResPair<WriteVarShuffleX, [SKXPort5], 1, [1], 1, 6>; // Vector variable shuffles (XMM).
 defm : SKXWriteResPair<WriteVarShuffleY, [SKXPort5], 1, [1], 1, 7>; // Vector variable shuffles (YMM/ZMM).
 defm : SKXWriteResPair<WriteBlend, [SKXPort5], 1, [1], 1, 6>; // Vector blends.
 defm : SKXWriteResPair<WriteBlendY,[SKXPort5], 1, [1], 1, 7>; // Vector blends (YMM/ZMM).
@@ -277,7 +281,8 @@ defm : SKXWriteResPair<WriteVarBlend, [S
 defm : SKXWriteResPair<WriteVarBlendY,[SKXPort015], 2, [2], 2, 6>; // Vector variable blends (YMM/ZMM).
 defm : SKXWriteResPair<WriteMPSAD,   [SKXPort5], 4, [2], 2, 6>; // Vector MPSAD.
 defm : SKXWriteResPair<WriteMPSADY,  [SKXPort5], 4, [2], 2, 7>; // Vector MPSAD.
-defm : SKXWriteResPair<WritePSADBW,  [SKXPort5], 3, [1], 1, 6>; // Vector PSADBW.
+defm : SKXWriteResPair<WritePSADBW,  [SKXPort5], 3, [1], 1, 5>; // Vector PSADBW.
+defm : SKXWriteResPair<WritePSADBWX, [SKXPort5], 3, [1], 1, 6>; // Vector PSADBW (XMM).
 defm : SKXWriteResPair<WritePSADBWY, [SKXPort5], 3, [1], 1, 7>; // Vector PSADBW.
 defm : SKXWriteResPair<WritePHMINPOS, [SKXPort015], 4, [1], 1, 6>; // Vector PHMINPOS.
 
@@ -450,7 +455,8 @@ def : WriteRes<WriteNop, []>;
 
 defm : SKXWriteResPair<WriteFHAdd,  [SKXPort5,SKXPort015], 6, [2,1], 3, 6>;
 defm : SKXWriteResPair<WriteFHAddY, [SKXPort5,SKXPort015], 6, [2,1], 3, 7>;
-defm : SKXWriteResPair<WritePHAdd,  [SKXPort5,SKXPort015], 3, [2,1], 3, 6>;
+defm : SKXWriteResPair<WritePHAdd,  [SKXPort5,SKXPort05],  3, [2,1], 3, 5>;
+defm : SKXWriteResPair<WritePHAddX, [SKXPort5,SKXPort015], 3, [2,1], 3, 6>;
 defm : SKXWriteResPair<WritePHAddY, [SKXPort5,SKXPort015], 3, [2,1], 3, 7>;
 
 // Remaining instrs.
@@ -511,15 +517,7 @@ def SKXWriteResGroup6 : SchedWriteRes<[S
   let ResourceCycles = [1];
 }
 def: InstRW<[SKXWriteResGroup6], (instrs FINCSTP, FNOP)>;
-def: InstRW<[SKXWriteResGroup6], (instregex "MMX_MOVQ64rr",
-                                            "MMX_PABS(B|D|W)rr",
-                                            "MMX_PADD(B|D|Q|W)irr",
-                                            "MMX_PANDNirr",
-                                            "MMX_PANDirr",
-                                            "MMX_PORirr",
-                                            "MMX_PSIGN(B|D|W)rr",
-                                            "MMX_PSUB(B|D|Q|W)irr",
-                                            "MMX_PXORirr")>;
+def: InstRW<[SKXWriteResGroup6], (instregex "MMX_MOVQ64rr")>;
 
 def SKXWriteResGroup7 : SchedWriteRes<[SKXPort06]> {
   let Latency = 1;
@@ -601,7 +599,6 @@ def: InstRW<[SKXWriteResGroup11], (instr
                                              "MMX_MOVD64mr",
                                              "MMX_MOVNTQmr",
                                              "MMX_MOVQ64mr",
-                                             "MOVNTDQmr",
                                              "MOVNTI_64mr",
                                              "MOVNTImr",
                                              "ST_FP32m",
@@ -847,13 +844,6 @@ def SKXWriteResGroup38 : SchedWriteRes<[
 }
 def: InstRW<[SKXWriteResGroup38], (instregex "(V?)PH(ADD|SUB)SW(Y?)rr")>;
 
-def SKXWriteResGroup39 : SchedWriteRes<[SKXPort5,SKXPort05]> {
-  let Latency = 3;
-  let NumMicroOps = 3;
-  let ResourceCycles = [2,1];
-}
-def: InstRW<[SKXWriteResGroup39], (instregex "MMX_PH(ADD|SUB)(D|W)rr")>;
-
 def SKXWriteResGroup41 : SchedWriteRes<[SKXPort5,SKXPort0156]> {
   let Latency = 3;
   let NumMicroOps = 3;
@@ -1250,20 +1240,6 @@ def SKXWriteResGroup76 : SchedWriteRes<[
 def: InstRW<[SKXWriteResGroup76], (instregex "FARJMP64",
                                              "JMP(16|32|64)m")>;
 
-def SKXWriteResGroup77 : SchedWriteRes<[SKXPort23,SKXPort05]> {
-  let Latency = 6;
-  let NumMicroOps = 2;
-  let ResourceCycles = [1,1];
-}
-def: InstRW<[SKXWriteResGroup77], (instregex "MMX_PABS(B|D|W)rm",
-                                             "MMX_PADD(B|D|Q|W)irm",
-                                             "MMX_PANDNirm",
-                                             "MMX_PANDirm",
-                                             "MMX_PORirm",
-                                             "MMX_PSIGN(B|D|W)rm",
-                                             "MMX_PSUB(B|D|Q|W)irm",
-                                             "MMX_PXORirm")>;
-
 def SKXWriteResGroup78 : SchedWriteRes<[SKXPort23,SKXPort06]> {
   let Latency = 6;
   let NumMicroOps = 2;
@@ -1394,52 +1370,19 @@ def SKXWriteResGroup92 : SchedWriteRes<[
   let ResourceCycles = [1,1];
 }
 def: InstRW<[SKXWriteResGroup92], (instregex "VMOVSDZrm(b?)",
-                                             "VMOVSSZrm(b?)",
-                                             "VPACKSSDWZ128rm(b?)",
-                                             "(V?)PACKSSDWrm",
-                                             "VPACKSSWBZ128rm(b?)",
-                                             "(V?)PACKSSWBrm",
-                                             "VPACKUSDWZ128rm(b?)",
-                                             "(V?)PACKUSDWrm",
-                                             "VPACKUSWBZ128rm(b?)",
-                                             "(V?)PACKUSWBrm",
-                                             "VPALIGNRZ128rmi(b?)",
-                                             "(V?)PALIGNRrmi",
-                                             "VPBROADCASTBZ128m(b?)",
-                                             "VPBROADCASTBrm",
-                                             "VPBROADCASTWZ128m(b?)",
-                                             "VPBROADCASTWrm",
-                                             "VPSHUFDZ128m(b?)i",
-                                             "(V?)PSHUFDmi",
-                                             "VPSHUFHWZ128mi(b?)",
-                                             "(V?)PSHUFHWmi",
-                                             "VPSHUFLWZ128mi(b?)",
-                                             "(V?)PSHUFLWmi",
-                                             "VPSLLDQZ128rm(b?)",
-                                             "VPSRLDQZ128rm(b?)",
-                                             "VPUNPCKHBWZ128rm(b?)",
-                                             "(V?)PUNPCKHBWrm",
-                                             "VPUNPCKHDQZ128rm(b?)",
-                                             "(V?)PUNPCKHDQrm",
-                                             "VPUNPCKHQDQZ128rm(b?)",
-                                             "(V?)PUNPCKHQDQrm",
-                                             "VPUNPCKHWDZ128rm(b?)",
-                                             "(V?)PUNPCKHWDrm",
-                                             "VPUNPCKLBWZ128rm(b?)",
-                                             "(V?)PUNPCKLBWrm",
-                                             "VPUNPCKLDQZ128rm(b?)",
-                                             "(V?)PUNPCKLDQrm",
-                                             "VPUNPCKLQDQZ128rm(b?)",
-                                             "(V?)PUNPCKLQDQrm",
-                                             "VPUNPCKLWDZ128rm(b?)",
-                                             "(V?)PUNPCKLWDrm")>;
+                                             "VMOVSSZrm(b?)")>;
 
 def SKXWriteResGroup92a : SchedWriteRes<[SKXPort5,SKXPort23]> {
   let Latency = 6;
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[SKXWriteResGroup92a], (instregex "MMX_PSHUFBrm")>;
+def: InstRW<[SKXWriteResGroup92a], (instregex "(V?)PMOV(SX|ZX)BDrm",
+                                              "(V?)PMOV(SX|ZX)BQrm",
+                                              "(V?)PMOV(SX|ZX)BWrm",
+                                              "(V?)PMOV(SX|ZX)DQrm",
+                                              "(V?)PMOV(SX|ZX)WDrm",
+                                              "(V?)PMOV(SX|ZX)WQrm")>;
 
 def SKXWriteResGroup93 : SchedWriteRes<[SKXPort5,SKXPort015]> {
   let Latency = 7;
@@ -1676,7 +1619,6 @@ def SKXWriteResGroup119 : SchedWriteRes<
 }
 def: InstRW<[SKXWriteResGroup119], (instregex "FCOM(P?)32m",
                                               "FCOM(P?)64m",
-                                              "MMX_PSADBWirm",
                                               "VFPCLASSSDrm(b?)",
                                               "VPBROADCASTBYrm",
                                               "VPBROADCASTB(Z|Z256)m(b?)",
@@ -1751,13 +1693,6 @@ def SKXWriteResGroup123 : SchedWriteRes<
 }
 def: InstRW<[SKXWriteResGroup123], (instregex "MMX_PH(ADD|SUB)SWrm")>;
 
-def SKXWriteResGroup124 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort05]> {
-  let Latency = 8;
-  let NumMicroOps = 4;
-  let ResourceCycles = [2,1,1];
-}
-def: InstRW<[SKXWriteResGroup124], (instregex "MMX_PH(ADD|SUB)(D|W)rm")>;
-
 def SKXWriteResGroup125 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237,SKXPort015]> {
   let Latency = 8;
   let NumMicroOps = 4;

Modified: llvm/trunk/lib/Target/X86/X86Schedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Schedule.td?rev=331999&r1=331998&r2=331999&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Schedule.td (original)
+++ llvm/trunk/lib/Target/X86/X86Schedule.td Thu May 10 10:06:09 2018
@@ -194,6 +194,7 @@ class FMASC { X86FoldableSchedWrite Sche
 defm WriteFHAdd  : X86SchedWritePair;
 defm WriteFHAddY : X86SchedWritePair; // YMM/ZMM.
 defm WritePHAdd  : X86SchedWritePair;
+defm WritePHAddX : X86SchedWritePair; // XMM.
 defm WritePHAddY : X86SchedWritePair; // YMM/ZMM.
 
 // Vector integer operations.
@@ -205,10 +206,12 @@ def  WriteVecMaskedStore  : SchedWrite;
 def  WriteVecMaskedStoreY : SchedWrite;
 def  WriteVecMove         : SchedWrite;
 
-defm WriteVecALU   : X86SchedWritePair; // Vector integer ALU op, no logicals.
-defm WriteVecALUY  : X86SchedWritePair; // Vector integer ALU op, no logicals (YMM/ZMM).
-defm WriteVecLogic : X86SchedWritePair; // Vector integer and/or/xor logicals.
-defm WriteVecLogicY: X86SchedWritePair; // Vector integer and/or/xor logicals (YMM/ZMM).
+defm WriteVecALU    : X86SchedWritePair; // Vector integer ALU op, no logicals.
+defm WriteVecALUX   : X86SchedWritePair; // Vector integer ALU op, no logicals (XMM).
+defm WriteVecALUY   : X86SchedWritePair; // Vector integer ALU op, no logicals (YMM/ZMM).
+defm WriteVecLogic  : X86SchedWritePair; // Vector integer and/or/xor logicals.
+defm WriteVecLogicX : X86SchedWritePair; // Vector integer and/or/xor logicals (XMM).
+defm WriteVecLogicY : X86SchedWritePair; // Vector integer and/or/xor logicals (YMM/ZMM).
 defm WriteVecTest  : X86SchedWritePair; // Vector integer TEST instructions.
 defm WriteVecTestY : X86SchedWritePair; // Vector integer TEST instructions (YMM/ZMM).
 defm WriteVecShift  : X86SchedWritePair; // Vector integer shifts (default).
@@ -223,14 +226,17 @@ defm WriteVecIMulY : X86SchedWritePair;
 defm WritePMULLD   : X86SchedWritePair; // Vector PMULLD.
 defm WritePMULLDY   : X86SchedWritePair; // Vector PMULLD (YMM/ZMM).
 defm WriteShuffle  : X86SchedWritePair; // Vector shuffles.
+defm WriteShuffleX : X86SchedWritePair; // Vector shuffles (XMM).
 defm WriteShuffleY : X86SchedWritePair; // Vector shuffles (YMM/ZMM).
 defm WriteVarShuffle  : X86SchedWritePair; // Vector variable shuffles.
+defm WriteVarShuffleX : X86SchedWritePair; // Vector variable shuffles (XMM).
 defm WriteVarShuffleY : X86SchedWritePair; // Vector variable shuffles (YMM/ZMM).
 defm WriteBlend  : X86SchedWritePair; // Vector blends.
 defm WriteBlendY : X86SchedWritePair; // Vector blends (YMM/ZMM).
 defm WriteVarBlend  : X86SchedWritePair; // Vector variable blends.
 defm WriteVarBlendY : X86SchedWritePair; // Vector variable blends (YMM/ZMM).
 defm WritePSADBW  : X86SchedWritePair; // Vector PSADBW.
+defm WritePSADBWX : X86SchedWritePair; // Vector PSADBW (XMM).
 defm WritePSADBWY : X86SchedWritePair; // Vector PSADBW (YMM/ZMM).
 defm WriteMPSAD  : X86SchedWritePair; // Vector MPSAD.
 defm WriteMPSADY : X86SchedWritePair; // Vector MPSAD (YMM/ZMM).
@@ -356,11 +362,11 @@ def SchedWriteFVarBlend
                        WriteFVarBlendY, WriteFVarBlendY>;
 
 def SchedWriteVecALU
- : X86SchedWriteWidths<WriteVecALU, WriteVecALU, WriteVecALUY, WriteVecALUY>;
+ : X86SchedWriteWidths<WriteVecALU, WriteVecALUX, WriteVecALUY, WriteVecALUY>;
 def SchedWritePHAdd
- : X86SchedWriteWidths<WritePHAdd, WritePHAdd, WritePHAddY, WritePHAddY>;
+ : X86SchedWriteWidths<WritePHAdd, WritePHAddX, WritePHAddY, WritePHAddY>;
 def SchedWriteVecLogic
- : X86SchedWriteWidths<WriteVecLogic, WriteVecLogic,
+ : X86SchedWriteWidths<WriteVecLogic, WriteVecLogicX,
                        WriteVecLogicY, WriteVecLogicY>;
 def SchedWriteVecTest
  : X86SchedWriteWidths<WriteVecTest, WriteVecTest,
@@ -384,14 +390,14 @@ def SchedWriteMPSAD
  : X86SchedWriteWidths<WriteMPSAD, WriteMPSAD,
                        WriteMPSADY, WriteMPSADY>;
 def SchedWritePSADBW
- : X86SchedWriteWidths<WritePSADBW, WritePSADBW,
+ : X86SchedWriteWidths<WritePSADBW, WritePSADBWX,
                        WritePSADBWY, WritePSADBWY>;
 
 def SchedWriteShuffle
- : X86SchedWriteWidths<WriteShuffle, WriteShuffle,
+ : X86SchedWriteWidths<WriteShuffle, WriteShuffleX,
                        WriteShuffleY, WriteShuffleY>;
 def SchedWriteVarShuffle
- : X86SchedWriteWidths<WriteVarShuffle, WriteVarShuffle,
+ : X86SchedWriteWidths<WriteVarShuffle, WriteVarShuffleX,
                        WriteVarShuffleY, WriteVarShuffleY>;
 def SchedWriteBlend
  : X86SchedWriteWidths<WriteBlend, WriteBlend, WriteBlendY, WriteBlendY>;

Modified: llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleAtom.td?rev=331999&r1=331998&r2=331999&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleAtom.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleAtom.td Thu May 10 10:06:09 2018
@@ -281,8 +281,10 @@ def  : WriteRes<WriteVecMaskedStoreY, [A
 def  : WriteRes<WriteVecMove,  [AtomPort01]>;
 
 defm : AtomWriteResPair<WriteVecALU,       [AtomPort01],  [AtomPort0], 1, 1>;
+defm : AtomWriteResPair<WriteVecALUX,      [AtomPort01],  [AtomPort0], 1, 1>;
 defm : AtomWriteResPair<WriteVecALUY,      [AtomPort01],  [AtomPort0], 1, 1>;
 defm : AtomWriteResPair<WriteVecLogic,     [AtomPort01],  [AtomPort0], 1, 1>;
+defm : AtomWriteResPair<WriteVecLogicX,    [AtomPort01],  [AtomPort0], 1, 1>;
 defm : AtomWriteResPair<WriteVecLogicY,    [AtomPort01],  [AtomPort0], 1, 1>;
 defm : AtomWriteResPair<WriteVecTest,      [AtomPort01],  [AtomPort0], 1, 1>;
 defm : AtomWriteResPair<WriteVecTestY,     [AtomPort01],  [AtomPort0], 1, 1>;
@@ -300,11 +302,14 @@ defm : AtomWriteResPair<WritePMULLDY,
 defm : AtomWriteResPair<WritePHMINPOS,      [AtomPort0],  [AtomPort0], 5, 5, [5], [5]>;
 defm : AtomWriteResPair<WriteMPSAD,        [AtomPort01],  [AtomPort0], 1, 1>;
 defm : AtomWriteResPair<WriteMPSADY,       [AtomPort01],  [AtomPort0], 1, 1>;
-defm : AtomWriteResPair<WritePSADBW,        [AtomPort0],  [AtomPort0], 5, 5, [5], [5]>;
+defm : AtomWriteResPair<WritePSADBW,       [AtomPort01], [AtomPort01], 4, 4, [4], [4]>;
+defm : AtomWriteResPair<WritePSADBWX,       [AtomPort0],  [AtomPort0], 5, 5, [5], [5]>;
 defm : AtomWriteResPair<WritePSADBWY,       [AtomPort0],  [AtomPort0], 5, 5, [5], [5]>;
 defm : AtomWriteResPair<WriteShuffle,       [AtomPort0],  [AtomPort0], 1, 1>;
+defm : AtomWriteResPair<WriteShuffleX,      [AtomPort0],  [AtomPort0], 1, 1>;
 defm : AtomWriteResPair<WriteShuffleY,      [AtomPort0],  [AtomPort0], 1, 1>;
-defm : AtomWriteResPair<WriteVarShuffle,   [AtomPort01], [AtomPort01], 4, 5, [4], [5]>;
+defm : AtomWriteResPair<WriteVarShuffle,    [AtomPort0],  [AtomPort0], 1, 1>;
+defm : AtomWriteResPair<WriteVarShuffleX,  [AtomPort01], [AtomPort01], 4, 5, [4], [5]>;
 defm : AtomWriteResPair<WriteVarShuffleY,  [AtomPort01], [AtomPort01], 4, 5, [4], [5]>;
 defm : AtomWriteResPair<WriteBlend,         [AtomPort0],  [AtomPort0]>; // NOTE: Doesn't exist on Atom.
 defm : AtomWriteResPair<WriteBlendY,        [AtomPort0],  [AtomPort0]>; // NOTE: Doesn't exist on Atom.
@@ -355,7 +360,8 @@ defm : AtomWriteResPair<WriteAESDecEnc,
 
 defm : AtomWriteResPair<WriteFHAdd,  [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
 defm : AtomWriteResPair<WriteFHAddY, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
-defm : AtomWriteResPair<WritePHAdd,  [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
+defm : AtomWriteResPair<WritePHAdd,  [AtomPort01], [AtomPort01], 3, 4, [3], [4]>;
+defm : AtomWriteResPair<WritePHAddX, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
 defm : AtomWriteResPair<WritePHAddY, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -387,7 +393,6 @@ def : InstRW<[AtomWrite0_1], (instrs FXA
                                      MOVSX64rr32,
                                      MMX_MOVD64rr,
                                      MMX_MOVD64to64rr,
-                                     MMX_PSHUFBrr, MMX_PSHUFBrm,
                                      MOVDI2PDIrr,
                                      MOVDI2SSrr,
                                      MOV64toPQIrr,
@@ -492,7 +497,7 @@ def : InstRW<[AtomWrite01_3], (instrs CL
                                       POP16rmm, POP32rmm, POP64rmm)>;
 def : InstRW<[AtomWrite01_3], (instregex "XADD(8|16|32|64)rm",
                                          "XCHG(8|16|32|64)rm",
-                                         "(MMX_)?PH(ADD|SUB)Drr",
+                                         "PH(ADD|SUB)Drr",
                                          "MOV(S|Z)X16rm8",
                                          "MMX_P(ADD|SUB)Qirm",
                                          "MOV(UPS|UPD|DQU)rm",
@@ -506,9 +511,8 @@ def : InstRW<[AtomWrite01_4], (instrs CB
                                       JCXZ, JECXZ, JRCXZ,
                                       SHLD32mrCL, SHRD32mrCL,
                                       SHLD32mri8, SHRD32mri8,
-                                      LD_F80m,
-                                      MMX_PSADBWirr, MMX_PSADBWirm)>;
-def : InstRW<[AtomWrite01_4], (instregex "(MMX_)?PH(ADD|SUB)Drm",
+                                      LD_F80m)>;
+def : InstRW<[AtomWrite01_4], (instregex "PH(ADD|SUB)Drm",
                                          "(MMX_)?PEXTRWrr(_REV)?")>;
 
 def AtomWrite01_5 : SchedWriteRes<[AtomPort01]> {

Modified: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td?rev=331999&r1=331998&r2=331999&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td Thu May 10 10:06:09 2018
@@ -407,6 +407,7 @@ defm : X86WriteRes<WriteVecMaskedStoreY,
 def  : WriteRes<WriteVecMove,             [JFPU01, JVALU]>;
 
 defm : JWriteResFpuPair<WriteVecALU,      [JFPU01, JVALU], 1>;
+defm : JWriteResFpuPair<WriteVecALUX,     [JFPU01, JVALU], 1>;
 defm : JWriteResFpuPair<WriteVecALUY,     [JFPU01, JVALU], 1>;
 defm : JWriteResFpuPair<WriteVecShift,    [JFPU01, JVALU], 1>;
 defm : JWriteResFpuPair<WriteVecShiftX,   [JFPU01, JVALU], 1>;
@@ -422,17 +423,21 @@ defm : JWriteResFpuPair<WritePMULLDY,
 defm : JWriteResFpuPair<WriteMPSAD,       [JFPU0, JVIMUL], 3, [1, 2]>;
 defm : JWriteResFpuPair<WriteMPSADY,      [JFPU0, JVIMUL], 3, [1, 2]>;
 defm : JWriteResFpuPair<WritePSADBW,      [JFPU01, JVALU], 2>;
+defm : JWriteResFpuPair<WritePSADBWX,     [JFPU01, JVALU], 2>;
 defm : JWriteResFpuPair<WritePSADBWY,     [JFPU01, JVALU], 2>;
 defm : JWriteResFpuPair<WritePHMINPOS,    [JFPU0,  JVALU], 2>;
 defm : JWriteResFpuPair<WriteShuffle,     [JFPU01, JVALU], 1>;
+defm : JWriteResFpuPair<WriteShuffleX,    [JFPU01, JVALU], 1>;
 defm : JWriteResFpuPair<WriteShuffleY,    [JFPU01, JVALU], 1>;
 defm : JWriteResFpuPair<WriteVarShuffle,  [JFPU01, JVALU], 2, [1, 4], 3>;
+defm : JWriteResFpuPair<WriteVarShuffleX, [JFPU01, JVALU], 2, [1, 4], 3>;
 defm : JWriteResFpuPair<WriteVarShuffleY, [JFPU01, JVALU], 2, [1, 4], 3>;
 defm : JWriteResFpuPair<WriteBlend,       [JFPU01, JVALU], 1>;
 defm : JWriteResFpuPair<WriteBlendY,      [JFPU01, JVALU], 1>;
 defm : JWriteResFpuPair<WriteVarBlend,    [JFPU01, JVALU], 2, [1, 4], 3>;
 defm : JWriteResFpuPair<WriteVarBlendY,   [JFPU01, JVALU], 2, [1, 4], 3>;
 defm : JWriteResFpuPair<WriteVecLogic,    [JFPU01, JVALU], 1>;
+defm : JWriteResFpuPair<WriteVecLogicX,   [JFPU01, JVALU], 1>;
 defm : JWriteResFpuPair<WriteVecLogicY,   [JFPU01, JVALU], 1>; // NOTE: Doesn't exist on Jaguar.
 defm : JWriteResFpuPair<WriteVecTest,     [JFPU0, JFPA, JALU0], 3>;
 defm : JWriteResYMMPair<WriteVecTestY ,   [JFPU01, JFPX, JFPA, JALU0], 4, [2, 2, 2, 1], 3>;
@@ -482,6 +487,7 @@ defm : JWriteResFpuPair<WriteAESDecEnc,
 defm : JWriteResFpuPair<WriteFHAdd,         [JFPU0, JFPA], 3>;
 defm : JWriteResYMMPair<WriteFHAddY,        [JFPU0, JFPA], 3, [2,2], 2>;
 defm : JWriteResFpuPair<WritePHAdd,       [JFPU01, JVALU], 1>;
+defm : JWriteResFpuPair<WritePHAddX,      [JFPU01, JVALU], 1>;
 defm : JWriteResFpuPair<WritePHAddY,      [JFPU01, JVALU], 1>;
 
 ////////////////////////////////////////////////////////////////////////////////

Modified: llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleSLM.td?rev=331999&r1=331998&r2=331999&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleSLM.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleSLM.td Thu May 10 10:06:09 2018
@@ -219,10 +219,12 @@ defm : SLMWriteResPair<WriteVecShiftImm,
 defm : SLMWriteResPair<WriteVecShiftImmX,[SLM_FPC_RSV0],  1>;
 defm : SLMWriteResPair<WriteVecShiftImmY,[SLM_FPC_RSV0],  1>;
 defm : SLMWriteResPair<WriteVecLogic, [SLM_FPC_RSV01], 1>;
+defm : SLMWriteResPair<WriteVecLogicX,[SLM_FPC_RSV01], 1>;
 defm : SLMWriteResPair<WriteVecLogicY,[SLM_FPC_RSV01], 1>;
 defm : SLMWriteResPair<WriteVecTest,  [SLM_FPC_RSV01], 1>;
 defm : SLMWriteResPair<WriteVecTestY, [SLM_FPC_RSV01], 1>;
 defm : SLMWriteResPair<WriteVecALU,   [SLM_FPC_RSV01],  1>;
+defm : SLMWriteResPair<WriteVecALUX,  [SLM_FPC_RSV01],  1>;
 defm : SLMWriteResPair<WriteVecALUY,  [SLM_FPC_RSV01],  1>;
 defm : SLMWriteResPair<WriteVecIMul,  [SLM_FPC_RSV0],   4>;
 defm : SLMWriteResPair<WriteVecIMulX, [SLM_FPC_RSV0],   4>;
@@ -233,13 +235,16 @@ defm : SLMWriteResPair<WritePMULLD,  [SL
 defm : SLMWriteResPair<WritePMULLDY, [SLM_FPC_RSV0],   4>;
 defm : SLMWriteResPair<WriteShuffle,  [SLM_FPC_RSV0],  1>;
 defm : SLMWriteResPair<WriteShuffleY, [SLM_FPC_RSV0],  1>;
+defm : SLMWriteResPair<WriteShuffleX, [SLM_FPC_RSV0],  1>;
 defm : SLMWriteResPair<WriteVarShuffle,  [SLM_FPC_RSV0],  1>;
+defm : SLMWriteResPair<WriteVarShuffleX, [SLM_FPC_RSV0],  1>;
 defm : SLMWriteResPair<WriteVarShuffleY, [SLM_FPC_RSV0],  1>;
 defm : SLMWriteResPair<WriteBlend,  [SLM_FPC_RSV0],  1>;
 defm : SLMWriteResPair<WriteBlendY, [SLM_FPC_RSV0],  1>;
 defm : SLMWriteResPair<WriteMPSAD,  [SLM_FPC_RSV0],  7>;
 defm : SLMWriteResPair<WriteMPSADY, [SLM_FPC_RSV0],  7>;
 defm : SLMWriteResPair<WritePSADBW,  [SLM_FPC_RSV0],  4>;
+defm : SLMWriteResPair<WritePSADBWX, [SLM_FPC_RSV0],  4>;
 defm : SLMWriteResPair<WritePSADBWY, [SLM_FPC_RSV0],  4>;
 defm : SLMWriteResPair<WritePHMINPOS,  [SLM_FPC_RSV0],   4>;
 
@@ -260,6 +265,7 @@ def  : WriteRes<WriteVecExtractSt, [SLM_
 defm : SLMWriteResPair<WriteFHAdd,   [SLM_FPC_RSV01], 3, [2]>;
 defm : SLMWriteResPair<WriteFHAddY,  [SLM_FPC_RSV01], 3, [2]>;
 defm : SLMWriteResPair<WritePHAdd,   [SLM_FPC_RSV01], 1>;
+defm : SLMWriteResPair<WritePHAddX,  [SLM_FPC_RSV01], 1>;
 defm : SLMWriteResPair<WritePHAddY,  [SLM_FPC_RSV01], 1>;
 
 // String instructions.

Modified: llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td?rev=331999&r1=331998&r2=331999&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td Thu May 10 10:06:09 2018
@@ -281,10 +281,12 @@ defm : ZnWriteResFpuPair<WriteVecShiftIm
 defm : ZnWriteResFpuPair<WriteVecShiftImmX, [ZnFPU], 1>;
 defm : ZnWriteResFpuPair<WriteVecShiftImmY, [ZnFPU], 1>;
 defm : ZnWriteResFpuPair<WriteVecLogic,   [ZnFPU],   1>;
+defm : ZnWriteResFpuPair<WriteVecLogicX,  [ZnFPU],   1>;
 defm : ZnWriteResFpuPair<WriteVecLogicY,  [ZnFPU],   1>;
 defm : ZnWriteResFpuPair<WriteVecTest,    [ZnFPU12], 1, [2], 1, 7, 1>;
 defm : ZnWriteResFpuPair<WriteVecTestY,   [ZnFPU12], 1, [2], 1, 7, 1>;
 defm : ZnWriteResFpuPair<WriteVecALU,     [ZnFPU],   1>;
+defm : ZnWriteResFpuPair<WriteVecALUX,    [ZnFPU],   1>;
 defm : ZnWriteResFpuPair<WriteVecALUY,    [ZnFPU],   1>;
 defm : ZnWriteResFpuPair<WriteVecIMul,    [ZnFPU0],  4>;
 defm : ZnWriteResFpuPair<WriteVecIMulX,   [ZnFPU0],  4>;
@@ -292,14 +294,17 @@ defm : ZnWriteResFpuPair<WriteVecIMulY,
 defm : ZnWriteResFpuPair<WritePMULLD,     [ZnFPU0],  4>; // FIXME
 defm : ZnWriteResFpuPair<WritePMULLDY,    [ZnFPU0],  5, [2]>; // FIXME
 defm : ZnWriteResFpuPair<WriteShuffle,    [ZnFPU],   1>;
+defm : ZnWriteResFpuPair<WriteShuffleX,   [ZnFPU],   1>;
 defm : ZnWriteResFpuPair<WriteShuffleY,   [ZnFPU],   1>;
 defm : ZnWriteResFpuPair<WriteVarShuffle, [ZnFPU],   1>;
+defm : ZnWriteResFpuPair<WriteVarShuffleX,[ZnFPU],   1>;
 defm : ZnWriteResFpuPair<WriteVarShuffleY,[ZnFPU],   1>;
 defm : ZnWriteResFpuPair<WriteBlend,      [ZnFPU01], 1>;
 defm : ZnWriteResFpuPair<WriteBlendY,     [ZnFPU01], 1>;
 defm : ZnWriteResFpuPair<WriteShuffle256, [ZnFPU],   2>;
 defm : ZnWriteResFpuPair<WriteVarShuffle256, [ZnFPU],   2>;
 defm : ZnWriteResFpuPair<WritePSADBW,     [ZnFPU0],  3>;
+defm : ZnWriteResFpuPair<WritePSADBWX,    [ZnFPU0],  3>;
 defm : ZnWriteResFpuPair<WritePSADBWY,    [ZnFPU0],  3>;
 defm : ZnWriteResFpuPair<WritePHMINPOS,   [ZnFPU0],  4>;
 
@@ -1046,6 +1051,8 @@ def : InstRW<[WriteMicrocoded], (instreg
 // PHADD|PHSUB (S) W/D.
 def : SchedAlias<WritePHAdd,    ZnWriteMicrocoded>;
 def : SchedAlias<WritePHAddLd,  ZnWriteMicrocoded>;
+def : SchedAlias<WritePHAddX,   ZnWriteMicrocoded>;
+def : SchedAlias<WritePHAddXLd, ZnWriteMicrocoded>;
 def : SchedAlias<WritePHAddY,   ZnWriteMicrocoded>;
 def : SchedAlias<WritePHAddYLd, ZnWriteMicrocoded>;
 

Modified: llvm/trunk/test/CodeGen/X86/3dnow-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/3dnow-schedule.ll?rev=331999&r1=331998&r2=331999&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/3dnow-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/3dnow-schedule.ll Thu May 10 10:06:09 2018
@@ -14,8 +14,8 @@ declare void @llvm.x86.mmx.femms() nounw
 define i64 @test_pavgusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
 ; CHECK-LABEL: test_pavgusb:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pavgusb %mm1, %mm0 # sched: [1:0.50]
-; CHECK-NEXT:    pavgusb (%rdi), %mm0 # sched: [7:0.50]
+; CHECK-NEXT:    pavgusb %mm1, %mm0 # sched: [3:1.00]
+; CHECK-NEXT:    pavgusb (%rdi), %mm0 # sched: [8:1.00]
 ; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %a0, x86_mmx %a1)

Modified: llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-schedule.ll?rev=331999&r1=331998&r2=331999&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-schedule.ll Thu May 10 10:06:09 2018
@@ -1761,8 +1761,8 @@ define <16 x i16> @test_pblendw(<16 x i1
 define <16 x i8> @test_pbroadcastb(<16 x i8> %a0, <16 x i8> *%a1) {
 ; GENERIC-LABEL: test_pbroadcastb:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpbroadcastb %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpbroadcastb (%rdi), %xmm1 # sched: [6:1.00]
+; GENERIC-NEXT:    vpbroadcastb %xmm0, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    vpbroadcastb (%rdi), %xmm1 # sched: [7:0.50]
 ; GENERIC-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1811,7 +1811,7 @@ define <32 x i8> @test_pbroadcastb_ymm(<
 ; GENERIC-LABEL: test_pbroadcastb_ymm:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpbroadcastb %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpbroadcastb (%rdi), %ymm1 # sched: [6:1.00]
+; GENERIC-NEXT:    vpbroadcastb (%rdi), %ymm1 # sched: [7:0.50]
 ; GENERIC-NEXT:    vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1859,8 +1859,8 @@ define <32 x i8> @test_pbroadcastb_ymm(<
 define <4 x i32> @test_pbroadcastd(<4 x i32> %a0, <4 x i32> *%a1) {
 ; GENERIC-LABEL: test_pbroadcastd:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpbroadcastd (%rdi), %xmm1 # sched: [6:1.00]
+; GENERIC-NEXT:    vpbroadcastd %xmm0, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    vpbroadcastd (%rdi), %xmm1 # sched: [7:0.50]
 ; GENERIC-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1909,7 +1909,7 @@ define <8 x i32> @test_pbroadcastd_ymm(<
 ; GENERIC-LABEL: test_pbroadcastd_ymm:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpbroadcastd %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpbroadcastd (%rdi), %ymm1 # sched: [6:1.00]
+; GENERIC-NEXT:    vpbroadcastd (%rdi), %ymm1 # sched: [7:0.50]
 ; GENERIC-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1957,8 +1957,8 @@ define <8 x i32> @test_pbroadcastd_ymm(<
 define <2 x i64> @test_pbroadcastq(<2 x i64> %a0, <2 x i64> *%a1) {
 ; GENERIC-LABEL: test_pbroadcastq:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpbroadcastq (%rdi), %xmm1 # sched: [6:1.00]
+; GENERIC-NEXT:    vpbroadcastq %xmm0, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    vpbroadcastq (%rdi), %xmm1 # sched: [7:0.50]
 ; GENERIC-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2007,7 +2007,7 @@ define <4 x i64> @test_pbroadcastq_ymm(<
 ; GENERIC-LABEL: test_pbroadcastq_ymm:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpbroadcastq %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpbroadcastq (%rdi), %ymm1 # sched: [6:1.00]
+; GENERIC-NEXT:    vpbroadcastq (%rdi), %ymm1 # sched: [7:0.50]
 ; GENERIC-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2055,8 +2055,8 @@ define <4 x i64> @test_pbroadcastq_ymm(<
 define <8 x i16> @test_pbroadcastw(<8 x i16> %a0, <8 x i16> *%a1) {
 ; GENERIC-LABEL: test_pbroadcastw:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpbroadcastw %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpbroadcastw (%rdi), %xmm1 # sched: [6:1.00]
+; GENERIC-NEXT:    vpbroadcastw %xmm0, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    vpbroadcastw (%rdi), %xmm1 # sched: [7:0.50]
 ; GENERIC-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2105,7 +2105,7 @@ define <16 x i16> @test_pbroadcastw_ymm(
 ; GENERIC-LABEL: test_pbroadcastw_ymm:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpbroadcastw %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpbroadcastw (%rdi), %ymm1 # sched: [6:1.00]
+; GENERIC-NEXT:    vpbroadcastw (%rdi), %ymm1 # sched: [7:0.50]
 ; GENERIC-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;

Modified: llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll?rev=331999&r1=331998&r2=331999&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll Thu May 10 10:06:09 2018
@@ -5657,7 +5657,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,7,6] sched: [1:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,7,6] sched: [1:0.50]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5677,7 +5677,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,7,6] sched: [1:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,7,6] sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi16_perm_high_mask0:
@@ -5694,7 +5694,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[0,3,0,0,4,5,6,7] sched: [1:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[0,3,0,0,4,5,6,7] sched: [1:0.50]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5714,7 +5714,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,3,0,0,4,5,6,7] sched: [1:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,3,0,0,4,5,6,7] sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi16_perm_low_mask1:
@@ -5731,7 +5731,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,4,4,5] sched: [1:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,4,4,5] sched: [1:0.50]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5751,7 +5751,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,4,4,5] sched: [1:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,4,4,5] sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi16_perm_high_mask2:
@@ -5781,7 +5781,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[2,1,1,1,4,5,6,7] sched: [1:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[2,1,1,1,4,5,6,7] sched: [1:0.50]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5801,7 +5801,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1,1,1,4,5,6,7] sched: [1:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1,1,1,4,5,6,7] sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi16_perm_low_mask3:
@@ -5818,7 +5818,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mask4:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,5,7,6] sched: [1:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,5,7,6] sched: [1:0.50]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5838,7 +5838,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask4:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,5,7,6] sched: [1:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,5,7,6] sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi16_perm_high_mask4:
@@ -5855,7 +5855,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mask5:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[3,3,2,1,4,5,6,7] sched: [1:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[3,3,2,1,4,5,6,7] sched: [1:0.50]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5875,7 +5875,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask5:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3,2,1,4,5,6,7] sched: [1:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3,2,1,4,5,6,7] sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi16_perm_low_mask5:
@@ -5905,7 +5905,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mask6:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,6,5] sched: [1:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,6,5] sched: [1:0.50]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5925,7 +5925,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask6:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,6,5] sched: [1:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,6,5] sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi16_perm_high_mask6:
@@ -5942,7 +5942,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mask7:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0,4,5,6,7] sched: [1:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0,4,5,6,7] sched: [1:0.50]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5962,7 +5962,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask7:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0,4,5,6,7] sched: [1:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0,4,5,6,7] sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi16_perm_low_mask7:
@@ -5993,7 +5993,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,7,4,6] sched: [6:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,7,4,6] sched: [7:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask0:
@@ -6012,7 +6012,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,7,4,6] sched: [6:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,7,4,6] sched: [7:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask0:
@@ -6031,7 +6031,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} = mem[1,3,3,2,4,5,6,7] sched: [6:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} = mem[1,3,3,2,4,5,6,7] sched: [7:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask1:
@@ -6050,7 +6050,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7] sched: [6:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7] sched: [7:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask1:
@@ -6069,7 +6069,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,6,6,5,7] sched: [6:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,6,6,5,7] sched: [7:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask2:
@@ -6088,7 +6088,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,6,6,5,7] sched: [6:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,6,6,5,7] sched: [7:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask2:
@@ -6121,7 +6121,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} = mem[3,1,2,0,4,5,6,7] sched: [6:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} = mem[3,1,2,0,4,5,6,7] sched: [7:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask3:
@@ -6140,7 +6140,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[3,1,2,0,4,5,6,7] sched: [6:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[3,1,2,0,4,5,6,7] sched: [7:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask3:
@@ -6159,7 +6159,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask4:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,6,7,5] sched: [6:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,6,7,5] sched: [7:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask4:
@@ -6178,7 +6178,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask4:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,6,7,5] sched: [6:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,6,7,5] sched: [7:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask4:
@@ -6197,7 +6197,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask5:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} = mem[2,1,3,2,4,5,6,7] sched: [6:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} = mem[2,1,3,2,4,5,6,7] sched: [7:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask5:
@@ -6216,7 +6216,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask5:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2,4,5,6,7] sched: [6:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2,4,5,6,7] sched: [7:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask5:
@@ -6249,7 +6249,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask6:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,4,4,4] sched: [6:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,4,4,4] sched: [7:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask6:
@@ -6268,7 +6268,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask6:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,4,4,4] sched: [6:1.00]
+; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,4,4,4] sched: [7:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask6:
@@ -6287,7 +6287,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask7:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} = mem[0,3,3,1,4,5,6,7] sched: [6:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} = mem[0,3,3,1,4,5,6,7] sched: [7:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask7:
@@ -6306,7 +6306,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask7:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[0,3,3,1,4,5,6,7] sched: [6:1.00]
+; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[0,3,3,1,4,5,6,7] sched: [7:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask7:
@@ -7704,7 +7704,7 @@ define <4 x i32> @test_masked_4xi32_perm
 ; GENERIC-LABEL: test_masked_4xi32_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshufd {{.*#+}} xmm1 {%k1} = xmm0[2,3,3,0] sched: [1:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} xmm1 {%k1} = xmm0[2,3,3,0] sched: [1:0.50]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7724,7 +7724,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
 ; GENERIC-LABEL: test_masked_z_4xi32_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[2,3,3,0] sched: [1:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[2,3,3,0] sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_4xi32_perm_mask0:
@@ -7741,7 +7741,7 @@ define <4 x i32> @test_masked_4xi32_perm
 ; GENERIC-LABEL: test_masked_4xi32_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0] sched: [1:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0] sched: [1:0.50]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7761,7 +7761,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
 ; GENERIC-LABEL: test_masked_z_4xi32_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0] sched: [1:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0] sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_4xi32_perm_mask1:
@@ -7778,7 +7778,7 @@ define <4 x i32> @test_masked_4xi32_perm
 ; GENERIC-LABEL: test_masked_4xi32_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshufd {{.*#+}} xmm1 {%k1} = xmm0[3,0,1,0] sched: [1:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} xmm1 {%k1} = xmm0[3,0,1,0] sched: [1:0.50]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7798,7 +7798,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
 ; GENERIC-LABEL: test_masked_z_4xi32_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[3,0,1,0] sched: [1:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[3,0,1,0] sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_4xi32_perm_mask2:
@@ -7828,7 +7828,7 @@ define <4 x i32> @test_masked_4xi32_perm
 ; GENERIC-LABEL: test_masked_4xi32_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,1,0,3] sched: [1:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,1,0,3] sched: [1:0.50]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7848,7 +7848,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
 ; GENERIC-LABEL: test_masked_z_4xi32_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,0,3] sched: [1:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,0,3] sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_4xi32_perm_mask3:
@@ -7879,7 +7879,7 @@ define <4 x i32> @test_masked_4xi32_perm
 ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} = mem[0,1,3,3] sched: [6:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} = mem[0,1,3,3] sched: [7:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_4xi32_perm_mem_mask0:
@@ -7898,7 +7898,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
 ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,3] sched: [6:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,3] sched: [7:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask0:
@@ -7917,7 +7917,7 @@ define <4 x i32> @test_masked_4xi32_perm
 ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} = mem[2,2,3,1] sched: [6:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} = mem[2,2,3,1] sched: [7:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_4xi32_perm_mem_mask1:
@@ -7936,7 +7936,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
 ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[2,2,3,1] sched: [6:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[2,2,3,1] sched: [7:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask1:
@@ -7955,7 +7955,7 @@ define <4 x i32> @test_masked_4xi32_perm
 ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} = mem[0,3,0,1] sched: [6:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} = mem[0,3,0,1] sched: [7:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_4xi32_perm_mem_mask2:
@@ -7974,7 +7974,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
 ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,3,0,1] sched: [6:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,3,0,1] sched: [7:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask2:
@@ -8007,7 +8007,7 @@ define <4 x i32> @test_masked_4xi32_perm
 ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} = mem[1,0,1,0] sched: [6:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} = mem[1,0,1,0] sched: [7:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_4xi32_perm_mem_mask3:
@@ -8026,7 +8026,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
 ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[1,0,1,0] sched: [6:1.00]
+; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[1,0,1,0] sched: [7:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask3:

Modified: llvm/trunk/test/CodeGen/X86/xop-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/xop-schedule.ll?rev=331999&r1=331998&r2=331999&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/xop-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/xop-schedule.ll Thu May 10 10:06:09 2018
@@ -101,9 +101,9 @@ define void @test_vpcmov_128(<2 x i64> %
 ; GENERIC-LABEL: test_vpcmov_128:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
-; GENERIC-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpcmov (%rdi), %xmm1, %xmm0, %xmm0 # sched: [6:1.00]
-; GENERIC-NEXT:    vpcmov %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT:    vpcmov (%rdi), %xmm1, %xmm0, %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT:    vpcmov %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;




More information about the llvm-commits mailing list