[llvm] r331445 - [X86] Split WriteVecIMul/WriteVecPMULLD/WriteMPSAD/WritePSADBW into XMM and YMM/ZMM scheduler classes

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Thu May 3 03:31:20 PDT 2018


Author: rksimon
Date: Thu May  3 03:31:20 2018
New Revision: 331445

URL: http://llvm.org/viewvc/llvm-project?rev=331445&view=rev
Log:
[X86] Split WriteVecIMul/WriteVecPMULLD/WriteMPSAD/WritePSADBW into XMM and YMM/ZMM scheduler classes

Also retagged the VDBPSADBW instructions as SchedWritePSADBW instead of SchedWriteVecIMul, which matches the behaviour on SkylakeServer (the only scheduler model that supports them...)

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
    llvm/trunk/lib/Target/X86/X86SchedHaswell.td
    llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
    llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
    llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
    llvm/trunk/lib/Target/X86/X86Schedule.td
    llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
    llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
    llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
    llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
    llvm/trunk/test/CodeGen/X86/3dnow-schedule.ll
    llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
    llvm/trunk/test/CodeGen/X86/sha-schedule.ll
    llvm/trunk/test/CodeGen/X86/xop-schedule.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=331445&r1=331444&r2=331445&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Thu May  3 03:31:20 2018
@@ -9711,7 +9711,7 @@ let Predicates = [HasVLX, HasBWI] in {
 }
 
 defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
-                SchedWriteVecIMul, avx512vl_i16_info, avx512vl_i8_info>,
+                SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
                 EVEX_CD8<8, CD8VF>;
 
 multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,

Modified: llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedBroadwell.td?rev=331445&r1=331444&r2=331445&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedBroadwell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedBroadwell.td Thu May  3 03:31:20 2018
@@ -203,8 +203,10 @@ defm : BWWriteResPair<WriteVecALU,   [BW
 defm : BWWriteResPair<WriteVecLogic, [BWPort015], 1, [1], 1, 5>; // Vector integer and/or/xor.
 defm : BWWriteResPair<WriteVecLogicY,[BWPort015], 1, [1], 1, 6>; // Vector integer and/or/xor (YMM/ZMM).
 defm : BWWriteResPair<WriteVecShift, [BWPort0],  1>; // Vector integer shifts.
-defm : BWWriteResPair<WriteVecIMul,  [BWPort0],   5>; // Vector integer multiply.
-defm : BWWriteResPair<WritePMULLD,   [BWPort0], 10, [2], 2, 5>; // PMULLD
+defm : BWWriteResPair<WriteVecIMul,  [BWPort0],  5, [1], 1, 5>; // Vector integer multiply.
+defm : BWWriteResPair<WriteVecIMulY, [BWPort0],  5, [1], 1, 6>; // Vector integer multiply (YMM/ZMM).
+defm : BWWriteResPair<WritePMULLD,   [BWPort0], 10, [2], 2, 5>; // Vector PMULLD.
+defm : BWWriteResPair<WritePMULLDY,  [BWPort0], 10, [2], 2, 6>; // Vector PMULLD (YMM/ZMM).
 defm : BWWriteResPair<WriteShuffle,  [BWPort5], 1, [1], 1, 5>; // Vector shuffles.
 defm : BWWriteResPair<WriteShuffleY, [BWPort5], 1, [1], 1, 6>; // Vector shuffles (YMM/ZMM).
 defm : BWWriteResPair<WriteVarShuffle, [BWPort5], 1, [1], 1, 5>; // Vector variable shuffles.
@@ -214,8 +216,10 @@ defm : BWWriteResPair<WriteBlendY, [BWPo
 defm : BWWriteResPair<WriteVarBlend,  [BWPort5], 2, [2], 2, 5>; // Vector variable blends.
 defm : BWWriteResPair<WriteVarBlendY, [BWPort5], 2, [2], 2, 6>; // Vector variable blends (YMM/ZMM).
 defm : BWWriteResPair<WriteMPSAD,  [BWPort0, BWPort5], 7, [1, 2], 3, 5>; // Vector MPSAD.
-defm : BWWriteResPair<WritePSADBW,   [BWPort0],   5>; // Vector PSADBW.
-defm : BWWriteResPair<WritePHMINPOS, [BWPort0],   5>; // Vector PHMINPOS.
+defm : BWWriteResPair<WriteMPSADY, [BWPort0, BWPort5], 7, [1, 2], 3, 6>; // Vector MPSAD (YMM/ZMM).
+defm : BWWriteResPair<WritePSADBW,   [BWPort0], 5, [1], 1, 5>; // Vector PSADBW.
+defm : BWWriteResPair<WritePSADBWY,  [BWPort0], 5, [1], 1, 6>; // Vector PSADBW (YMM/ZMM).
+defm : BWWriteResPair<WritePHMINPOS, [BWPort0], 5>; // Vector PHMINPOS.
 
 // Vector insert/extract operations.
 def : WriteRes<WriteVecInsert, [BWPort5]> {
@@ -1504,16 +1508,7 @@ def BWWriteResGroup123 : SchedWriteRes<[
   let ResourceCycles = [1,1];
 }
 def: InstRW<[BWWriteResGroup123], (instregex "MUL_F(32|64)m",
-                                             "VPCMPGTQYrm",
-                                             "VPMADDUBSWYrm",
-                                             "VPMADDWDYrm",
-                                             "VPMULDQYrm",
-                                             "VPMULHRSWYrm",
-                                             "VPMULHUWYrm",
-                                             "VPMULHWYrm",
-                                             "VPMULLWYrm",
-                                             "VPMULUDQYrm",
-                                             "VPSADBWYrm")>;
+                                             "VPCMPGTQYrm")>;
 
 def BWWriteResGroup126 : SchedWriteRes<[BWPort0,BWPort015]> {
   let Latency = 11;
@@ -1594,13 +1589,6 @@ def BWWriteResGroup137_1 : SchedWriteRes
 }
 def: InstRW<[BWWriteResGroup137_1], (instregex "(V?)SQRTSSr")>;
 
-def BWWriteResGroup138 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> {
-  let Latency = 13;
-  let NumMicroOps = 4;
-  let ResourceCycles = [1,2,1];
-}
-def: InstRW<[BWWriteResGroup138], (instregex "VMPSADBWYrmi")>;
-
 def BWWriteResGroup139 : SchedWriteRes<[BWPort0,BWFPDivider]> {
   let Latency = 14;
   let NumMicroOps = 1;
@@ -1681,13 +1669,6 @@ def BWWriteResGroup150 : SchedWriteRes<[
 def: InstRW<[BWWriteResGroup150], (instregex "(V?)DIVPSrm",
                                              "(V?)DIVSSrm")>;
 
-def BWWriteResGroup151 : SchedWriteRes<[BWPort0,BWPort23]> {
-  let Latency = 16;
-  let NumMicroOps = 3;
-  let ResourceCycles = [2,1];
-}
-def: InstRW<[BWWriteResGroup151], (instregex "VPMULLDYrm")>;
-
 def BWWriteResGroup153 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06,BWPort15,BWPort0156]> {
   let Latency = 16;
   let NumMicroOps = 14;

Modified: llvm/trunk/lib/Target/X86/X86SchedHaswell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedHaswell.td?rev=331445&r1=331444&r2=331445&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedHaswell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedHaswell.td Thu May  3 03:31:20 2018
@@ -198,8 +198,10 @@ defm : HWWriteResPair<WriteVecShift, [HW
 defm : HWWriteResPair<WriteVecLogic, [HWPort015], 1, [1], 1, 6>;
 defm : HWWriteResPair<WriteVecLogicY,[HWPort015], 1, [1], 1, 7>;
 defm : HWWriteResPair<WriteVecALU,   [HWPort15],  1>;
-defm : HWWriteResPair<WriteVecIMul,  [HWPort0],   5>;
+defm : HWWriteResPair<WriteVecIMul,  [HWPort0],  5, [1], 1, 6>;
+defm : HWWriteResPair<WriteVecIMulY, [HWPort0],  5, [1], 1, 7>;
 defm : HWWriteResPair<WritePMULLD,   [HWPort0], 10, [2], 2, 6>;
+defm : HWWriteResPair<WritePMULLDY,  [HWPort0], 10, [2], 2, 7>;
 defm : HWWriteResPair<WriteShuffle,  [HWPort5],  1, [1], 1, 5>;
 defm : HWWriteResPair<WriteShuffleY, [HWPort5],  1, [1], 1, 7>;
 defm : HWWriteResPair<WriteVarShuffle, [HWPort5], 1, [1], 1, 6>;
@@ -212,7 +214,9 @@ defm : HWWriteResPair<WriteVarBlend,  [H
 defm : HWWriteResPair<WriteVarBlendY, [HWPort5], 2, [2], 2, 7>;
 defm : HWWriteResPair<WriteVarVecShift,  [HWPort0, HWPort5], 2, [2, 1]>;
 defm : HWWriteResPair<WriteMPSAD,  [HWPort0, HWPort5], 7, [1, 2], 3, 6>;
-defm : HWWriteResPair<WritePSADBW, [HWPort0], 5>;
+defm : HWWriteResPair<WriteMPSADY, [HWPort0, HWPort5], 7, [1, 2], 3, 7>;
+defm : HWWriteResPair<WritePSADBW,  [HWPort0], 5, [1], 1, 6>;
+defm : HWWriteResPair<WritePSADBWY, [HWPort0], 5, [1], 1, 7>;
 defm : HWWriteResPair<WritePHMINPOS, [HWPort0],  5, [1], 1, 6>;
 
 // Vector insert/extract operations.
@@ -1799,15 +1803,6 @@ def HWWriteResGroup91_2 : SchedWriteRes<
   let ResourceCycles = [1,1];
 }
 def: InstRW<[HWWriteResGroup91_2], (instregex "(V?)PCMPGTQrm",
-                                              "(V?)PMADDUBSWrm",
-                                              "(V?)PMADDWDrm",
-                                              "(V?)PMULDQrm",
-                                              "(V?)PMULHRSWrm",
-                                              "(V?)PMULHUWrm",
-                                              "(V?)PMULHWrm",
-                                              "(V?)PMULLWrm",
-                                              "(V?)PMULUDQrm",
-                                              "(V?)PSADBWrm",
                                               "(V?)RCPPSm",
                                               "(V?)RSQRTPSm")>;
 
@@ -1817,16 +1812,21 @@ def HWWriteResGroup91_3 : SchedWriteRes<
   let ResourceCycles = [1,1];
 }
 def: InstRW<[HWWriteResGroup91_3], (instregex "MUL_F(32|64)m",
-                                              "VPCMPGTQYrm",
-                                              "VPMADDUBSWYrm",
-                                              "VPMADDWDYrm",
-                                              "VPMULDQYrm",
-                                              "VPMULHRSWYrm",
-                                              "VPMULHUWYrm",
-                                              "VPMULHWYrm",
-                                              "VPMULLWYrm",
-                                              "VPMULUDQYrm",
-                                              "VPSADBWYrm")>;
+                                              "VPCMPGTQYrm")>;
+
+def HWWriteResGroup91_5 : SchedWriteRes<[HWPort0,HWPort23]> {
+  let Latency = 10;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[HWWriteResGroup91_5], (instregex "MMX_PMADDUBSWrm",
+                                              "MMX_PMADDWDirm",
+                                              "MMX_PMULHRSWrm",
+                                              "MMX_PMULHUWirm",
+                                              "MMX_PMULHWirm",
+                                              "MMX_PMULLWirm",
+                                              "MMX_PMULUDQirm",
+                                              "MMX_PSADBWirm")>;
 
 def HWWriteResGroup92_2 : SchedWriteRes<[HWPort01,HWPort23]> {
   let Latency = 10;
@@ -1966,13 +1966,6 @@ def HWWriteResGroup109 : SchedWriteRes<[
 def: InstRW<[HWWriteResGroup109], (instregex "SHLD(16|32|64)mrCL",
                                              "SHRD(16|32|64)mrCL")>;
 
-def HWWriteResGroup113_1 : SchedWriteRes<[HWPort0,HWPort5,HWPort23]> {
-  let Latency = 14;
-  let NumMicroOps = 4;
-  let ResourceCycles = [1,2,1];
-}
-def: InstRW<[HWWriteResGroup113_1], (instregex "VMPSADBWYrmi")>;
-
 def HWWriteResGroup114 : SchedWriteRes<[HWPort6,HWPort06,HWPort15,HWPort0156]> {
   let Latency = 7;
   let NumMicroOps = 7;
@@ -2001,13 +1994,6 @@ def HWWriteResGroup117 : SchedWriteRes<[
 }
 def: InstRW<[HWWriteResGroup117], (instregex "(V?)DPPDrmi")>;
 
-def HWWriteResGroup119_1 : SchedWriteRes<[HWPort0,HWPort23]> {
-  let Latency = 17;
-  let NumMicroOps = 3;
-  let ResourceCycles = [2,1];
-}
-def: InstRW<[HWWriteResGroup119_1], (instregex "VPMULLDYrm")>;
-
 def HWWriteResGroup120 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort06,HWPort15,HWPort0156]> {
   let Latency = 16;
   let NumMicroOps = 10;

Modified: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td?rev=331445&r1=331444&r2=331445&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td Thu May  3 03:31:20 2018
@@ -178,8 +178,10 @@ defm : SBWriteResPair<WriteVecShift, [SB
 defm : SBWriteResPair<WriteVecLogic, [SBPort015], 1, [1], 1, 6>;
 defm : SBWriteResPair<WriteVecLogicY,[SBPort015], 1, [1], 1, 7>;
 defm : SBWriteResPair<WriteVecALU,   [SBPort1], 3>;
-defm : SBWriteResPair<WriteVecIMul,  [SBPort0], 5>;
-defm : SBWriteResPair<WritePMULLD,   [SBPort0], 5, [1], 1, 6>; // TODO this is probably wrong for 256/512-bit for the "generic" model
+defm : SBWriteResPair<WriteVecIMul,  [SBPort0], 5, [1], 1, 6>;
+defm : SBWriteResPair<WriteVecIMulY, [SBPort0], 5, [1], 1, 7>;
+defm : SBWriteResPair<WritePMULLD,   [SBPort0], 5, [1], 1, 6>;
+defm : SBWriteResPair<WritePMULLDY,  [SBPort0], 5, [1], 1, 7>; // TODO this is probably wrong for 256/512-bit for the "generic" model
 defm : SBWriteResPair<WriteShuffle,  [SBPort5], 1, [1], 1, 5>;
 defm : SBWriteResPair<WriteShuffleY, [SBPort5], 1, [1], 1, 7>;
 defm : SBWriteResPair<WriteVarShuffle,  [SBPort15], 1, [1], 1, 6>;
@@ -188,8 +190,10 @@ defm : SBWriteResPair<WriteBlend,   [SBP
 defm : SBWriteResPair<WriteBlendY,  [SBPort15], 1, [1], 1, 7>;
 defm : SBWriteResPair<WriteVarBlend, [SBPort15], 2, [2], 2, 6>;
 defm : SBWriteResPair<WriteVarBlendY,[SBPort15], 2, [2], 2, 7>;
-defm : SBWriteResPair<WriteMPSAD, [SBPort0, SBPort15], 7, [1,2], 3, 6>;
-defm : SBWriteResPair<WritePSADBW,  [SBPort0], 5>;
+defm : SBWriteResPair<WriteMPSAD,  [SBPort0, SBPort15], 7, [1,2], 3, 6>;
+defm : SBWriteResPair<WriteMPSADY, [SBPort0, SBPort15], 7, [1,2], 3, 7>;
+defm : SBWriteResPair<WritePSADBW,  [SBPort0], 5, [1], 1, 6>;
+defm : SBWriteResPair<WritePSADBWY, [SBPort0], 5, [1], 1, 7>;
 defm : SBWriteResPair<WritePHMINPOS,  [SBPort0], 5, [1], 1, 6>;
 
 // Vector insert/extract operations.
@@ -1227,21 +1231,6 @@ def SBWriteResGroup88 : SchedWriteRes<[S
 def: InstRW<[SBWriteResGroup88], (instregex "SHLD(16|32|64)mri8",
                                             "SHRD(16|32|64)mri8")>;
 
-def SBWriteResGroup89 : SchedWriteRes<[SBPort0,SBPort23]> {
-  let Latency = 11;
-  let NumMicroOps = 2;
-  let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup89], (instregex "(V?)PMADDUBSWrm",
-                                            "(V?)PMADDWDrm",
-                                            "(V?)PMULDQrm",
-                                            "(V?)PMULHRSWrm",
-                                            "(V?)PMULHUWrm",
-                                            "(V?)PMULHWrm",
-                                            "(V?)PMULLWrm",
-                                            "(V?)PMULUDQrm",
-                                            "(V?)PSADBWrm")>;
-
 def SBWriteResGroup89_2 : SchedWriteRes<[SBPort0,SBPort23]> {
   let Latency = 10;
   let NumMicroOps = 2;

Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td?rev=331445&r1=331444&r2=331445&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td Thu May  3 03:31:20 2018
@@ -199,8 +199,10 @@ defm : SKLWriteResPair<WriteVecALU,   [S
 defm : SKLWriteResPair<WriteVecLogic, [SKLPort015], 1, [1], 1, 6>; // Vector integer and/or/xor.
 defm : SKLWriteResPair<WriteVecLogicY,[SKLPort015], 1, [1], 1, 7>; // Vector integer and/or/xor (YMM/ZMM).
 defm : SKLWriteResPair<WriteVecShift, [SKLPort0],  1>; // Vector integer shifts.
-defm : SKLWriteResPair<WriteVecIMul,  [SKLPort0],   5>; // Vector integer multiply.
-defm : SKLWriteResPair<WritePMULLD,   [SKLPort01], 10, [2], 2, 6>;
+defm : SKLWriteResPair<WriteVecIMul,  [SKLPort01],  4, [1], 1, 6>; // Vector integer multiply.
+defm : SKLWriteResPair<WriteVecIMulY, [SKLPort01],  4, [1], 1, 7>; // Vector integer multiply (YMM/ZMM).
+defm : SKLWriteResPair<WritePMULLD,   [SKLPort01], 10, [2], 2, 6>; // Vector PMULLD.
+defm : SKLWriteResPair<WritePMULLDY,  [SKLPort01], 10, [2], 2, 7>; // Vector PMULLD (YMM/ZMM).
 defm : SKLWriteResPair<WriteShuffle,  [SKLPort5], 1, [1], 1, 5>; // Vector shuffles.
 defm : SKLWriteResPair<WriteShuffleY, [SKLPort5], 1, [1], 1, 7>; // Vector shuffles (YMM/ZMM).
 defm : SKLWriteResPair<WriteVarShuffle,  [SKLPort5], 1, [1], 1, 6>; // Vector shuffles.
@@ -210,7 +212,9 @@ defm : SKLWriteResPair<WriteBlendY, [SKL
 defm : SKLWriteResPair<WriteVarBlend,  [SKLPort015], 2, [2], 2, 6>; // Vector variable blends.
 defm : SKLWriteResPair<WriteVarBlendY, [SKLPort015], 2, [2], 2, 6>; // Vector variable blends (YMM/ZMM).
 defm : SKLWriteResPair<WriteMPSAD,  [SKLPort5], 4, [2], 2, 6>; // Vector MPSAD.
-defm : SKLWriteResPair<WritePSADBW, [SKLPort5], 3>; // Vector PSADBW.
+defm : SKLWriteResPair<WriteMPSADY, [SKLPort5], 4, [2], 2, 7>; // Vector MPSAD (YMM/ZMM).
+defm : SKLWriteResPair<WritePSADBW,  [SKLPort5], 3, [1], 1, 6>; // Vector PSADBW.
+defm : SKLWriteResPair<WritePSADBWY, [SKLPort5], 3, [1], 1, 7>; // Vector PSADBW (YMM/ZMM).
 defm : SKLWriteResPair<WritePHMINPOS, [SKLPort01], 4, [1], 1, 6>; // Vector PHMINPOS.
 
 // Vector insert/extract operations.
@@ -918,15 +922,7 @@ def SKLWriteResGroup48 : SchedWriteRes<[
 }
 def: InstRW<[SKLWriteResGroup48], (instregex "(V?)CVTDQ2PS(Y?)rr",
                                              "(V?)CVTPS2DQ(Y?)rr",
-                                             "(V?)CVTTPS2DQ(Y?)rr",
-                                             "(V?)PMADDUBSW(Y?)rr",
-                                             "(V?)PMADDWD(Y?)rr",
-                                             "(V?)PMULDQ(Y?)rr",
-                                             "(V?)PMULHRSW(Y?)rr",
-                                             "(V?)PMULHUW(Y?)rr",
-                                             "(V?)PMULHW(Y?)rr",
-                                             "(V?)PMULLW(Y?)rr",
-                                             "(V?)PMULUDQ(Y?)rr")>;
+                                             "(V?)CVTTPS2DQ(Y?)rr")>;
 
 def SKLWriteResGroup51 : SchedWriteRes<[SKLPort1,SKLPort5]> {
   let Latency = 4;
@@ -1506,6 +1502,7 @@ def: InstRW<[SKLWriteResGroup108], (inst
                                               "FCOM64m",
                                               "FCOMP32m",
                                               "FCOMP64m",
+                                              "MMX_PSADBWirm", // TODO - SKLWriteResGroup120??
                                               "VPBROADCASTBYrm",
                                               "VPBROADCASTWYrm",
                                               "VPMOVSXBDYrm",
@@ -1669,8 +1666,7 @@ def: InstRW<[SKLWriteResGroup121], (inst
                                               "VPMOVSXBWYrm",
                                               "VPMOVSXDQYrm",
                                               "VPMOVSXWDYrm",
-                                              "VPMOVZXWDYrm",
-                                              "(V?)PSADBWrm")>;
+                                              "VPMOVZXWDYrm")>;
 
 def SKLWriteResGroup122 : SchedWriteRes<[SKLPort01,SKLPort23]> {
   let Latency = 9;
@@ -1775,8 +1771,7 @@ def: InstRW<[SKLWriteResGroup133], (inst
                                               "VPMOVZXBQYrm",
                                               "VPMOVZXBWYrm",
                                               "VPMOVZXDQYrm",
-                                              "VPMOVZXWQYrm",
-                                              "VPSADBWYrm")>;
+                                              "VPMOVZXWQYrm")>;
 
 def SKLWriteResGroup134 : SchedWriteRes<[SKLPort01,SKLPort23]> {
   let Latency = 10;
@@ -1787,15 +1782,7 @@ def: InstRW<[SKLWriteResGroup134], (inst
                                               "(V?)CVTPH2PSYrm",
                                               "(V?)CVTPS2DQrm",
                                               "(V?)CVTSS2SDrm",
-                                              "(V?)CVTTPS2DQrm",
-                                              "(V?)PMADDUBSWrm",
-                                              "(V?)PMADDWDrm",
-                                              "(V?)PMULDQrm",
-                                              "(V?)PMULHRSWrm",
-                                              "(V?)PMULHUWrm",
-                                              "(V?)PMULHWrm",
-                                              "(V?)PMULLWrm",
-                                              "(V?)PMULUDQrm")>;
+                                              "(V?)CVTTPS2DQrm")>;
 
 def SKLWriteResGroup138 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
   let Latency = 10;
@@ -1883,15 +1870,7 @@ def SKLWriteResGroup147 : SchedWriteRes<
 def: InstRW<[SKLWriteResGroup147], (instregex "VCVTDQ2PSYrm",
                                               "VCVTPS2DQYrm",
                                               "VCVTPS2PDYrm",
-                                              "VCVTTPS2DQYrm",
-                                              "VPMADDUBSWYrm",
-                                              "VPMADDWDYrm",
-                                              "VPMULDQYrm",
-                                              "VPMULHRSWYrm",
-                                              "VPMULHUWYrm",
-                                              "VPMULHWYrm",
-                                              "VPMULLWYrm",
-                                              "VPMULUDQYrm")>;
+                                              "VCVTTPS2DQYrm")>;
 
 def SKLWriteResGroup149 : SchedWriteRes<[SKLPort5,SKLPort23]> {
   let Latency = 11;
@@ -1901,8 +1880,7 @@ def SKLWriteResGroup149 : SchedWriteRes<
 def: InstRW<[SKLWriteResGroup149], (instregex "FICOM16m",
                                               "FICOM32m",
                                               "FICOMP16m",
-                                              "FICOMP32m",
-                                              "VMPSADBWYrmi")>;
+                                              "FICOMP32m")>;
 
 def SKLWriteResGroup150 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
   let Latency = 11;
@@ -2065,13 +2043,6 @@ def SKLWriteResGroup172 : SchedWriteRes<
 def: InstRW<[SKLWriteResGroup172], (instregex "VROUNDPDYm",
                                               "VROUNDPSYm")>;
 
-def SKLWriteResGroup172_2 : SchedWriteRes<[SKLPort23,SKLPort01]> {
-  let Latency = 17;
-  let NumMicroOps = 3;
-  let ResourceCycles = [1,2];
-}
-def: InstRW<[SKLWriteResGroup172_2], (instregex "VPMULLDYrm")>;
-
 def SKLWriteResGroup173 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort01]> {
   let Latency = 15;
   let NumMicroOps = 4;

Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td?rev=331445&r1=331444&r2=331445&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td Thu May  3 03:31:20 2018
@@ -199,8 +199,10 @@ defm : SKXWriteResPair<WriteVecALU,   [S
 defm : SKXWriteResPair<WriteVecLogic, [SKXPort015], 1, [1], 1, 6>; // Vector integer and/or/xor.
 defm : SKXWriteResPair<WriteVecLogicY,[SKXPort015], 1, [1], 1, 7>; // Vector integer and/or/xor (YMM/ZMM).
 defm : SKXWriteResPair<WriteVecShift, [SKXPort0],  1>; // Vector integer shifts.
-defm : SKXWriteResPair<WriteVecIMul,  [SKXPort0],   5>; // Vector integer multiply.
-defm : SKXWriteResPair<WritePMULLD,   [SKXPort015], 10, [2], 2, 6>; // Vector integer multiply.
+defm : SKXWriteResPair<WriteVecIMul,  [SKXPort015],  4, [1], 1, 6>; // Vector integer multiply.
+defm : SKXWriteResPair<WriteVecIMulY, [SKXPort015],  4, [1], 1, 7>; // Vector integer multiply (YMM/ZMM).
+defm : SKXWriteResPair<WritePMULLD,   [SKXPort015], 10, [2], 2, 6>; // Vector PMULLD.
+defm : SKXWriteResPair<WritePMULLDY,  [SKXPort015], 10, [2], 2, 7>; // Vector PMULLD (YMM/ZMM).
 defm : SKXWriteResPair<WriteShuffle,  [SKXPort5], 1, [1], 1, 5>; // Vector shuffles.
 defm : SKXWriteResPair<WriteShuffleY, [SKXPort5], 1, [1], 1, 7>; // Vector shuffles (YMM/ZMM).
 defm : SKXWriteResPair<WriteVarShuffle,  [SKXPort5], 1, [1], 1, 6>; // Vector variable shuffles.
@@ -209,8 +211,10 @@ defm : SKXWriteResPair<WriteBlend, [SKXP
 defm : SKXWriteResPair<WriteBlendY,[SKXPort5], 1, [1], 1, 7>; // Vector blends (YMM/ZMM).
 defm : SKXWriteResPair<WriteVarBlend, [SKXPort015], 2, [2], 2, 6>; // Vector variable blends.
 defm : SKXWriteResPair<WriteVarBlendY,[SKXPort015], 2, [2], 2, 6>; // Vector variable blends (YMM/ZMM).
-defm : SKXWriteResPair<WriteMPSAD,  [SKXPort5], 4, [2], 2, 6>; // Vector MPSAD.
-defm : SKXWriteResPair<WritePSADBW, [SKXPort5], 3, [1], 1, 6>; // Vector PSADBW.
+defm : SKXWriteResPair<WriteMPSAD,   [SKXPort5], 4, [2], 2, 6>; // Vector MPSAD.
+defm : SKXWriteResPair<WriteMPSADY,  [SKXPort5], 4, [2], 2, 7>; // Vector MPSAD (YMM/ZMM).
+defm : SKXWriteResPair<WritePSADBW,  [SKXPort5], 3, [1], 1, 6>; // Vector PSADBW.
+defm : SKXWriteResPair<WritePSADBWY, [SKXPort5], 3, [1], 1, 7>; // Vector PSADBW (YMM/ZMM).
 defm : SKXWriteResPair<WritePHMINPOS, [SKXPort015], 4, [1], 1, 6>; // Vector PHMINPOS.
 
 // Vector insert/extract operations.
@@ -1214,9 +1218,7 @@ def: InstRW<[SKXWriteResGroup32], (instr
                                              "VCMPPSZrri",
                                              "VCMPSDZrr",
                                              "VCMPSSZrr",
-                                             "VDBPSADBWZ128rri",
-                                             "VDBPSADBWZ256rri",
-                                             "VDBPSADBWZrri",
+                                             "VDBPSADBWZrri", // TODO: 512-bit ops require ports 0/1 to be joined.
                                              "VFPCLASSPDZ128rr",
                                              "VFPCLASSPDZ256rr",
                                              "VFPCLASSPDZrr",
@@ -1518,47 +1520,7 @@ def: InstRW<[SKXWriteResGroup50], (instr
                                              "VPLZCNTDZrr",
                                              "VPLZCNTQZ128rr",
                                              "VPLZCNTQZ256rr",
-                                             "VPLZCNTQZrr",
-                                             "VPMADDUBSWYrr",
-                                             "VPMADDUBSWZ128rr",
-                                             "VPMADDUBSWZ256rr",
-                                             "VPMADDUBSWZrr",
-                                             "(V?)PMADDUBSWrr",
-                                             "VPMADDWDYrr",
-                                             "VPMADDWDZ128rr",
-                                             "VPMADDWDZ256rr",
-                                             "VPMADDWDZrr",
-                                             "(V?)PMADDWDrr",
-                                             "VPMULDQYrr",
-                                             "VPMULDQZ128rr",
-                                             "VPMULDQZ256rr",
-                                             "VPMULDQZrr",
-                                             "(V?)PMULDQrr",
-                                             "VPMULHRSWYrr",
-                                             "VPMULHRSWZ128rr",
-                                             "VPMULHRSWZ256rr",
-                                             "VPMULHRSWZrr",
-                                             "(V?)PMULHRSWrr",
-                                             "VPMULHUWYrr",
-                                             "VPMULHUWZ128rr",
-                                             "VPMULHUWZ256rr",
-                                             "VPMULHUWZrr",
-                                             "(V?)PMULHUWrr",
-                                             "VPMULHWYrr",
-                                             "VPMULHWZ128rr",
-                                             "VPMULHWZ256rr",
-                                             "VPMULHWZrr",
-                                             "(V?)PMULHWrr",
-                                             "VPMULLWYrr",
-                                             "VPMULLWZ128rr",
-                                             "VPMULLWZ256rr",
-                                             "VPMULLWZrr",
-                                             "(V?)PMULLWrr",
-                                             "VPMULUDQYrr",
-                                             "VPMULUDQZ128rr",
-                                             "VPMULUDQZ256rr",
-                                             "VPMULUDQZrr",
-                                             "(V?)PMULUDQrr")>;
+                                             "VPLZCNTQZrr")>;
 
 def SKXWriteResGroup51 : SchedWriteRes<[SKXPort5]> {
   let Latency = 4;
@@ -3060,7 +3022,6 @@ def: InstRW<[SKXWriteResGroup136], (inst
                                               "VCMPPSZ128rm(b?)i",
                                               "VCMPSDZrm",
                                               "VCMPSSZrm",
-                                              "VDBPSADBWZ128rmi(b?)",
                                               "VFPCLASSSSrm(b?)",
                                               "VPCMPBZ128rmi(b?)",
                                               "VPCMPDZ128rmi(b?)",
@@ -3107,7 +3068,6 @@ def: InstRW<[SKXWriteResGroup136], (inst
                                               "VPMOVZXWDYrm",
                                               "VPMOVZXWDZ128rm(b?)",
                                               "VPMOVZXWQZ128rm(b?)",
-                                              "VPSADBWZ128rm(b?)",
                                               "VPTESTMBZ128rm(b?)",
                                               "VPTESTMDZ128rm(b?)",
                                               "VPTESTMQZ128rm(b?)",
@@ -3219,8 +3179,6 @@ def: InstRW<[SKXWriteResGroup148], (inst
                                               "VCMPPDZrm(b?)i",
                                               "VCMPPSZ256rm(b?)i",
                                               "VCMPPSZrm(b?)i",
-                                              "VDBPSADBWZ256rmi(b?)",
-                                              "VDBPSADBWZrmi(b?)",
                                               "VPCMPBZ256rmi(b?)",
                                               "VPCMPBZrmi(b?)",
                                               "VPCMPDZ256rmi(b?)",
@@ -3267,9 +3225,6 @@ def: InstRW<[SKXWriteResGroup148], (inst
                                               "VPMOVZXBWYrm",
                                               "VPMOVZXDQYrm",
                                               "VPMOVZXWQYrm",
-                                              "VPSADBWYrm",
-                                              "VPSADBWZ256rm(b?)",
-                                              "VPSADBWZrm(b?)",
                                               "VPTESTMBZ256rm(b?)",
                                               "VPTESTMBZrm(b?)",
                                               "VPTESTMDZ256rm(b?)",
@@ -3296,14 +3251,6 @@ def: InstRW<[SKXWriteResGroup149], (inst
                                               "CVTPS2DQrm",
                                               "CVTSS2SDrm",
                                               "CVTTPS2DQrm",
-                                              "PMADDUBSWrm",
-                                              "PMADDWDrm",
-                                              "PMULDQrm",
-                                              "PMULHRSWrm",
-                                              "PMULHUWrm",
-                                              "PMULHWrm",
-                                              "PMULLWrm",
-                                              "PMULUDQrm",
                                               "VCVTDQ2PDZ128rm(b?)",
                                               "VCVTDQ2PSZ128rm(b?)",
                                               "VCVTDQ2PSrm",
@@ -3333,23 +3280,7 @@ def: InstRW<[SKXWriteResGroup149], (inst
                                               "VCVTUQQ2PDZ128rm(b?)",
                                               "VCVTUQQ2PSZ128rm(b?)",
                                               "VPLZCNTDZ128rm(b?)",
-                                              "VPLZCNTQZ128rm(b?)",
-                                              "VPMADDUBSWZ128rm(b?)",
-                                              "VPMADDUBSWrm",
-                                              "VPMADDWDZ128rm(b?)",
-                                              "VPMADDWDrm",
-                                              "VPMULDQZ128rm(b?)",
-                                              "VPMULDQrm",
-                                              "VPMULHRSWZ128rm(b?)",
-                                              "VPMULHRSWrm",
-                                              "VPMULHUWZ128rm(b?)",
-                                              "VPMULHUWrm",
-                                              "VPMULHWZ128rm(b?)",
-                                              "VPMULHWrm",
-                                              "VPMULLWZ128rm(b?)",
-                                              "VPMULLWrm",
-                                              "VPMULUDQZ128rm(b?)",
-                                              "VPMULUDQrm")>;
+                                              "VPLZCNTQZ128rm(b?)")>;
 
 def SKXWriteResGroup151 : SchedWriteRes<[SKXPort5,SKXPort23]> {
   let Latency = 10;
@@ -3487,31 +3418,7 @@ def: InstRW<[SKXWriteResGroup161], (inst
                                               "VPLZCNTDZ256rm(b?)",
                                               "VPLZCNTDZrm(b?)",
                                               "VPLZCNTQZ256rm(b?)",
-                                              "VPLZCNTQZrm(b?)",
-                                              "VPMADDUBSWYrm",
-                                              "VPMADDUBSWZ256rm(b?)",
-                                              "VPMADDUBSWZrm(b?)",
-                                              "VPMADDWDYrm",
-                                              "VPMADDWDZ256rm(b?)",
-                                              "VPMADDWDZrm(b?)",
-                                              "VPMULDQYrm",
-                                              "VPMULDQZ256rm(b?)",
-                                              "VPMULDQZrm(b?)",
-                                              "VPMULHRSWYrm",
-                                              "VPMULHRSWZ256rm(b?)",
-                                              "VPMULHRSWZrm(b?)",
-                                              "VPMULHUWYrm",
-                                              "VPMULHUWZ256rm(b?)",
-                                              "VPMULHUWZrm(b?)",
-                                              "VPMULHWYrm",
-                                              "VPMULHWZ256rm(b?)",
-                                              "VPMULHWZrm(b?)",
-                                              "VPMULLWYrm",
-                                              "VPMULLWZ256rm(b?)",
-                                              "VPMULLWZrm(b?)",
-                                              "VPMULUDQYrm",
-                                              "VPMULUDQZ256rm(b?)",
-                                              "VPMULUDQZrm(b?)")>;
+                                              "VPLZCNTQZrm(b?)")>;
 
 def SKXWriteResGroup162 : SchedWriteRes<[SKXPort5,SKXPort23]> {
   let Latency = 11;
@@ -3526,7 +3433,6 @@ def: InstRW<[SKXWriteResGroup162], (inst
                                               "VEXPANDPDZrm(b?)",
                                               "VEXPANDPSZ256rm(b?)",
                                               "VEXPANDPSZrm(b?)",
-                                              "VMPSADBWYrmi",
                                               "VPEXPANDDZ256rm(b?)",
                                               "VPEXPANDDZrm(b?)",
                                               "VPEXPANDQZ256rm(b?)",
@@ -3805,15 +3711,6 @@ def: InstRW<[SKXWriteResGroup192], (inst
                                               "VROUNDPDYm",
                                               "VROUNDPSYm")>;
 
-def SKXWriteResGroup192_2 : SchedWriteRes<[SKXPort23,SKXPort015]> {
-  let Latency = 17;
-  let NumMicroOps = 3;
-  let ResourceCycles = [1,2];
-}
-def: InstRW<[SKXWriteResGroup192_2], (instregex "VPMULLDYrm",
-                                                "VPMULLDZ256rm(b?)",
-                                                "VPMULLDZrm(b?)")>;
-
 def SKXWriteResGroup193 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
   let Latency = 15;
   let NumMicroOps = 4;

Modified: llvm/trunk/lib/Target/X86/X86Schedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Schedule.td?rev=331445&r1=331444&r2=331445&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Schedule.td (original)
+++ llvm/trunk/lib/Target/X86/X86Schedule.td Thu May  3 03:31:20 2018
@@ -137,7 +137,9 @@ defm WriteVecLogic : X86SchedWritePair;
 defm WriteVecLogicY: X86SchedWritePair; // Vector integer and/or/xor logicals (YMM/ZMM).
 defm WriteVecShift : X86SchedWritePair; // Vector integer shifts.
 defm WriteVecIMul  : X86SchedWritePair; // Vector integer multiply.
-defm WritePMULLD : X86SchedWritePair; // PMULLD
+defm WriteVecIMulY : X86SchedWritePair; // Vector integer multiply (YMM/ZMM).
+defm WritePMULLD   : X86SchedWritePair; // Vector PMULLD.
+defm WritePMULLDY   : X86SchedWritePair; // Vector PMULLD (YMM/ZMM).
 defm WriteShuffle  : X86SchedWritePair; // Vector shuffles.
 defm WriteShuffleY : X86SchedWritePair; // Vector shuffles (YMM/ZMM).
 defm WriteVarShuffle  : X86SchedWritePair; // Vector variable shuffles.
@@ -146,8 +148,10 @@ defm WriteBlend  : X86SchedWritePair; //
 defm WriteBlendY : X86SchedWritePair; // Vector blends (YMM/ZMM).
 defm WriteVarBlend  : X86SchedWritePair; // Vector variable blends.
 defm WriteVarBlendY : X86SchedWritePair; // Vector variable blends (YMM/ZMM).
-defm WritePSADBW : X86SchedWritePair; // Vector PSADBW.
-defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD.
+defm WritePSADBW  : X86SchedWritePair; // Vector PSADBW.
+defm WritePSADBWY : X86SchedWritePair; // Vector PSADBW (YMM/ZMM).
+defm WriteMPSAD  : X86SchedWritePair; // Vector MPSAD.
+defm WriteMPSADY : X86SchedWritePair; // Vector MPSAD (YMM/ZMM).
 defm WritePHMINPOS : X86SchedWritePair; // Vector PHMINPOS.
 
 // Vector insert/extract operations.
@@ -259,16 +263,16 @@ def SchedWriteVarVecShift
                        WriteVarVecShift, WriteVarVecShift>;
 def SchedWriteVecIMul
  : X86SchedWriteWidths<WriteVecIMul, WriteVecIMul,
-                       WriteVecIMul, WriteVecIMul>;
+                       WriteVecIMulY, WriteVecIMulY>;
 def SchedWritePMULLD
  : X86SchedWriteWidths<WritePMULLD, WritePMULLD,
-                       WritePMULLD, WritePMULLD>;
+                       WritePMULLDY, WritePMULLDY>;
 def SchedWriteMPSAD
  : X86SchedWriteWidths<WriteMPSAD, WriteMPSAD,
-                       WriteMPSAD, WriteMPSAD>;
+                       WriteMPSADY, WriteMPSADY>;
 def SchedWritePSADBW
  : X86SchedWriteWidths<WritePSADBW, WritePSADBW,
-                       WritePSADBW, WritePSADBW>;
+                       WritePSADBWY, WritePSADBWY>;
 
 def SchedWriteShuffle
  : X86SchedWriteWidths<WriteShuffle, WriteShuffle,

Modified: llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleAtom.td?rev=331445&r1=331444&r2=331445&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleAtom.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleAtom.td Thu May  3 03:31:20 2018
@@ -256,10 +256,14 @@ defm : AtomWriteResPair<WriteVecLogic,
 defm : AtomWriteResPair<WriteVecLogicY,    [AtomPort01],  [AtomPort0], 1, 1>;
 defm : AtomWriteResPair<WriteVecShift,     [AtomPort01], [AtomPort01], 2, 3, [2], [3]>;
 defm : AtomWriteResPair<WriteVecIMul,       [AtomPort0],  [AtomPort0], 5, 5, [5], [5]>;
+defm : AtomWriteResPair<WriteVecIMulY,      [AtomPort0],  [AtomPort0], 5, 5, [5], [5]>;
 defm : AtomWriteResPair<WritePMULLD,       [AtomPort01],  [AtomPort0], 1, 1>;
+defm : AtomWriteResPair<WritePMULLDY,      [AtomPort01],  [AtomPort0], 1, 1>;
 defm : AtomWriteResPair<WritePHMINPOS,      [AtomPort0],  [AtomPort0], 5, 5, [5], [5]>;
 defm : AtomWriteResPair<WriteMPSAD,        [AtomPort01],  [AtomPort0], 1, 1>;
+defm : AtomWriteResPair<WriteMPSADY,       [AtomPort01],  [AtomPort0], 1, 1>;
 defm : AtomWriteResPair<WritePSADBW,        [AtomPort0],  [AtomPort0], 5, 5, [5], [5]>;
+defm : AtomWriteResPair<WritePSADBWY,       [AtomPort0],  [AtomPort0], 5, 5, [5], [5]>;
 defm : AtomWriteResPair<WriteShuffle,       [AtomPort0],  [AtomPort0], 1, 1>;
 defm : AtomWriteResPair<WriteShuffleY,      [AtomPort0],  [AtomPort0], 1, 1>;
 defm : AtomWriteResPair<WriteVarShuffle,   [AtomPort01], [AtomPort01], 4, 5, [4], [5]>;

Modified: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td?rev=331445&r1=331444&r2=331445&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td Thu May  3 03:31:20 2018
@@ -406,9 +406,13 @@ def  : WriteRes<WriteVecMove,
 defm : JWriteResFpuPair<WriteVecALU,      [JFPU01, JVALU], 1>;
 defm : JWriteResFpuPair<WriteVecShift,    [JFPU01, JVALU], 1>;
 defm : JWriteResFpuPair<WriteVecIMul,     [JFPU0, JVIMUL], 2>;
+defm : JWriteResFpuPair<WriteVecIMulY,    [JFPU0, JVIMUL], 2>;
 defm : JWriteResFpuPair<WritePMULLD,      [JFPU0, JFPU01, JVIMUL, JVALU], 4, [2, 1, 2, 1], 3>;
+defm : JWriteResFpuPair<WritePMULLDY,     [JFPU0, JFPU01, JVIMUL, JVALU], 4, [2, 1, 2, 1], 3>;
 defm : JWriteResFpuPair<WriteMPSAD,       [JFPU0, JVIMUL], 3, [1, 2]>;
+defm : JWriteResFpuPair<WriteMPSADY,      [JFPU0, JVIMUL], 3, [1, 2]>;
 defm : JWriteResFpuPair<WritePSADBW,      [JFPU01, JVALU], 2>;
+defm : JWriteResFpuPair<WritePSADBWY,     [JFPU01, JVALU], 2>;
 defm : JWriteResFpuPair<WritePHMINPOS,    [JFPU0,  JVALU], 2>;
 defm : JWriteResFpuPair<WriteShuffle,     [JFPU01, JVALU], 1>;
 defm : JWriteResFpuPair<WriteShuffleY,    [JFPU01, JVALU], 1>;

Modified: llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleSLM.td?rev=331445&r1=331444&r2=331445&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleSLM.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleSLM.td Thu May  3 03:31:20 2018
@@ -167,9 +167,11 @@ defm : SLMWriteResPair<WriteVecLogic, [S
 defm : SLMWriteResPair<WriteVecLogicY,[SLM_FPC_RSV01], 1>;
 defm : SLMWriteResPair<WriteVecALU,   [SLM_FPC_RSV01],  1>;
 defm : SLMWriteResPair<WriteVecIMul,  [SLM_FPC_RSV0],   4>;
+defm : SLMWriteResPair<WriteVecIMulY, [SLM_FPC_RSV0],   4>;
 // FIXME: The below is closer to correct, but caused some perf regressions.
 //defm : SLMWriteResPair<WritePMULLD,  [SLM_FPC_RSV0],   11, [11], 7>;
 defm : SLMWriteResPair<WritePMULLD,  [SLM_FPC_RSV0],   4>;
+defm : SLMWriteResPair<WritePMULLDY, [SLM_FPC_RSV0],   4>;
 defm : SLMWriteResPair<WriteShuffle,  [SLM_FPC_RSV0],  1>;
 defm : SLMWriteResPair<WriteShuffleY, [SLM_FPC_RSV0],  1>;
 defm : SLMWriteResPair<WriteVarShuffle,  [SLM_FPC_RSV0],  1>;
@@ -177,7 +179,9 @@ defm : SLMWriteResPair<WriteVarShuffleY,
 defm : SLMWriteResPair<WriteBlend,  [SLM_FPC_RSV0],  1>;
 defm : SLMWriteResPair<WriteBlendY, [SLM_FPC_RSV0],  1>;
 defm : SLMWriteResPair<WriteMPSAD,  [SLM_FPC_RSV0],  7>;
-defm : SLMWriteResPair<WritePSADBW, [SLM_FPC_RSV0],  4>;
+defm : SLMWriteResPair<WriteMPSADY, [SLM_FPC_RSV0],  7>;
+defm : SLMWriteResPair<WritePSADBW,  [SLM_FPC_RSV0],  4>;
+defm : SLMWriteResPair<WritePSADBWY, [SLM_FPC_RSV0],  4>;
 defm : SLMWriteResPair<WritePHMINPOS,  [SLM_FPC_RSV0],   4>;
 
 // Vector insert/extract operations.

Modified: llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td?rev=331445&r1=331444&r2=331445&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td Thu May  3 03:31:20 2018
@@ -240,7 +240,9 @@ defm : ZnWriteResFpuPair<WriteVecLogicY,
 defm : ZnWriteResFpuPair<WritePHAdd,      [ZnFPU],   1>;
 defm : ZnWriteResFpuPair<WriteVecALU,     [ZnFPU],   1>;
 defm : ZnWriteResFpuPair<WriteVecIMul,    [ZnFPU0],  4>;
+defm : ZnWriteResFpuPair<WriteVecIMulY,   [ZnFPU0],  4>;
 defm : ZnWriteResFpuPair<WritePMULLD,     [ZnFPU0],  4>; // FIXME
+defm : ZnWriteResFpuPair<WritePMULLDY,    [ZnFPU0],  5, [2]>; // FIXME
 defm : ZnWriteResFpuPair<WriteShuffle,    [ZnFPU],   1>;
 defm : ZnWriteResFpuPair<WriteShuffleY,   [ZnFPU],   1>;
 defm : ZnWriteResFpuPair<WriteVarShuffle, [ZnFPU],   1>;
@@ -250,6 +252,7 @@ defm : ZnWriteResFpuPair<WriteBlendY,
 defm : ZnWriteResFpuPair<WriteShuffle256, [ZnFPU],   2>;
 defm : ZnWriteResFpuPair<WriteVarShuffle256, [ZnFPU],   2>;
 defm : ZnWriteResFpuPair<WritePSADBW,     [ZnFPU0],  3>;
+defm : ZnWriteResFpuPair<WritePSADBWY,    [ZnFPU0],  3>;
 defm : ZnWriteResFpuPair<WritePHMINPOS,   [ZnFPU0],  4>;
 
 // Vector Shift Operations
@@ -291,7 +294,9 @@ let Latency = 100 in {
   def : WriteRes<WriteMicrocoded, []>;
   def : WriteRes<WriteSystem, []>;
   def : WriteRes<WriteMPSAD, []>;
+  def : WriteRes<WriteMPSADY, []>;
   def : WriteRes<WriteMPSADLd, []>;
+  def : WriteRes<WriteMPSADYLd, []>;
   def : WriteRes<WriteCLMul, []>;
   def : WriteRes<WriteCLMulLd, []>;
   def : WriteRes<WritePCmpIStrM, []>;
@@ -1042,18 +1047,6 @@ def : InstRW<[ZnWritePCMPGTQm], (instreg
 def : InstRW<[ZnWritePCMPGTQYm], (instregex "(V?)PCMPGTQYrm")>;
 
 // PMULLD.
-// x,x.
-def ZnWritePMULLDr : SchedWriteRes<[ZnFPU0]> {
-  let Latency = 4;
-}
-// ymm.
-def ZnWritePMULLDYr : SchedWriteRes<[ZnFPU0]> {
-  let Latency = 5;
-  let ResourceCycles = [2];
-}
-def : InstRW<[ZnWritePMULLDr], (instregex "(V?)PMULLDrr")>;
-def : InstRW<[ZnWritePMULLDYr], (instregex "(V?)PMULLDYrr")>;
-
 // x,m.
 def ZnWritePMULLDm : SchedWriteRes<[ZnAGU, ZnFPU0]> {
   let Latency = 11;

Modified: llvm/trunk/test/CodeGen/X86/3dnow-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/3dnow-schedule.ll?rev=331445&r1=331444&r2=331445&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/3dnow-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/3dnow-schedule.ll Thu May  3 03:31:20 2018
@@ -15,7 +15,7 @@ define i64 @test_pavgusb(x86_mmx %a0, x8
 ; CHECK-LABEL: test_pavgusb:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pavgusb %mm1, %mm0 # sched: [5:1.00]
-; CHECK-NEXT:    pavgusb (%rdi), %mm0 # sched: [10:1.00]
+; CHECK-NEXT:    pavgusb (%rdi), %mm0 # sched: [11:1.00]
 ; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %a0, x86_mmx %a1)
@@ -345,7 +345,7 @@ define i64 @test_pmulhrw(x86_mmx %a0, x8
 ; CHECK-LABEL: test_pmulhrw:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pmulhrw %mm1, %mm0 # sched: [5:1.00]
-; CHECK-NEXT:    pmulhrw (%rdi), %mm0 # sched: [10:1.00]
+; CHECK-NEXT:    pmulhrw (%rdi), %mm0 # sched: [11:1.00]
 ; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %a0, x86_mmx %a1)

Modified: llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-schedule.ll?rev=331445&r1=331444&r2=331445&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-schedule.ll Thu May  3 03:31:20 2018
@@ -609,7 +609,7 @@ define <16 x i16> @test_mpsadbw(<32 x i8
 ; GENERIC-LABEL: test_mpsadbw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [7:1.00]
-; GENERIC-NEXT:    vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [13:1.00]
+; GENERIC-NEXT:    vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [14:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_mpsadbw:
@@ -3297,7 +3297,7 @@ define <16 x i16> @test_pmaddubsw(<32 x
 ; GENERIC-LABEL: test_pmaddubsw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; GENERIC-NEXT:    vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmaddubsw:
@@ -3341,7 +3341,7 @@ define <8 x i32> @test_pmaddwd(<16 x i16
 ; GENERIC-LABEL: test_pmaddwd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; GENERIC-NEXT:    vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmaddwd:
@@ -4738,7 +4738,7 @@ define <4 x i64> @test_pmuldq(<8 x i32>
 ; GENERIC-LABEL: test_pmuldq:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmuldq (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; GENERIC-NEXT:    vpmuldq (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmuldq:
@@ -4782,7 +4782,7 @@ define <16 x i16> @test_pmulhrsw(<16 x i
 ; GENERIC-LABEL: test_pmulhrsw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; GENERIC-NEXT:    vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmulhrsw:
@@ -4825,7 +4825,7 @@ define <16 x i16> @test_pmulhuw(<16 x i1
 ; GENERIC-LABEL: test_pmulhuw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; GENERIC-NEXT:    vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmulhuw:
@@ -4868,7 +4868,7 @@ define <16 x i16> @test_pmulhw(<16 x i16
 ; GENERIC-LABEL: test_pmulhw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmulhw (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; GENERIC-NEXT:    vpmulhw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmulhw:
@@ -4911,7 +4911,7 @@ define <8 x i32> @test_pmulld(<8 x i32>
 ; GENERIC-LABEL: test_pmulld:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmulld %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmulld (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
+; GENERIC-NEXT:    vpmulld (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmulld:
@@ -4953,7 +4953,7 @@ define <16 x i16> @test_pmullw(<16 x i16
 ; GENERIC-LABEL: test_pmullw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmullw (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; GENERIC-NEXT:    vpmullw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmullw:
@@ -4995,7 +4995,7 @@ define <4 x i64> @test_pmuludq(<8 x i32>
 ; GENERIC-LABEL: test_pmuludq:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmuludq (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; GENERIC-NEXT:    vpmuludq (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmuludq:
@@ -5088,7 +5088,7 @@ define <4 x i64> @test_psadbw(<32 x i8>
 ; GENERIC-LABEL: test_psadbw:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpsadbw (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
+; GENERIC-NEXT:    vpsadbw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psadbw:

Modified: llvm/trunk/test/CodeGen/X86/sha-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sha-schedule.ll?rev=331445&r1=331444&r2=331445&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sha-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sha-schedule.ll Thu May  3 03:31:20 2018
@@ -12,7 +12,7 @@ define <4 x i32> @test_sha1msg1(<4 x i32
 ; GENERIC-LABEL: test_sha1msg1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    sha1msg1 %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    sha1msg1 (%rdi), %xmm0 # sched: [10:1.00]
+; GENERIC-NEXT:    sha1msg1 (%rdi), %xmm0 # sched: [11:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; GOLDMONT-LABEL: test_sha1msg1:
@@ -23,8 +23,8 @@ define <4 x i32> @test_sha1msg1(<4 x i32
 ;
 ; CANNONLAKE-LABEL: test_sha1msg1:
 ; CANNONLAKE:       # %bb.0:
-; CANNONLAKE-NEXT:    sha1msg1 %xmm1, %xmm0 # sched: [5:1.00]
-; CANNONLAKE-NEXT:    sha1msg1 (%rdi), %xmm0 # sched: [10:1.00]
+; CANNONLAKE-NEXT:    sha1msg1 %xmm1, %xmm0 # sched: [4:0.33]
+; CANNONLAKE-NEXT:    sha1msg1 (%rdi), %xmm0 # sched: [10:0.50]
 ; CANNONLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; ZNVER1-LABEL: test_sha1msg1:
@@ -43,7 +43,7 @@ define <4 x i32> @test_sha1msg2(<4 x i32
 ; GENERIC-LABEL: test_sha1msg2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    sha1msg2 %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    sha1msg2 (%rdi), %xmm0 # sched: [10:1.00]
+; GENERIC-NEXT:    sha1msg2 (%rdi), %xmm0 # sched: [11:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; GOLDMONT-LABEL: test_sha1msg2:
@@ -54,8 +54,8 @@ define <4 x i32> @test_sha1msg2(<4 x i32
 ;
 ; CANNONLAKE-LABEL: test_sha1msg2:
 ; CANNONLAKE:       # %bb.0:
-; CANNONLAKE-NEXT:    sha1msg2 %xmm1, %xmm0 # sched: [5:1.00]
-; CANNONLAKE-NEXT:    sha1msg2 (%rdi), %xmm0 # sched: [10:1.00]
+; CANNONLAKE-NEXT:    sha1msg2 %xmm1, %xmm0 # sched: [4:0.33]
+; CANNONLAKE-NEXT:    sha1msg2 (%rdi), %xmm0 # sched: [10:0.50]
 ; CANNONLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; ZNVER1-LABEL: test_sha1msg2:
@@ -74,7 +74,7 @@ define <4 x i32> @test_sha1nexte(<4 x i3
 ; GENERIC-LABEL: test_sha1nexte:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    sha1nexte %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    sha1nexte (%rdi), %xmm0 # sched: [10:1.00]
+; GENERIC-NEXT:    sha1nexte (%rdi), %xmm0 # sched: [11:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; GOLDMONT-LABEL: test_sha1nexte:
@@ -85,8 +85,8 @@ define <4 x i32> @test_sha1nexte(<4 x i3
 ;
 ; CANNONLAKE-LABEL: test_sha1nexte:
 ; CANNONLAKE:       # %bb.0:
-; CANNONLAKE-NEXT:    sha1nexte %xmm1, %xmm0 # sched: [5:1.00]
-; CANNONLAKE-NEXT:    sha1nexte (%rdi), %xmm0 # sched: [10:1.00]
+; CANNONLAKE-NEXT:    sha1nexte %xmm1, %xmm0 # sched: [4:0.33]
+; CANNONLAKE-NEXT:    sha1nexte (%rdi), %xmm0 # sched: [10:0.50]
 ; CANNONLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; ZNVER1-LABEL: test_sha1nexte:
@@ -105,7 +105,7 @@ define <4 x i32> @test_sha1rnds4(<4 x i3
 ; GENERIC-LABEL: test_sha1rnds4:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    sha1rnds4 $3, %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # sched: [10:1.00]
+; GENERIC-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # sched: [11:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; GOLDMONT-LABEL: test_sha1rnds4:
@@ -116,8 +116,8 @@ define <4 x i32> @test_sha1rnds4(<4 x i3
 ;
 ; CANNONLAKE-LABEL: test_sha1rnds4:
 ; CANNONLAKE:       # %bb.0:
-; CANNONLAKE-NEXT:    sha1rnds4 $3, %xmm1, %xmm0 # sched: [5:1.00]
-; CANNONLAKE-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # sched: [10:1.00]
+; CANNONLAKE-NEXT:    sha1rnds4 $3, %xmm1, %xmm0 # sched: [4:0.33]
+; CANNONLAKE-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # sched: [10:0.50]
 ; CANNONLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; ZNVER1-LABEL: test_sha1rnds4:
@@ -140,7 +140,7 @@ define <4 x i32> @test_sha256msg1(<4 x i
 ; GENERIC-LABEL: test_sha256msg1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    sha256msg1 %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    sha256msg1 (%rdi), %xmm0 # sched: [10:1.00]
+; GENERIC-NEXT:    sha256msg1 (%rdi), %xmm0 # sched: [11:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; GOLDMONT-LABEL: test_sha256msg1:
@@ -151,8 +151,8 @@ define <4 x i32> @test_sha256msg1(<4 x i
 ;
 ; CANNONLAKE-LABEL: test_sha256msg1:
 ; CANNONLAKE:       # %bb.0:
-; CANNONLAKE-NEXT:    sha256msg1 %xmm1, %xmm0 # sched: [5:1.00]
-; CANNONLAKE-NEXT:    sha256msg1 (%rdi), %xmm0 # sched: [10:1.00]
+; CANNONLAKE-NEXT:    sha256msg1 %xmm1, %xmm0 # sched: [4:0.33]
+; CANNONLAKE-NEXT:    sha256msg1 (%rdi), %xmm0 # sched: [10:0.50]
 ; CANNONLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; ZNVER1-LABEL: test_sha256msg1:
@@ -171,7 +171,7 @@ define <4 x i32> @test_sha256msg2(<4 x i
 ; GENERIC-LABEL: test_sha256msg2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    sha256msg2 %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    sha256msg2 (%rdi), %xmm0 # sched: [10:1.00]
+; GENERIC-NEXT:    sha256msg2 (%rdi), %xmm0 # sched: [11:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; GOLDMONT-LABEL: test_sha256msg2:
@@ -182,8 +182,8 @@ define <4 x i32> @test_sha256msg2(<4 x i
 ;
 ; CANNONLAKE-LABEL: test_sha256msg2:
 ; CANNONLAKE:       # %bb.0:
-; CANNONLAKE-NEXT:    sha256msg2 %xmm1, %xmm0 # sched: [5:1.00]
-; CANNONLAKE-NEXT:    sha256msg2 (%rdi), %xmm0 # sched: [10:1.00]
+; CANNONLAKE-NEXT:    sha256msg2 %xmm1, %xmm0 # sched: [4:0.33]
+; CANNONLAKE-NEXT:    sha256msg2 (%rdi), %xmm0 # sched: [10:0.50]
 ; CANNONLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; ZNVER1-LABEL: test_sha256msg2:
@@ -204,7 +204,7 @@ define <4 x i32> @test_sha256rnds2(<4 x
 ; GENERIC-NEXT:    movaps %xmm0, %xmm3 # sched: [1:1.00]
 ; GENERIC-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [5:1.00]
-; GENERIC-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [10:1.00]
+; GENERIC-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [11:1.00]
 ; GENERIC-NEXT:    movaps %xmm3, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -221,8 +221,8 @@ define <4 x i32> @test_sha256rnds2(<4 x
 ; CANNONLAKE:       # %bb.0:
 ; CANNONLAKE-NEXT:    vmovaps %xmm0, %xmm3 # sched: [1:0.33]
 ; CANNONLAKE-NEXT:    vmovaps %xmm2, %xmm0 # sched: [1:0.33]
-; CANNONLAKE-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [5:1.00]
-; CANNONLAKE-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [10:1.00]
+; CANNONLAKE-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [4:0.33]
+; CANNONLAKE-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [10:0.50]
 ; CANNONLAKE-NEXT:    vmovaps %xmm3, %xmm0 # sched: [1:0.33]
 ; CANNONLAKE-NEXT:    retq # sched: [7:1.00]
 ;

Modified: llvm/trunk/test/CodeGen/X86/xop-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/xop-schedule.ll?rev=331445&r1=331444&r2=331445&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/xop-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/xop-schedule.ll Thu May  3 03:31:20 2018
@@ -724,7 +724,7 @@ define void @test_vpmacsswd(<2 x i64> %a
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmacsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
+; GENERIC-NEXT:    vpmacsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -744,7 +744,7 @@ define void @test_vpmacssww(<2 x i64> %a
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vpmacssww %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmacssww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
+; GENERIC-NEXT:    vpmacssww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -764,7 +764,7 @@ define void @test_vpmacswd(<2 x i64> %a0
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vpmacswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmacswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
+; GENERIC-NEXT:    vpmacswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -784,7 +784,7 @@ define void @test_vpmacsww(<2 x i64> %a0
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vpmacsww %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmacsww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
+; GENERIC-NEXT:    vpmacsww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -804,7 +804,7 @@ define void @test_vpmadcsswd(<2 x i64> %
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmadcsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
+; GENERIC-NEXT:    vpmadcsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -824,7 +824,7 @@ define void @test_vpmadcswd(<2 x i64> %a
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    #APP
 ; GENERIC-NEXT:    vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vpmadcswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
+; GENERIC-NEXT:    vpmadcswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
 ; GENERIC-NEXT:    #NO_APP
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;




More information about the llvm-commits mailing list