[llvm] r331999 - [X86] Split WriteVecALU/WriteVecLogic/WriteShuffle/WriteVarShuffle/WritePSADBW/WritePHAdd scheduler classes
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu May 10 10:06:09 PDT 2018
Author: rksimon
Date: Thu May 10 10:06:09 2018
New Revision: 331999
URL: http://llvm.org/viewvc/llvm-project?rev=331999&view=rev
Log:
[X86] Split WriteVecALU/WriteVecLogic/WriteShuffle/WriteVarShuffle/WritePSADBW/WritePHAdd scheduler classes
Split off XMM classes from the default (MMX) classes.
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrXOP.td
llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
llvm/trunk/lib/Target/X86/X86SchedHaswell.td
llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
llvm/trunk/lib/Target/X86/X86Schedule.td
llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
llvm/trunk/test/CodeGen/X86/3dnow-schedule.ll
llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll
llvm/trunk/test/CodeGen/X86/xop-schedule.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=331999&r1=331998&r2=331999&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Thu May 10 10:06:09 2018
@@ -1333,7 +1333,7 @@ multiclass avx512_int_broadcast_rm_vl<bi
avx512_int_broadcast_rm_lowering<_.info256, _.info256>,
EVEX_V256;
defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
- WriteShuffleLd, _.info128, _.info128>,
+ WriteShuffleXLd, _.info128, _.info128>,
EVEX_V128;
}
}
@@ -1353,7 +1353,8 @@ multiclass avx512_subvec_broadcast_rm<bi
(ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
(_Dst.VT (X86SubVBroadcast
(_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
- AVX5128IBase, EVEX, Sched<[WriteShuffleLd]>;
+ Sched<[SchedWriteShuffle.YMM.Folded]>,
+ AVX5128IBase, EVEX;
}
// This should be used for the AVX512DQ broadcast instructions. It disables
@@ -1367,7 +1368,8 @@ multiclass avx512_subvec_broadcast_rm_dq
(null_frag),
(_Dst.VT (X86SubVBroadcast
(_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
- AVX5128IBase, EVEX, Sched<[WriteShuffleLd]>;
+ Sched<[SchedWriteShuffle.YMM.Folded]>,
+ AVX5128IBase, EVEX;
}
let Predicates = [HasAVX512] in {
@@ -1646,7 +1648,7 @@ multiclass avx512_common_broadcast_i32x2
let Predicates = [HasDQI, HasVLX] in
defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle,
- WriteShuffleLd, _Dst.info128,
+ WriteShuffleXLd, _Dst.info128,
_Src.info128, _Src.info128, null_frag>,
EVEX_V128;
}
Modified: llvm/trunk/lib/Target/X86/X86InstrXOP.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrXOP.td?rev=331999&r1=331998&r2=331999&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrXOP.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrXOP.td Thu May 10 10:06:09 2018
@@ -14,11 +14,11 @@
multiclass xop2op<bits<8> opc, string OpcodeStr, Intrinsic Int, PatFrag memop> {
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[WritePHAdd]>;
+ [(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[SchedWritePHAdd.XMM]>;
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP,
- Sched<[WritePHAdd.Folded, ReadAfterLd]>;
+ Sched<[SchedWritePHAdd.XMM.Folded, ReadAfterLd]>;
}
let ExeDomain = SSEPackedInt in {
Modified: llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedBroadwell.td?rev=331999&r1=331998&r2=331999&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedBroadwell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedBroadwell.td Thu May 10 10:06:09 2018
@@ -265,8 +265,10 @@ defm : X86WriteRes<WriteVecMove,
defm : X86WriteRes<WriteEMMS, [BWPort01,BWPort15,BWPort015,BWPort0156], 31, [8,1,21,1], 31>;
defm : BWWriteResPair<WriteVecALU, [BWPort15], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
+defm : BWWriteResPair<WriteVecALUX, [BWPort15], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
defm : BWWriteResPair<WriteVecALUY, [BWPort15], 1, [1], 1, 6>; // Vector integer ALU op, no logicals (YMM/ZMM).
defm : BWWriteResPair<WriteVecLogic, [BWPort015], 1, [1], 1, 5>; // Vector integer and/or/xor.
+defm : BWWriteResPair<WriteVecLogicX,[BWPort015], 1, [1], 1, 5>; // Vector integer and/or/xor.
defm : BWWriteResPair<WriteVecLogicY,[BWPort015], 1, [1], 1, 6>; // Vector integer and/or/xor (YMM/ZMM).
defm : BWWriteResPair<WriteVecTest, [BWPort0,BWPort5], 2, [1,1], 2, 5>; // Vector integer TEST instructions.
defm : BWWriteResPair<WriteVecTestY, [BWPort0,BWPort5], 4, [1,1], 2, 6>; // Vector integer TEST instructions (YMM/ZMM).
@@ -276,8 +278,10 @@ defm : BWWriteResPair<WriteVecIMulY, [BW
defm : BWWriteResPair<WritePMULLD, [BWPort0], 10, [2], 2, 5>; // Vector PMULLD.
defm : BWWriteResPair<WritePMULLDY, [BWPort0], 10, [2], 2, 6>; // Vector PMULLD (YMM/ZMM).
defm : BWWriteResPair<WriteShuffle, [BWPort5], 1, [1], 1, 5>; // Vector shuffles.
+defm : BWWriteResPair<WriteShuffleX, [BWPort5], 1, [1], 1, 5>; // Vector shuffles.
defm : BWWriteResPair<WriteShuffleY, [BWPort5], 1, [1], 1, 6>; // Vector shuffles (YMM/ZMM).
defm : BWWriteResPair<WriteVarShuffle, [BWPort5], 1, [1], 1, 5>; // Vector variable shuffles.
+defm : BWWriteResPair<WriteVarShuffleX,[BWPort5], 1, [1], 1, 5>; // Vector variable shuffles.
defm : BWWriteResPair<WriteVarShuffleY,[BWPort5], 1, [1], 1, 6>; // Vector variable shuffles (YMM/ZMM).
defm : BWWriteResPair<WriteBlend, [BWPort5], 1, [1], 1, 5>; // Vector blends.
defm : BWWriteResPair<WriteBlendY, [BWPort5], 1, [1], 1, 6>; // Vector blends (YMM/ZMM).
@@ -286,6 +290,7 @@ defm : BWWriteResPair<WriteVarBlendY, [B
defm : BWWriteResPair<WriteMPSAD, [BWPort0, BWPort5], 7, [1, 2], 3, 5>; // Vector MPSAD.
defm : BWWriteResPair<WriteMPSADY, [BWPort0, BWPort5], 7, [1, 2], 3, 6>; // Vector MPSAD.
defm : BWWriteResPair<WritePSADBW, [BWPort0], 5, [1], 1, 5>; // Vector PSADBW.
+defm : BWWriteResPair<WritePSADBWX, [BWPort0], 5, [1], 1, 5>; // Vector PSADBW.
defm : BWWriteResPair<WritePSADBWY, [BWPort0], 5, [1], 1, 6>; // Vector PSADBW (YMM/ZMM).
defm : BWWriteResPair<WritePHMINPOS, [BWPort0], 5>; // Vector PHMINPOS.
@@ -448,6 +453,7 @@ def : WriteRes<WriteNop, []>;
defm : BWWriteResPair<WriteFHAdd, [BWPort1,BWPort5], 5, [1,2], 3, 5>;
defm : BWWriteResPair<WriteFHAddY, [BWPort1,BWPort5], 5, [1,2], 3, 6>;
defm : BWWriteResPair<WritePHAdd, [BWPort5,BWPort15], 3, [2,1], 3, 5>;
+defm : BWWriteResPair<WritePHAddX, [BWPort5,BWPort15], 3, [2,1], 3, 5>;
defm : BWWriteResPair<WritePHAddY, [BWPort5,BWPort15], 3, [2,1], 3, 6>;
// Remaining instrs.
Modified: llvm/trunk/lib/Target/X86/X86SchedHaswell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedHaswell.td?rev=331999&r1=331998&r2=331999&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedHaswell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedHaswell.td Thu May 10 10:06:09 2018
@@ -255,11 +255,13 @@ defm : X86WriteRes<WriteVecMaskedStore,
defm : X86WriteRes<WriteVecMaskedStoreY, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMove, [HWPort015], 1, [1], 1>;
-defm : HWWriteResPair<WriteVecLogic, [HWPort015], 1, [1], 1, 6>;
+defm : HWWriteResPair<WriteVecLogic, [HWPort015], 1, [1], 1, 5>;
+defm : HWWriteResPair<WriteVecLogicX,[HWPort015], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteVecLogicY,[HWPort015], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteVecTest, [HWPort0,HWPort5], 2, [1,1], 2, 6>;
defm : HWWriteResPair<WriteVecTestY, [HWPort0,HWPort5], 4, [1,1], 2, 7>;
-defm : HWWriteResPair<WriteVecALU, [HWPort15], 1, [1], 1, 6>;
+defm : HWWriteResPair<WriteVecALU, [HWPort15], 1, [1], 1, 5>;
+defm : HWWriteResPair<WriteVecALUX, [HWPort15], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteVecALUY, [HWPort15], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteVecIMul, [HWPort0], 5, [1], 1, 5>;
defm : HWWriteResPair<WriteVecIMulX, [HWPort0], 5, [1], 1, 6>;
@@ -267,8 +269,10 @@ defm : HWWriteResPair<WriteVecIMulY, [HW
defm : HWWriteResPair<WritePMULLD, [HWPort0], 10, [2], 2, 6>;
defm : HWWriteResPair<WritePMULLDY, [HWPort0], 10, [2], 2, 7>;
defm : HWWriteResPair<WriteShuffle, [HWPort5], 1, [1], 1, 5>;
+defm : HWWriteResPair<WriteShuffleX, [HWPort5], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteShuffleY, [HWPort5], 1, [1], 1, 7>;
-defm : HWWriteResPair<WriteVarShuffle, [HWPort5], 1, [1], 1, 6>;
+defm : HWWriteResPair<WriteVarShuffle, [HWPort5], 1, [1], 1, 5>;
+defm : HWWriteResPair<WriteVarShuffleX,[HWPort5], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteVarShuffleY,[HWPort5], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteBlend, [HWPort5], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteBlendY, [HWPort5], 1, [1], 1, 7>;
@@ -278,7 +282,8 @@ defm : HWWriteResPair<WriteVarBlend, [H
defm : HWWriteResPair<WriteVarBlendY, [HWPort5], 2, [2], 2, 7>;
defm : HWWriteResPair<WriteMPSAD, [HWPort0, HWPort5], 7, [1, 2], 3, 6>;
defm : HWWriteResPair<WriteMPSADY, [HWPort0, HWPort5], 7, [1, 2], 3, 7>;
-defm : HWWriteResPair<WritePSADBW, [HWPort0], 5, [1], 1, 6>;
+defm : HWWriteResPair<WritePSADBW, [HWPort0], 5, [1], 1, 5>;
+defm : HWWriteResPair<WritePSADBWX, [HWPort0], 5, [1], 1, 6>;
defm : HWWriteResPair<WritePSADBWY, [HWPort0], 5, [1], 1, 7>;
defm : HWWriteResPair<WritePHMINPOS, [HWPort0], 5, [1], 1, 6>;
@@ -684,7 +689,8 @@ def : InstRW<[HWWriteFXTRACT], (instrs F
defm : HWWriteResPair<WriteFHAdd, [HWPort1, HWPort5], 5, [1,2], 3, 6>;
defm : HWWriteResPair<WriteFHAddY, [HWPort1, HWPort5], 5, [1,2], 3, 7>;
-defm : HWWriteResPair<WritePHAdd, [HWPort5, HWPort15], 3, [2,1], 3, 6>;
+defm : HWWriteResPair<WritePHAdd, [HWPort5, HWPort15], 3, [2,1], 3, 5>;
+defm : HWWriteResPair<WritePHAddX, [HWPort5, HWPort15], 3, [2,1], 3, 6>;
defm : HWWriteResPair<WritePHAddY, [HWPort5, HWPort15], 3, [2,1], 3, 7>;
//=== Floating Point XMM and YMM Instructions ===//
@@ -913,26 +919,16 @@ def HWWriteResGroup12_2 : SchedWriteRes<
def: InstRW<[HWWriteResGroup12_2], (instrs IMUL16m, MUL16m)>;
def HWWriteResGroup13 : SchedWriteRes<[HWPort5,HWPort23]> {
- let Latency = 7;
+ let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup13], (instregex "(V?)PACKSSDWrm",
- "(V?)PACKSSWBrm",
- "(V?)PACKUSDWrm",
- "(V?)PACKUSWBrm",
- "(V?)PALIGNRrmi",
- "(V?)PSHUFDmi",
- "(V?)PSHUFHWmi",
- "(V?)PSHUFLWmi",
- "(V?)PUNPCKHBWrm",
- "(V?)PUNPCKHDQrm",
- "(V?)PUNPCKHQDQrm",
- "(V?)PUNPCKHWDrm",
- "(V?)PUNPCKLBWrm",
- "(V?)PUNPCKLDQrm",
- "(V?)PUNPCKLQDQrm",
- "(V?)PUNPCKLWDrm")>;
+def: InstRW<[HWWriteResGroup13], (instregex "(V?)PMOV(SX|ZX)BDrm",
+ "(V?)PMOV(SX|ZX)BQrm",
+ "(V?)PMOV(SX|ZX)BWrm",
+ "(V?)PMOV(SX|ZX)DQrm",
+ "(V?)PMOV(SX|ZX)WDrm",
+ "(V?)PMOV(SX|ZX)WQrm")>;
def HWWriteResGroup13_1 : SchedWriteRes<[HWPort5,HWPort23]> {
let Latency = 8;
@@ -943,13 +939,6 @@ def: InstRW<[HWWriteResGroup13_1], (inst
"VPMOVSXBQYrm",
"VPMOVSXWQYrm")>;
-def HWWriteResGroup13_2 : SchedWriteRes<[HWPort5,HWPort23]> {
- let Latency = 6;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[HWWriteResGroup13_2], (instregex "MMX_PSHUFBrm")>;
-
def HWWriteResGroup14 : SchedWriteRes<[HWPort6,HWPort23]> {
let Latency = 6;
let NumMicroOps = 2;
@@ -974,14 +963,7 @@ def: InstRW<[HWWriteResGroup16], (instre
"BLSI(32|64)rm",
"BLSMSK(32|64)rm",
"BLSR(32|64)rm",
- "MOVBE(16|32|64)rm",
- "MMX_PABS(B|D|W)rm",
- "MMX_P(ADD|SUB)(B|D|W|Q)irm",
- "MMX_P(ADD|SUB)(U?)S(B|W)irm",
- "MMX_PAVG(B|W)irm",
- "MMX_PCMP(EQ|GT)(B|D|W)irm",
- "MMX_P(MAX|MIN)(SW|UB)irm",
- "MMX_PSIGN(B|D|W)rm")>;
+ "MOVBE(16|32|64)rm")>;
def HWWriteResGroup17 : SchedWriteRes<[HWPort23,HWPort015]> {
let Latency = 7;
@@ -992,16 +974,6 @@ def: InstRW<[HWWriteResGroup17], (instre
"VINSERTI128rm",
"VPBLENDDrmi")>;
-def HWWriteResGroup17_1 : SchedWriteRes<[HWPort23,HWPort015]> {
- let Latency = 6;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[HWWriteResGroup17_1], (instregex "MMX_PANDNirm",
- "MMX_PANDirm",
- "MMX_PORirm",
- "MMX_PXORirm")>;
-
def HWWriteResGroup17_2 : SchedWriteRes<[HWPort23,HWPort015]> {
let Latency = 8;
let NumMicroOps = 2;
@@ -1356,13 +1328,6 @@ def HWWriteResGroup62 : SchedWriteRes<[H
def: InstRW<[HWWriteResGroup62], (instregex "IST(T?)_FP(16|32|64)m",
"IST_F(16|32)m")>;
-def HWWriteResGroup64 : SchedWriteRes<[HWPort5,HWPort23,HWPort15]> {
- let Latency = 8;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
-}
-def: InstRW<[HWWriteResGroup64], (instregex "MMX_PH(ADD|SUB)(D|SW|W)rm")>;
-
def HWWriteResGroup65 : SchedWriteRes<[HWPort23,HWPort06,HWPort0156]> {
let Latency = 8;
let NumMicroOps = 4;
@@ -1594,13 +1559,6 @@ def HWWriteResGroup91_3 : SchedWriteRes<
def: InstRW<[HWWriteResGroup91_3], (instregex "MUL_F(32|64)m",
"VPCMPGTQYrm")>;
-def HWWriteResGroup91_5 : SchedWriteRes<[HWPort0,HWPort23]> {
- let Latency = 10;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[HWWriteResGroup91_5], (instregex "MMX_PSADBWirm")>;
-
def HWWriteResGroup93 : SchedWriteRes<[HWPort1,HWPort5]> {
let Latency = 5;
let NumMicroOps = 3;
Modified: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td?rev=331999&r1=331998&r2=331999&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td Thu May 10 10:06:09 2018
@@ -235,11 +235,13 @@ defm : X86WriteRes<WriteVecMaskedStore,
defm : X86WriteRes<WriteVecMaskedStoreY, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
defm : X86WriteRes<WriteVecMove, [SBPort05], 1, [1], 1>;
-defm : SBWriteResPair<WriteVecLogic, [SBPort015], 1, [1], 1, 6>;
+defm : SBWriteResPair<WriteVecLogic, [SBPort015], 1, [1], 1, 5>;
+defm : SBWriteResPair<WriteVecLogicX,[SBPort015], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteVecLogicY,[SBPort015], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteVecTest, [SBPort0,SBPort5], 2, [1,1], 2, 6>;
defm : SBWriteResPair<WriteVecTestY, [SBPort0,SBPort5], 2, [1,1], 2, 7>;
-defm : SBWriteResPair<WriteVecALU, [SBPort15], 1, [1], 1, 6>;
+defm : SBWriteResPair<WriteVecALU, [SBPort1], 3, [1], 1, 5>;
+defm : SBWriteResPair<WriteVecALUX, [SBPort15], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteVecALUY, [SBPort15], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteVecIMul, [SBPort0], 5, [1], 1, 5>;
defm : SBWriteResPair<WriteVecIMulX, [SBPort0], 5, [1], 1, 6>;
@@ -247,8 +249,10 @@ defm : SBWriteResPair<WriteVecIMulY, [SB
defm : SBWriteResPair<WritePMULLD, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WritePMULLDY, [SBPort0], 5, [1], 1, 7>; // TODO this is probably wrong for 256/512-bit for the "generic" model
defm : SBWriteResPair<WriteShuffle, [SBPort5], 1, [1], 1, 5>;
+defm : SBWriteResPair<WriteShuffleX, [SBPort15], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteShuffleY, [SBPort5], 1, [1], 1, 7>;
-defm : SBWriteResPair<WriteVarShuffle, [SBPort15], 1, [1], 1, 6>;
+defm : SBWriteResPair<WriteVarShuffle, [SBPort15], 1, [1], 1, 5>;
+defm : SBWriteResPair<WriteVarShuffleX, [SBPort15], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteVarShuffleY, [SBPort15], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteBlend, [SBPort15], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteBlendY, [SBPort15], 1, [1], 1, 7>;
@@ -256,7 +260,8 @@ defm : SBWriteResPair<WriteVarBlend, [SB
defm : SBWriteResPair<WriteVarBlendY,[SBPort15], 2, [2], 2, 7>;
defm : SBWriteResPair<WriteMPSAD, [SBPort0, SBPort15], 7, [1,2], 3, 6>;
defm : SBWriteResPair<WriteMPSADY, [SBPort0, SBPort15], 7, [1,2], 3, 7>;
-defm : SBWriteResPair<WritePSADBW, [SBPort0], 5, [1], 1, 6>;
+defm : SBWriteResPair<WritePSADBW, [SBPort0], 5, [1], 1, 5>;
+defm : SBWriteResPair<WritePSADBWX, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WritePSADBWY, [SBPort0], 5, [1], 1, 7>;
defm : SBWriteResPair<WritePHMINPOS, [SBPort0], 5, [1], 1, 6>;
@@ -295,7 +300,8 @@ def : WriteRes<WriteVecExtractSt, [SBPor
defm : SBWriteResPair<WriteFHAdd, [SBPort1,SBPort5], 5, [1,2], 3, 6>;
defm : SBWriteResPair<WriteFHAddY, [SBPort1,SBPort5], 5, [1,2], 3, 7>;
-defm : SBWriteResPair<WritePHAdd, [SBPort15], 3, [3], 3, 6>;
+defm : SBWriteResPair<WritePHAdd, [SBPort15], 3, [3], 3, 5>;
+defm : SBWriteResPair<WritePHAddX, [SBPort15], 3, [3], 3, 6>;
defm : SBWriteResPair<WritePHAddY, [SBPort15], 3, [3], 3, 7>;
////////////////////////////////////////////////////////////////////////////////
@@ -471,37 +477,10 @@ def SBWriteResGroup5 : SchedWriteRes<[SB
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SBWriteResGroup5], (instregex "MMX_PALIGNRrri",
- "(V?)PACKSSDWrr",
- "(V?)PACKSSWBrr",
- "(V?)PACKUSDWrr",
- "(V?)PACKUSWBrr",
- "(V?)PALIGNRrri",
- "(V?)PMOVSXBDrr",
- "(V?)PMOVSXBQrr",
- "(V?)PMOVSXBWrr",
- "(V?)PMOVSXDQrr",
- "(V?)PMOVSXWDrr",
- "(V?)PMOVSXWQrr",
- "(V?)PMOVZXBDrr",
- "(V?)PMOVZXBQrr",
- "(V?)PMOVZXBWrr",
- "(V?)PMOVZXDQrr",
- "(V?)PMOVZXWDrr",
- "(V?)PMOVZXWQrr",
- "(V?)PSHUFDri",
- "(V?)PSHUFHWri",
- "(V?)PSHUFLWri",
- "(V?)PSLLDQri",
- "(V?)PSRLDQri",
- "(V?)PUNPCKHBWrr",
- "(V?)PUNPCKHDQrr",
- "(V?)PUNPCKHQDQrr",
- "(V?)PUNPCKHWDrr",
- "(V?)PUNPCKLBWrr",
- "(V?)PUNPCKLDQrr",
- "(V?)PUNPCKLQDQrr",
- "(V?)PUNPCKLWDrr")>;
+def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABS(B|D|W)rr",
+ "MMX_PADDQirr",
+ "MMX_PALIGNRrri",
+ "MMX_PSIGN(B|D|W)rr")>;
def SBWriteResGroup6 : SchedWriteRes<[SBPort015]> {
let Latency = 1;
@@ -608,12 +587,6 @@ def SBWriteResGroup21 : SchedWriteRes<[S
let ResourceCycles = [1];
}
def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTPI2PSirr",
- "MMX_PADD(B|D|W)irr",
- "MMX_P(ADD|SUB)(U?)S(B|W)irr",
- "MMX_PAVG(B|W)irr",
- "MMX_PCMP(EQ|GT)(B|D|W)irr",
- "MMX_P(MAX|MIN)(SW|UB)irr",
- "MMX_PSUB(B|D|Q|W)irr",
"PUSHFS64",
"(V?)CVTDQ2PS(Y?)rr")>;
@@ -884,7 +857,6 @@ def SBWriteResGroup51 : SchedWriteRes<[S
}
def: InstRW<[SBWriteResGroup51], (instregex "MMX_PABS(B|D|W)rm",
"MMX_PALIGNRrmi",
- "MMX_PSHUFBrm",
"MMX_PSIGN(B|D|W)rm")>;
def SBWriteResGroup52 : SchedWriteRes<[SBPort23,SBPort015]> {
@@ -893,11 +865,7 @@ def SBWriteResGroup52 : SchedWriteRes<[S
let ResourceCycles = [1,1];
}
def: InstRW<[SBWriteResGroup52], (instregex "LODSL",
- "LODSQ",
- "MMX_PANDirm",
- "MMX_PANDNirm",
- "MMX_PORirm",
- "MMX_PXORirm")>;
+ "LODSQ")>;
def SBWriteResGroup53 : SchedWriteRes<[SBPort4,SBPort23]> {
let Latency = 6;
@@ -944,46 +912,7 @@ def SBWriteResGroup59 : SchedWriteRes<[S
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SBWriteResGroup59], (instregex "(V?)PACKSSDWrm",
- "(V?)PACKSSWBrm",
- "(V?)PACKUSDWrm",
- "(V?)PACKUSWBrm",
- "(V?)PALIGNRrmi",
- "(V?)PMOVSXBDrm",
- "(V?)PMOVSXBQrm",
- "(V?)PMOVSXBWrm",
- "(V?)PMOVSXDQrm",
- "(V?)PMOVSXWDrm",
- "(V?)PMOVSXWQrm",
- "(V?)PMOVZXBDrm",
- "(V?)PMOVZXBQrm",
- "(V?)PMOVZXBWrm",
- "(V?)PMOVZXDQrm",
- "(V?)PMOVZXWDrm",
- "(V?)PMOVZXWQrm",
- "(V?)PSHUFDmi",
- "(V?)PSHUFHWmi",
- "(V?)PSHUFLWmi",
- "(V?)PUNPCKHBWrm",
- "(V?)PUNPCKHDQrm",
- "(V?)PUNPCKHQDQrm",
- "(V?)PUNPCKHWDrm",
- "(V?)PUNPCKLBWrm",
- "(V?)PUNPCKLDQrm",
- "(V?)PUNPCKLQDQrm",
- "(V?)PUNPCKLWDrm")>;
-
-def SBWriteResGroup59a : SchedWriteRes<[SBPort23,SBPort1]> {
- let Latency = 8;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup59a], (instregex "MMX_PADD(B|D|W)irm",
- "MMX_P(ADD|SUB)(U?)S(B|W)irm",
- "MMX_PAVG(B|W)irm",
- "MMX_PCMP(EQ|GT)(B|D|W)irm",
- "MMX_P(MAX|MIN)(SW|UB)irm",
- "MMX_PSUB(B|D|Q|W)irm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "MMX_PADDQirm")>;
def SBWriteResGroup62 : SchedWriteRes<[SBPort5,SBPort23]> {
let Latency = 7;
@@ -1060,13 +989,6 @@ def SBWriteResGroup77 : SchedWriteRes<[S
}
def: InstRW<[SBWriteResGroup77], (instregex "(V?)(U?)COMI(SD|SS)rm")>;
-def SBWriteResGroup80 : SchedWriteRes<[SBPort23,SBPort15]> {
- let Latency = 8;
- let NumMicroOps = 4;
- let ResourceCycles = [1,3];
-}
-def: InstRW<[SBWriteResGroup80], (instregex "MMX_PH(ADD|SUB)(D|SW|W)rm")>;
-
def SBWriteResGroup81 : SchedWriteRes<[SBPort23,SBPort015]> {
let Latency = 8;
let NumMicroOps = 4;
@@ -1134,13 +1056,6 @@ def SBWriteResGroup88 : SchedWriteRes<[S
def: InstRW<[SBWriteResGroup88], (instregex "SHLD(16|32|64)mri8",
"SHRD(16|32|64)mri8")>;
-def SBWriteResGroup89_2 : SchedWriteRes<[SBPort0,SBPort23]> {
- let Latency = 10;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup89_2], (instregex "MMX_PSADBWirm")>;
-
def SBWriteResGroup90 : SchedWriteRes<[SBPort1,SBPort23]> {
let Latency = 9;
let NumMicroOps = 2;
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td?rev=331999&r1=331998&r2=331999&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td Thu May 10 10:06:09 2018
@@ -256,9 +256,11 @@ defm : X86WriteRes<WriteVecMaskedStore,
defm : X86WriteRes<WriteVecMaskedStoreY, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMove, [SKLPort015], 1, [1], 1>;
-defm : SKLWriteResPair<WriteVecALU, [SKLPort01], 1, [1], 1, 6>; // Vector integer ALU op, no logicals.
+defm : SKLWriteResPair<WriteVecALU, [SKLPort05], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
+defm : SKLWriteResPair<WriteVecALUX, [SKLPort01], 1, [1], 1, 6>; // Vector integer ALU op, no logicals (XMM).
defm : SKLWriteResPair<WriteVecALUY, [SKLPort01], 1, [1], 1, 7>; // Vector integer ALU op, no logicals (YMM/ZMM).
-defm : SKLWriteResPair<WriteVecLogic, [SKLPort015], 1, [1], 1, 6>; // Vector integer and/or/xor.
+defm : SKLWriteResPair<WriteVecLogic, [SKLPort05], 1, [1], 1, 5>; // Vector integer and/or/xor.
+defm : SKLWriteResPair<WriteVecLogicX,[SKLPort015], 1, [1], 1, 6>; // Vector integer and/or/xor (XMM).
defm : SKLWriteResPair<WriteVecLogicY,[SKLPort015], 1, [1], 1, 7>; // Vector integer and/or/xor (YMM/ZMM).
defm : SKLWriteResPair<WriteVecTest, [SKLPort0,SKLPort5], 3, [1,1], 2, 6>; // Vector integer TEST instructions.
defm : SKLWriteResPair<WriteVecTestY, [SKLPort0,SKLPort5], 3, [1,1], 2, 7>; // Vector integer TEST instructions (YMM/ZMM).
@@ -268,17 +270,20 @@ defm : SKLWriteResPair<WriteVecIMulY, [S
defm : SKLWriteResPair<WritePMULLD, [SKLPort01], 10, [2], 2, 6>; // Vector PMULLD.
defm : SKLWriteResPair<WritePMULLDY, [SKLPort01], 10, [2], 2, 7>; // Vector PMULLD (YMM/ZMM).
defm : SKLWriteResPair<WriteShuffle, [SKLPort5], 1, [1], 1, 5>; // Vector shuffles.
+defm : SKLWriteResPair<WriteShuffleX, [SKLPort5], 1, [1], 1, 6>; // Vector shuffles (XMM).
defm : SKLWriteResPair<WriteShuffleY, [SKLPort5], 1, [1], 1, 7>; // Vector shuffles (YMM/ZMM).
-defm : SKLWriteResPair<WriteVarShuffle, [SKLPort5], 1, [1], 1, 6>; // Vector shuffles.
+defm : SKLWriteResPair<WriteVarShuffle, [SKLPort5], 1, [1], 1, 5>; // Vector shuffles.
+defm : SKLWriteResPair<WriteVarShuffleX, [SKLPort5], 1, [1], 1, 6>; // Vector shuffles (XMM).
defm : SKLWriteResPair<WriteVarShuffleY, [SKLPort5], 1, [1], 1, 7>; // Vector shuffles (YMM/ZMM).
defm : SKLWriteResPair<WriteBlend, [SKLPort5], 1, [1], 1, 6>; // Vector blends.
defm : SKLWriteResPair<WriteBlendY, [SKLPort5], 1, [1], 1, 7>; // Vector blends (YMM/ZMM).
defm : SKLWriteResPair<WriteVarBlend, [SKLPort015], 2, [2], 2, 6>; // Vector variable blends.
defm : SKLWriteResPair<WriteVarBlendY, [SKLPort015], 2, [2], 2, 6>; // Vector variable blends (YMM/ZMM).
defm : SKLWriteResPair<WriteMPSAD, [SKLPort5], 4, [2], 2, 6>; // Vector MPSAD.
-defm : SKLWriteResPair<WriteMPSADY, [SKLPort5], 4, [2], 2, 7>; // Vector MPSAD.
-defm : SKLWriteResPair<WritePSADBW, [SKLPort5], 3, [1], 1, 6>; // Vector PSADBW.
-defm : SKLWriteResPair<WritePSADBWY, [SKLPort5], 3, [1], 1, 7>; // Vector PSADBW.
+defm : SKLWriteResPair<WriteMPSADY, [SKLPort5], 4, [2], 2, 7>; // Vector MPSAD (YMM/ZMM).
+defm : SKLWriteResPair<WritePSADBW, [SKLPort5], 3, [1], 1, 5>; // Vector PSADBW.
+defm : SKLWriteResPair<WritePSADBWX, [SKLPort5], 3, [1], 1, 6>; // Vector PSADBW (XMM).
+defm : SKLWriteResPair<WritePSADBWY, [SKLPort5], 3, [1], 1, 7>; // Vector PSADBW (YMM/ZMM).
defm : SKLWriteResPair<WritePHMINPOS, [SKLPort01], 4, [1], 1, 6>; // Vector PHMINPOS.
// Vector integer shifts.
@@ -450,7 +455,8 @@ def : WriteRes<WriteNop, []>;
defm : SKLWriteResPair<WriteFHAdd, [SKLPort5,SKLPort01], 6, [2,1], 3, 6>;
defm : SKLWriteResPair<WriteFHAddY, [SKLPort5,SKLPort01], 6, [2,1], 3, 7>;
-defm : SKLWriteResPair<WritePHAdd, [SKLPort5,SKLPort015], 3, [2,1], 3, 6>;
+defm : SKLWriteResPair<WritePHAdd, [SKLPort5,SKLPort05], 3, [2,1], 3, 5>;
+defm : SKLWriteResPair<WritePHAddX, [SKLPort5,SKLPort015], 3, [2,1], 3, 6>;
defm : SKLWriteResPair<WritePHAddY, [SKLPort5,SKLPort015], 3, [2,1], 3, 7>;
// Remaining instrs.
@@ -497,15 +503,7 @@ def SKLWriteResGroup6 : SchedWriteRes<[S
let ResourceCycles = [1];
}
def: InstRW<[SKLWriteResGroup6], (instrs FINCSTP, FNOP)>;
-def: InstRW<[SKLWriteResGroup6], (instregex "MMX_MOVQ64rr",
- "MMX_PABS(B|D|W)rr",
- "MMX_PADD(B|D|Q|W)irr",
- "MMX_PANDNirr",
- "MMX_PANDirr",
- "MMX_PORirr",
- "MMX_PSIGN(B|D|W)rr",
- "MMX_PSUB(B|D|Q|W)irr",
- "MMX_PXORirr")>;
+def: InstRW<[SKLWriteResGroup6], (instregex "MMX_MOVQ64rr")>;
def SKLWriteResGroup7 : SchedWriteRes<[SKLPort06]> {
let Latency = 1;
@@ -768,13 +766,6 @@ def SKLWriteResGroup36 : SchedWriteRes<[
def: InstRW<[SKLWriteResGroup36], (instregex "(V?)PHADDSW(Y?)rr",
"(V?)PHSUBSW(Y?)rr")>;
-def SKLWriteResGroup37 : SchedWriteRes<[SKLPort5,SKLPort05]> {
- let Latency = 3;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[SKLWriteResGroup37], (instregex "MMX_PH(ADD|SUB)(D|W)rr")>;
-
def SKLWriteResGroup39 : SchedWriteRes<[SKLPort5,SKLPort0156]> {
let Latency = 3;
let NumMicroOps = 3;
@@ -1037,20 +1028,6 @@ def SKLWriteResGroup72 : SchedWriteRes<[
def: InstRW<[SKLWriteResGroup72], (instregex "FARJMP64",
"JMP(16|32|64)m")>;
-def SKLWriteResGroup73 : SchedWriteRes<[SKLPort23,SKLPort05]> {
- let Latency = 6;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PABS(B|D|W)rm",
- "MMX_PADD(B|D|Q|W)irm",
- "MMX_PANDNirm",
- "MMX_PANDirm",
- "MMX_PORirm",
- "MMX_PSIGN(B|D|W)rm",
- "MMX_PSUB(B|D|Q|W)irm",
- "MMX_PXORirm")>;
-
def SKLWriteResGroup74 : SchedWriteRes<[SKLPort23,SKLPort06]> {
let Latency = 6;
let NumMicroOps = 2;
@@ -1165,35 +1142,16 @@ def SKLWriteResGroup86 : SchedWriteRes<[
def: InstRW<[SKLWriteResGroup86], (instregex "VCVTDQ2PDYrr")>;
def SKLWriteResGroup88 : SchedWriteRes<[SKLPort5,SKLPort23]> {
- let Latency = 7;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKLWriteResGroup88], (instregex "(V?)PACKSSDWrm",
- "(V?)PACKSSWBrm",
- "(V?)PACKUSDWrm",
- "(V?)PACKUSWBrm",
- "(V?)PALIGNRrmi",
- "VPBROADCASTBrm",
- "VPBROADCASTWrm",
- "(V?)PSHUFDmi",
- "(V?)PSHUFHWmi",
- "(V?)PSHUFLWmi",
- "(V?)PUNPCKHBWrm",
- "(V?)PUNPCKHDQrm",
- "(V?)PUNPCKHQDQrm",
- "(V?)PUNPCKHWDrm",
- "(V?)PUNPCKLBWrm",
- "(V?)PUNPCKLDQrm",
- "(V?)PUNPCKLQDQrm",
- "(V?)PUNPCKLWDrm")>;
-
-def SKLWriteResGroup88a : SchedWriteRes<[SKLPort5,SKLPort23]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup88a], (instregex "MMX_PSHUFBrm")>;
+def: InstRW<[SKLWriteResGroup88], (instregex "(V?)PMOV(SX|ZX)BDrm",
+ "(V?)PMOV(SX|ZX)BQrm",
+ "(V?)PMOV(SX|ZX)BWrm",
+ "(V?)PMOV(SX|ZX)DQrm",
+ "(V?)PMOV(SX|ZX)WDrm",
+ "(V?)PMOV(SX|ZX)WQrm")>;
def SKLWriteResGroup89 : SchedWriteRes<[SKLPort5,SKLPort01]> {
let Latency = 7;
@@ -1326,7 +1284,6 @@ def: InstRW<[SKLWriteResGroup108], (inst
"FCOM64m",
"FCOMP32m",
"FCOMP64m",
- "MMX_PSADBWirm", // TODO - SKLWriteResGroup120??
"VPBROADCASTBYrm",
"VPBROADCASTWYrm",
"VPMOVSXBDYrm",
@@ -1349,13 +1306,6 @@ def SKLWriteResGroup112 : SchedWriteRes<
}
def: InstRW<[SKLWriteResGroup112], (instregex "MMX_PH(ADD|SUB)SWrm")>;
-def SKLWriteResGroup113 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort05]> {
- let Latency = 8;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
-}
-def: InstRW<[SKLWriteResGroup113], (instregex "MMX_PH(ADD|SUB)(D|W)rm")>;
-
def SKLWriteResGroup114 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237,SKLPort01]> {
let Latency = 8;
let NumMicroOps = 4;
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td?rev=331999&r1=331998&r2=331999&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td Thu May 10 10:06:09 2018
@@ -256,9 +256,11 @@ defm : X86WriteRes<WriteVecMaskedStore,
defm : X86WriteRes<WriteVecMaskedStoreY, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMove, [SKXPort015], 1, [1], 1>;
-defm : SKXWriteResPair<WriteVecALU, [SKXPort01], 1, [1], 1, 6>; // Vector integer ALU op, no logicals.
+defm : SKXWriteResPair<WriteVecALU, [SKXPort05], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
+defm : SKXWriteResPair<WriteVecALUX, [SKXPort01], 1, [1], 1, 6>; // Vector integer ALU op, no logicals (XMM).
defm : SKXWriteResPair<WriteVecALUY, [SKXPort01], 1, [1], 1, 7>; // Vector integer ALU op, no logicals (YMM/ZMM).
-defm : SKXWriteResPair<WriteVecLogic, [SKXPort015], 1, [1], 1, 6>; // Vector integer and/or/xor.
+defm : SKXWriteResPair<WriteVecLogic, [SKXPort05], 1, [1], 1, 5>; // Vector integer and/or/xor.
+defm : SKXWriteResPair<WriteVecLogicX,[SKXPort015], 1, [1], 1, 6>; // Vector integer and/or/xor (XMM).
defm : SKXWriteResPair<WriteVecLogicY,[SKXPort015], 1, [1], 1, 7>; // Vector integer and/or/xor (YMM/ZMM).
defm : SKXWriteResPair<WriteVecTest, [SKXPort0,SKXPort5], 3, [1,1], 2, 6>; // Vector integer TEST instructions.
defm : SKXWriteResPair<WriteVecTestY, [SKXPort0,SKXPort5], 3, [1,1], 2, 7>; // Vector integer TEST instructions (YMM/ZMM).
@@ -268,8 +270,10 @@ defm : SKXWriteResPair<WriteVecIMulY, [S
defm : SKXWriteResPair<WritePMULLD, [SKXPort015], 10, [2], 2, 6>; // Vector PMULLD.
defm : SKXWriteResPair<WritePMULLDY, [SKXPort015], 10, [2], 2, 7>; // Vector PMULLD (YMM/ZMM).
defm : SKXWriteResPair<WriteShuffle, [SKXPort5], 1, [1], 1, 5>; // Vector shuffles.
+defm : SKXWriteResPair<WriteShuffleX, [SKXPort5], 1, [1], 1, 6>; // Vector shuffles (XMM).
defm : SKXWriteResPair<WriteShuffleY, [SKXPort5], 1, [1], 1, 7>; // Vector shuffles (YMM/ZMM).
-defm : SKXWriteResPair<WriteVarShuffle, [SKXPort5], 1, [1], 1, 6>; // Vector variable shuffles.
+defm : SKXWriteResPair<WriteVarShuffle, [SKXPort5], 1, [1], 1, 5>; // Vector variable shuffles.
+defm : SKXWriteResPair<WriteVarShuffleX, [SKXPort5], 1, [1], 1, 6>; // Vector variable shuffles (XMM).
defm : SKXWriteResPair<WriteVarShuffleY, [SKXPort5], 1, [1], 1, 7>; // Vector variable shuffles (YMM/ZMM).
defm : SKXWriteResPair<WriteBlend, [SKXPort5], 1, [1], 1, 6>; // Vector blends.
defm : SKXWriteResPair<WriteBlendY,[SKXPort5], 1, [1], 1, 7>; // Vector blends (YMM/ZMM).
@@ -277,7 +281,8 @@ defm : SKXWriteResPair<WriteVarBlend, [S
defm : SKXWriteResPair<WriteVarBlendY,[SKXPort015], 2, [2], 2, 6>; // Vector variable blends (YMM/ZMM).
defm : SKXWriteResPair<WriteMPSAD, [SKXPort5], 4, [2], 2, 6>; // Vector MPSAD.
defm : SKXWriteResPair<WriteMPSADY, [SKXPort5], 4, [2], 2, 7>; // Vector MPSAD.
-defm : SKXWriteResPair<WritePSADBW, [SKXPort5], 3, [1], 1, 6>; // Vector PSADBW.
+defm : SKXWriteResPair<WritePSADBW, [SKXPort5], 3, [1], 1, 5>; // Vector PSADBW.
+defm : SKXWriteResPair<WritePSADBWX, [SKXPort5], 3, [1], 1, 6>; // Vector PSADBW.
defm : SKXWriteResPair<WritePSADBWY, [SKXPort5], 3, [1], 1, 7>; // Vector PSADBW.
defm : SKXWriteResPair<WritePHMINPOS, [SKXPort015], 4, [1], 1, 6>; // Vector PHMINPOS.
@@ -450,7 +455,8 @@ def : WriteRes<WriteNop, []>;
defm : SKXWriteResPair<WriteFHAdd, [SKXPort5,SKXPort015], 6, [2,1], 3, 6>;
defm : SKXWriteResPair<WriteFHAddY, [SKXPort5,SKXPort015], 6, [2,1], 3, 7>;
-defm : SKXWriteResPair<WritePHAdd, [SKXPort5,SKXPort015], 3, [2,1], 3, 6>;
+defm : SKXWriteResPair<WritePHAdd, [SKXPort5,SKXPort05], 3, [2,1], 3, 5>;
+defm : SKXWriteResPair<WritePHAddX, [SKXPort5,SKXPort015], 3, [2,1], 3, 6>;
defm : SKXWriteResPair<WritePHAddY, [SKXPort5,SKXPort015], 3, [2,1], 3, 7>;
// Remaining instrs.
@@ -511,15 +517,7 @@ def SKXWriteResGroup6 : SchedWriteRes<[S
let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup6], (instrs FINCSTP, FNOP)>;
-def: InstRW<[SKXWriteResGroup6], (instregex "MMX_MOVQ64rr",
- "MMX_PABS(B|D|W)rr",
- "MMX_PADD(B|D|Q|W)irr",
- "MMX_PANDNirr",
- "MMX_PANDirr",
- "MMX_PORirr",
- "MMX_PSIGN(B|D|W)rr",
- "MMX_PSUB(B|D|Q|W)irr",
- "MMX_PXORirr")>;
+def: InstRW<[SKXWriteResGroup6], (instregex "MMX_MOVQ64rr")>;
def SKXWriteResGroup7 : SchedWriteRes<[SKXPort06]> {
let Latency = 1;
@@ -601,7 +599,6 @@ def: InstRW<[SKXWriteResGroup11], (instr
"MMX_MOVD64mr",
"MMX_MOVNTQmr",
"MMX_MOVQ64mr",
- "MOVNTDQmr",
"MOVNTI_64mr",
"MOVNTImr",
"ST_FP32m",
@@ -847,13 +844,6 @@ def SKXWriteResGroup38 : SchedWriteRes<[
}
def: InstRW<[SKXWriteResGroup38], (instregex "(V?)PH(ADD|SUB)SW(Y?)rr")>;
-def SKXWriteResGroup39 : SchedWriteRes<[SKXPort5,SKXPort05]> {
- let Latency = 3;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[SKXWriteResGroup39], (instregex "MMX_PH(ADD|SUB)(D|W)rr")>;
-
def SKXWriteResGroup41 : SchedWriteRes<[SKXPort5,SKXPort0156]> {
let Latency = 3;
let NumMicroOps = 3;
@@ -1250,20 +1240,6 @@ def SKXWriteResGroup76 : SchedWriteRes<[
def: InstRW<[SKXWriteResGroup76], (instregex "FARJMP64",
"JMP(16|32|64)m")>;
-def SKXWriteResGroup77 : SchedWriteRes<[SKXPort23,SKXPort05]> {
- let Latency = 6;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKXWriteResGroup77], (instregex "MMX_PABS(B|D|W)rm",
- "MMX_PADD(B|D|Q|W)irm",
- "MMX_PANDNirm",
- "MMX_PANDirm",
- "MMX_PORirm",
- "MMX_PSIGN(B|D|W)rm",
- "MMX_PSUB(B|D|Q|W)irm",
- "MMX_PXORirm")>;
-
def SKXWriteResGroup78 : SchedWriteRes<[SKXPort23,SKXPort06]> {
let Latency = 6;
let NumMicroOps = 2;
@@ -1394,52 +1370,19 @@ def SKXWriteResGroup92 : SchedWriteRes<[
let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup92], (instregex "VMOVSDZrm(b?)",
- "VMOVSSZrm(b?)",
- "VPACKSSDWZ128rm(b?)",
- "(V?)PACKSSDWrm",
- "VPACKSSWBZ128rm(b?)",
- "(V?)PACKSSWBrm",
- "VPACKUSDWZ128rm(b?)",
- "(V?)PACKUSDWrm",
- "VPACKUSWBZ128rm(b?)",
- "(V?)PACKUSWBrm",
- "VPALIGNRZ128rmi(b?)",
- "(V?)PALIGNRrmi",
- "VPBROADCASTBZ128m(b?)",
- "VPBROADCASTBrm",
- "VPBROADCASTWZ128m(b?)",
- "VPBROADCASTWrm",
- "VPSHUFDZ128m(b?)i",
- "(V?)PSHUFDmi",
- "VPSHUFHWZ128mi(b?)",
- "(V?)PSHUFHWmi",
- "VPSHUFLWZ128mi(b?)",
- "(V?)PSHUFLWmi",
- "VPSLLDQZ128rm(b?)",
- "VPSRLDQZ128rm(b?)",
- "VPUNPCKHBWZ128rm(b?)",
- "(V?)PUNPCKHBWrm",
- "VPUNPCKHDQZ128rm(b?)",
- "(V?)PUNPCKHDQrm",
- "VPUNPCKHQDQZ128rm(b?)",
- "(V?)PUNPCKHQDQrm",
- "VPUNPCKHWDZ128rm(b?)",
- "(V?)PUNPCKHWDrm",
- "VPUNPCKLBWZ128rm(b?)",
- "(V?)PUNPCKLBWrm",
- "VPUNPCKLDQZ128rm(b?)",
- "(V?)PUNPCKLDQrm",
- "VPUNPCKLQDQZ128rm(b?)",
- "(V?)PUNPCKLQDQrm",
- "VPUNPCKLWDZ128rm(b?)",
- "(V?)PUNPCKLWDrm")>;
+ "VMOVSSZrm(b?)")>;
def SKXWriteResGroup92a : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup92a], (instregex "MMX_PSHUFBrm")>;
+def: InstRW<[SKXWriteResGroup92a], (instregex "(V?)PMOV(SX|ZX)BDrm",
+ "(V?)PMOV(SX|ZX)BQrm",
+ "(V?)PMOV(SX|ZX)BWrm",
+ "(V?)PMOV(SX|ZX)DQrm",
+ "(V?)PMOV(SX|ZX)WDrm",
+ "(V?)PMOV(SX|ZX)WQrm")>;
def SKXWriteResGroup93 : SchedWriteRes<[SKXPort5,SKXPort015]> {
let Latency = 7;
@@ -1676,7 +1619,6 @@ def SKXWriteResGroup119 : SchedWriteRes<
}
def: InstRW<[SKXWriteResGroup119], (instregex "FCOM(P?)32m",
"FCOM(P?)64m",
- "MMX_PSADBWirm",
"VFPCLASSSDrm(b?)",
"VPBROADCASTBYrm",
"VPBROADCASTB(Z|Z256)m(b?)",
@@ -1751,13 +1693,6 @@ def SKXWriteResGroup123 : SchedWriteRes<
}
def: InstRW<[SKXWriteResGroup123], (instregex "MMX_PH(ADD|SUB)SWrm")>;
-def SKXWriteResGroup124 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort05]> {
- let Latency = 8;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
-}
-def: InstRW<[SKXWriteResGroup124], (instregex "MMX_PH(ADD|SUB)(D|W)rm")>;
-
def SKXWriteResGroup125 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237,SKXPort015]> {
let Latency = 8;
let NumMicroOps = 4;
Modified: llvm/trunk/lib/Target/X86/X86Schedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Schedule.td?rev=331999&r1=331998&r2=331999&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Schedule.td (original)
+++ llvm/trunk/lib/Target/X86/X86Schedule.td Thu May 10 10:06:09 2018
@@ -194,6 +194,7 @@ class FMASC { X86FoldableSchedWrite Sche
defm WriteFHAdd : X86SchedWritePair;
defm WriteFHAddY : X86SchedWritePair; // YMM/ZMM.
defm WritePHAdd : X86SchedWritePair;
+defm WritePHAddX : X86SchedWritePair; // XMM.
defm WritePHAddY : X86SchedWritePair; // YMM/ZMM.
// Vector integer operations.
@@ -205,10 +206,12 @@ def WriteVecMaskedStore : SchedWrite;
def WriteVecMaskedStoreY : SchedWrite;
def WriteVecMove : SchedWrite;
-defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals.
-defm WriteVecALUY : X86SchedWritePair; // Vector integer ALU op, no logicals (YMM/ZMM).
-defm WriteVecLogic : X86SchedWritePair; // Vector integer and/or/xor logicals.
-defm WriteVecLogicY: X86SchedWritePair; // Vector integer and/or/xor logicals (YMM/ZMM).
+defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals.
+defm WriteVecALUX : X86SchedWritePair; // Vector integer ALU op, no logicals (XMM).
+defm WriteVecALUY : X86SchedWritePair; // Vector integer ALU op, no logicals (YMM/ZMM).
+defm WriteVecLogic : X86SchedWritePair; // Vector integer and/or/xor logicals.
+defm WriteVecLogicX : X86SchedWritePair; // Vector integer and/or/xor logicals (XMM).
+defm WriteVecLogicY : X86SchedWritePair; // Vector integer and/or/xor logicals (YMM/ZMM).
defm WriteVecTest : X86SchedWritePair; // Vector integer TEST instructions.
defm WriteVecTestY : X86SchedWritePair; // Vector integer TEST instructions (YMM/ZMM).
defm WriteVecShift : X86SchedWritePair; // Vector integer shifts (default).
@@ -223,14 +226,17 @@ defm WriteVecIMulY : X86SchedWritePair;
defm WritePMULLD : X86SchedWritePair; // Vector PMULLD.
defm WritePMULLDY : X86SchedWritePair; // Vector PMULLD (YMM/ZMM).
defm WriteShuffle : X86SchedWritePair; // Vector shuffles.
+defm WriteShuffleX : X86SchedWritePair; // Vector shuffles (XMM).
defm WriteShuffleY : X86SchedWritePair; // Vector shuffles (YMM/ZMM).
defm WriteVarShuffle : X86SchedWritePair; // Vector variable shuffles.
+defm WriteVarShuffleX : X86SchedWritePair; // Vector variable shuffles (XMM).
defm WriteVarShuffleY : X86SchedWritePair; // Vector variable shuffles (YMM/ZMM).
defm WriteBlend : X86SchedWritePair; // Vector blends.
defm WriteBlendY : X86SchedWritePair; // Vector blends (YMM/ZMM).
defm WriteVarBlend : X86SchedWritePair; // Vector variable blends.
defm WriteVarBlendY : X86SchedWritePair; // Vector variable blends (YMM/ZMM).
defm WritePSADBW : X86SchedWritePair; // Vector PSADBW.
+defm WritePSADBWX : X86SchedWritePair; // Vector PSADBW (XMM).
defm WritePSADBWY : X86SchedWritePair; // Vector PSADBW (YMM/ZMM).
defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD.
defm WriteMPSADY : X86SchedWritePair; // Vector MPSAD (YMM/ZMM).
@@ -356,11 +362,11 @@ def SchedWriteFVarBlend
WriteFVarBlendY, WriteFVarBlendY>;
def SchedWriteVecALU
- : X86SchedWriteWidths<WriteVecALU, WriteVecALU, WriteVecALUY, WriteVecALUY>;
+ : X86SchedWriteWidths<WriteVecALU, WriteVecALUX, WriteVecALUY, WriteVecALUY>;
def SchedWritePHAdd
- : X86SchedWriteWidths<WritePHAdd, WritePHAdd, WritePHAddY, WritePHAddY>;
+ : X86SchedWriteWidths<WritePHAdd, WritePHAddX, WritePHAddY, WritePHAddY>;
def SchedWriteVecLogic
- : X86SchedWriteWidths<WriteVecLogic, WriteVecLogic,
+ : X86SchedWriteWidths<WriteVecLogic, WriteVecLogicX,
WriteVecLogicY, WriteVecLogicY>;
def SchedWriteVecTest
: X86SchedWriteWidths<WriteVecTest, WriteVecTest,
@@ -384,14 +390,14 @@ def SchedWriteMPSAD
: X86SchedWriteWidths<WriteMPSAD, WriteMPSAD,
WriteMPSADY, WriteMPSADY>;
def SchedWritePSADBW
- : X86SchedWriteWidths<WritePSADBW, WritePSADBW,
+ : X86SchedWriteWidths<WritePSADBW, WritePSADBWX,
WritePSADBWY, WritePSADBWY>;
def SchedWriteShuffle
- : X86SchedWriteWidths<WriteShuffle, WriteShuffle,
+ : X86SchedWriteWidths<WriteShuffle, WriteShuffleX,
WriteShuffleY, WriteShuffleY>;
def SchedWriteVarShuffle
- : X86SchedWriteWidths<WriteVarShuffle, WriteVarShuffle,
+ : X86SchedWriteWidths<WriteVarShuffle, WriteVarShuffleX,
WriteVarShuffleY, WriteVarShuffleY>;
def SchedWriteBlend
: X86SchedWriteWidths<WriteBlend, WriteBlend, WriteBlendY, WriteBlendY>;
Modified: llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleAtom.td?rev=331999&r1=331998&r2=331999&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleAtom.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleAtom.td Thu May 10 10:06:09 2018
@@ -281,8 +281,10 @@ def : WriteRes<WriteVecMaskedStoreY, [A
def : WriteRes<WriteVecMove, [AtomPort01]>;
defm : AtomWriteResPair<WriteVecALU, [AtomPort01], [AtomPort0], 1, 1>;
+defm : AtomWriteResPair<WriteVecALUX, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecALUY, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecLogic, [AtomPort01], [AtomPort0], 1, 1>;
+defm : AtomWriteResPair<WriteVecLogicX, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecLogicY, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecTest, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecTestY, [AtomPort01], [AtomPort0], 1, 1>;
@@ -300,11 +302,14 @@ defm : AtomWriteResPair<WritePMULLDY,
defm : AtomWriteResPair<WritePHMINPOS, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteMPSAD, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteMPSADY, [AtomPort01], [AtomPort0], 1, 1>;
-defm : AtomWriteResPair<WritePSADBW, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
+defm : AtomWriteResPair<WritePSADBW, [AtomPort01], [AtomPort01], 4, 4, [4], [4]>;
+defm : AtomWriteResPair<WritePSADBWX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WritePSADBWY, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteShuffle, [AtomPort0], [AtomPort0], 1, 1>;
+defm : AtomWriteResPair<WriteShuffleX, [AtomPort0], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteShuffleY, [AtomPort0], [AtomPort0], 1, 1>;
-defm : AtomWriteResPair<WriteVarShuffle, [AtomPort01], [AtomPort01], 4, 5, [4], [5]>;
+defm : AtomWriteResPair<WriteVarShuffle, [AtomPort0], [AtomPort0], 1, 1>;
+defm : AtomWriteResPair<WriteVarShuffleX, [AtomPort01], [AtomPort01], 4, 5, [4], [5]>;
defm : AtomWriteResPair<WriteVarShuffleY, [AtomPort01], [AtomPort01], 4, 5, [4], [5]>;
defm : AtomWriteResPair<WriteBlend, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteBlendY, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
@@ -355,7 +360,8 @@ defm : AtomWriteResPair<WriteAESDecEnc,
defm : AtomWriteResPair<WriteFHAdd, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
defm : AtomWriteResPair<WriteFHAddY, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
-defm : AtomWriteResPair<WritePHAdd, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
+defm : AtomWriteResPair<WritePHAdd, [AtomPort01], [AtomPort01], 3, 4, [3], [4]>;
+defm : AtomWriteResPair<WritePHAddX, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : AtomWriteResPair<WritePHAddY, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
////////////////////////////////////////////////////////////////////////////////
@@ -387,7 +393,6 @@ def : InstRW<[AtomWrite0_1], (instrs FXA
MOVSX64rr32,
MMX_MOVD64rr,
MMX_MOVD64to64rr,
- MMX_PSHUFBrr, MMX_PSHUFBrm,
MOVDI2PDIrr,
MOVDI2SSrr,
MOV64toPQIrr,
@@ -492,7 +497,7 @@ def : InstRW<[AtomWrite01_3], (instrs CL
POP16rmm, POP32rmm, POP64rmm)>;
def : InstRW<[AtomWrite01_3], (instregex "XADD(8|16|32|64)rm",
"XCHG(8|16|32|64)rm",
- "(MMX_)?PH(ADD|SUB)Drr",
+ "PH(ADD|SUB)Drr",
"MOV(S|Z)X16rm8",
"MMX_P(ADD|SUB)Qirm",
"MOV(UPS|UPD|DQU)rm",
@@ -506,9 +511,8 @@ def : InstRW<[AtomWrite01_4], (instrs CB
JCXZ, JECXZ, JRCXZ,
SHLD32mrCL, SHRD32mrCL,
SHLD32mri8, SHRD32mri8,
- LD_F80m,
- MMX_PSADBWirr, MMX_PSADBWirm)>;
-def : InstRW<[AtomWrite01_4], (instregex "(MMX_)?PH(ADD|SUB)Drm",
+ LD_F80m)>;
+def : InstRW<[AtomWrite01_4], (instregex "PH(ADD|SUB)Drm",
"(MMX_)?PEXTRWrr(_REV)?")>;
def AtomWrite01_5 : SchedWriteRes<[AtomPort01]> {
Modified: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td?rev=331999&r1=331998&r2=331999&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td Thu May 10 10:06:09 2018
@@ -407,6 +407,7 @@ defm : X86WriteRes<WriteVecMaskedStoreY,
def : WriteRes<WriteVecMove, [JFPU01, JVALU]>;
defm : JWriteResFpuPair<WriteVecALU, [JFPU01, JVALU], 1>;
+defm : JWriteResFpuPair<WriteVecALUX, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecALUY, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecShift, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecShiftX, [JFPU01, JVALU], 1>;
@@ -422,17 +423,21 @@ defm : JWriteResFpuPair<WritePMULLDY,
defm : JWriteResFpuPair<WriteMPSAD, [JFPU0, JVIMUL], 3, [1, 2]>;
defm : JWriteResFpuPair<WriteMPSADY, [JFPU0, JVIMUL], 3, [1, 2]>;
defm : JWriteResFpuPair<WritePSADBW, [JFPU01, JVALU], 2>;
+defm : JWriteResFpuPair<WritePSADBWX, [JFPU01, JVALU], 2>;
defm : JWriteResFpuPair<WritePSADBWY, [JFPU01, JVALU], 2>;
defm : JWriteResFpuPair<WritePHMINPOS, [JFPU0, JVALU], 2>;
defm : JWriteResFpuPair<WriteShuffle, [JFPU01, JVALU], 1>;
+defm : JWriteResFpuPair<WriteShuffleX, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteShuffleY, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVarShuffle, [JFPU01, JVALU], 2, [1, 4], 3>;
+defm : JWriteResFpuPair<WriteVarShuffleX, [JFPU01, JVALU], 2, [1, 4], 3>;
defm : JWriteResFpuPair<WriteVarShuffleY, [JFPU01, JVALU], 2, [1, 4], 3>;
defm : JWriteResFpuPair<WriteBlend, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteBlendY, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVarBlend, [JFPU01, JVALU], 2, [1, 4], 3>;
defm : JWriteResFpuPair<WriteVarBlendY, [JFPU01, JVALU], 2, [1, 4], 3>;
defm : JWriteResFpuPair<WriteVecLogic, [JFPU01, JVALU], 1>;
+defm : JWriteResFpuPair<WriteVecLogicX, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecLogicY, [JFPU01, JVALU], 1>; // NOTE: Doesn't exist on Jaguar.
defm : JWriteResFpuPair<WriteVecTest, [JFPU0, JFPA, JALU0], 3>;
defm : JWriteResYMMPair<WriteVecTestY , [JFPU01, JFPX, JFPA, JALU0], 4, [2, 2, 2, 1], 3>;
@@ -482,6 +487,7 @@ defm : JWriteResFpuPair<WriteAESDecEnc,
defm : JWriteResFpuPair<WriteFHAdd, [JFPU0, JFPA], 3>;
defm : JWriteResYMMPair<WriteFHAddY, [JFPU0, JFPA], 3, [2,2], 2>;
defm : JWriteResFpuPair<WritePHAdd, [JFPU01, JVALU], 1>;
+defm : JWriteResFpuPair<WritePHAddX, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WritePHAddY, [JFPU01, JVALU], 1>;
////////////////////////////////////////////////////////////////////////////////
Modified: llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleSLM.td?rev=331999&r1=331998&r2=331999&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleSLM.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleSLM.td Thu May 10 10:06:09 2018
@@ -219,10 +219,12 @@ defm : SLMWriteResPair<WriteVecShiftImm,
defm : SLMWriteResPair<WriteVecShiftImmX,[SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVecShiftImmY,[SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVecLogic, [SLM_FPC_RSV01], 1>;
+defm : SLMWriteResPair<WriteVecLogicX,[SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecLogicY,[SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecTest, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecTestY, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecALU, [SLM_FPC_RSV01], 1>;
+defm : SLMWriteResPair<WriteVecALUX, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecALUY, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecIMul, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WriteVecIMulX, [SLM_FPC_RSV0], 4>;
@@ -233,13 +235,16 @@ defm : SLMWriteResPair<WritePMULLD, [SL
defm : SLMWriteResPair<WritePMULLDY, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WriteShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteShuffleY, [SLM_FPC_RSV0], 1>;
+defm : SLMWriteResPair<WriteShuffleX, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVarShuffle, [SLM_FPC_RSV0], 1>;
+defm : SLMWriteResPair<WriteVarShuffleX, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVarShuffleY, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteBlend, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteBlendY, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteMPSAD, [SLM_FPC_RSV0], 7>;
defm : SLMWriteResPair<WriteMPSADY, [SLM_FPC_RSV0], 7>;
defm : SLMWriteResPair<WritePSADBW, [SLM_FPC_RSV0], 4>;
+defm : SLMWriteResPair<WritePSADBWX, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WritePSADBWY, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WritePHMINPOS, [SLM_FPC_RSV0], 4>;
@@ -260,6 +265,7 @@ def : WriteRes<WriteVecExtractSt, [SLM_
defm : SLMWriteResPair<WriteFHAdd, [SLM_FPC_RSV01], 3, [2]>;
defm : SLMWriteResPair<WriteFHAddY, [SLM_FPC_RSV01], 3, [2]>;
defm : SLMWriteResPair<WritePHAdd, [SLM_FPC_RSV01], 1>;
+defm : SLMWriteResPair<WritePHAddX, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WritePHAddY, [SLM_FPC_RSV01], 1>;
// String instructions.
Modified: llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td?rev=331999&r1=331998&r2=331999&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td Thu May 10 10:06:09 2018
@@ -281,10 +281,12 @@ defm : ZnWriteResFpuPair<WriteVecShiftIm
defm : ZnWriteResFpuPair<WriteVecShiftImmX, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecShiftImmY, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecLogic, [ZnFPU], 1>;
+defm : ZnWriteResFpuPair<WriteVecLogicX, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecLogicY, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecTest, [ZnFPU12], 1, [2], 1, 7, 1>;
defm : ZnWriteResFpuPair<WriteVecTestY, [ZnFPU12], 1, [2], 1, 7, 1>;
defm : ZnWriteResFpuPair<WriteVecALU, [ZnFPU], 1>;
+defm : ZnWriteResFpuPair<WriteVecALUX, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecALUY, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecIMul, [ZnFPU0], 4>;
defm : ZnWriteResFpuPair<WriteVecIMulX, [ZnFPU0], 4>;
@@ -292,14 +294,17 @@ defm : ZnWriteResFpuPair<WriteVecIMulY,
defm : ZnWriteResFpuPair<WritePMULLD, [ZnFPU0], 4>; // FIXME
defm : ZnWriteResFpuPair<WritePMULLDY, [ZnFPU0], 5, [2]>; // FIXME
defm : ZnWriteResFpuPair<WriteShuffle, [ZnFPU], 1>;
+defm : ZnWriteResFpuPair<WriteShuffleX, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteShuffleY, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVarShuffle, [ZnFPU], 1>;
+defm : ZnWriteResFpuPair<WriteVarShuffleX,[ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVarShuffleY,[ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteBlend, [ZnFPU01], 1>;
defm : ZnWriteResFpuPair<WriteBlendY, [ZnFPU01], 1>;
defm : ZnWriteResFpuPair<WriteShuffle256, [ZnFPU], 2>;
defm : ZnWriteResFpuPair<WriteVarShuffle256, [ZnFPU], 2>;
defm : ZnWriteResFpuPair<WritePSADBW, [ZnFPU0], 3>;
+defm : ZnWriteResFpuPair<WritePSADBWX, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WritePSADBWY, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WritePHMINPOS, [ZnFPU0], 4>;
@@ -1046,6 +1051,8 @@ def : InstRW<[WriteMicrocoded], (instreg
// PHADD|PHSUB (S) W/D.
def : SchedAlias<WritePHAdd, ZnWriteMicrocoded>;
def : SchedAlias<WritePHAddLd, ZnWriteMicrocoded>;
+def : SchedAlias<WritePHAddX, ZnWriteMicrocoded>;
+def : SchedAlias<WritePHAddXLd, ZnWriteMicrocoded>;
def : SchedAlias<WritePHAddY, ZnWriteMicrocoded>;
def : SchedAlias<WritePHAddYLd, ZnWriteMicrocoded>;
Modified: llvm/trunk/test/CodeGen/X86/3dnow-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/3dnow-schedule.ll?rev=331999&r1=331998&r2=331999&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/3dnow-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/3dnow-schedule.ll Thu May 10 10:06:09 2018
@@ -14,8 +14,8 @@ declare void @llvm.x86.mmx.femms() nounw
define i64 @test_pavgusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
; CHECK-LABEL: test_pavgusb:
; CHECK: # %bb.0:
-; CHECK-NEXT: pavgusb %mm1, %mm0 # sched: [1:0.50]
-; CHECK-NEXT: pavgusb (%rdi), %mm0 # sched: [7:0.50]
+; CHECK-NEXT: pavgusb %mm1, %mm0 # sched: [3:1.00]
+; CHECK-NEXT: pavgusb (%rdi), %mm0 # sched: [8:1.00]
; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33]
; CHECK-NEXT: retq # sched: [1:1.00]
%1 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %a0, x86_mmx %a1)
Modified: llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-schedule.ll?rev=331999&r1=331998&r2=331999&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-schedule.ll Thu May 10 10:06:09 2018
@@ -1761,8 +1761,8 @@ define <16 x i16> @test_pblendw(<16 x i1
define <16 x i8> @test_pbroadcastb(<16 x i8> %a0, <16 x i8> *%a1) {
; GENERIC-LABEL: test_pbroadcastb:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [6:1.00]
+; GENERIC-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [7:0.50]
; GENERIC-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -1811,7 +1811,7 @@ define <32 x i8> @test_pbroadcastb_ymm(<
; GENERIC-LABEL: test_pbroadcastb_ymm:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [6:1.00]
+; GENERIC-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [7:0.50]
; GENERIC-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -1859,8 +1859,8 @@ define <32 x i8> @test_pbroadcastb_ymm(<
define <4 x i32> @test_pbroadcastd(<4 x i32> %a0, <4 x i32> *%a1) {
; GENERIC-LABEL: test_pbroadcastd:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [6:1.00]
+; GENERIC-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [7:0.50]
; GENERIC-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -1909,7 +1909,7 @@ define <8 x i32> @test_pbroadcastd_ymm(<
; GENERIC-LABEL: test_pbroadcastd_ymm:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [6:1.00]
+; GENERIC-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [7:0.50]
; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -1957,8 +1957,8 @@ define <8 x i32> @test_pbroadcastd_ymm(<
define <2 x i64> @test_pbroadcastq(<2 x i64> %a0, <2 x i64> *%a1) {
; GENERIC-LABEL: test_pbroadcastq:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [6:1.00]
+; GENERIC-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [7:0.50]
; GENERIC-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -2007,7 +2007,7 @@ define <4 x i64> @test_pbroadcastq_ymm(<
; GENERIC-LABEL: test_pbroadcastq_ymm:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [6:1.00]
+; GENERIC-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [7:0.50]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -2055,8 +2055,8 @@ define <4 x i64> @test_pbroadcastq_ymm(<
define <8 x i16> @test_pbroadcastw(<8 x i16> %a0, <8 x i16> *%a1) {
; GENERIC-LABEL: test_pbroadcastw:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [6:1.00]
+; GENERIC-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [7:0.50]
; GENERIC-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -2105,7 +2105,7 @@ define <16 x i16> @test_pbroadcastw_ymm(
; GENERIC-LABEL: test_pbroadcastw_ymm:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [6:1.00]
+; GENERIC-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [7:0.50]
; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
Modified: llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll?rev=331999&r1=331998&r2=331999&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll Thu May 10 10:06:09 2018
@@ -5657,7 +5657,7 @@ define <8 x i16> @test_masked_8xi16_perm
; GENERIC-LABEL: test_masked_8xi16_perm_high_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,7,6] sched: [1:1.00]
+; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,7,6] sched: [1:0.50]
; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5677,7 +5677,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,7,6] sched: [1:1.00]
+; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,7,6] sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi16_perm_high_mask0:
@@ -5694,7 +5694,7 @@ define <8 x i16> @test_masked_8xi16_perm
; GENERIC-LABEL: test_masked_8xi16_perm_low_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[0,3,0,0,4,5,6,7] sched: [1:1.00]
+; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[0,3,0,0,4,5,6,7] sched: [1:0.50]
; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5714,7 +5714,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,3,0,0,4,5,6,7] sched: [1:1.00]
+; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,3,0,0,4,5,6,7] sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi16_perm_low_mask1:
@@ -5731,7 +5731,7 @@ define <8 x i16> @test_masked_8xi16_perm
; GENERIC-LABEL: test_masked_8xi16_perm_high_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,4,4,5] sched: [1:1.00]
+; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,4,4,5] sched: [1:0.50]
; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5751,7 +5751,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,4,4,5] sched: [1:1.00]
+; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,4,4,5] sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi16_perm_high_mask2:
@@ -5781,7 +5781,7 @@ define <8 x i16> @test_masked_8xi16_perm
; GENERIC-LABEL: test_masked_8xi16_perm_low_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[2,1,1,1,4,5,6,7] sched: [1:1.00]
+; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[2,1,1,1,4,5,6,7] sched: [1:0.50]
; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5801,7 +5801,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1,1,1,4,5,6,7] sched: [1:1.00]
+; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1,1,1,4,5,6,7] sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi16_perm_low_mask3:
@@ -5818,7 +5818,7 @@ define <8 x i16> @test_masked_8xi16_perm
; GENERIC-LABEL: test_masked_8xi16_perm_high_mask4:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,5,7,6] sched: [1:1.00]
+; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,5,7,6] sched: [1:0.50]
; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5838,7 +5838,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask4:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,5,7,6] sched: [1:1.00]
+; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,5,7,6] sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi16_perm_high_mask4:
@@ -5855,7 +5855,7 @@ define <8 x i16> @test_masked_8xi16_perm
; GENERIC-LABEL: test_masked_8xi16_perm_low_mask5:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[3,3,2,1,4,5,6,7] sched: [1:1.00]
+; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[3,3,2,1,4,5,6,7] sched: [1:0.50]
; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5875,7 +5875,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask5:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3,2,1,4,5,6,7] sched: [1:1.00]
+; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3,2,1,4,5,6,7] sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi16_perm_low_mask5:
@@ -5905,7 +5905,7 @@ define <8 x i16> @test_masked_8xi16_perm
; GENERIC-LABEL: test_masked_8xi16_perm_high_mask6:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,6,5] sched: [1:1.00]
+; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,6,5] sched: [1:0.50]
; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5925,7 +5925,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask6:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,6,5] sched: [1:1.00]
+; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,6,5] sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi16_perm_high_mask6:
@@ -5942,7 +5942,7 @@ define <8 x i16> @test_masked_8xi16_perm
; GENERIC-LABEL: test_masked_8xi16_perm_low_mask7:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0,4,5,6,7] sched: [1:1.00]
+; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0,4,5,6,7] sched: [1:0.50]
; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5962,7 +5962,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask7:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0,4,5,6,7] sched: [1:1.00]
+; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0,4,5,6,7] sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi16_perm_low_mask7:
@@ -5993,7 +5993,7 @@ define <8 x i16> @test_masked_8xi16_perm
; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,7,4,6] sched: [6:1.00]
+; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,7,4,6] sched: [7:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask0:
@@ -6012,7 +6012,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,7,4,6] sched: [6:1.00]
+; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,7,4,6] sched: [7:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask0:
@@ -6031,7 +6031,7 @@ define <8 x i16> @test_masked_8xi16_perm
; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[1,3,3,2,4,5,6,7] sched: [6:1.00]
+; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[1,3,3,2,4,5,6,7] sched: [7:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask1:
@@ -6050,7 +6050,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7] sched: [6:1.00]
+; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7] sched: [7:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask1:
@@ -6069,7 +6069,7 @@ define <8 x i16> @test_masked_8xi16_perm
; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,6,6,5,7] sched: [6:1.00]
+; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,6,6,5,7] sched: [7:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask2:
@@ -6088,7 +6088,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,6,6,5,7] sched: [6:1.00]
+; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,6,6,5,7] sched: [7:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask2:
@@ -6121,7 +6121,7 @@ define <8 x i16> @test_masked_8xi16_perm
; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[3,1,2,0,4,5,6,7] sched: [6:1.00]
+; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[3,1,2,0,4,5,6,7] sched: [7:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask3:
@@ -6140,7 +6140,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[3,1,2,0,4,5,6,7] sched: [6:1.00]
+; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[3,1,2,0,4,5,6,7] sched: [7:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask3:
@@ -6159,7 +6159,7 @@ define <8 x i16> @test_masked_8xi16_perm
; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask4:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,6,7,5] sched: [6:1.00]
+; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,6,7,5] sched: [7:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask4:
@@ -6178,7 +6178,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask4:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,6,7,5] sched: [6:1.00]
+; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,6,7,5] sched: [7:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask4:
@@ -6197,7 +6197,7 @@ define <8 x i16> @test_masked_8xi16_perm
; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask5:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[2,1,3,2,4,5,6,7] sched: [6:1.00]
+; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[2,1,3,2,4,5,6,7] sched: [7:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask5:
@@ -6216,7 +6216,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask5:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2,4,5,6,7] sched: [6:1.00]
+; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2,4,5,6,7] sched: [7:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask5:
@@ -6249,7 +6249,7 @@ define <8 x i16> @test_masked_8xi16_perm
; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask6:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,4,4,4] sched: [6:1.00]
+; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,4,4,4] sched: [7:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask6:
@@ -6268,7 +6268,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask6:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,4,4,4] sched: [6:1.00]
+; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,4,4,4] sched: [7:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask6:
@@ -6287,7 +6287,7 @@ define <8 x i16> @test_masked_8xi16_perm
; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask7:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[0,3,3,1,4,5,6,7] sched: [6:1.00]
+; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[0,3,3,1,4,5,6,7] sched: [7:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask7:
@@ -6306,7 +6306,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask7:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[0,3,3,1,4,5,6,7] sched: [6:1.00]
+; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[0,3,3,1,4,5,6,7] sched: [7:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask7:
@@ -7704,7 +7704,7 @@ define <4 x i32> @test_masked_4xi32_perm
; GENERIC-LABEL: test_masked_4xi32_perm_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[2,3,3,0] sched: [1:1.00]
+; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[2,3,3,0] sched: [1:0.50]
; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -7724,7 +7724,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
; GENERIC-LABEL: test_masked_z_4xi32_perm_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[2,3,3,0] sched: [1:1.00]
+; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[2,3,3,0] sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_4xi32_perm_mask0:
@@ -7741,7 +7741,7 @@ define <4 x i32> @test_masked_4xi32_perm
; GENERIC-LABEL: test_masked_4xi32_perm_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0] sched: [1:1.00]
+; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0] sched: [1:0.50]
; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -7761,7 +7761,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
; GENERIC-LABEL: test_masked_z_4xi32_perm_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0] sched: [1:1.00]
+; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0] sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_4xi32_perm_mask1:
@@ -7778,7 +7778,7 @@ define <4 x i32> @test_masked_4xi32_perm
; GENERIC-LABEL: test_masked_4xi32_perm_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[3,0,1,0] sched: [1:1.00]
+; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[3,0,1,0] sched: [1:0.50]
; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -7798,7 +7798,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
; GENERIC-LABEL: test_masked_z_4xi32_perm_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[3,0,1,0] sched: [1:1.00]
+; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[3,0,1,0] sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_4xi32_perm_mask2:
@@ -7828,7 +7828,7 @@ define <4 x i32> @test_masked_4xi32_perm
; GENERIC-LABEL: test_masked_4xi32_perm_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,1,0,3] sched: [1:1.00]
+; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,1,0,3] sched: [1:0.50]
; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -7848,7 +7848,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
; GENERIC-LABEL: test_masked_z_4xi32_perm_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,0,3] sched: [1:1.00]
+; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,0,3] sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_4xi32_perm_mask3:
@@ -7879,7 +7879,7 @@ define <4 x i32> @test_masked_4xi32_perm
; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,1,3,3] sched: [6:1.00]
+; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,1,3,3] sched: [7:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_4xi32_perm_mem_mask0:
@@ -7898,7 +7898,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask0:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,3] sched: [6:1.00]
+; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,3] sched: [7:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask0:
@@ -7917,7 +7917,7 @@ define <4 x i32> @test_masked_4xi32_perm
; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[2,2,3,1] sched: [6:1.00]
+; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[2,2,3,1] sched: [7:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_4xi32_perm_mem_mask1:
@@ -7936,7 +7936,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[2,2,3,1] sched: [6:1.00]
+; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[2,2,3,1] sched: [7:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask1:
@@ -7955,7 +7955,7 @@ define <4 x i32> @test_masked_4xi32_perm
; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,3,0,1] sched: [6:1.00]
+; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,3,0,1] sched: [7:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_4xi32_perm_mem_mask2:
@@ -7974,7 +7974,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,3,0,1] sched: [6:1.00]
+; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,3,0,1] sched: [7:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask2:
@@ -8007,7 +8007,7 @@ define <4 x i32> @test_masked_4xi32_perm
; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[1,0,1,0] sched: [6:1.00]
+; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[1,0,1,0] sched: [7:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_4xi32_perm_mem_mask3:
@@ -8026,7 +8026,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[1,0,1,0] sched: [6:1.00]
+; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[1,0,1,0] sched: [7:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask3:
Modified: llvm/trunk/test/CodeGen/X86/xop-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/xop-schedule.ll?rev=331999&r1=331998&r2=331999&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/xop-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/xop-schedule.ll Thu May 10 10:06:09 2018
@@ -101,9 +101,9 @@ define void @test_vpcmov_128(<2 x i64> %
; GENERIC-LABEL: test_vpcmov_128:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpcmov (%rdi), %xmm1, %xmm0, %xmm0 # sched: [6:1.00]
-; GENERIC-NEXT: vpcmov %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpcmov (%rdi), %xmm1, %xmm0, %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT: vpcmov %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
More information about the llvm-commits mailing list