[llvm] r331760 - [X86] Add vector masked load/store scheduler classes (PR32857)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue May 8 05:17:56 PDT 2018
Author: rksimon
Date: Tue May 8 05:17:55 2018
New Revision: 331760
URL: http://llvm.org/viewvc/llvm-project?rev=331760&view=rev
Log:
[X86] Add vector masked load/store scheduler classes (PR32857)
Split off from existing vector load/store classes to remove InstRW overrides.
Modified:
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
llvm/trunk/lib/Target/X86/X86SchedHaswell.td
llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
llvm/trunk/lib/Target/X86/X86Schedule.td
llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=331760&r1=331759&r2=331760&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue May 8 05:17:55 2018
@@ -7102,22 +7102,22 @@ multiclass avx_movmask_rm<bits<8> opc_rm
(ins VR128:$src1, f128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>,
- VEX_4V, Sched<[WriteFLoad]>;
+ VEX_4V, Sched<[WriteFMaskedLoad]>;
def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
- VEX_4V, VEX_L, Sched<[WriteFLoad]>;
+ VEX_4V, VEX_L, Sched<[WriteFMaskedLoadY]>;
def mr : AVX8I<opc_mr, MRMDestMem, (outs),
(ins f128mem:$dst, VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>,
- VEX_4V, Sched<[WriteFStore]>;
+ VEX_4V, Sched<[WriteFMaskedStore]>;
def Ymr : AVX8I<opc_mr, MRMDestMem, (outs),
(ins f256mem:$dst, VR256:$src1, VR256:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>,
- VEX_4V, VEX_L, Sched<[WriteFStore]>;
+ VEX_4V, VEX_L, Sched<[WriteFMaskedStoreY]>;
}
let ExeDomain = SSEPackedSingle in
@@ -7729,22 +7729,22 @@ multiclass avx2_pmovmask<string OpcodeSt
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>,
- VEX_4V, Sched<[WriteVecLoad]>;
+ VEX_4V, Sched<[WriteVecMaskedLoad]>;
def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, i256mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
- VEX_4V, VEX_L, Sched<[WriteVecLoad]>;
+ VEX_4V, VEX_L, Sched<[WriteVecMaskedLoadY]>;
def mr : AVX28I<0x8e, MRMDestMem, (outs),
(ins i128mem:$dst, VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>,
- VEX_4V, Sched<[WriteVecStore]>;
+ VEX_4V, Sched<[WriteVecMaskedStore]>;
def Ymr : AVX28I<0x8e, MRMDestMem, (outs),
(ins i256mem:$dst, VR256:$src1, VR256:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>,
- VEX_4V, VEX_L, Sched<[WriteVecStore]>;
+ VEX_4V, VEX_L, Sched<[WriteVecMaskedStoreY]>;
}
defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd",
Modified: llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedBroadwell.td?rev=331760&r1=331759&r2=331760&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedBroadwell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedBroadwell.td Tue May 8 05:17:55 2018
@@ -151,9 +151,13 @@ def : InstRW<[WriteMove], (instrs COPY)>
defm : BWWriteResPair<WriteJump, [BWPort06], 1>;
// Floating point. This covers both scalar and vector operations.
-def : WriteRes<WriteFLoad, [BWPort23]> { let Latency = 5; }
-def : WriteRes<WriteFStore, [BWPort237, BWPort4]>;
-def : WriteRes<WriteFMove, [BWPort5]>;
+defm : X86WriteRes<WriteFLoad, [BWPort23], 5, [1], 1>;
+defm : X86WriteRes<WriteFMaskedLoad, [BWPort23,BWPort5], 7, [1,2], 3>;
+defm : X86WriteRes<WriteFMaskedLoadY, [BWPort23,BWPort5], 8, [1,2], 3>;
+defm : X86WriteRes<WriteFStore, [BWPort237,BWPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteFMaskedStore, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteFMaskedStoreY, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteFMove, [BWPort5], 1, [1], 1>;
defm : BWWriteResPair<WriteFAdd, [BWPort1], 3, [1], 1, 5>; // Floating point add/sub.
defm : BWWriteResPair<WriteFAddX, [BWPort1], 3, [1], 1, 5>; // Floating point add/sub (XMM).
@@ -241,10 +245,14 @@ def : WriteRes<WriteCvtF2FSt, [BWPort1,
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
// Vector integer operations.
-def : WriteRes<WriteVecLoad, [BWPort23]> { let Latency = 5; }
-def : WriteRes<WriteVecStore, [BWPort237, BWPort4]>;
-def : WriteRes<WriteVecMove, [BWPort015]>;
-defm : X86WriteRes<WriteEMMS, [BWPort01,BWPort15,BWPort015,BWPort0156], 31, [8,1,21,1], 31>;
+defm : X86WriteRes<WriteVecLoad, [BWPort23], 5, [1], 1>;
+defm : X86WriteRes<WriteVecMaskedLoad, [BWPort23,BWPort5], 7, [1,2], 3>;
+defm : X86WriteRes<WriteVecMaskedLoadY, [BWPort23,BWPort5], 8, [1,2], 3>;
+defm : X86WriteRes<WriteVecStore, [BWPort237,BWPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteVecMaskedStore, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteVecMaskedStoreY, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteVecMove, [BWPort015], 1, [1], 1>;
+defm : X86WriteRes<WriteEMMS, [BWPort01,BWPort15,BWPort015,BWPort0156], 31, [8,1,21,1], 31>;
defm : BWWriteResPair<WriteVecALU, [BWPort15], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
defm : BWWriteResPair<WriteVecALUY, [BWPort15], 1, [1], 1, 6>; // Vector integer ALU op, no logicals (YMM/ZMM).
@@ -899,16 +907,6 @@ def BWWriteResGroup52 : SchedWriteRes<[B
}
def: InstRW<[BWWriteResGroup52], (instrs IMUL32r, MUL32r, MULX32rr)>;
-def BWWriteResGroup53 : SchedWriteRes<[BWPort0,BWPort4,BWPort237,BWPort15]> {
- let Latency = 5;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[BWWriteResGroup53], (instregex "VMASKMOVPD(Y?)mr",
- "VMASKMOVPS(Y?)mr",
- "VPMASKMOVD(Y?)mr",
- "VPMASKMOVQ(Y?)mr")>;
-
def BWWriteResGroup54 : SchedWriteRes<[BWPort6,BWPort0156]> {
let Latency = 5;
let NumMicroOps = 5;
@@ -1107,11 +1105,7 @@ def BWWriteResGroup79 : SchedWriteRes<[B
}
def: InstRW<[BWWriteResGroup79], (instregex "MMX_PACKSSDWirm",
"MMX_PACKSSWBirm",
- "MMX_PACKUSWBirm",
- "VMASKMOVPDrm",
- "VMASKMOVPSrm",
- "VPMASKMOVDrm",
- "VPMASKMOVQrm")>;
+ "MMX_PACKUSWBirm")>;
def BWWriteResGroup80 : SchedWriteRes<[BWPort23,BWPort0156]> {
let Latency = 7;
@@ -1212,16 +1206,6 @@ def: InstRW<[BWWriteResGroup92], (instre
"VPMOVSXWQYrm",
"VPMOVZXWDYrm")>;
-def BWWriteResGroup94 : SchedWriteRes<[BWPort5,BWPort23]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[BWWriteResGroup94], (instregex "VMASKMOVPDYrm",
- "VMASKMOVPSYrm",
- "VPMASKMOVDYrm",
- "VPMASKMOVQYrm")>;
-
def BWWriteResGroup97 : SchedWriteRes<[BWPort23,BWPort237,BWPort06,BWPort0156]> {
let Latency = 8;
let NumMicroOps = 5;
Modified: llvm/trunk/lib/Target/X86/X86SchedHaswell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedHaswell.td?rev=331760&r1=331759&r2=331760&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedHaswell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedHaswell.td Tue May 8 05:17:55 2018
@@ -143,11 +143,16 @@ defm : HWWriteResPair<WriteBZHI, [HWPort
// This is quite rough, latency depends on the dividend.
defm : HWWriteResPair<WriteIDiv, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
+
// Scalar and vector floating point.
-def : WriteRes<WriteFStore, [HWPort237, HWPort4]>;
-def : WriteRes<WriteFLoad, [HWPort23]> { let Latency = 5; }
-def : WriteRes<WriteFMove, [HWPort5]>;
-defm : X86WriteRes<WriteEMMS, [HWPort01,HWPort15,HWPort015,HWPort0156], 31, [8,1,21,1], 31>;
+defm : X86WriteRes<WriteFLoad, [HWPort23], 5, [1], 1>;
+defm : X86WriteRes<WriteFMaskedLoad, [HWPort23,HWPort5], 8, [1,2], 3>;
+defm : X86WriteRes<WriteFMaskedLoadY, [HWPort23,HWPort5], 9, [1,2], 3>;
+defm : X86WriteRes<WriteFStore, [HWPort237,HWPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteFMaskedStore, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteFMaskedStoreY, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteFMove, [HWPort5], 1, [1], 1>;
+defm : X86WriteRes<WriteEMMS, [HWPort01,HWPort15,HWPort015,HWPort0156], 31, [8,1,21,1], 31>;
defm : HWWriteResPair<WriteFAdd, [HWPort1], 3, [1], 1, 5>;
defm : HWWriteResPair<WriteFAddX, [HWPort1], 3, [1], 1, 6>;
@@ -235,9 +240,13 @@ def : WriteRes<WriteCvtF2FSt, [HWPort1,
}
// Vector integer operations.
-def : WriteRes<WriteVecStore, [HWPort237, HWPort4]>;
-def : WriteRes<WriteVecLoad, [HWPort23]> { let Latency = 5; }
-def : WriteRes<WriteVecMove, [HWPort015]>;
+defm : X86WriteRes<WriteVecLoad, [HWPort23], 5, [1], 1>;
+defm : X86WriteRes<WriteVecMaskedLoad, [HWPort23,HWPort5], 8, [1,2], 3>;
+defm : X86WriteRes<WriteVecMaskedLoadY, [HWPort23,HWPort5], 9, [1,2], 3>;
+defm : X86WriteRes<WriteVecStore, [HWPort237,HWPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteVecMaskedStore, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteVecMaskedStoreY, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteVecMove, [HWPort015], 1, [1], 1>;
defm : HWWriteResPair<WriteVecLogic, [HWPort015], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteVecLogicY,[HWPort015], 1, [1], 1, 7>;
@@ -1156,26 +1165,6 @@ def: InstRW<[HWWriteResGroup35], (instre
"SBB(8|16|32|64)i",
"SET(A|BE)r")>;
-def HWWriteResGroup36 : SchedWriteRes<[HWPort5,HWPort23]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[HWWriteResGroup36], (instregex "VMASKMOVPDrm",
- "VMASKMOVPSrm",
- "VPMASKMOVDrm",
- "VPMASKMOVQrm")>;
-
-def HWWriteResGroup36_1 : SchedWriteRes<[HWPort5,HWPort23]> {
- let Latency = 9;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[HWWriteResGroup36_1], (instregex "VMASKMOVPDYrm",
- "VMASKMOVPSYrm",
- "VPMASKMOVDYrm",
- "VPMASKMOVQYrm")>;
-
def HWWriteResGroup36_2 : SchedWriteRes<[HWPort5,HWPort23]> {
let Latency = 7;
let NumMicroOps = 3;
@@ -1579,16 +1568,6 @@ def HWWriteResGroup83 : SchedWriteRes<[H
}
def: InstRW<[HWWriteResGroup83], (instregex "LAR(16|32|64)rr")>;
-def HWWriteResGroup84 : SchedWriteRes<[HWPort0,HWPort4,HWPort237,HWPort15]> {
- let Latency = 5;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[HWWriteResGroup84], (instregex "VMASKMOVPD(Y?)mr",
- "VMASKMOVPS(Y?)mr",
- "VPMASKMOVD(Y?)mr",
- "VPMASKMOVQ(Y?)mr")>;
-
def HWWriteResGroup86 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort0156]> {
let Latency = 10;
let NumMicroOps = 4;
Modified: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td?rev=331760&r1=331759&r2=331760&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td Tue May 8 05:17:55 2018
@@ -134,10 +134,14 @@ defm : SBWriteResPair<WriteBEXTR, [SBPor
defm : SBWriteResPair<WriteBZHI, [SBPort1], 1>;
// Scalar and vector floating point.
-def : WriteRes<WriteFStore, [SBPort23, SBPort4]>;
-def : WriteRes<WriteFLoad, [SBPort23]> { let Latency = 6; }
-def : WriteRes<WriteFMove, [SBPort5]>;
-defm : X86WriteRes<WriteEMMS, [SBPort015], 31, [31], 31>;
+defm : X86WriteRes<WriteFLoad, [SBPort23], 6, [1], 1>;
+defm : X86WriteRes<WriteFMaskedLoad, [SBPort23,SBPort05], 8, [1,2], 3>;
+defm : X86WriteRes<WriteFMaskedLoadY, [SBPort23,SBPort05], 9, [1,2], 3>;
+defm : X86WriteRes<WriteFStore, [SBPort23,SBPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteFMaskedStore, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
+defm : X86WriteRes<WriteFMaskedStoreY, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
+defm : X86WriteRes<WriteFMove, [SBPort5], 1, [1], 1>;
+defm : X86WriteRes<WriteEMMS, [SBPort015], 31, [31], 31>;
defm : SBWriteResPair<WriteFAdd, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFAddX, [SBPort1], 3, [1], 1, 6>;
@@ -213,9 +217,13 @@ defm : SBWriteResPair<WriteFVarBlendY,[S
def : WriteRes<WriteCvtF2FSt, [SBPort1, SBPort23, SBPort4]> { let Latency = 4; }
// Vector integer operations.
-def : WriteRes<WriteVecStore, [SBPort23, SBPort4]>;
-def : WriteRes<WriteVecLoad, [SBPort23]> { let Latency = 6; }
-def : WriteRes<WriteVecMove, [SBPort05]>;
+defm : X86WriteRes<WriteVecLoad, [SBPort23], 6, [1], 1>;
+defm : X86WriteRes<WriteVecMaskedLoad, [SBPort23,SBPort05], 8, [1,2], 3>;
+defm : X86WriteRes<WriteVecMaskedLoadY, [SBPort23,SBPort05], 9, [1,2], 3>;
+defm : X86WriteRes<WriteVecStore, [SBPort23,SBPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteVecMaskedStore, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
+defm : X86WriteRes<WriteVecMaskedStoreY, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
+defm : X86WriteRes<WriteVecMove, [SBPort05], 1, [1], 1>;
defm : SBWriteResPair<WriteVecLogic, [SBPort015], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteVecLogicY,[SBPort015], 1, [1], 1, 7>;
@@ -786,14 +794,6 @@ def: InstRW<[SBWriteResGroup36], (instre
"CALL(16|32|64)r",
"(V?)EXTRACTPSmr")>;
-def SBWriteResGroup37 : SchedWriteRes<[SBPort4,SBPort01,SBPort23]> {
- let Latency = 5;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPD(Y?)mr",
- "VMASKMOVPS(Y?)mr")>;
-
def SBWriteResGroup40 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
let Latency = 5;
let NumMicroOps = 3;
@@ -1060,14 +1060,6 @@ def SBWriteResGroup72 : SchedWriteRes<[S
}
def: InstRW<[SBWriteResGroup72], (instrs MUL8m)>;
-def SBWriteResGroup75 : SchedWriteRes<[SBPort23,SBPort05]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,2];
-}
-def: InstRW<[SBWriteResGroup75], (instregex "VMASKMOVPDrm",
- "VMASKMOVPSrm")>;
-
def SBWriteResGroup77 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
let Latency = 8;
let NumMicroOps = 3;
@@ -1169,14 +1161,6 @@ def: InstRW<[SBWriteResGroup90], (instre
"(V?)CVTPS2DQrm",
"(V?)CVTTPS2DQrm")>;
-def SBWriteResGroup91 : SchedWriteRes<[SBPort23,SBPort05]> {
- let Latency = 9;
- let NumMicroOps = 3;
- let ResourceCycles = [1,2];
-}
-def: InstRW<[SBWriteResGroup91], (instregex "VMASKMOVPDYrm",
- "VMASKMOVPSYrm")>;
-
def SBWriteResGroup93 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
let Latency = 9;
let NumMicroOps = 3;
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td?rev=331760&r1=331759&r2=331760&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td Tue May 8 05:17:55 2018
@@ -147,10 +147,14 @@ def : WriteRes<WriteZero, []>;
defm : SKLWriteResPair<WriteJump, [SKLPort06], 1>;
// Floating point. This covers both scalar and vector operations.
-def : WriteRes<WriteFLoad, [SKLPort23]> { let Latency = 6; }
-def : WriteRes<WriteFStore, [SKLPort237, SKLPort4]>;
-def : WriteRes<WriteFMove, [SKLPort015]>;
-defm : X86WriteRes<WriteEMMS, [SKLPort05,SKLPort0156], 10, [9,1], 10>;
+defm : X86WriteRes<WriteFLoad, [SKLPort23], 6, [1], 1>;
+defm : X86WriteRes<WriteFMaskedLoad, [SKLPort23,SKLPort015], 7, [1,1], 2>;
+defm : X86WriteRes<WriteFMaskedLoadY, [SKLPort23,SKLPort015], 8, [1,1], 2>;
+defm : X86WriteRes<WriteFStore, [SKLPort237,SKLPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteFMaskedStore, [SKLPort237,SKLPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteFMaskedStoreY, [SKLPort237,SKLPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteFMove, [SKLPort015], 1, [1], 1>;
+defm : X86WriteRes<WriteEMMS, [SKLPort05,SKLPort0156], 10, [9,1], 10>;
defm : SKLWriteResPair<WriteFAdd, [SKLPort01], 4, [1], 1, 5>; // Floating point add/sub.
defm : SKLWriteResPair<WriteFAddX, [SKLPort01], 4, [1], 1, 6>; // Floating point add/sub (XMM).
@@ -234,9 +238,13 @@ def : WriteRes<WriteCvtF2FSt, [SKLPort4
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
// Vector integer operations.
-def : WriteRes<WriteVecLoad, [SKLPort23]> { let Latency = 6; }
-def : WriteRes<WriteVecStore, [SKLPort237, SKLPort4]>;
-def : WriteRes<WriteVecMove, [SKLPort015]>;
+defm : X86WriteRes<WriteVecLoad, [SKLPort23], 6, [1], 1>;
+defm : X86WriteRes<WriteVecMaskedLoad, [SKLPort23,SKLPort015], 7, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedLoadY, [SKLPort23,SKLPort015], 8, [1,1], 2>;
+defm : X86WriteRes<WriteVecStore, [SKLPort237,SKLPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteVecMaskedStore, [SKLPort237,SKLPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStoreY, [SKLPort237,SKLPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecMove, [SKLPort015], 1, [1], 1>;
defm : SKLWriteResPair<WriteVecALU, [SKLPort01], 1, [1], 1, 6>; // Vector integer ALU op, no logicals.
defm : SKLWriteResPair<WriteVecALUY, [SKLPort01], 1, [1], 1, 7>; // Vector integer ALU op, no logicals (YMM/ZMM).
@@ -646,16 +654,6 @@ def: InstRW<[SKLWriteResGroup17], (instr
WAIT,
XGETBV)>;
-def SKLWriteResGroup18 : SchedWriteRes<[SKLPort0,SKLPort237]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKLWriteResGroup18], (instregex "VMASKMOVPD(Y?)mr",
- "VMASKMOVPS(Y?)mr",
- "VPMASKMOVD(Y?)mr",
- "VPMASKMOVQ(Y?)mr")>;
-
def SKLWriteResGroup20 : SchedWriteRes<[SKLPort6,SKLPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
@@ -1246,15 +1244,11 @@ def SKLWriteResGroup91 : SchedWriteRes<[
}
def: InstRW<[SKLWriteResGroup91], (instregex "(V?)INSERTF128rm",
"(V?)INSERTI128rm",
- "(V?)MASKMOVPDrm",
- "(V?)MASKMOVPSrm",
"(V?)PADDBrm",
"(V?)PADDDrm",
"(V?)PADDQrm",
"(V?)PADDWrm",
"(V?)PBLENDDrmi",
- "(V?)PMASKMOVDrm",
- "(V?)PMASKMOVQrm",
"(V?)PSUBBrm",
"(V?)PSUBDrm",
"(V?)PSUBQrm",
@@ -1382,15 +1376,11 @@ def SKLWriteResGroup110 : SchedWriteRes<
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup110], (instregex "VMASKMOVPDYrm",
- "VMASKMOVPSYrm",
- "VPADDBYrm",
+def: InstRW<[SKLWriteResGroup110], (instregex "VPADDBYrm",
"VPADDDYrm",
"VPADDQYrm",
"VPADDWYrm",
"VPBLENDDYrmi",
- "VPMASKMOVDYrm",
- "VPMASKMOVQYrm",
"VPSUBBYrm",
"VPSUBDYrm",
"VPSUBQYrm",
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td?rev=331760&r1=331759&r2=331760&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td Tue May 8 05:17:55 2018
@@ -147,10 +147,14 @@ def : WriteRes<WriteZero, []>;
defm : SKXWriteResPair<WriteJump, [SKXPort06], 1>;
// Floating point. This covers both scalar and vector operations.
-def : WriteRes<WriteFLoad, [SKXPort23]> { let Latency = 5; }
-def : WriteRes<WriteFStore, [SKXPort237, SKXPort4]>;
-def : WriteRes<WriteFMove, [SKXPort015]>;
-defm : X86WriteRes<WriteEMMS, [SKXPort05,SKXPort0156], 10, [9,1], 10>;
+defm : X86WriteRes<WriteFLoad, [SKXPort23], 5, [1], 1>;
+defm : X86WriteRes<WriteFMaskedLoad, [SKXPort23,SKXPort015], 7, [1,1], 2>;
+defm : X86WriteRes<WriteFMaskedLoadY, [SKXPort23,SKXPort015], 8, [1,1], 2>;
+defm : X86WriteRes<WriteFStore, [SKXPort237,SKXPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteFMaskedStore, [SKXPort237,SKXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteFMaskedStoreY, [SKXPort237,SKXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteFMove, [SKXPort015], 1, [1], 1>;
+defm : X86WriteRes<WriteEMMS, [SKXPort05,SKXPort0156], 10, [9,1], 10>;
defm : SKXWriteResPair<WriteFAdd, [SKXPort015], 4, [1], 1, 5>; // Floating point add/sub.
defm : SKXWriteResPair<WriteFAddX, [SKXPort015], 4, [1], 1, 6>; // Floating point add/sub (XMM).
@@ -234,9 +238,13 @@ def : WriteRes<WriteCvtF2FSt, [SKXPort4
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
// Vector integer operations.
-def : WriteRes<WriteVecLoad, [SKXPort23]> { let Latency = 5; }
-def : WriteRes<WriteVecStore, [SKXPort237, SKXPort4]>;
-def : WriteRes<WriteVecMove, [SKXPort015]>;
+defm : X86WriteRes<WriteVecLoad, [SKXPort23], 5, [1], 1>;
+defm : X86WriteRes<WriteVecMaskedLoad, [SKXPort23,SKXPort015], 7, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedLoadY, [SKXPort23,SKXPort015], 8, [1,1], 2>;
+defm : X86WriteRes<WriteVecStore, [SKXPort237,SKXPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteVecMaskedStore, [SKXPort237,SKXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStoreY, [SKXPort237,SKXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecMove, [SKXPort015], 1, [1], 1>;
defm : SKXWriteResPair<WriteVecALU, [SKXPort01], 1, [1], 1, 6>; // Vector integer ALU op, no logicals.
defm : SKXWriteResPair<WriteVecALUY, [SKXPort01], 1, [1], 1, 7>; // Vector integer ALU op, no logicals (YMM/ZMM).
@@ -845,20 +853,6 @@ def: InstRW<[SKXWriteResGroup17], (instr
WAIT,
XGETBV)>;
-def SKXWriteResGroup18 : SchedWriteRes<[SKXPort0,SKXPort237]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKXWriteResGroup18], (instregex "VMASKMOVPDYmr",
- "VMASKMOVPDmr",
- "VMASKMOVPSYmr",
- "VMASKMOVPSmr",
- "VPMASKMOVDYmr",
- "VPMASKMOVDmr",
- "VPMASKMOVQYmr",
- "VPMASKMOVQmr")>;
-
def SKXWriteResGroup20 : SchedWriteRes<[SKXPort6,SKXPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
@@ -1936,8 +1930,6 @@ def: InstRW<[SKXWriteResGroup95], (instr
"VBROADCASTSSZ128m(b?)",
"VINSERTF128rm",
"VINSERTI128rm",
- "VMASKMOVPDrm",
- "VMASKMOVPSrm",
"VMOVAPDZ128rm(b?)",
"VMOVAPSZ128rm(b?)",
"VMOVDDUPZ128rm(b?)",
@@ -1967,8 +1959,6 @@ def: InstRW<[SKXWriteResGroup95], (instr
"VPBLENDMWZ128rm(b?)",
"VPBROADCASTDZ128m(b?)",
"VPBROADCASTQZ128m(b?)",
- "VPMASKMOVDrm",
- "VPMASKMOVQrm",
"VPSUBBZ128rm(b?)",
"(V?)PSUBBrm",
"VPSUBDZ128rm(b?)",
@@ -2226,8 +2216,6 @@ def: InstRW<[SKXWriteResGroup121], (inst
"VINSERTI64x2Z256rm(b?)",
"VINSERTI64x2Zrm(b?)",
"VINSERTI64x4Zrm(b?)",
- "VMASKMOVPDYrm",
- "VMASKMOVPSYrm",
"VMOVAPDZ256rm(b?)",
"VMOVAPDZrm(b?)",
"VMOVAPSZ256rm(b?)",
@@ -2280,8 +2268,6 @@ def: InstRW<[SKXWriteResGroup121], (inst
"VPBROADCASTDZm(b?)",
"VPBROADCASTQZ256m(b?)",
"VPBROADCASTQZm(b?)",
- "VPMASKMOVDYrm",
- "VPMASKMOVQYrm",
"VPSUBBYrm",
"VPSUBBZ256rm(b?)",
"VPSUBBZrm(b?)",
Modified: llvm/trunk/lib/Target/X86/X86Schedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Schedule.td?rev=331760&r1=331759&r2=331760&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Schedule.td (original)
+++ llvm/trunk/lib/Target/X86/X86Schedule.td Tue May 8 05:17:55 2018
@@ -105,9 +105,13 @@ def WriteZero : SchedWrite;
defm WriteJump : X86SchedWritePair;
// Floating point. This covers both scalar and vector operations.
-def WriteFLoad : SchedWrite;
-def WriteFStore : SchedWrite;
-def WriteFMove : SchedWrite;
+def WriteFLoad : SchedWrite;
+def WriteFMaskedLoad : SchedWrite;
+def WriteFMaskedLoadY : SchedWrite;
+def WriteFStore : SchedWrite;
+def WriteFMaskedStore : SchedWrite;
+def WriteFMaskedStoreY : SchedWrite;
+def WriteFMove : SchedWrite;
defm WriteFAdd : X86SchedWritePair; // Floating point add/sub.
defm WriteFAddX : X86SchedWritePair; // Floating point add/sub (XMM).
@@ -183,9 +187,13 @@ defm WritePHAdd : X86SchedWritePair;
defm WritePHAddY : X86SchedWritePair; // YMM/ZMM.
// Vector integer operations.
-def WriteVecLoad : SchedWrite;
-def WriteVecStore : SchedWrite;
-def WriteVecMove : SchedWrite;
+def WriteVecLoad : SchedWrite;
+def WriteVecMaskedLoad : SchedWrite;
+def WriteVecMaskedLoadY : SchedWrite;
+def WriteVecStore : SchedWrite;
+def WriteVecMaskedStore : SchedWrite;
+def WriteVecMaskedStoreY : SchedWrite;
+def WriteVecMove : SchedWrite;
defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals.
defm WriteVecALUY : X86SchedWritePair; // Vector integer ALU op, no logicals (YMM/ZMM).
Modified: llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleAtom.td?rev=331760&r1=331759&r2=331760&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleAtom.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleAtom.td Tue May 8 05:17:55 2018
@@ -198,8 +198,14 @@ def : WriteRes<WriteNop, [AtomPort01]>;
// Floating point. This covers both scalar and vector operations.
////////////////////////////////////////////////////////////////////////////////
-def : WriteRes<WriteFLoad, [AtomPort0]>;
-def : WriteRes<WriteFStore, [AtomPort0]>;
+def : WriteRes<WriteFLoad, [AtomPort0]>;
+def : WriteRes<WriteFMaskedLoad, [AtomPort0]>;
+def : WriteRes<WriteFMaskedLoadY, [AtomPort0]>;
+
+def : WriteRes<WriteFStore, [AtomPort0]>;
+def : WriteRes<WriteFMaskedStore, [AtomPort0]>;
+def : WriteRes<WriteFMaskedStoreY, [AtomPort0]>;
+
def : WriteRes<WriteFMove, [AtomPort01]>;
defm : X86WriteRes<WriteEMMS,[AtomPort01], 5, [5], 1>;
@@ -282,8 +288,14 @@ def : WriteRes<WriteCvtF2FSt, [AtomPort
// Vector integer operations.
////////////////////////////////////////////////////////////////////////////////
-def : WriteRes<WriteVecLoad, [AtomPort0]>;
-def : WriteRes<WriteVecStore, [AtomPort0]>;
+def : WriteRes<WriteVecLoad, [AtomPort0]>;
+def : WriteRes<WriteVecMaskedLoad, [AtomPort0]>;
+def : WriteRes<WriteVecMaskedLoadY, [AtomPort0]>;
+
+def : WriteRes<WriteVecStore, [AtomPort0]>;
+def : WriteRes<WriteVecMaskedStore, [AtomPort0]>;
+def : WriteRes<WriteVecMaskedStoreY, [AtomPort0]>;
+
def : WriteRes<WriteVecMove, [AtomPort01]>;
defm : AtomWriteResPair<WriteVecALU, [AtomPort01], [AtomPort0], 1, 1>;
Modified: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td?rev=331760&r1=331759&r2=331760&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td Tue May 8 05:17:55 2018
@@ -311,8 +311,14 @@ def : WriteRes<WriteNop, [JALU01]> { let
// Floating point. This covers both scalar and vector operations.
////////////////////////////////////////////////////////////////////////////////
-def : WriteRes<WriteFLoad, [JLAGU, JFPU01, JFPX]> { let Latency = 5; }
-def : WriteRes<WriteFStore, [JSAGU, JFPU1, JSTC]>;
+defm : X86WriteRes<WriteFLoad, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
+defm : X86WriteRes<WriteFMaskedLoad, [JLAGU, JFPU01, JFPX], 6, [1, 1, 2], 1>;
+defm : X86WriteRes<WriteFMaskedLoadY, [JLAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>;
+
+defm : X86WriteRes<WriteFStore, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
+defm : X86WriteRes<WriteFMaskedStore, [JSAGU, JFPU01, JFPX], 6, [1, 1, 4], 1>;
+defm : X86WriteRes<WriteFMaskedStoreY, [JSAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>;
+
def : WriteRes<WriteFMove, [JFPU01, JFPX]>;
def : WriteRes<WriteEMMS, [JFPU01, JFPX]> { let Latency = 2; }
@@ -434,8 +440,14 @@ def : InstRW<[JWriteCVTSI2FLd], (instreg
// Vector integer operations.
////////////////////////////////////////////////////////////////////////////////
-def : WriteRes<WriteVecLoad, [JLAGU, JFPU01, JVALU]> { let Latency = 5; }
-def : WriteRes<WriteVecStore, [JSAGU, JFPU1, JSTC]>;
+defm : X86WriteRes<WriteVecLoad, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
+defm : X86WriteRes<WriteVecMaskedLoad, [JLAGU, JFPU01, JVALU], 6, [1, 1, 2], 1>;
+defm : X86WriteRes<WriteVecMaskedLoadY, [JLAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>;
+
+defm : X86WriteRes<WriteVecStore, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
+defm : X86WriteRes<WriteVecMaskedStore, [JSAGU, JFPU01, JVALU], 6, [1, 1, 4], 1>;
+defm : X86WriteRes<WriteVecMaskedStoreY, [JSAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>;
+
def : WriteRes<WriteVecMove, [JFPU01, JVALU]>;
defm : JWriteResFpuPair<WriteVecALU, [JFPU01, JVALU], 1>;
@@ -622,32 +634,6 @@ def JWriteVBROADCASTYLd: SchedWriteRes<[
def : InstRW<[JWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm,
VBROADCASTSSYrm)>;
-def JWriteVMaskMovLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
- let Latency = 6;
- let ResourceCycles = [1, 1, 2];
-}
-def : InstRW<[JWriteVMaskMovLd], (instrs VMASKMOVPDrm, VMASKMOVPSrm)>;
-
-def JWriteVMaskMovYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
- let Latency = 6;
- let ResourceCycles = [2, 2, 4];
- let NumMicroOps = 2;
-}
-def : InstRW<[JWriteVMaskMovYLd], (instrs VMASKMOVPDYrm, VMASKMOVPSYrm)>;
-
-def JWriteVMaskMovSt: SchedWriteRes<[JFPU01, JFPX, JSAGU]> {
- let Latency = 6;
- let ResourceCycles = [1, 4, 1];
-}
-def : InstRW<[JWriteVMaskMovSt], (instrs VMASKMOVPDmr, VMASKMOVPSmr)>;
-
-def JWriteVMaskMovYSt: SchedWriteRes<[JFPU01, JFPX, JSAGU]> {
- let Latency = 6;
- let ResourceCycles = [2, 4, 2];
- let NumMicroOps = 2;
-}
-def : InstRW<[JWriteVMaskMovYSt], (instrs VMASKMOVPDYmr, VMASKMOVPSYmr)>;
-
def JWriteJVZEROALL: SchedWriteRes<[]> {
let Latency = 90;
let NumMicroOps = 73;
Modified: llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleSLM.td?rev=331760&r1=331759&r2=331760&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleSLM.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleSLM.td Tue May 8 05:17:55 2018
@@ -125,10 +125,14 @@ defm : SLMWriteResPair<WriteBZHI, [SLM_I
defm : SLMWriteResPair<WriteIDiv, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
// Scalar and vector floating point.
-def : WriteRes<WriteFStore, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
-def : WriteRes<WriteFLoad, [SLM_MEC_RSV]> { let Latency = 3; }
-def : WriteRes<WriteFMove, [SLM_FPC_RSV01]>;
-defm : X86WriteRes<WriteEMMS, [SLM_FPC_RSV01], 10, [10], 9>;
+def : WriteRes<WriteFLoad, [SLM_MEC_RSV]> { let Latency = 3; }
+def : WriteRes<WriteFMaskedLoad, [SLM_MEC_RSV]> { let Latency = 3; }
+def : WriteRes<WriteFMaskedLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
+def : WriteRes<WriteFStore, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
+def : WriteRes<WriteFMaskedStore, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
+def : WriteRes<WriteFMaskedStoreY, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
+def : WriteRes<WriteFMove, [SLM_FPC_RSV01]>;
+defm : X86WriteRes<WriteEMMS, [SLM_FPC_RSV01], 10, [10], 9>;
defm : SLMWriteResPair<WriteFAdd, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFAddX, [SLM_FPC_RSV1], 3>;
@@ -193,9 +197,13 @@ defm : SLMWriteResPair<WriteFBlend, [SL
def : WriteRes<WriteCvtF2FSt, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
// Vector integer operations.
-def : WriteRes<WriteVecStore, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
-def : WriteRes<WriteVecLoad, [SLM_MEC_RSV]> { let Latency = 3; }
-def : WriteRes<WriteVecMove, [SLM_FPC_RSV01]>;
+def : WriteRes<WriteVecLoad, [SLM_MEC_RSV]> { let Latency = 3; }
+def : WriteRes<WriteVecMaskedLoad, [SLM_MEC_RSV]> { let Latency = 3; }
+def : WriteRes<WriteVecMaskedLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
+def : WriteRes<WriteVecStore, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
+def : WriteRes<WriteVecMaskedStore, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
+def : WriteRes<WriteVecMaskedStoreY, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
+def : WriteRes<WriteVecMove, [SLM_FPC_RSV01]>;
defm : SLMWriteResPair<WriteVecShift, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVecShiftX, [SLM_FPC_RSV0], 1>;
Modified: llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td?rev=331760&r1=331759&r2=331760&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td Tue May 8 05:17:55 2018
@@ -188,9 +188,13 @@ def : WriteRes<WriteIMulH, [ZnALU1, ZnM
}
// Floating point operations
-def : WriteRes<WriteFStore, [ZnAGU]>;
-def : WriteRes<WriteFMove, [ZnFPU]>;
-def : WriteRes<WriteFLoad, [ZnAGU]> { let Latency = 8; }
+defm : X86WriteRes<WriteFLoad, [ZnAGU], 8, [1], 1>;
+defm : X86WriteRes<WriteFMaskedLoad, [ZnAGU,ZnFPU01], 8, [1,1], 1>;
+defm : X86WriteRes<WriteFMaskedLoadY, [ZnAGU,ZnFPU01], 8, [1,2], 2>;
+defm : X86WriteRes<WriteFStore, [ZnAGU], 1, [1,1], 1>;
+defm : X86WriteRes<WriteFMaskedStore, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
+defm : X86WriteRes<WriteFMaskedStoreY, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
+defm : X86WriteRes<WriteFMove, [ZnFPU], 1, [1], 1>;
defm : ZnWriteResFpuPair<WriteFAdd, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFAddX, [ZnFPU0], 3>;
@@ -260,10 +264,14 @@ defm : ZnWriteResFpuPair<WriteFSqrt80,
def : WriteRes<WriteCvtF2FSt, [ZnFPU3, ZnAGU]>;
// Vector integer operations which uses FPU units
-def : WriteRes<WriteVecStore, [ZnAGU]>;
-def : WriteRes<WriteVecMove, [ZnFPU]>;
-def : WriteRes<WriteVecLoad, [ZnAGU]> { let Latency = 8; }
-def : WriteRes<WriteEMMS, [ZnFPU]> { let Latency = 2; }
+defm : X86WriteRes<WriteVecLoad, [ZnAGU], 8, [1], 1>;
+defm : X86WriteRes<WriteVecMaskedLoad, [ZnAGU,ZnFPU01], 8, [1,2], 2>;
+defm : X86WriteRes<WriteVecMaskedLoadY, [ZnAGU,ZnFPU01], 9, [1,3], 2>;
+defm : X86WriteRes<WriteVecStore, [ZnAGU], 1, [1,1], 1>;
+defm : X86WriteRes<WriteVecMaskedStore, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
+defm : X86WriteRes<WriteVecMaskedStoreY, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
+defm : X86WriteRes<WriteVecMove, [ZnFPU], 1, [1], 1>;
+defm : X86WriteRes<WriteEMMS, [ZnFPU], 2, [1], 1>;
defm : ZnWriteResFpuPair<WriteVecShift, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecShiftX, [ZnFPU2], 1>;
@@ -1030,11 +1038,8 @@ def : InstRW<[WriteMicrocoded], (instreg
// MASKMOVDQU.
def : InstRW<[WriteMicrocoded], (instregex "(V?)MASKMOVDQU(64)?")>;
-// VPMASKMOVQ.
+// VPMASKMOVD.
// ymm
-def : InstRW<[ZnWriteFPU01Op2],(instregex "VPMASKMOVQrm")>;
-def : InstRW<[ZnWriteFPU01Op2Y],(instregex "VPMASKMOVQYrm")>;
-
def : InstRW<[WriteMicrocoded],
(instregex "VPMASKMOVD(Y?)rm")>;
// m, v,v.
@@ -1168,32 +1173,6 @@ def ZnWriteVINSERT128Ld: SchedWriteRes<[
def : InstRW<[ZnWriteVINSERT128r], (instregex "VINSERTF128rr")>;
def : InstRW<[ZnWriteVINSERT128Ld], (instregex "VINSERTF128rm")>;
-// VMASKMOVP S/D.
-// x,x,m.
-def ZnWriteVMASKMOVPLd : SchedWriteRes<[ZnAGU, ZnFPU01]> {
- let Latency = 8;
-}
-// y,y,m.
-def ZnWriteVMASKMOVPLdY : SchedWriteRes<[ZnAGU, ZnFPU01]> {
- let Latency = 8;
- let NumMicroOps = 2;
- let ResourceCycles = [1, 2];
-}
-def ZnWriteVMASKMOVPm : SchedWriteRes<[ZnAGU, ZnFPU01]> {
- let Latency = 4;
-}
-def : InstRW<[ZnWriteVMASKMOVPLd], (instregex "VMASKMOVP(S|D)rm")>;
-def : InstRW<[ZnWriteVMASKMOVPLdY], (instregex "VMASKMOVP(S|D)Yrm")>;
-def : InstRW<[ZnWriteVMASKMOVPm], (instregex "VMASKMOVP(S|D)mr")>;
-
-// m256,y,y.
-def ZnWriteVMASKMOVPYmr : SchedWriteRes<[ZnAGU,ZnFPU01]> {
- let Latency = 5;
- let NumMicroOps = 2;
- let ResourceCycles = [1, 2];
-}
-def : InstRW<[ZnWriteVMASKMOVPYmr], (instregex "VMASKMOVP(S|D)Ymr")>;
-
// VGATHERDPS.
// x.
def : InstRW<[WriteMicrocoded], (instregex "VGATHERDPSrm")>;
Modified: llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-schedule.ll?rev=331760&r1=331759&r2=331760&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-schedule.ll Tue May 8 05:17:55 2018
@@ -3384,8 +3384,8 @@ declare <8 x i32> @llvm.x86.avx2.pmadd.w
define <4 x i32> @test_pmaskmovd(i8* %a0, <4 x i32> %a1, <4 x i32> %a2) {
; GENERIC-LABEL: test_pmaskmovd:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [6:0.50]
-; GENERIC-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [1:1.00]
+; GENERIC-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
+; GENERIC-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -3433,8 +3433,8 @@ declare void @llvm.x86.avx2.maskstore.d(
define <8 x i32> @test_pmaskmovd_ymm(i8* %a0, <8 x i32> %a1, <8 x i32> %a2) {
; GENERIC-LABEL: test_pmaskmovd_ymm:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [6:0.50]
-; GENERIC-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [1:1.00]
+; GENERIC-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [9:1.00]
+; GENERIC-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -3482,8 +3482,8 @@ declare void @llvm.x86.avx2.maskstore.d.
define <2 x i64> @test_pmaskmovq(i8* %a0, <2 x i64> %a1, <2 x i64> %a2) {
; GENERIC-LABEL: test_pmaskmovq:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [6:0.50]
-; GENERIC-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [1:1.00]
+; GENERIC-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
+; GENERIC-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -3531,8 +3531,8 @@ declare void @llvm.x86.avx2.maskstore.q(
define <4 x i64> @test_pmaskmovq_ymm(i8* %a0, <4 x i64> %a1, <4 x i64> %a2) {
; GENERIC-LABEL: test_pmaskmovq_ymm:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [6:0.50]
-; GENERIC-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [1:1.00]
+; GENERIC-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [9:1.00]
+; GENERIC-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
More information about the llvm-commits mailing list