[llvm] r331629 - [X86] Split WriteFRcp/WriteFRsqrt/WriteFSqrt schedule classes
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon May 7 04:50:44 PDT 2018
Author: rksimon
Date: Mon May 7 04:50:44 2018
New Revision: 331629
URL: http://llvm.org/viewvc/llvm-project?rev=331629&view=rev
Log:
[X86] Split WriteFRcp/WriteFRsqrt/WriteFSqrt schedule classes
WriteFRcp/WriteFRsqrt are split to support scalar, XMM and YMM/ZMM instructions.
WriteFSqrt is split into single/double/long-double sizes and scalar, XMM, YMM and ZMM instructions.
This removes all InstRW overrides for these instructions.
NOTE: There were a couple of typos in the Znver1 model - notably a 1-cycle throughput for SQRT that is highly unlikely and doesn't tally with Agner Fog's instruction tables.
NOTE: For several targets I had to add WriteFSqrt80 numbers taken from Agner Fog's instruction tables.
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrFPStack.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
llvm/trunk/lib/Target/X86/X86SchedHaswell.td
llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
llvm/trunk/lib/Target/X86/X86Schedule.td
llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
llvm/trunk/test/CodeGen/X86/avx-schedule.ll
llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
llvm/trunk/test/CodeGen/X86/recip-fastmath.ll
llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll
llvm/trunk/test/CodeGen/X86/sse-schedule.ll
llvm/trunk/test/CodeGen/X86/sse2-schedule.ll
llvm/trunk/test/CodeGen/X86/x87-schedule.ll
llvm/trunk/test/tools/llvm-mca/X86/Broadwell/resources-x87.s
llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-x87.s
llvm/trunk/test/tools/llvm-mca/X86/Haswell/resources-x87.s
llvm/trunk/test/tools/llvm-mca/X86/SLM/resources-x87.s
llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-x87.s
llvm/trunk/test/tools/llvm-mca/X86/SkylakeClient/resources-x87.s
llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-x87.s
llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-avx1.s
llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-sse1.s
llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-sse2.s
llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-x87.s
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=331629&r1=331628&r2=331629&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Mon May 7 04:50:44 2018
@@ -8092,34 +8092,38 @@ multiclass avx512_sqrt_packed<bits<8> op
}
multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
- X86SchedWriteWidths sched> {
- defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), sched.ZMM, v16f32_info>,
+ X86SchedWriteSizes sched> {
+ defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
+ sched.PS.ZMM, v16f32_info>,
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
- defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), sched.ZMM, v8f64_info>,
+ defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
+ sched.PD.ZMM, v8f64_info>,
EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
// Define only if AVX512VL feature is present.
let Predicates = [HasVLX] in {
defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
- sched.XMM, v4f32x_info>,
+ sched.PS.XMM, v4f32x_info>,
EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
- sched.YMM, v8f32x_info>,
+ sched.PS.YMM, v8f32x_info>,
EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
- sched.XMM, v2f64x_info>,
+ sched.PD.XMM, v2f64x_info>,
EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
- sched.YMM, v4f64x_info>,
+ sched.PD.YMM, v4f64x_info>,
EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
}
}
multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
- X86SchedWriteWidths sched> {
- defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"), sched.ZMM,
- v16f32_info>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
- defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"), sched.ZMM,
- v8f64_info>, EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
+ X86SchedWriteSizes sched> {
+ defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
+ sched.PS.ZMM, v16f32_info>,
+ EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
+ defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
+ sched.PD.ZMM, v8f64_info>,
+ EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
}
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
@@ -8182,20 +8186,20 @@ multiclass avx512_sqrt_scalar<bits<8> op
}
multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
- X86SchedWriteWidths sched> {
- defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.Scl, f32x_info, "SS",
+ X86SchedWriteSizes sched> {
+ defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, "SS",
int_x86_sse_sqrt_ss>,
EVEX_CD8<32, CD8VT1>, EVEX_4V, XS, NotMemoryFoldable;
- defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.Scl, f64x_info, "SD",
+ defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, "SD",
int_x86_sse2_sqrt_sd>,
EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W,
NotMemoryFoldable;
}
-defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrt>,
- avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrt>;
+defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
+ avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
-defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrt>, VEX_LIG;
+defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
Modified: llvm/trunk/lib/Target/X86/X86InstrFPStack.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFPStack.td?rev=331629&r1=331628&r2=331629&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFPStack.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFPStack.td Mon May 7 04:50:44 2018
@@ -314,7 +314,7 @@ defm CHS : FPUnary<fneg, MRM_E0, "fchs">
defm ABS : FPUnary<fabs, MRM_E1, "fabs">;
}
-let SchedRW = [WriteFSqrt] in
+let SchedRW = [WriteFSqrt80] in
defm SQRT: FPUnary<fsqrt,MRM_FA, "fsqrt">;
let SchedRW = [WriteMicrocoded] in {
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=331629&r1=331628&r2=331629&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Mon May 7 04:50:44 2018
@@ -2935,8 +2935,8 @@ multiclass sse2_fp_unop_s<bits<8> opc, s
// Square root.
defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SchedWriteFSqrt, UseAVX>,
sse1_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt, [HasAVX, NoVLX]>,
- sse2_fp_unop_s<0x51, "sqrt", fsqrt, SchedWriteFSqrt, UseAVX>,
- sse2_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt>;
+ sse2_fp_unop_s<0x51, "sqrt", fsqrt, SchedWriteFSqrt64, UseAVX>,
+ sse2_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt64>;
// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
Modified: llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedBroadwell.td?rev=331629&r1=331628&r2=331629&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedBroadwell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedBroadwell.td Mon May 7 04:50:44 2018
@@ -164,12 +164,27 @@ defm : BWWriteResPair<WriteFMul, [BWPo
defm : BWWriteResPair<WriteFMulY, [BWPort01], 3, [1], 1, 6>; // Floating point multiplication (YMM/ZMM).
defm : BWWriteResPair<WriteFDiv, [BWPort0], 12, [1], 1, 5>; // 10-14 cycles. // Floating point division.
defm : BWWriteResPair<WriteFDivY, [BWPort0], 12, [1], 1, 7>; // 10-14 cycles. // Floating point division (YMM/ZMM).
-defm : BWWriteResPair<WriteFSqrt, [BWPort0], 15, [1], 1, 5>; // Floating point square root.
-defm : BWWriteResPair<WriteFSqrtY, [BWPort0], 15, [1], 1, 7>; // Floating point square root (YMM/ZMM).
+
+defm : X86WriteRes<WriteFSqrt, [BWPort0,BWFPDivider], 11, [1,4], 1>; // Floating point square root.
+defm : X86WriteRes<WriteFSqrtLd, [BWPort0,BWPort23,BWFPDivider], 16, [1,1,7], 2>;
+defm : BWWriteResPair<WriteFSqrtX, [BWPort0,BWFPDivider], 11, [1,7], 1, 5>; // Floating point square root (XMM).
+defm : BWWriteResPair<WriteFSqrtY, [BWPort0,BWPort015,BWFPDivider], 21, [2,1,14], 3, 6>; // Floating point square root (YMM).
+defm : BWWriteResPair<WriteFSqrtZ, [BWPort0,BWPort015,BWFPDivider], 21, [2,1,14], 3, 6>; // Floating point square root (ZMM).
+defm : X86WriteRes<WriteFSqrt64, [BWPort0,BWFPDivider], 16, [1,8], 1>; // Floating point double square root.
+defm : X86WriteRes<WriteFSqrt64Ld, [BWPort0,BWPort23,BWFPDivider], 21, [1,1,14], 2>;
+defm : BWWriteResPair<WriteFSqrt64X, [BWPort0,BWFPDivider], 16, [1,14],1, 5>; // Floating point double square root (XMM).
+defm : BWWriteResPair<WriteFSqrt64Y, [BWPort0,BWPort015,BWFPDivider], 29, [2,1,28], 3, 6>; // Floating point double square root (YMM).
+defm : BWWriteResPair<WriteFSqrt64Z, [BWPort0,BWPort015,BWFPDivider], 29, [2,1,28], 3, 6>; // Floating point double square root (ZMM).
+defm : BWWriteResPair<WriteFSqrt80, [BWPort0,BWFPDivider], 23, [1,9]>; // Floating point long double square root.
+
defm : BWWriteResPair<WriteFRcp, [BWPort0], 5, [1], 1, 5>; // Floating point reciprocal estimate.
-defm : BWWriteResPair<WriteFRcpY, [BWPort0], 5, [1], 1, 7>; // Floating point reciprocal estimate (YMM/ZMM).
+defm : BWWriteResPair<WriteFRcpX, [BWPort0], 5, [1], 1, 5>; // Floating point reciprocal estimate (XMM).
+defm : BWWriteResPair<WriteFRcpY, [BWPort0,BWPort015], 11, [2,1], 3, 6>; // Floating point reciprocal estimate (YMM/ZMM).
+
defm : BWWriteResPair<WriteFRsqrt, [BWPort0], 5, [1], 1, 5>; // Floating point reciprocal square root estimate.
-defm : BWWriteResPair<WriteFRsqrtY,[BWPort0], 5, [1], 1, 7>; // Floating point reciprocal square root estimate (YMM/ZMM).
+defm : BWWriteResPair<WriteFRsqrtX,[BWPort0], 5, [1], 1, 5>; // Floating point reciprocal square root estimate (XMM).
+defm : BWWriteResPair<WriteFRsqrtY,[BWPort0,BWPort015], 11, [2,1], 3, 6>; // Floating point reciprocal square root estimate (YMM/ZMM).
+
defm : BWWriteResPair<WriteFMA, [BWPort01], 5, [1], 1, 5>; // Fused Multiply Add.
defm : BWWriteResPair<WriteFMAX, [BWPort01], 5, [1], 1, 5>; // Fused Multiply Add (XMM).
defm : BWWriteResPair<WriteFMAY, [BWPort01], 5, [1], 1, 6>; // Fused Multiply Add (YMM/ZMM).
@@ -1401,14 +1416,6 @@ def BWWriteResGroup123 : SchedWriteRes<[
def: InstRW<[BWWriteResGroup123], (instregex "MUL_F(32|64)m",
"VPCMPGTQYrm")>;
-def BWWriteResGroup126 : SchedWriteRes<[BWPort0,BWPort015]> {
- let Latency = 11;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[BWWriteResGroup126], (instregex "VRCPPSYr",
- "VRSQRTPSYr")>;
-
def BWWriteResGroup128 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> {
let Latency = 11;
let NumMicroOps = 3;
@@ -1454,20 +1461,6 @@ def BWWriteResGroup135 : SchedWriteRes<[
}
def: InstRW<[BWWriteResGroup135], (instregex "(ADD|SUB|SUBR)_FI(16|32)m")>;
-def BWWriteResGroup137 : SchedWriteRes<[BWPort0,BWFPDivider]> {
- let Latency = 11;
- let NumMicroOps = 1;
- let ResourceCycles = [1,7];
-}
-def: InstRW<[BWWriteResGroup137], (instregex "(V?)SQRTPSr")>;
-
-def BWWriteResGroup137_1 : SchedWriteRes<[BWPort0,BWFPDivider]> {
- let Latency = 11;
- let NumMicroOps = 1;
- let ResourceCycles = [1,4];
-}
-def: InstRW<[BWWriteResGroup137_1], (instregex "(V?)SQRTSSr")>;
-
def BWWriteResGroup139 : SchedWriteRes<[BWPort0,BWFPDivider]> {
let Latency = 14;
let NumMicroOps = 1;
@@ -1555,22 +1548,6 @@ def BWWriteResGroup155 : SchedWriteRes<[
}
def: InstRW<[BWWriteResGroup155], (instregex "VDIVPSYrr")>;
-def BWWriteResGroup156 : SchedWriteRes<[BWPort0,BWPort23,BWPort015]> {
- let Latency = 17;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
-}
-def: InstRW<[BWWriteResGroup156], (instregex "VRCPPSYm",
- "VRSQRTPSYm")>;
-
-def BWWriteResGroup157 : SchedWriteRes<[BWPort0,BWPort23,BWFPDivider]> {
- let Latency = 16;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1,7];
-}
-def: InstRW<[BWWriteResGroup157], (instregex "(V?)SQRTPSm",
- "(V?)SQRTSSm")>;
-
def BWWriteResGroup159 : SchedWriteRes<[BWPort5,BWPort6,BWPort06,BWPort0156]> {
let Latency = 18;
let NumMicroOps = 8;
@@ -1610,20 +1587,6 @@ def BWWriteResGroup167 : SchedWriteRes<[
}
def: InstRW<[BWWriteResGroup167], (instrs INSB, INSL, INSW)>;
-def BWWriteResGroup168 : SchedWriteRes<[BWPort0,BWFPDivider]> {
- let Latency = 16;
- let NumMicroOps = 1;
- let ResourceCycles = [1,14];
-}
-def: InstRW<[BWWriteResGroup168], (instregex "(V?)SQRTPDr")>;
-
-def BWWriteResGroup168_1 : SchedWriteRes<[BWPort0,BWFPDivider]> {
- let Latency = 16;
- let NumMicroOps = 1;
- let ResourceCycles = [1,8];
-}
-def: InstRW<[BWWriteResGroup168_1], (instregex "(V?)SQRTSDr")>;
-
def BWWriteResGroup169 : SchedWriteRes<[BWPort0,BWPort23]> {
let Latency = 21;
let NumMicroOps = 2;
@@ -1631,13 +1594,6 @@ def BWWriteResGroup169 : SchedWriteRes<[
}
def: InstRW<[BWWriteResGroup169], (instregex "DIV_F(32|64)m")>;
-def BWWriteResGroup170 : SchedWriteRes<[BWPort0,BWPort015,BWFPDivider]> {
- let Latency = 21;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1,14];
-}
-def: InstRW<[BWWriteResGroup170], (instregex "VSQRTPSYr")>;
-
def BWWriteResGroup171 : SchedWriteRes<[BWPort0,BWPort4,BWPort5,BWPort23,BWPort237,BWPort06,BWPort0156]> {
let Latency = 21;
let NumMicroOps = 19;
@@ -1680,14 +1636,6 @@ def BWWriteResGroup177 : SchedWriteRes<[
}
def: InstRW<[BWWriteResGroup177], (instregex "DIV_FI(16|32)m")>;
-def BWWriteResGroup179 : SchedWriteRes<[BWPort0,BWPort23,BWFPDivider]> {
- let Latency = 21;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1,14];
-}
-def: InstRW<[BWWriteResGroup179], (instregex "(V?)SQRTPDm",
- "(V?)SQRTSDm")>;
-
def BWWriteResGroup180 : SchedWriteRes<[BWPort0,BWPort23]> {
let Latency = 26;
let NumMicroOps = 2;
@@ -1695,13 +1643,6 @@ def BWWriteResGroup180 : SchedWriteRes<[
}
def: InstRW<[BWWriteResGroup180], (instregex "DIVR_F(32|64)m")>;
-def BWWriteResGroup181 : SchedWriteRes<[BWPort0,BWPort23,BWPort015,BWFPDivider]> {
- let Latency = 27;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1,14];
-}
-def: InstRW<[BWWriteResGroup181], (instregex "VSQRTPSYm")>;
-
def BWWriteResGroup182 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> {
let Latency = 29;
let NumMicroOps = 3;
@@ -1780,13 +1721,6 @@ def BWWriteResGroup186 : SchedWriteRes<[
}
def: InstRW<[BWWriteResGroup186], (instregex "^XSAVE$", "XSAVEC", "XSAVES", "XSAVEOPT")>;
-def BWWriteResGroup189 : SchedWriteRes<[BWPort0,BWPort015,BWFPDivider]> {
- let Latency = 29;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1,28];
-}
-def: InstRW<[BWWriteResGroup189], (instregex "VSQRTPDYr")>;
-
def BWWriteResGroup190 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156]> {
let Latency = 34;
let NumMicroOps = 8;
@@ -1817,13 +1751,6 @@ def BWWriteResGroup194 : SchedWriteRes<[
def: InstRW<[BWWriteResGroup194], (instregex "OUT(8|16|32)ir",
"OUT(8|16|32)rr")>;
-def BWWriteResGroup195 : SchedWriteRes<[BWPort0,BWPort23,BWPort015,BWFPDivider]> {
- let Latency = 35;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1,28];
-}
-def: InstRW<[BWWriteResGroup195], (instregex "VSQRTPDYm")>;
-
def BWWriteResGroup196 : SchedWriteRes<[BWPort5,BWPort0156]> {
let Latency = 42;
let NumMicroOps = 22;
Modified: llvm/trunk/lib/Target/X86/X86SchedHaswell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedHaswell.td?rev=331629&r1=331628&r2=331629&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedHaswell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedHaswell.td Mon May 7 04:50:44 2018
@@ -158,12 +158,25 @@ defm : HWWriteResPair<WriteFMul, [HWPor
defm : HWWriteResPair<WriteFMulY, [HWPort01], 5, [1], 1, 7>;
defm : HWWriteResPair<WriteFDiv, [HWPort0], 12, [1], 1, 5>; // 10-14 cycles.
defm : HWWriteResPair<WriteFDivY, [HWPort0], 12, [1], 1, 7>; // 10-14 cycles.
+
defm : HWWriteResPair<WriteFRcp, [HWPort0], 5, [1], 1, 5>;
-defm : HWWriteResPair<WriteFRcpY, [HWPort0], 5, [1], 1, 7>;
+defm : HWWriteResPair<WriteFRcpX, [HWPort0], 5, [1], 1, 6>;
+defm : HWWriteResPair<WriteFRcpY, [HWPort0,HWPort015], 11, [2,1], 3, 7>;
+
defm : HWWriteResPair<WriteFRsqrt, [HWPort0], 5, [1], 1, 5>;
-defm : HWWriteResPair<WriteFRsqrtY,[HWPort0], 5, [1], 1, 7>;
-defm : HWWriteResPair<WriteFSqrt, [HWPort0], 15, [1], 1, 5>;
-defm : HWWriteResPair<WriteFSqrtY, [HWPort0], 15, [1], 1, 7>;
+defm : HWWriteResPair<WriteFRsqrtX,[HWPort0], 5, [1], 1, 6>;
+defm : HWWriteResPair<WriteFRsqrtY,[HWPort0,HWPort015], 11, [2,1], 3, 7>;
+
+defm : HWWriteResPair<WriteFSqrt, [HWPort0,HWFPDivider], 11, [1,7], 1, 5>;
+defm : HWWriteResPair<WriteFSqrtX, [HWPort0,HWFPDivider], 11, [1,7], 1, 6>;
+defm : HWWriteResPair<WriteFSqrtY, [HWPort0,HWPort15,HWFPDivider], 21, [2,1,14], 3, 7>;
+defm : HWWriteResPair<WriteFSqrtZ, [HWPort0,HWPort15,HWFPDivider], 21, [2,1,14], 3, 7>;
+defm : HWWriteResPair<WriteFSqrt64, [HWPort0,HWFPDivider], 16, [1,14], 1, 5>;
+defm : HWWriteResPair<WriteFSqrt64X, [HWPort0,HWFPDivider], 16, [1,14], 1, 6>;
+defm : HWWriteResPair<WriteFSqrt64Y, [HWPort0,HWPort15,HWFPDivider], 35, [2,1,28], 3, 7>;
+defm : HWWriteResPair<WriteFSqrt64Z, [HWPort0,HWPort15,HWFPDivider], 35, [2,1,28], 3, 7>;
+defm : HWWriteResPair<WriteFSqrt80, [HWPort0,HWFPDivider], 23, [1,17]>;
+
defm : HWWriteResPair<WriteCvtF2I, [HWPort1], 3>;
defm : HWWriteResPair<WriteCvtI2F, [HWPort1], 4>;
defm : HWWriteResPair<WriteCvtF2F, [HWPort1], 3>;
@@ -1639,13 +1652,6 @@ def: InstRW<[HWWriteResGroup89], (instre
"MUL_FST0r",
"MUL_FrST0")>;
-def HWWriteResGroup91_1 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> {
- let Latency = 16;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1,7];
-}
-def: InstRW<[HWWriteResGroup91_1], (instregex "(V?)SQRTSSm")>;
-
def HWWriteResGroup91_4 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> {
let Latency = 18;
let NumMicroOps = 2;
@@ -1658,9 +1664,7 @@ def HWWriteResGroup91_2 : SchedWriteRes<
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup91_2], (instregex "(V?)PCMPGTQrm",
- "(V?)RCPPSm",
- "(V?)RSQRTPSm")>;
+def: InstRW<[HWWriteResGroup91_2], (instregex "(V?)PCMPGTQrm")>;
def HWWriteResGroup91_3 : SchedWriteRes<[HWPort0,HWPort23]> {
let Latency = 12;
@@ -1832,22 +1836,6 @@ def HWWriteResGroup121 : SchedWriteRes<[
def: InstRW<[HWWriteResGroup121], (instregex "(V?)DIVPSrr",
"(V?)DIVSSrr")>;
-def HWWriteResGroup125 : SchedWriteRes<[HWPort0,HWPort015]> {
- let Latency = 11;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[HWWriteResGroup125], (instregex "VRCPPSYr",
- "VRSQRTPSYr")>;
-
-def HWWriteResGroup128 : SchedWriteRes<[HWPort0,HWPort23,HWPort015]> {
- let Latency = 18;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
-}
-def: InstRW<[HWWriteResGroup128], (instregex "VRCPPSYm",
- "VRSQRTPSYm")>;
-
def HWWriteResGroup129 : SchedWriteRes<[HWPort1,HWPort06,HWPort0156]> {
let Latency = 11;
let NumMicroOps = 7;
@@ -1877,14 +1865,6 @@ def HWWriteResGroup132 : SchedWriteRes<[
}
def: InstRW<[HWWriteResGroup132], (instregex "CMPXCHG8B")>;
-def HWWriteResGroup133 : SchedWriteRes<[HWPort0,HWFPDivider]> {
- let Latency = 11;
- let NumMicroOps = 1;
- let ResourceCycles = [1,7];
-}
-def: InstRW<[HWWriteResGroup133], (instregex "(V?)SQRTPSr",
- "(V?)SQRTSSr")>;
-
def HWWriteResGroup134 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> {
let Latency = 19;
let NumMicroOps = 2;
@@ -1899,13 +1879,6 @@ def HWWriteResGroup135 : SchedWriteRes<[
}
def: InstRW<[HWWriteResGroup135], (instregex "RCR(8|16|32|64)mCL")>;
-def HWWriteResGroup138 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> {
- let Latency = 17;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1,7];
-}
-def: InstRW<[HWWriteResGroup138], (instregex "(V?)SQRTPSm")>;
-
def HWWriteResGroup142 : SchedWriteRes<[HWPort1,HWPort06,HWPort15,HWPort0156]> {
let Latency = 14;
let NumMicroOps = 10;
@@ -1994,20 +1967,6 @@ def HWWriteResGroup155_1 : SchedWriteRes
}
def: InstRW<[HWWriteResGroup155_1], (instregex "(V?)DIVPDrm")>;
-def HWWriteResGroup155_2 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> {
- let Latency = 21;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1,14];
-}
-def: InstRW<[HWWriteResGroup155_2], (instregex "(V?)SQRTSDm")>;
-
-def HWWriteResGroup155_3 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> {
- let Latency = 22;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1,14];
-}
-def: InstRW<[HWWriteResGroup155_3], (instregex "(V?)SQRTPDm")>;
-
def HWWriteResGroup155_4 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> {
let Latency = 25;
let NumMicroOps = 2;
@@ -2022,29 +1981,19 @@ def HWWriteResGroup156 : SchedWriteRes<[
}
def: InstRW<[HWWriteResGroup156], (instregex "MWAITrr")>;
-def HWWriteResGroup157 : SchedWriteRes<[HWPort0,HWFPDivider]> {
- let Latency = 16;
- let NumMicroOps = 1;
- let ResourceCycles = [1,14];
-}
-def: InstRW<[HWWriteResGroup157], (instregex "(V?)SQRTPDr",
- "(V?)SQRTSDr")>;
-
def HWWriteResGroup159 : SchedWriteRes<[HWPort0,HWPort15,HWFPDivider]> {
let Latency = 21;
let NumMicroOps = 3;
let ResourceCycles = [2,1,14];
}
-def: InstRW<[HWWriteResGroup159], (instregex "VDIVPSYrr",
- "VSQRTPSYr")>;
+def: InstRW<[HWWriteResGroup159], (instregex "VDIVPSYrr")>;
def HWWriteResGroup160 : SchedWriteRes<[HWPort0,HWPort23,HWPort15,HWFPDivider]> {
let Latency = 28;
let NumMicroOps = 4;
let ResourceCycles = [2,1,1,14];
}
-def: InstRW<[HWWriteResGroup160], (instregex "VDIVPSYrm",
- "VSQRTPSYm")>;
+def: InstRW<[HWWriteResGroup160], (instregex "VDIVPSYrm")>;
def HWWriteResGroup161 : SchedWriteRes<[HWPort0,HWPort1,HWPort23]> {
let Latency = 30;
@@ -2111,16 +2060,14 @@ def HWWriteResGroup173 : SchedWriteRes<[
let NumMicroOps = 3;
let ResourceCycles = [2,1,28];
}
-def: InstRW<[HWWriteResGroup173], (instregex "VDIVPDYrr",
- "VSQRTPDYr")>;
+def: InstRW<[HWWriteResGroup173], (instregex "VDIVPDYrr")>;
def HWWriteResGroup174 : SchedWriteRes<[HWPort0,HWPort23,HWPort15,HWFPDivider]> {
let Latency = 42;
let NumMicroOps = 4;
let ResourceCycles = [2,1,1,28];
}
-def: InstRW<[HWWriteResGroup174], (instregex "VDIVPDYrm",
- "VSQRTPDYm")>;
+def: InstRW<[HWWriteResGroup174], (instregex "VDIVPDYrm")>;
def HWWriteResGroup175 : SchedWriteRes<[HWPort1,HWPort4,HWPort5,HWPort6,HWPort23,HWPort237,HWPort15,HWPort0156]> {
let Latency = 41;
Modified: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td?rev=331629&r1=331628&r2=331629&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td Mon May 7 04:50:44 2018
@@ -148,12 +148,25 @@ defm : SBWriteResPair<WriteFMul, [SBPo
defm : SBWriteResPair<WriteFMulY, [SBPort0], 5, [1], 1, 7>;
defm : SBWriteResPair<WriteFDiv, [SBPort0], 24, [1], 1, 5>;
defm : SBWriteResPair<WriteFDivY, [SBPort0], 24, [1], 1, 7>;
+
defm : SBWriteResPair<WriteFRcp, [SBPort0], 5, [1], 1, 6>;
-defm : SBWriteResPair<WriteFRcpY, [SBPort0], 5, [1], 1, 7>;
+defm : SBWriteResPair<WriteFRcpX, [SBPort0], 5, [1], 1, 6>;
+defm : SBWriteResPair<WriteFRcpY, [SBPort0,SBPort05], 7, [2,1], 3, 7>;
+
defm : SBWriteResPair<WriteFRsqrt, [SBPort0], 5, [1], 1, 6>;
-defm : SBWriteResPair<WriteFRsqrtY,[SBPort0], 5, [1], 1, 7>;
-defm : SBWriteResPair<WriteFSqrt, [SBPort0], 14, [1], 1, 5>;
-defm : SBWriteResPair<WriteFSqrtY, [SBPort0], 14, [1], 1, 7>;
+defm : SBWriteResPair<WriteFRsqrtX,[SBPort0], 5, [1], 1, 6>;
+defm : SBWriteResPair<WriteFRsqrtY,[SBPort0,SBPort05], 7, [2,1], 3, 7>;
+
+defm : SBWriteResPair<WriteFSqrt, [SBPort0,SBFPDivider], 14, [1,14], 1, 6>;
+defm : SBWriteResPair<WriteFSqrtX, [SBPort0,SBFPDivider], 14, [1,14], 1, 6>;
+defm : SBWriteResPair<WriteFSqrtY, [SBPort0,SBPort05,SBFPDivider], 29, [2,1,28], 3, 7>;
+defm : SBWriteResPair<WriteFSqrtZ, [SBPort0,SBPort05,SBFPDivider], 29, [2,1,28], 3, 7>;
+defm : SBWriteResPair<WriteFSqrt64, [SBPort0,SBFPDivider], 21, [1,21], 1, 6>;
+defm : SBWriteResPair<WriteFSqrt64X, [SBPort0,SBFPDivider], 21, [1,21], 1, 6>;
+defm : SBWriteResPair<WriteFSqrt64Y, [SBPort0,SBPort05,SBFPDivider], 45, [2,1,44], 3, 7>;
+defm : SBWriteResPair<WriteFSqrt64Z, [SBPort0,SBPort05,SBFPDivider], 45, [2,1,44], 3, 7>;
+defm : SBWriteResPair<WriteFSqrt80, [SBPort0,SBFPDivider], 24, [1,24], 1, 6>;
+
defm : SBWriteResPair<WriteDPPD, [SBPort0,SBPort1,SBPort5], 9, [1,1,1], 3, 6>;
defm : SBWriteResPair<WriteDPPS, [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4, 6>;
defm : SBWriteResPair<WriteDPPSY, [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4, 7>;
@@ -951,14 +964,6 @@ def: InstRW<[SBWriteResGroup59a], (instr
"MMX_P(MAX|MIN)(SW|UB)irm",
"MMX_PSUB(B|D|Q|W)irm")>;
-def SBWriteResGroup61 : SchedWriteRes<[SBPort0,SBPort05]> {
- let Latency = 7;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[SBWriteResGroup61], (instregex "VRCPPSYr",
- "VRSQRTPSYr")>;
-
def SBWriteResGroup62 : SchedWriteRes<[SBPort5,SBPort23]> {
let Latency = 7;
let NumMicroOps = 3;
@@ -1361,18 +1366,8 @@ def SBWriteResGroup116 : SchedWriteRes<[
let NumMicroOps = 1;
let ResourceCycles = [1,14];
}
-def: InstRW<[SBWriteResGroup116], (instregex "(V?)SQRTSSr",
- "(V?)DIVPSrr",
- "(V?)DIVSSrr",
- "(V?)SQRTPSr")>;
-
-def SBWriteResGroup118 : SchedWriteRes<[SBPort0,SBPort23,SBPort05]> {
- let Latency = 14;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
-}
-def: InstRW<[SBWriteResGroup118], (instregex "VRCPPSYm",
- "VRSQRTPSYm")>;
+def: InstRW<[SBWriteResGroup116], (instregex "(V?)DIVPSrr",
+ "(V?)DIVSSrr")>;
def SBWriteResGroup119 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
let Latency = 15;
@@ -1386,26 +1381,8 @@ def SBWriteResGroup123 : SchedWriteRes<[
let NumMicroOps = 2;
let ResourceCycles = [1,1,14];
}
-def: InstRW<[SBWriteResGroup123], (instregex "(V?)SQRTSSm",
- "(V?)DIVPSrm",
- "(V?)DIVSSrm",
- "(V?)SQRTPSm")>;
-
-def SBWriteResGroup124 : SchedWriteRes<[SBPort0,SBFPDivider]> {
- let Latency = 21;
- let NumMicroOps = 1;
- let ResourceCycles = [1,21];
-}
-def: InstRW<[SBWriteResGroup124], (instregex "(V?)SQRTPDr",
- "(V?)SQRTSDr")>;
-
-def SBWriteResGroup125 : SchedWriteRes<[SBPort0,SBPort23,SBFPDivider]> {
- let Latency = 27;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1,21];
-}
-def: InstRW<[SBWriteResGroup125], (instregex "(V?)SQRTPDm",
- "(V?)SQRTSDm")>;
+def: InstRW<[SBWriteResGroup123], (instregex "(V?)DIVPSrm",
+ "(V?)DIVSSrm")>;
def SBWriteResGroup126 : SchedWriteRes<[SBPort0,SBFPDivider]> {
let Latency = 22;
@@ -1428,8 +1405,7 @@ def SBWriteResGroup129 : SchedWriteRes<[
let NumMicroOps = 3;
let ResourceCycles = [2,1,28];
}
-def: InstRW<[SBWriteResGroup129], (instregex "VDIVPSYrr",
- "VSQRTPSYr")>;
+def: InstRW<[SBWriteResGroup129], (instregex "VDIVPSYrr")>;
def SBWriteResGroup130 : SchedWriteRes<[SBPort0,SBPort23]> {
let Latency = 31;
@@ -1450,23 +1426,20 @@ def SBWriteResGroup132 : SchedWriteRes<[
let NumMicroOps = 4;
let ResourceCycles = [2,1,1,28];
}
-def: InstRW<[SBWriteResGroup132], (instregex "VDIVPSYrm",
- "VSQRTPSYm")>;
+def: InstRW<[SBWriteResGroup132], (instregex "VDIVPSYrm")>;
def SBWriteResGroup133 : SchedWriteRes<[SBPort0,SBPort05,SBFPDivider]> {
let Latency = 45;
let NumMicroOps = 3;
let ResourceCycles = [2,1,44];
}
-def: InstRW<[SBWriteResGroup133], (instregex "VDIVPDYrr",
- "VSQRTPDYr")>;
+def: InstRW<[SBWriteResGroup133], (instregex "VDIVPDYrr")>;
def SBWriteResGroup134 : SchedWriteRes<[SBPort0,SBPort23,SBPort05,SBFPDivider]> {
let Latency = 52;
let NumMicroOps = 4;
let ResourceCycles = [2,1,1,44];
}
-def: InstRW<[SBWriteResGroup134], (instregex "VDIVPDYrm",
- "VSQRTPDYm")>;
+def: InstRW<[SBWriteResGroup134], (instregex "VDIVPDYrm")>;
} // SchedModel
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td?rev=331629&r1=331628&r2=331629&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td Mon May 7 04:50:44 2018
@@ -161,12 +161,25 @@ defm : SKLWriteResPair<WriteFMul, [SKLP
defm : SKLWriteResPair<WriteFMulY, [SKLPort01], 4, [1], 1, 7>; // Floating point multiplication (YMM/ZMM).
defm : SKLWriteResPair<WriteFDiv, [SKLPort0], 12, [1], 1, 5>; // 10-14 cycles. // Floating point division.
defm : SKLWriteResPair<WriteFDivY, [SKLPort0], 12, [1], 1, 7>; // 10-14 cycles. // Floating point division (YMM/ZMM).
-defm : SKLWriteResPair<WriteFSqrt, [SKLPort0], 15, [1], 1, 5>; // Floating point square root.
-defm : SKLWriteResPair<WriteFSqrtY, [SKLPort0], 15, [1], 1, 7>; // Floating point square root (YMM/ZMM).
+
+defm : SKLWriteResPair<WriteFSqrt, [SKLPort0,SKLFPDivider], 12, [1,3], 1, 5>; // Floating point square root.
+defm : SKLWriteResPair<WriteFSqrtX, [SKLPort0,SKLFPDivider], 12, [1,3], 1, 6>; // Floating point square root (XMM).
+defm : SKLWriteResPair<WriteFSqrtY, [SKLPort0,SKLFPDivider], 12, [1,6], 1, 7>; // Floating point square root (YMM).
+defm : SKLWriteResPair<WriteFSqrtZ, [SKLPort0,SKLFPDivider], 12, [1,6], 1, 7>; // Floating point square root (ZMM).
+defm : SKLWriteResPair<WriteFSqrt64, [SKLPort0,SKLFPDivider], 18, [1,6], 1, 5>; // Floating point double square root.
+defm : SKLWriteResPair<WriteFSqrt64X, [SKLPort0,SKLFPDivider], 18, [1,6], 1, 6>; // Floating point double square root (XMM).
+defm : SKLWriteResPair<WriteFSqrt64Y, [SKLPort0,SKLFPDivider], 18, [1,12],1, 7>; // Floating point double square root (YMM).
+defm : SKLWriteResPair<WriteFSqrt64Z, [SKLPort0,SKLFPDivider], 18, [1,12],1, 7>; // Floating point double square root (ZMM).
+defm : SKLWriteResPair<WriteFSqrt80, [SKLPort0,SKLFPDivider], 21, [1,7]>; // Floating point long double square root.
+
defm : SKLWriteResPair<WriteFRcp, [SKLPort0], 4, [1], 1, 5>; // Floating point reciprocal estimate.
-defm : SKLWriteResPair<WriteFRcpY, [SKLPort0], 4, [1], 1, 5>; // Floating point reciprocal estimate (YMM/ZMM).
+defm : SKLWriteResPair<WriteFRcpX, [SKLPort0], 4, [1], 1, 6>; // Floating point reciprocal estimate (XMM).
+defm : SKLWriteResPair<WriteFRcpY, [SKLPort0], 4, [1], 1, 7>; // Floating point reciprocal estimate (YMM/ZMM).
+
defm : SKLWriteResPair<WriteFRsqrt, [SKLPort0], 4, [1], 1, 5>; // Floating point reciprocal square root estimate.
-defm : SKLWriteResPair<WriteFRsqrtY,[SKLPort0], 4, [1], 1, 5>; // Floating point reciprocal square root estimate (YMM/ZMM).
+defm : SKLWriteResPair<WriteFRsqrtX,[SKLPort0], 4, [1], 1, 6>; // Floating point reciprocal square root estimate (XMM).
+defm : SKLWriteResPair<WriteFRsqrtY,[SKLPort0], 4, [1], 1, 7>; // Floating point reciprocal square root estimate (YMM/ZMM).
+
defm : SKLWriteResPair<WriteFMA, [SKLPort01], 4, [1], 1, 5>; // Fused Multiply Add.
defm : SKLWriteResPair<WriteFMAX, [SKLPort01], 4, [1], 1, 6>; // Fused Multiply Add (XMM).
defm : SKLWriteResPair<WriteFMAY, [SKLPort01], 4, [1], 1, 7>; // Fused Multiply Add (YMM/ZMM).
@@ -1531,14 +1544,6 @@ def SKLWriteResGroup131 : SchedWriteRes<
def: InstRW<[SKLWriteResGroup131], (instregex "LAR(16|32|64)rm",
"LSL(16|32|64)rm")>;
-def SKLWriteResGroup132 : SchedWriteRes<[SKLPort0,SKLPort23]> {
- let Latency = 10;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKLWriteResGroup132], (instregex "(V?)RCPPSm",
- "(V?)RSQRTPSm")>;
-
def SKLWriteResGroup133 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let Latency = 10;
let NumMicroOps = 2;
@@ -1621,9 +1626,7 @@ def SKLWriteResGroup146 : SchedWriteRes<
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup146], (instregex "MUL_F(32|64)m",
- "VRCPPSYm",
- "VRSQRTPSYm")>;
+def: InstRW<[SKLWriteResGroup146], (instregex "MUL_F(32|64)m")>;
def SKLWriteResGroup147 : SchedWriteRes<[SKLPort01,SKLPort23]> {
let Latency = 11;
@@ -1707,21 +1710,6 @@ def SKLWriteResGroup156 : SchedWriteRes<
}
def: InstRW<[SKLWriteResGroup156], (instrs LOOPE, LOOPNE)>;
-def SKLWriteResGroup157 : SchedWriteRes<[SKLPort0,SKLFPDivider]> {
- let Latency = 12;
- let NumMicroOps = 1;
- let ResourceCycles = [1,3];
-}
-def: InstRW<[SKLWriteResGroup157], (instregex "(V?)SQRTPSr",
- "(V?)SQRTSSr")>;
-
-def SKLWriteResGroup158 : SchedWriteRes<[SKLPort0,SKLFPDivider]> {
- let Latency = 12;
- let NumMicroOps = 1;
- let ResourceCycles = [1,6];
-}
-def: InstRW<[SKLWriteResGroup158], (instregex "VSQRTPSYr")>;
-
def SKLWriteResGroup160 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23,SKLPort01]> {
let Latency = 12;
let NumMicroOps = 4;
@@ -1816,13 +1804,6 @@ def SKLWriteResGroup179 : SchedWriteRes<
}
def: InstRW<[SKLWriteResGroup179], (instregex "(V?)DIVPSrm")>;
-def SKLWriteResGroup179_1 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> {
- let Latency = 17;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1,3];
-}
-def: InstRW<[SKLWriteResGroup179_1], (instregex "(V?)SQRTSSm")>;
-
def SKLWriteResGroup180 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort05,SKLPort0156]> {
let Latency = 17;
let NumMicroOps = 15;
@@ -1830,21 +1811,6 @@ def SKLWriteResGroup180 : SchedWriteRes<
}
def: InstRW<[SKLWriteResGroup180], (instrs XCH_F)>;
-def SKLWriteResGroup181 : SchedWriteRes<[SKLPort0,SKLFPDivider]> {
- let Latency = 18;
- let NumMicroOps = 1;
- let ResourceCycles = [1,6];
-}
-def: InstRW<[SKLWriteResGroup181], (instregex "(V?)SQRTPDr",
- "(V?)SQRTSDr")>;
-
-def SKLWriteResGroup181_1 : SchedWriteRes<[SKLPort0,SKLFPDivider]> {
- let Latency = 18;
- let NumMicroOps = 1;
- let ResourceCycles = [1,12];
-}
-def: InstRW<[SKLWriteResGroup181_1], (instregex "VSQRTPDYr")>;
-
def SKLWriteResGroup182 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> {
let Latency = 18;
let NumMicroOps = 2;
@@ -1852,13 +1818,6 @@ def SKLWriteResGroup182 : SchedWriteRes<
}
def: InstRW<[SKLWriteResGroup182], (instregex "VDIVPSYrm")>;
-def SKLWriteResGroup183 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> {
- let Latency = 18;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1,3];
-}
-def: InstRW<[SKLWriteResGroup183], (instregex "(V?)SQRTPSm")>;
-
def SKLWriteResGroup184 : SchedWriteRes<[SKLPort5,SKLPort6,SKLPort06,SKLPort0156]> {
let Latency = 18;
let NumMicroOps = 8;
@@ -1880,13 +1839,6 @@ def SKLWriteResGroup186 : SchedWriteRes<
}
def: InstRW<[SKLWriteResGroup186], (instregex "(V?)DIVSDrm")>;
-def SKLWriteResGroup186_1 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> {
- let Latency = 19;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1,6];
-}
-def: InstRW<[SKLWriteResGroup186_1], (instregex "VSQRTPSYm")>;
-
def SKLWriteResGroup189 : SchedWriteRes<[SKLPort0]> {
let Latency = 20;
let NumMicroOps = 1;
@@ -1959,13 +1911,6 @@ def: InstRW<[SKLWriteResGroup196_2], (in
VPGATHERQQYrm,
VGATHERDPDYrm)>;
-def SKLWriteResGroup197 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> {
- let Latency = 23;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1,6];
-}
-def: InstRW<[SKLWriteResGroup197], (instregex "(V?)SQRTSDm")>;
-
def SKLWriteResGroup198 : SchedWriteRes<[SKLPort0,SKLPort4,SKLPort5,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> {
let Latency = 23;
let NumMicroOps = 19;
@@ -1973,20 +1918,6 @@ def SKLWriteResGroup198 : SchedWriteRes<
}
def: InstRW<[SKLWriteResGroup198], (instregex "CMPXCHG16B")>;
-def SKLWriteResGroup199 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> {
- let Latency = 24;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1,6];
-}
-def: InstRW<[SKLWriteResGroup199], (instregex "(V?)SQRTPDm")>;
-
-def SKLWriteResGroup201 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> {
- let Latency = 25;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1,12];
-}
-def: InstRW<[SKLWriteResGroup201], (instregex "VSQRTPDYm")>;
-
def SKLWriteResGroup202 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
let Latency = 25;
let NumMicroOps = 3;
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td?rev=331629&r1=331628&r2=331629&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td Mon May 7 04:50:44 2018
@@ -161,12 +161,25 @@ defm : SKXWriteResPair<WriteFMul, [SKXPo
defm : SKXWriteResPair<WriteFMulY,[SKXPort015], 4, [1], 1, 7>; // Floating point multiplication (YMM/ZMM).
defm : SKXWriteResPair<WriteFDiv, [SKXPort0], 12, [1], 1, 5>; // 10-14 cycles. // Floating point division.
defm : SKXWriteResPair<WriteFDivY, [SKXPort0], 12, [1], 1, 7>; // 10-14 cycles. // Floating point division (YMM/ZMM).
-defm : SKXWriteResPair<WriteFSqrt, [SKXPort0], 15, [1], 1, 5>; // Floating point square root.
-defm : SKXWriteResPair<WriteFSqrtY, [SKXPort0], 15, [1], 1, 7>; // Floating point square root (YMM/ZMM).
-defm : SKXWriteResPair<WriteFRcp, [SKXPort0], 4, [1], 1, 6>; // Floating point reciprocal estimate.
+
+defm : SKXWriteResPair<WriteFSqrt, [SKXPort0,SKXFPDivider], 12, [1,3], 1, 5>; // Floating point square root.
+defm : SKXWriteResPair<WriteFSqrtX, [SKXPort0,SKXFPDivider], 12, [1,3], 1, 6>; // Floating point square root (XMM).
+defm : SKXWriteResPair<WriteFSqrtY, [SKXPort0,SKXFPDivider], 12, [1,6], 1, 7>; // Floating point square root (YMM).
+defm : SKXWriteResPair<WriteFSqrtZ, [SKXPort0,SKXPort5,SKXFPDivider], 20, [2,1,12], 3, 7>; // Floating point square root (ZMM).
+defm : SKXWriteResPair<WriteFSqrt64, [SKXPort0,SKXFPDivider], 18, [1,6], 1, 5>; // Floating point double square root.
+defm : SKXWriteResPair<WriteFSqrt64X, [SKXPort0,SKXFPDivider], 18, [1,6], 1, 6>; // Floating point double square root (XMM).
+defm : SKXWriteResPair<WriteFSqrt64Y, [SKXPort0,SKXFPDivider], 18, [1,12],1, 7>; // Floating point double square root (YMM).
+defm : SKXWriteResPair<WriteFSqrt64Z, [SKXPort0,SKXPort5,SKXFPDivider], 32, [2,1,24], 3, 7>; // Floating point double square root (ZMM).
+defm : SKXWriteResPair<WriteFSqrt80, [SKXPort0,SKXFPDivider], 21, [1,7]>; // Floating point long double square root.
+
+defm : SKXWriteResPair<WriteFRcp, [SKXPort0], 4, [1], 1, 5>; // Floating point reciprocal estimate.
+defm : SKXWriteResPair<WriteFRcpX, [SKXPort0], 4, [1], 1, 6>; // Floating point reciprocal estimate (XMM).
defm : SKXWriteResPair<WriteFRcpY, [SKXPort0], 4, [1], 1, 7>; // Floating point reciprocal estimate (YMM/ZMM).
-defm : SKXWriteResPair<WriteFRsqrt, [SKXPort0], 4, [1], 1, 6>; // Floating point reciprocal square root estimate.
+
+defm : SKXWriteResPair<WriteFRsqrt, [SKXPort0], 4, [1], 1, 5>; // Floating point reciprocal square root estimate.
+defm : SKXWriteResPair<WriteFRsqrtX,[SKXPort0], 4, [1], 1, 6>; // Floating point reciprocal square root estimate (XMM).
defm : SKXWriteResPair<WriteFRsqrtY,[SKXPort0], 4, [1], 1, 7>; // Floating point reciprocal square root estimate (YMM/ZMM).
+
defm : SKXWriteResPair<WriteFMA, [SKXPort015], 4, [1], 1, 5>; // Fused Multiply Add.
defm : SKXWriteResPair<WriteFMAX, [SKXPort015], 4, [1], 1, 6>; // Fused Multiply Add (XMM).
defm : SKXWriteResPair<WriteFMAY, [SKXPort015], 4, [1], 1, 7>; // Fused Multiply Add (YMM/ZMM).
@@ -2388,10 +2401,6 @@ def SKXWriteResGroup135 : SchedWriteRes<
let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup135], (instregex "MMX_CVTPI2PSirm",
- "RCPSSm",
- "RSQRTSSm",
- "VRCPSSm",
- "VRSQRTSSm",
"VTESTPDYrm",
"VTESTPSYrm")>;
@@ -2877,21 +2886,6 @@ def SKXWriteResGroup171 : SchedWriteRes<
}
def: InstRW<[SKXWriteResGroup171], (instrs LOOPE, LOOPNE)>;
-def SKXWriteResGroup172 : SchedWriteRes<[SKXPort0,SKXFPDivider]> {
- let Latency = 12;
- let NumMicroOps = 1;
- let ResourceCycles = [1,3];
-}
-def: InstRW<[SKXWriteResGroup172], (instregex "(V?)SQRTPS(Z128)?r",
- "(V?)SQRTSS(Z?)r")>;
-
-def SKXWriteResGroup173 : SchedWriteRes<[SKXPort0,SKXFPDivider]> {
- let Latency = 12;
- let NumMicroOps = 1;
- let ResourceCycles = [1,6];
-}
-def: InstRW<[SKXWriteResGroup173], (instregex "VSQRTPS(Y|Z256)r")>;
-
def SKXWriteResGroup174 : SchedWriteRes<[SKXPort015]> {
let Latency = 12;
let NumMicroOps = 3;
@@ -3072,13 +3066,6 @@ def SKXWriteResGroup201 : SchedWriteRes<
}
def: InstRW<[SKXWriteResGroup201], (instregex "(V?)DIVPS(Z128)?rm")>;
-def SKXWriteResGroup201_1 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
- let Latency = 17;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1,3];
-}
-def: InstRW<[SKXWriteResGroup201_1], (instregex "(V?)SQRTSS(Z?)m")>;
-
def SKXWriteResGroup202 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156]> {
let Latency = 17;
let NumMicroOps = 15;
@@ -3086,21 +3073,6 @@ def SKXWriteResGroup202 : SchedWriteRes<
}
def: InstRW<[SKXWriteResGroup202], (instrs XCH_F)>;
-def SKXWriteResGroup203 : SchedWriteRes<[SKXPort0,SKXFPDivider]> {
- let Latency = 18;
- let NumMicroOps = 1;
- let ResourceCycles = [1,6];
-}
-def: InstRW<[SKXWriteResGroup203], (instregex "(V?)SQRTPD(Z128)?r",
- "(V?)SQRTSD(Z?)r")>;
-
-def SKXWriteResGroup203_1 : SchedWriteRes<[SKXPort0,SKXFPDivider]> {
- let Latency = 18;
- let NumMicroOps = 1;
- let ResourceCycles = [1,12];
-}
-def: InstRW<[SKXWriteResGroup203_1], (instregex "VSQRTPD(Y|Z256)r")>;
-
def SKXWriteResGroup204 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
let Latency = 18;
let NumMicroOps = 2;
@@ -3108,13 +3080,6 @@ def SKXWriteResGroup204 : SchedWriteRes<
}
def: InstRW<[SKXWriteResGroup204], (instregex "VDIVPS(Y|Z256)rm")>;
-def SKXWriteResGroup204_1 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
- let Latency = 18;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1,3];
-}
-def: InstRW<[SKXWriteResGroup204_1], (instregex "(V?)SQRTPS(Z128)?m")>;
-
def SKXWriteResGroup205 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let Latency = 18;
let NumMicroOps = 4;
@@ -3143,20 +3108,6 @@ def SKXWriteResGroup209 : SchedWriteRes<
}
def: InstRW<[SKXWriteResGroup209], (instregex "(V?)DIVSD(Z?)rm")>;
-def SKXWriteResGroup209_1 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
- let Latency = 19;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1,6];
-}
-def: InstRW<[SKXWriteResGroup209_1], (instregex "VSQRTPS(Y|Z256)m")>;
-
-def SKXWriteResGroup210 : SchedWriteRes<[SKXPort0,SKXPort5,SKXFPDivider]> {
- let Latency = 20;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1,12];
-}
-def: InstRW<[SKXWriteResGroup210], (instregex "VSQRTPSZr")>;
-
def SKXWriteResGroup211 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let Latency = 19;
let NumMicroOps = 4;
@@ -3287,13 +3238,6 @@ def SKXWriteResGroup225 : SchedWriteRes<
def: InstRW<[SKXWriteResGroup225], (instregex "VPCONFLICTDZ128rr",
"VPCONFLICTQZ256rr")>;
-def SKXWriteResGroup226 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
- let Latency = 23;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1,6];
-}
-def: InstRW<[SKXWriteResGroup226], (instregex "(V?)SQRTSD(Z?)m")>;
-
def SKXWriteResGroup227 : SchedWriteRes<[SKXPort0,SKXPort5,SKXFPDivider]> {
let Latency = 23;
let NumMicroOps = 3;
@@ -3315,13 +3259,6 @@ def SKXWriteResGroup228 : SchedWriteRes<
}
def: InstRW<[SKXWriteResGroup228], (instregex "CMPXCHG16B")>;
-def SKXWriteResGroup229 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
- let Latency = 24;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1,6];
-}
-def: InstRW<[SKXWriteResGroup229], (instregex "(V?)SQRTPD(Z128)?m")>;
-
def SKXWriteResGroup230 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort5,SKXFPDivider]> {
let Latency = 25;
let NumMicroOps = 4;
@@ -3329,13 +3266,6 @@ def SKXWriteResGroup230 : SchedWriteRes<
}
def: InstRW<[SKXWriteResGroup230], (instregex "VDIVPSZrm(b?)")>;
-def SKXWriteResGroup232 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
- let Latency = 25;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1,12];
-}
-def: InstRW<[SKXWriteResGroup232], (instregex "VSQRTPD(Y|Z256)m")>;
-
def SKXWriteResGroup233 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
let Latency = 25;
let NumMicroOps = 3;
@@ -3354,13 +3284,6 @@ def: InstRW<[SKXWriteResGroup234], (inst
VPGATHERQDZrm,
VPGATHERQQZ256rm)>;
-def SKXWriteResGroup237 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort5,SKXFPDivider]> {
- let Latency = 27;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1,12];
-}
-def: InstRW<[SKXWriteResGroup237], (instregex "VSQRTPSZm(b?)")>;
-
def SKXWriteResGroup238 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
let Latency = 26;
let NumMicroOps = 5;
@@ -3422,13 +3345,6 @@ def SKXWriteResGroup245 : SchedWriteRes<
def: InstRW<[SKXWriteResGroup245], (instrs VGATHERDPSZrm,
VPGATHERDDZrm)>;
-def SKXWriteResGroup246 : SchedWriteRes<[SKXPort0,SKXPort5,SKXFPDivider]> {
- let Latency = 32;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1,24];
-}
-def: InstRW<[SKXWriteResGroup246], (instregex "VSQRTPDZr")>;
-
def SKXWriteResGroup247 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort23,SKXPort06,SKXPort0156]> {
let Latency = 35;
let NumMicroOps = 23;
@@ -3460,13 +3376,6 @@ def SKXWriteResGroup250 : SchedWriteRes<
}
def: InstRW<[SKXWriteResGroup250], (instregex "XRSTOR(64)?")>;
-def SKXWriteResGroup251 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort5,SKXFPDivider]> {
- let Latency = 39;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1,24];
-}
-def: InstRW<[SKXWriteResGroup251], (instregex "VSQRTPDZm(b?)")>;
-
def SKXWriteResGroup252 : SchedWriteRes<[SKXPort1,SKXPort4,SKXPort5,SKXPort6,SKXPort23,SKXPort237,SKXPort15,SKXPort0156]> {
let Latency = 40;
let NumMicroOps = 18;
Modified: llvm/trunk/lib/Target/X86/X86Schedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Schedule.td?rev=331629&r1=331628&r2=331629&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Schedule.td (original)
+++ llvm/trunk/lib/Target/X86/X86Schedule.td Mon May 7 04:50:44 2018
@@ -61,6 +61,13 @@ class X86SchedWriteWidths<X86FoldableSch
X86FoldableSchedWrite ZMM = s512; // ZMM operations.
}
+// Class that wraps X86SchedWriteWidths for each fp vector type.
+class X86SchedWriteSizes<X86SchedWriteWidths sPS,
+ X86SchedWriteWidths sPD> {
+ X86SchedWriteWidths PS = sPS;
+ X86SchedWriteWidths PD = sPD;
+}
+
// Loads, stores, and moves, not folded with other operations.
def WriteLoad : SchedWrite;
def WriteStore : SchedWrite;
@@ -111,10 +118,19 @@ defm WriteFMulY : X86SchedWritePair; //
defm WriteFDiv : X86SchedWritePair; // Floating point division.
defm WriteFDivY : X86SchedWritePair; // Floating point division (YMM/ZMM).
defm WriteFSqrt : X86SchedWritePair; // Floating point square root.
-defm WriteFSqrtY : X86SchedWritePair; // Floating point square root (YMM/ZMM).
+defm WriteFSqrtX : X86SchedWritePair; // Floating point square root (XMM).
+defm WriteFSqrtY : X86SchedWritePair; // Floating point square root (YMM).
+defm WriteFSqrtZ : X86SchedWritePair; // Floating point square root (ZMM).
+defm WriteFSqrt64 : X86SchedWritePair; // Floating point double square root.
+defm WriteFSqrt64X : X86SchedWritePair; // Floating point double square root (XMM).
+defm WriteFSqrt64Y : X86SchedWritePair; // Floating point double square root (YMM).
+defm WriteFSqrt64Z : X86SchedWritePair; // Floating point double square root (ZMM).
+defm WriteFSqrt80 : X86SchedWritePair; // Floating point long double square root.
defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal estimate.
+defm WriteFRcpX : X86SchedWritePair; // Floating point reciprocal estimate (XMM).
defm WriteFRcpY : X86SchedWritePair; // Floating point reciprocal estimate (YMM/ZMM).
defm WriteFRsqrt : X86SchedWritePair; // Floating point reciprocal square root estimate.
+defm WriteFRsqrtX: X86SchedWritePair; // Floating point reciprocal square root estimate (XMM).
defm WriteFRsqrtY: X86SchedWritePair; // Floating point reciprocal square root estimate (YMM/ZMM).
defm WriteFMA : X86SchedWritePair; // Fused Multiply Add.
defm WriteFMAX : X86SchedWritePair; // Fused Multiply Add (XMM).
@@ -261,11 +277,15 @@ def SchedWriteDPPS
def SchedWriteFDiv
: X86SchedWriteWidths<WriteFDiv, WriteFDiv, WriteFDivY, WriteFDivY>;
def SchedWriteFSqrt
- : X86SchedWriteWidths<WriteFSqrt, WriteFSqrt, WriteFSqrtY, WriteFSqrtY>;
+ : X86SchedWriteWidths<WriteFSqrt, WriteFSqrtX,
+ WriteFSqrtY, WriteFSqrtZ>;
+def SchedWriteFSqrt64
+ : X86SchedWriteWidths<WriteFSqrt64, WriteFSqrt64X,
+ WriteFSqrt64Y, WriteFSqrt64Z>;
def SchedWriteFRcp
- : X86SchedWriteWidths<WriteFRcp, WriteFRcp, WriteFRcpY, WriteFRcpY>;
+ : X86SchedWriteWidths<WriteFRcp, WriteFRcpX, WriteFRcpY, WriteFRcpY>;
def SchedWriteFRsqrt
- : X86SchedWriteWidths<WriteFRsqrt, WriteFRsqrt, WriteFRsqrtY, WriteFRsqrtY>;
+ : X86SchedWriteWidths<WriteFRsqrt, WriteFRsqrtX, WriteFRsqrtY, WriteFRsqrtY>;
def SchedWriteFRnd
: X86SchedWriteWidths<WriteFRnd, WriteFRnd, WriteFRndY, WriteFRndY>;
def SchedWriteFLogic
@@ -324,6 +344,16 @@ def SchedWriteVarBlend
: X86SchedWriteWidths<WriteVarBlend, WriteVarBlend,
WriteVarBlendY, WriteVarBlendY>;
+// Vector size wrappers.
+def SchedWriteFAddSizes
+ : X86SchedWriteSizes<SchedWriteFAdd, SchedWriteFAdd>;
+def SchedWriteFMulSizes
+ : X86SchedWriteSizes<SchedWriteFMul, SchedWriteFMul>;
+def SchedWriteFDivSizes
+ : X86SchedWriteSizes<SchedWriteFDiv, SchedWriteFDiv>;
+def SchedWriteFSqrtSizes
+ : X86SchedWriteSizes<SchedWriteFSqrt, SchedWriteFSqrt64>;
+
//===----------------------------------------------------------------------===//
// Generic Processor Scheduler Models.
Modified: llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleAtom.td?rev=331629&r1=331628&r2=331629&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleAtom.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleAtom.td Mon May 7 04:50:44 2018
@@ -211,13 +211,22 @@ defm : AtomWriteResPair<WriteFCom,
defm : AtomWriteResPair<WriteFMul, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteFMulY, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteFRcp, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
-defm : AtomWriteResPair<WriteFRcpY, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
+defm : AtomWriteResPair<WriteFRcpX, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
+defm : AtomWriteResPair<WriteFRcpY, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
defm : AtomWriteResPair<WriteFRsqrt, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
-defm : AtomWriteResPair<WriteFRsqrtY, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
+defm : AtomWriteResPair<WriteFRsqrtX, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
+defm : AtomWriteResPair<WriteFRsqrtY, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
defm : AtomWriteResPair<WriteFDiv, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
defm : AtomWriteResPair<WriteFDivY, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
defm : AtomWriteResPair<WriteFSqrt, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
-defm : AtomWriteResPair<WriteFSqrtY, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
+defm : AtomWriteResPair<WriteFSqrtX, [AtomPort01], [AtomPort01], 70, 70, [70], [70]>;
+defm : AtomWriteResPair<WriteFSqrtY, [AtomPort01], [AtomPort01], 70, 70, [70], [70]>;
+defm : AtomWriteResPair<WriteFSqrtZ, [AtomPort01], [AtomPort01], 70, 70, [70], [70]>;
+defm : AtomWriteResPair<WriteFSqrt64, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
+defm : AtomWriteResPair<WriteFSqrt64X, [AtomPort01], [AtomPort01],125,125,[125],[125]>;
+defm : AtomWriteResPair<WriteFSqrt64Y, [AtomPort01], [AtomPort01],125,125,[125],[125]>;
+defm : AtomWriteResPair<WriteFSqrt64Z, [AtomPort01], [AtomPort01],125,125,[125],[125]>;
+defm : AtomWriteResPair<WriteFSqrt80, [AtomPort01], [AtomPort01], 71, 71, [71], [71]>;
defm : AtomWriteResPair<WriteFSign, [AtomPort1], [AtomPort1]>;
defm : AtomWriteResPair<WriteFRnd, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFRndY, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
@@ -557,7 +566,7 @@ def : InstRW<[AtomWrite01_9], (instrs BT
SHLD64mri8, SHRD64mri8,
SHLD64rri8, SHRD64rri8,
CMPXCHG8rr,
- MULPDrr, RCPPSr, RSQRTPSr)>;
+ MULPDrr)>;
def : InstRW<[AtomWrite01_9], (instregex "CMOV(B|BE|E|P|NB|NBE|NE|NP)_F",
"(U)?COM_FI", "TST_F",
"(U)?COMIS(D|S)rr",
@@ -568,7 +577,7 @@ def AtomWrite01_10 : SchedWriteRes<[Atom
let ResourceCycles = [10];
}
def : InstRW<[AtomWrite01_10], (instrs FLDL2E, FLDL2T, FLDLG2, FLDLN2, FLDPI,
- MULPDrm, RCPPSm, RSQRTPSm)>;
+ MULPDrm)>;
def : InstRW<[AtomWrite01_10], (instregex "(U)?COMIS(D|S)rm",
"CVT(T)?SS2SI64rm(_Int)?")>;
@@ -697,8 +706,7 @@ def AtomWrite01_62 : SchedWriteRes<[Atom
let Latency = 62;
let ResourceCycles = [62];
}
-def : InstRW<[AtomWrite01_62], (instregex "DIVSD(r|m)(_Int)?",
- "SQRTSD(r|m)(_Int)?")>;
+def : InstRW<[AtomWrite01_62], (instregex "DIVSD(r|m)(_Int)?")>;
def AtomWrite01_63 : SchedWriteRes<[AtomPort01]> {
let Latency = 63;
@@ -716,7 +724,7 @@ def AtomWrite01_70 : SchedWriteRes<[Atom
let Latency = 70;
let ResourceCycles = [70];
}
-def : InstRW<[AtomWrite01_70], (instrs DIVPSrr, DIVPSrm, SQRTPSr, SQRTPSm)>;
+def : InstRW<[AtomWrite01_70], (instrs DIVPSrr, DIVPSrm)>;
def AtomWrite01_71 : SchedWriteRes<[AtomPort01]> {
let Latency = 71;
@@ -724,7 +732,6 @@ def AtomWrite01_71 : SchedWriteRes<[Atom
}
def : InstRW<[AtomWrite01_71], (instrs FPREM1,
INVLPG, INVLPGA32, INVLPGA64)>;
-def : InstRW<[AtomWrite01_71], (instregex "SQRT_F")>;
def AtomWrite01_72 : SchedWriteRes<[AtomPort01]> {
let Latency = 72;
@@ -785,7 +792,7 @@ def AtomWrite01_125 : SchedWriteRes<[Ato
let Latency = 125;
let ResourceCycles = [125];
}
-def : InstRW<[AtomWrite01_125], (instrs DIVPDrr, DIVPDrm, SQRTPDr, SQRTPDm)>;
+def : InstRW<[AtomWrite01_125], (instrs DIVPDrr, DIVPDrm)>;
def AtomWrite01_127 : SchedWriteRes<[AtomPort01]> {
let Latency = 127;
Modified: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td?rev=331629&r1=331628&r2=331629&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td Mon May 7 04:50:44 2018
@@ -330,13 +330,22 @@ defm : JWriteResFpuPair<WriteDPPD, [JF
defm : JWriteResFpuPair<WriteDPPS, [JFPU1, JFPM, JFPA], 11, [1, 3, 3], 5>;
defm : JWriteResYMMPair<WriteDPPSY, [JFPU1, JFPM, JFPA], 12, [2, 6, 6], 10>;
defm : JWriteResFpuPair<WriteFRcp, [JFPU1, JFPM], 2>;
+defm : JWriteResFpuPair<WriteFRcpX, [JFPU1, JFPM], 2>;
defm : JWriteResYMMPair<WriteFRcpY, [JFPU1, JFPM], 2, [2,2], 2>;
defm : JWriteResFpuPair<WriteFRsqrt, [JFPU1, JFPM], 2>;
+defm : JWriteResFpuPair<WriteFRsqrtX, [JFPU1, JFPM], 2>;
defm : JWriteResYMMPair<WriteFRsqrtY, [JFPU1, JFPM], 2, [2,2], 2>;
defm : JWriteResFpuPair<WriteFDiv, [JFPU1, JFPM], 19, [1, 19]>;
defm : JWriteResYMMPair<WriteFDivY, [JFPU1, JFPM], 38, [2, 38], 2>;
defm : JWriteResFpuPair<WriteFSqrt, [JFPU1, JFPM], 21, [1, 21]>;
+defm : JWriteResFpuPair<WriteFSqrtX, [JFPU1, JFPM], 21, [1, 21]>;
defm : JWriteResYMMPair<WriteFSqrtY, [JFPU1, JFPM], 42, [2, 42], 2>;
+defm : JWriteResYMMPair<WriteFSqrtZ, [JFPU1, JFPM], 42, [2, 42], 2>;
+defm : JWriteResFpuPair<WriteFSqrt64, [JFPU1, JFPM], 27, [1, 27]>;
+defm : JWriteResFpuPair<WriteFSqrt64X, [JFPU1, JFPM], 27, [1, 27]>;
+defm : JWriteResYMMPair<WriteFSqrt64Y, [JFPU1, JFPM], 54, [2, 54], 2>;
+defm : JWriteResYMMPair<WriteFSqrt64Z, [JFPU1, JFPM], 54, [2, 54], 2>;
+defm : JWriteResFpuPair<WriteFSqrt80, [JFPU1, JFPM], 35, [1, 35]>;
defm : JWriteResFpuPair<WriteFSign, [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFRnd, [JFPU1, JSTC], 3>;
defm : JWriteResYMMPair<WriteFRndY, [JFPU1, JSTC], 3, [2,2], 2>;
@@ -667,36 +676,6 @@ def JWriteVTESTLd: SchedWriteRes<[JLAGU,
}
def : InstRW<[JWriteVTESTLd], (instrs PTESTrm, VPTESTrm, VTESTPDrm, VTESTPSrm)>;
-def JWriteVSQRTPD: SchedWriteRes<[JFPU1, JFPM]> {
- let Latency = 27;
- let ResourceCycles = [1, 27];
-}
-def : InstRW<[JWriteVSQRTPD], (instrs SQRTPDr, VSQRTPDr,
- SQRTSDr, VSQRTSDr,
- SQRTSDr_Int, VSQRTSDr_Int)>;
-
-def JWriteVSQRTPDLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> {
- let Latency = 32;
- let ResourceCycles = [1, 1, 27];
-}
-def : InstRW<[JWriteVSQRTPDLd], (instrs SQRTPDm, VSQRTPDm,
- SQRTSDm, VSQRTSDm,
- SQRTSDm_Int, VSQRTSDm_Int)>;
-
-def JWriteVSQRTYPD: SchedWriteRes<[JFPU1, JFPM]> {
- let Latency = 54; // each uOp is 27cy.
- let ResourceCycles = [2, 54];
- let NumMicroOps = 2;
-}
-def : InstRW<[JWriteVSQRTYPD], (instrs VSQRTPDYr)>;
-
-def JWriteVSQRTYPDLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> {
- let Latency = 59; // each uOp is 27cy (+5cy of memory load).
- let ResourceCycles = [2, 2, 54];
- let NumMicroOps = 2;
-}
-def : InstRW<[JWriteVSQRTYPDLd], (instrs VSQRTPDYm)>;
-
def JWriteJVZEROALL: SchedWriteRes<[]> {
let Latency = 90;
let NumMicroOps = 73;
Modified: llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleSLM.td?rev=331629&r1=331628&r2=331629&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleSLM.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleSLM.td Mon May 7 04:50:44 2018
@@ -139,12 +139,21 @@ defm : SLMWriteResPair<WriteFMul, [SLM
defm : SLMWriteResPair<WriteFMulY, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFDiv, [SLM_FPC_RSV0, SLMFPDivider], 34, [1,34]>;
defm : SLMWriteResPair<WriteFDivY, [SLM_FPC_RSV0, SLMFPDivider], 34, [1,34]>;
-defm : SLMWriteResPair<WriteFRcp, [SLM_FPC_RSV0], 5>;
-defm : SLMWriteResPair<WriteFRcpY, [SLM_FPC_RSV0], 5>;
-defm : SLMWriteResPair<WriteFRsqrt, [SLM_FPC_RSV0], 5>;
-defm : SLMWriteResPair<WriteFRsqrtY,[SLM_FPC_RSV0], 5>;
-defm : SLMWriteResPair<WriteFSqrt, [SLM_FPC_RSV0], 15>;
-defm : SLMWriteResPair<WriteFSqrtY, [SLM_FPC_RSV0], 15>;
+defm : SLMWriteResPair<WriteFRcp, [SLM_FPC_RSV0], 5>;
+defm : SLMWriteResPair<WriteFRcpX, [SLM_FPC_RSV0], 5>;
+defm : SLMWriteResPair<WriteFRcpY, [SLM_FPC_RSV0], 5>;
+defm : SLMWriteResPair<WriteFRsqrt, [SLM_FPC_RSV0], 5>;
+defm : SLMWriteResPair<WriteFRsqrtX, [SLM_FPC_RSV0], 5>;
+defm : SLMWriteResPair<WriteFRsqrtY, [SLM_FPC_RSV0], 5>;
+defm : SLMWriteResPair<WriteFSqrt, [SLM_FPC_RSV0,SLMFPDivider], 20, [1,20], 1, 3>;
+defm : SLMWriteResPair<WriteFSqrtX, [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40], 1, 3>;
+defm : SLMWriteResPair<WriteFSqrtY, [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40], 1, 3>;
+defm : SLMWriteResPair<WriteFSqrtZ, [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40], 1, 3>;
+defm : SLMWriteResPair<WriteFSqrt64, [SLM_FPC_RSV0,SLMFPDivider], 35, [1,35], 1, 3>;
+defm : SLMWriteResPair<WriteFSqrt64X, [SLM_FPC_RSV0,SLMFPDivider], 71, [1,70], 1, 3>;
+defm : SLMWriteResPair<WriteFSqrt64Y, [SLM_FPC_RSV0,SLMFPDivider], 71, [1,70], 1, 3>;
+defm : SLMWriteResPair<WriteFSqrt64Z, [SLM_FPC_RSV0,SLMFPDivider], 71, [1,70], 1, 3>;
+defm : SLMWriteResPair<WriteFSqrt80, [SLM_FPC_RSV0,SLMFPDivider], 40, [1,40]>;
defm : SLMWriteResPair<WriteDPPD, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteDPPS, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteDPPSY, [SLM_FPC_RSV1], 3>;
@@ -382,60 +391,4 @@ def SLMriteResGroup8 : SchedWriteRes<[SL
}
def: InstRW<[SLMriteResGroup8], (instregex "(V?)DIVSSrm")>;
-def SLMriteResGroup9 : SchedWriteRes<[SLM_FPC_RSV0,SLMFPDivider]> {
- let Latency = 71;
- let NumMicroOps = 1;
- let ResourceCycles = [1,70];
-}
-def: InstRW<[SLMriteResGroup9], (instregex "(V?)SQRTPDr")>;
-
-def SLMriteResGroup10 : SchedWriteRes<[SLM_FPC_RSV0,SLMFPDivider]> {
- let Latency = 41;
- let NumMicroOps = 1;
- let ResourceCycles = [1,40];
-}
-def: InstRW<[SLMriteResGroup10], (instregex "(V?)SQRTPSr")>;
-
-def SLMriteResGroup11 : SchedWriteRes<[SLM_FPC_RSV0,SLMFPDivider]> {
- let Latency = 35;
- let NumMicroOps = 1;
- let ResourceCycles = [1,35];
-}
-def: InstRW<[SLMriteResGroup11], (instregex "(V?)SQRTSDr")>;
-
-def SLMriteResGroup12 : SchedWriteRes<[SLM_FPC_RSV0,SLMFPDivider]> {
- let Latency = 20;
- let NumMicroOps = 1;
- let ResourceCycles = [1,20];
-}
-def: InstRW<[SLMriteResGroup12], (instregex "(V?)SQRTSSr")>;
-
-def SLMriteResGroup13 : SchedWriteRes<[SLM_MEC_RSV,SLM_FPC_RSV0,SLMFPDivider]> {
- let Latency = 74;
- let NumMicroOps = 1;
- let ResourceCycles = [1,1,70];
-}
-def: InstRW<[SLMriteResGroup13], (instregex "(V?)SQRTPDm")>;
-
-def SLMriteResGroup14 : SchedWriteRes<[SLM_MEC_RSV,SLM_FPC_RSV0,SLMFPDivider]> {
- let Latency = 44;
- let NumMicroOps = 1;
- let ResourceCycles = [1,1,40];
-}
-def: InstRW<[SLMriteResGroup14], (instregex "(V?)SQRTPSm")>;
-
-def SLMriteResGroup15 : SchedWriteRes<[SLM_MEC_RSV,SLM_FPC_RSV0,SLMFPDivider]> {
- let Latency = 38;
- let NumMicroOps = 1;
- let ResourceCycles = [1,1,35];
-}
-def: InstRW<[SLMriteResGroup15], (instregex "(V?)SQRTSDm")>;
-
-def SLMriteResGroup16 : SchedWriteRes<[SLM_MEC_RSV,SLM_FPC_RSV0,SLMFPDivider]> {
- let Latency = 23;
- let NumMicroOps = 1;
- let ResourceCycles = [1,1,20];
-}
-def: InstRW<[SLMriteResGroup16], (instregex "(V?)SQRTSSm")>;
-
} // SchedModel
Modified: llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td?rev=331629&r1=331628&r2=331629&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td Mon May 7 04:50:44 2018
@@ -223,11 +223,20 @@ defm : ZnWriteResFpuPair<WriteFMA,
defm : ZnWriteResFpuPair<WriteFMAX, [ZnFPU03], 5>;
defm : ZnWriteResFpuPair<WriteFMAY, [ZnFPU03], 5>;
defm : ZnWriteResFpuPair<WriteFRcp, [ZnFPU01], 5>;
-defm : ZnWriteResFpuPair<WriteFRcpY, [ZnFPU01], 5>;
-defm : ZnWriteResFpuPair<WriteFRsqrt, [ZnFPU01], 5>;
-defm : ZnWriteResFpuPair<WriteFRsqrtY, [ZnFPU01], 5>;
-defm : ZnWriteResFpuPair<WriteFSqrt, [ZnFPU3], 20>;
-defm : ZnWriteResFpuPair<WriteFSqrtY, [ZnFPU3], 20>;
+defm : ZnWriteResFpuPair<WriteFRcpX, [ZnFPU01], 5>;
+//defm : ZnWriteResFpuPair<WriteFRcpY, [ZnFPU01], 5, [1], 1, 7, 1>;
+//defm : ZnWriteResFpuPair<WriteFRsqrt, [ZnFPU02], 5>;
+defm : ZnWriteResFpuPair<WriteFRsqrtX, [ZnFPU01], 5, [1], 1, 7, 1>;
+//defm : ZnWriteResFpuPair<WriteFRsqrtY, [ZnFPU01], 5, [2], 2>;
+defm : ZnWriteResFpuPair<WriteFSqrt, [ZnFPU3], 20, [20]>;
+defm : ZnWriteResFpuPair<WriteFSqrtX, [ZnFPU3], 20, [20]>;
+defm : ZnWriteResFpuPair<WriteFSqrtY, [ZnFPU3], 28, [28], 1, 7, 1>;
+defm : ZnWriteResFpuPair<WriteFSqrtZ, [ZnFPU3], 28, [28], 1, 7, 1>;
+defm : ZnWriteResFpuPair<WriteFSqrt64, [ZnFPU3], 20, [20]>;
+defm : ZnWriteResFpuPair<WriteFSqrt64X, [ZnFPU3], 20, [20]>;
+defm : ZnWriteResFpuPair<WriteFSqrt64Y, [ZnFPU3], 40, [40], 1, 7, 1>;
+defm : ZnWriteResFpuPair<WriteFSqrt64Z, [ZnFPU3], 40, [40], 1, 7, 1>;
+defm : ZnWriteResFpuPair<WriteFSqrt80, [ZnFPU3], 20, [20]>;
def : WriteRes<WriteCvtF2FSt, [ZnFPU3, ZnAGU]>;
// Vector integer operations which uses FPU units
@@ -1504,18 +1513,19 @@ def ZnWriteVDIVPDYLd : SchedWriteRes<[Zn
def : InstRW<[ZnWriteVDIVPDYLd], (instregex "VDIVPDYrm")>;
// VRCPPS.
+// TODO - convert to ZnWriteResFpuPair
// y,y.
-def ZnWriteVRCPPSr : SchedWriteRes<[ZnFPU01]> {
+def ZnWriteVRCPPSYr : SchedWriteRes<[ZnFPU01]> {
let Latency = 5;
}
-def : InstRW<[ZnWriteVRCPPSr], (instregex "VRCPPSYr")>;
+def : SchedAlias<WriteFRcpY, ZnWriteVRCPPSYr>;
// y,m256.
-def ZnWriteVRCPPSLd : SchedWriteRes<[ZnAGU, ZnFPU01]> {
+def ZnWriteVRCPPSYLd : SchedWriteRes<[ZnAGU, ZnFPU01]> {
let Latency = 12;
let NumMicroOps = 3;
}
-def : InstRW<[ZnWriteVRCPPSLd], (instregex "VRCPPSYm")>;
+def : SchedAlias<WriteFRcpYLd, ZnWriteVRCPPSYLd>;
// DPPS.
// x,x,i / v,v,v,i.
@@ -1533,83 +1543,38 @@ def : SchedAlias<WriteDPPD, ZnWriteMic
// x,m,i.
def : SchedAlias<WriteDPPDLd, ZnWriteMicrocoded>;
-// VSQRTPS.
-// y,y.
-def ZnWriteVSQRTPSYr : SchedWriteRes<[ZnFPU3]> {
- let Latency = 28;
- let ResourceCycles = [28];
-}
-def : InstRW<[ZnWriteVSQRTPSYr], (instregex "VSQRTPSYr")>;
-
-// y,m256.
-def ZnWriteVSQRTPSYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
- let Latency = 35;
- let ResourceCycles = [1,35];
- let NumMicroOps = 2;
-}
-def : InstRW<[ZnWriteVSQRTPSYLd], (instregex "VSQRTPSYm")>;
-
-// VSQRTPD.
-// y,y.
-def ZnWriteVSQRTPDYr : SchedWriteRes<[ZnFPU3]> {
- let Latency = 40;
- let ResourceCycles = [40];
-}
-def : InstRW<[ZnWriteVSQRTPDYr], (instregex "VSQRTPDYr")>;
-
-// y,m256.
-def ZnWriteVSQRTPDYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
- let Latency = 47;
- let NumMicroOps = 2;
- let ResourceCycles = [1,47];
-}
-def : InstRW<[ZnWriteVSQRTPDYLd], (instregex "VSQRTPDYm")>;
-
// RSQRTSS
+// TODO - convert to ZnWriteResFpuPair
// x,x.
def ZnWriteRSQRTSSr : SchedWriteRes<[ZnFPU02]> {
let Latency = 5;
}
-def : InstRW<[ZnWriteRSQRTSSr], (instregex "(V?)RSQRTSS(Y?)r")>;
-
-// RSQRTPS
-// x,x.
-def ZnWriteRSQRTPSr : SchedWriteRes<[ZnFPU01]> {
- let Latency = 5;
-}
-def : InstRW<[ZnWriteRSQRTPSr], (instregex "(V?)RSQRTPSr")>;
+def : SchedAlias<WriteFRsqrt, ZnWriteRSQRTSSr>;
-// RSQRTSSm
// x,m128.
def ZnWriteRSQRTSSLd: SchedWriteRes<[ZnAGU, ZnFPU02]> {
let Latency = 12;
let NumMicroOps = 2;
- let ResourceCycles = [1,2];
-}
-def : InstRW<[ZnWriteRSQRTSSLd], (instregex "(V?)RSQRTSSm")>;
-
-// RSQRTPSm
-def ZnWriteRSQRTPSLd : SchedWriteRes<[ZnAGU, ZnFPU01]> {
- let Latency = 12;
- let NumMicroOps = 2;
+ let ResourceCycles = [1,2]; // FIXME: Is this right?
}
-def : InstRW<[ZnWriteRSQRTPSLd], (instregex "(V?)RSQRTPSm")>;
+def : SchedAlias<WriteFRsqrtLd, ZnWriteRSQRTSSLd>;
-// RSQRTPS 256.
+// RSQRTPS
+// TODO - convert to ZnWriteResFpuPair
// y,y.
def ZnWriteRSQRTPSYr : SchedWriteRes<[ZnFPU01]> {
let Latency = 5;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def : InstRW<[ZnWriteRSQRTPSYr], (instregex "VRSQRTPSYr")>;
+def : SchedAlias<WriteFRsqrtY, ZnWriteRSQRTPSYr>;
// y,m256.
def ZnWriteRSQRTPSYLd : SchedWriteRes<[ZnAGU, ZnFPU01]> {
let Latency = 12;
let NumMicroOps = 2;
}
-def : InstRW<[ZnWriteRSQRTPSYLd], (instregex "VRSQRTPSYm")>;
+def : SchedAlias<WriteFRsqrtYLd, ZnWriteRSQRTPSYLd>;
//-- Other instructions --//
Modified: llvm/trunk/test/CodeGen/X86/avx-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-schedule.ll?rev=331629&r1=331628&r2=331629&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-schedule.ll Mon May 7 04:50:44 2018
@@ -4450,7 +4450,7 @@ define <4 x double> @test_sqrtpd(<4 x do
;
; ZNVER1-LABEL: test_sqrtpd:
; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [47:47.00]
+; ZNVER1-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [47:40.00]
; ZNVER1-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [40:40.00]
; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
@@ -4514,7 +4514,7 @@ define <8 x float> @test_sqrtps(<8 x flo
;
; ZNVER1-LABEL: test_sqrtps:
; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vsqrtps (%rdi), %ymm1 # sched: [35:35.00]
+; ZNVER1-NEXT: vsqrtps (%rdi), %ymm1 # sched: [35:28.00]
; ZNVER1-NEXT: vsqrtps %ymm0, %ymm0 # sched: [28:28.00]
; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=331629&r1=331628&r2=331629&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Mon May 7 04:50:44 2018
@@ -600,7 +600,7 @@ declare <16 x float> @llvm.sqrt.v16f32(<
define <16 x float> @sqrtD(<16 x float> %a) nounwind {
; GENERIC-LABEL: sqrtD:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vsqrtps %zmm0, %zmm0 # sched: [14:1.00]
+; GENERIC-NEXT: vsqrtps %zmm0, %zmm0 # sched: [29:28.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sqrtD:
@@ -615,7 +615,7 @@ declare <8 x double> @llvm.sqrt.v8f64(<8
define <8 x double> @sqrtE(<8 x double> %a) nounwind {
; GENERIC-LABEL: sqrtE:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vsqrtpd %zmm0, %zmm0 # sched: [14:1.00]
+; GENERIC-NEXT: vsqrtpd %zmm0, %zmm0 # sched: [45:44.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sqrtE:
Modified: llvm/trunk/test/CodeGen/X86/recip-fastmath.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/recip-fastmath.ll?rev=331629&r1=331628&r2=331629&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/recip-fastmath.ll (original)
+++ llvm/trunk/test/CodeGen/X86/recip-fastmath.ll Mon May 7 04:50:44 2018
@@ -1024,7 +1024,7 @@ define <16 x float> @v16f32_one_step(<16
;
; KNL-LABEL: v16f32_one_step:
; KNL: # %bb.0:
-; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [5:1.00]
+; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [11:2.00]
; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [12:0.50]
; KNL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [5:0.50]
; KNL-NEXT: retq # sched: [7:1.00]
@@ -1224,7 +1224,7 @@ define <16 x float> @v16f32_two_step(<16
;
; KNL-LABEL: v16f32_two_step:
; KNL: # %bb.0:
-; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [5:1.00]
+; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [11:2.00]
; KNL-NEXT: vbroadcastss {{.*#+}} zmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] sched: [10:1.00]
; KNL-NEXT: vmovaps %zmm1, %zmm3 # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [5:0.50]
Modified: llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll?rev=331629&r1=331628&r2=331629&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll Mon May 7 04:50:44 2018
@@ -1323,7 +1323,7 @@ define <16 x float> @v16f32_one_step2(<1
;
; KNL-LABEL: v16f32_one_step2:
; KNL: # %bb.0:
-; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [5:1.00]
+; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [11:2.00]
; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [12:0.50]
; KNL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [5:0.50]
; KNL-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [12:0.50]
@@ -1489,7 +1489,7 @@ define <16 x float> @v16f32_one_step_2_d
;
; KNL-LABEL: v16f32_one_step_2_divs:
; KNL: # %bb.0:
-; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [5:1.00]
+; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [11:2.00]
; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [12:0.50]
; KNL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [5:0.50]
; KNL-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm1 # sched: [12:0.50]
@@ -1709,7 +1709,7 @@ define <16 x float> @v16f32_two_step2(<1
;
; KNL-LABEL: v16f32_two_step2:
; KNL: # %bb.0:
-; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [5:1.00]
+; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [11:2.00]
; KNL-NEXT: vbroadcastss {{.*#+}} zmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] sched: [10:1.00]
; KNL-NEXT: vmovaps %zmm1, %zmm3 # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [5:0.50]
@@ -1781,7 +1781,7 @@ define <16 x float> @v16f32_no_step(<16
;
; KNL-LABEL: v16f32_no_step:
; KNL: # %bb.0:
-; KNL-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [5:1.00]
+; KNL-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [11:2.00]
; KNL-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: v16f32_no_step:
@@ -1855,7 +1855,7 @@ define <16 x float> @v16f32_no_step2(<16
;
; KNL-LABEL: v16f32_no_step2:
; KNL: # %bb.0:
-; KNL-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [5:1.00]
+; KNL-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [11:2.00]
; KNL-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [12:0.50]
; KNL-NEXT: retq # sched: [7:1.00]
;
Modified: llvm/trunk/test/CodeGen/X86/sse-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-schedule.ll?rev=331629&r1=331628&r2=331629&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse-schedule.ll Mon May 7 04:50:44 2018
@@ -5007,15 +5007,15 @@ define <4 x float> @test_sqrtps(<4 x flo
;
; ZNVER1-SSE-LABEL: test_sqrtps:
; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [20:1.00]
-; ZNVER1-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [27:1.00]
+; ZNVER1-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [20:20.00]
+; ZNVER1-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [27:20.00]
; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_sqrtps:
; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vsqrtps (%rdi), %xmm1 # sched: [27:1.00]
-; ZNVER1-NEXT: vsqrtps %xmm0, %xmm0 # sched: [20:1.00]
+; ZNVER1-NEXT: vsqrtps (%rdi), %xmm1 # sched: [27:20.00]
+; ZNVER1-NEXT: vsqrtps %xmm0, %xmm0 # sched: [20:20.00]
; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0)
@@ -5152,16 +5152,16 @@ define <4 x float> @test_sqrtss(<4 x flo
; ZNVER1-SSE-LABEL: test_sqrtss:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [20:1.00]
-; ZNVER1-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [20:1.00]
+; ZNVER1-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [20:20.00]
+; ZNVER1-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [20:20.00]
; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_sqrtss:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovaps (%rdi), %xmm1 # sched: [8:0.50]
-; ZNVER1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [20:1.00]
-; ZNVER1-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [20:1.00]
+; ZNVER1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [20:20.00]
+; ZNVER1-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [20:20.00]
; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0)
Modified: llvm/trunk/test/CodeGen/X86/sse2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-schedule.ll?rev=331629&r1=331628&r2=331629&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-schedule.ll Mon May 7 04:50:44 2018
@@ -14247,15 +14247,15 @@ define <2 x double> @test_sqrtpd(<2 x do
;
; ZNVER1-SSE-LABEL: test_sqrtpd:
; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [20:1.00]
-; ZNVER1-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [27:1.00]
+; ZNVER1-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [20:20.00]
+; ZNVER1-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [27:20.00]
; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_sqrtpd:
; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [27:1.00]
-; ZNVER1-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [20:1.00]
+; ZNVER1-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [27:20.00]
+; ZNVER1-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [20:20.00]
; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
@@ -14392,16 +14392,16 @@ define <2 x double> @test_sqrtsd(<2 x do
; ZNVER1-SSE-LABEL: test_sqrtsd:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [20:1.00]
-; ZNVER1-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [20:1.00]
+; ZNVER1-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [20:20.00]
+; ZNVER1-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [20:20.00]
; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_sqrtsd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovapd (%rdi), %xmm1 # sched: [8:0.50]
-; ZNVER1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [20:1.00]
-; ZNVER1-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [20:1.00]
+; ZNVER1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [20:20.00]
+; ZNVER1-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [20:20.00]
; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0)
Modified: llvm/trunk/test/CodeGen/X86/x87-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/x87-schedule.ll?rev=331629&r1=331628&r2=331629&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/x87-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/x87-schedule.ll Mon May 7 04:50:44 2018
@@ -4083,56 +4083,56 @@ define void @test_fsqrt() optsize {
; SLM-LABEL: test_fsqrt:
; SLM: # %bb.0:
; SLM-NEXT: #APP
-; SLM-NEXT: fsqrt # sched: [15:1.00]
+; SLM-NEXT: fsqrt # sched: [40:40.00]
; SLM-NEXT: #NO_APP
; SLM-NEXT: retl # sched: [4:1.00]
;
; SANDY-LABEL: test_fsqrt:
; SANDY: # %bb.0:
; SANDY-NEXT: #APP
-; SANDY-NEXT: fsqrt # sched: [14:1.00]
+; SANDY-NEXT: fsqrt # sched: [24:24.00]
; SANDY-NEXT: #NO_APP
; SANDY-NEXT: retl # sched: [6:1.00]
;
; HASWELL-LABEL: test_fsqrt:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fsqrt # sched: [15:1.00]
+; HASWELL-NEXT: fsqrt # sched: [23:17.00]
; HASWELL-NEXT: #NO_APP
; HASWELL-NEXT: retl # sched: [7:1.00]
;
; BROADWELL-LABEL: test_fsqrt:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fsqrt # sched: [15:1.00]
+; BROADWELL-NEXT: fsqrt # sched: [23:9.00]
; BROADWELL-NEXT: #NO_APP
; BROADWELL-NEXT: retl # sched: [6:0.50]
;
; SKYLAKE-LABEL: test_fsqrt:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fsqrt # sched: [15:1.00]
+; SKYLAKE-NEXT: fsqrt # sched: [21:7.00]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retl # sched: [6:0.50]
;
; SKX-LABEL: test_fsqrt:
; SKX: # %bb.0:
; SKX-NEXT: #APP
-; SKX-NEXT: fsqrt # sched: [15:1.00]
+; SKX-NEXT: fsqrt # sched: [21:7.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BTVER2-LABEL: test_fsqrt:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fsqrt # sched: [21:21.00]
+; BTVER2-NEXT: fsqrt # sched: [35:35.00]
; BTVER2-NEXT: #NO_APP
; BTVER2-NEXT: retl # sched: [4:1.00]
;
; ZNVER1-LABEL: test_fsqrt:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fsqrt # sched: [20:1.00]
+; ZNVER1-NEXT: fsqrt # sched: [20:20.00]
; ZNVER1-NEXT: #NO_APP
; ZNVER1-NEXT: retl # sched: [1:0.50]
tail call void asm sideeffect "fsqrt", ""() nounwind
Modified: llvm/trunk/test/tools/llvm-mca/X86/Broadwell/resources-x87.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/Broadwell/resources-x87.s?rev=331629&r1=331628&r2=331629&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/Broadwell/resources-x87.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/Broadwell/resources-x87.s Mon May 7 04:50:44 2018
@@ -307,7 +307,7 @@ fyl2xp1
# CHECK-NEXT: 1 100 0.25 * fscale
# CHECK-NEXT: 1 100 0.25 * fsin
# CHECK-NEXT: 1 100 0.25 * fsincos
-# CHECK-NEXT: 1 15 1.00 * fsqrt
+# CHECK-NEXT: 1 23 9.00 * fsqrt
# CHECK-NEXT: 1 1 0.25 * fst %st(0)
# CHECK-NEXT: 1 1 1.00 * * fsts (%edx)
# CHECK-NEXT: 1 1 1.00 * * fstl (%ecx)
@@ -369,7 +369,7 @@ fyl2xp1
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - - 111.67 140.67 49.00 49.00 27.00 60.67 71.00 9.00
+# CHECK-NEXT: - 9.00 111.67 140.67 49.00 49.00 27.00 60.67 71.00 9.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -474,7 +474,7 @@ fyl2xp1
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fscale
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fsin
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fsincos
-# CHECK-NEXT: - - 1.00 - - - - - - - fsqrt
+# CHECK-NEXT: - 9.00 1.00 - - - - - - - fsqrt
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fst %st(0)
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 fsts (%edx)
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 fstl (%ecx)
Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-x87.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-x87.s?rev=331629&r1=331628&r2=331629&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-x87.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-x87.s Mon May 7 04:50:44 2018
@@ -307,7 +307,7 @@ fyl2xp1
# CHECK-NEXT: 1 100 0.50 * fscale
# CHECK-NEXT: 1 100 0.50 * fsin
# CHECK-NEXT: 1 100 0.50 * fsincos
-# CHECK-NEXT: 1 21 21.00 * fsqrt
+# CHECK-NEXT: 1 35 35.00 * fsqrt
# CHECK-NEXT: 1 1 0.50 * fst %st(0)
# CHECK-NEXT: 1 1 1.00 * * fsts (%edx)
# CHECK-NEXT: 1 1 1.00 * * fstl (%ecx)
@@ -373,7 +373,7 @@ fyl2xp1
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
-# CHECK-NEXT: 44.50 22.50 - 54.00 335.00 54.00 27.00 39.00 - 13.00 - - - -
+# CHECK-NEXT: 44.50 22.50 - 54.00 349.00 54.00 27.00 39.00 - 13.00 - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
@@ -478,7 +478,7 @@ fyl2xp1
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - fscale
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - fsin
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - fsincos
-# CHECK-NEXT: - - - - 21.00 - 1.00 - - - - - - - fsqrt
+# CHECK-NEXT: - - - - 35.00 - 1.00 - - - - - - - fsqrt
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - fst %st(0)
# CHECK-NEXT: - - - - - - - - - 1.00 - - - - fsts (%edx)
# CHECK-NEXT: - - - - - - - - - 1.00 - - - - fstl (%ecx)
Modified: llvm/trunk/test/tools/llvm-mca/X86/Haswell/resources-x87.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/Haswell/resources-x87.s?rev=331629&r1=331628&r2=331629&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/Haswell/resources-x87.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/Haswell/resources-x87.s Mon May 7 04:50:44 2018
@@ -307,7 +307,7 @@ fyl2xp1
# CHECK-NEXT: 50 75 - * fscale
# CHECK-NEXT: 1 100 0.25 * fsin
# CHECK-NEXT: 1 100 0.25 * fsincos
-# CHECK-NEXT: 1 15 1.00 * fsqrt
+# CHECK-NEXT: 1 23 17.00 * fsqrt
# CHECK-NEXT: 1 1 0.50 * fst %st(0)
# CHECK-NEXT: 1 1 1.00 * * fsts (%edx)
# CHECK-NEXT: 1 1 1.00 * * fstl (%ecx)
@@ -369,7 +369,7 @@ fyl2xp1
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - - 117.92 141.92 49.00 49.00 27.00 56.92 65.25 9.00
+# CHECK-NEXT: - 17.00 117.92 141.92 49.00 49.00 27.00 56.92 65.25 9.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -474,7 +474,7 @@ fyl2xp1
# CHECK-NEXT: - - - - - - - - - - fscale
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fsin
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fsincos
-# CHECK-NEXT: - - 1.00 - - - - - - - fsqrt
+# CHECK-NEXT: - 17.00 1.00 - - - - - - - fsqrt
# CHECK-NEXT: - - 0.50 0.50 - - - - - - fst %st(0)
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 fsts (%edx)
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 fstl (%ecx)
Modified: llvm/trunk/test/tools/llvm-mca/X86/SLM/resources-x87.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/SLM/resources-x87.s?rev=331629&r1=331628&r2=331629&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/SLM/resources-x87.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/SLM/resources-x87.s Mon May 7 04:50:44 2018
@@ -307,7 +307,7 @@ fyl2xp1
# CHECK-NEXT: 1 100 1.00 * fscale
# CHECK-NEXT: 1 100 1.00 * fsin
# CHECK-NEXT: 1 100 1.00 * fsincos
-# CHECK-NEXT: 1 15 1.00 * fsqrt
+# CHECK-NEXT: 1 40 40.00 * fsqrt
# CHECK-NEXT: 1 1 0.50 * fst %st(0)
# CHECK-NEXT: 1 1 1.00 * * fsts (%edx)
# CHECK-NEXT: 1 1 1.00 * * fstl (%ecx)
@@ -367,7 +367,7 @@ fyl2xp1
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
-# CHECK-NEXT: - 544.00 16.00 64.00 55.00 9.50 9.50 52.00
+# CHECK-NEXT: - 584.00 16.00 64.00 55.00 9.50 9.50 52.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
@@ -472,7 +472,7 @@ fyl2xp1
# CHECK-NEXT: - - - 1.00 - - - - fscale
# CHECK-NEXT: - - - 1.00 - - - - fsin
# CHECK-NEXT: - - - 1.00 - - - - fsincos
-# CHECK-NEXT: - - - 1.00 - - - - fsqrt
+# CHECK-NEXT: - 40.00 - 1.00 - - - - fsqrt
# CHECK-NEXT: - - - - - 0.50 0.50 - fst %st(0)
# CHECK-NEXT: - - - - - 0.50 0.50 1.00 fsts (%edx)
# CHECK-NEXT: - - - - - 0.50 0.50 1.00 fstl (%ecx)
Modified: llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-x87.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-x87.s?rev=331629&r1=331628&r2=331629&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-x87.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-x87.s Mon May 7 04:50:44 2018
@@ -307,7 +307,7 @@ fyl2xp1
# CHECK-NEXT: 1 100 0.33 * fscale
# CHECK-NEXT: 1 100 0.33 * fsin
# CHECK-NEXT: 1 100 0.33 * fsincos
-# CHECK-NEXT: 1 14 1.00 * fsqrt
+# CHECK-NEXT: 1 24 24.00 * fsqrt
# CHECK-NEXT: 1 1 1.00 * fst %st(0)
# CHECK-NEXT: 3 6 1.00 * * fsts (%edx)
# CHECK-NEXT: 3 6 1.00 * * fstl (%ecx)
@@ -367,7 +367,7 @@ fyl2xp1
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
-# CHECK-NEXT: - - 48.33 87.33 17.00 54.33 34.00 34.00
+# CHECK-NEXT: - 24.00 48.33 87.33 17.00 54.33 34.00 34.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
@@ -472,7 +472,7 @@ fyl2xp1
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fscale
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fsin
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fsincos
-# CHECK-NEXT: - - 1.00 - - - - - fsqrt
+# CHECK-NEXT: - 24.00 1.00 - - - - - fsqrt
# CHECK-NEXT: - - - - - 1.00 - - fst %st(0)
# CHECK-NEXT: - - - - 1.00 - 1.00 1.00 fsts (%edx)
# CHECK-NEXT: - - - - 1.00 - 1.00 1.00 fstl (%ecx)
Modified: llvm/trunk/test/tools/llvm-mca/X86/SkylakeClient/resources-x87.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/SkylakeClient/resources-x87.s?rev=331629&r1=331628&r2=331629&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/SkylakeClient/resources-x87.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/SkylakeClient/resources-x87.s Mon May 7 04:50:44 2018
@@ -307,7 +307,7 @@ fyl2xp1
# CHECK-NEXT: 1 100 0.25 * fscale
# CHECK-NEXT: 1 100 0.25 * fsin
# CHECK-NEXT: 1 100 0.25 * fsincos
-# CHECK-NEXT: 1 15 1.00 * fsqrt
+# CHECK-NEXT: 1 21 7.00 * fsqrt
# CHECK-NEXT: 1 1 0.25 * fst %st(0)
# CHECK-NEXT: 1 1 1.00 * * fsts (%edx)
# CHECK-NEXT: 1 1 1.00 * * fstl (%ecx)
@@ -369,7 +369,7 @@ fyl2xp1
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - - 121.50 53.50 49.00 49.00 27.00 144.50 70.50 9.00
+# CHECK-NEXT: - 7.00 121.50 53.50 49.00 49.00 27.00 144.50 70.50 9.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -474,7 +474,7 @@ fyl2xp1
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fscale
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fsin
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fsincos
-# CHECK-NEXT: - - 1.00 - - - - - - - fsqrt
+# CHECK-NEXT: - 7.00 1.00 - - - - - - - fsqrt
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fst %st(0)
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 fsts (%edx)
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 fstl (%ecx)
Modified: llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-x87.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-x87.s?rev=331629&r1=331628&r2=331629&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-x87.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-x87.s Mon May 7 04:50:44 2018
@@ -307,7 +307,7 @@ fyl2xp1
# CHECK-NEXT: 1 100 0.25 * fscale
# CHECK-NEXT: 1 100 0.25 * fsin
# CHECK-NEXT: 1 100 0.25 * fsincos
-# CHECK-NEXT: 1 15 1.00 * fsqrt
+# CHECK-NEXT: 1 21 7.00 * fsqrt
# CHECK-NEXT: 1 1 0.25 * fst %st(0)
# CHECK-NEXT: 1 1 1.00 * * fsts (%edx)
# CHECK-NEXT: 1 1 1.00 * * fstl (%ecx)
@@ -369,7 +369,7 @@ fyl2xp1
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - - 121.50 53.50 49.00 49.00 27.00 144.50 70.50 9.00
+# CHECK-NEXT: - 7.00 121.50 53.50 49.00 49.00 27.00 144.50 70.50 9.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -474,7 +474,7 @@ fyl2xp1
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fscale
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fsin
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fsincos
-# CHECK-NEXT: - - 1.00 - - - - - - - fsqrt
+# CHECK-NEXT: - 7.00 1.00 - - - - - - - fsqrt
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fst %st(0)
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 fsts (%edx)
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 fstl (%ecx)
Modified: llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-avx1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-avx1.s?rev=331629&r1=331628&r2=331629&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-avx1.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-avx1.s Mon May 7 04:50:44 2018
@@ -1638,18 +1638,18 @@ vzeroupper
# CHECK-NEXT: 1 8 0.50 * vshufps $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vshufps $1, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 0.50 * vshufps $1, (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 20 1.00 vsqrtpd %xmm0, %xmm2
-# CHECK-NEXT: 1 27 1.00 * vsqrtpd (%rax), %xmm2
+# CHECK-NEXT: 1 20 20.00 vsqrtpd %xmm0, %xmm2
+# CHECK-NEXT: 1 27 20.00 * vsqrtpd (%rax), %xmm2
# CHECK-NEXT: 1 40 40.00 vsqrtpd %ymm0, %ymm2
-# CHECK-NEXT: 2 47 47.00 * vsqrtpd (%rax), %ymm2
-# CHECK-NEXT: 1 20 1.00 vsqrtps %xmm0, %xmm2
-# CHECK-NEXT: 1 27 1.00 * vsqrtps (%rax), %xmm2
+# CHECK-NEXT: 2 47 40.00 * vsqrtpd (%rax), %ymm2
+# CHECK-NEXT: 1 20 20.00 vsqrtps %xmm0, %xmm2
+# CHECK-NEXT: 1 27 20.00 * vsqrtps (%rax), %xmm2
# CHECK-NEXT: 1 28 28.00 vsqrtps %ymm0, %ymm2
-# CHECK-NEXT: 2 35 35.00 * vsqrtps (%rax), %ymm2
-# CHECK-NEXT: 1 20 1.00 vsqrtsd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 1 27 1.00 * vsqrtsd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 20 1.00 vsqrtss %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 1 27 1.00 * vsqrtss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 35 28.00 * vsqrtps (%rax), %ymm2
+# CHECK-NEXT: 1 20 20.00 vsqrtsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 27 20.00 * vsqrtsd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 20 20.00 vsqrtss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 27 20.00 * vsqrtss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 100 - * * * vstmxcsr (%rax)
# CHECK-NEXT: 1 3 1.00 vsubpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 10 1.00 * vsubpd (%rax), %xmm1, %xmm2
@@ -1718,7 +1718,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
-# CHECK-NEXT: 168.00 168.00 - - - - - 194.25 142.75 168.25 366.75 -
+# CHECK-NEXT: 168.00 168.00 - - - - - 194.25 142.75 168.25 504.75 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
@@ -2342,18 +2342,18 @@ vzeroupper
# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vshufps $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vshufps $1, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vshufps $1, (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - - - - - - - - - 1.00 - vsqrtpd %xmm0, %xmm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - vsqrtpd (%rax), %xmm2
+# CHECK-NEXT: - - - - - - - - - - 20.00 - vsqrtpd %xmm0, %xmm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - 20.00 - vsqrtpd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - 40.00 - vsqrtpd %ymm0, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - 47.00 - vsqrtpd (%rax), %ymm2
-# CHECK-NEXT: - - - - - - - - - - 1.00 - vsqrtps %xmm0, %xmm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - vsqrtps (%rax), %xmm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - 40.00 - vsqrtpd (%rax), %ymm2
+# CHECK-NEXT: - - - - - - - - - - 20.00 - vsqrtps %xmm0, %xmm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - 20.00 - vsqrtps (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - 28.00 - vsqrtps %ymm0, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - 35.00 - vsqrtps (%rax), %ymm2
-# CHECK-NEXT: - - - - - - - - - - 1.00 - vsqrtsd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - vsqrtsd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - - - - - - - - - 1.00 - vsqrtss %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - vsqrtss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - 28.00 - vsqrtps (%rax), %ymm2
+# CHECK-NEXT: - - - - - - - - - - 20.00 - vsqrtsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - 20.00 - vsqrtsd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - - - - - - - - 20.00 - vsqrtss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - 20.00 - vsqrtss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - vstmxcsr (%rax)
# CHECK-NEXT: - - - - - - - 1.00 - - - - vsubpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - vsubpd (%rax), %xmm1, %xmm2
Modified: llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-sse1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-sse1.s?rev=331629&r1=331628&r2=331629&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-sse1.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-sse1.s Mon May 7 04:50:44 2018
@@ -301,10 +301,10 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 * * * sfence
# CHECK-NEXT: 1 1 0.50 shufps $1, %xmm0, %xmm2
# CHECK-NEXT: 1 8 0.50 * shufps $1, (%rax), %xmm2
-# CHECK-NEXT: 1 20 1.00 sqrtps %xmm0, %xmm2
-# CHECK-NEXT: 1 27 1.00 * sqrtps (%rax), %xmm2
-# CHECK-NEXT: 1 20 1.00 sqrtss %xmm0, %xmm2
-# CHECK-NEXT: 1 27 1.00 * sqrtss (%rax), %xmm2
+# CHECK-NEXT: 1 20 20.00 sqrtps %xmm0, %xmm2
+# CHECK-NEXT: 1 27 20.00 * sqrtps (%rax), %xmm2
+# CHECK-NEXT: 1 20 20.00 sqrtss %xmm0, %xmm2
+# CHECK-NEXT: 1 27 20.00 * sqrtss (%rax), %xmm2
# CHECK-NEXT: 1 100 - * * * stmxcsr (%rax)
# CHECK-NEXT: 1 3 1.00 subps %xmm0, %xmm2
# CHECK-NEXT: 1 10 1.00 * subps (%rax), %xmm2
@@ -335,7 +335,7 @@ xorps (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
-# CHECK-NEXT: 32.50 32.50 - - - - - 41.00 21.50 22.00 32.50 -
+# CHECK-NEXT: 32.50 32.50 - - - - - 41.00 21.50 22.00 108.50 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
@@ -446,10 +446,10 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - sfence
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - shufps $1, %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - shufps $1, (%rax), %xmm2
-# CHECK-NEXT: - - - - - - - - - - 1.00 - sqrtps %xmm0, %xmm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - sqrtps (%rax), %xmm2
-# CHECK-NEXT: - - - - - - - - - - 1.00 - sqrtss %xmm0, %xmm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - sqrtss (%rax), %xmm2
+# CHECK-NEXT: - - - - - - - - - - 20.00 - sqrtps %xmm0, %xmm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - 20.00 - sqrtps (%rax), %xmm2
+# CHECK-NEXT: - - - - - - - - - - 20.00 - sqrtss %xmm0, %xmm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - 20.00 - sqrtss (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - - stmxcsr (%rax)
# CHECK-NEXT: - - - - - - - 1.00 - - - - subps %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - subps (%rax), %xmm2
Modified: llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-sse2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-sse2.s?rev=331629&r1=331628&r2=331629&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-sse2.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-sse2.s Mon May 7 04:50:44 2018
@@ -650,10 +650,10 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 8 0.50 * pxor (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 shufpd $1, %xmm0, %xmm2
# CHECK-NEXT: 1 8 0.50 * shufpd $1, (%rax), %xmm2
-# CHECK-NEXT: 1 20 1.00 sqrtpd %xmm0, %xmm2
-# CHECK-NEXT: 1 27 1.00 * sqrtpd (%rax), %xmm2
-# CHECK-NEXT: 1 20 1.00 sqrtsd %xmm0, %xmm2
-# CHECK-NEXT: 1 27 1.00 * sqrtsd (%rax), %xmm2
+# CHECK-NEXT: 1 20 20.00 sqrtpd %xmm0, %xmm2
+# CHECK-NEXT: 1 27 20.00 * sqrtpd (%rax), %xmm2
+# CHECK-NEXT: 1 20 20.00 sqrtsd %xmm0, %xmm2
+# CHECK-NEXT: 1 27 20.00 * sqrtsd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 subpd %xmm0, %xmm2
# CHECK-NEXT: 1 10 1.00 * subpd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 subsd %xmm0, %xmm2
@@ -683,7 +683,7 @@ xorpd (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
-# CHECK-NEXT: 65.00 65.00 0.50 0.50 0.50 0.50 - 74.08 39.58 69.25 77.08 -
+# CHECK-NEXT: 65.00 65.00 0.50 0.50 0.50 0.50 - 74.08 39.58 69.25 153.08 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
@@ -935,10 +935,10 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - pxor (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - shufpd $1, %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - shufpd $1, (%rax), %xmm2
-# CHECK-NEXT: - - - - - - - - - - 1.00 - sqrtpd %xmm0, %xmm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - sqrtpd (%rax), %xmm2
-# CHECK-NEXT: - - - - - - - - - - 1.00 - sqrtsd %xmm0, %xmm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - sqrtsd (%rax), %xmm2
+# CHECK-NEXT: - - - - - - - - - - 20.00 - sqrtpd %xmm0, %xmm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - 20.00 - sqrtpd (%rax), %xmm2
+# CHECK-NEXT: - - - - - - - - - - 20.00 - sqrtsd %xmm0, %xmm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - 20.00 - sqrtsd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 1.00 - - - - subpd %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - subpd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 1.00 - - - - subsd %xmm0, %xmm2
Modified: llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-x87.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-x87.s?rev=331629&r1=331628&r2=331629&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-x87.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/Znver1/resources-x87.s Mon May 7 04:50:44 2018
@@ -307,7 +307,7 @@ fyl2xp1
# CHECK-NEXT: 1 100 - * fscale
# CHECK-NEXT: 1 100 - * fsin
# CHECK-NEXT: 1 100 - * fsincos
-# CHECK-NEXT: 1 20 1.00 * fsqrt
+# CHECK-NEXT: 1 20 20.00 * fsqrt
# CHECK-NEXT: 2 5 0.50 * fst %st(0)
# CHECK-NEXT: 1 1 0.50 * * fsts (%edx)
# CHECK-NEXT: 1 1 0.50 * * fstl (%ecx)
@@ -371,7 +371,7 @@ fyl2xp1
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
-# CHECK-NEXT: 32.50 32.50 - - - - - 58.50 2.00 8.00 45.50 -
+# CHECK-NEXT: 32.50 32.50 - - - - - 58.50 2.00 8.00 64.50 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
@@ -476,7 +476,7 @@ fyl2xp1
# CHECK-NEXT: - - - - - - - - - - - - fscale
# CHECK-NEXT: - - - - - - - - - - - - fsin
# CHECK-NEXT: - - - - - - - - - - - - fsincos
-# CHECK-NEXT: - - - - - - - - - - 1.00 - fsqrt
+# CHECK-NEXT: - - - - - - - - - - 20.00 - fsqrt
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fst %st(0)
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - fsts (%edx)
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - fstl (%ecx)
More information about the llvm-commits mailing list