[llvm] r331515 - [X86] Add SchedWriteFRnd fp rounding scheduler classes
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri May 4 05:59:24 PDT 2018
Author: rksimon
Date: Fri May 4 05:59:24 2018
New Revision: 331515
URL: http://llvm.org/viewvc/llvm-project?rev=331515&view=rev
Log:
[X86] Add SchedWriteFRnd fp rounding scheduler classes
Split off from SchedWriteFAdd for fp rounding/bit-manipulation instructions.
Fixes an issue on btver2, where only the ymm versions used the JSTC pipe while the other versions used JFPA.
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/lib/Target/X86/X86InstrXOP.td
llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
llvm/trunk/lib/Target/X86/X86SchedHaswell.td
llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
llvm/trunk/lib/Target/X86/X86Schedule.td
llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s
llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-sse41.s
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=331515&r1=331514&r2=331515&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Fri May 4 05:59:24 2018
@@ -7990,7 +7990,7 @@ let Predicates = [HasERI] in {
}
defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds,
- SchedWriteFAdd.Scl>, T8PD, EVEX_4V;
+ SchedWriteFRnd.Scl>, T8PD, EVEX_4V;
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
@@ -8057,9 +8057,9 @@ let Predicates = [HasERI] in {
defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, SchedWriteFRcp>, EVEX;
defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, SchedWriteFAdd>, EVEX;
}
-defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SchedWriteFAdd>,
+defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SchedWriteFRnd>,
avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd,
- SchedWriteFAdd>, EVEX;
+ SchedWriteFRnd>, EVEX;
multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _>{
@@ -8274,12 +8274,12 @@ multiclass avx512_rndscale_scalar<bits<8
}
defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless",
- SchedWriteFAdd.Scl, f32x_info>,
+ SchedWriteFRnd.Scl, f32x_info>,
AVX512AIi8Base, EVEX_4V,
EVEX_CD8<32, CD8VT1>;
defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd",
- SchedWriteFAdd.Scl, f64x_info>,
+ SchedWriteFRnd.Scl, f64x_info>,
VEX_W, AVX512AIi8Base, EVEX_4V,
EVEX_CD8<64, CD8VT1>;
@@ -9381,13 +9381,13 @@ multiclass avx512_common_unary_fp_sae_pa
}
defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
- X86VReduce, X86VReduceRnd, SchedWriteFAdd, HasDQI>,
+ X86VReduce, X86VReduceRnd, SchedWriteFRnd, HasDQI>,
AVX512AIi8Base, EVEX;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
- X86VRndScale, X86VRndScaleRnd, SchedWriteFAdd, HasAVX512>,
+ X86VRndScale, X86VRndScaleRnd, SchedWriteFRnd, HasAVX512>,
AVX512AIi8Base, EVEX;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
- X86VGetMant, X86VGetMantRnd, SchedWriteFAdd, HasAVX512>,
+ X86VGetMant, X86VGetMantRnd, SchedWriteFRnd, HasAVX512>,
AVX512AIi8Base, EVEX;
defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
@@ -9407,17 +9407,17 @@ defm VRANGESS: avx512_common_fp_sae_scal
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
- 0x57, X86Reduces, X86ReducesRnd, SchedWriteFAdd, HasDQI>,
+ 0x57, X86Reduces, X86ReducesRnd, SchedWriteFRnd, HasDQI>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
- 0x57, X86Reduces, X86ReducesRnd, SchedWriteFAdd, HasDQI>,
+ 0x57, X86Reduces, X86ReducesRnd, SchedWriteFRnd, HasDQI>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
- 0x27, X86GetMants, X86GetMantsRnd, SchedWriteFAdd, HasAVX512>,
+ 0x27, X86GetMants, X86GetMantsRnd, SchedWriteFRnd, HasAVX512>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
- 0x27, X86GetMants, X86GetMantsRnd, SchedWriteFAdd, HasAVX512>,
+ 0x27, X86GetMants, X86GetMantsRnd, SchedWriteFRnd, HasAVX512>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
let Predicates = [HasAVX512] in {
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=331515&r1=331514&r2=331515&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Fri May 4 05:59:24 2018
@@ -5529,27 +5529,27 @@ let Predicates = [HasAVX, NoVLX] in {
let ExeDomain = SSEPackedSingle in {
// Intrinsic form
defm VROUNDPS : sse41_fp_unop_p<0x08, "vroundps", f128mem, VR128, v4f32,
- loadv4f32, X86VRndScale, SchedWriteFAdd.XMM>,
+ loadv4f32, X86VRndScale, SchedWriteFRnd.XMM>,
VEX, VEX_WIG;
defm VROUNDPSY : sse41_fp_unop_p<0x08, "vroundps", f256mem, VR256, v8f32,
- loadv8f32, X86VRndScale, SchedWriteFAdd.YMM>,
+ loadv8f32, X86VRndScale, SchedWriteFRnd.YMM>,
VEX, VEX_L, VEX_WIG;
}
let ExeDomain = SSEPackedDouble in {
defm VROUNDPD : sse41_fp_unop_p<0x09, "vroundpd", f128mem, VR128, v2f64,
- loadv2f64, X86VRndScale, SchedWriteFAdd.XMM>,
+ loadv2f64, X86VRndScale, SchedWriteFRnd.XMM>,
VEX, VEX_WIG;
defm VROUNDPDY : sse41_fp_unop_p<0x09, "vroundpd", f256mem, VR256, v4f64,
- loadv4f64, X86VRndScale, SchedWriteFAdd.YMM>,
+ loadv4f64, X86VRndScale, SchedWriteFRnd.YMM>,
VEX, VEX_L, VEX_WIG;
}
}
let Predicates = [HasAVX, NoAVX512] in {
- defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround", SchedWriteFAdd.Scl,
+ defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl,
v4f32, v2f64, X86RndScales, 0>,
VEX_4V, VEX_LIG, VEX_WIG;
- defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround", SchedWriteFAdd.Scl>,
+ defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl>,
VEX_4V, VEX_LIG, VEX_WIG;
}
@@ -5624,15 +5624,15 @@ let Predicates = [HasAVX, NoVLX] in {
let ExeDomain = SSEPackedSingle in
defm ROUNDPS : sse41_fp_unop_p<0x08, "roundps", f128mem, VR128, v4f32,
- memopv4f32, X86VRndScale, SchedWriteFAdd.XMM>;
+ memopv4f32, X86VRndScale, SchedWriteFRnd.XMM>;
let ExeDomain = SSEPackedDouble in
defm ROUNDPD : sse41_fp_unop_p<0x09, "roundpd", f128mem, VR128, v2f64,
- memopv2f64, X86VRndScale, SchedWriteFAdd.XMM>;
+ memopv2f64, X86VRndScale, SchedWriteFRnd.XMM>;
-defm ROUND : sse41_fp_unop_s<0x0A, 0x0B, "round", SchedWriteFAdd.Scl>;
+defm ROUND : sse41_fp_unop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl>;
let Constraints = "$src1 = $dst" in
-defm ROUND : sse41_fp_binop_s<0x0A, 0x0B, "round", SchedWriteFAdd.Scl,
+defm ROUND : sse41_fp_binop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl,
v4f32, v2f64, X86RndScales>;
let Predicates = [UseSSE41] in {
Modified: llvm/trunk/lib/Target/X86/X86InstrXOP.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrXOP.td?rev=331515&r1=331514&r2=331515&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrXOP.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrXOP.td Fri May 4 05:59:24 2018
@@ -76,20 +76,20 @@ multiclass xop2op256<bits<8> opc, string
let ExeDomain = SSEPackedSingle in {
defm VFRCZSS : xop2opsld<0x82, "vfrczss", int_x86_xop_vfrcz_ss,
- ssmem, sse_load_f32, SchedWriteFAdd.XMM>;
+ ssmem, sse_load_f32, SchedWriteFRnd.Scl>;
defm VFRCZPS : xop2op128<0x80, "vfrczps", int_x86_xop_vfrcz_ps, loadv4f32,
- SchedWriteFAdd.XMM>;
+ SchedWriteFRnd.XMM>;
defm VFRCZPS : xop2op256<0x80, "vfrczps", int_x86_xop_vfrcz_ps_256, loadv8f32,
- SchedWriteFAdd.YMM>;
+ SchedWriteFRnd.YMM>;
}
let ExeDomain = SSEPackedDouble in {
defm VFRCZSD : xop2opsld<0x83, "vfrczsd", int_x86_xop_vfrcz_sd,
- sdmem, sse_load_f64, SchedWriteFAdd.XMM>;
+ sdmem, sse_load_f64, SchedWriteFRnd.Scl>;
defm VFRCZPD : xop2op128<0x81, "vfrczpd", int_x86_xop_vfrcz_pd, loadv2f64,
- SchedWriteFAdd.XMM>;
+ SchedWriteFRnd.XMM>;
defm VFRCZPD : xop2op256<0x81, "vfrczpd", int_x86_xop_vfrcz_pd_256, loadv4f64,
- SchedWriteFAdd.YMM>;
+ SchedWriteFRnd.YMM>;
}
multiclass xop3op<bits<8> opc, string OpcodeStr, SDNode OpNode,
Modified: llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedBroadwell.td?rev=331515&r1=331514&r2=331515&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedBroadwell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedBroadwell.td Fri May 4 05:59:24 2018
@@ -176,7 +176,11 @@ defm : BWWriteResPair<WriteFMAY, [BWPo
defm : BWWriteResPair<WriteDPPD, [BWPort0,BWPort1,BWPort5], 9, [1,1,1], 3, 5>; // Floating point double dot product.
defm : BWWriteResPair<WriteDPPS, [BWPort0,BWPort1,BWPort5], 14, [2,1,1], 4, 5>; // Floating point single dot product.
defm : BWWriteResPair<WriteDPPSY, [BWPort0,BWPort1,BWPort5], 14, [2,1,1], 4, 6>; // Floating point single dot product (YMM).
-defm : BWWriteResPair<WriteFSign, [BWPort5], 1>; // Floating point fabs/fchs.
+defm : BWWriteResPair<WriteFSign, [BWPort5], 1>; // Floating point fabs/fchs.
+defm : X86WriteRes<WriteFRnd, [BWPort23], 6, [1], 1>; // Floating point rounding.
+defm : X86WriteRes<WriteFRndY, [BWPort23], 6, [1], 1>; // Floating point rounding (YMM/ZMM).
+defm : X86WriteRes<WriteFRndLd, [BWPort1,BWPort23], 11, [2,1], 3>;
+defm : X86WriteRes<WriteFRndYLd, [BWPort1,BWPort23], 12, [2,1], 3>;
defm : BWWriteResPair<WriteFLogic, [BWPort5], 1, [1], 1, 5>; // Floating point and/or/xor logicals.
defm : BWWriteResPair<WriteFLogicY, [BWPort5], 1, [1], 1, 6>; // Floating point and/or/xor logicals (YMM/ZMM).
defm : BWWriteResPair<WriteFShuffle, [BWPort5], 1, [1], 1, 5>; // Floating point vector shuffles.
@@ -926,11 +930,7 @@ def: InstRW<[BWWriteResGroup58], (instre
"VMOVUPDYrm",
"VMOVUPSYrm",
"VPBROADCASTDYrm",
- "VPBROADCASTQYrm",
- "(V?)ROUNDPD(Y?)r",
- "(V?)ROUNDPS(Y?)r",
- "(V?)ROUNDSDr",
- "(V?)ROUNDSSr")>;
+ "VPBROADCASTQYrm")>;
def BWWriteResGroup59 : SchedWriteRes<[BWPort0,BWPort23]> {
let Latency = 6;
@@ -1405,16 +1405,6 @@ def BWWriteResGroup126 : SchedWriteRes<[
def: InstRW<[BWWriteResGroup126], (instregex "VRCPPSYr",
"VRSQRTPSYr")>;
-def BWWriteResGroup127 : SchedWriteRes<[BWPort1,BWPort23]> {
- let Latency = 11;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[BWWriteResGroup127], (instregex "(V?)ROUNDPDm",
- "(V?)ROUNDPSm",
- "(V?)ROUNDSDm",
- "(V?)ROUNDSSm")>;
-
def BWWriteResGroup128 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> {
let Latency = 11;
let NumMicroOps = 3;
@@ -1458,9 +1448,7 @@ def BWWriteResGroup135 : SchedWriteRes<[
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[BWWriteResGroup135], (instregex "(ADD|SUB|SUBR)_FI(16|32)m",
- "VROUNDPDYm",
- "VROUNDPSYm")>;
+def: InstRW<[BWWriteResGroup135], (instregex "(ADD|SUB|SUBR)_FI(16|32)m")>;
def BWWriteResGroup137 : SchedWriteRes<[BWPort0,BWFPDivider]> {
let Latency = 11;
Modified: llvm/trunk/lib/Target/X86/X86SchedHaswell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedHaswell.td?rev=331515&r1=331514&r2=331515&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedHaswell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedHaswell.td Fri May 4 05:59:24 2018
@@ -173,6 +173,10 @@ defm : HWWriteResPair<WriteDPPD, [HWPor
defm : HWWriteResPair<WriteDPPS, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 6>;
defm : HWWriteResPair<WriteDPPSY, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 7>;
defm : HWWriteResPair<WriteFSign, [HWPort0], 1>;
+defm : X86WriteRes<WriteFRnd, [HWPort23], 6, [1], 1>;
+defm : X86WriteRes<WriteFRndY, [HWPort23], 6, [1], 1>;
+defm : X86WriteRes<WriteFRndLd, [HWPort1,HWPort23], 12, [2,1], 3>;
+defm : X86WriteRes<WriteFRndYLd, [HWPort1,HWPort23], 13, [2,1], 3>;
defm : HWWriteResPair<WriteFLogic, [HWPort5], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteFLogicY, [HWPort5], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteFShuffle, [HWPort5], 1, [1], 1, 6>;
@@ -645,11 +649,7 @@ def: InstRW<[HWWriteResGroup0], (instreg
"(V?)MOVUPDrm",
"(V?)MOVUPSrm",
"VPBROADCASTDrm",
- "VPBROADCASTQrm",
- "(V?)ROUNDPD(Y?)r",
- "(V?)ROUNDPS(Y?)r",
- "(V?)ROUNDSDr",
- "(V?)ROUNDSSr")>;
+ "VPBROADCASTQrm")>;
def HWWriteResGroup0_1 : SchedWriteRes<[HWPort23]> {
let Latency = 7;
@@ -1760,19 +1760,7 @@ def HWWriteResGroup103 : SchedWriteRes<[
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[HWWriteResGroup103], (instregex "(ADD|SUB|SUBR)_FI(16|32)m",
- "VROUNDPDYm",
- "VROUNDPSYm")>;
-
-def HWWriteResGroup103_1 : SchedWriteRes<[HWPort1,HWPort23]> {
- let Latency = 12;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[HWWriteResGroup103_1], (instregex "(V?)ROUNDPDm",
- "(V?)ROUNDPSm",
- "(V?)ROUNDSDm",
- "(V?)ROUNDSSm")>;
+def: InstRW<[HWWriteResGroup103], (instregex "(ADD|SUB|SUBR)_FI(16|32)m")>;
def HWWriteResGroup104 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
let Latency = 12;
Modified: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td?rev=331515&r1=331514&r2=331515&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td Fri May 4 05:59:24 2018
@@ -160,6 +160,8 @@ defm : SBWriteResPair<WriteCvtF2I, [SBPo
defm : SBWriteResPair<WriteCvtI2F, [SBPort1], 4>;
defm : SBWriteResPair<WriteCvtF2F, [SBPort1], 3>;
defm : SBWriteResPair<WriteFSign, [SBPort5], 1>;
+defm : SBWriteResPair<WriteFRnd, [SBPort1], 3, [1], 1, 6>;
+defm : SBWriteResPair<WriteFRndY, [SBPort1], 3, [1], 1, 7>;
defm : SBWriteResPair<WriteFLogic, [SBPort5], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteFLogicY, [SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteFShuffle, [SBPort5], 1, [1], 1, 6>;
@@ -1157,11 +1159,7 @@ def SBWriteResGroup90 : SchedWriteRes<[S
def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTPS2PIirm",
"MMX_CVTTPS2PIirm",
"(V?)CVTPS2DQrm",
- "(V?)CVTTPS2DQrm",
- "(V?)ROUNDPDm",
- "(V?)ROUNDPSm",
- "(V?)ROUNDSDm",
- "(V?)ROUNDSSm")>;
+ "(V?)CVTTPS2DQrm")>;
def SBWriteResGroup91 : SchedWriteRes<[SBPort23,SBPort05]> {
let Latency = 9;
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td?rev=331515&r1=331514&r2=331515&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td Fri May 4 05:59:24 2018
@@ -173,6 +173,8 @@ defm : SKLWriteResPair<WriteDPPD, [SKL
defm : SKLWriteResPair<WriteDPPS, [SKLPort5,SKLPort01], 13, [1,3], 4, 6>; // Floating point single dot product.
defm : SKLWriteResPair<WriteDPPSY, [SKLPort5,SKLPort01], 13, [1,3], 4, 7>; // Floating point single dot product (YMM).
defm : SKLWriteResPair<WriteFSign, [SKLPort0], 1>; // Floating point fabs/fchs.
+defm : SKLWriteResPair<WriteFRnd, [SKLPort01], 8, [2], 2, 6>; // Floating point rounding.
+defm : SKLWriteResPair<WriteFRndY, [SKLPort01], 8, [2], 2, 7>; // Floating point rounding (YMM/ZMM).
defm : SKLWriteResPair<WriteFLogic, [SKLPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
defm : SKLWriteResPair<WriteFLogicY, [SKLPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM).
defm : SKLWriteResPair<WriteFShuffle, [SKLPort5], 1, [1], 1, 6>; // Floating point vector shuffles.
@@ -1335,16 +1337,6 @@ def SKLWriteResGroup103 : SchedWriteRes<
}
def: InstRW<[SKLWriteResGroup103], (instrs LOOP)>;
-def SKLWriteResGroup105 : SchedWriteRes<[SKLPort01]> {
- let Latency = 8;
- let NumMicroOps = 2;
- let ResourceCycles = [2];
-}
-def: InstRW<[SKLWriteResGroup105], (instregex "(V?)ROUNDPD(Y?)r",
- "(V?)ROUNDPS(Y?)r",
- "(V?)ROUNDSDr",
- "(V?)ROUNDSSr")>;
-
def SKLWriteResGroup106 : SchedWriteRes<[SKLPort0,SKLPort23]> {
let Latency = 8;
let NumMicroOps = 2;
@@ -1796,16 +1788,6 @@ def SKLWriteResGroup166_1 : SchedWriteRe
}
def: InstRW<[SKLWriteResGroup166_1], (instregex "VDIVPDYrr")>;
-def SKLWriteResGroup168 : SchedWriteRes<[SKLPort23,SKLPort01]> {
- let Latency = 14;
- let NumMicroOps = 3;
- let ResourceCycles = [1,2];
-}
-def: InstRW<[SKLWriteResGroup168], (instregex "(V?)ROUNDPDm")>;
-def: InstRW<[SKLWriteResGroup168], (instregex "(V?)ROUNDPSm")>;
-def: InstRW<[SKLWriteResGroup168], (instregex "(V?)ROUNDSDm")>;
-def: InstRW<[SKLWriteResGroup168], (instregex "(V?)ROUNDSSm")>;
-
def SKLWriteResGroup169 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
let Latency = 14;
let NumMicroOps = 3;
@@ -1829,14 +1811,6 @@ def: InstRW<[SKLWriteResGroup171], (inst
"DIVR_FST0r",
"DIVR_FrST0")>;
-def SKLWriteResGroup172 : SchedWriteRes<[SKLPort23,SKLPort01]> {
- let Latency = 15;
- let NumMicroOps = 3;
- let ResourceCycles = [1,2];
-}
-def: InstRW<[SKLWriteResGroup172], (instregex "VROUNDPDYm",
- "VROUNDPSYm")>;
-
def SKLWriteResGroup174 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort06,SKLPort15,SKLPort0156]> {
let Latency = 15;
let NumMicroOps = 10;
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td?rev=331515&r1=331514&r2=331515&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td Fri May 4 05:59:24 2018
@@ -173,6 +173,8 @@ defm : SKXWriteResPair<WriteDPPD, [SKXPo
defm : SKXWriteResPair<WriteDPPS, [SKXPort5,SKXPort015], 13, [1,3], 4, 6>; // Floating point single dot product.
defm : SKXWriteResPair<WriteDPPSY,[SKXPort5,SKXPort015], 13, [1,3], 4, 7>; // Floating point single dot product (YMM).
defm : SKXWriteResPair<WriteFSign, [SKXPort0], 1>; // Floating point fabs/fchs.
+defm : SKXWriteResPair<WriteFRnd, [SKXPort015], 8, [2], 2, 6>; // Floating point rounding.
+defm : SKXWriteResPair<WriteFRndY, [SKXPort015], 8, [2], 2, 7>; // Floating point rounding (YMM/ZMM).
defm : SKXWriteResPair<WriteFLogic, [SKXPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
defm : SKXWriteResPair<WriteFLogicY, [SKXPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM).
defm : SKXWriteResPair<WriteFShuffle, [SKXPort5], 1, [1], 1, 6>; // Floating point vector shuffles.
@@ -2127,24 +2129,6 @@ def SKXWriteResGroup114 : SchedWriteRes<
}
def: InstRW<[SKXWriteResGroup114], (instrs VSCATTERDPSZmr)>;
-def SKXWriteResGroup116 : SchedWriteRes<[SKXPort015]> {
- let Latency = 8;
- let NumMicroOps = 2;
- let ResourceCycles = [2];
-}
-def: InstRW<[SKXWriteResGroup116], (instregex "VRNDSCALEPDZ128rri",
- "VRNDSCALEPDZ256rri",
- "VRNDSCALEPDZrri",
- "VRNDSCALEPSZ128rri",
- "VRNDSCALEPSZ256rri",
- "VRNDSCALEPSZrri",
- "VRNDSCALESDr",
- "VRNDSCALESSr",
- "(V?)ROUNDPD(Y?)r",
- "(V?)ROUNDPS(Y?)r",
- "(V?)ROUNDSDr",
- "(V?)ROUNDSSr")>;
-
def SKXWriteResGroup117 : SchedWriteRes<[SKXPort0,SKXPort23]> {
let Latency = 8;
let NumMicroOps = 2;
@@ -3007,20 +2991,6 @@ def SKXWriteResGroup184_1 : SchedWriteRe
}
def: InstRW<[SKXWriteResGroup184_1], (instregex "VDIVPD(Y|Z256)rr")>;
-def SKXWriteResGroup186 : SchedWriteRes<[SKXPort23,SKXPort015]> {
- let Latency = 14;
- let NumMicroOps = 3;
- let ResourceCycles = [1,2];
-}
-def: InstRW<[SKXWriteResGroup186], (instregex "VRNDSCALEPDZ128rm(b?)i",
- "VRNDSCALEPSZ128rm(b?)i",
- "VRNDSCALESDm(b?)",
- "VRNDSCALESSm(b?)",
- "(V?)ROUNDPDm",
- "(V?)ROUNDPSm",
- "(V?)ROUNDSDm",
- "(V?)ROUNDSSm")>;
-
def SKXWriteResGroup187 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
let Latency = 14;
let NumMicroOps = 3;
@@ -3067,18 +3037,6 @@ def: InstRW<[SKXWriteResGroup191], (inst
"DIVR_FST0r",
"DIVR_FrST0")>;
-def SKXWriteResGroup192 : SchedWriteRes<[SKXPort23,SKXPort015]> {
- let Latency = 15;
- let NumMicroOps = 3;
- let ResourceCycles = [1,2];
-}
-def: InstRW<[SKXWriteResGroup192], (instregex "VRNDSCALEPDZ256rm(b?)i",
- "VRNDSCALEPDZrm(b?)i",
- "VRNDSCALEPSZ256rm(b?)i",
- "VRNDSCALEPSZrm(b?)i",
- "VROUNDPDYm",
- "VROUNDPSYm")>;
-
def SKXWriteResGroup194 : SchedWriteRes<[SKXPort1,SKXPort5,SKXPort01,SKXPort23,SKXPort015]> {
let Latency = 15;
let NumMicroOps = 8;
Modified: llvm/trunk/lib/Target/X86/X86Schedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Schedule.td?rev=331515&r1=331514&r2=331515&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Schedule.td (original)
+++ llvm/trunk/lib/Target/X86/X86Schedule.td Fri May 4 05:59:24 2018
@@ -123,6 +123,8 @@ defm WriteDPPD : X86SchedWritePair; //
defm WriteDPPS : X86SchedWritePair; // Floating point single dot product.
defm WriteDPPSY : X86SchedWritePair; // Floating point single dot product (YMM).
defm WriteFSign : X86SchedWritePair; // Floating point fabs/fchs.
+defm WriteFRnd : X86SchedWritePair; // Floating point rounding.
+defm WriteFRndY : X86SchedWritePair; // Floating point rounding (YMM/ZMM).
defm WriteFLogic : X86SchedWritePair; // Floating point and/or/xor logicals.
defm WriteFLogicY : X86SchedWritePair; // Floating point and/or/xor logicals (YMM/ZMM).
defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles.
@@ -258,6 +260,8 @@ def SchedWriteFRcp
: X86SchedWriteWidths<WriteFRcp, WriteFRcp, WriteFRcpY, WriteFRcpY>;
def SchedWriteFRsqrt
: X86SchedWriteWidths<WriteFRsqrt, WriteFRsqrt, WriteFRsqrtY, WriteFRsqrtY>;
+def SchedWriteFRnd
+ : X86SchedWriteWidths<WriteFRnd, WriteFRnd, WriteFRndY, WriteFRndY>;
def SchedWriteFLogic
: X86SchedWriteWidths<WriteFLogic, WriteFLogic, WriteFLogicY, WriteFLogicY>;
Modified: llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleAtom.td?rev=331515&r1=331514&r2=331515&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleAtom.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleAtom.td Fri May 4 05:59:24 2018
@@ -218,6 +218,8 @@ defm : AtomWriteResPair<WriteFDivY,
defm : AtomWriteResPair<WriteFSqrt, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
defm : AtomWriteResPair<WriteFSqrtY, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
defm : AtomWriteResPair<WriteFSign, [AtomPort1], [AtomPort1]>;
+defm : AtomWriteResPair<WriteFRnd, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
+defm : AtomWriteResPair<WriteFRndY, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFLogic, [AtomPort01], [AtomPort0]>;
defm : AtomWriteResPair<WriteFLogicY, [AtomPort01], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteFShuffle, [AtomPort0], [AtomPort0]>;
Modified: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td?rev=331515&r1=331514&r2=331515&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td Fri May 4 05:59:24 2018
@@ -337,6 +337,8 @@ defm : JWriteResYMMPair<WriteFDivY,
defm : JWriteResFpuPair<WriteFSqrt, [JFPU1, JFPM], 21, [1, 21]>;
defm : JWriteResYMMPair<WriteFSqrtY, [JFPU1, JFPM], 42, [2, 42], 2>;
defm : JWriteResFpuPair<WriteFSign, [JFPU1, JFPM], 2>;
+defm : JWriteResFpuPair<WriteFRnd, [JFPU1, JSTC], 3>;
+defm : JWriteResYMMPair<WriteFRndY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : JWriteResFpuPair<WriteFLogic, [JFPU01, JFPX], 1>;
defm : JWriteResYMMPair<WriteFLogicY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : JWriteResFpuPair<WriteFShuffle, [JFPU01, JFPX], 1>;
@@ -563,8 +565,7 @@ def JWriteVCVTY: SchedWriteRes<[JFPU1, J
let NumMicroOps = 2;
}
def : InstRW<[JWriteVCVTY], (instrs VCVTDQ2PDYrr, VCVTDQ2PSYrr,
- VCVTPS2DQYrr, VCVTTPS2DQYrr,
- VROUNDPDYr, VROUNDPSYr)>;
+ VCVTPS2DQYrr, VCVTTPS2DQYrr)>;
def JWriteVCVTYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC]> {
let Latency = 8;
@@ -572,8 +573,7 @@ def JWriteVCVTYLd: SchedWriteRes<[JLAGU,
let NumMicroOps = 2;
}
def : InstRW<[JWriteVCVTYLd, ReadAfterLd], (instrs VCVTDQ2PDYrm, VCVTDQ2PSYrm,
- VCVTPS2DQYrm, VCVTTPS2DQYrm,
- VROUNDPDYm, VROUNDPSYm)>;
+ VCVTPS2DQYrm, VCVTTPS2DQYrm)>;
def JWriteVMOVNTDQSt: SchedWriteRes<[JFPU1, JSTC, JSAGU]> {
let Latency = 2;
Modified: llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleSLM.td?rev=331515&r1=331514&r2=331515&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleSLM.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleSLM.td Fri May 4 05:59:24 2018
@@ -151,6 +151,8 @@ defm : SLMWriteResPair<WriteCvtF2I, [SLM
defm : SLMWriteResPair<WriteCvtI2F, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtF2F, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteFSign, [SLM_FPC_RSV01], 1>;
+defm : SLMWriteResPair<WriteFRnd, [SLM_FPC_RSV1], 3>;
+defm : SLMWriteResPair<WriteFRndY, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFLogic, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFLogicY, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFShuffle, [SLM_FPC_RSV0], 1>;
Modified: llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td?rev=331515&r1=331514&r2=331515&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td Fri May 4 05:59:24 2018
@@ -120,7 +120,8 @@ multiclass ZnWriteResPair<X86FoldableSch
// This multiclass is for folded loads for floating point units.
multiclass ZnWriteResFpuPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
- int Lat, list<int> Res = [], int UOps = 1> {
+ int Lat, list<int> Res = [], int UOps = 1,
+ int LoadLat = 7, int LoadUOps = 0> {
// Register variant takes 1-cycle on Execution Port.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
@@ -129,11 +130,11 @@ multiclass ZnWriteResFpuPair<X86Foldable
}
// Memory variant also uses a cycle on ZnAGU
- // adds 7 cycles to the latency.
+ // adds LoadLat cycles to the latency (default = 7).
def : WriteRes<SchedRW.Folded, !listconcat([ZnAGU], ExePorts)> {
- let Latency = !add(Lat, 7);
+ let Latency = !add(Lat, LoadLat);
let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
- let NumMicroOps = UOps;
+ let NumMicroOps = !add(UOps, LoadUOps);
}
}
@@ -208,6 +209,8 @@ defm : ZnWriteResFpuPair<WriteCvtF2I,
defm : ZnWriteResFpuPair<WriteFDiv, [ZnFPU3], 15>;
defm : ZnWriteResFpuPair<WriteFDivY, [ZnFPU3], 15>;
defm : ZnWriteResFpuPair<WriteFSign, [ZnFPU3], 2>;
+defm : ZnWriteResFpuPair<WriteFRnd, [ZnFPU3], 4, [1], 1, 7, 1>; // FIXME: Should folds require 1 extra uops?
+defm : ZnWriteResFpuPair<WriteFRndY, [ZnFPU3], 4, [1], 1, 7, 1>; // FIXME: Should folds require 1 extra uops?
defm : ZnWriteResFpuPair<WriteFLogic, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteFLogicY, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteFShuffle, [ZnFPU12], 1>;
@@ -1524,20 +1527,6 @@ def ZnWriteVRCPPSLd : SchedWriteRes<[ZnA
}
def : InstRW<[ZnWriteVRCPPSLd], (instregex "VRCPPSYm")>;
-// ROUND SS/SD PS/PD.
-// v,v,i.
-def ZnWriteROUNDr : SchedWriteRes<[ZnFPU3]> {
- let Latency = 4;
-}
-def : InstRW<[ZnWriteROUNDr], (instregex "(V?)ROUND(S|P)(S|D)(Y?)r")>;
-
-// v,m,i.
-def ZnWriteROUNDm : SchedWriteRes<[ZnAGU, ZnFPU3]> {
- let Latency = 11;
- let NumMicroOps = 2;
-}
-def : InstRW<[ZnWriteROUNDm], (instregex "(V?)ROUND(S|P)(S|D)(Y?)m")>;
-
// DPPS.
// x,x,i / v,v,v,i.
def : SchedAlias<WriteDPPS, ZnWriteMicrocoded>;
Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s?rev=331515&r1=331514&r2=331515&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s Fri May 4 05:59:24 2018
@@ -1720,7 +1720,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
-# CHECK-NEXT: 48.00 2.00 - 355.50 907.50 402.00 398.00 381.00 - 43.00 114.00 117.50 117.50 38.00
+# CHECK-NEXT: 48.00 2.00 - 347.50 907.50 394.00 406.00 381.00 - 43.00 122.00 117.50 117.50 38.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
@@ -2318,18 +2318,18 @@ vzeroupper
# CHECK-NEXT: - - - - 2.00 - 2.00 2.00 - - - - - - vrcpps (%rax), %ymm2
# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - vrcpss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - - 1.00 - 1.00 1.00 - - - - - - vrcpss (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vroundpd $1, %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vroundpd $1, (%rax), %xmm2
+# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vroundpd $1, %xmm0, %xmm2
+# CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - vroundpd $1, (%rax), %xmm2
# CHECK-NEXT: - - - - - - 2.00 - - - 2.00 - - - vroundpd $1, %ymm0, %ymm2
# CHECK-NEXT: - - - - - - 2.00 2.00 - - 2.00 - - - vroundpd $1, (%rax), %ymm2
-# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vroundps $1, %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vroundps $1, (%rax), %xmm2
+# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vroundps $1, %xmm0, %xmm2
+# CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - vroundps $1, (%rax), %xmm2
# CHECK-NEXT: - - - - - - 2.00 - - - 2.00 - - - vroundps $1, %ymm0, %ymm2
# CHECK-NEXT: - - - - - - 2.00 2.00 - - 2.00 - - - vroundps $1, (%rax), %ymm2
-# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vroundsd $1, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vroundsd $1, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vroundss $1, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vroundss $1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vroundsd $1, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - vroundsd $1, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vroundss $1, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - vroundss $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - vrsqrtps %xmm0, %xmm2
# CHECK-NEXT: - - - - 1.00 - 1.00 1.00 - - - - - - vrsqrtps (%rax), %xmm2
# CHECK-NEXT: - - - - 2.00 - 2.00 - - - - - - - vrsqrtps %ymm0, %ymm2
Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-sse41.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-sse41.s?rev=331515&r1=331514&r2=331515&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-sse41.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-sse41.s Fri May 4 05:59:24 2018
@@ -270,7 +270,7 @@ roundss $1, (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
-# CHECK-NEXT: 6.00 - - 37.00 23.00 57.50 42.50 44.00 - 5.00 5.00 32.50 32.50 10.00
+# CHECK-NEXT: 6.00 - - 29.00 23.00 49.50 50.50 44.00 - 5.00 13.00 32.50 32.50 10.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
@@ -362,12 +362,12 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: - - - - - 2.50 0.50 1.00 - - - 0.50 0.50 2.00 pmulld (%rax), %xmm2
# CHECK-NEXT: 1.00 - - 1.00 - 1.00 - - - - - - - - ptest %xmm0, %xmm1
# CHECK-NEXT: 1.00 - - 1.00 - 1.00 - 1.00 - - - - - - ptest (%rax), %xmm1
-# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - roundpd $1, %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - roundpd $1, (%rax), %xmm2
-# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - roundps $1, %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - roundps $1, (%rax), %xmm2
-# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - roundsd $1, %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - roundsd $1, (%rax), %xmm2
-# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - roundss $1, %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - roundss $1, (%rax), %xmm2
+# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - roundpd $1, %xmm0, %xmm2
+# CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - roundpd $1, (%rax), %xmm2
+# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - roundps $1, %xmm0, %xmm2
+# CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - roundps $1, (%rax), %xmm2
+# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - roundsd $1, %xmm0, %xmm2
+# CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - roundsd $1, (%rax), %xmm2
+# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - roundss $1, %xmm0, %xmm2
+# CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - roundss $1, (%rax), %xmm2
More information about the llvm-commits mailing list