[llvm] r332357 - [X86] Split off F16C WriteCvtPH2PS/WriteCvtPS2PH scheduler classes
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue May 15 07:12:32 PDT 2018
Author: rksimon
Date: Tue May 15 07:12:32 2018
New Revision: 332357
URL: http://llvm.org/viewvc/llvm-project?rev=332357&view=rev
Log:
[X86] Split off F16C WriteCvtPH2PS/WriteCvtPS2PH scheduler classes
Btver2 - VCVTPH2PSYrm needs to double pump the AGU
Broadwell - VCVTPS2PH*mr stores were missing their extra latency
Allows us to remove the WriteCvtF2FSt conversion store class
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
llvm/trunk/lib/Target/X86/X86SchedHaswell.td
llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
llvm/trunk/lib/Target/X86/X86Schedule.td
llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
llvm/trunk/test/CodeGen/X86/f16c-schedule.ll
llvm/trunk/test/tools/llvm-mca/X86/Broadwell/resources-f16c.s
llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-f16c.s
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=332357&r1=332356&r2=332357&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Tue May 15 07:12:32 2018
@@ -7863,16 +7863,16 @@ multiclass avx512_cvtph2ps_sae<X86Vector
let Predicates = [HasAVX512] in
defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64,
- WriteCvtF2F>,
+ WriteCvtPH2PSY>,
avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtF2F>,
EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
let Predicates = [HasVLX] in {
defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
- loadv2i64, WriteCvtF2F>, EVEX, EVEX_V256,
+ loadv2i64, WriteCvtPH2PSY>, EVEX, EVEX_V256,
EVEX_CD8<32, CD8VH>;
defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
- loadv2i64, WriteCvtF2F>, EVEX, EVEX_V128,
+ loadv2i64, WriteCvtPH2PS>, EVEX, EVEX_V128,
EVEX_CD8<32, CD8VH>;
// Pattern match vcvtph2ps of a scalar i64 load.
@@ -7886,42 +7886,46 @@ let Predicates = [HasVLX] in {
}
multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
- X86MemOperand x86memop> {
+ X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
(ins _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph", "$src2, $src1", "$src1, $src2",
(X86cvtps2ph (_src.VT _src.RC:$src1),
(i32 imm:$src2)), 0, 0>,
- AVX512AIi8Base, Sched<[WriteCvtF2F]>;
+ AVX512AIi8Base, Sched<[RR]>;
let hasSideEffects = 0, mayStore = 1 in {
def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
(ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- Sched<[WriteCvtF2FSt]>;
+ Sched<[MR]>;
def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
(ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
- EVEX_K, Sched<[WriteCvtF2FSt]>;
+ EVEX_K, Sched<[MR]>;
}
}
-multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src> {
+multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
+ SchedWrite Sched> {
let hasSideEffects = 0 in
defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
(outs _dest.RC:$dst),
(ins _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
- EVEX_B, AVX512AIi8Base, Sched<[WriteCvtF2F]>;
+ EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
}
let Predicates = [HasAVX512] in {
- defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem>,
- avx512_cvtps2ph_sae<v16i16x_info, v16f32_info>,
+ defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
+ WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
+ avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PH>,
EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
let Predicates = [HasVLX] in {
- defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem>,
+ defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
+ WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
- defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem>,
+ defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
+ WriteCvtPS2PH, WriteCvtPS2PHSt>,
EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
}
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=332357&r1=332356&r2=332357&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue May 15 07:12:32 2018
@@ -7303,37 +7303,41 @@ let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4
// Half precision conversion instructions
//
-multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop> {
+multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop,
+ X86FoldableSchedWrite sched> {
def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
"vcvtph2ps\t{$src, $dst|$dst, $src}",
[(set RC:$dst, (X86cvtph2ps VR128:$src))]>,
- T8PD, VEX, Sched<[WriteCvtF2F]>;
+ T8PD, VEX, Sched<[sched]>;
let hasSideEffects = 0, mayLoad = 1 in
def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
"vcvtph2ps\t{$src, $dst|$dst, $src}",
[(set RC:$dst, (X86cvtph2ps (bc_v8i16
(loadv2i64 addr:$src))))]>,
- T8PD, VEX, Sched<[WriteCvtF2FLd]>;
+ T8PD, VEX, Sched<[sched.Folded]>;
}
-multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop> {
+multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop,
+ SchedWrite RR, SchedWrite MR> {
def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
(ins RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (X86cvtps2ph RC:$src1, imm:$src2))]>,
- TAPD, VEX, Sched<[WriteCvtF2F]>;
+ TAPD, VEX, Sched<[RR]>;
let hasSideEffects = 0, mayStore = 1 in
def mr : Ii8<0x1D, MRMDestMem, (outs),
(ins x86memop:$dst, RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- TAPD, VEX, Sched<[WriteCvtF2FSt]>;
+ TAPD, VEX, Sched<[MR]>;
}
let Predicates = [HasF16C, NoVLX] in {
- defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem>;
- defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem>, VEX_L;
- defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem>;
- defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem>, VEX_L;
+ defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem, WriteCvtPH2PS>;
+ defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, WriteCvtPH2PSY>, VEX_L;
+ defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem, WriteCvtPS2PH,
+ WriteCvtPS2PHSt>;
+ defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, WriteCvtPS2PHY,
+ WriteCvtPS2PHYSt>, VEX_L;
// Pattern match vcvtph2ps of a scalar i64 load.
def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))),
Modified: llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedBroadwell.td?rev=332357&r1=332356&r2=332357&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedBroadwell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedBroadwell.td Tue May 15 07:12:32 2018
@@ -257,12 +257,6 @@ defm : BWWriteResPair<WriteFBlendY, [BWP
defm : BWWriteResPair<WriteFVarBlend, [BWPort5], 2, [2], 2, 5>; // Fp vector variable blends.
defm : BWWriteResPair<WriteFVarBlendY, [BWPort5], 2, [2], 2, 6>; // Fp vector variable blends.
-def : WriteRes<WriteCvtF2FSt, [BWPort1,BWPort4,BWPort237]> {
- let Latency = 4;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-
// FMA Scheduling helper class.
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
@@ -353,6 +347,16 @@ defm : BWWriteResPair<WriteCvtF2I, [BWPo
defm : BWWriteResPair<WriteCvtI2F, [BWPort1], 4>; // Integer -> Float.
defm : BWWriteResPair<WriteCvtF2F, [BWPort1], 3>; // Float -> Float size conversion.
+defm : X86WriteRes<WriteCvtPH2PS, [BWPort0,BWPort5], 2, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSY, [BWPort0,BWPort5], 2, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSLd, [BWPort0,BWPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSYLd, [BWPort0,BWPort23], 6, [1,1], 2>;
+
+defm : X86WriteRes<WriteCvtPS2PH, [BWPort1,BWPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHY, [BWPort1,BWPort5], 6, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHSt, [BWPort1,BWPort4,BWPort237], 5, [1,1,1], 3>;
+defm : X86WriteRes<WriteCvtPS2PHYSt, [BWPort1,BWPort4,BWPort237], 7, [1,1,1], 3>;
+
// Strings instructions.
// Packed Compare Implicit Length Strings, Return Mask
@@ -625,8 +629,7 @@ def BWWriteResGroup15 : SchedWriteRes<[B
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup15], (instregex "VCVTPH2PS(Y?)rr",
- "(V?)CVTPS2PDrr",
+def: InstRW<[BWWriteResGroup15], (instregex "(V?)CVTPS2PDrr",
"(V?)CVTSS2SDrr")>;
def BWWriteResGroup16 : SchedWriteRes<[BWPort6,BWPort0156]> {
@@ -825,7 +828,6 @@ def: InstRW<[BWWriteResGroup42], (instre
"MMX_CVT(T?)PS2PIirr",
"(V?)CVTDQ2PDrr",
"(V?)CVTPD2PSrr",
- "VCVTPS2PHrr",
"(V?)CVTSD2SSrr",
"(V?)CVTSI642SDrr",
"(V?)CVTSI2SDrr",
@@ -963,8 +965,7 @@ def BWWriteResGroup59 : SchedWriteRes<[B
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup59], (instregex "VCVTPH2PS(Y?)rm",
- "(V?)CVTPS2PDrm",
+def: InstRW<[BWWriteResGroup59], (instregex "(V?)CVTPS2PDrm",
"(V?)CVTSS2SDrm",
"VPSLLVQrm",
"VPSRLVQrm")>;
@@ -976,7 +977,6 @@ def BWWriteResGroup60 : SchedWriteRes<[B
}
def: InstRW<[BWWriteResGroup60], (instregex "VCVTDQ2PDYrr",
"VCVTPD2PSYrr",
- "VCVTPS2PHYrr",
"VCVT(T?)PD2DQYrr")>;
def BWWriteResGroup62 : SchedWriteRes<[BWPort6,BWPort23]> {
Modified: llvm/trunk/lib/Target/X86/X86SchedHaswell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedHaswell.td?rev=332357&r1=332356&r2=332357&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedHaswell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedHaswell.td Tue May 15 07:12:32 2018
@@ -251,11 +251,15 @@ defm : HWWriteResPair<WriteFVarShuffle25
defm : HWWriteResPair<WriteFVarBlend, [HWPort5], 2, [2], 2, 6>;
defm : HWWriteResPair<WriteFVarBlendY, [HWPort5], 2, [2], 2, 7>;
-def : WriteRes<WriteCvtF2FSt, [HWPort1,HWPort4,HWPort5,HWPort237]> {
- let Latency = 5;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
+defm : X86WriteRes<WriteCvtPH2PS, [HWPort0,HWPort5], 2, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSY, [HWPort0,HWPort5], 2, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSLd, [HWPort0,HWPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSYLd, [HWPort0,HWPort23], 7, [1,1], 2>;
+
+defm : X86WriteRes<WriteCvtPS2PH, [HWPort1,HWPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHY, [HWPort1,HWPort5], 6, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHSt, [HWPort1,HWPort4,HWPort5,HWPort237], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteCvtPS2PHYSt, [HWPort1,HWPort4,HWPort5,HWPort237], 7, [1,1,1,1], 4>;
// Vector integer operations.
defm : X86WriteRes<WriteVecLoad, [HWPort23], 5, [1], 1>;
@@ -868,16 +872,14 @@ def HWWriteResGroup11 : SchedWriteRes<[H
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup11], (instregex "VCVTPH2PSrm",
- "(V?)CVTPS2PDrm")>;
+def: InstRW<[HWWriteResGroup11], (instregex "(V?)CVTPS2PDrm")>;
def HWWriteResGroup11_1 : SchedWriteRes<[HWPort0,HWPort23]> {
let Latency = 7;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup11_1], (instregex "VCVTPH2PSYrm",
- "(V?)CVTSS2SDrm",
+def: InstRW<[HWWriteResGroup11_1], (instregex "(V?)CVTSS2SDrm",
"VPSLLVQrm",
"VPSRLVQrm")>;
@@ -1076,9 +1078,7 @@ def HWWriteResGroup31 : SchedWriteRes<[H
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup31], (instregex "VCVTPH2PSYrr",
- "VCVTPH2PSrr",
- "(V?)CVTPS2PDrr",
+def: InstRW<[HWWriteResGroup31], (instregex "(V?)CVTPS2PDrr",
"(V?)CVTSS2SDrr")>;
def HWWriteResGroup32 : SchedWriteRes<[HWPort6,HWPort0156]> {
@@ -1397,7 +1397,6 @@ def: InstRW<[HWWriteResGroup73], (instre
"MMX_CVT(T?)PS2PIirr",
"(V?)CVTDQ2PDrr",
"(V?)CVTPD2PSrr",
- "VCVTPS2PHrr",
"(V?)CVTSD2SSrr",
"(V?)CVTSI(64)?2SDrr",
"(V?)CVTSI2SSrr",
@@ -1604,7 +1603,6 @@ def HWWriteResGroup102 : SchedWriteRes<[
}
def: InstRW<[HWWriteResGroup102], (instregex "VCVTDQ2PDYrr",
"VCVTPD2PSYrr",
- "VCVTPS2PHYrr",
"VCVT(T?)PD2DQYrr")>;
def HWWriteResGroup103 : SchedWriteRes<[HWPort1,HWPort23]> {
@@ -1629,13 +1627,6 @@ def HWWriteResGroup105 : SchedWriteRes<[
def: InstRW<[HWWriteResGroup105], (instregex "SHLD(16|32|64)rrCL",
"SHRD(16|32|64)rrCL")>;
-def HWWriteResGroup106 : SchedWriteRes<[HWPort1,HWPort4,HWPort5,HWPort237]> {
- let Latency = 7;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[HWWriteResGroup106], (instregex "VCVTPS2PHYmr")>;
-
def HWWriteResGroup107 : SchedWriteRes<[HWPort1,HWPort6,HWPort06,HWPort0156]> {
let Latency = 6;
let NumMicroOps = 4;
Modified: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td?rev=332357&r1=332356&r2=332357&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td Tue May 15 07:12:32 2018
@@ -235,7 +235,14 @@ defm : SBWriteResPair<WriteFBlend, [S
defm : SBWriteResPair<WriteFBlendY, [SBPort05], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteFVarBlend, [SBPort05], 2, [2], 2, 6>;
defm : SBWriteResPair<WriteFVarBlendY,[SBPort05], 2, [2], 2, 7>;
-def : WriteRes<WriteCvtF2FSt, [SBPort1, SBPort23, SBPort4]> { let Latency = 4; }
+
+defm : SBWriteResPair<WriteCvtPH2PS, [SBPort1], 3>;
+defm : SBWriteResPair<WriteCvtPH2PSY, [SBPort1], 3>;
+
+defm : X86WriteRes<WriteCvtPS2PH, [SBPort1], 3, [1], 1>;
+defm : X86WriteRes<WriteCvtPS2PHY, [SBPort1], 3, [1], 1>;
+defm : X86WriteRes<WriteCvtPS2PHSt, [SBPort1, SBPort23, SBPort4], 4, [1,1,1], 1>;
+defm : X86WriteRes<WriteCvtPS2PHYSt, [SBPort1, SBPort23, SBPort4], 4, [1,1,1], 1>;
// Vector integer operations.
defm : X86WriteRes<WriteVecLoad, [SBPort23], 5, [1], 1>;
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td?rev=332357&r1=332356&r2=332357&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td Tue May 15 07:12:32 2018
@@ -249,12 +249,6 @@ defm : SKLWriteResPair<WriteFBlendY, [SK
defm : SKLWriteResPair<WriteFVarBlend, [SKLPort015], 2, [2], 2, 6>; // Fp vector variable blends.
defm : SKLWriteResPair<WriteFVarBlendY,[SKLPort015], 2, [2], 2, 7>; // Fp vector variable blends.
-def : WriteRes<WriteCvtF2FSt, [SKLPort4,SKLPort5,SKLPort237,SKLPort01]> {
- let Latency = 6;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-
// FMA Scheduling helper class.
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
@@ -346,6 +340,16 @@ defm : SKLWriteResPair<WriteCvtF2I, [SKL
defm : SKLWriteResPair<WriteCvtI2F, [SKLPort1], 4>; // Integer -> Float.
defm : SKLWriteResPair<WriteCvtF2F, [SKLPort1], 3>; // Float -> Float size conversion.
+defm : X86WriteRes<WriteCvtPH2PS, [SKLPort5,SKLPort015], 5, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSY, [SKLPort5,SKLPort01], 7, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSLd, [SKLPort23,SKLPort01], 9, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSYLd, [SKLPort23,SKLPort01], 10, [1,1], 2>;
+
+defm : X86WriteRes<WriteCvtPS2PH, [SKLPort5,SKLPort015], 5, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHY, [SKLPort5,SKLPort01], 7, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHSt, [SKLPort4,SKLPort5,SKLPort237,SKLPort01], 6, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteCvtPS2PHYSt, [SKLPort4,SKLPort5,SKLPort237,SKLPort01], 8, [1,1,1,1], 4>;
+
// Strings instructions.
// Packed Compare Implicit Length Strings, Return Mask
@@ -930,9 +934,7 @@ def: InstRW<[SKLWriteResGroup60], (instr
"MMX_CVT(T?)PS2PIirr",
"(V?)CVT(T?)PD2DQrr",
"(V?)CVTPD2PSrr",
- "VCVTPH2PSrr",
"(V?)CVTPS2PDrr",
- "VCVTPS2PHrr",
"(V?)CVTSD2SSrr",
"(V?)CVTSI642SDrr",
"(V?)CVTSI2SDrr",
@@ -1157,9 +1159,7 @@ def SKLWriteResGroup89 : SchedWriteRes<[
let ResourceCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup89], (instregex "VCVTPD2PSYrr",
- "VCVTPH2PSYrr",
"VCVTPS2PDYrr",
- "VCVTPS2PHYrr",
"VCVT(T?)PD2DQYrr")>;
def SKLWriteResGroup91 : SchedWriteRes<[SKLPort23,SKLPort015]> {
@@ -1300,13 +1300,6 @@ def SKLWriteResGroup112 : SchedWriteRes<
}
def: InstRW<[SKLWriteResGroup112], (instregex "MMX_PH(ADD|SUB)SWrm")>;
-def SKLWriteResGroup114 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237,SKLPort01]> {
- let Latency = 8;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SKLWriteResGroup114], (instregex "VCVTPS2PHYmr")>;
-
def SKLWriteResGroup115 : SchedWriteRes<[SKLPort23,SKLPort237,SKLPort06]> {
let Latency = 8;
let NumMicroOps = 5;
@@ -1369,7 +1362,6 @@ def SKLWriteResGroup123 : SchedWriteRes<
let ResourceCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup123], (instregex "MMX_CVT(T?)PS2PIirm",
- "VCVTPH2PSrm",
"(V?)CVTPS2PDrm")>;
def SKLWriteResGroup127 : SchedWriteRes<[SKLPort1,SKLPort5,SKLPort23]> {
@@ -1418,7 +1410,6 @@ def SKLWriteResGroup134 : SchedWriteRes<
let ResourceCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup134], (instregex "(V?)CVTDQ2PSrm",
- "(V?)CVTPH2PSYrm",
"(V?)CVTPS2DQrm",
"(V?)CVTSS2SDrm",
"(V?)CVTTPS2DQrm")>;
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td?rev=332357&r1=332356&r2=332357&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td Tue May 15 07:12:32 2018
@@ -249,12 +249,6 @@ defm : SKXWriteResPair<WriteFBlendY,[SKX
defm : SKXWriteResPair<WriteFVarBlend, [SKXPort015], 2, [2], 2, 6>; // Fp vector variable blends.
defm : SKXWriteResPair<WriteFVarBlendY,[SKXPort015], 2, [2], 2, 7>; // Fp vector variable blends.
-def : WriteRes<WriteCvtF2FSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort015]> {
- let Latency = 6;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-
// FMA Scheduling helper class.
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
@@ -346,6 +340,16 @@ defm : SKXWriteResPair<WriteCvtF2I, [SKX
defm : SKXWriteResPair<WriteCvtI2F, [SKXPort1], 4>; // Integer -> Float.
defm : SKXWriteResPair<WriteCvtF2F, [SKXPort1], 3>; // Float -> Float size conversion.
+defm : X86WriteRes<WriteCvtPH2PS, [SKXPort5,SKXPort015], 5, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSY, [SKXPort5,SKXPort015], 7, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSLd, [SKXPort23,SKXPort015], 9, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSYLd, [SKXPort23,SKXPort015], 10, [1,1], 2>;
+
+defm : X86WriteRes<WriteCvtPS2PH, [SKXPort5,SKXPort015], 5, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHY, [SKXPort5,SKXPort015], 7, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort015], 6, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteCvtPS2PHYSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort015], 8, [1,1,1,1], 4>;
+
// Strings instructions.
// Packed Compare Implicit Length Strings, Return Mask
@@ -1050,12 +1054,8 @@ def: InstRW<[SKXWriteResGroup61], (instr
"VCVTPD2PSZ128rr",
"(V?)CVTPD2PSrr",
"VCVTPD2UDQZ128rr",
- "VCVTPH2PSZ128rr",
- "VCVTPH2PSrr",
"VCVTPS2PDZ128rr",
"(V?)CVTPS2PDrr",
- "VCVTPS2PHZ128rr",
- "VCVTPS2PHrr",
"VCVTPS2QQZ128rr",
"VCVTPS2UQQZ128rr",
"VCVTQQ2PSZ128rr",
@@ -1370,9 +1370,7 @@ def: InstRW<[SKXWriteResGroup93], (instr
"VCVTPD2DQ(Y|Z|Z256)rr",
"VCVTPD2PS(Y|Z|Z256)rr",
"VCVTPD2UDQ(Z|Z256)rr",
- "VCVTPH2PS(Y|Z|Z256)rr",
"VCVTPS2PD(Y|Z|Z256)rr",
- "VCVTPS2PH(Y|Z|Z256)rr",
"VCVTPS2QQ(Z|Z256)rr",
"VCVTPS2UQQ(Z|Z256)rr",
"VCVTQQ2PS(Z|Z256)rr",
@@ -1668,13 +1666,6 @@ def SKXWriteResGroup123 : SchedWriteRes<
}
def: InstRW<[SKXWriteResGroup123], (instregex "MMX_PH(ADD|SUB)SWrm")>;
-def SKXWriteResGroup125 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237,SKXPort015]> {
- let Latency = 8;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SKXWriteResGroup125], (instregex "VCVTPS2PHYmr")>;
-
def SKXWriteResGroup126 : SchedWriteRes<[SKXPort23,SKXPort237,SKXPort06]> {
let Latency = 8;
let NumMicroOps = 5;
@@ -1816,7 +1807,6 @@ def SKXWriteResGroup137 : SchedWriteRes<
let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup137], (instregex "MMX_CVT(T?)PS2PIirm",
- "VCVTPH2PSrm",
"(V?)CVTPS2PDrm")>;
def SKXWriteResGroup138 : SchedWriteRes<[SKXPort0,SKXPort015]> {
@@ -1905,7 +1895,6 @@ def: InstRW<[SKXWriteResGroup149], (inst
"(V?)CVTDQ2PSrm",
"VCVTPD2QQZ128rm(b?)",
"VCVTPD2UQQZ128rm(b?)",
- "VCVTPH2PSYrm",
"VCVTPH2PSZ128rm(b?)",
"VCVTPS2DQZ128rm(b?)",
"(V?)CVTPS2DQrm",
Modified: llvm/trunk/lib/Target/X86/X86Schedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Schedule.td?rev=332357&r1=332356&r2=332357&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Schedule.td (original)
+++ llvm/trunk/lib/Target/X86/X86Schedule.td Tue May 15 07:12:32 2018
@@ -299,7 +299,14 @@ def WriteMMXMOVMSK : SchedWrite;
defm WriteCvtF2I : X86SchedWritePair; // Float -> Integer.
defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float.
defm WriteCvtF2F : X86SchedWritePair; // Float -> Float size conversion.
-def WriteCvtF2FSt : SchedWrite; // // Float -> Float + store size conversion.
+
+defm WriteCvtPH2PS : X86SchedWritePair; // Half -> Float size conversion.
+defm WriteCvtPH2PSY : X86SchedWritePair; // Half -> Float size conversion (YMM/ZMM).
+
+def WriteCvtPS2PH : SchedWrite; // // Float -> Half size conversion.
+def WriteCvtPS2PHY : SchedWrite; // // Float -> Half size conversion (YMM/ZMM).
+def WriteCvtPS2PHSt : SchedWrite; // // Float -> Half + store size conversion.
+def WriteCvtPS2PHYSt : SchedWrite; // // Float -> Half + store size conversion (YMM/ZMM).
// CRC32 instruction.
defm WriteCRC32 : X86SchedWritePair;
Modified: llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleAtom.td?rev=332357&r1=332356&r2=332357&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleAtom.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleAtom.td Tue May 15 07:12:32 2018
@@ -276,7 +276,13 @@ defm : AtomWriteResPair<WriteFVarShuffle
defm : AtomWriteResPair<WriteCvtF2I, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>; // Float -> Integer.
defm : AtomWriteResPair<WriteCvtI2F, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; // Integer -> Float.
defm : AtomWriteResPair<WriteCvtF2F, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; // Float -> Float size conversion.
-def : WriteRes<WriteCvtF2FSt, [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+
+defm : AtomWriteResPair<WriteCvtPH2PS, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteCvtPH2PSY, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+def : WriteRes<WriteCvtPS2PH, [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+def : WriteRes<WriteCvtPS2PHY, [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+def : WriteRes<WriteCvtPS2PHSt, [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+def : WriteRes<WriteCvtPS2PHYSt, [AtomPort0]>; // NOTE: Doesn't exist on Atom.
////////////////////////////////////////////////////////////////////////////////
// Vector integer operations.
Modified: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td?rev=332357&r1=332356&r2=332357&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td Tue May 15 07:12:32 2018
@@ -363,7 +363,14 @@ defm : JWriteResFpuPair<WriteFVarShuffle
defm : JWriteResFpuPair<WriteCvtF2I, [JFPU1, JSTC], 3>; // Float -> Integer.
defm : JWriteResFpuPair<WriteCvtI2F, [JFPU1, JSTC], 3>; // Integer -> Float.
defm : JWriteResFpuPair<WriteCvtF2F, [JFPU1, JSTC], 3>; // Float -> Float size conversion.
-def : WriteRes<WriteCvtF2FSt, [JFPU1, JSTC, JSAGU]> { let Latency = 4; }
+
+defm : JWriteResFpuPair<WriteCvtPH2PS, [JFPU1, JSTC], 3, [1,1], 1>;
+defm : JWriteResYMMPair<WriteCvtPH2PSY, [JFPU1, JSTC], 3, [2,2], 2>;
+
+defm : X86WriteRes<WriteCvtPS2PH, [JFPU1, JSTC], 3, [1,1], 1>;
+defm : X86WriteRes<WriteCvtPS2PHY, [JFPU1, JSTC, JFPX], 6, [2,2,2], 3>;
+defm : X86WriteRes<WriteCvtPS2PHSt, [JFPU1, JSTC, JSAGU], 4, [1,1,1], 1>;
+defm : X86WriteRes<WriteCvtPS2PHYSt, [JFPU1, JSTC, JFPX, JSAGU], 7, [2,2,2,1], 3>;
def JWriteCVTF2F : SchedWriteRes<[JFPU1, JSTC]> {
let Latency = 7;
@@ -529,38 +536,6 @@ def JWriteINSERTQ: SchedWriteRes<[JFPU01
def : InstRW<[JWriteINSERTQ], (instrs INSERTQ, INSERTQI)>;
////////////////////////////////////////////////////////////////////////////////
-// F16C instructions.
-////////////////////////////////////////////////////////////////////////////////
-
-def JWriteCVTPS2PHY: SchedWriteRes<[JFPU1, JSTC, JFPX]> {
- let Latency = 6;
- let ResourceCycles = [2, 2, 2];
- let NumMicroOps = 3;
-}
-def : InstRW<[JWriteCVTPS2PHY], (instrs VCVTPS2PHYrr)>;
-
-def JWriteCVTPS2PHYSt: SchedWriteRes<[JFPU1, JSTC, JFPX, JSAGU]> {
- let Latency = 7;
- let ResourceCycles = [2, 2, 2, 1];
- let NumMicroOps = 3;
-}
-def : InstRW<[JWriteCVTPS2PHYSt], (instrs VCVTPS2PHYmr)>;
-
-def JWriteCVTPH2PSY: SchedWriteRes<[JFPU1, JSTC]> {
- let Latency = 3;
- let ResourceCycles = [2, 2];
- let NumMicroOps = 2;
-}
-def : InstRW<[JWriteCVTPH2PSY], (instrs VCVTPH2PSYrr)>;
-
-def JWriteCVTPH2PSYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC]> {
- let Latency = 8;
- let ResourceCycles = [1, 2, 2];
- let NumMicroOps = 2;
-}
-def : InstRW<[JWriteCVTPH2PSYLd], (instrs VCVTPH2PSYrm)>;
-
-////////////////////////////////////////////////////////////////////////////////
// AVX instructions.
////////////////////////////////////////////////////////////////////////////////
Modified: llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleSLM.td?rev=332357&r1=332356&r2=332357&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleSLM.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleSLM.td Tue May 15 07:12:32 2018
@@ -212,7 +212,6 @@ defm : SLMWriteResPair<WriteFShuffleY, [
defm : SLMWriteResPair<WriteFVarShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFVarShuffleY,[SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFBlend, [SLM_FPC_RSV0], 1>;
-def : WriteRes<WriteCvtF2FSt, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
// Vector integer operations.
def : WriteRes<WriteVecLoad, [SLM_MEC_RSV]> { let Latency = 3; }
@@ -397,4 +396,11 @@ defm : SLMWriteResPair<WriteFMA, [SLM_FP
defm : SLMWriteResPair<WriteFMAX, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFMAY, [SLM_FPC_RSV0], 1>;
+defm : SLMWriteResPair<WriteCvtPH2PS, [SLM_FPC_RSV0], 1>;
+defm : SLMWriteResPair<WriteCvtPH2PSY, [SLM_FPC_RSV0], 1>;
+def : WriteRes<WriteCvtPS2PH, [SLM_FPC_RSV0]>;
+def : WriteRes<WriteCvtPS2PHY, [SLM_FPC_RSV0]>;
+def : WriteRes<WriteCvtPS2PHSt, [SLM_FPC_RSV0]>;
+def : WriteRes<WriteCvtPS2PHYSt, [SLM_FPC_RSV0]>;
+
} // SchedModel
Modified: llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td?rev=332357&r1=332356&r2=332357&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td Tue May 15 07:12:32 2018
@@ -272,7 +272,6 @@ defm : ZnWriteResFpuPair<WriteFSqrt64X,
defm : ZnWriteResFpuPair<WriteFSqrt64Y, [ZnFPU3], 40, [40], 1, 7, 1>;
defm : ZnWriteResFpuPair<WriteFSqrt64Z, [ZnFPU3], 40, [40], 1, 7, 1>;
defm : ZnWriteResFpuPair<WriteFSqrt80, [ZnFPU3], 20, [20]>;
-def : WriteRes<WriteCvtF2FSt, [ZnFPU3, ZnAGU]>;
// Vector integer operations which uses FPU units
defm : X86WriteRes<WriteVecLoad, [ZnAGU], 8, [1], 1>;
@@ -1326,18 +1325,21 @@ def : InstRW<[ZnWriteCVSTSI2SIr], (instr
// r32,m32.
def : InstRW<[ZnWriteCVSTSI2SILd], (instregex "(V?)CVT(T?)SD2SI(64)?rm")>;
-
// VCVTPS2PH.
// x,v,i.
-def : InstRW<[WriteMicrocoded], (instregex "VCVTPS2PH(Y?)rr")>;
+def : SchedAlias<WriteCvtPS2PH, ZnWriteMicrocoded>;
+def : SchedAlias<WriteCvtPS2PHY, ZnWriteMicrocoded>;
// m,v,i.
-def : InstRW<[WriteMicrocoded], (instregex "VCVTPS2PH(Y?)mr")>;
+def : SchedAlias<WriteCvtPS2PHSt, ZnWriteMicrocoded>;
+def : SchedAlias<WriteCvtPS2PHYSt, ZnWriteMicrocoded>;
// VCVTPH2PS.
// v,x.
-def : InstRW<[WriteMicrocoded], (instregex "VCVTPH2PS(Y?)rr")>;
+def : SchedAlias<WriteCvtPH2PS, ZnWriteMicrocoded>;
+def : SchedAlias<WriteCvtPH2PSY, ZnWriteMicrocoded>;
// v,m.
-def : InstRW<[WriteMicrocoded], (instregex "VCVTPH2PS(Y?)rm")>;
+def : SchedAlias<WriteCvtPH2PSLd, ZnWriteMicrocoded>;
+def : SchedAlias<WriteCvtPH2PSYLd, ZnWriteMicrocoded>;
//-- SSE4A instructions --//
// EXTRQ
Modified: llvm/trunk/test/CodeGen/X86/f16c-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/f16c-schedule.ll?rev=332357&r1=332356&r2=332357&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/f16c-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/f16c-schedule.ll Tue May 15 07:12:32 2018
@@ -143,7 +143,7 @@ define <8 x i16> @test_vcvtps2ph_128(<4
; BROADWELL-LABEL: test_vcvtps2ph_128:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [4:1.00]
-; BROADWELL-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00]
+; BROADWELL-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [5:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vcvtps2ph_128:
@@ -196,7 +196,7 @@ define <8 x i16> @test_vcvtps2ph_256(<8
; BROADWELL-LABEL: test_vcvtps2ph_256:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [4:1.00]
+; BROADWELL-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [7:1.00]
; BROADWELL-NEXT: vzeroupper # sched: [4:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
Modified: llvm/trunk/test/tools/llvm-mca/X86/Broadwell/resources-f16c.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/Broadwell/resources-f16c.s?rev=332357&r1=332356&r2=332357&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/Broadwell/resources-f16c.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/Broadwell/resources-f16c.s Tue May 15 07:12:32 2018
@@ -27,9 +27,9 @@ vcvtps2ph $0, %ymm0, (%rax)
# CHECK-NEXT: 2 2 1.00 vcvtph2ps %xmm0, %ymm2
# CHECK-NEXT: 2 6 1.00 * vcvtph2ps (%rax), %ymm2
# CHECK-NEXT: 2 4 1.00 vcvtps2ph $0, %xmm0, %xmm2
-# CHECK-NEXT: 3 4 1.00 * vcvtps2ph $0, %xmm0, (%rax)
+# CHECK-NEXT: 3 5 1.00 * vcvtps2ph $0, %xmm0, (%rax)
# CHECK-NEXT: 2 6 1.00 vcvtps2ph $0, %ymm0, %xmm2
-# CHECK-NEXT: 3 4 1.00 * vcvtps2ph $0, %ymm0, (%rax)
+# CHECK-NEXT: 3 7 1.00 * vcvtps2ph $0, %ymm0, (%rax)
# CHECK: Resources:
# CHECK-NEXT: [0] - BWDivider
Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-f16c.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-f16c.s?rev=332357&r1=332356&r2=332357&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-f16c.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-f16c.s Tue May 15 07:12:32 2018
@@ -49,14 +49,14 @@ vcvtps2ph $0, %ymm0, (%rax)
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
-# CHECK-NEXT: - - - 2.00 2.00 - 12.00 2.00 - 2.00 12.00 - - -
+# CHECK-NEXT: - - - 2.00 2.00 - 12.00 3.00 - 2.00 12.00 - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vcvtph2ps %xmm0, %xmm2
# CHECK-NEXT: - - - - - - 1.00 1.00 - - 1.00 - - - vcvtph2ps (%rax), %xmm2
# CHECK-NEXT: - - - - - - 2.00 - - - 2.00 - - - vcvtph2ps %xmm0, %ymm2
-# CHECK-NEXT: - - - - - - 2.00 1.00 - - 2.00 - - - vcvtph2ps (%rax), %ymm2
+# CHECK-NEXT: - - - - - - 2.00 2.00 - - 2.00 - - - vcvtph2ps (%rax), %ymm2
# CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vcvtps2ph $0, %xmm0, %xmm2
# CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vcvtps2ph $0, %xmm0, (%rax)
# CHECK-NEXT: - - - 1.00 1.00 - 2.00 - - - 2.00 - - - vcvtps2ph $0, %ymm0, %xmm2
More information about the llvm-commits mailing list